... | ... |
@@ -124,7 +124,8 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
124 | 124 |
title = "Batch Variance before correction") + |
125 | 125 |
ggplot2::theme(text=ggplot2::element_text(size=10)) |
126 | 126 |
|
127 |
- inSCE <- getUMAP(inSCE, useAssay = origAssay, reducedDimName = "umap.before") |
|
127 |
+ inSCE <- runUMAP(inSCE, useAssay = origAssay, useReducedDim = NULL, |
|
128 |
+ reducedDimName = "umap.before") |
|
128 | 129 |
umap.before <- plotSCEDimReduceColData(inSCE, batch, "umap.before", |
129 | 130 |
shape = condition, axisLabelSize = 9, |
130 | 131 |
axisSize = 8, dotSize = 1, |
... | ... |
@@ -145,10 +146,11 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
145 | 146 |
ggplot2::theme(text=ggplot2::element_text(size=10)) |
146 | 147 |
|
147 | 148 |
if (method == "ComBatSeq") { |
148 |
- inSCE <- getUMAP(inSCE, useAssay = corrMat, logNorm = TRUE, |
|
149 |
- reducedDimName = "umap.after") |
|
149 |
+ inSCE <- runUMAP(inSCE, useAssay = corrMat, useReducedDim = NULL, |
|
150 |
+ logNorm = TRUE, reducedDimName = "umap.after") |
|
150 | 151 |
} else { |
151 |
- inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
|
152 |
+ inSCE <- runUMAP(inSCE, useAssay = corrMat, useReducedDim = NULL, |
|
153 |
+ logNorm = FALSE, reducedDimName = "umap.after") |
|
152 | 154 |
} |
153 | 155 |
} else if (matType == "altExp") { |
154 | 156 |
# Doing log, because only Seurat returns altExp, |
... | ... |
@@ -161,8 +163,8 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
161 | 163 |
title = paste0("Batch Variance corrected with ", |
162 | 164 |
method)) + |
163 | 165 |
ggplot2::theme(text=ggplot2::element_text(size=10)) |
164 |
- inSCE <- getUMAP(inSCE, useAltExp = corrMat, useAssay = corrMat, |
|
165 |
- reducedDimName = "umap.after") |
|
166 |
+ inSCE <- runQuickUMAP(inSCE, useAssay = corrMat, useAltExp = corrMat, |
|
167 |
+ reducedDimName = "umap.after") |
|
166 | 168 |
} else if (matType == "reducedDim") { |
167 | 169 |
bv.after <- plotBatchVariance(inSCE, useReddim = corrMat, batch = batch, |
168 | 170 |
condition = condition, |
... | ... |
@@ -173,7 +175,7 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
173 | 175 |
SingleCellExperiment::reducedDim(inSCE, "umap.after") <- |
174 | 176 |
SingleCellExperiment::reducedDim(inSCE, corrMat) |
175 | 177 |
} else { |
176 |
- inSCE <- getUMAP(inSCE, useReducedDim = corrMat, |
|
178 |
+ inSCE <- runUMAP(inSCE, useReducedDim = corrMat, |
|
177 | 179 |
reducedDimName = "umap.after") |
178 | 180 |
} |
179 | 181 |
} else { |
... | ... |
@@ -55,9 +55,8 @@ |
55 | 55 |
"matType. Force using user specification.") |
56 | 56 |
} |
57 | 57 |
matType <- ifelse(is.null(matType), bcInfo$matType, matType) |
58 |
- batch <- ifelse(is.null(batch), bcInfo$batch, batch) |
|
58 |
+ if (is.null(batch)) batch <- bcInfo$batch |
|
59 | 59 |
if (is.null(condition)) condition <- bcInfo$condition |
60 |
- #condition <- ifelse(is.null(condition), bcInfo$condition, condition) |
|
61 | 60 |
} |
62 | 61 |
} |
63 | 62 |
return(list(origAssay = origAssay, |
... | ... |
@@ -94,7 +93,7 @@ |
94 | 93 |
#' @return An object of class \code{"gtable"}, combining four \code{ggplot}s. |
95 | 94 |
#' @examples |
96 | 95 |
#' data("sceBatches") |
97 |
-#' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts") |
|
96 |
+#' logcounts(sceBatches) <- log1p(counts(sceBatches)) |
|
98 | 97 |
#' sceBatches <- runLimmaBC(sceBatches) |
99 | 98 |
#' plotBatchCorrCompare(sceBatches, "LIMMA", condition = "cell_type") |
100 | 99 |
#' @export |
... | ... |
@@ -232,59 +231,33 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
232 | 231 |
#' condition = "cell_type") |
233 | 232 |
plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
234 | 233 |
useAltExp = NULL, batch = 'batch', |
235 |
- condition=NULL, title = NULL) { |
|
236 |
- if(!inherits(inSCE, 'SingleCellExperiment')){ |
|
237 |
- stop("'inSCE' must inherit from 'SingleCellExperiment'.") |
|
238 |
- } |
|
239 |
- if(is.null(useAssay) + is.null(useReddim) + is.null(useAltExp) != 2){ |
|
240 |
- stop("One and only one of `useAssay`, `useReddim`, ", |
|
241 |
- "`usAltExp` has to be specified.") |
|
242 |
- } |
|
243 |
- if(!is.null(useAssay)){ |
|
244 |
- if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){ |
|
245 |
- stop("'useAssay' not found in 'inSCE'.") |
|
246 |
- } |
|
247 |
- mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
248 |
- } |
|
249 |
- if(!is.null(useReddim)){ |
|
250 |
- if(!useReddim %in% SingleCellExperiment::reducedDimNames(inSCE)){ |
|
251 |
- stop("'useReddim not found in 'inSCE'.") |
|
252 |
- } |
|
253 |
- mat <- t(SingleCellExperiment::reducedDim(inSCE, useReddim)) |
|
254 |
- } |
|
255 |
- if(!is.null(useAltExp)){ |
|
256 |
- if(!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)){ |
|
257 |
- stop("'useAltExp not found in 'inSCE'.") |
|
258 |
- } |
|
259 |
- ae <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
260 |
- mat <- SummarizedExperiment::assay(ae) |
|
261 |
- } |
|
234 |
+ condition = NULL, title = NULL) { |
|
235 |
+ useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, |
|
236 |
+ useReducedDim = useReddim, useAltExp = useAltExp, |
|
237 |
+ returnMatrix = TRUE, cellAsCol = TRUE) |
|
238 |
+ mat <- useMat$mat |
|
262 | 239 |
if(is.null(batch)){ |
263 | 240 |
stop("Batch annotation has to be given.") |
264 |
- } else{ |
|
265 |
- if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
266 |
- stop("'batch' not found in 'inSCE'.") |
|
267 |
- } |
|
268 | 241 |
} |
242 |
+ batchCol <- .manageCellVar(inSCE, var = batch, as.factor = TRUE) |
|
269 | 243 |
if(!inherits(mat, 'matrix')){ |
270 | 244 |
mat <- as.matrix(mat) |
271 | 245 |
} |
272 |
- batchCol <- SummarizedExperiment::colData(inSCE)[, batch] |
|
273 |
- nlb <- nlevels(as.factor(batchCol)) |
|
246 |
+ nlb <- nlevels(batchCol) |
|
274 | 247 |
if (nlb <= 1){ |
275 | 248 |
stop("No more than one batch found in specified annotation") |
276 | 249 |
} else { |
277 |
- batchMod <- stats::model.matrix(~as.factor(batchCol)) |
|
250 |
+ batchMod <- stats::model.matrix(~batchCol) |
|
278 | 251 |
} |
279 | 252 |
if (is.null(condition)){ |
280 | 253 |
condMod <- matrix(rep(1, ncol(mat)), ncol = 1) |
281 | 254 |
} else { |
282 |
- condCol <- SingleCellExperiment::colData(inSCE)[, condition] |
|
283 |
- nlc <- nlevels(as.factor(condCol)) |
|
255 |
+ condCol <- .manageCellVar(inSCE, var = condition, as.factor = TRUE) |
|
256 |
+ nlc <- nlevels(condCol) |
|
284 | 257 |
if (nlc <= 1){ |
285 | 258 |
condMod <- matrix(rep(1, ncol(mat)), ncol = 1) |
286 | 259 |
} else { |
287 |
- condMod <- stats::model.matrix(~as.factor(condCol)) |
|
260 |
+ condMod <- stats::model.matrix(~condCol) |
|
288 | 261 |
} |
289 | 262 |
} |
290 | 263 |
mod <- cbind(condMod, batchMod[, -1]) |
... | ... |
@@ -146,7 +146,7 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
146 | 146 |
ggplot2::theme(text=ggplot2::element_text(size=10)) |
147 | 147 |
|
148 | 148 |
if (method == "ComBatSeq") { |
149 |
- inSCE <- getUMAP(inSCE, useAssay = corrMat, logNorm = TRUE, |
|
149 |
+ inSCE <- getUMAP(inSCE, useAssay = corrMat, logNorm = TRUE, |
|
150 | 150 |
reducedDimName = "umap.after") |
151 | 151 |
} else { |
152 | 152 |
inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
... | ... |
@@ -174,7 +174,7 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
174 | 174 |
SingleCellExperiment::reducedDim(inSCE, "umap.after") <- |
175 | 175 |
SingleCellExperiment::reducedDim(inSCE, corrMat) |
176 | 176 |
} else { |
177 |
- inSCE <- getUMAP(inSCE, useReducedDim = corrMat, |
|
177 |
+ inSCE <- getUMAP(inSCE, useReducedDim = corrMat, |
|
178 | 178 |
reducedDimName = "umap.after") |
179 | 179 |
} |
180 | 180 |
} else { |
... | ... |
@@ -366,7 +366,7 @@ plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
366 | 366 |
#' \code{colData(inSCE)}. Default \code{"batch"}. |
367 | 367 |
#' @param xlab label for x-axis. Default \code{"batch"}. |
368 | 368 |
#' @param ylab label for y-axis. Default \code{"Feature Mean"}. |
369 |
-#' @param ... Additional arguments passed to \code{\link{.ggViolin}}. |
|
369 |
+#' @param ... Additional arguments passed to \code{.ggViolin}. |
|
370 | 370 |
#' @examples |
371 | 371 |
#' data('sceBatches', package = 'singleCellTK') |
372 | 372 |
#' plotSCEBatchFeatureMean(sceBatches, useAssay = "counts") |
... | ... |
@@ -146,10 +146,10 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
146 | 146 |
ggplot2::theme(text=ggplot2::element_text(size=10)) |
147 | 147 |
|
148 | 148 |
if (method == "ComBatSeq") { |
149 |
- inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
|
149 |
+ inSCE <- getUMAP(inSCE, useAssay = corrMat, logNorm = TRUE, |
|
150 |
+ reducedDimName = "umap.after") |
|
150 | 151 |
} else { |
151 |
- inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after", |
|
152 |
- logNorm = FALSE) |
|
152 |
+ inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
|
153 | 153 |
} |
154 | 154 |
} else if (matType == "altExp") { |
155 | 155 |
# Doing log, because only Seurat returns altExp, |
... | ... |
@@ -174,7 +174,7 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
174 | 174 |
SingleCellExperiment::reducedDim(inSCE, "umap.after") <- |
175 | 175 |
SingleCellExperiment::reducedDim(inSCE, corrMat) |
176 | 176 |
} else { |
177 |
- inSCE <- getUMAP(inSCE, useAssay = NULL, useReducedDim = corrMat, |
|
177 |
+ inSCE <- getUMAP(inSCE, useReducedDim = corrMat, |
|
178 | 178 |
reducedDimName = "umap.after") |
179 | 179 |
} |
180 | 180 |
} else { |
... | ... |
@@ -93,6 +93,7 @@ |
93 | 93 |
#' \code{"reducedDim"}. |
94 | 94 |
#' @return An object of class \code{"gtable"}, combining four \code{ggplot}s. |
95 | 95 |
#' @examples |
96 |
+#' data("sceBatches") |
|
96 | 97 |
#' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts") |
97 | 98 |
#' sceBatches <- runLimmaBC(sceBatches) |
98 | 99 |
#' plotBatchCorrCompare(sceBatches, "LIMMA", condition = "cell_type") |
... | ... |
@@ -99,8 +99,8 @@ |
99 | 99 |
#' @export |
100 | 100 |
#' @author Yichen Wang |
101 | 101 |
plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
102 |
- origAssay = NULL, origLogged = NULL, method = NULL, |
|
103 |
- matType = NULL) { |
|
102 |
+ origAssay = NULL, origLogged = NULL, |
|
103 |
+ method = NULL, matType = NULL) { |
|
104 | 104 |
if(!inherits(inSCE, "SingleCellExperiment")){ |
105 | 105 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
106 | 106 |
} |
... | ... |
@@ -144,8 +144,12 @@ plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
144 | 144 |
method)) + |
145 | 145 |
ggplot2::theme(text=ggplot2::element_text(size=10)) |
146 | 146 |
|
147 |
- |
|
148 |
- inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
|
147 |
+ if (method == "ComBatSeq") { |
|
148 |
+ inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
|
149 |
+ } else { |
|
150 |
+ inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after", |
|
151 |
+ logNorm = FALSE) |
|
152 |
+ } |
|
149 | 153 |
} else if (matType == "altExp") { |
150 | 154 |
# Doing log, because only Seurat returns altExp, |
151 | 155 |
# and the assay inside is not logged |
... | ... |
@@ -1,3 +1,193 @@ |
1 |
+.searchBCDefaultInfo <- function(inSCE, corrMat, origAssay, matType) { |
|
2 |
+ if (is.null(origAssay)) { |
|
3 |
+ if ("counts" %in% expDataNames(inSCE)) { |
|
4 |
+ origAssay <- "counts" |
|
5 |
+ } else { |
|
6 |
+ origAssay <- expDataNames(inSCE)[1] |
|
7 |
+ } |
|
8 |
+ warning("using '", origAssay, "' for comparison.") |
|
9 |
+ } |
|
10 |
+ |
|
11 |
+ if (is.null(matType)) { |
|
12 |
+ if (corrMat %in% SummarizedExperiment::assayNames(inSCE)) { |
|
13 |
+ matType <- "assay" |
|
14 |
+ } else if (corrMat %in% SingleCellExperiment::altExpNames(inSCE)) { |
|
15 |
+ matType <- "altExp" |
|
16 |
+ } else if (corrMat %in% SingleCellExperiment::reducedDimNames(inSCE)) { |
|
17 |
+ matType <- "reducedDim" |
|
18 |
+ } else { |
|
19 |
+ stop("Corrected Matrix name '", corrMat, "' not found in inSCE") |
|
20 |
+ } |
|
21 |
+ } |
|
22 |
+ |
|
23 |
+ return(c(origAssay, matType)) |
|
24 |
+} |
|
25 |
+ |
|
26 |
+.checkBCMeta <- function(inSCE, corrMat, origAssay, origLogged, method, matType, |
|
27 |
+ batch, condition) { |
|
28 |
+ if (!is.null(matType)) { |
|
29 |
+ if (!matType %in% c("assay", "altExp", "reducedDim")) { |
|
30 |
+ stop("Wrong matrix type '", matType, "'. Choose from 'assay', 'altExp', ", |
|
31 |
+ "'reducedDim'.") |
|
32 |
+ } |
|
33 |
+ } |
|
34 |
+ if (!"batchCorr" %in% names(S4Vectors::metadata(inSCE))) { |
|
35 |
+ warning("Batch correction result from SCTK not found.") |
|
36 |
+ s <- .searchBCDefaultInfo(inSCE, corrMat, origAssay, matType) |
|
37 |
+ origAssay <- ifelse(is.null(origAssay), s[1], origAssay) |
|
38 |
+ method <- ifelse(is.null(method), "Unidentified Method", method) |
|
39 |
+ matType <- ifelse(is.null(matType), s[2], matType) |
|
40 |
+ } else { |
|
41 |
+ if (!corrMat %in% names(S4Vectors::metadata(inSCE)$batchCorr)) { |
|
42 |
+ warning("'", corrMat, "' not identified as a Batch correction result ", |
|
43 |
+ "from SCTK") |
|
44 |
+ s <- .searchBCDefaultInfo(inSCE, corrMat, origAssay, matType) |
|
45 |
+ origAssay <- ifelse(is.null(origAssay), s[1], origAssay) |
|
46 |
+ method <- ifelse(is.null(method), "Unidentified Method", method) |
|
47 |
+ matType <- ifelse(is.null(matType), s[2], matType) |
|
48 |
+ } else { |
|
49 |
+ bcInfo <- S4Vectors::metadata(inSCE)$batchCorr[[corrMat]] |
|
50 |
+ origAssay <- ifelse(is.null(origAssay), bcInfo$useAssay, origAssay) |
|
51 |
+ origLogged <- ifelse(is.null(origLogged), bcInfo$origLogged, origLogged) |
|
52 |
+ method <- ifelse(is.null(method), bcInfo$method, method) |
|
53 |
+ if (!is.null(matType) && matType != bcInfo$matType) { |
|
54 |
+ warning("User specified matType different from SCTK identified ", |
|
55 |
+ "matType. Force using user specification.") |
|
56 |
+ } |
|
57 |
+ matType <- ifelse(is.null(matType), bcInfo$matType, matType) |
|
58 |
+ batch <- ifelse(is.null(batch), bcInfo$batch, batch) |
|
59 |
+ if (is.null(condition)) condition <- bcInfo$condition |
|
60 |
+ #condition <- ifelse(is.null(condition), bcInfo$condition, condition) |
|
61 |
+ } |
|
62 |
+ } |
|
63 |
+ return(list(origAssay = origAssay, |
|
64 |
+ origLogged = origLogged, |
|
65 |
+ method = method, |
|
66 |
+ matType = matType, |
|
67 |
+ batch = batch, |
|
68 |
+ condition = condition)) |
|
69 |
+} |
|
70 |
+ |
|
71 |
+#' Plot comparison of batch corrected result against original assay |
|
72 |
+#' @details Four plots will be combined. Two of them are violin/box-plots for |
|
73 |
+#' percent variance explained by the batch variation, and optionally the |
|
74 |
+#' covariate, for original and corrected. The other two are UMAPs of the |
|
75 |
+#' original assay and the correction result matrix. If SCTK batch correction |
|
76 |
+#' methods are performed in advance, this function will automatically detect |
|
77 |
+#' necessary input. Otherwise, users can also customize the input. Future |
|
78 |
+#' improvement might include solution to reduce redundant UMAP calculation. |
|
79 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. |
|
80 |
+#' @param corrMat A single character indicating the name of the corrected matrix. |
|
81 |
+#' @param batch A single character. The name of batch annotation column in |
|
82 |
+#' \code{colData(inSCE)}. |
|
83 |
+#' @param condition A single character. The name of an additional covariate |
|
84 |
+#' annotation column in \code{colData(inSCE)}. |
|
85 |
+#' @param origAssay A single character indicating what the original assay used |
|
86 |
+#' for batch correction is. |
|
87 |
+#' @param origLogged Logical scalar indicating whether \code{origAssay} is |
|
88 |
+#' log-normalized. |
|
89 |
+#' @param method A single character indicating the name of the batch correction |
|
90 |
+#' method. Only used for the titles of plots. |
|
91 |
+#' @param matType A single character indicating the type of the batch correction |
|
92 |
+#' result matrix, choose from \code{"assay"}, \code{"altExp"}, |
|
93 |
+#' \code{"reducedDim"}. |
|
94 |
+#' @return An object of class \code{"gtable"}, combining four \code{ggplot}s. |
|
95 |
+#' @examples |
|
96 |
+#' sceBatches <- scaterlogNormCounts(sceBatches, "logcounts") |
|
97 |
+#' sceBatches <- runLimmaBC(sceBatches) |
|
98 |
+#' plotBatchCorrCompare(sceBatches, "LIMMA", condition = "cell_type") |
|
99 |
+#' @export |
|
100 |
+#' @author Yichen Wang |
|
101 |
+plotBatchCorrCompare <- function(inSCE, corrMat, batch = NULL, condition = NULL, |
|
102 |
+ origAssay = NULL, origLogged = NULL, method = NULL, |
|
103 |
+ matType = NULL) { |
|
104 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
105 |
+ stop("\"inSCE\" should be a SingleCellExperiment Object.") |
|
106 |
+ } |
|
107 |
+ m <- .checkBCMeta(inSCE, corrMat, origAssay, origLogged, method, matType, |
|
108 |
+ batch, condition) |
|
109 |
+ origAssay <- m$origAssay |
|
110 |
+ origLogged <- m$origLogged |
|
111 |
+ method <- m$method |
|
112 |
+ matType <- m$matType |
|
113 |
+ batch <- m$batch |
|
114 |
+ condition <- m$condition |
|
115 |
+ |
|
116 |
+ if (isFALSE(origLogged)) { |
|
117 |
+ inSCE <- scaterlogNormCounts(inSCE, origAssay, origAssay) |
|
118 |
+ } |
|
119 |
+ |
|
120 |
+ # Batch Variance Plot for origAssay |
|
121 |
+ bv.before <- plotBatchVariance(inSCE, useAssay = origAssay, useReddim = NULL, |
|
122 |
+ useAltExp = NULL, batch = batch, |
|
123 |
+ condition = condition, |
|
124 |
+ title = "Batch Variance before correction") + |
|
125 |
+ ggplot2::theme(text=ggplot2::element_text(size=10)) |
|
126 |
+ |
|
127 |
+ inSCE <- getUMAP(inSCE, useAssay = origAssay, reducedDimName = "umap.before") |
|
128 |
+ umap.before <- plotSCEDimReduceColData(inSCE, batch, "umap.before", |
|
129 |
+ shape = condition, axisLabelSize = 9, |
|
130 |
+ axisSize = 8, dotSize = 1, |
|
131 |
+ titleSize = 12, labelClusters = FALSE, |
|
132 |
+ legendSize = 10, legendTitle = "batch", |
|
133 |
+ legendTitleSize = 10, |
|
134 |
+ title = "UMAP before correction") |
|
135 |
+ |
|
136 |
+ if (matType == "assay") { |
|
137 |
+ if (isFALSE(origLogged)) { |
|
138 |
+ inSCE <- scaterlogNormCounts(inSCE, corrMat, corrMat) |
|
139 |
+ } |
|
140 |
+ # Batch Variance Plot for CorrMat |
|
141 |
+ bv.after <- plotBatchVariance(inSCE, useAssay = corrMat, batch = batch, |
|
142 |
+ condition = condition, |
|
143 |
+ title = paste0("Batch Variance corrected with ", |
|
144 |
+ method)) + |
|
145 |
+ ggplot2::theme(text=ggplot2::element_text(size=10)) |
|
146 |
+ |
|
147 |
+ |
|
148 |
+ inSCE <- getUMAP(inSCE, useAssay = corrMat, reducedDimName = "umap.after") |
|
149 |
+ } else if (matType == "altExp") { |
|
150 |
+ # Doing log, because only Seurat returns altExp, |
|
151 |
+ # and the assay inside is not logged |
|
152 |
+ ae <- SingleCellExperiment::altExp(inSCE, corrMat) |
|
153 |
+ ae <- scaterlogNormCounts(ae, corrMat, corrMat) |
|
154 |
+ SingleCellExperiment::altExp(inSCE, corrMat) <- ae |
|
155 |
+ bv.after <- plotBatchVariance(inSCE, useAltExp = corrMat, batch = batch, |
|
156 |
+ condition = condition, |
|
157 |
+ title = paste0("Batch Variance corrected with ", |
|
158 |
+ method)) + |
|
159 |
+ ggplot2::theme(text=ggplot2::element_text(size=10)) |
|
160 |
+ inSCE <- getUMAP(inSCE, useAltExp = corrMat, useAssay = corrMat, |
|
161 |
+ reducedDimName = "umap.after") |
|
162 |
+ } else if (matType == "reducedDim") { |
|
163 |
+ bv.after <- plotBatchVariance(inSCE, useReddim = corrMat, batch = batch, |
|
164 |
+ condition = condition, |
|
165 |
+ title = paste0("Batch Variance corrected with ", |
|
166 |
+ method)) + |
|
167 |
+ ggplot2::theme(text=ggplot2::element_text(size=10)) |
|
168 |
+ if (method == "BBKNN") { |
|
169 |
+ SingleCellExperiment::reducedDim(inSCE, "umap.after") <- |
|
170 |
+ SingleCellExperiment::reducedDim(inSCE, corrMat) |
|
171 |
+ } else { |
|
172 |
+ inSCE <- getUMAP(inSCE, useAssay = NULL, useReducedDim = corrMat, |
|
173 |
+ reducedDimName = "umap.after") |
|
174 |
+ } |
|
175 |
+ } else { |
|
176 |
+ stop("Cannot identify result matrix type") |
|
177 |
+ } |
|
178 |
+ umap.after <- plotSCEDimReduceColData(inSCE, batch, "umap.after", dim1 = 1, |
|
179 |
+ dim2 = 2, |
|
180 |
+ shape = condition, axisLabelSize = 9, |
|
181 |
+ axisSize = 8, dotSize = 1, |
|
182 |
+ titleSize = 12, labelClusters = FALSE, |
|
183 |
+ legendSize = 10, legendTitle = "batch", |
|
184 |
+ legendTitleSize = 10, |
|
185 |
+ title = "UMAP after correction") + |
|
186 |
+ ggplot2::theme(text=ggplot2::element_text(size=8)) |
|
187 |
+ return(gridExtra::grid.arrange(bv.before, bv.after, |
|
188 |
+ umap.before, umap.after, nrow = 2)) |
|
189 |
+} |
|
190 |
+ |
|
1 | 191 |
#' Plot the percent of the variation that is explained by batch and condition |
2 | 192 |
#' in the data |
3 | 193 |
#' |
... | ... |
@@ -101,23 +291,27 @@ plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
101 | 291 |
explainedVariation <- round(cbind(`Full (Condition+Batch)` = r2Full, |
102 | 292 |
Condition = condR2, |
103 | 293 |
Batch = batchR2), 5) * 100 |
104 |
- colnames(explainedVariation) <- c('Full (Condition+Batch)', |
|
105 |
- paste("Condition:", condition), |
|
106 |
- paste("Batch:", batch)) |
|
294 |
+ colnames(explainedVariation) <- c("Full", |
|
295 |
+ ifelse(is.null(condition), "No Condition", condition), |
|
296 |
+ batch) |
|
107 | 297 |
exVarM <- reshape2::melt(explainedVariation) |
108 | 298 |
colnames(exVarM) <- c("Gene", "Model", "Percent.Explained.Variation") |
109 | 299 |
exVarM$Model <- factor(exVarM$Model) |
110 | 300 |
a <- ggplot2::ggplot(exVarM, |
111 | 301 |
ggplot2::aes_string("Model", |
112 | 302 |
"Percent.Explained.Variation")) + |
113 |
- ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
|
114 |
- ggplot2::geom_boxplot(width = .1) + |
|
115 |
- ggplot2::xlab("Model") + |
|
116 |
- ggplot2::ylab("Percent Explained Variation") + |
|
117 |
- ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
|
118 |
- guide = FALSE) + |
|
119 |
- ggplot2::ggtitle(title) |
|
120 |
- a <- .ggSCTKTheme(a) |
|
303 |
+ ggplot2::geom_point(position = ggplot2::position_jitter(width = 0.2), |
|
304 |
+ size = 1, alpha = 0.9) + |
|
305 |
+ ggplot2::geom_violin(ggplot2::aes_string(fill = "Model"), alpha = 0.7, ) + |
|
306 |
+ ggplot2::geom_boxplot(alpha = 0.4, width = 0.2) + |
|
307 |
+ ggplot2::ylim(0, 100) + |
|
308 |
+ ggplot2::xlab("Model") + |
|
309 |
+ ggplot2::ylab("Explained Variation %") + |
|
310 |
+ ggplot2::ggtitle(title) + |
|
311 |
+ ggplot2::theme_bw() + |
|
312 |
+ ggplot2::theme(legend.position = "none", |
|
313 |
+ panel.grid.major = ggplot2::element_blank(), |
|
314 |
+ panel.grid.minor = ggplot2::element_blank()) |
|
121 | 315 |
return(a) |
122 | 316 |
} |
123 | 317 |
|
... | ... |
@@ -30,13 +30,11 @@ |
30 | 30 |
#' condition, and batch+condition. |
31 | 31 |
#' @export |
32 | 32 |
#' @examples |
33 |
-#' \dontrun{ |
|
34 |
-#' data('sceBatches', package = 'singleCellTK') |
|
35 |
-#' plotBatchVariance(sceBatches, |
|
36 |
-#' useAssay="logcounts", |
|
37 |
-#' batch="batch", |
|
38 |
-#' condition = "cell_type") |
|
39 |
-#' } |
|
33 |
+#' data('sceBatches', package = 'singleCellTK') |
|
34 |
+#' plotBatchVariance(sceBatches, |
|
35 |
+#' useAssay="counts", |
|
36 |
+#' batch="batch", |
|
37 |
+#' condition = "cell_type") |
|
40 | 38 |
plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
41 | 39 |
useAltExp = NULL, batch = 'batch', |
42 | 40 |
condition=NULL, title = NULL) { |
... | ... |
@@ -171,10 +169,8 @@ plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
171 | 169 |
#' @param ylab label for y-axis. Default \code{"Feature Mean"}. |
172 | 170 |
#' @param ... Additional arguments passed to \code{\link{.ggViolin}}. |
173 | 171 |
#' @examples |
174 |
-#' \dontrun{ |
|
175 |
-#' data('sceBatches', package = 'singleCellTK') |
|
176 |
-#' plotSCEBatchFeatureMean(sceBatches, useAssay = "counts") |
|
177 |
-#' } |
|
172 |
+#' data('sceBatches', package = 'singleCellTK') |
|
173 |
+#' plotSCEBatchFeatureMean(sceBatches, useAssay = "counts") |
|
178 | 174 |
#' @return ggplot |
179 | 175 |
#' @export |
180 | 176 |
plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
... | ... |
@@ -31,18 +31,12 @@ |
31 | 31 |
#' @export |
32 | 32 |
#' @examples |
33 | 33 |
#' \dontrun{ |
34 |
-#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
|
35 |
-#' library(bladderbatch) |
|
36 |
-#' data(bladderdata) |
|
37 |
-#' dat <- as(as(bladderEset, "SummarizedExperiment"), |
|
38 |
-#' "SingleCellExperiment") |
|
39 |
-#' plotBatchVariance(dat, |
|
40 |
-#' useAssay="exprs", |
|
41 |
-#' batch="batch", |
|
42 |
-#' condition = "cancer") |
|
43 |
-#' } |
|
34 |
+#' data('sceBatches', package = 'singleCellTK') |
|
35 |
+#' plotBatchVariance(sceBatches, |
|
36 |
+#' useAssay="logcounts", |
|
37 |
+#' batch="batch", |
|
38 |
+#' condition = "cell_type") |
|
44 | 39 |
#' } |
45 |
-#' |
|
46 | 40 |
plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
47 | 41 |
useAltExp = NULL, batch = 'batch', |
48 | 42 |
condition=NULL, title = NULL) { |
... | ... |
@@ -177,8 +171,10 @@ plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
177 | 171 |
#' @param ylab label for y-axis. Default \code{"Feature Mean"}. |
178 | 172 |
#' @param ... Additional arguments passed to \code{\link{.ggViolin}}. |
179 | 173 |
#' @examples |
180 |
-#' data('sceBatches', package = 'singleCellTK') |
|
181 |
-#' plotSCEBatchFeatureMean(sceBatches, useAssay = "logcounts") |
|
174 |
+#' \dontrun{ |
|
175 |
+#' data('sceBatches', package = 'singleCellTK') |
|
176 |
+#' plotSCEBatchFeatureMean(sceBatches, useAssay = "counts") |
|
177 |
+#' } |
|
182 | 178 |
#' @return ggplot |
183 | 179 |
#' @export |
184 | 180 |
plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
... | ... |
@@ -34,8 +34,12 @@ |
34 | 34 |
#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
35 | 35 |
#' library(bladderbatch) |
36 | 36 |
#' data(bladderdata) |
37 |
-#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SingleCellExperiment") |
|
38 |
-#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
|
37 |
+#' dat <- as(as(bladderEset, "SummarizedExperiment"), |
|
38 |
+#' "SingleCellExperiment") |
|
39 |
+#' plotBatchVariance(dat, |
|
40 |
+#' useAssay="exprs", |
|
41 |
+#' batch="batch", |
|
42 |
+#' condition = "cancer") |
|
39 | 43 |
#' } |
40 | 44 |
#' } |
41 | 45 |
#' |
... | ... |
@@ -172,6 +172,9 @@ plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
172 | 172 |
#' @param xlab label for x-axis. Default \code{"batch"}. |
173 | 173 |
#' @param ylab label for y-axis. Default \code{"Feature Mean"}. |
174 | 174 |
#' @param ... Additional arguments passed to \code{\link{.ggViolin}}. |
175 |
+#' @examples |
|
176 |
+#' data('sceBatches', package = 'singleCellTK') |
|
177 |
+#' plotSCEBatchFeatureMean(sceBatches, useAssay = "logcounts") |
|
175 | 178 |
#' @return ggplot |
176 | 179 |
#' @export |
177 | 180 |
plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
... | ... |
@@ -220,7 +220,7 @@ plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
220 | 220 |
allMeans <- c(allMeans, DelayedArray::rowMeans(mat[,batchCol == i])) |
221 | 221 |
groupBy <- c(groupBy, rep(i, nrow(mat))) |
222 | 222 |
} |
223 |
- p <- .ggViolin(allMeans, groupby = groupBy, xlab = xlab, ylab = ylab, ...) |
|
223 |
+ p <- .ggViolin(allMeans, groupBy = groupBy, xlab = xlab, ylab = ylab, ...) |
|
224 | 224 |
p <- .ggSCTKTheme(p) |
225 | 225 |
return(p) |
226 | 226 |
} |
... | ... |
@@ -24,6 +24,8 @@ |
24 | 24 |
#' \code{colData(inSCE)}. Default \code{"batch"}. |
25 | 25 |
#' @param condition A single character. The name of an additional condition |
26 | 26 |
#' annotation column in \code{colData(inSCE)}. Default \code{NULL}. |
27 |
+#' @param title A single character. The title text on the top. Default |
|
28 |
+#' \code{NULL}. |
|
27 | 29 |
#' @return A ggplot object of a boxplot of variation explained by batch, |
28 | 30 |
#' condition, and batch+condition. |
29 | 31 |
#' @export |
... | ... |
@@ -39,7 +41,7 @@ |
39 | 41 |
#' |
40 | 42 |
plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
41 | 43 |
useAltExp = NULL, batch = 'batch', |
42 |
- condition=NULL) { |
|
44 |
+ condition=NULL, title = NULL) { |
|
43 | 45 |
if(!inherits(inSCE, 'SingleCellExperiment')){ |
44 | 46 |
stop("'inSCE' must inherit from 'SingleCellExperiment'.") |
45 | 47 |
} |
... | ... |
@@ -117,7 +119,8 @@ plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
117 | 119 |
ggplot2::xlab("Model") + |
118 | 120 |
ggplot2::ylab("Percent Explained Variation") + |
119 | 121 |
ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
120 |
- guide = FALSE) |
|
122 |
+ guide = FALSE) + |
|
123 |
+ ggplot2::ggtitle(title) |
|
121 | 124 |
a <- .ggSCTKTheme(a) |
122 | 125 |
return(a) |
123 | 126 |
} |
... | ... |
@@ -2,18 +2,30 @@ |
2 | 2 |
#' in the data |
3 | 3 |
#' |
4 | 4 |
#' Visualize the percent variation in the data that is explained by batch and |
5 |
-#' condition if it is given. |
|
5 |
+#' condition, individually, and that explained by combining both annotations. |
|
6 |
+#' Plotting only the variation explained by batch is supported but not |
|
7 |
+#' recommended, because this can be confounded by potential condition. |
|
6 | 8 |
#' |
7 |
-#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
|
8 |
-#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
|
9 |
-#' @param batch The column in the annotation data that corresponds to batch. |
|
10 |
-#' Required |
|
11 |
-#' @param condition The column in the annotation data that corresponds to |
|
12 |
-#' condition. Optional |
|
13 |
-#' @param pcInput A logical scalar indicating whether \code{useAssay} is in |
|
14 |
-#' \code{names(reducedDims(inSCE))}. Default \code{FALSE}. |
|
15 |
-#' @return A boxplot of variation explained by batch, condition, and |
|
16 |
-#' batch+condition (if applicable). |
|
9 |
+#' When condition and batch both are causing some variation, if the difference |
|
10 |
+#' between full variation and condition variation is close to batch variation, |
|
11 |
+#' this might imply that batches are causing some effect; if the difference is |
|
12 |
+#' much less than batch variation, then the batches are likely to be confounded |
|
13 |
+#' by the conditions. |
|
14 |
+#' |
|
15 |
+#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. |
|
16 |
+#' @param useAssay A single character. The name of the assay that stores the |
|
17 |
+#' value to plot. For \code{useReddim} and \code{useAltExp} also. Default |
|
18 |
+#' \code{NULL}. |
|
19 |
+#' @param useReddim A single character. The name of the dimension reduced |
|
20 |
+#' matrix that stores the value to plot. Default \code{NULL}. |
|
21 |
+#' @param useAltExp A single character. The name of the alternative experiment |
|
22 |
+#' that stores an assay of the value to plot. Default \code{NULL}. |
|
23 |
+#' @param batch A single character. The name of batch annotation column in |
|
24 |
+#' \code{colData(inSCE)}. Default \code{"batch"}. |
|
25 |
+#' @param condition A single character. The name of an additional condition |
|
26 |
+#' annotation column in \code{colData(inSCE)}. Default \code{NULL}. |
|
27 |
+#' @return A ggplot object of a boxplot of variation explained by batch, |
|
28 |
+#' condition, and batch+condition. |
|
17 | 29 |
#' @export |
18 | 30 |
#' @examples |
19 | 31 |
#' \dontrun{ |
... | ... |
@@ -25,27 +37,59 @@ |
25 | 37 |
#' } |
26 | 38 |
#' } |
27 | 39 |
#' |
28 |
-plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
|
29 |
- condition=NULL, pcInput = FALSE){ |
|
30 |
- if(isTRUE(pcInput)){ |
|
31 |
- mat <- t(SingleCellExperiment::reducedDim(inSCE, useAssay)) |
|
32 |
- } else { |
|
40 |
+plotBatchVariance <- function(inSCE, useAssay = NULL, useReddim = NULL, |
|
41 |
+ useAltExp = NULL, batch = 'batch', |
|
42 |
+ condition=NULL) { |
|
43 |
+ if(!inherits(inSCE, 'SingleCellExperiment')){ |
|
44 |
+ stop("'inSCE' must inherit from 'SingleCellExperiment'.") |
|
45 |
+ } |
|
46 |
+ if(is.null(useAssay) + is.null(useReddim) + is.null(useAltExp) != 2){ |
|
47 |
+ stop("One and only one of `useAssay`, `useReddim`, ", |
|
48 |
+ "`usAltExp` has to be specified.") |
|
49 |
+ } |
|
50 |
+ if(!is.null(useAssay)){ |
|
51 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){ |
|
52 |
+ stop("'useAssay' not found in 'inSCE'.") |
|
53 |
+ } |
|
33 | 54 |
mat <- SummarizedExperiment::assay(inSCE, useAssay) |
34 | 55 |
} |
56 |
+ if(!is.null(useReddim)){ |
|
57 |
+ if(!useReddim %in% SingleCellExperiment::reducedDimNames(inSCE)){ |
|
58 |
+ stop("'useReddim not found in 'inSCE'.") |
|
59 |
+ } |
|
60 |
+ mat <- t(SingleCellExperiment::reducedDim(inSCE, useReddim)) |
|
61 |
+ } |
|
62 |
+ if(!is.null(useAltExp)){ |
|
63 |
+ if(!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)){ |
|
64 |
+ stop("'useAltExp not found in 'inSCE'.") |
|
65 |
+ } |
|
66 |
+ ae <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
67 |
+ mat <- SummarizedExperiment::assay(ae) |
|
68 |
+ } |
|
69 |
+ if(is.null(batch)){ |
|
70 |
+ stop("Batch annotation has to be given.") |
|
71 |
+ } else{ |
|
72 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
73 |
+ stop("'batch' not found in 'inSCE'.") |
|
74 |
+ } |
|
75 |
+ } |
|
76 |
+ if(!inherits(mat, 'matrix')){ |
|
77 |
+ mat <- as.matrix(mat) |
|
78 |
+ } |
|
35 | 79 |
batchCol <- SummarizedExperiment::colData(inSCE)[, batch] |
36 | 80 |
nlb <- nlevels(as.factor(batchCol)) |
37 | 81 |
if (nlb <= 1){ |
38 |
- batchMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
82 |
+ stop("No more than one batch found in specified annotation") |
|
39 | 83 |
} else { |
40 | 84 |
batchMod <- stats::model.matrix(~as.factor(batchCol)) |
41 | 85 |
} |
42 |
- condCol <- SingleCellExperiment::colData(inSCE)[, condition] |
|
43 | 86 |
if (is.null(condition)){ |
44 |
- stop("condition required for now") |
|
87 |
+ condMod <- matrix(rep(1, ncol(mat)), ncol = 1) |
|
45 | 88 |
} else { |
89 |
+ condCol <- SingleCellExperiment::colData(inSCE)[, condition] |
|
46 | 90 |
nlc <- nlevels(as.factor(condCol)) |
47 | 91 |
if (nlc <= 1){ |
48 |
- condMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
92 |
+ condMod <- matrix(rep(1, ncol(mat)), ncol = 1) |
|
49 | 93 |
} else { |
50 | 94 |
condMod <- stats::model.matrix(~as.factor(condCol)) |
51 | 95 |
} |
... | ... |
@@ -59,16 +103,22 @@ plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
59 | 103 |
explainedVariation <- round(cbind(`Full (Condition+Batch)` = r2Full, |
60 | 104 |
Condition = condR2, |
61 | 105 |
Batch = batchR2), 5) * 100 |
106 |
+ colnames(explainedVariation) <- c('Full (Condition+Batch)', |
|
107 |
+ paste("Condition:", condition), |
|
108 |
+ paste("Batch:", batch)) |
|
62 | 109 |
exVarM <- reshape2::melt(explainedVariation) |
63 | 110 |
colnames(exVarM) <- c("Gene", "Model", "Percent.Explained.Variation") |
64 | 111 |
exVarM$Model <- factor(exVarM$Model) |
65 |
- a <- ggplot2::ggplot(exVarM, ggplot2::aes_string("Model", "Percent.Explained.Variation")) + |
|
66 |
- ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
|
67 |
- ggplot2::geom_boxplot(width = .1) + |
|
68 |
- ggplot2::xlab("Model") + |
|
69 |
- ggplot2::ylab("Percent Explained Variation") + |
|
70 |
- ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
|
71 |
- guide = FALSE) |
|
112 |
+ a <- ggplot2::ggplot(exVarM, |
|
113 |
+ ggplot2::aes_string("Model", |
|
114 |
+ "Percent.Explained.Variation")) + |
|
115 |
+ ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
|
116 |
+ ggplot2::geom_boxplot(width = .1) + |
|
117 |
+ ggplot2::xlab("Model") + |
|
118 |
+ ggplot2::ylab("Percent Explained Variation") + |
|
119 |
+ ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
|
120 |
+ guide = FALSE) |
|
121 |
+ a <- .ggSCTKTheme(a) |
|
72 | 122 |
return(a) |
73 | 123 |
} |
74 | 124 |
|
... | ... |
@@ -107,14 +157,15 @@ plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
107 | 157 |
|
108 | 158 |
#' Plot mean feature value in each batch of a SingleCellExperiment object |
109 | 159 |
#' @param inSCE \linkS4class{SingleCellExperiment} inherited object. |
110 |
-#' @param useAssay The name of the assay that stores the value to plot. Use |
|
111 |
-#' \code{useReddim} for dimension reduced matrix instead. Default \code{NULL}. |
|
112 |
-#' @param useReddim The name of the dimension reduced matrix that stores the |
|
113 |
-#' value to plot. Default \code{NULL}. |
|
114 |
-#' @param useAltExp The name of the alternative experiment that stores an assay |
|
115 |
-#' of the value to plot. Default \code{NULL}. |
|
116 |
-#' @param batch The column name of \code{colData(inSCE)} that indicates the |
|
117 |
-#' batch annotation. Default \code{"batch"}. |
|
160 |
+#' @param useAssay A single character. The name of the assay that stores the |
|
161 |
+#' value to plot. For \code{useReddim} and \code{useAltExp} also. Default |
|
162 |
+#' \code{NULL}. |
|
163 |
+#' @param useReddim A single character. The name of the dimension reduced |
|
164 |
+#' matrix that stores the value to plot. Default \code{NULL}. |
|
165 |
+#' @param useAltExp A single character. The name of the alternative experiment |
|
166 |
+#' that stores an assay of the value to plot. Default \code{NULL}. |
|
167 |
+#' @param batch A single character. The name of batch annotation column in |
|
168 |
+#' \code{colData(inSCE)}. Default \code{"batch"}. |
|
118 | 169 |
#' @param xlab label for x-axis. Default \code{"batch"}. |
119 | 170 |
#' @param ylab label for y-axis. Default \code{"Feature Mean"}. |
120 | 171 |
#' @param ... Additional arguments passed to \code{\link{.ggViolin}}. |
... | ... |
@@ -166,7 +166,6 @@ plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
166 | 166 |
allMeans <- c(allMeans, DelayedArray::rowMeans(mat[,batchCol == i])) |
167 | 167 |
groupBy <- c(groupBy, rep(i, nrow(mat))) |
168 | 168 |
} |
169 |
- print(allMeans) |
|
170 | 169 |
p <- .ggViolin(allMeans, groupby = groupBy, xlab = xlab, ylab = ylab, ...) |
171 | 170 |
p <- .ggSCTKTheme(p) |
172 | 171 |
return(p) |
... | ... |
@@ -155,14 +155,19 @@ plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
155 | 155 |
stop("'batch' not found in 'inSCE'.") |
156 | 156 |
} |
157 | 157 |
} |
158 |
+ if(!inherits(mat, 'matrix')){ |
|
159 |
+ mat <- as.matrix(mat) |
|
160 |
+ } |
|
158 | 161 |
batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
159 | 162 |
uniqBatch <- as.vector(unique(batchCol)) #as.vector in case batchCol is factor |
160 | 163 |
allMeans <- numeric() |
161 | 164 |
groupBy <- character() |
162 | 165 |
for(i in uniqBatch){ |
163 | 166 |
allMeans <- c(allMeans, DelayedArray::rowMeans(mat[,batchCol == i])) |
164 |
- groupBy <- c(groupBy, rep(i, nrow(inSCE))) |
|
167 |
+ groupBy <- c(groupBy, rep(i, nrow(mat))) |
|
165 | 168 |
} |
169 |
+ print(allMeans) |
|
166 | 170 |
p <- .ggViolin(allMeans, groupby = groupBy, xlab = xlab, ylab = ylab, ...) |
171 |
+ p <- .ggSCTKTheme(p) |
|
167 | 172 |
return(p) |
168 | 173 |
} |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
#' Visualize the percent variation in the data that is explained by batch and |
5 | 5 |
#' condition if it is given. |
6 | 6 |
#' |
7 |
-#' @param inSCE Input SCtkExperiment object. Required |
|
7 |
+#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
|
8 | 8 |
#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
9 | 9 |
#' @param batch The column in the annotation data that corresponds to batch. |
10 | 10 |
#' Required |
... | ... |
@@ -20,7 +20,7 @@ |
20 | 20 |
#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
21 | 21 |
#' library(bladderbatch) |
22 | 22 |
#' data(bladderdata) |
23 |
-#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
|
23 |
+#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SingleCellExperiment") |
|
24 | 24 |
#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
25 | 25 |
#' } |
26 | 26 |
#' } |
... | ... |
@@ -16,11 +16,13 @@ |
16 | 16 |
#' batch+condition (if applicable). |
17 | 17 |
#' @export |
18 | 18 |
#' @examples |
19 |
-#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
|
20 |
-#' library(bladderbatch) |
|
21 |
-#' data(bladderdata) |
|
22 |
-#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
|
23 |
-#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
|
19 |
+#' \dontrun{ |
|
20 |
+#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
|
21 |
+#' library(bladderbatch) |
|
22 |
+#' data(bladderdata) |
|
23 |
+#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
|
24 |
+#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
|
25 |
+#' } |
|
24 | 26 |
#' } |
25 | 27 |
#' |
26 | 28 |
plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
... | ... |
@@ -49,8 +51,8 @@ plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
49 | 51 |
} |
50 | 52 |
} |
51 | 53 |
mod <- cbind(condMod, batchMod[, -1]) |
52 |
- condTest <- batchqc_f.pvalue(mat, mod, batchMod) |
|
53 |
- batchTest <- batchqc_f.pvalue(mat, mod, condMod) |
|
54 |
+ condTest <- .batchqc_f.pvalue(mat, mod, batchMod) |
|
55 |
+ batchTest <- .batchqc_f.pvalue(mat, mod, condMod) |
|
54 | 56 |
r2Full <- condTest$r2Full |
55 | 57 |
condR2 <- batchTest$r2Reduced |
56 | 58 |
batchR2 <- condTest$r2Reduced |
... | ... |
@@ -70,7 +70,7 @@ plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
70 | 70 |
return(a) |
71 | 71 |
} |
72 | 72 |
|
73 |
-batchqc_f.pvalue <- function(dat, mod, mod0) { |
|
73 |
+.batchqc_f.pvalue <- function(dat, mod, mod0) { |
|
74 | 74 |
# F-test (full/reduced model) and returns R2 values |
75 | 75 |
# (full/reduced) as well. |
76 | 76 |
mod00 <- matrix(rep(1, ncol(dat)), ncol = 1) |
... | ... |
@@ -163,4 +163,4 @@ plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
163 | 163 |
} |
164 | 164 |
p <- .ggViolin(allMeans, groupby = groupBy, xlab = xlab, ylab = ylab, ...) |
165 | 165 |
return(p) |
166 |
-} |
|
167 | 166 |
\ No newline at end of file |
167 |
+} |
... | ... |
@@ -109,6 +109,8 @@ batchqc_f.pvalue <- function(dat, mod, mod0) { |
109 | 109 |
#' \code{useReddim} for dimension reduced matrix instead. Default \code{NULL}. |
110 | 110 |
#' @param useReddim The name of the dimension reduced matrix that stores the |
111 | 111 |
#' value to plot. Default \code{NULL}. |
112 |
+#' @param useAltExp The name of the alternative experiment that stores an assay |
|
113 |
+#' of the value to plot. Default \code{NULL}. |
|
112 | 114 |
#' @param batch The column name of \code{colData(inSCE)} that indicates the |
113 | 115 |
#' batch annotation. Default \code{"batch"}. |
114 | 116 |
#' @param xlab label for x-axis. Default \code{"batch"}. |
... | ... |
@@ -117,14 +119,13 @@ batchqc_f.pvalue <- function(dat, mod, mod0) { |
117 | 119 |
#' @return ggplot |
118 | 120 |
#' @export |
119 | 121 |
plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
120 |
- batch = 'batch', xlab='batch', ylab='Feature Mean', ...){ |
|
122 |
+ useAltExp = NULL, batch = 'batch', xlab='batch', ylab='Feature Mean', ...){ |
|
121 | 123 |
if(!inherits(inSCE, 'SingleCellExperiment')){ |
122 | 124 |
stop("'inSCE' must inherit from 'SingleCellExperiment'.") |
123 | 125 |
} |
124 |
- if(is.null(useAssay) & is.null(useReddim)){ |
|
125 |
- stop("Either `useAssay` or `useReddim` has to be specified.") |
|
126 |
- } else if(!is.null(useAssay) & !is.null(useReddim)){ |
|
127 |
- stop("Only one of `useAssay` and `useReddim` can be specified.") |
|
126 |
+ if(is.null(useAssay) + is.null(useReddim) + is.null(useAltExp) != 2){ |
|
127 |
+ stop("One and only one of `useAssay`, `useReddim`, ", |
|
128 |
+ "`usAltExp` has to be specified.") |
|
128 | 129 |
} |
129 | 130 |
if(!is.null(useAssay)){ |
130 | 131 |
if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)){ |
... | ... |
@@ -138,6 +139,13 @@ plotSCEBatchFeatureMean <- function(inSCE, useAssay = NULL, useReddim = NULL, |
138 | 139 |
} |
139 | 140 |
mat <- t(SingleCellExperiment::reducedDim(inSCE, useReddim)) |
140 | 141 |
} |
142 |
+ if(!is.null(useAltExp)){ |
|
143 |
+ if(!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)){ |
|
144 |
+ stop("'useAltExp not found in 'inSCE'.") |
|
145 |
+ } |
|
146 |
+ ae <- SingleCellExperiment::altExp(inSCE, useAltExp) |
|
147 |
+ mat <- SummarizedExperiment::assay(ae) |
|
148 |
+ } |
|
141 | 149 |
if(is.null(batch)){ |
142 | 150 |
stop("Batch annotation has to be given.") |
143 | 151 |
} else{ |
... | ... |
@@ -1,104 +1,104 @@ |
1 |
-#' Plot the percent of the variation that is explained by batch and condition |
|
2 |
-#' in the data |
|
3 |
-#' |
|
4 |
-#' Visualize the percent variation in the data that is explained by batch and |
|
5 |
-#' condition if it is given. |
|
6 |
-#' |
|
7 |
-#' @param inSCE Input SCtkExperiment object. Required |
|
8 |
-#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
|
9 |
-#' @param batch The column in the annotation data that corresponds to batch. |
|
10 |
-#' Required |
|
11 |
-#' @param condition The column in the annotation data that corresponds to |
|
12 |
-#' condition. Optional |
|
13 |
-#' |
|
14 |
-#' @return A boxplot of variation explained by batch, condition, and |
|
15 |
-#' batch+condition (if applicable). |
|
16 |
-#' @export |
|
17 |
-#' @examples |
|
18 |
-#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
|
19 |
-#' library(bladderbatch) |
|
20 |
-#' data(bladderdata) |
|
21 |
-#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
|
22 |
-#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
|
23 |
-#' } |
|
24 |
-#' |
|
25 |
-plotBatchVariance <- function(inSCE, useAssay="logcounts", batch, |
|
26 |
- condition=NULL){ |
|
27 |
- nlb <- nlevels(as.factor(SingleCellExperiment::colData(inSCE)[, batch])) |
|
28 |
- if (nlb <= 1){ |
|
29 |
- batchMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
30 |
- } else { |
|
31 |
- batchMod <- stats::model.matrix( |
|
32 |
- ~as.factor(SingleCellExperiment::colData(inSCE)[, batch])) |
|
33 |
- } |
|
34 |
- if (is.null(condition)){ |
|
35 |
- stop("condition required for now") |
|
36 |
- } else { |
|
37 |
- nlc <- nlevels(as.factor( |
|
38 |
- SingleCellExperiment::colData(inSCE)[, condition])) |
|
39 |
- if (nlc <= 1){ |
|
40 |
- condMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
41 |
- } else { |
|
42 |
- condMod <- stats::model.matrix( |
|
43 |
- ~as.factor(SingleCellExperiment::colData(inSCE)[, condition])) |
|
44 |
- } |
|
45 |
- } |
|
46 |
- |
|
47 |
- mod <- cbind(condMod, batchMod[, -1]) |
|
48 |
- |
|
49 |
- condTest <- batchqc_f.pvalue(SummarizedExperiment::assay(inSCE, useAssay), |
|
50 |
- mod, batchMod) |
|
51 |
- batchTest <- batchqc_f.pvalue( |
|
52 |
- SummarizedExperiment::assay(inSCE, useAssay), mod, condMod) |
|
53 |
- |
|
54 |
- r2Full <- condTest$r2Full |
|
55 |
- condR2 <- batchTest$r2Reduced |
|
56 |
- batchR2 <- condTest$r2Reduced |
|
57 |
- explainedVariation <- round(cbind(`Full (Condition+Batch)` = r2Full, |
|
58 |
- Condition = condR2, |
|
59 |
- Batch = batchR2), 5) * 100 |
|
60 |
- exVarM <- reshape2::melt(explainedVariation) |
|
61 |
- colnames(exVarM) <- c("Gene", "Model", "Percent.Explained.Variation") |
|
62 |
- exVarM$Model <- factor(exVarM$Model) |
|
63 |
- a <- ggplot2::ggplot(exVarM, ggplot2::aes_string("Model", "Percent.Explained.Variation")) + |
|
64 |
- ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
|
65 |
- ggplot2::geom_boxplot(width = .1) + |
|
66 |
- ggplot2::xlab("Model") + |
|
67 |
- ggplot2::ylab("Percent Explained Variation") + |
|
68 |
- ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
|
69 |
- guide = FALSE) |
|
70 |
- return(a) |
|
71 |
-} |
|
72 |
- |
|
73 |
-batchqc_f.pvalue <- function(dat, mod, mod0) { |
|
74 |
- # F-test (full/reduced model) and returns R2 values |
|
75 |
- # (full/reduced) as well. |
|
76 |
- mod00 <- matrix(rep(1, ncol(dat)), ncol = 1) |
|
77 |
- n <- dim(dat)[2] |
|
78 |
- m <- dim(dat)[1] |
|
79 |
- df1 <- dim(mod)[2] |
|
80 |
- df0 <- dim(mod0)[2] |
|
81 |
- p <- rep(0, m) |
|
82 |
- |
|
83 |
- resid <- dat - dat %*% mod %*% solve(t(mod) %*% mod) %*% t(mod) |
|
84 |
- rss1 <- rowSums(resid * resid) |
|
85 |
- rm(resid) |
|
86 |
- |
|
87 |
- resid0 <- dat - dat %*% mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0) |
|
88 |
- rss0 <- rowSums(resid0 * resid0) |
|
89 |
- rm(resid0) |
|
90 |
- |
|
91 |
- resid00 <- dat - dat %*% mod00 %*% solve(t(mod00) %*% mod00) %*% t(mod00) |
|
92 |
- rss00 <- rowSums(resid00 * resid00) |
|
93 |
- rm(resid00) |
|
94 |
- |
|
95 |
- r2Full <- 1 - rss1 / rss00 |
|
96 |
- r2Reduced <- 1 - rss0 / rss00 |
|
97 |
- |
|
98 |
- p <- 1 |
|
99 |
- if (df1 > df0) { |
|
100 |
- fstats <- ((rss0 - rss1) / (df1 - df0)) / (rss1 / (n - df1)) |
|
101 |
- p <- 1 - stats::pf(fstats, df1 = (df1 - df0), df2 = (n - df1)) |
|
102 |
- } |
|
103 |
- return(list(p = p, r2Full = r2Full, r2Reduced = r2Reduced)) |
|
104 |
-} |
|
1 |
+#' Plot the percent of the variation that is explained by batch and condition |
|
2 |
+#' in the data |
|
3 |
+#' |
|
4 |
+#' Visualize the percent variation in the data that is explained by batch and |
|
5 |
+#' condition if it is given. |
|
6 |
+#' |
|
7 |
+#' @param inSCE Input SCtkExperiment object. Required |
|
8 |
+#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
|
9 |
+#' @param batch The column in the annotation data that corresponds to batch. |
|
10 |
+#' Required |
|
11 |
+#' @param condition The column in the annotation data that corresponds to |
|
12 |
+#' condition. Optional |
|
13 |
+#' @param pcInput A logical scalar indicating whether \code{useAssay} is in |
|
14 |
+#' \code{names(reducedDims(inSCE))}. Default \code{FALSE}. |
|
15 |
+#' @return A boxplot of variation explained by batch, condition, and |
|
16 |
+#' batch+condition (if applicable). |
|
17 |
+#' @export |
|
18 |
+#' @examples |
|
19 |
+#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
|
20 |
+#' library(bladderbatch) |
|
21 |
+#' data(bladderdata) |
|
22 |
+#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
|
23 |
+#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
|
24 |
+#' } |
|
25 |
+#' |
|
26 |
+plotBatchVariance <- function(inSCE, useAssay="logcounts", batch='batch', |
|
27 |
+ condition=NULL, pcInput = FALSE){ |
|
28 |
+ if(isTRUE(pcInput)){ |
|
29 |
+ mat <- t(SingleCellExperiment::reducedDim(inSCE, useAssay)) |
|
30 |
+ } else { |
|
31 |
+ mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
32 |
+ } |
|
33 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[, batch] |
|
34 |
+ nlb <- nlevels(as.factor(batchCol)) |
|
35 |
+ if (nlb <= 1){ |
|
36 |
+ batchMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
37 |
+ } else { |
|
38 |
+ batchMod <- stats::model.matrix(~as.factor(batchCol)) |
|
39 |
+ } |
|
40 |
+ condCol <- SingleCellExperiment::colData(inSCE)[, condition] |
|
41 |
+ if (is.null(condition)){ |
|
42 |
+ stop("condition required for now") |
|
43 |
+ } else { |
|
44 |
+ nlc <- nlevels(as.factor(condCol)) |
|
45 |
+ if (nlc <= 1){ |
|
46 |
+ condMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
47 |
+ } else { |
|
48 |
+ condMod <- stats::model.matrix(~as.factor(condCol)) |
|
49 |
+ } |
|
50 |
+ } |
|
51 |
+ mod <- cbind(condMod, batchMod[, -1]) |
|
52 |
+ condTest <- batchqc_f.pvalue(mat, mod, batchMod) |
|
53 |
+ batchTest <- batchqc_f.pvalue(mat, mod, condMod) |
|
54 |
+ r2Full <- condTest$r2Full |
|
55 |
+ condR2 <- batchTest$r2Reduced |
|
56 |
+ batchR2 <- condTest$r2Reduced |
|
57 |
+ explainedVariation <- round(cbind(`Full (Condition+Batch)` = r2Full, |
|
58 |
+ Condition = condR2, |
|
59 |
+ Batch = batchR2), 5) * 100 |
|
60 |
+ exVarM <- reshape2::melt(explainedVariation) |
|
61 |
+ colnames(exVarM) <- c("Gene", "Model", "Percent.Explained.Variation") |
|
62 |
+ exVarM$Model <- factor(exVarM$Model) |
|
63 |
+ a <- ggplot2::ggplot(exVarM, ggplot2::aes_string("Model", "Percent.Explained.Variation")) + |
|
64 |
+ ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
|
65 |
+ ggplot2::geom_boxplot(width = .1) + |
|
66 |
+ ggplot2::xlab("Model") + |
|
67 |
+ ggplot2::ylab("Percent Explained Variation") + |
|
68 |
+ ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
|
69 |
+ guide = FALSE) |
|
70 |
+ return(a) |
|
71 |
+} |
|
72 |
+ |
|
73 |
+batchqc_f.pvalue <- function(dat, mod, mod0) { |
|
74 |
+ # F-test (full/reduced model) and returns R2 values |
|
75 |
+ # (full/reduced) as well. |
|
76 |
+ mod00 <- matrix(rep(1, ncol(dat)), ncol = 1) |
|
77 |
+ n <- dim(dat)[2] |
|
78 |
+ m <- dim(dat)[1] |
|
79 |
+ df1 <- dim(mod)[2] |
|
80 |
+ df0 <- dim(mod0)[2] |
|
81 |
+ p <- rep(0, m) |
|
82 |
+ |
|
83 |
+ resid <- dat - dat %*% mod %*% solve(t(mod) %*% mod) %*% t(mod) |
|
84 |
+ rss1 <- rowSums(resid * resid) |
|
85 |
+ rm(resid) |
|
86 |
+ |
|
87 |
+ resid0 <- dat - dat %*% mod0 %*% solve(t(mod0) %*% mod0) %*% t(mod0) |
|
88 |
+ rss0 <- rowSums(resid0 * resid0) |
|
89 |
+ rm(resid0) |
|
90 |
+ |
|
91 |
+ resid00 <- dat - dat %*% mod00 %*% solve(t(mod00) %*% mod00) %*% t(mod00) |
|
92 |
+ rss00 <- rowSums(resid00 * resid00) |
|
93 |
+ rm(resid00) |
|
94 |
+ |
|
95 |
+ r2Full <- 1 - rss1 / rss00 |
|
96 |
+ r2Reduced <- 1 - rss0 / rss00 |
|
97 |
+ |
|
98 |
+ p <- 1 |
|
99 |
+ if (df1 > df0) { |
|
100 |
+ fstats <- ((rss0 - rss1) / (df1 - df0)) / (rss1 / (n - df1)) |
|
101 |
+ p <- 1 - stats::pf(fstats, df1 = (df1 - df0), df2 = (n - df1)) |
|
102 |
+ } |
|
103 |
+ return(list(p = p, r2Full = r2Full, r2Reduced = r2Reduced)) |
|
104 |
+} |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
#' Visualize the percent variation in the data that is explained by batch and |
5 | 5 |
#' condition if it is given. |
6 | 6 |
#' |
7 |
-#' @param inSCE Input SCtkExperiment object. Requireds |
|
7 |
+#' @param inSCE Input SCtkExperiment object. Required |
|
8 | 8 |
#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
9 | 9 |
#' @param batch The column in the annotation data that corresponds to batch. |
10 | 10 |
#' Required |
... | ... |
@@ -58,12 +58,13 @@ plotBatchVariance <- function(inSCE, useAssay="logcounts", batch, |
58 | 58 |
Condition = condR2, |
59 | 59 |
Batch = batchR2), 5) * 100 |
60 | 60 |
exVarM <- reshape2::melt(explainedVariation) |
61 |
- colnames(exVarM) <- c("Gene", "Model", "Value") |
|
61 |
+ colnames(exVarM) <- c("Gene", "Model", "Percent.Explained.Variation") |
|
62 | 62 |
exVarM$Model <- factor(exVarM$Model) |
63 |
- a <- ggplot2::ggplot(exVarM, ggplot2::aes_string("Model", "Value")) + |
|
63 |
+ a <- ggplot2::ggplot(exVarM, ggplot2::aes_string("Model", "Percent.Explained.Variation")) + |
|
64 | 64 |
ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
65 | 65 |
ggplot2::geom_boxplot(width = .1) + |
66 | 66 |
ggplot2::xlab("Model") + |
67 |
+ ggplot2::ylab("Percent Explained Variation") + |
|
67 | 68 |
ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
68 | 69 |
guide = FALSE) |
69 | 70 |
return(a) |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
#' Visualize the percent variation in the data that is explained by batch and |
5 | 5 |
#' condition if it is given. |
6 | 6 |
#' |
7 |
-#' @param inSCESet Input SCtkExperiment object. Required |
|
7 |
+#' @param inSCE Input SCtkExperiment object. Requireds |
|
8 | 8 |
#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
9 | 9 |
#' @param batch The column in the annotation data that corresponds to batch. |
10 | 10 |
#' Required |
... | ... |
@@ -22,34 +22,34 @@ |
22 | 22 |
#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
23 | 23 |
#' } |
24 | 24 |
#' |
25 |
-plotBatchVariance <- function(inSCESet, useAssay="logcounts", batch, |
|
25 |
+plotBatchVariance <- function(inSCE, useAssay="logcounts", batch, |
|
26 | 26 |
condition=NULL){ |
27 |
- nlb <- nlevels(as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
|
27 |
+ nlb <- nlevels(as.factor(SingleCellExperiment::colData(inSCE)[, batch])) |
|
28 | 28 |
if (nlb <= 1){ |
29 |
- batchMod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
|
29 |
+ batchMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
30 | 30 |
} else { |
31 | 31 |
batchMod <- stats::model.matrix( |
32 |
- ~as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
|
32 |
+ ~as.factor(SingleCellExperiment::colData(inSCE)[, batch])) |
|
33 | 33 |
} |
34 | 34 |
if (is.null(condition)){ |
35 | 35 |
stop("condition required for now") |
36 | 36 |
} else { |
37 | 37 |
nlc <- nlevels(as.factor( |
38 |
- SingleCellExperiment::colData(inSCESet)[, condition])) |
|
38 |
+ SingleCellExperiment::colData(inSCE)[, condition])) |
|
39 | 39 |
if (nlc <= 1){ |
40 |
- condMod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
|
40 |
+ condMod <- matrix(rep(1, ncol(inSCE)), ncol = 1) |
|
41 | 41 |
} else { |
42 | 42 |
condMod <- stats::model.matrix( |
43 |
- ~as.factor(SingleCellExperiment::colData(inSCESet)[, condition])) |
|
43 |
+ ~as.factor(SingleCellExperiment::colData(inSCE)[, condition])) |
|
44 | 44 |
} |
45 | 45 |
} |
46 | 46 |
|
47 | 47 |
mod <- cbind(condMod, batchMod[, -1]) |
48 | 48 |
|
49 |
- condTest <- batchqc_f.pvalue(SummarizedExperiment::assay(inSCESet, useAssay), |
|
49 |
+ condTest <- batchqc_f.pvalue(SummarizedExperiment::assay(inSCE, useAssay), |
|
50 | 50 |
mod, batchMod) |
51 | 51 |
batchTest <- batchqc_f.pvalue( |
52 |
- SummarizedExperiment::assay(inSCESet, useAssay), mod, condMod) |
|
52 |
+ SummarizedExperiment::assay(inSCE, useAssay), mod, condMod) |
|
53 | 53 |
|
54 | 54 |
r2Full <- condTest$r2Full |
55 | 55 |
condR2 <- batchTest$r2Reduced |
... | ... |
@@ -5,7 +5,7 @@ |
5 | 5 |
#' condition if it is given. |
6 | 6 |
#' |
7 | 7 |
#' @param inSCESet Input SCtkExperiment object. Required |
8 |
-#' @param use_assay Indicate which assay to use for PCA. Default is "logcounts" |
|
8 |
+#' @param useAssay Indicate which assay to use for PCA. Default is "logcounts" |
|
9 | 9 |
#' @param batch The column in the annotation data that corresponds to batch. |
10 | 10 |
#' Required |
11 | 11 |
#' @param condition The column in the annotation data that corresponds to |
... | ... |
@@ -19,55 +19,59 @@ |
19 | 19 |
#' library(bladderbatch) |
20 | 20 |
#' data(bladderdata) |
21 | 21 |
#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
22 |
-#' plotBatchVariance(dat, use_assay="exprs", batch="batch", condition = "cancer") |
|
22 |
+#' plotBatchVariance(dat, useAssay="exprs", batch="batch", condition = "cancer") |
|
23 | 23 |
#' } |
24 | 24 |
#' |
25 |
-plotBatchVariance <- function(inSCESet, use_assay="logcounts", batch, |
|
25 |
+plotBatchVariance <- function(inSCESet, useAssay="logcounts", batch, |
|
26 | 26 |
condition=NULL){ |
27 | 27 |
nlb <- nlevels(as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
28 | 28 |
if (nlb <= 1){ |
29 |
- batch_mod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
|
29 |
+ batchMod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
|
30 | 30 |
} else { |
31 |
- batch_mod <- stats::model.matrix(~as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
|
31 |
+ batchMod <- stats::model.matrix( |
|
32 |
+ ~as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
|
32 | 33 |
} |
33 | 34 |
if (is.null(condition)){ |
34 | 35 |
stop("condition required for now") |
35 | 36 |
} else { |
36 |
- nlc <- nlevels(as.factor(SingleCellExperiment::colData(inSCESet)[, condition])) |
|
37 |
+ nlc <- nlevels(as.factor( |
|
38 |
+ SingleCellExperiment::colData(inSCESet)[, condition])) |
|
37 | 39 |
if (nlc <= 1){ |
38 |
- cond_mod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
|
40 |
+ condMod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
|
39 | 41 |
} else { |
40 |
- cond_mod <- stats::model.matrix(~as.factor(SingleCellExperiment::colData(inSCESet)[, condition])) |
|
42 |
+ condMod <- stats::model.matrix( |
|
43 |
+ ~as.factor(SingleCellExperiment::colData(inSCESet)[, condition])) |
|
41 | 44 |
} |
42 | 45 |
} |
43 | 46 |
|
44 |
- mod <- cbind(cond_mod, batch_mod[, -1]) |
|
47 |
+ mod <- cbind(condMod, batchMod[, -1]) |
|
45 | 48 |
|
46 |
- cond_test <- batchqc_f.pvalue(SummarizedExperiment::assay(inSCESet, use_assay), mod, batch_mod) |
|
47 |
- batch_test <- batchqc_f.pvalue(SummarizedExperiment::assay(inSCESet, use_assay), mod, cond_mod) |
|
49 |
+ condTest <- batchqc_f.pvalue(SummarizedExperiment::assay(inSCESet, useAssay), |
|
50 |
+ mod, batchMod) |
|
51 |
+ batchTest <- batchqc_f.pvalue( |
|
52 |
+ SummarizedExperiment::assay(inSCESet, useAssay), mod, condMod) |
|
48 | 53 |
|
49 |
- cond_ps <- cond_test$p |
|
50 |
- batch_ps <- batch_test$p |
|
51 |
- |
|
52 |
- r2_full <- cond_test$r2_full |
|
53 |
- cond_r2 <- batch_test$r2_reduced |
|
54 |
- batch_r2 <- cond_test$r2_reduced |
|
55 |
- explained_variation <- round(cbind(`Full (Condition+Batch)` = r2_full, |
|
56 |
- Condition = cond_r2, Batch = batch_r2), 5) * 100 |
|
57 |
- ex_var_m <- reshape2::melt(explained_variation) |
|
58 |
- colnames(ex_var_m) <- c("Gene", "Model", "Value") |
|
59 |
- ex_var_m$Model <- factor(ex_var_m$Model) |
|
60 |
- a <- ggplot2::ggplot(ex_var_m, ggplot2::aes_string("Model", "Value")) + |
|
54 |
+ r2Full <- condTest$r2Full |
|
55 |
+ condR2 <- batchTest$r2Reduced |
|
56 |
+ batchR2 <- condTest$r2Reduced |
|
57 |
+ explainedVariation <- round(cbind(`Full (Condition+Batch)` = r2Full, |
|
58 |
+ Condition = condR2, |
|
59 |
+ Batch = batchR2), 5) * 100 |
|
60 |
+ exVarM <- reshape2::melt(explainedVariation) |
|
61 |
+ colnames(exVarM) <- c("Gene", "Model", "Value") |
|
62 |
+ exVarM$Model <- factor(exVarM$Model) |
|
63 |
+ a <- ggplot2::ggplot(exVarM, ggplot2::aes_string("Model", "Value")) + |
|
61 | 64 |
ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
62 | 65 |
ggplot2::geom_boxplot(width = .1) + |
63 | 66 |
ggplot2::xlab("Model") + |
64 |
- ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), guide = FALSE) |
|
67 |
+ ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), |
|
68 |
+ guide = FALSE) |
|
65 | 69 |
return(a) |
66 | 70 |
} |
67 | 71 |
|
68 | 72 |
batchqc_f.pvalue <- function(dat, mod, mod0) { |
69 |
- ## F-test (full/reduced model) and returns R2 values |
|
70 |
- ## (full/reduced) as well. |
|
73 |
+ # F-test (full/reduced model) and returns R2 values |
|
74 |
+ # (full/reduced) as well. |
|
71 | 75 |
mod00 <- matrix(rep(1, ncol(dat)), ncol = 1) |
72 | 76 |
n <- dim(dat)[2] |
73 | 77 |
m <- dim(dat)[1] |
... | ... |
@@ -87,13 +91,13 @@ batchqc_f.pvalue <- function(dat, mod, mod0) { |
87 | 91 |
rss00 <- rowSums(resid00 * resid00) |
88 | 92 |
rm(resid00) |
89 | 93 |
|
90 |
- r2_full <- 1 - rss1 / rss00 |
|
91 |
- r2_reduced <- 1 - rss0 / rss00 |
|
94 |
+ r2Full <- 1 - rss1 / rss00 |
|
95 |
+ r2Reduced <- 1 - rss0 / rss00 |
|
92 | 96 |
|
93 | 97 |
p <- 1 |
94 | 98 |
if (df1 > df0) { |
95 | 99 |
fstats <- ((rss0 - rss1) / (df1 - df0)) / (rss1 / (n - df1)) |
96 | 100 |
p <- 1 - stats::pf(fstats, df1 = (df1 - df0), df2 = (n - df1)) |
97 | 101 |
} |
98 |
- return(list(p = p, r2_full = r2_full, r2_reduced = r2_reduced)) |
|
102 |
+ return(list(p = p, r2Full = r2Full, r2Reduced = r2Reduced)) |
|
99 | 103 |
} |
... | ... |
@@ -56,8 +56,9 @@ plotBatchVariance <- function(inSCESet, use_assay="logcounts", batch, |
56 | 56 |
Condition = cond_r2, Batch = batch_r2), 5) * 100 |
57 | 57 |
ex_var_m <- reshape2::melt(explained_variation) |
58 | 58 |
colnames(ex_var_m) <- c("Gene", "Model", "Value") |
59 |
- a <- ggplot2::ggplot(ex_var_m, ggplot2::aes(factor(Model), Value)) + |
|
60 |
- ggplot2::geom_violin(ggplot2::aes(fill = factor(Model))) + |
|
59 |
+ ex_var_m$Model <- factor(ex_var_m$Model) |
|
60 |
+ a <- ggplot2::ggplot(ex_var_m, ggplot2::aes_string("Model", "Value")) + |
|
61 |
+ ggplot2::geom_violin(ggplot2::aes_string(fill = "Model")) + |
|
61 | 62 |
ggplot2::geom_boxplot(width = .1) + |
62 | 63 |
ggplot2::xlab("Model") + |
63 | 64 |
ggplot2::scale_fill_manual(values = RColorBrewer::brewer.pal(9, "Set1"), guide = FALSE) |
... | ... |
@@ -25,16 +25,16 @@ |
25 | 25 |
plotBatchVariance <- function(inSCESet, use_assay="logcounts", batch, |
26 | 26 |
condition=NULL){ |
27 | 27 |
nlb <- nlevels(as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
28 |
- if(nlb <= 1){ |
|
28 |
+ if (nlb <= 1){ |
|
29 | 29 |
batch_mod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
30 | 30 |
} else { |
31 | 31 |
batch_mod <- stats::model.matrix(~as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
32 | 32 |
} |
33 |
- if(is.null(condition)){ |
|
33 |
+ if (is.null(condition)){ |
|
34 | 34 |
stop("condition required for now") |
35 | 35 |
} else { |
36 | 36 |
nlc <- nlevels(as.factor(SingleCellExperiment::colData(inSCESet)[, condition])) |
37 |
- if(nlc <= 1){ |
|
37 |
+ if (nlc <= 1){ |
|
38 | 38 |
cond_mod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
39 | 39 |
} else { |
40 | 40 |
cond_mod <- stats::model.matrix(~as.factor(SingleCellExperiment::colData(inSCESet)[, condition])) |
... | ... |
@@ -2,7 +2,7 @@ |
2 | 2 |
#' in the data |
3 | 3 |
#' |
4 | 4 |
#' Visualize the percent variation in the data that is explained by batch and |
5 |
-#' condition if it is givent. |
|
5 |
+#' condition if it is given. |
|
6 | 6 |
#' |
7 | 7 |
#' @param inSCESet Input SCtkExperiment object. Required |
8 | 8 |
#' @param use_assay Indicate which assay to use for PCA. Default is "logcounts" |
... | ... |
@@ -14,30 +14,37 @@ |
14 | 14 |
#' @return A boxplot of variation explained by batch, condition, and |
15 | 15 |
#' batch+condition (if applicable). |
16 | 16 |
#' @export |
17 |
+#' @examples |
|
18 |
+#' if(requireNamespace("bladderbatch", quietly = TRUE)) { |
|
19 |
+#' library(bladderbatch) |
|
20 |
+#' data(bladderdata) |
|
21 |
+#' dat <- as(as(bladderEset, "SummarizedExperiment"), "SCtkExperiment") |
|
22 |
+#' plotBatchVariance(dat, use_assay="exprs", batch="batch", condition = "cancer") |
|
23 |
+#' } |
|
17 | 24 |
#' |
18 | 25 |
plotBatchVariance <- function(inSCESet, use_assay="logcounts", batch, |
19 | 26 |
condition=NULL){ |
20 |
- nlb <- nlevels(as.factor(colData(inSCESet)[, batch])) |
|
27 |
+ nlb <- nlevels(as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
|
21 | 28 |
if(nlb <= 1){ |
22 | 29 |
batch_mod <- matrix(rep(1, ncol(inSCESet)), ncol = 1) |
23 | 30 |
} else { |
24 |
- batch_mod <- stats::model.matrix(~as.factor(colData(inSCESet)[, batch])) |
|
31 |
+ batch_mod <- stats::model.matrix(~as.factor(SingleCellExperiment::colData(inSCESet)[, batch])) |
|
25 | 32 |
} |
26 | 33 |
if(is.null(condition)){ |
27 | 34 |