... | ... |
@@ -255,6 +255,8 @@ importFrom(SingleCellExperiment,SingleCellExperiment) |
255 | 255 |
importFrom(SingleCellExperiment,altExp) |
256 | 256 |
importFrom(SingleCellExperiment,counts) |
257 | 257 |
importFrom(SingleCellExperiment,reducedDim) |
258 |
+importFrom(SingleCellExperiment,reducedDimNames) |
|
259 |
+importFrom(SingleCellExperiment,reducedDims) |
|
258 | 260 |
importFrom(SingleCellExperiment,rowSubset) |
259 | 261 |
importFrom(SummarizedExperiment,"assay<-") |
260 | 262 |
importFrom(SummarizedExperiment,"assayNames<-") |
... | ... |
@@ -262,6 +264,7 @@ importFrom(SummarizedExperiment,"assays<-") |
262 | 264 |
importFrom(SummarizedExperiment,"colData<-") |
263 | 265 |
importFrom(SummarizedExperiment,"rowData<-") |
264 | 266 |
importFrom(SummarizedExperiment,assay) |
267 |
+importFrom(SummarizedExperiment,assayNames) |
|
265 | 268 |
importFrom(SummarizedExperiment,assays) |
266 | 269 |
importFrom(SummarizedExperiment,colData) |
267 | 270 |
importFrom(SummarizedExperiment,rowData) |
... | ... |
@@ -1,10 +1,10 @@ |
1 | 1 |
.constructSCE <- function( |
2 |
- matrices, |
|
3 |
- features, |
|
4 |
- barcodes, |
|
5 |
- metadata, |
|
6 |
- reducedDims) { |
|
7 |
- |
|
2 |
+ matrices, |
|
3 |
+ features, |
|
4 |
+ barcodes, |
|
5 |
+ metadata, |
|
6 |
+ reducedDims) { |
|
7 |
+ |
|
8 | 8 |
sce <- SingleCellExperiment::SingleCellExperiment(assays = matrices) |
9 | 9 |
SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(features) |
10 | 10 |
SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(barcodes) |
... | ... |
@@ -34,7 +34,7 @@ |
34 | 34 |
} else { |
35 | 35 |
fill <- NA |
36 | 36 |
} |
37 |
- |
|
37 |
+ |
|
38 | 38 |
### combine row |
39 | 39 |
if (isTRUE(combineRow) & (!is.null(row))) { |
40 | 40 |
missRow <- row[!row %in% rownames(x)] |
... | ... |
@@ -43,14 +43,14 @@ |
43 | 43 |
if (!isTRUE(sparse)) { |
44 | 44 |
missMat <- as.matrix(missMat) |
45 | 45 |
} |
46 |
- |
|
46 |
+ |
|
47 | 47 |
mat <- rbind(matOrigin, missMat) |
48 | 48 |
if (anyDuplicated(rownames(mat))) { |
49 | 49 |
mat <- mat[!duplicated(rownames(mat)), ] |
50 | 50 |
} |
51 | 51 |
matOrigin <- mat[row, ] |
52 | 52 |
} |
53 |
- |
|
53 |
+ |
|
54 | 54 |
### combine cols |
55 | 55 |
if (isTRUE(combineCol) & (!is.null(col))) { |
56 | 56 |
missCol <- col[!col %in% colnames(x)] |
... | ... |
@@ -59,7 +59,7 @@ |
59 | 59 |
if (!isTRUE(sparse)) { |
60 | 60 |
missMat <- as.matrix(missMat) |
61 | 61 |
} |
62 |
- |
|
62 |
+ |
|
63 | 63 |
mat <- cbind(matOrigin, missMat) |
64 | 64 |
if (anyDuplicated(colnames(mat))) { |
65 | 65 |
mat <- mat[, !duplicated(colnames(mat))] |
... | ... |
@@ -76,12 +76,12 @@ |
76 | 76 |
rw[['rownames']] <- rownames(rw) |
77 | 77 |
return(rw) |
78 | 78 |
}) |
79 |
- |
|
79 |
+ |
|
80 | 80 |
## Get merged rowData |
81 | 81 |
by.r <- unique(c('rownames', by.r)) |
82 | 82 |
unionFe <- Reduce(function(r1, r2) merge(r1, r2, by=by.r, all=TRUE), feList) |
83 | 83 |
allGenes <- unique(unlist(lapply(feList, rownames))) |
84 |
- |
|
84 |
+ |
|
85 | 85 |
## rowData |
86 | 86 |
newFe <- unionFe |
87 | 87 |
if (nrow(newFe) != length(allGenes)) { |
... | ... |
@@ -102,7 +102,7 @@ |
102 | 102 |
cD[['rownames']] <- rownames(cD) |
103 | 103 |
return(cD) |
104 | 104 |
}) |
105 |
- |
|
105 |
+ |
|
106 | 106 |
by.c <- unique(c("rownames", by.c)) |
107 | 107 |
unionCb <- Reduce(function(c1, c2) merge(c1, c2, by=by.c, all=TRUE), cbList) |
108 | 108 |
rownames(unionCb) <- unionCb[['rownames']] |
... | ... |
@@ -118,19 +118,19 @@ |
118 | 118 |
reduceList <- lapply(sceList, SingleCellExperiment::reducedDims) |
119 | 119 |
## get every reducedDim exists in at least one SCEs |
120 | 120 |
UnionReducedDims <- unique(unlist(lapply(sceList, SingleCellExperiment::reducedDimNames))) |
121 |
- |
|
121 |
+ |
|
122 | 122 |
## for each reducedDim, get union row/cols |
123 | 123 |
reducedDims <- list() |
124 | 124 |
for (reduceDim in UnionReducedDims) { |
125 | 125 |
x <- lapply(sceList, function(x) {if (reduceDim %in% SingleCellExperiment::reducedDimNames(x)) {SingleCellExperiment::reducedDim(x, reduceDim)}}) |
126 | 126 |
reducedDims[[reduceDim]] <- .getDimUnion(x) |
127 | 127 |
} |
128 |
- |
|
128 |
+ |
|
129 | 129 |
## Merge reducedDim for each SCE |
130 | 130 |
redList <- list() |
131 | 131 |
for (idx in seq_along(sceList)){ |
132 | 132 |
redMat <- reduceList[[idx]] |
133 |
- |
|
133 |
+ |
|
134 | 134 |
for (DimName in UnionReducedDims) { |
135 | 135 |
if (DimName %in% names(redMat)) { |
136 | 136 |
redMat[[DimName]] <- .getMatUnion(reducedDims[[DimName]], redMat[[DimName]], |
... | ... |
@@ -142,10 +142,10 @@ |
142 | 142 |
dimnames = list(colnames(sceList[[idx]]), reducedDims[[DimName]][[2]])) |
143 | 143 |
} |
144 | 144 |
} |
145 |
- |
|
145 |
+ |
|
146 | 146 |
redList[[idx]] <- redMat |
147 | 147 |
} |
148 |
- |
|
148 |
+ |
|
149 | 149 |
return(redList) |
150 | 150 |
} |
151 | 151 |
|
... | ... |
@@ -157,7 +157,7 @@ |
157 | 157 |
unique(unlist(lapply(sceList, rownames))), |
158 | 158 |
unique(unlist(lapply(sceList, colnames))) |
159 | 159 |
) |
160 |
- |
|
160 |
+ |
|
161 | 161 |
asList <- list() |
162 | 162 |
for (idx in seq_along(assayList)){ |
163 | 163 |
assay <- assayList[[idx]] |
... | ... |
@@ -174,7 +174,7 @@ |
174 | 174 |
} |
175 | 175 |
asList[[idx]] <- assay |
176 | 176 |
} |
177 |
- |
|
177 |
+ |
|
178 | 178 |
return(asList) |
179 | 179 |
} |
180 | 180 |
|
... | ... |
@@ -197,27 +197,48 @@ |
197 | 197 |
# } |
198 | 198 |
|
199 | 199 |
.mergeMetaSCE <- function(SCE_list) { |
200 |
+ # Merge highest level metadata entries (except "sctk") by sample names in |
|
201 |
+ # SCE_list. For analysis results in "sctk", merge by SCE_list sample name if |
|
202 |
+ # given "all_cells" in one object, else, use existing sample names. |
|
203 |
+ samples <- names(SCE_list) |
|
200 | 204 |
sampleMeta <- lapply(SCE_list, S4Vectors::metadata) |
201 | 205 |
metaNames <- unique(unlist(lapply(sampleMeta, names))) |
206 |
+ sampleSctkMeta <- lapply(SCE_list, function(x) {S4Vectors::metadata(x)$sctk}) |
|
207 |
+ sctkMetaNames <- unique(unlist(lapply(sampleMeta, |
|
208 |
+ function(x) names(x$sctk)))) |
|
209 |
+ sampleMeta$sctk <- NULL |
|
210 |
+ metaNames <- metaNames[!metaNames %in% c("sctk")] |
|
202 | 211 |
NewMeta <- list() |
203 |
- |
|
212 |
+ for (meta in sctkMetaNames) { |
|
213 |
+ for (i in seq_along(sampleSctkMeta)) { |
|
214 |
+ # `i` is for identifying each SCE object, usually matching a sample in |
|
215 |
+ # the pipeline. `sampleAvail` for samples stored in metadata entry, in |
|
216 |
+ # case users are merging merged objects. |
|
217 |
+ sampleAvail <- names(sampleSctkMeta[[i]][[meta]]) |
|
218 |
+ if (length(sampleAvail) == 1) { |
|
219 |
+ NewMeta$sctk[[meta]][[samples[i]]] <- sampleSctkMeta[[i]][[meta]][[1]] |
|
220 |
+ } else if (length(sampleAvail) > 1) { |
|
221 |
+ names(sampleSctkMeta[[i]][[meta]]) <- |
|
222 |
+ paste(names(sampleSctkMeta)[i], |
|
223 |
+ names(sampleSctkMeta[[i]][[meta]]), sep = "_") |
|
224 |
+ NewMeta$sctk[[meta]] <- c(NewMeta$sctk[[meta]], |
|
225 |
+ sampleSctkMeta[[i]][[meta]]) |
|
226 |
+ } |
|
227 |
+ } |
|
228 |
+ } |
|
204 | 229 |
for (meta in metaNames) { |
205 | 230 |
for (i in seq_along(sampleMeta)) { |
206 |
- NewMeta[[meta]][[i]] <- sampleMeta[[i]][[meta]] |
|
231 |
+ NewMeta[[meta]][[samples[i]]] <- sampleMeta[[i]][[meta]] |
|
207 | 232 |
} |
208 | 233 |
} |
209 |
- |
|
210 |
- if ("runBarcodeRanksMetaOutput" %in% metaNames) { |
|
211 |
- NewMeta[["runBarcodeRanksMetaOutput"]] <- unlist(NewMeta[["runBarcodeRanksMetaOutput"]]) |
|
212 |
- } |
|
213 |
- |
|
234 |
+ |
|
214 | 235 |
if ("assayType" %in% metaNames) { |
215 | 236 |
assayType <- lapply(SCE_list, function(x){S4Vectors::metadata(x)$assayType}) |
216 | 237 |
assayType <- BiocGenerics::Reduce(dplyr::union, assayType) |
217 | 238 |
|
218 | 239 |
NewMeta[["assayType"]] <- assayType |
219 | 240 |
} |
220 |
- |
|
241 |
+ |
|
221 | 242 |
return(NewMeta) |
222 | 243 |
} |
223 | 244 |
|
... | ... |
@@ -241,7 +262,7 @@ |
241 | 262 |
#' @export |
242 | 263 |
|
243 | 264 |
combineSCE <- function(sceList, by.r = NULL, by.c = NULL, combined = TRUE){ |
244 |
- if(length(sceList) == 1){ |
|
265 |
+ if (length(sceList) == 1) { |
|
245 | 266 |
return(sceList[[1]]) |
246 | 267 |
} |
247 | 268 |
## rowData |
... | ... |
@@ -252,16 +273,21 @@ combineSCE <- function(sceList, by.r = NULL, by.c = NULL, combined = TRUE){ |
252 | 273 |
redMatList <- .mergeRedimSCE(sceList) |
253 | 274 |
## assay |
254 | 275 |
assayList <- .mergeAssaySCE(sceList) |
255 |
- |
|
276 |
+ samples <- names(sceList) |
|
277 |
+ if (is.null(samples)) { |
|
278 |
+ samples <- paste0("sample", seq_along(sceList)) |
|
279 |
+ } |
|
256 | 280 |
New_SCE <- list() |
257 | 281 |
for (i in seq(length(sceList))) { |
258 | 282 |
## create new sce |
259 |
- New_SCE[[i]] <- .constructSCE(matrices = assayList[[i]], features = newFeList, |
|
260 |
- barcodes = newCbList[[i]], |
|
261 |
- metadata = S4Vectors::metadata(sceList[[i]]), |
|
262 |
- reducedDims = redMatList[[i]]) |
|
283 |
+ sampleName <- samples[i] |
|
284 |
+ New_SCE[[sampleName]] <- .constructSCE(matrices = assayList[[i]], |
|
285 |
+ features = newFeList, |
|
286 |
+ barcodes = newCbList[[i]], |
|
287 |
+ metadata = S4Vectors::metadata(sceList[[i]]), |
|
288 |
+ reducedDims = redMatList[[i]]) |
|
263 | 289 |
} |
264 |
- |
|
290 |
+ |
|
265 | 291 |
if (isTRUE(combined)) { |
266 | 292 |
sce <- do.call(SingleCellExperiment::cbind, New_SCE) |
267 | 293 |
meta <- .mergeMetaSCE(New_SCE) |
... | ... |
@@ -2,251 +2,255 @@ |
2 | 2 |
## (https://github.com/chris-mcginnis-ucsf/DoubletFinder) |
3 | 3 |
|
4 | 4 |
.parallel_paramSweep <- function(n, n.real.cells, real.cells, pK, pN, data, |
5 |
- orig.commands, PCs, sct, verbose, seed) { |
|
6 |
- sweep.res.list <- list() |
|
7 |
- list.ind <- 0 |
|
8 |
- |
|
9 |
- ## Make merged real-artifical data |
|
5 |
+ orig.commands, PCs, sct, verbose, seed) { |
|
6 |
+ sweep.res.list <- list() |
|
7 |
+ list.ind <- 0 |
|
8 |
+ |
|
9 |
+ ## Make merged real-artifical data |
|
10 |
+ if (verbose) { |
|
11 |
+ message(date(), " ... Creating artificial doublets for pN = ", |
|
12 |
+ pN[n] * 100, "%") |
|
13 |
+ } |
|
14 |
+ n_doublets <- ceiling(n.real.cells / (1 - pN[n]) - n.real.cells) |
|
15 |
+ real.cells1 <- sample(real.cells, n_doublets, replace = TRUE) |
|
16 |
+ real.cells2 <- sample(real.cells, n_doublets, replace = TRUE) |
|
17 |
+ doublets <- (data[, real.cells1] + data[, real.cells2]) / 2 |
|
18 |
+ colnames(doublets) <- paste("X", seq(n_doublets), sep = "") |
|
19 |
+ data_wdoublets <- cbind(data, doublets) |
|
20 |
+ |
|
21 |
+ ## Pre-process Seurat object |
|
22 |
+ if (sct == FALSE) { |
|
10 | 23 |
if (verbose) { |
11 |
- message(date(), " ... Creating artificial doublets for pN = ", |
|
12 |
- pN[n] * 100, "%") |
|
13 |
- } |
|
14 |
- n_doublets <- ceiling(n.real.cells / (1 - pN[n]) - n.real.cells) |
|
15 |
- real.cells1 <- sample(real.cells, n_doublets, replace = TRUE) |
|
16 |
- real.cells2 <- sample(real.cells, n_doublets, replace = TRUE) |
|
17 |
- doublets <- (data[, real.cells1] + data[, real.cells2]) / 2 |
|
18 |
- colnames(doublets) <- paste("X", seq(n_doublets), sep = "") |
|
19 |
- data_wdoublets <- cbind(data, doublets) |
|
20 |
- |
|
21 |
- ## Pre-process Seurat object |
|
22 |
- if (sct == FALSE) { |
|
23 |
- if (verbose) { |
|
24 |
- message(date(), " ... Creating Seurat object") |
|
25 |
- } |
|
26 |
- seu_wdoublets <- Seurat::CreateSeuratObject(counts = data_wdoublets) |
|
27 |
- |
|
28 |
- if (verbose) { |
|
29 |
- message(date(), " ... Normalizing Seurat object") |
|
30 |
- } |
|
31 |
- seu_wdoublets <- Seurat::NormalizeData(seu_wdoublets, |
|
32 |
- normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method, |
|
33 |
- scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor, |
|
34 |
- margin = orig.commands$NormalizeData.RNA@params$margin, |
|
35 |
- verbose = verbose |
|
36 |
- ) |
|
37 |
- |
|
38 |
- if (verbose) { |
|
39 |
- message(date(), " ... Finding variable genes") |
|
40 |
- } |
|
41 |
- seu_wdoublets <- Seurat::FindVariableFeatures(seu_wdoublets, |
|
42 |
- selection.method = orig.commands$FindVariableFeatures.RNA$selection.method, |
|
43 |
- loess.span = orig.commands$FindVariableFeatures.RNA$loess.span, |
|
44 |
- clip.max = orig.commands$FindVariableFeatures.RNA$clip.max, |
|
45 |
- mean.function = orig.commands$FindVariableFeatures.RNA$mean.function, |
|
46 |
- dispersion.function = orig.commands$FindVariableFeatures.RNA$dispersion.function, |
|
47 |
- num.bin = orig.commands$FindVariableFeatures.RNA$num.bin, |
|
48 |
- binning.method = orig.commands$FindVariableFeatures.RNA$binning.method, |
|
49 |
- nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures, |
|
50 |
- mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff, |
|
51 |
- dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff, |
|
52 |
- verbose = verbose |
|
53 |
- ) |
|
54 |
- |
|
55 |
- if (verbose) { |
|
56 |
- message(date(), " ... Scaling data") |
|
57 |
- } |
|
58 |
- seu_wdoublets <- Seurat::ScaleData( |
|
59 |
- seu_wdoublets, |
|
60 |
- features = orig.commands$ScaleData.RNA$features, |
|
61 |
- model.use = orig.commands$ScaleData.RNA$model.use, |
|
62 |
- do.scale = orig.commands$ScaleData.RNA$do.scale, |
|
63 |
- do.center = orig.commands$ScaleData.RNA$do.center, |
|
64 |
- scale.max = orig.commands$ScaleData.RNA$scale.max, |
|
65 |
- block.size = orig.commands$ScaleData.RNA$block.size, |
|
66 |
- min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block, |
|
67 |
- verbose = verbose |
|
68 |
- ) |
|
69 |
- |
|
70 |
- if (verbose) { |
|
71 |
- message(date(), " ... Running PCA") |
|
72 |
- } |
|
73 |
- seu_wdoublets <- Seurat::RunPCA( |
|
74 |
- seu_wdoublets, |
|
75 |
- features = orig.commands$ScaleData.RNA$features, |
|
76 |
- npcs = length(PCs), |
|
77 |
- rev.pca = orig.commands$RunPCA.RNA$rev.pca, |
|
78 |
- weight.by.var = orig.commands$RunPCA.RNA$weight.by.var, |
|
79 |
- seed.use = seed, |
|
80 |
- verbose = FALSE |
|
81 |
- ) |
|
82 |
- } |
|
83 |
- |
|
84 |
- if (sct == TRUE) { |
|
85 |
- if (verbose) { |
|
86 |
- message(date(), " ... Creating Seurat object") |
|
87 |
- } |
|
88 |
- seu_wdoublets <- Seurat::CreateSeuratObject(counts = data_wdoublets) |
|
89 |
- |
|
90 |
- if (verbose) { |
|
91 |
- message(date(), " ... Running SCTransform") |
|
92 |
- } |
|
93 |
- seu_wdoublets <- Seurat::SCTransform(seu_wdoublets) |
|
94 |
- |
|
95 |
- if (verbose) { |
|
96 |
- message(date(), " ... Running PCA") |
|
97 |
- } |
|
98 |
- seu_wdoublets <- Seurat::RunPCA(seu_wdoublets, |
|
99 |
- npcs = length(PCs), seed.use = seed, |
|
100 |
- verbose = verbose |
|
101 |
- ) |
|
24 |
+ message(date(), " ... Creating Seurat object") |
|
102 | 25 |
} |
103 |
- |
|
104 |
- ## Compute PC distance matrix |
|
26 |
+ seu_wdoublets <- Seurat::CreateSeuratObject(counts = data_wdoublets) |
|
27 |
+ |
|
105 | 28 |
if (verbose) { |
106 |
- message(date(), " ... Calculating PC distance matrix") |
|
29 |
+ message(date(), " ... Normalizing Seurat object") |
|
107 | 30 |
} |
108 |
- nCells <- nrow(seu_wdoublets@meta.data) |
|
109 |
- pca.coord <- seu_wdoublets@reductions$pca@cell.embeddings[, PCs] |
|
110 |
- seu_wdoublets <- NULL |
|
111 |
- gc() |
|
112 |
- dist.mat <- fields::rdist(pca.coord)[, seq(n.real.cells)] |
|
113 |
- |
|
114 |
- ## Pre-order PC distance matrix prior to iterating across pK |
|
115 |
- ## for pANN computations |
|
31 |
+ seu_wdoublets <- Seurat::NormalizeData( |
|
32 |
+ seu_wdoublets, |
|
33 |
+ normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method, |
|
34 |
+ scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor, |
|
35 |
+ margin = orig.commands$NormalizeData.RNA@params$margin, |
|
36 |
+ verbose = verbose |
|
37 |
+ ) |
|
38 |
+ |
|
116 | 39 |
if (verbose) { |
117 |
- message(date(), " ... Defining neighborhoods") |
|
40 |
+ message(date(), " ... Finding variable genes") |
|
118 | 41 |
} |
119 |
- |
|
120 |
- for (i in seq(n.real.cells)) { |
|
121 |
- dist.mat[, i] <- order(dist.mat[, i]) |
|
42 |
+ seu_wdoublets <- Seurat::FindVariableFeatures( |
|
43 |
+ seu_wdoublets, |
|
44 |
+ selection.method = orig.commands$FindVariableFeatures.RNA$selection.method, |
|
45 |
+ loess.span = orig.commands$FindVariableFeatures.RNA$loess.span, |
|
46 |
+ clip.max = orig.commands$FindVariableFeatures.RNA$clip.max, |
|
47 |
+ mean.function = orig.commands$FindVariableFeatures.RNA$mean.function, |
|
48 |
+ dispersion.function = orig.commands$FindVariableFeatures.RNA$dispersion.function, |
|
49 |
+ num.bin = orig.commands$FindVariableFeatures.RNA$num.bin, |
|
50 |
+ binning.method = orig.commands$FindVariableFeatures.RNA$binning.method, |
|
51 |
+ nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures, |
|
52 |
+ mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff, |
|
53 |
+ dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff, |
|
54 |
+ verbose = verbose |
|
55 |
+ ) |
|
56 |
+ |
|
57 |
+ if (verbose) { |
|
58 |
+ message(date(), " ... Scaling data") |
|
122 | 59 |
} |
123 |
- |
|
124 |
- ## Trim PC distance matrix for faster manipulations |
|
125 |
- ind <- round(nCells * max(pK)) + 5 |
|
126 |
- dist.mat <- dist.mat[seq(ind), ] |
|
127 |
- |
|
128 |
- ## Compute pANN across pK sweep |
|
129 |
- |
|
60 |
+ seu_wdoublets <- Seurat::ScaleData( |
|
61 |
+ seu_wdoublets, |
|
62 |
+ features = orig.commands$ScaleData.RNA$features, |
|
63 |
+ model.use = orig.commands$ScaleData.RNA$model.use, |
|
64 |
+ do.scale = orig.commands$ScaleData.RNA$do.scale, |
|
65 |
+ do.center = orig.commands$ScaleData.RNA$do.center, |
|
66 |
+ scale.max = orig.commands$ScaleData.RNA$scale.max, |
|
67 |
+ block.size = orig.commands$ScaleData.RNA$block.size, |
|
68 |
+ min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block, |
|
69 |
+ verbose = verbose |
|
70 |
+ ) |
|
71 |
+ |
|
130 | 72 |
if (verbose) { |
131 |
- message(date(), " ... Computing pANN across all pK") |
|
73 |
+ message(date(), " ... Running PCA") |
|
132 | 74 |
} |
133 |
- for (k in seq_along(pK)) { |
|
134 |
- if (verbose) { |
|
135 |
- message(date(), " ... pK = ", pK[k]) |
|
136 |
- } |
|
137 |
- pk.temp <- round(nCells * pK[k]) |
|
138 |
- pANN <- as.data.frame(matrix(0L, nrow = n.real.cells, ncol = 1)) |
|
139 |
- colnames(pANN) <- "pANN" |
|
140 |
- rownames(pANN) <- real.cells |
|
141 |
- list.ind <- list.ind + 1 |
|
142 |
- |
|
143 |
- for (i in seq(n.real.cells)) { |
|
144 |
- neighbors <- dist.mat[2:(pk.temp + 1), i] |
|
145 |
- pANN$pANN[i] <- length(which(neighbors > n.real.cells)) / pk.temp |
|
146 |
- } |
|
147 |
- |
|
148 |
- sweep.res.list[[list.ind]] <- pANN |
|
75 |
+ seu_wdoublets <- Seurat::RunPCA( |
|
76 |
+ seu_wdoublets, |
|
77 |
+ features = orig.commands$ScaleData.RNA$features, |
|
78 |
+ npcs = length(PCs), |
|
79 |
+ rev.pca = orig.commands$RunPCA.RNA$rev.pca, |
|
80 |
+ weight.by.var = orig.commands$RunPCA.RNA$weight.by.var, |
|
81 |
+ seed.use = seed, |
|
82 |
+ verbose = FALSE |
|
83 |
+ ) |
|
84 |
+ } |
|
85 |
+ |
|
86 |
+ if (sct == TRUE) { |
|
87 |
+ if (verbose) { |
|
88 |
+ message(date(), " ... Creating Seurat object") |
|
149 | 89 |
} |
150 |
- |
|
151 |
- return(sweep.res.list) |
|
152 |
-} |
|
153 |
- |
|
154 |
-.paramSweep <- function(seu, PCs = seq(10), sct = FALSE, |
|
155 |
- verbose = FALSE, num.cores, seed) { |
|
156 |
- ## Set pN-pK param sweep ranges |
|
157 |
- pK <- c(0.0005, 0.001, 0.005, seq(0.01, 0.3, by = 0.01)) |
|
158 |
- pN <- seq(0.05, 0.3, by = 0.05) |
|
159 |
- ## Remove pK values with too few cells |
|
160 |
- min.cells <- round(nrow(seu@meta.data) / (1 - 0.05) - nrow(seu@meta.data)) |
|
161 |
- pK.test <- round(pK * min.cells) |
|
162 |
- pK <- pK[which(pK.test >= 1)] |
|
163 |
- |
|
164 |
- ## Extract pre-processing parameters from original data analysis workflow |
|
165 |
- orig.commands <- seu@commands |
|
166 |
- |
|
167 |
- ## Down-sample cells to 10000 (when applicable) for computational effiency |
|
168 |
- if (nrow(seu@meta.data) > 10000) { |
|
169 |
- real.cells <- rownames(seu@meta.data)[sample(seq(nrow(seu@meta.data)), |
|
170 |
- 10000, replace = FALSE)] |
|
171 |
- data <- seu@assays$RNA@counts[, real.cells] |
|
172 |
- n.real.cells <- ncol(data) |
|
90 |
+ seu_wdoublets <- Seurat::CreateSeuratObject(counts = data_wdoublets) |
|
91 |
+ |
|
92 |
+ if (verbose) { |
|
93 |
+ message(date(), " ... Running SCTransform") |
|
173 | 94 |
} |
174 |
- |
|
175 |
- if (nrow(seu@meta.data) <= 10000) { |
|
176 |
- real.cells <- rownames(seu@meta.data) |
|
177 |
- data <- seu@assays$RNA@counts |
|
178 |
- n.real.cells <- ncol(data) |
|
95 |
+ seu_wdoublets <- Seurat::SCTransform(seu_wdoublets) |
|
96 |
+ |
|
97 |
+ if (verbose) { |
|
98 |
+ message(date(), " ... Running PCA") |
|
179 | 99 |
} |
180 |
- ## Iterate through pN, computing pANN vectors at varying pK |
|
181 |
- # no_cores <- detectCores()-1 |
|
182 |
- if (is.null(num.cores)) { |
|
183 |
- num.cores <- 1 |
|
100 |
+ seu_wdoublets <- Seurat::RunPCA(seu_wdoublets, |
|
101 |
+ npcs = length(PCs), seed.use = seed, |
|
102 |
+ verbose = verbose |
|
103 |
+ ) |
|
104 |
+ } |
|
105 |
+ |
|
106 |
+ ## Compute PC distance matrix |
|
107 |
+ if (verbose) { |
|
108 |
+ message(date(), " ... Calculating PC distance matrix") |
|
109 |
+ } |
|
110 |
+ nCells <- ncol(seu_wdoublets) |
|
111 |
+ pca.coord <- Seurat::Reductions(seu_wdoublets, "pca")@cell.embeddings[, PCs] |
|
112 |
+ #seu_wdoublets <- NULL |
|
113 |
+ rm(seu_wdoublets) |
|
114 |
+ gc() |
|
115 |
+ dist.mat <- fields::rdist(pca.coord)[, seq(n.real.cells)] |
|
116 |
+ |
|
117 |
+ ## Pre-order PC distance matrix prior to iterating across pK |
|
118 |
+ ## for pANN computations |
|
119 |
+ if (verbose) { |
|
120 |
+ message(date(), " ... Defining neighborhoods") |
|
121 |
+ } |
|
122 |
+ |
|
123 |
+ for (i in seq(n.real.cells)) { |
|
124 |
+ dist.mat[, i] <- order(dist.mat[, i]) |
|
125 |
+ } |
|
126 |
+ |
|
127 |
+ ## Trim PC distance matrix for faster manipulations |
|
128 |
+ ind <- round(nCells * max(pK)) + 5 |
|
129 |
+ dist.mat <- dist.mat[seq(ind), ] |
|
130 |
+ |
|
131 |
+ ## Compute pANN across pK sweep |
|
132 |
+ |
|
133 |
+ if (verbose) { |
|
134 |
+ message(date(), " ... Computing pANN across all pK") |
|
135 |
+ } |
|
136 |
+ for (k in seq_along(pK)) { |
|
137 |
+ if (verbose) { |
|
138 |
+ message(date(), " ... pK = ", pK[k]) |
|
184 | 139 |
} |
185 |
- |
|
186 |
- if (num.cores > 1) { |
|
187 |
- cl <- parallel::makeCluster(num.cores) |
|
188 |
- |
|
189 |
- output2 <- withr::with_seed( |
|
190 |
- seed, |
|
191 |
- parallel::mclapply(as.list(seq_along(pN)), |
|
192 |
- FUN = .parallel_paramSweep, |
|
193 |
- n.real.cells, |
|
194 |
- real.cells, |
|
195 |
- pK, |
|
196 |
- pN, |
|
197 |
- data, |
|
198 |
- orig.commands, |
|
199 |
- PCs, |
|
200 |
- sct, mc.cores = num.cores, |
|
201 |
- verbose = verbose, |
|
202 |
- seed = seed, |
|
203 |
- mc.set.seed = FALSE |
|
204 |
- ) |
|
205 |
- ) |
|
206 |
- parallel::stopCluster(cl) |
|
207 |
- } else { |
|
208 |
- output2 <- lapply(as.list(seq_along(pN)), |
|
209 |
- FUN = .parallel_paramSweep, |
|
210 |
- n.real.cells, |
|
211 |
- real.cells, |
|
212 |
- pK, |
|
213 |
- pN, |
|
214 |
- data, |
|
215 |
- orig.commands, |
|
216 |
- PCs, |
|
217 |
- sct, |
|
218 |
- seed = seed, |
|
219 |
- verbose = verbose |
|
220 |
- ) |
|
140 |
+ pk.temp <- round(nCells * pK[k]) |
|
141 |
+ pANN <- as.data.frame(matrix(0L, nrow = n.real.cells, ncol = 1)) |
|
142 |
+ colnames(pANN) <- "pANN" |
|
143 |
+ rownames(pANN) <- real.cells |
|
144 |
+ list.ind <- list.ind + 1 |
|
145 |
+ |
|
146 |
+ for (i in seq(n.real.cells)) { |
|
147 |
+ neighbors <- dist.mat[2:(pk.temp + 1), i] |
|
148 |
+ pANN$pANN[i] <- length(which(neighbors > n.real.cells)) / pk.temp |
|
221 | 149 |
} |
150 |
+ |
|
151 |
+ sweep.res.list[[list.ind]] <- pANN |
|
152 |
+ } |
|
153 |
+ |
|
154 |
+ return(sweep.res.list) |
|
155 |
+} |
|
222 | 156 |
|
223 |
- ## Write parallelized output into list |
|
224 |
- sweep.res.list <- list() |
|
225 |
- list.ind <- 0 |
|
226 |
- for (i in seq_along(output2)) { |
|
227 |
- for (j in seq_along(output2[[i]])) { |
|
228 |
- list.ind <- list.ind + 1 |
|
229 |
- sweep.res.list[[list.ind]] <- output2[[i]][[j]] |
|
230 |
- } |
|
231 |
- } |
|
232 |
- ## Assign names to list of results |
|
233 |
- name.vec <- NULL |
|
234 |
- for (j in seq_along(pN)) { |
|
235 |
- name.vec <- c(name.vec, paste("pN", pN[j], "pK", pK, sep = "_")) |
|
157 |
+.paramSweep <- function(seu, PCs = seq(10), sct = FALSE, |
|
158 |
+ verbose = FALSE, num.cores, seed) { |
|
159 |
+ ## Set pN-pK param sweep ranges |
|
160 |
+ pK <- c(0.0005, 0.001, 0.005, seq(0.01, 0.3, by = 0.01)) |
|
161 |
+ pN <- seq(0.05, 0.3, by = 0.05) |
|
162 |
+ ## Remove pK values with too few cells |
|
163 |
+ min.cells <- round(nrow(seu[[]]) / (1 - 0.05) - nrow(seu[[]])) |
|
164 |
+ pK.test <- round(pK * min.cells) |
|
165 |
+ pK <- pK[which(pK.test >= 1)] |
|
166 |
+ |
|
167 |
+ ## Extract pre-processing parameters from original data analysis workflow |
|
168 |
+ orig.commands <- seu@commands |
|
169 |
+ |
|
170 |
+ ## Down-sample cells to 10000 (when applicable) for computational effiency |
|
171 |
+ if (nrow(seu[[]]) > 10000) { |
|
172 |
+ real.cells <- rownames(seu[[]])[sample(seq(nrow(seu[[]])), |
|
173 |
+ 10000, replace = FALSE)] |
|
174 |
+ data <- Seurat::GetAssayData(seu, slot = "counts", |
|
175 |
+ assay = "RNA")[, real.cells] |
|
176 |
+ n.real.cells <- ncol(data) |
|
177 |
+ } |
|
178 |
+ |
|
179 |
+ if (ncol(seu) <= 10000) { |
|
180 |
+ real.cells <- colnames(seu) |
|
181 |
+ data <- Seurat::GetAssayData(seu, slot = "counts", assay = "RNA") |
|
182 |
+ n.real.cells <- ncol(data) |
|
183 |
+ } |
|
184 |
+ ## Iterate through pN, computing pANN vectors at varying pK |
|
185 |
+ # no_cores <- detectCores()-1 |
|
186 |
+ if (is.null(num.cores)) { |
|
187 |
+ num.cores <- 1 |
|
188 |
+ } |
|
189 |
+ |
|
190 |
+ if (num.cores > 1) { |
|
191 |
+ cl <- parallel::makeCluster(num.cores) |
|
192 |
+ |
|
193 |
+ output2 <- withr::with_seed( |
|
194 |
+ seed, |
|
195 |
+ parallel::mclapply(as.list(seq_along(pN)), |
|
196 |
+ FUN = .parallel_paramSweep, |
|
197 |
+ n.real.cells, |
|
198 |
+ real.cells, |
|
199 |
+ pK, |
|
200 |
+ pN, |
|
201 |
+ data, |
|
202 |
+ orig.commands, |
|
203 |
+ PCs, |
|
204 |
+ sct, mc.cores = num.cores, |
|
205 |
+ verbose = verbose, |
|
206 |
+ seed = seed, |
|
207 |
+ mc.set.seed = FALSE |
|
208 |
+ ) |
|
209 |
+ ) |
|
210 |
+ parallel::stopCluster(cl) |
|
211 |
+ } else { |
|
212 |
+ output2 <- lapply(as.list(seq_along(pN)), |
|
213 |
+ FUN = .parallel_paramSweep, |
|
214 |
+ n.real.cells, |
|
215 |
+ real.cells, |
|
216 |
+ pK, |
|
217 |
+ pN, |
|
218 |
+ data, |
|
219 |
+ orig.commands, |
|
220 |
+ PCs, |
|
221 |
+ sct, |
|
222 |
+ seed = seed, |
|
223 |
+ verbose = verbose |
|
224 |
+ ) |
|
225 |
+ } |
|
226 |
+ |
|
227 |
+ ## Write parallelized output into list |
|
228 |
+ sweep.res.list <- list() |
|
229 |
+ list.ind <- 0 |
|
230 |
+ for (i in seq_along(output2)) { |
|
231 |
+ for (j in seq_along(output2[[i]])) { |
|
232 |
+ list.ind <- list.ind + 1 |
|
233 |
+ sweep.res.list[[list.ind]] <- output2[[i]][[j]] |
|
236 | 234 |
} |
237 |
- names(sweep.res.list) <- name.vec |
|
238 |
- return(sweep.res.list) |
|
239 |
- |
|
235 |
+ } |
|
236 |
+ ## Assign names to list of results |
|
237 |
+ name.vec <- NULL |
|
238 |
+ for (j in seq_along(pN)) { |
|
239 |
+ name.vec <- c(name.vec, paste("pN", pN[j], "pK", pK, sep = "_")) |
|
240 |
+ } |
|
241 |
+ names(sweep.res.list) <- name.vec |
|
242 |
+ return(sweep.res.list) |
|
243 |
+ |
|
240 | 244 |
} |
241 | 245 |
|
242 | 246 |
.runDoubletFinder <- function(counts, seuratPcs, seuratRes, formationRate, |
243 | 247 |
seuratNfeatures, verbose = FALSE, |
244 | 248 |
nCores = NULL, seed = 12345) { |
245 |
- |
|
249 |
+ |
|
246 | 250 |
## Convert to sparse matrix if not already in that format |
247 | 251 |
counts <- .convertToMatrix(counts) |
248 | 252 |
colnames(counts) <- gsub("_", "-", colnames(counts)) |
249 |
- |
|
253 |
+ |
|
250 | 254 |
seurat <- suppressWarnings(Seurat::CreateSeuratObject( |
251 | 255 |
counts = counts, |
252 | 256 |
project = "seurat", min.features = 0 |
... | ... |
@@ -257,51 +261,53 @@ |
257 | 261 |
verbose = verbose |
258 | 262 |
) |
259 | 263 |
seurat <- Seurat::FindVariableFeatures(seurat, |
260 |
- selection.method = "vst", |
|
261 |
- nfeatures = seuratNfeatures, |
|
262 |
- verbose = verbose |
|
264 |
+ selection.method = "vst", |
|
265 |
+ nfeatures = seuratNfeatures, |
|
266 |
+ verbose = verbose |
|
263 | 267 |
) |
264 |
- |
|
268 |
+ |
|
265 | 269 |
allGenes <- rownames(seurat) |
266 | 270 |
seurat <- Seurat::ScaleData(seurat, features = allGenes, verbose = verbose) |
267 |
- |
|
268 |
- numPc <- min(ncol(seurat@assays$RNA@scale.data) - 1, 50) |
|
271 |
+ |
|
272 |
+ numPc <- min(nrow(Seurat::GetAssayData(seurat, slot = "scale.data", |
|
273 |
+ assay = "RNA")) - 1, |
|
274 |
+ 50) |
|
269 | 275 |
seurat <- Seurat::RunPCA(seurat, |
270 |
- features = |
|
271 |
- Seurat::VariableFeatures(object = seurat), |
|
272 |
- npcs = numPc, verbose = verbose, seed.use = seed |
|
276 |
+ features = |
|
277 |
+ Seurat::VariableFeatures(object = seurat), |
|
278 |
+ npcs = numPc, verbose = verbose, seed.use = seed |
|
273 | 279 |
) |
274 | 280 |
seurat <- Seurat::FindNeighbors(seurat, dims = seuratPcs, verbose = verbose) |
275 | 281 |
seurat <- Seurat::FindClusters(seurat, |
276 |
- resolution = seuratRes, |
|
277 |
- verbose = verbose, |
|
278 |
- random.seed = seed |
|
282 |
+ resolution = seuratRes, |
|
283 |
+ verbose = verbose, |
|
284 |
+ random.seed = seed |
|
279 | 285 |
) |
280 | 286 |
invisible(sweepResListSeurat <- .paramSweep(seurat, |
281 |
- PCs = seuratPcs, |
|
282 |
- sct = FALSE, |
|
283 |
- num.cores = nCores, |
|
284 |
- verbose = verbose, |
|
285 |
- seed = seed |
|
287 |
+ PCs = seuratPcs, |
|
288 |
+ sct = FALSE, |
|
289 |
+ num.cores = nCores, |
|
290 |
+ verbose = verbose, |
|
291 |
+ seed = seed |
|
286 | 292 |
)) |
287 | 293 |
sweepStatsSeurat <- .summarizeSweep(sweepResListSeurat, |
288 |
- GT = FALSE |
|
294 |
+ GT = FALSE |
|
289 | 295 |
) |
290 | 296 |
bcmvnSeurat <- .find.pK(sweepStatsSeurat) |
291 | 297 |
pkOptimal <- as.numeric(as.matrix(bcmvnSeurat$pK[ |
292 | 298 |
which.max(bcmvnSeurat$MeanBC) |
293 | 299 |
])) |
294 |
- annotations <- seurat@meta.data$seurat_clusters |
|
295 |
- homotypicProp <- .modelHomotypic(annotations) |
|
296 |
- nExpPoi <- round(formationRate * ncol(seurat@assays$RNA)) |
|
300 |
+ #annotations <- seurat[[]]$seurat_clusters |
|
301 |
+ #homotypicProp <- .modelHomotypic(annotations) |
|
302 |
+ nExpPoi <- round(formationRate * ncol(seurat)) |
|
297 | 303 |
seurat <- .doubletFinder_v3(seurat, |
298 |
- PCs = seuratPcs, |
|
299 |
- pN = 0.25, |
|
300 |
- pK = pkOptimal, |
|
301 |
- nExp = nExpPoi, |
|
302 |
- reuse.pANN = FALSE, |
|
303 |
- sct = FALSE, |
|
304 |
- verbose = FALSE |
|
304 |
+ PCs = seuratPcs, |
|
305 |
+ pN = 0.25, |
|
306 |
+ pK = pkOptimal, |
|
307 |
+ nExp = nExpPoi, |
|
308 |
+ reuse.pANN = FALSE, |
|
309 |
+ sct = FALSE, |
|
310 |
+ verbose = FALSE |
|
305 | 311 |
) |
306 | 312 |
names(seurat@meta.data)[6] <- "doubletFinderAnnScore" |
307 | 313 |
names(seurat@meta.data)[7] <- "doubletFinderLabel" |
... | ... |
@@ -311,34 +317,39 @@ |
311 | 317 |
#' @title Generates a doublet score for each cell via doubletFinder |
312 | 318 |
#' @description Uses doubletFinder to determine cells within the dataset |
313 | 319 |
#' suspected to be doublets. |
314 |
-#' @param inSCE Input SingleCellExperiment object. Must contain a counts matrix |
|
315 |
-#' @param useAssay Indicate which assay to use. Default "counts". |
|
316 |
-#' @param sample Numeric vector. Each cell will be assigned a sample number. |
|
317 |
-#' @param seed Seed for the random number generator. Default 12345. |
|
320 |
+#' @param inSCE inSCE A \linkS4class{SingleCellExperiment} object. |
|
321 |
+#' @param sample Character vector or colData variable name. Indicates which |
|
322 |
+#' sample each cell belongs to. Default \code{NULL}. |
|
323 |
+#' @param useAssay A string specifying which assay in the SCE to use. Default |
|
324 |
+#' \code{"counts"}. |
|
325 |
+#' @param seed Seed for the random number generator, can be set to \code{NULL}. |
|
326 |
+#' Default \code{12345}. |
|
318 | 327 |
#' @param seuratNfeatures Integer. Number of highly variable genes to use. |
319 |
-#' Default 2000. |
|
328 |
+#' Default \code{2000}. |
|
320 | 329 |
#' @param seuratPcs Numeric vector. The PCs used in seurat function to |
321 |
-#' determine number of clusters. Default 1:15. |
|
322 |
-#' @param seuratRes Numeric vector. The resolution parameter used in seurat, |
|
323 |
-#' which adjusts the number of clusters determined via the algorithm. |
|
324 |
-#' Default 1.5. |
|
325 |
-#' @param formationRate Doublet formation rate used within algorithm. |
|
326 |
-#' Default 0.075. |
|
327 |
-#' @param nCores Number of cores used for running the function. |
|
328 |
-#' @param verbose Boolean. Wheter to print messages from Seurat and |
|
329 |
-#' DoubletFinder. Default FALSE. |
|
330 |
-#' @return SingleCellExperiment object containing the |
|
331 |
-#' 'doublet_finder_doublet_score'. |
|
330 |
+#' determine number of clusters. Default \code{1:15}. |
|
331 |
+#' @param seuratRes Numeric vector. The resolution parameter used in Seurat, |
|
332 |
+#' which adjusts the number of clusters determined via the algorithm. Default |
|
333 |
+#' \code{1.5}. |
|
334 |
+#' @param formationRate Doublet formation rate used within algorithm. Default |
|
335 |
+#' \code{0.075}. |
|
336 |
+#' @param nCores Number of cores used for running the function. Default |
|
337 |
+#' \code{NULL}. |
|
338 |
+#' @param verbose Boolean. Wheter to print messages from Seurat and |
|
339 |
+#' DoubletFinder. Default \code{FALSE}. |
|
340 |
+#' @return \linkS4class{SingleCellExperiment} object containing the |
|
341 |
+#' \code{doublet_finder_doublet_score} variable in \code{colData} slot. |
|
342 |
+#' @seealso \code{\link{runCellQC}}, \code{\link{plotDoubletFinderResults}} |
|
332 | 343 |
#' @examples |
333 | 344 |
#' data(scExample, package = "singleCellTK") |
334 | 345 |
#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
335 | 346 |
#' sce <- runDoubletFinder(sce) |
336 | 347 |
#' @export |
337 |
-#' @importFrom SummarizedExperiment colData colData<- |
|
348 |
+#' @importFrom SummarizedExperiment colData colData<- assayNames assayNames<- |
|
338 | 349 |
#' @importFrom SingleCellExperiment reducedDim<- |
339 | 350 |
runDoubletFinder <- function(inSCE, |
340 |
- useAssay = "counts", |
|
341 | 351 |
sample = NULL, |
352 |
+ useAssay = "counts", |
|
342 | 353 |
seed = 12345, |
343 | 354 |
seuratNfeatures = 2000, |
344 | 355 |
seuratPcs = seq(15), |
... | ... |
@@ -346,28 +357,25 @@ runDoubletFinder <- function(inSCE, |
346 | 357 |
formationRate = 0.075, |
347 | 358 |
nCores = NULL, |
348 | 359 |
verbose = FALSE) { |
349 |
- |
|
350 |
- # argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
|
360 |
+ |
|
351 | 361 |
argsList <- mget(names(formals()), sys.frame(sys.nframe())) |
352 |
- |
|
362 |
+ argsList <- argsList[!names(argsList) %in% c("inSCE")] |
|
363 |
+ argsList$packageVersion <- "2.0.2" |
|
353 | 364 |
tempSCE <- inSCE |
354 |
- SummarizedExperiment::assayNames(inSCE)[which(useAssay %in% SummarizedExperiment::assayNames(inSCE))] <- "counts" |
|
355 |
- useAssay <- "counts" |
|
356 |
- |
|
357 |
- if (!is.null(sample)) { |
|
358 |
- if (length(sample) != ncol(inSCE)) { |
|
359 |
- stop("'sample' must be the same length as the number of columns in 'inSCE'") |
|
360 |
- } |
|
361 |
- } else { |
|
365 |
+ #assayNames(inSCE)[which(useAssay %in% assayNames(inSCE))] <- "counts" |
|
366 |
+ #useAssay <- "counts" |
|
367 |
+ |
|
368 |
+ sample <- .manageCellVar(inSCE, var = sample) |
|
369 |
+ if (is.null(sample)) { |
|
362 | 370 |
sample <- rep(1, ncol(inSCE)) |
363 | 371 |
} |
364 |
- |
|
372 |
+ |
|
365 | 373 |
message(date(), " ... Running 'doubletFinder'") |
366 |
- |
|
367 |
- doubletScore <- rep(NA, ncol(inSCE)) |
|
368 |
- doubletLabel <- rep(NA, ncol(inSCE)) |
|
369 |
- allSampleNumbers <- sort(unique(sample)) |
|
370 |
- |
|
374 |
+ |
|
375 |
+ #doubletScore <- rep(NA, ncol(inSCE)) |
|
376 |
+ #doubletLabel <- rep(NA, ncol(inSCE)) |
|
377 |
+ #allSampleNumbers <- sort(unique(sample)) |
|
378 |
+ |
|
371 | 379 |
for (res in seuratRes) { |
372 | 380 |
output <- S4Vectors::DataFrame( |
373 | 381 |
row.names = colnames(inSCE), |
... | ... |
@@ -378,21 +386,19 @@ runDoubletFinder <- function(inSCE, |
378 | 386 |
nrow = ncol(inSCE)) |
379 | 387 |
rownames(umapDims) = colnames(inSCE) |
380 | 388 |
colnames(umapDims) = c("UMAP_1", "UMAP_2") |
381 |
- |
|
389 |
+ |
|
382 | 390 |
## Loop through each sample and run doubletFinder |
383 | 391 |
samples <- unique(sample) |
384 |
- |
|
385 |
- for (i in seq_len(length(samples))) { |
|
386 |
- sceSampleInd <- sample == samples[i] |
|
392 |
+ |
|
393 |
+ for (s in samples) { |
|
394 |
+ sceSampleInd <- sample == s |
|
387 | 395 |
sceSample <- inSCE[, sceSampleInd] |
388 | 396 |
mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
389 |
- if(length(seuratPcs) > ncol(mat)){ |
|
397 |
+ if (length(seuratPcs) > ncol(mat)) { |
|
390 | 398 |
seuratPcs <- seq(ncol(mat)) |
391 | 399 |
} |
392 |
- |
|
393 |
- result <- suppressMessages(withr::with_seed( |
|
394 |
- seed, |
|
395 |
- .runDoubletFinder( |
|
400 |
+ .withSeed(seed, { |
|
401 |
+ result <- .runDoubletFinder( |
|
396 | 402 |
counts = mat, |
397 | 403 |
seuratPcs = seuratPcs, |
398 | 404 |
seuratRes = res, |
... | ... |
@@ -402,34 +408,39 @@ runDoubletFinder <- function(inSCE, |
402 | 408 |
verbose = verbose, |
403 | 409 |
seed = seed |
404 | 410 |
) |
405 |
- )) |
|
411 |
+ }) |
|
412 |
+ |
|
406 | 413 |
result <- suppressMessages(Seurat::RunUMAP(result, |
407 | 414 |
dims = seq(10), |
408 | 415 |
verbose = verbose, |
409 | 416 |
umap.method = "uwot")) |
410 |
- |
|
417 |
+ |
|
411 | 418 |
seuratDims <- Seurat::Embeddings(result, reduction = "umap") |
412 | 419 |
umapDims[sceSampleInd, 1] <- seuratDims[,1] |
413 | 420 |
umapDims[sceSampleInd, 2] <- seuratDims[,2] |
414 |
- output[sceSampleInd, 1] <- result@meta.data$doubletFinderAnnScore |
|
415 |
- output[sceSampleInd, 2] <- result@meta.data$doubletFinderLabel |
|
421 |
+ output[sceSampleInd, 1] <- result[[]]$doubletFinderAnnScore |
|
422 |
+ output[sceSampleInd, 2] <- result[[]]$doubletFinderLabel |
|
423 |
+ |
|
424 |
+ if (!identical(samples, 1)) { |
|
425 |
+ metadata(inSCE)$sctk$runDoubletFinder[[s]] <- argsList |
|
426 |
+ } |
|
427 |
+ } |
|
428 |
+ if (identical(samples, 1)) { |
|
429 |
+ metadata(inSCE)$sctk$runDoubletFinder$all_cells <- argsList |
|
416 | 430 |
} |
417 |
- |
|
418 | 431 |
colData(inSCE)[, paste0(colnames(output), "_resolution_", res)] <- NULL |
419 |
- |
|
432 |
+ |
|
420 | 433 |
colnames(output) <- paste0(colnames(output), "_resolution_", res) |
421 | 434 |
output[,2] <- factor(output[,2], levels = c("Singlet", "Doublet")) |
422 |
- argsList <- argsList[!names(argsList) %in% ("...")] |
|
423 |
- inSCE@metadata$runDoubletFinder <- argsList[-1] |
|
424 |
- inSCE@metadata$runDoubletFinder$packageVersion <- "2.0.2" |
|
435 |
+ |
|
425 | 436 |
colData(inSCE) <- cbind(colData(inSCE), output) |
426 | 437 |
reducedDim(inSCE,'doubletFinder_UMAP') <- umapDims |
427 | 438 |
} |
428 |
- |
|
429 |
- tempSCE@colData <- inSCE@colData |
|
430 |
- tempSCE@metadata <- inSCE@metadata |
|
439 |
+ |
|
440 |
+ colData(tempSCE) <- colData(inSCE) |
|
441 |
+ S4Vectors::metadata(tempSCE) <- S4Vectors::metadata(inSCE) |
|
431 | 442 |
reducedDims(tempSCE) <- reducedDims(inSCE) |
432 |
- |
|
443 |
+ |
|
433 | 444 |
return(tempSCE) |
434 | 445 |
} |
435 | 446 |
|
... | ... |
@@ -438,54 +449,63 @@ runDoubletFinder <- function(inSCE, |
438 | 449 |
#require(KernSmooth); require(ROCR) |
439 | 450 |
## Set pN-pK param sweep ranges |
440 | 451 |
name.vec <- names(sweep.list) |
441 |
- name.vec <- unlist(strsplit(name.vec, split="pN_")) |
|
442 |
- name.vec <- name.vec[seq(2, length(name.vec), by=2)] |
|
443 |
- name.vec <- unlist(strsplit(name.vec, split="_pK_")) |
|
444 |
- pN <- as.numeric(unique(name.vec[seq(1, length(name.vec), by=2)])) |
|
445 |
- pK <- as.numeric(unique(name.vec[seq(2, length(name.vec), by=2)])) |
|
446 |
- |
|
447 |
- ## Initialize data structure w/ or w/o AUC column, depending on whether ground-truth doublet classifications are available |
|
452 |
+ name.vec <- unlist(strsplit(name.vec, split = "pN_")) |
|
453 |
+ name.vec <- name.vec[seq(2, length(name.vec), by = 2)] |
|
454 |
+ name.vec <- unlist(strsplit(name.vec, split = "_pK_")) |
|
455 |
+ pN <- as.numeric(unique(name.vec[seq(1, length(name.vec), by = 2)])) |
|
456 |
+ pK <- as.numeric(unique(name.vec[seq(2, length(name.vec), by = 2)])) |
|
457 |
+ |
|
458 |
+ ## Initialize data structure w/ or w/o AUC column, depending on whether |
|
459 |
+ ## ground-truth doublet classifications are available |
|
448 | 460 |
if (GT == TRUE) { |
449 |
- sweep.stats <- as.data.frame(matrix(0L, nrow=length(sweep.list), ncol=4)) |
|
461 |
+ sweep.stats <- as.data.frame(matrix(0L, nrow = length(sweep.list), |
|
462 |
+ ncol = 4)) |
|
450 | 463 |
colnames(sweep.stats) <- c("pN","pK","AUC","BCreal") |
451 |
- sweep.stats$pN <- factor(rep(pN, each=length(pK), levels = pN)) |
|
464 |
+ sweep.stats$pN <- factor(rep(pN, each = length(pK), levels = pN)) |
|
452 | 465 |
sweep.stats$pK <- factor(rep(pK, length(pN),levels = pK)) |
453 | 466 |
} |
454 |
- |
|
467 |
+ |
|
455 | 468 |
if (GT == FALSE) { |
456 |
- sweep.stats <- as.data.frame(matrix(0L, nrow=length(sweep.list), ncol=3)) |
|
469 |
+ sweep.stats <- as.data.frame(matrix(0L, nrow = length(sweep.list), |
|
470 |
+ ncol = 3)) |
|
457 | 471 |
colnames(sweep.stats) <- c("pN","pK","BCreal") |
458 |
- sweep.stats$pN <- factor(rep(pN, each=length(pK), levels = pN)) |
|
472 |
+ sweep.stats$pN <- factor(rep(pN, each = length(pK), levels = pN)) |
|
459 | 473 |
sweep.stats$pK <- factor(rep(pK, length(pN),levels = pK)) |
460 | 474 |
} |
461 |
- |
|
475 |
+ |
|
462 | 476 |
## Perform pN-pK parameter sweep summary |
463 | 477 |
for (i in seq_along(sweep.list)) { |
464 | 478 |
res.temp <- sweep.list[[i]] |
465 |
- |
|
466 |
- ## Use gaussian kernel density estimation of pANN vector to compute bimodality coefficient |
|
467 |
- gkde <- stats::approxfun(KernSmooth::bkde(res.temp$pANN, kernel="normal")) |
|
468 |
- x <- seq(from=min(res.temp$pANN), to=max(res.temp$pANN), length.out=nrow(res.temp)) |
|
479 |
+ |
|
480 |
+ ## Use gaussian kernel density estimation of pANN vector to compute |
|
481 |
+ ## bimodality coefficient |
|
482 |
+ gkde <- stats::approxfun(KernSmooth::bkde(res.temp$pANN, kernel = "normal")) |
|
483 |
+ x <- seq(from = min(res.temp$pANN), to = max(res.temp$pANN), |
|
484 |
+ length.out = nrow(res.temp)) |
|
469 | 485 |
sweep.stats$BCreal[i] <- .bimodality_coefficient(gkde(x)) |
470 |
- |
|
486 |
+ |
|
471 | 487 |
if (GT == FALSE) { next } |
472 |
- |
|
473 |
- ## If ground-truth doublet classifications are available, perform ROC analysis on logistic |
|
474 |
- ## regression model trained using pANN vector |
|
475 |
- meta <- as.data.frame(matrix(0L, nrow=nrow(res.temp), ncol=2)) |
|
488 |
+ |
|
489 |
+ ## If ground-truth doublet classifications are available, perform ROC |
|
490 |
+ ## analysis on logistic regression model trained using pANN vector |
|
491 |
+ meta <- as.data.frame(matrix(0L, nrow = nrow(res.temp), ncol = 2)) |
|
476 | 492 |
meta[,1] <- GT.calls |
477 | 493 |
meta[,2] <- res.temp$pANN |
478 |
- train.ind <- sample(seq(nrow(meta)), round(nrow(meta)/2), replace=FALSE) |
|
494 |
+ train.ind <- sample(seq(nrow(meta)), round(nrow(meta)/2), replace = FALSE) |
|
479 | 495 |
test.ind <- seq(nrow(meta))[-train.ind] |
480 | 496 |
colnames(meta) <- c("SinDub","pANN") |
481 | 497 |
meta$SinDub <- factor(meta$SinDub, levels = c("Singlet","Doublet")) |
482 |
- model.lm <- stats::glm(SinDub ~ pANN, family=stats::binomial(link='logit'), data=meta, subset=train.ind) |
|
483 |
- prob <- stats::predict(model.lm, newdata=meta[test.ind, ], type="response") |
|
484 |
- ROCpred <- ROCR::prediction(predictions=prob, labels=meta$SinDub[test.ind]) |
|
485 |
- perf.auc <- ROCR::performance(ROCpred, measure="auc") |
|
498 |
+ model.lm <- stats::glm(SinDub ~ pANN, |
|
499 |
+ family = stats::binomial(link = 'logit'), |
|
500 |
+ data = meta, subset = train.ind) |
|
501 |
+ prob <- stats::predict(model.lm, newdata = meta[test.ind, ], |
|
502 |
+ type = "response") |
|
503 |
+ ROCpred <- ROCR::prediction(predictions = prob, |
|
504 |
+ labels = meta$SinDub[test.ind]) |
|
505 |
+ perf.auc <- ROCR::performance(ROCpred, measure = "auc") |
|
486 | 506 |
sweep.stats$AUC[i] <- perf.auc@y.values[[1]] |
487 | 507 |
} |
488 |
- |
|
508 |
+ |
|
489 | 509 |
return(sweep.stats) |
490 | 510 |
} |
491 | 511 |
|
... | ... |
@@ -495,21 +515,21 @@ runDoubletFinder <- function(inSCE, |
495 | 515 |
sample.excess.kurtosis <- .kurtosis(x) |
496 | 516 |
K <- sample.excess.kurtosis |
497 | 517 |
n <- length(x) |
498 |
- B <- ((G^2)+1)/(K+((3*((n-1)^2))/((n-2)*(n-3)))) |
|
518 |
+ B <- ((G^2) + 1)/(K + ((3*((n - 1)^2))/((n - 2)*(n - 3)))) |
|
499 | 519 |
return(B) |
500 | 520 |
} |
501 | 521 |
|
502 | 522 |
.skewness <- function(x) { |
503 | 523 |
n <- length(x) |
504 |
- S <- (1/n)*sum((x-mean(x))^3)/(((1/n)*sum((x-mean(x))^2))^1.5) |
|
505 |
- S <- S*(sqrt(n*(n-1)))/(n-2) |
|
524 |
+ S <- (1/n)*sum((x - mean(x))^3)/(((1/n)*sum((x - mean(x))^2))^1.5) |
|
525 |
+ S <- S*(sqrt(n*(n - 1)))/(n - 2) |
|
506 | 526 |
return(S) |
507 | 527 |
} |
508 | 528 |
|
509 |
-.kurtosis <- function (x) { |
|
529 |
+.kurtosis <- function(x) { |
|
510 | 530 |
n <- length(x) |
511 |
- K <- (1/n)*sum((x-mean(x))^4)/(((1/n)*sum((x-mean(x))^2))^2)-3 |
|
512 |
- K <- ((n - 1)*((n+1)*K-3*(n-1))/((n-2)*(n-3)))+3 |
|
531 |
+ K <- (1/n)*sum((x - mean(x))^4)/(((1/n)*sum((x - mean(x))^2))^2) - 3 |
|
532 |
+ K <- ((n - 1)*((n + 1)*K - 3*(n - 1))/((n - 2)*(n - 3))) + 3 |
|
513 | 533 |
return(K) |
514 | 534 |
} |
515 | 535 |
|
... | ... |
@@ -520,37 +540,43 @@ runDoubletFinder <- function(inSCE, |
520 | 540 |
} |
521 | 541 |
|
522 | 542 |
.find.pK <- function(sweep.stats) { |
523 |
- |
|
543 |
+ |
|
524 | 544 |
## Implementation for data without ground-truth doublet classifications |
525 | 545 |
'%ni%' <- Negate('%in%') |
526 | 546 |
if ("AUC" %ni% colnames(sweep.stats) == TRUE) { |
527 | 547 |
## Initialize data structure for results storage |
528 |
- bc.mvn <- as.data.frame(matrix(0L, nrow=length(unique(sweep.stats$pK)), ncol=5)) |
|
548 |
+ bc.mvn <- as.data.frame(matrix(0L, nrow = length(unique(sweep.stats$pK)), |
|
549 |
+ ncol = 5)) |
|
529 | 550 |
colnames(bc.mvn) <- c("ParamID","pK","MeanBC","VarBC","BCmetric") |
530 | 551 |
bc.mvn$pK <- unique(sweep.stats$pK) |
531 | 552 |
bc.mvn$ParamID <- seq(nrow(bc.mvn)) |
532 |
- |
|
533 |
- ## Compute bimodality coefficient mean, variance, and BCmvn across pN-pK sweep results |
|
553 |
+ |
|
554 |
+ ## Compute bimodality coefficient mean, variance, and BCmvn across pN-pK |
|
555 |
+ ## sweep results |
|
534 | 556 |
x <- 0 |
535 | 557 |
for (i in unique(bc.mvn$pK)) { |
536 | 558 |
x <- x + 1 |
537 | 559 |
ind <- which(sweep.stats$pK == i) |
538 | 560 |
bc.mvn$MeanBC[x] <- mean(sweep.stats[ind, "BCreal"]) |
539 | 561 |
bc.mvn$VarBC[x] <- stats::sd(sweep.stats[ind, "BCreal"])^2 |
540 |
- bc.mvn$BCmetric[x] <- mean(sweep.stats[ind, "BCreal"])/(stats::sd(sweep.stats[ind, "BCreal"])^2) |
|
562 |
+ bc.mvn$BCmetric[x] <- mean(sweep.stats[ind, "BCreal"]) / |
|
563 |
+ (stats::sd(sweep.stats[ind, "BCreal"])^2) |
|
541 | 564 |
} |
542 | 565 |
return(bc.mvn) |
543 | 566 |
} |
544 |
- |
|
545 |
- ## Implementation for data with ground-truth doublet classifications (e.g., MULTI-seq, CellHashing, Demuxlet, etc.) |
|
567 |
+ |
|
568 |
+ ## Implementation for data with ground-truth doublet classifications (e.g., |
|
569 |
+ ## MULTI-seq, CellHashing, Demuxlet, etc.) |
|
546 | 570 |
if ("AUC" %in% colnames(sweep.stats) == TRUE) { |
547 | 571 |
## Initialize data structure for results storage |
548 |
- bc.mvn <- as.data.frame(matrix(0L, nrow=length(unique(sweep.stats$pK)), ncol=6)) |
|
572 |
+ bc.mvn <- as.data.frame(matrix(0L, nrow = length(unique(sweep.stats$pK)), |
|
573 |
+ ncol = 6)) |
|
549 | 574 |
colnames(bc.mvn) <- c("ParamID","pK","MeanAUC","MeanBC","VarBC","BCmetric") |
550 | 575 |
bc.mvn$pK <- unique(sweep.stats$pK) |
551 | 576 |
bc.mvn$ParamID <- seq(nrow(bc.mvn)) |
552 |
- |
|
553 |
- ## Compute bimodality coefficient mean, variance, and BCmvn across pN-pK sweep results |
|
577 |
+ |
|
578 |
+ ## Compute bimodality coefficient mean, variance, and BCmvn across pN-pK |
|
579 |
+ ## sweep results |
|
554 | 580 |
x <- 0 |
555 | 581 |
for (i in unique(bc.mvn$pK)) { |
556 | 582 |
x <- x + 1 |
... | ... |
@@ -558,29 +584,34 @@ runDoubletFinder <- function(inSCE, |
558 | 584 |
bc.mvn$MeanAUC[x] <- mean(sweep.stats[ind, "AUC"]) |
559 | 585 |
bc.mvn$MeanBC[x] <- mean(sweep.stats[ind, "BCreal"]) |
560 | 586 |
bc.mvn$VarBC[x] <- stats::sd(sweep.stats[ind, "BCreal"])^2 |
561 |
- bc.mvn$BCmetric[x] <- mean(sweep.stats[ind, "BCreal"])/(stats::sd(sweep.stats[ind, "BCreal"])^2) |
|
587 |
+ bc.mvn$BCmetric[x] <- mean(sweep.stats[ind, "BCreal"]) / |
|
588 |
+ (stats::sd(sweep.stats[ind, "BCreal"])^2) |
|
562 | 589 |
} |
563 |
- |
|
590 |
+ |
|
564 | 591 |
return(bc.mvn) |
565 |
- |
|
592 |
+ |
|
566 | 593 |
} |
567 | 594 |
} |
568 | 595 |
|
569 |
-.doubletFinder_v3 <- function(seu, PCs, pN = 0.25, pK, nExp, reuse.pANN = FALSE, sct = FALSE, verbose = FALSE) { |
|
570 |
- |
|
571 |
- ## Generate new list of doublet classificatons from existing pANN vector to save time |
|
596 |
+.doubletFinder_v3 <- function(seu, PCs, pN = 0.25, pK, nExp, reuse.pANN = FALSE, |
|
597 |
+ sct = FALSE, verbose = FALSE) { |
|
598 |
+ |
|
599 |
+ ## Generate new list of doublet classificatons from existing pANN vector to |
|
600 |
+ ## save time |
|
572 | 601 |
if (reuse.pANN != FALSE ) { |
573 |
- pANN.old <- seu@meta.data[ , reuse.pANN] |
|
602 |
+ pANN.old <- seu[[]][ , reuse.pANN] |
|
574 | 603 |
classifications <- rep("Singlet", length(pANN.old)) |
575 |
- classifications[order(pANN.old, decreasing=TRUE)[seq(nExp)]] <- "Doublet" |
|
576 |
- seu@meta.data[, paste("DF.classifications",pN,pK,nExp,sep="_")] <- classifications |
|
604 |
+ classifications[order(pANN.old, decreasing = TRUE)[seq(nExp)]] <- "Doublet" |
|
605 |
+ seu[[]][, paste("DF.classifications", pN, pK, nExp, sep = "_")] <- |
|
606 |
+ classifications |
|
577 | 607 |
return(seu) |
578 | 608 |
} |
579 |
- |
|
609 |
+ |
|
580 | 610 |
if (reuse.pANN == FALSE) { |
581 | 611 |
## Make merged real-artifical data |
582 |
- real.cells <- rownames(seu@meta.data) |
|
583 |
- data <- seu@assays$RNA@counts[, real.cells] |
|
612 |
+ real.cells <- colnames(seu) |
|
613 |
+ data <- Seurat::GetAssayData(seu, slot = "counts", |
|
614 |
+ assay = "RNA")[, real.cells] |
|
584 | 615 |
n_real.cells <- length(real.cells) |
585 | 616 |
n_doublets <- round(n_real.cells/(1 - pN) - n_real.cells) |
586 | 617 |
if (verbose) { |
... | ... |
@@ -591,71 +622,81 @@ runDoubletFinder <- function(inSCE, |
591 | 622 |
doublets <- (data[, real.cells1] + data[, real.cells2])/2 |
592 | 623 |
colnames(doublets) <- paste("X", seq(n_doublets), sep = "") |
593 | 624 |
data_wdoublets <- cbind(data, doublets) |
594 |
- |
|
625 |
+ |
|
595 | 626 |
## Store important pre-processing information |
596 | 627 |
orig.commands <- seu@commands |
597 |
- |
|
628 |
+ |
|
598 | 629 |
## Pre-process Seurat object |
599 | 630 |
if (sct == FALSE) { |
600 | 631 |
seu_wdoublets <- Seurat::CreateSeuratObject(counts = data_wdoublets) |
601 |
- |
|
602 |
- seu_wdoublets <- Seurat::NormalizeData(seu_wdoublets, |
|
603 |
- normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method, |
|
604 |
- scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor, |
|
605 |
- margin = orig.commands$NormalizeData.RNA@params$margin) |
|
606 |
- |
|
607 |
- seu_wdoublets <- Seurat::FindVariableFeatures(seu_wdoublets, |
|
608 |
- selection.method = orig.commands$FindVariableFeatures.RNA$selection.method, |
|
609 |
- loess.span = orig.commands$FindVariableFeatures.RNA$loess.span, |
|
610 |
- clip.max = orig.commands$FindVariableFeatures.RNA$clip.max, |
|
611 |
- mean.function = orig.commands$FindVariableFeatures.RNA$mean.function, |
|
612 |
- dispersion.function = orig.commands$FindVariableFeatures.RNA$dispersion.function, |
|
613 |
- num.bin = orig.commands$FindVariableFeatures.RNA$num.bin, |
|
614 |
- binning.method = orig.commands$FindVariableFeatures.RNA$binning.method, |
|
615 |
- nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures, |
|
616 |
- mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff, |
|
617 |
- dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff) |
|
618 |
- |
|
619 |
- seu_wdoublets <- Seurat::ScaleData(seu_wdoublets, |
|
620 |
- features = orig.commands$ScaleData.RNA$features, |
|
621 |
- model.use = orig.commands$ScaleData.RNA$model.use, |
|
622 |
- do.scale = orig.commands$ScaleData.RNA$do.scale, |
|
623 |
- do.center = orig.commands$ScaleData.RNA$do.center, |
|
624 |
- scale.max = orig.commands$ScaleData.RNA$scale.max, |
|
625 |
- block.size = orig.commands$ScaleData.RNA$block.size, |
|
626 |
- min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block) |
|
627 |
- |
|
628 |
- seu_wdoublets <- Seurat::RunPCA(seu_wdoublets, |
|
629 |
- features = orig.commands$ScaleData.RNA$features, |
|
630 |
- npcs = length(PCs), |
|
631 |
- rev.pca = orig.commands$RunPCA.RNA$rev.pca, |
|
632 |
- weight.by.var = orig.commands$RunPCA.RNA$weight.by.var, |
|
633 |
- verbose=FALSE) |
|
634 |
- pca.coord <- seu_wdoublets@reductions$pca@cell.embeddings[ , PCs] |
|
635 |
- cell.names <- rownames(seu_wdoublets@meta.data) |
|
632 |
+ |
|
633 |
+ seu_wdoublets <- Seurat::NormalizeData( |
|
634 |
+ seu_wdoublets, |
|
635 |
+ normalization.method = orig.commands$NormalizeData.RNA@params$normalization.method, |
|
636 |
+ scale.factor = orig.commands$NormalizeData.RNA@params$scale.factor, |
|
637 |
+ margin = orig.commands$NormalizeData.RNA@params$margin |
|
638 |
+ ) |
|
639 |
+ |
|
640 |
+ seu_wdoublets <- Seurat::FindVariableFeatures( |
|
641 |
+ seu_wdoublets, |
|
642 |
+ selection.method = orig.commands$FindVariableFeatures.RNA$selection.method, |
|
643 |
+ loess.span = orig.commands$FindVariableFeatures.RNA$loess.span, |
|
644 |
+ clip.max = orig.commands$FindVariableFeatures.RNA$clip.max, |
|
645 |
+ mean.function = orig.commands$FindVariableFeatures.RNA$mean.function, |
|
646 |
+ dispersion.function = orig.commands$FindVariableFeatures.RNA$dispersion.function, |
|
647 |
+ num.bin = orig.commands$FindVariableFeatures.RNA$num.bin, |
|
648 |
+ binning.method = orig.commands$FindVariableFeatures.RNA$binning.method, |
|
649 |
+ nfeatures = orig.commands$FindVariableFeatures.RNA$nfeatures, |
|
650 |
+ mean.cutoff = orig.commands$FindVariableFeatures.RNA$mean.cutoff, |
|
651 |
+ dispersion.cutoff = orig.commands$FindVariableFeatures.RNA$dispersion.cutoff |
|
652 |
+ ) |
|
653 |
+ |
|
654 |
+ seu_wdoublets <- Seurat::ScaleData( |
|
655 |
+ seu_wdoublets, |
|
656 |
+ features = orig.commands$ScaleData.RNA$features, |
|
657 |
+ model.use = orig.commands$ScaleData.RNA$model.use, |
|
658 |
+ do.scale = orig.commands$ScaleData.RNA$do.scale, |
|
659 |
+ do.center = orig.commands$ScaleData.RNA$do.center, |
|
660 |
+ scale.max = orig.commands$ScaleData.RNA$scale.max, |
|
661 |
+ block.size = orig.commands$ScaleData.RNA$block.size, |
|
662 |
+ min.cells.to.block = orig.commands$ScaleData.RNA$min.cells.to.block |
|
663 |
+ ) |
|
664 |
+ |
|
665 |
+ seu_wdoublets <- Seurat::RunPCA( |
|
666 |
+ seu_wdoublets, |
|
667 |
+ features = orig.commands$ScaleData.RNA$features, |
|
668 |
+ npcs = length(PCs), |
|
669 |
+ rev.pca = orig.commands$RunPCA.RNA$rev.pca, |
|
670 |
+ weight.by.var = orig.commands$RunPCA.RNA$weight.by.var, |
|
671 |
+ verbose = FALSE |
|
672 |
+ ) |
|
673 |
+ pca.coord <- Seurat::Reductions(seu_wdoublets, |
|
674 |
+ "pca")@cell.embeddings[ , PCs] |
|
675 |
+ cell.names <- rownames(seu_wdoublets[[]]) |
|
636 | 676 |
nCells <- length(cell.names) |
637 | 677 |
rm(seu_wdoublets); gc() # Free up memory |
638 | 678 |
} |
639 |
- |
|
679 |
+ |
|
640 | 680 |
if (sct == TRUE) { |
641 | 681 |
#require(sctransform) |
642 | 682 |
message(date(), " ... Creating Seurat object") |
643 | 683 |
seu_wdoublets <- Seurat::CreateSeuratObject(counts = data_wdoublets) |
644 |
- |
|
684 |
+ |
|
645 | 685 |
message(date(), " ... Running SCTransform") |
646 | 686 |
seu_wdoublets <- Seurat::SCTransform(seu_wdoublets) |
647 |
- |
|
687 |
+ |
|
648 | 688 |
message(date(), " ... Running PCA") |
649 | 689 |
seu_wdoublets <- Seurat::RunPCA(seu_wdoublets, npcs = length(PCs)) |
650 |
- pca.coord <- seu_wdoublets@reductions$pca@cell.embeddings[ , PCs] |
|
651 |
- cell.names <- rownames(seu_wdoublets@meta.data) |
|
690 |
+ pca.coord <- Seurat::Reductions(seu_wdoublets, |
|
691 |
+ "pca")@cell.embeddings[ , PCs] |
|
692 |
+ cell.names <- rownames(seu_wdoublets[[]]) |
|
652 | 693 |
nCells <- length(cell.names) |
653 | 694 |
rm(seu_wdoublets); gc() |
654 | 695 |
} |
655 |
- |
|
696 |
+ |
|
656 | 697 |
## Compute PC distance matrix |
657 | 698 |
dist.mat <- fields::rdist(pca.coord) |
658 |
- |
|
699 |
+ |
|
659 | 700 |
## Compute pANN |
660 | 701 |
pANN <- as.data.frame(matrix(0L, nrow = n_real.cells, ncol = 1)) |
661 | 702 |
rownames(pANN) <- real.cells |
... | ... |
@@ -664,14 +705,17 @@ runDoubletFinder <- function(inSCE, |
664 | 705 |
for (i in seq(n_real.cells)) { |
665 | 706 |
neighbors <- order(dist.mat[, i]) |
666 | 707 |
neighbors <- neighbors[2:(k + 1)] |
667 |
- neighbor.names <- rownames(dist.mat)[neighbors] |
|
708 |
+ #neighbor.names <- rownames(dist.mat)[neighbors] |
|
668 | 709 |
pANN$pANN[i] <- length(which(neighbors > n_real.cells))/k |
669 | 710 |
} |
670 |
- |
|
711 |
+ |
|
671 | 712 |
classifications <- rep("Singlet",n_real.cells) |
672 |
- classifications[order(pANN$pANN[seq(n_real.cells)], decreasing=TRUE)[seq(nExp)]] <- "Doublet" |
|
673 |
- seu@meta.data[, paste("pANN",pN,pK,nExp,sep="_")] <- pANN[rownames(seu@meta.data), 1] |
|
674 |
- seu@meta.data[, paste("DF.classifications",pN,pK,nExp,sep="_")] <- classifications |
|
713 |
+ classifications[order(pANN$pANN[seq(n_real.cells)], |
|
714 |
+ decreasing = TRUE)[seq(nExp)]] <- "Doublet" |
|
715 |
+ seu[[paste("pANN", pN, pK, nExp, sep = "_")]] <- |
|
716 |
+ pANN[colnames(seu), 1] |
|
717 |
+ seu[[paste("DF.classifications", pN, pK, nExp, sep = "_")]] <- |
|
718 |
+ classifications |
|
675 | 719 |
return(seu) |
676 | 720 |
} |
677 | 721 |
} |
... | ... |
@@ -1,21 +1,22 @@ |
1 |
- |
|
2 |
-.runBarcodeRankDrops <- function(barcode.matrix, lower=lower, |
|
3 |
- fit.bounds=fit.bounds, |
|
4 |
- df=df) { |
|
5 |
- |
|
1 |
+.runBarcodeRankDrops <- function(barcode.matrix, lower = lower, |
|
2 |
+ fit.bounds = fit.bounds, |
|
3 |
+ df = df) { |
|
4 |
+ |
|
6 | 5 |
## Convert to sparse matrix if not already in that format |
7 | 6 |
barcode.matrix <- .convertToMatrix(barcode.matrix) |
8 |
- |
|
9 |
- output <- DropletUtils::barcodeRanks(m = barcode.matrix, lower=lower, |
|
10 |
- fit.bounds=fit.bounds, |
|
11 |
- df=df) |
|
12 |
- |
|
13 |
- knee.ix <- as.integer(output@listData$total >= S4Vectors::metadata(output)$knee) |
|
14 |
- inflection.ix <- as.integer(output@listData$total >= S4Vectors::metadata(output)$inflection) |
|
15 |
- rank.ix<- as.integer(output$rank) |
|
16 |
- total.ix<- as.integer(output$total) |
|
17 |
- fitted.ix<- as.integer(output$fitted) |
|
18 |
- |
|
7 |
+ |
|
8 |
+ output <- DropletUtils::barcodeRanks(m = barcode.matrix, lower = lower, |
|
9 |
+ fit.bounds = fit.bounds, |
|
10 |
+ df = df) |
|
11 |
+ |
|
12 |
+ knee.ix <- as.integer(output@listData$total >= |
|
13 |
+ S4Vectors::metadata(output)$knee) |
|
14 |
+ inflection.ix <- as.integer(output@listData$total >= |
|
15 |
+ S4Vectors::metadata(output)$inflection) |
|
16 |
+ rank.ix <- as.integer(output$rank) |
|
17 |
+ total.ix <- as.integer(output$total) |
|
18 |
+ fitted.ix <- as.integer(output$fitted) |
|
19 |
+ |
|
19 | 20 |
result <- cbind(knee.ix, inflection.ix, rank.ix, total.ix, fitted.ix) |
20 | 21 |
colnames(result) <- c("dropletUtils_barcodeRank_knee", |
21 | 22 |
"dropletUtils_barcodeRank_inflection", |
... | ... |