Browse code

Fix combineSCE related SCTK_QC pipeline

Yichen Wang authored on 17/10/2022 07:25:02
Showing 4 changed files

... ...
@@ -1,10 +1,10 @@
1 1
 .constructSCE <- function(
2
-  matrices,
3
-  features,
4
-  barcodes,
5
-  metadata,
6
-  reducedDims) {
7
-
2
+    matrices,
3
+    features,
4
+    barcodes,
5
+    metadata,
6
+    reducedDims) {
7
+  
8 8
   sce <- SingleCellExperiment::SingleCellExperiment(assays = matrices)
9 9
   SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(features)
10 10
   SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(barcodes)
... ...
@@ -34,7 +34,7 @@
34 34
   } else {
35 35
     fill <- NA
36 36
   }
37
-
37
+  
38 38
   ### combine row
39 39
   if (isTRUE(combineRow) & (!is.null(row))) {
40 40
     missRow <- row[!row %in% rownames(x)]
... ...
@@ -43,14 +43,14 @@
43 43
     if (!isTRUE(sparse)) {
44 44
       missMat <- as.matrix(missMat)
45 45
     }
46
-
46
+    
47 47
     mat <- rbind(matOrigin, missMat)
48 48
     if (anyDuplicated(rownames(mat))) {
49 49
       mat <- mat[!duplicated(rownames(mat)), ]
50 50
     }
51 51
     matOrigin <- mat[row, ]
52 52
   }
53
-
53
+  
54 54
   ### combine cols
55 55
   if (isTRUE(combineCol) & (!is.null(col))) {
56 56
     missCol <- col[!col %in% colnames(x)]
... ...
@@ -59,7 +59,7 @@
59 59
     if (!isTRUE(sparse)) {
60 60
       missMat <- as.matrix(missMat)
61 61
     }
62
-
62
+    
63 63
     mat <- cbind(matOrigin, missMat)
64 64
     if (anyDuplicated(colnames(mat))) {
65 65
       mat <- mat[, !duplicated(colnames(mat))]
... ...
@@ -76,12 +76,12 @@
76 76
     rw[['rownames']] <- rownames(rw)
77 77
     return(rw)
78 78
   })
79
-
79
+  
80 80
   ## Get merged rowData
81 81
   by.r <- unique(c('rownames', by.r))
82 82
   unionFe <- Reduce(function(r1, r2) merge(r1, r2, by=by.r, all=TRUE), feList)
83 83
   allGenes <- unique(unlist(lapply(feList, rownames)))
84
-
84
+  
85 85
   ## rowData
86 86
   newFe <- unionFe
87 87
   if (nrow(newFe) != length(allGenes)) {
... ...
@@ -102,7 +102,7 @@
102 102
     cD[['rownames']] <- rownames(cD)
103 103
     return(cD)
104 104
   })
105
-
105
+  
106 106
   by.c <- unique(c("rownames", by.c))
107 107
   unionCb <- Reduce(function(c1, c2) merge(c1, c2, by=by.c, all=TRUE), cbList)
108 108
   rownames(unionCb) <- unionCb[['rownames']]
... ...
@@ -118,19 +118,19 @@
118 118
   reduceList <- lapply(sceList, SingleCellExperiment::reducedDims)
119 119
   ## get every reducedDim exists in at least one SCEs
120 120
   UnionReducedDims <- unique(unlist(lapply(sceList, SingleCellExperiment::reducedDimNames)))
121
-
121
+  
122 122
   ## for each reducedDim, get union row/cols
123 123
   reducedDims <- list()
124 124
   for (reduceDim in UnionReducedDims) {
125 125
     x <- lapply(sceList, function(x) {if (reduceDim %in% SingleCellExperiment::reducedDimNames(x)) {SingleCellExperiment::reducedDim(x, reduceDim)}})
126 126
     reducedDims[[reduceDim]] <- .getDimUnion(x)
127 127
   }
128
-
128
+  
129 129
   ## Merge reducedDim for each SCE
130 130
   redList <- list()
131 131
   for (idx in seq_along(sceList)){
132 132
     redMat <- reduceList[[idx]]
133
-
133
+    
134 134
     for (DimName in UnionReducedDims) {
135 135
       if (DimName %in% names(redMat)) {
136 136
         redMat[[DimName]] <- .getMatUnion(reducedDims[[DimName]], redMat[[DimName]],
... ...
@@ -142,10 +142,10 @@
142 142
                                           dimnames = list(colnames(sceList[[idx]]), reducedDims[[DimName]][[2]]))
143 143
       }
144 144
     }
145
-
145
+    
146 146
     redList[[idx]] <- redMat
147 147
   }
148
-
148
+  
149 149
   return(redList)
150 150
 }
151 151
 
... ...
@@ -157,7 +157,7 @@
157 157
     unique(unlist(lapply(sceList, rownames))),
158 158
     unique(unlist(lapply(sceList, colnames)))
159 159
   )
160
-
160
+  
161 161
   asList <- list()
162 162
   for (idx in seq_along(assayList)){
163 163
     assay <- assayList[[idx]]
... ...
@@ -174,7 +174,7 @@
174 174
     }
175 175
     asList[[idx]] <- assay
176 176
   }
177
-
177
+  
178 178
   return(asList)
179 179
 }
180 180
 
... ...
@@ -197,27 +197,48 @@
197 197
 # }
198 198
 
199 199
 .mergeMetaSCE <- function(SCE_list) {
200
+  # Merge highest level metadata entries (except "sctk") by sample names in
201
+  # SCE_list. For analysis results in "sctk", merge by SCE_list sample name if 
202
+  # given "all_cells" in one object, else, use existing sample names. 
203
+  samples <- names(SCE_list)
200 204
   sampleMeta <- lapply(SCE_list, S4Vectors::metadata)
201 205
   metaNames <- unique(unlist(lapply(sampleMeta, names)))
206
+  sampleSctkMeta <- lapply(SCE_list, function(x) {S4Vectors::metadata(x)$sctk})
207
+  sctkMetaNames <- unique(unlist(lapply(sampleMeta, 
208
+                                        function(x) names(x$sctk))))
209
+  sampleMeta$sctk <- NULL
210
+  metaNames <- metaNames[!metaNames %in% c("sctk")]
202 211
   NewMeta <- list()
203
-
212
+  for (meta in sctkMetaNames) {
213
+    for (i in seq_along(sampleSctkMeta)) {
214
+      # `i` is for identifying each SCE object, usually matching a sample in 
215
+      # the pipeline. `sampleAvail` for samples stored in metadata entry, in
216
+      # case users are merging merged objects. 
217
+      sampleAvail <- names(sampleSctkMeta[[i]][[meta]])
218
+      if (length(sampleAvail) == 1) {
219
+        NewMeta$sctk[[meta]][[samples[i]]] <- sampleSctkMeta[[i]][[meta]][[1]]
220
+      } else if (length(sampleAvail) > 1) {
221
+        names(sampleSctkMeta[[i]][[meta]]) <- 
222
+          paste(names(sampleSctkMeta)[i], 
223
+                names(sampleSctkMeta[[i]][[meta]]), sep = "_")
224
+        NewMeta$sctk[[meta]] <- c(NewMeta$sctk[[meta]], 
225
+                                  sampleSctkMeta[[i]][[meta]])
226
+      }
227
+    }
228
+  }
204 229
   for (meta in metaNames) {
205 230
     for (i in seq_along(sampleMeta)) {
206
-      NewMeta[[meta]][[i]] <- sampleMeta[[i]][[meta]]
231
+      NewMeta[[meta]][[samples[i]]] <- sampleMeta[[i]][[meta]]
207 232
     }
208 233
   }
209
-
210
-  if ("runBarcodeRanksMetaOutput" %in% metaNames) {
211
-    NewMeta[["runBarcodeRanksMetaOutput"]] <- unlist(NewMeta[["runBarcodeRanksMetaOutput"]])
212
-  }
213
-
234
+  
214 235
   if ("assayType" %in% metaNames) {
215 236
     assayType <- lapply(SCE_list, function(x){S4Vectors::metadata(x)$assayType})
216 237
     assayType <- BiocGenerics::Reduce(dplyr::union, assayType)
217 238
     
218 239
     NewMeta[["assayType"]] <- assayType
219 240
   }
220
-
241
+  
221 242
   return(NewMeta)
222 243
 }
223 244
 
... ...
@@ -241,7 +262,7 @@
241 262
 #' @export
242 263
 
243 264
 combineSCE <- function(sceList, by.r = NULL, by.c = NULL, combined = TRUE){
244
-  if(length(sceList) == 1){
265
+  if (length(sceList) == 1) {
245 266
     return(sceList[[1]])
246 267
   }
247 268
   ##  rowData
... ...
@@ -252,16 +273,21 @@ combineSCE <- function(sceList, by.r = NULL, by.c = NULL, combined = TRUE){
252 273
   redMatList <- .mergeRedimSCE(sceList)
253 274
   ## assay
254 275
   assayList <- .mergeAssaySCE(sceList)
255
-
276
+  samples <- names(sceList)
277
+  if (is.null(samples)) {
278
+    samples <- paste0("sample", seq_along(sceList))
279
+  }
256 280
   New_SCE <- list()
257 281
   for (i in seq(length(sceList))) {
258 282
     ## create new sce
259
-    New_SCE[[i]] <- .constructSCE(matrices = assayList[[i]], features = newFeList,
260
-                                  barcodes = newCbList[[i]],
261
-                                  metadata = S4Vectors::metadata(sceList[[i]]),
262
-                                  reducedDims = redMatList[[i]])
283
+    sampleName <- samples[i]
284
+    New_SCE[[sampleName]] <- .constructSCE(matrices = assayList[[i]], 
285
+                                           features = newFeList,
286
+                                           barcodes = newCbList[[i]],
287
+                                           metadata = S4Vectors::metadata(sceList[[i]]),
288
+                                           reducedDims = redMatList[[i]])
263 289
   }
264
-
290
+  
265 291
   if (isTRUE(combined)) {
266 292
     sce <- do.call(SingleCellExperiment::cbind, New_SCE)
267 293
     meta <- .mergeMetaSCE(New_SCE)
... ...
@@ -110,7 +110,7 @@ runSoupX <- function(inSCE,
110 110
                      pCut = 0.01) {
111 111
     SingleBGBatchForAllBatch <- FALSE
112 112
     adjustMethod <- match.arg(adjustMethod)
113
-    if(!is.null(sample)) {
113
+    if (!is.null(sample)) {
114 114
         sample <- .manageCellVar(inSCE, var = sample)
115 115
         if (is.factor(sample)) {
116 116
             sample <- as.character(sample)
... ...
@@ -129,7 +129,7 @@ runSoupX <- function(inSCE,
129 129
                              "in background colData")
130 130
                     }
131 131
                     bgBatch <- SummarizedExperiment::colData(background)[[bgBatch]]
132
-                } else if(length(bgBatch) != ncol(background)) {
132
+                } else if (length(bgBatch) != ncol(background)) {
133 133
                     stop("'bgBatch' must be the same length as ",
134 134
                          "the number of columns in 'background'")
135 135
                 }
... ...
@@ -429,11 +429,11 @@ setMethod("getSoupX",
429 429
                    sampleID = NULL,
430 430
                    background = FALSE){
431 431
     if (isTRUE(background)) {
432
-        all.results <- S4Vectors::metadata(inSCE)$sctk$SoupX_bg
432
+        all.results <- S4Vectors::metadata(inSCE)$sctk$runSoupX_bg
433 433
     } else {
434
-        all.results <- S4Vectors::metadata(inSCE)$sctk$SoupX
434
+        all.results <- S4Vectors::metadata(inSCE)$sctk$runSoupX
435 435
     }
436
-    if(is.null(all.results)) {
436
+    if (is.null(all.results)) {
437 437
         stop("No result from 'SoupX' is found. Please run `runSoupX()` first, ",
438 438
              "or check the setting of `background`.")
439 439
     }
... ...
@@ -441,7 +441,8 @@ setMethod("getSoupX",
441 441
     if (!is.null(sampleID)) {
442 442
         if (!all(sampleID  %in% names(all.results))) {
443 443
             stop("Sample(s) not found in the results for tool 'SoupX': ",
444
-                 paste(sampleID[!sampleID %in% names(all.results)], collapse = ", "))
444
+                 paste(sampleID[!sampleID %in% names(all.results)], 
445
+                       collapse = ", "))
445 446
         }
446 447
         results <- all.results[sampleID]
447 448
     }
... ...
@@ -458,9 +459,9 @@ setReplaceMethod("getSoupX",
458 459
                           background = FALSE,
459 460
                           value) {
460 461
                      if (isTRUE(background)) {
461
-                         inSCE@metadata$sctk$SoupX_bg[[sampleID]] <- value
462
+                         inSCE@metadata$sctk$runSoupX_bg[[sampleID]] <- value
462 463
                      } else {
463
-                         inSCE@metadata$sctk$SoupX[[sampleID]] <- value
464
+                         inSCE@metadata$sctk$runSoupX[[sampleID]] <- value
464 465
                      }
465 466
                      return(inSCE)
466 467
                  })
... ...
@@ -549,17 +550,16 @@ plotSoupXResults <- function(inSCE,
549 550
                             )
550 551
 {
551 552
     combinePlot <- match.arg(combinePlot)
552
-    sampleID <- unique(sample)
553
-    results <- getSoupX(inSCE, sampleID = sampleID, background = background)
553
+    sample <- .manageCellVar(inSCE, var = sample)
554
+    samples <- unique(sample)
555
+    results <- getSoupX(inSCE, sampleID = samples, background = background)
554 556
     # Doing this redundancy-like step because: If sample given NULL when running
555 557
     # runSoupX(), the actual sample label saved will be "all_cells", which users
556 558
     # won't know.
557
-    samples <- names(results)
558 559
     samplePlots <- list()
559 560
     for (s in samples) {
560
-        sampleRes <- results[[s]]
561
-        param <- sampleRes$param
562
-        sampleIdx <- param$sample == s
561
+        param <- results[[s]]$param
562
+        sampleIdx <- sample == s
563 563
         tmpSCE <- inSCE[,sampleIdx]
564 564
         markerTable <- results[[s]]$markersUsed
565 565
         markerTable <- markerTable[order(markerTable$qval),]
... ...
@@ -574,7 +574,7 @@ plotSoupXResults <- function(inSCE,
574 574
         while (length(markerToUse) < nMarkerToUse) {
575 575
             nIter <- nIter + 1
576 576
             # c is the cluster to look at in this iteration
577
-            c <- uniqCluster[(nIter-1)%%length(uniqCluster) + 1]
577
+            c <- uniqCluster[(nIter - 1) %% length(uniqCluster) + 1]
578 578
             markerPerCluster <- markerTable[markerTable$cluster == c,]
579 579
             topMarker <- markerPerCluster[1, "gene"]
580 580
             markerToUse <- c(markerToUse, topMarker)
... ...
@@ -595,19 +595,19 @@ plotSoupXResults <- function(inSCE,
595 595
                                         title = "Cluster",
596 596
                                         labelClusters = labelClusters,
597 597
                                         clusterLabelSize = clusterLabelSize,
598
-                                        xlab=xlab,
599
-                                        ylab=ylab,
600
-                                        dim1=dim1,
601
-                                        dim2=dim2,
602
-                                        defaultTheme=defaultTheme,
603
-                                        dotSize=dotSize,
604
-                                        transparency=transparency,
605
-                                        baseSize=baseSize,
606
-                                        titleSize=titleSize,
607
-                                        axisLabelSize=axisLabelSize,
608
-                                        axisSize=axisSize,
609
-                                        legendSize=legendSize,
610
-                                        legendTitleSize=legendTitleSize)
598
+                                        xlab = xlab,
599
+                                        ylab = ylab,
600
+                                        dim1 = dim1,
601
+                                        dim2 = dim2,
602
+                                        defaultTheme = defaultTheme,
603
+                                        dotSize = dotSize,
604
+                                        transparency = transparency,
605
+                                        baseSize = baseSize,
606
+                                        titleSize = titleSize,
607
+                                        axisLabelSize = axisLabelSize,
608
+                                        axisSize = axisSize,
609
+                                        legendSize = legendSize,
610
+                                        legendTitleSize = legendTitleSize)
611 611
             )
612 612
         for (g in markerToUse) {
613 613
             # Soup fraction was calculated basing on SoupX's original method.
... ...
@@ -207,7 +207,7 @@ MitoImport <- opt[["detectMitoLevel"]]
207 207
 MitoType <- opt[["mitoType"]]
208 208
 QCReport <- opt[["QCReport"]]
209 209
 
210
-print("The output directory is")
210
+message("The output directory is")
211 211
 print(directory)
212 212
 
213 213
 if (!is.null(basepath)) { basepath <- unlist(strsplit(opt[["basePath"]], ",")) }
... ...
@@ -1146,7 +1146,7 @@ for (sample in samples){
1146 1146
 
1147 1147
 
1148 1148
 ```{r, "SoupX-description", include=FALSE, warning=FALSE, message=FALSE}
1149
-description_SoupX<- singleCellTK:::descriptionSoupX()
1149
+description_SoupX <- singleCellTK:::descriptionSoupX()
1150 1150
 ```
1151 1151
 
1152 1152
 ```{r, "SoupX", results="asis", fig.align="center", warning=FALSE, message=FALSE, echo=FALSE}
... ...
@@ -1157,21 +1157,15 @@ soupXData <- c("soupX_nUMIs", "soupX_contamination")
1157 1157
 # soupX cluster label might be supplied by users, thus not checking it.
1158 1158
 skipSoupX <- any(!soupXData %in% names(colData(sce.qc)))
1159 1159
 
1160
-if (length(samples) == 1) {
1161
-  soupXSample <- "all_cells"
1162
-} else {
1163
-  soupXSample <- samples
1164
-}
1165
-
1166 1160
 if (skipSoupX) {
1167 1161
   #cat("runDecontX did not complete successfully. The section of DecontX is skipped")
1168 1162
   plotSoupX <- NULL
1169 1163
 } else {
1170 1164
   cat(paste0('## ', i, ' \n'))
1171
-  plotSoupX<- tryCatch(
1165
+  plotSoupX <- tryCatch(
1172 1166
     {plotSoupXResults(inSCE = sce.qc, 
1173
-                     soupXSample, 
1174
-                     combinePlot = "none")},
1167
+                      sceSample, 
1168
+                      combinePlot = "none")},
1175 1169
     error = function(e) {
1176 1170
       cat("runSoupX did not complete successfully. The section of SoupX is skipped\n\n")
1177 1171
       skipSoupX <<- TRUE