... | ... |
@@ -19,18 +19,28 @@ |
19 | 19 |
rowData = sce_rowdata, |
20 | 20 |
colData = sce_coldata) |
21 | 21 |
colnames(sce) <- paste0(sampleName,"_",colnames(sce)) |
22 |
+ |
|
23 |
+ multi_Assay <- reticulate::py_to_r(anndata$layers$as_dict()) |
|
24 |
+ for(assay_name in names(multi_Assay)){ |
|
25 |
+ tryCatch({ |
|
26 |
+ SummarizedExperiment::assay(sce, assay_name, withDimnames = FALSE) <- t(reticulate::py_to_r(multi_Assay[[assay_name]])) |
|
27 |
+ base::dimnames(SummarizedExperiment::assay(sce, assay_name)) <- base::dimnames(SummarizedExperiment::assay(sce, "counts")) |
|
28 |
+ }, error = function(x){ |
|
29 |
+ error_message <- paste0("Warning: unable to add '",assay_name,"' from .layers AnnData slot to SCE assay. Skipping. ") |
|
30 |
+ message(error_message) |
|
31 |
+ }) |
|
32 |
+ } |
|
22 | 33 |
|
23 | 34 |
multidim_observations <- reticulate::py_to_r(anndata$obsm_keys()) |
24 | 35 |
for(obsm_name in multidim_observations){ |
25 | 36 |
tryCatch({ |
26 |
- reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name]) |
|
37 |
+ SingleCellExperiment::reducedDims(sce)[[obsm_name]] <- reticulate::py_to_r(anndata$obsm[obsm_name]) |
|
27 | 38 |
}, error = function(x){ |
28 |
- error_message <- paste0("Warning: unable to add '",uns_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
|
39 |
+ error_message <- paste0("Warning: unable to add '",obsm_name,"' from .obsm AnnData slot to SCE metadata. Skipping. ") |
|
29 | 40 |
message(error_message) |
30 | 41 |
}) |
31 |
- |
|
32 | 42 |
} |
33 |
- |
|
43 |
+ |
|
34 | 44 |
unstructured_data <- reticulate::py_to_r(anndata$uns_keys()) |
35 | 45 |
for(uns_name in unstructured_data){ |
36 | 46 |
tryCatch({ |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
#' @return A list of \link[SingleCellExperiment]{SingleCellExperiment} object containing |
5 | 5 |
#' the droplet or cell data or both,depending on the dataType that users provided. |
6 | 6 |
#' @export |
7 |
-importMultipleSources <- function(allImportEntries) { |
|
7 |
+importMultipleSources <- function(allImportEntries, delayedArray = FALSE) { |
|
8 | 8 |
sceObjs <- list() |
9 | 9 |
for (entry in allImportEntries$samples) { |
10 | 10 |
if (entry$type == "cellRanger2") { |
... | ... |
@@ -12,12 +12,14 @@ importMultipleSources <- function(allImportEntries) { |
12 | 12 |
newSce <- importCellRangerV2Sample( |
13 | 13 |
dataDir = entry$params$dataDir, |
14 | 14 |
sampleName = entry$params$sampleName, |
15 |
+ delayedArray = delayedArray |
|
15 | 16 |
) |
16 | 17 |
} else { |
17 | 18 |
newSce <- importCellRangerV2( |
18 | 19 |
cellRangerDirs = entry$params$cellRangerDirs, |
19 | 20 |
sampleDirs = entry$params$sampleDirs, |
20 | 21 |
sampleNames = entry$params$sampleNames, |
22 |
+ delayedArray = delayedArray |
|
21 | 23 |
) |
22 | 24 |
} |
23 | 25 |
|
... | ... |
@@ -26,42 +28,50 @@ importMultipleSources <- function(allImportEntries) { |
26 | 28 |
newSce <- importCellRangerV3Sample( |
27 | 29 |
dataDir = entry$params$dataDir, |
28 | 30 |
sampleName = entry$params$sampleName, |
31 |
+ delayedArray = delayedArray |
|
29 | 32 |
) |
30 | 33 |
} else { |
31 | 34 |
newSce <- importCellRangerV3( |
32 | 35 |
cellRangerDirs = entry$params$cellRangerDirs, |
33 | 36 |
sampleDirs = entry$params$sampleDirs, |
34 | 37 |
sampleNames = entry$params$sampleNames, |
38 |
+ delayedArray = delayedArray |
|
35 | 39 |
) |
36 | 40 |
} |
37 | 41 |
} else if (entry$type == "starSolo") { |
38 | 42 |
newSce <- importSTARsolo( |
39 | 43 |
STARsoloDirs = entry$params$STARsoloDirs, |
40 |
- samples = entry$params$amples |
|
44 |
+ samples = entry$params$amples, |
|
45 |
+ delayedArray = delayedArray |
|
41 | 46 |
) |
42 | 47 |
} else if (entry$type == "busTools") { |
43 | 48 |
newSce <- importBUStools( |
44 | 49 |
BUStoolsDirs = entry$params$BUStoolsDirs, |
45 | 50 |
samples = entry$params$samples, |
51 |
+ delayedArray = delayedArray |
|
46 | 52 |
) |
47 | 53 |
} else if (entry$type == "seqc") { |
48 | 54 |
newSce <- importSEQC( |
49 | 55 |
seqcDirs = entry$params$seqcDirs, |
50 | 56 |
samples = entry$params$samples, |
51 | 57 |
prefix = entry$params$prefix, |
58 |
+ delayedArray = delayedArray |
|
52 | 59 |
) |
53 | 60 |
} else if (entry$type == "optimus") { |
54 | 61 |
newSce <- importOptimus( |
55 | 62 |
OptimusDirs = entry$params$OptimusDirs, |
56 |
- samples = entry$params$samples |
|
63 |
+ samples = entry$params$samples, |
|
64 |
+ delayedArray = delayedArray |
|
57 | 65 |
) |
58 | 66 |
} else if (entry$type == "files") { |
59 | 67 |
newSce <- importFromFiles(assayFile = entry$params$assayFile, |
60 | 68 |
annotFile = entry$params$annotFile, |
61 | 69 |
featureFile = entry$params$featureFile, |
62 |
- assayName = entry$params$assayName) |
|
70 |
+ assayName = entry$params$assayName, |
|
71 |
+ delayedArray = delayedArray) |
|
63 | 72 |
} else if (entry$type == "example") { |
64 |
- newSce <- importExampleData(dataset = entry$params$dataset) |
|
73 |
+ newSce <- importExampleData(dataset = entry$params$dataset, |
|
74 |
+ delayedArray = delayedArray) |
|
65 | 75 |
} else if (entry$type == "rds") { |
66 | 76 |
importedrds <- readRDS(entry$params$rdsFile) |
67 | 77 |
if (base::inherits(importedrds, "SummarizedExperiment")) { |
... | ... |
@@ -71,13 +81,24 @@ importMultipleSources <- function(allImportEntries) { |
71 | 81 |
} else { |
72 | 82 |
stop("The '.rds' file should contain a 'SingleCellExperiment' or 'Seurat' object.") |
73 | 83 |
} |
84 |
+ |
|
85 |
+ for(assay in SummarizedExperiment::assayNames(newSce)) { |
|
86 |
+ if(!base::inherits(SummarizedExperiment::assay(sce, assay), "dgCMatrix") && !isTRUE(delayedArray)) { |
|
87 |
+ SummarizedExperiment::assay(sce, assay) <- .convertToMatrix(SummarizedExperiment::assay(sce, assay)) |
|
88 |
+ } |
|
89 |
+ |
|
90 |
+ if(!base::inherits(SummarizedExperiment::assay(sce, assay), "DelayedArray") && isTRUE(delayedArray)) { |
|
91 |
+ SummarizedExperiment::assay(sce, assay) <- DelayedArray::DelayedArray(SummarizedExperiment::assay(sce, assay)) |
|
92 |
+ } |
|
93 |
+ } |
|
94 |
+ |
|
74 | 95 |
} |
75 | 96 |
sceObjs = c(sceObjs, list(newSce)) |
76 | 97 |
} |
77 | 98 |
|
78 | 99 |
return(combineSCE(sceList = sceObjs, |
79 |
- by.r = NULL, |
|
80 |
- by.c = Reduce(intersect, lapply(sceObjs, function(x) { colnames(colData(x))})), |
|
100 |
+ by.r = Reduce(base::intersect, lapply(sceObjs, function(x) { colnames(rowData(x))})), |
|
101 |
+ by.c = Reduce(base::intersect, lapply(sceObjs, function(x) { colnames(colData(x))})), |
|
81 | 102 |
combined = TRUE) |
82 | 103 |
) |
83 | 104 |
} |
... | ... |
@@ -43,12 +43,12 @@ |
43 | 43 |
} |
44 | 44 |
} |
45 | 45 |
|
46 |
- # Furthermore, the other assays will for now also be saved to .obsm |
|
46 |
+ # Furthermore, the other assays will for now also be saved to .layers |
|
47 | 47 |
allAssayNames <- SummarizedExperiment::assayNames(SCE) |
48 | 48 |
for (i in 1:length(allAssayNames)) { |
49 | 49 |
oneName <- allAssayNames[i] |
50 | 50 |
if (!oneName == useAssay) { |
51 |
- AnnData$obsm$'__setitem__'(oneName, t(SummarizedExperiment::assay(SCE, oneName))) |
|
51 |
+ AnnData$layers$'__setitem__'(oneName, t(SummarizedExperiment::assay(SCE, oneName))) |
|
52 | 52 |
} |
53 | 53 |
} |
54 | 54 |
return(AnnData) |
... | ... |
@@ -97,6 +97,10 @@ The optional arguments are as follows. Their usage depend on type of data and us |
97 | 97 |
-n, --numCores. Number of cores used to run the pipeline. By default is 1. Parallel computing is enabled if -n is greater than 1. <br> <br> |
98 | 98 |
|
99 | 99 |
-T, --parallelType. Type of parallel computing used for parallel computing. Parallel computing used in this pipeline depends on "BiocParallel" package. Default is 'MulticoreParam'. It can be 'MulticoreParam' or 'SnowParam'. This argument will be evaluated only when numCores > 1. <br> <br> |
100 |
+ |
|
101 |
+-i, --studyDesign. The txt file containing the desrciption of the study design. Default is NULL. This would be shown at the begining the html report of cell and droplet QC. <br> <br> |
|
102 |
+ |
|
103 |
+-L, --subTitle. The subtitle used in the cell and droplet QC HTML report. Default is None. The subtitle can contain information of the sample, like sample name, etc. If -S is set as TRUE, the length of subsitle should be the same as the length of samples. If -S is set as FALSE, the length of subtitle should be one or NULL. <br> <br> |
|
100 | 104 |
</p> |
101 | 105 |
</details> |
102 | 106 |
|
... | ... |
@@ -327,18 +331,43 @@ The Singulatiry image can easily be built using Docker Hub as a source: |
327 | 331 |
singularity pull docker://campbio/sctk_qc:1.7.5 |
328 | 332 |
``` |
329 | 333 |
|
330 |
-The usage of singleCellTK Singularity image is very similar to that of Docker. In Singularity 3.0+, the mount volume is [automatically overlaid](https://singularity.lbl.gov/docs-mount). However, you can use argument --bind/-B to specify your own mount volume. The example is shown as below: |
|
334 |
+The usage of singleCellTK Singularity image is very similar to that of Docker. In Singularity 3.0+, the mount volume is [automatically overlaid](https://singularity.lbl.gov/docs-mount). |
|
335 |
+ |
|
336 |
+It's recommended to re-set the home directory when you run singularity. Singularity will mount \$HOME path on your machine by default, which might contain your personal R/Python library folder. If we don't re-set the home to mount, singularity will try to use R/Python libraries which are not built within the singularity image and cause some conflicts. You can point to some "sanitized home", which is different from \$HOME path on your machine, using argument [-H/--home](https://singularity.lbl.gov/faq#solution-1-specify-the-home-to-mount). Besides, you can use argument --bind/-B to specify your own mount volume, which is the path that contains the dataset and will be used to store the output of QC pipeline. The example is shown as below: |
|
331 | 337 |
|
332 | 338 |
``` |
333 |
-singularity run sctk_qc_1.7.5.sif \ |
|
334 |
--b ./cellranger \ |
|
339 |
+singularity run --home=/PathToSanitizedHome \ |
|
340 |
+--bind /PathToData:/data sctk_qc_1.7.5.sif \ |
|
335 | 341 |
-P CellRangerV3 \ |
336 |
--s pbmc_100x100 \ |
|
337 |
--o ./result/tenx_v3_pbmc \ |
|
338 |
--g ./mitochondrial_human_symbol.gmt |
|
342 |
+-s gencodev34_pbmc_1k_v3 \ |
|
343 |
+-b /data/gencodev34_pbmc_1k_v3 |
|
344 |
+-o /data/result/gencodev34_pbmc_1k_v3 \ |
|
345 |
+-S TRUE \ |
|
346 |
+-F R,Python,FlatFile,HTAN \ |
|
347 |
+-n 15 \ |
|
348 |
+-T MulticoreParam |
|
339 | 349 |
``` |
340 | 350 |
|
341 |
-The code above assumed that the dataset is in your current directory, which is automatically mounted by Singularity. If you run Singularity image on BU SCC,it's recommended to re-set the home directory to mount. Otherwise, the container will load libraries in the SCC shared libraries, which might cause some conflicts. You can point to some "sanitized home" using argument [-H/--home](https://singularity.lbl.gov/faq#solution-1-specify-the-home-to-mount). Also, you might want to specify cpu architecture when run the docker on BU SCC using #$ -l cpu_arch=broadwell|haswell|skylake|cascadelake. Because the python packages are compiled by SIMD instructions that are only available on these two cpu architectures. |
|
351 |
+Also, you might want to specify cpu architecture when run the Singularity image on BU SCC using '#$ -l cpu_arch=broadwell|haswell|skylake|cascadelake' command. Because the python packages are compiled by SIMD instructions that are only compatible on these cpu architectures. If you are runnning singularity on other cluster, please contact IT helps about how to specify cpu architecture when you run the singularity image. One of the example is shown below: |
|
352 |
+ |
|
353 |
+``` |
|
354 |
+#!/bin/bash |
|
355 |
+#$ -cwd |
|
356 |
+#$ -j y |
|
357 |
+#$ -P camplab |
|
358 |
+#$ -pe omp 16 |
|
359 |
+#$ -l cpu_arch=broadwell|haswell|skylake|cascadelake |
|
360 |
+singularity run --home=/PathToSanitizedHome \ |
|
361 |
+--bind /PathToData:/data sctk_qc_1.7.5.sif \ |
|
362 |
+-P CellRangerV3 \ |
|
363 |
+-s gencodev34_pbmc_1k_v3 \ |
|
364 |
+-b /data/gencodev34_pbmc_1k_v3 |
|
365 |
+-o /data/result/gencodev34_pbmc_1k_v3 \ |
|
366 |
+-S TRUE \ |
|
367 |
+-F R,Python,FlatFile,HTAN \ |
|
368 |
+-n 15 \ |
|
369 |
+-T MulticoreParam |
|
370 |
+``` |
|
342 | 371 |
|
343 | 372 |
## Documentation of tools that are currently available within the pipeline: |
344 | 373 |
#### Empty droplet detection: |
... | ... |
@@ -4,7 +4,7 @@ |
4 | 4 |
\alias{importMultipleSources} |
5 | 5 |
\title{Imports samples from different sources and compiles them into a list of SCE objects} |
6 | 6 |
\usage{ |
7 |
-importMultipleSources(allImportEntries) |
|
7 |
+importMultipleSources(allImportEntries, delayedArray = FALSE) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{allImportEntries}{object containing the sources and parameters of all the samples being imported (from the UI)} |