... | ... |
@@ -7,13 +7,24 @@ sudo: false |
7 | 7 |
warnings_are_errors: false |
8 | 8 |
bioc_required: true |
9 | 9 |
|
10 |
+# Current version of xgboost fails install |
|
11 |
+# Need to force install older version using 'devtools::install_version' |
|
12 |
+# which is done in 'before_install' (see below) |
|
13 |
+# However 'remotes::dev_package_deps' will try to auto upgrade packages |
|
14 |
+# Therefore, a new install command is used with 'upgrade="never"' |
|
15 |
+# These parts can be deleted once the xgboost error is fixed |
|
16 |
+ |
|
10 | 17 |
before_install: |
11 | 18 |
- pyenv install 3.6.1 |
12 | 19 |
- pyenv global 3.6.1 |
13 |
- - Rscript -e 'update.packages(ask = FALSE)' |
|
20 |
+ - Rscript -e 'install.packages("devtools")' |
|
21 |
+ - Rscript -e 'devtools::install_version("xgboost", version = "0.90.0.2", repos = "http://cran.us.r-project.org")' |
|
14 | 22 |
- pip install --upgrade pip |
15 | 23 |
- pip install scipy scrublet scanpy scanorama scgen bbknn |
16 | 24 |
|
25 |
+install: |
|
26 |
+- R -e 'devtools::install_deps(dep = TRUE, upgrade="never")' |
|
27 |
+ |
|
17 | 28 |
addons: |
18 | 29 |
apt: |
19 | 30 |
packages: |
... | ... |
@@ -23,6 +34,7 @@ r_build_args: "--no-build-vignettes" |
23 | 34 |
r_check_args: "--no-vignettes --no-build-vignettes --as-cran" |
24 | 35 |
r_github_packages: |
25 | 36 |
- jimhester/lintr |
37 |
+ |
|
26 | 38 |
notifications: |
27 | 39 |
slack: |
28 | 40 |
secure: sw/He3WX7Fs61DTufgG9hx7fWDhwx57zv+dYB4sr30LgKyIJNbZYSU76ZN4oPwHFGAJ0eyk/22XCuKF8/seSB2ArQuc4kFlsaeWWg1CxDqW9iR0zs91MJL3lJoUOSayy5WcWbsUxD+x8GcB37FCgoTXcveN956bfgrDb2nGrFY6QMSVsnkWi/kiMdzhR4VPzOfmAUZUcr1aDU3WN4ROwbDg89zCfu9HbVKZwyQdYZk7L2Pgg8xGIcGZkaMN1MMD/r0Wc2aVYqCOcn5qO/9CaspEBf7cd6ejVzgAl/LM4dTIPlx9E6ekQGwJXpt1Qxyc0sqVSQWFZvFkWyV3wkqX8pfSXV6ojlP2cGJt+PSQXwLHj0TKWYx7yHbw+55da/b//SVLZVHl+P6VXxQBe2hmFD6sCk3V3uq2CkxzI52bK4Wv4RRncVKIZEpFZOc9P2ik5mf6pICmLHwttlbHeDjJbQ+raYftF704xb1sOxapLwuy+c2SWKPeqOiimfZNgOK+J7JbvQvpnaiHN+3CA3wQXs1auKBwI+swvsZPAzeKssnLv/0AQmX8ckVzvXJzUvl1J73c9V2zG2F44xmIsXwzruEp6leSbeXcLifrfbfQdmaHzq4M/Z83o/fwGRzdWfG7YXDyPIabik7DOMrSUvF+wLGVTjOart8HrA4eVEkKFnRE= |
... | ... |
@@ -15,6 +15,7 @@ export(convertSeuratToSCE) |
15 | 15 |
export(createSCE) |
16 | 16 |
export(distinctColors) |
17 | 17 |
export(enrichRSCE) |
18 |
+export(exportSCEtoAnnData) |
|
18 | 19 |
export(exportSCEtoFlatFile) |
19 | 20 |
export(filterSCData) |
20 | 21 |
export(generateSimulatedData) |
... | ... |
@@ -31,6 +32,7 @@ export(importCellRangerV2) |
31 | 32 |
export(importCellRangerV2Sample) |
32 | 33 |
export(importCellRangerV3) |
33 | 34 |
export(importCellRangerV3Sample) |
35 |
+export(importDropEst) |
|
34 | 36 |
export(importOptimus) |
35 | 37 |
export(importSEQC) |
36 | 38 |
export(importSTARsolo) |
... | ... |
@@ -37,9 +37,9 @@ |
37 | 37 |
#' Two batches of pancreas scRNAseq dataset are combined with their original |
38 | 38 |
#' counts. Cell types and batches are annotated in `colData(sceBatches)`. |
39 | 39 |
#' Two batches came from Wang, et al., 2016, annotated as `'w'`; and Xin, et |
40 |
-#' al., 2016, annotated as `'x'`. Four common cell types, `'alpha'`, `'beta'`, |
|
41 |
-#' `'gamma'`, and `'delta'` that could be found in both original study were kept |
|
42 |
-#' for cleaner demonstration. |
|
40 |
+#' al., 2016, annotated as `'x'`. Two common cell types, `'alpha'` and |
|
41 |
+#' `'beta'`, that could be found in both original studies with relatively |
|
42 |
+#' large population were kept for cleaner demonstration. |
|
43 | 43 |
#' |
44 | 44 |
#' @name sceBatches |
45 | 45 |
#' @docType data |
... | ... |
@@ -49,3 +49,17 @@ |
49 | 49 |
#' @examples |
50 | 50 |
#' data('sceBatches') |
51 | 51 |
"sceBatches" |
52 |
+ |
|
53 |
+#' Stably Expressed Gene (SEG) list obect, with SEG sets for human and mouse. |
|
54 |
+#' |
|
55 |
+#' The two gene sets came from dataset called `segList` of package `scMerge`. |
|
56 |
+#' @name SEG |
|
57 |
+#' @docType data |
|
58 |
+#' @format list, with two entries `"human"` and `"mouse"`, each is a charactor |
|
59 |
+#' array. |
|
60 |
+#' @source `data('segList', package='scMerge')`` |
|
61 |
+#' @keywords datasets |
|
62 |
+#' @examples |
|
63 |
+#' data('SEG') |
|
64 |
+#' humanSEG <- SEG$human |
|
65 |
+"SEG" |
|
52 | 66 |
\ No newline at end of file |
53 | 67 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,63 @@ |
1 |
+#source: https://github.com/klmr/decorator |
|
2 |
+ |
|
3 |
+`%@%` = function (decorator, f) UseMethod('%@%') |
|
4 |
+ |
|
5 |
+`%@%.default` = function (decorator, f) |
|
6 |
+ stop(deparse(substitute(decorator)), ' is not a decorator') |
|
7 |
+ |
|
8 |
+`%@%.decorator` = function (decorator, f) { |
|
9 |
+ pretty_decorators = as.list(match.call())[-1] |
|
10 |
+ # Patch delayed decorator. |
|
11 |
+ if (! is.null({pretty_patched = attr(decorator, 'calls')})) |
|
12 |
+ pretty_decorators = c(pretty_patched, pretty_decorators[-1]) |
|
13 |
+ |
|
14 |
+ # Handle operator precedence so that we can chain decorators. |
|
15 |
+ if (inherits(f, 'decorator')) |
|
16 |
+ .delayed_decorate(decorator, f, pretty_decorators) |
|
17 |
+ else |
|
18 |
+ prettify(decorator(f), f, pretty_decorators[-length(pretty_decorators)]) |
|
19 |
+} |
|
20 |
+ |
|
21 |
+decorator = function (f) |
|
22 |
+ structure(f, class = 'decorator') |
|
23 |
+ |
|
24 |
+decorator = decorator(decorator) |
|
25 |
+ |
|
26 |
+print.decorated = function (x, useSource = TRUE, ...) { |
|
27 |
+ bare = function (f) { |
|
28 |
+ bare = unclass(f) |
|
29 |
+ attr(bare, 'decorators') = NULL |
|
30 |
+ bare |
|
31 |
+ } |
|
32 |
+ |
|
33 |
+ fun_def = capture.output(print.function(bare(x), useSource = useSource, ...)) |
|
34 |
+ for (decorator in attr(x, 'decorators')) |
|
35 |
+ cat(deparse(decorator), '%@%\n') |
|
36 |
+ cat(fun_def, sep = '\n') |
|
37 |
+ invisible(x) |
|
38 |
+} |
|
39 |
+ |
|
40 |
+# modules::register_S3_method('print', 'decorated', print.decorated) |
|
41 |
+ |
|
42 |
+prettify = function (f, original, decorator_calls) { |
|
43 |
+ attr(f, 'srcref') = pretty_code(original) |
|
44 |
+ attr(f, 'decorators') = decorator_calls |
|
45 |
+ class(f) = c(class(f), 'decorated') |
|
46 |
+ f |
|
47 |
+} |
|
48 |
+ |
|
49 |
+pretty_code = function (f) { |
|
50 |
+ srcref = attr(f, 'srcref') |
|
51 |
+ if (is.null(srcref)) body(f) else srcref |
|
52 |
+} |
|
53 |
+ |
|
54 |
+.delayed_decorate = function (d1, d2, decorator_calls) |
|
55 |
+ structure(decorator(function (f) d1(d2(f))), calls = decorator_calls) |
|
56 |
+ |
|
57 |
+# Console logging |
|
58 |
+simpleLog <- decorator %@% function(f) { |
|
59 |
+ function(...) { |
|
60 |
+ print(match.call()) |
|
61 |
+ f(...) |
|
62 |
+ } |
|
63 |
+} |
|
0 | 64 |
\ No newline at end of file |
... | ... |
@@ -1,18 +1,22 @@ |
1 | 1 |
|
2 |
-.runBarcodeRankDrops <- function(barcode.matrix, ...) { |
|
3 |
- |
|
2 |
+.runBarcodeRankDrops <- function(barcode.matrix, lower=100, |
|
3 |
+ fit.bounds=NULL, |
|
4 |
+ df=20) { |
|
5 |
+ |
|
4 | 6 |
## Convert to sparse matrix if not already in that format |
5 | 7 |
barcode.matrix <- .convertToMatrix(barcode.matrix) |
6 | 8 |
|
7 |
- output <- DropletUtils::barcodeRanks(m = barcode.matrix, ...) |
|
8 |
- |
|
9 |
+ output <- DropletUtils::barcodeRanks(m = barcode.matrix, lower=100, |
|
10 |
+ fit.bounds=NULL, |
|
11 |
+ df=20) |
|
12 |
+ |
|
9 | 13 |
knee.ix <- as.integer(output@listData$total >= S4Vectors::metadata(output)$knee) |
10 | 14 |
inflection.ix <- as.integer(output@listData$total >= S4Vectors::metadata(output)$inflection) |
11 |
- |
|
15 |
+ |
|
12 | 16 |
result <- cbind(knee.ix, inflection.ix) |
13 | 17 |
colnames(result) <- c("dropletUtils_barcodeRank_knee", |
14 | 18 |
"dropletUtils_barcodeRank_inflection") |
15 |
- |
|
19 |
+ |
|
16 | 20 |
return(result) |
17 | 21 |
} |
18 | 22 |
|
... | ... |
@@ -48,9 +52,11 @@ |
48 | 52 |
#' sce <- runBarcodeRankDrops(inSCE = emptyDropsSceExample) |
49 | 53 |
#' @export |
50 | 54 |
runBarcodeRankDrops <- function(inSCE, |
51 |
- sample = NULL, |
|
52 |
- useAssay = "counts", |
|
53 |
- ... |
|
55 |
+ sample = NULL, |
|
56 |
+ useAssay = "counts", |
|
57 |
+ lower=100, |
|
58 |
+ fit.bounds=NULL, |
|
59 |
+ df=20 |
|
54 | 60 |
) { |
55 | 61 |
if(!is.null(sample)) { |
56 | 62 |
if(length(sample) != ncol(inSCE)) { |
... | ... |
@@ -59,27 +65,35 @@ runBarcodeRankDrops <- function(inSCE, |
59 | 65 |
} else { |
60 | 66 |
sample = rep(1, ncol(inSCE)) |
61 | 67 |
} |
62 |
- |
|
68 |
+ |
|
63 | 69 |
message(paste0(date(), " ... Running 'barcodeRanks'")) |
64 |
- |
|
70 |
+ |
|
71 |
+ ## Getting current arguments values |
|
72 |
+ argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
|
73 |
+ |
|
65 | 74 |
## Define result matrix for all samples |
66 | 75 |
output <- S4Vectors::DataFrame(row.names = colnames(inSCE), |
67 |
- dropletUtils_BarcodeRank_Knee = integer(ncol(inSCE)), |
|
68 |
- dropletUtils_BarcodeRank_Inflection = integer(ncol(inSCE))) |
|
69 |
- |
|
76 |
+ dropletUtils_BarcodeRank_Knee = integer(ncol(inSCE)), |
|
77 |
+ dropletUtils_BarcodeRank_Inflection = integer(ncol(inSCE))) |
|
78 |
+ |
|
70 | 79 |
## Loop through each sample and run barcodeRank |
71 | 80 |
samples <- unique(sample) |
72 | 81 |
for (i in seq_len(length(samples))) { |
73 | 82 |
sceSampleInd <- sample == samples[i] |
74 | 83 |
sceSample <- inSCE[, sceSampleInd] |
75 |
- |
|
84 |
+ |
|
76 | 85 |
mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
77 |
- result <- .runBarcodeRankDrops(barcode.matrix = mat, ...) |
|
78 |
- |
|
86 |
+ result <- .runBarcodeRankDrops(barcode.matrix = mat, lower=100, |
|
87 |
+ fit.bounds=NULL, |
|
88 |
+ df=20) |
|
89 |
+ |
|
79 | 90 |
output[sceSampleInd, ] <- result |
80 | 91 |
} |
81 |
- |
|
92 |
+ |
|
82 | 93 |
colData(inSCE) = cbind(colData(inSCE), output) |
83 |
- |
|
94 |
+ |
|
95 |
+ inSCE@metadata$runBarcodeRankDrops <- argsList[-1] |
|
96 |
+ inSCE@metadata$runBarcodeRankDrops$packageVersion <- packageDescription("DropletUtils")$Version |
|
97 |
+ |
|
84 | 98 |
return(inSCE) |
85 | 99 |
} |
... | ... |
@@ -1,10 +1,24 @@ |
1 |
-.runEmptyDrops <- function(barcode.matrix, ...) { |
|
2 |
- |
|
1 |
+.runEmptyDrops <- function(barcode.matrix, lower=100, |
|
2 |
+ niters=10000, |
|
3 |
+ test.ambient=FALSE, |
|
4 |
+ ignore=NULL, |
|
5 |
+ alpha=NULL, |
|
6 |
+ retain=NULL, |
|
7 |
+ barcode.args=list(), |
|
8 |
+ BPPARAM=SerialParam()) { |
|
9 |
+ |
|
3 | 10 |
barcode.matrix <- .convertToMatrix(barcode.matrix) |
4 |
- |
|
5 |
- result <- DropletUtils::emptyDrops(m = barcode.matrix, ...) |
|
11 |
+ |
|
12 |
+ result <- DropletUtils::emptyDrops(m = barcode.matrix, lower=100, |
|
13 |
+ niters=10000, |
|
14 |
+ test.ambient=FALSE, |
|
15 |
+ ignore=NULL, |
|
16 |
+ alpha=NULL, |
|
17 |
+ retain=NULL, |
|
18 |
+ barcode.args=list(), |
|
19 |
+ BPPARAM=SerialParam()) |
|
6 | 20 |
colnames(result) <- paste0("dropletUtils_emptyDrops_", colnames(result)) |
7 |
- |
|
21 |
+ |
|
8 | 22 |
return(result) |
9 | 23 |
} |
10 | 24 |
|
... | ... |
@@ -43,13 +57,20 @@ |
43 | 57 |
#' @import DropletUtils |
44 | 58 |
#' @export |
45 | 59 |
runEmptyDrops <- function(inSCE, |
46 |
- sample = NULL, |
|
47 |
- useAssay = "counts", |
|
48 |
- ... |
|
60 |
+ sample = NULL, |
|
61 |
+ useAssay = "counts", |
|
62 |
+ lower=100, |
|
63 |
+ niters=10000, |
|
64 |
+ test.ambient=FALSE, |
|
65 |
+ ignore=NULL, |
|
66 |
+ alpha=NULL, |
|
67 |
+ retain=NULL, |
|
68 |
+ barcode.args=list(), |
|
69 |
+ BPPARAM=SerialParam() |
|
49 | 70 |
) { |
50 | 71 |
# getting the current argument values |
51 |
- current_params <- as.list(sys.call()) |
|
52 |
- metadata_params <- inSCE@metadata$QCParams |
|
72 |
+ argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame())) |
|
73 |
+ |
|
53 | 74 |
|
54 | 75 |
if(!is.null(sample)) { |
55 | 76 |
if(length(sample) != ncol(inSCE)) { |
... | ... |
@@ -58,32 +79,43 @@ runEmptyDrops <- function(inSCE, |
58 | 79 |
} else { |
59 | 80 |
sample = rep(1, ncol(inSCE)) |
60 | 81 |
} |
61 |
- |
|
82 |
+ |
|
62 | 83 |
message(date(), " ... Running 'emptyDrops'") |
63 |
- |
|
84 |
+ |
|
64 | 85 |
## Define result matrix for all samples |
65 | 86 |
output <- S4Vectors::DataFrame(row.names = colnames(inSCE), |
66 |
- dropletUtils_emptyDrops_total = integer(ncol(inSCE)), |
|
67 |
- dropletUtils_emptyDrops_logprob = numeric(ncol(inSCE)), |
|
68 |
- dropletUtils_emptyDrops_pvalue = numeric(ncol(inSCE)), |
|
69 |
- dropletUtils_emptyDrops_limited = logical(ncol(inSCE)), |
|
70 |
- dropletUtils_emptyDrops_fdr = numeric(ncol(inSCE))) |
|
71 |
- |
|
87 |
+ dropletUtils_emptyDrops_total = integer(ncol(inSCE)), |
|
88 |
+ dropletUtils_emptyDrops_logprob = numeric(ncol(inSCE)), |
|
89 |
+ dropletUtils_emptyDrops_pvalue = numeric(ncol(inSCE)), |
|
90 |
+ dropletUtils_emptyDrops_limited = logical(ncol(inSCE)), |
|
91 |
+ dropletUtils_emptyDrops_fdr = numeric(ncol(inSCE))) |
|
92 |
+ |
|
72 | 93 |
## Loop through each sample and run barcodeRank |
73 | 94 |
samples <- unique(sample) |
74 | 95 |
for (i in seq_len(length(samples))) { |
75 | 96 |
sceSampleInd <- sample == samples[i] |
76 | 97 |
sceSample <- inSCE[, sceSampleInd] |
77 |
- |
|
98 |
+ |
|
78 | 99 |
mat <- SummarizedExperiment::assay(sceSample, i = useAssay) |
79 |
- result <- .runEmptyDrops(barcode.matrix = mat, ...) |
|
80 |
- |
|
100 |
+ result <- .runEmptyDrops(barcode.matrix = mat, lower=100, |
|
101 |
+ niters=10000, |
|
102 |
+ test.ambient=FALSE, |
|
103 |
+ ignore=NULL, |
|
104 |
+ alpha=NULL, |
|
105 |
+ retain=NULL, |
|
106 |
+ barcode.args=list(), |
|
107 |
+ BPPARAM=SerialParam()) |
|
108 |
+ |
|
109 |
+ |
|
81 | 110 |
output[sceSampleInd, ] <- result |
111 |
+ metadata(output[sceSampleInd, ]) <- metadata(result) |
|
82 | 112 |
} |
83 | 113 |
|
84 | 114 |
colData(inSCE) = cbind(colData(inSCE), output) |
85 |
- inSCE@metadata$QCParams <- metadata_params |
|
86 |
- inSCE@metadata$QCParams$runEmptyDrops <- current_params |
|
115 |
+ inSCE@metadata = metadata(output) |
|
116 |
+ |
|
117 |
+ inSCE@metadata$runEmptyDrops <- argsList[-1] |
|
118 |
+ inSCE@metadata$runEmptyDrops$packageVersion <- packageDescription("DropletUtils")$Version |
|
87 | 119 |
|
88 | 120 |
return(inSCE) |
89 |
-} |
|
121 |
+} |
|
90 | 122 |
\ No newline at end of file |
91 | 123 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,63 @@ |
1 |
+#' @title Export a \link[SingleCellExperiment]{SingleCellExperiment} R object as |
|
2 |
+#' Python annData object |
|
3 |
+#' @description Writes all assays, colData, rowData, reducedDims, and altExps objects in a |
|
4 |
+#' \link[SingleCellExperiment]{SingleCellExperiment} to a Python annData object in the .h5ad format |
|
5 |
+#' All parameters of Anndata.write_h5ad function (https://icb-anndata.readthedocs-hosted.com/en/stable/anndata.AnnData.write_h5ad.html) |
|
6 |
+#' are available as parameters to this export function and set to defaults. Defaults can be |
|
7 |
+#' overridden at function call. |
|
8 |
+#' @param sce \link[SingleCellExperiment]{SingleCellExperiment} R object to be |
|
9 |
+#' exported. |
|
10 |
+#' @param useAssay Character, default `"counts"`. The name of assay of |
|
11 |
+#' interests that will be set as the primary matrix of the output AnnData. |
|
12 |
+#' @param outputDir Path to the directory where .h5ad outputs will be written |
|
13 |
+#' @param overwrite Boolean. Default \code{TRUE}. |
|
14 |
+#' @param compression Default \code{None}.If output file compression is required, this variable accepts |
|
15 |
+#' 'gzip' or 'lzf' as inputs. |
|
16 |
+#' @param compression_opts Integer. Default \code{NULL} Sets the compression level |
|
17 |
+#' @param forceDense Default \code{False} Write sparse data as a dense matrix. |
|
18 |
+#' Refer anndata.write_h5ad documentation for details |
|
19 |
+#' @examples |
|
20 |
+#' data(sce_chcl, package = "scds") |
|
21 |
+#' exportSCEtoAnnData(sce=sce_chcl, compression="gzip") |
|
22 |
+#' |
|
23 |
+#' @export |
|
24 |
+ |
|
25 |
+exportSCEtoAnnData <- function(sce, |
|
26 |
+ useAssay='counts', |
|
27 |
+ outputDir="./", |
|
28 |
+ sample = "sample", |
|
29 |
+ overwrite=TRUE, |
|
30 |
+ compression= c('None','lzf','gzip'), |
|
31 |
+ compressionOpts = NULL, |
|
32 |
+ forceDense= c('False','True')){ |
|
33 |
+ compression <- match.arg(compression) |
|
34 |
+ forceDense <- match.arg(forceDense) |
|
35 |
+ if (compression == 'None'){ |
|
36 |
+ compression <- NULL |
|
37 |
+ } |
|
38 |
+ |
|
39 |
+ if (!reticulate::py_module_available(module = "scanpy")) { |
|
40 |
+ warning("Cannot find python module 'scanpy', please install Conda and", |
|
41 |
+ " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().", |
|
42 |
+ "If one of these have been previously run to install the modules,", |
|
43 |
+ "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),", |
|
44 |
+ " respectively, if R has been restarted since the module installation.", |
|
45 |
+ " Alternatively, Scrublet can be installed on the local machine", |
|
46 |
+ "with pip (e.g. pip install --user scanpy) and then the 'use_python()'", |
|
47 |
+ " function from the 'reticulate' package can be used to select the", |
|
48 |
+ " correct Python environment.") |
|
49 |
+ return(sce)} |
|
50 |
+ |
|
51 |
+ annData <- .sce2adata(sce,useAssay) |
|
52 |
+ fileName <- paste0(sample,".h5ad") |
|
53 |
+ filePath <- file.path(outputDir,fileName) |
|
54 |
+ |
|
55 |
+ if (file.exists(filePath) && !isTRUE(overwrite)) { |
|
56 |
+ stop(paste0(path, " already exists. Change 'outputDir' or set 'overwrite' to TRUE.")) |
|
57 |
+ } |
|
58 |
+ |
|
59 |
+ annData$write_h5ad(filePath, |
|
60 |
+ compression = compression, |
|
61 |
+ compression_opts = compressionOpts, |
|
62 |
+ force_dense = forceDense) |
|
63 |
+} |
0 | 64 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,143 @@ |
1 |
+ |
|
2 |
+.readDropEstFile <- function(sampleDir, dataType,rdsFileName){ |
|
3 |
+ dropEst_cell_counts <- file.path(sampleDir, paste(rdsFileName, '.rds', sep='')) |
|
4 |
+ if (!file.exists(dropEst_cell_counts)){ |
|
5 |
+ stop("DropEst output not found at location specified. Please check path provided and/or filename.") |
|
6 |
+ } |
|
7 |
+ dropEst_rds <- readRDS(dropEst_cell_counts) |
|
8 |
+ |
|
9 |
+ return(dropEst_rds) |
|
10 |
+} |
|
11 |
+ |
|
12 |
+.constructColdata <- function(dropEst_rds,counts_matrix, dataType){ |
|
13 |
+ coldata_fields <- c("mean_reads_per_umi","aligned_reads_per_cell","aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb") |
|
14 |
+ coldata_df <- list() |
|
15 |
+ for (field in coldata_fields){ |
|
16 |
+ if (field %in% names(dropEst_rds)){ |
|
17 |
+ coldata_field_df <- data.frame(as.matrix(dropEst_rds[[field]])) |
|
18 |
+ names(coldata_field_df)[1] <- field |
|
19 |
+ coldata_field_df$cell <- row.names(coldata_field_df) |
|
20 |
+ |
|
21 |
+ coldata_df[[field]] <- coldata_field_df |
|
22 |
+ }} |
|
23 |
+ coldata_df_merged <- Reduce(function(x, y) merge(x, y, all=TRUE,by="cell"), coldata_df) |
|
24 |
+ row.names(coldata_df_merged) <- coldata_df_merged$cell |
|
25 |
+ coldata_df_merged <- S4Vectors::DataFrame(as.matrix(coldata_df_merged)) |
|
26 |
+ if (dataType == 'filtered'){ |
|
27 |
+ coldata_df_merged <- coldata_df_merged[coldata_df_merged$cell %in% colnames(counts_matrix),] |
|
28 |
+ } |
|
29 |
+ return(coldata_df_merged) |
|
30 |
+} |
|
31 |
+ |
|
32 |
+.extractMetadata <- function(dropEst_rds){ |
|
33 |
+ metadata_fields <- c("saturation_info","merge_targets","reads_per_umi_per_cell") |
|
34 |
+ metadata <- c() |
|
35 |
+ for (md in metadata_fields){ |
|
36 |
+ if (md %in% names(dropEst_rds)){ |
|
37 |
+ metadata[[md]] <- dropEst_rds[[md]] |
|
38 |
+ }} |
|
39 |
+ return(metadata) |
|
40 |
+} |
|
41 |
+ |
|
42 |
+.importDropEstSample <- function(sampleDir = './', |
|
43 |
+ dataType, |
|
44 |
+ rdsFileName, |
|
45 |
+ sampleName = 'sample', |
|
46 |
+ delayedArray = delayedArrary){ |
|
47 |
+ ## Read DropEst RDS |
|
48 |
+ dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName) |
|
49 |
+ if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) { |
|
50 |
+ counts_matrix <- dropEst_rds$cm |
|
51 |
+ } else if (dataType == 'raw' && 'cm_raw' %in% names(dropEst_rds)) { |
|
52 |
+ counts_matrix <- dropEst_rds$cm_raw |
|
53 |
+ } else { |
|
54 |
+ stop("No counts matrix found in the .rds provided! Exiting.") |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ if (isTRUE(delayedArray)) { |
|
58 |
+ counts_matrix <- DelayedArray::DelayedArray(counts_matrix) |
|
59 |
+ } |
|
60 |
+ ## Create SingleCellExperiment object |
|
61 |
+ ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells. |
|
62 |
+ ## append sample name to cells in SCE |
|
63 |
+ sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = counts_matrix)) |
|
64 |
+ colnames(sce) <- paste0(sampleName,"_",colnames(sce)) |
|
65 |
+ sce_coldata <- .constructColdata(dropEst_rds, counts_matrix, dataType) |
|
66 |
+ row.names(sce_coldata) <- paste0(sampleName,"_",row.names(sce_coldata)) |
|
67 |
+ |
|
68 |
+ if (dim(counts_matrix)[2] == dim(sce_coldata)[1]){ |
|
69 |
+ SummarizedExperiment::colData(sce) <- sce_coldata |
|
70 |
+ } else { |
|
71 |
+ warning("Unable to add ColData to SCE. nCol of Counts Matrix not equal to nRow of ColData matrix.") |
|
72 |
+ } |
|
73 |
+ |
|
74 |
+ ## Add SCE metadata |
|
75 |
+ sce_metadata <- .extractMetadata(dropEst_rds) |
|
76 |
+ metadata(sce) <- sce_metadata |
|
77 |
+ |
|
78 |
+ return(sce) |
|
79 |
+} |
|
80 |
+ |
|
81 |
+#' @name importDropEst |
|
82 |
+#' @rdname importDropEst |
|
83 |
+#' @title Create a SingleCellExperiment Object from DropEst output |
|
84 |
+#' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and |
|
85 |
+#' create a SingleCellExperiment object from either the raw or filtered counts matrix. |
|
86 |
+#' Additionally parse through the RDS to obtain appropriate feature annotations as |
|
87 |
+#' SCE coldata, in addition to any metadata. |
|
88 |
+#' @param sampleDir A path to the directory containing the data files. Default "./". |
|
89 |
+#' @param sampleName A User-defined sample name. This will be prepended to all cell barcode IDs. |
|
90 |
+#' Default "sample". |
|
91 |
+#' @param dataType can be "filtered" or "raw". Default is "filtered" |
|
92 |
+#' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts" |
|
93 |
+#' @param delayedArray Boolean. Whether to read the expression matrix as |
|
94 |
+#' \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}. |
|
95 |
+#' @details |
|
96 |
+#' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered |
|
97 |
+#' counts matrix stored as "cm" in the DropEst rds output. |
|
98 |
+#' ColData is obtained from the DropEst corresponding to "mean_reads_per_umi","aligned_reads_per_cell", |
|
99 |
+#' "aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb" |
|
100 |
+#' If using filtered counts matrix, the colData dataframe is |
|
101 |
+#' subset to contain features from the filtered counts matrix alone. |
|
102 |
+#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are |
|
103 |
+#' found in the DropEst rds, they will be added to the SCE metadata field |
|
104 |
+#' @return A \code{SingleCellExperiment} object containing the count matrix, |
|
105 |
+#' the feature annotations from DropEst as ColData, and any metadata from DropEst |
|
106 |
+#' @examples |
|
107 |
+#' # Example #1 |
|
108 |
+#' Example DropEst outputs were downloaded from the DropEst Github |
|
109 |
+#' (http://pklab.med.harvard.edu/viktor/dropest_paper/dropest_0.8.5.zip). |
|
110 |
+#' To run the dropest import function with the example dataset, |
|
111 |
+#' set the sampleDirs variable to the example dropEst provided in SCTK as follows- |
|
112 |
+#' sce <- importDropEst(sampleDirs = c('path/to/dropest/folder/'), |
|
113 |
+#' dataType='filtered', sampleNames=c('sample')) |
|
114 |
+#' @export |
|
115 |
+importDropEst <- function(sampleDirs = NULL, |
|
116 |
+ dataType = c('filtered','raw'), |
|
117 |
+ rdsFileName = 'cell.counts', |
|
118 |
+ sampleNames = NULL, |
|
119 |
+ delayedArray = TRUE) { |
|
120 |
+ dataType <- match.arg(dataType) |
|
121 |
+ |
|
122 |
+ if (length(sampleDirs)!=length(sampleNames)){ |
|
123 |
+ stop("Please provide sample names for all input directories") |
|
124 |
+ } |
|
125 |
+ |
|
126 |
+ res <- vector("list", length = length(sampleDirs)) |
|
127 |
+ |
|
128 |
+ for (i in seq_along(sampleDirs)){ |
|
129 |
+ scei <- .importDropEstSample(sampleDir = sampleDirs[[i]], |
|
130 |
+ sampleName = sampleNames[[i]], |
|
131 |
+ dataType = dataType, |
|
132 |
+ rdsFileName = rdsFileName, |
|
133 |
+ delayedArray = delayedArray) |
|
134 |
+ res[[i]] <- scei |
|
135 |
+ } |
|
136 |
+ sce <- do.call(SingleCellExperiment::cbind, res) |
|
137 |
+ return(sce) |
|
138 |
+} |
|
139 |
+ |
|
140 |
+ |
|
141 |
+ |
|
142 |
+ |
|
143 |
+ |
... | ... |
@@ -71,7 +71,7 @@ summarizeTable <- function(inSCE, useAssay="counts", expressionCutoff=1700){ |
71 | 71 |
#' newSCE <- createSCE(assayFile = counts_mat, annotFile = sample_annot, |
72 | 72 |
#' featureFile = row_annot, assayName = "counts", |
73 | 73 |
#' inputDataFrames = TRUE, createLogCounts = TRUE) |
74 |
-createSCE <- function(assayFile=NULL, annotFile=NULL, featureFile=NULL, |
|
74 |
+createSCE <- simpleLog %@% function(assayFile=NULL, annotFile=NULL, featureFile=NULL, |
|
75 | 75 |
assayName="counts", inputDataFrames=FALSE, |
76 | 76 |
createLogCounts=TRUE){ |
77 | 77 |
|
... | ... |
@@ -265,7 +265,6 @@ distinctColors <- function(n, hues = c("red", "cyan", "orange", "blue", |
265 | 265 |
} |
266 | 266 |
} |
267 | 267 |
|
268 |
- |
|
269 | 268 |
## Convert a matrix to a sparse matrix and preserve column/row names |
270 | 269 |
.convertToMatrix <- function(x) { |
271 | 270 |
cn <- colnames(x) |
... | ... |
@@ -278,15 +277,3 @@ distinctColors <- function(n, hues = c("red", "cyan", "orange", "blue", |
278 | 277 |
return(x) |
279 | 278 |
} |
280 | 279 |
|
281 |
-`%@%` = function(decorator, f) { |
|
282 |
- decorator(f) |
|
283 |
-} |
|
284 |
- |
|
285 |
-simpleLog <- function(f) { |
|
286 |
- function(...) { |
|
287 |
- print(match.call()) |
|
288 |
- f(...) |
|
289 |
- } |
|
290 |
-} |
|
291 |
- |
|
292 |
- |
... | ... |
@@ -5,10 +5,10 @@ |
5 | 5 |
#' across all batches of the data. |
6 | 6 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
7 | 7 |
#' and analysis procedures. |
8 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
8 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
9 | 9 |
#' of the assay requiring batch correction in "inSCE", should exist in |
10 | 10 |
#' `assayNames(inSCE)`. |
11 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
11 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
12 | 12 |
#' field of `colData(inSCE)` that defines different batches. |
13 | 13 |
#' @param reducedDimName character, default `"BBKNN"`. The name for the |
14 | 14 |
#' corrected low-dimensional representation. |
... | ... |
@@ -26,22 +26,33 @@ |
26 | 26 |
#' data('sceBatches', package = 'singleCellTK') |
27 | 27 |
#' sceCorr <- runBBKNN(sceBatches) |
28 | 28 |
#' } |
29 |
-runBBKNN <-function(inSCE, exprs = 'logcounts', batchKey = 'batch', |
|
29 |
+runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
30 | 30 |
reducedDimName = 'BBKNN', nComponents = 50L){ |
31 | 31 |
## Input check |
32 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
33 |
- !class(inSCE) == "SCtkExperiment"){ |
|
32 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
34 | 33 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
35 | 34 |
} |
36 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
37 |
- stop(paste("\"batchKey\" name:", batchKey, "not found")) |
|
35 |
+ if(!reticulate::py_module_available(module = "bbknn")){ |
|
36 |
+ warning("Cannot find python module 'bbknn', please install Conda and", |
|
37 |
+ " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().", |
|
38 |
+ "If one of these have been previously run to install the modules,", |
|
39 |
+ "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),", |
|
40 |
+ " respectively, if R has been restarted since the module installation.", |
|
41 |
+ " Alternatively, bbknn can be installed on the local machine", |
|
42 |
+ "with pip (e.g. pip install bbknn) and then the 'use_python()'", |
|
43 |
+ " function from the 'reticulate' package can be used to select the", |
|
44 |
+ " correct Python environment.") |
|
45 |
+ return(inSCE) |
|
46 |
+ } |
|
47 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
48 |
+ stop(paste("\"batch\" name:", batch, "not found")) |
|
38 | 49 |
} |
39 | 50 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
40 | 51 |
|
41 | 52 |
## Run algorithm |
42 |
- adata <- .sce2adata(inSCE, mainAssay = exprs) |
|
53 |
+ adata <- .sce2adata(inSCE, mainAssay = useAssay) |
|
43 | 54 |
sc$tl$pca(adata, n_comps = nComponents) |
44 |
- bbknn$bbknn(adata, batch_key = batchKey, n_pcs = nComponents) |
|
55 |
+ bbknn$bbknn(adata, batch_key = batch, n_pcs = nComponents) |
|
45 | 56 |
sc$tl$umap(adata, n_components = nComponents) |
46 | 57 |
bbknnUmap <- adata$obsm[["X_umap"]] |
47 | 58 |
SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- bbknnUmap |
... | ... |
@@ -5,74 +5,61 @@ |
5 | 5 |
#' more robust performance. For introduction of MNN, see `runMNNCorrect` |
6 | 6 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
7 | 7 |
#' and analysis procedures. |
8 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
8 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
9 | 9 |
#' of the assay requiring batch correction in "inSCE", should exist in |
10 | 10 |
#' `assayNames(inSCE)`. Alternatively, see `pcInput` parameter. |
11 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
11 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
12 | 12 |
#' field of `colData(inSCE)` that defines different batches. |
13 | 13 |
#' @param reducedDimName character, default `"fastMNN"`. The name for the |
14 | 14 |
#' corrected low-dimensional representation. |
15 | 15 |
#' @param pcInput bool, default `FALSE`. Whether to use a reduceDim matrix for |
16 |
-#' batch effect correction. If TRUE, `exprs` should exist in |
|
16 |
+#' batch effect correction. If TRUE, `useAssay` should exist in |
|
17 | 17 |
#' `reduceDimNames(inSCE)`. Note that more dimensions in precomputed reducedDim |
18 | 18 |
#' have shown better correction results. |
19 | 19 |
#' @return SingleCellExperiment object with `reducedDim(inSCE, reducedDimName)` |
20 | 20 |
#' updated with corrected low-dimentional representation. |
21 | 21 |
#' @export |
22 | 22 |
#' @references Lun ATL, et al., 2016 |
23 |
-#' @examples |
|
23 |
+#' @examples |
|
24 |
+#' \dontrun{ |
|
24 | 25 |
#' data('sceBatches', package = 'singleCellTK') |
25 |
-#' sceCorr <- runFastMNN(sceBatches, exprs = 'PCA', pcInput = TRUE) |
|
26 |
-runFastMNN <- function(inSCE, exprs = "logcounts", reducedDimName = "MNN", |
|
27 |
- batchKey = 'batch', pcInput = FALSE){ |
|
26 |
+#' sceCorr <- runFastMNN(sceBatches, useAssay = 'logcounts', pcInput = FALSE) |
|
27 |
+#' } |
|
28 |
+runFastMNN <- function(inSCE, useAssay = "logcounts", reducedDimName = "MNN", |
|
29 |
+ batch = 'batch', pcInput = FALSE){ |
|
28 | 30 |
## Input check |
29 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
30 |
- !class(inSCE) == "SCtkExperiment"){ |
|
31 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
31 | 32 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
32 | 33 |
} |
33 |
- if(pcInput){ |
|
34 |
- if(!exprs %in% SingleCellExperiment::reducedDimNames(inSCE)) { |
|
35 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
34 |
+ if(isTRUE(pcInput)){ |
|
35 |
+ if(!(useAssay %in% SingleCellExperiment::reducedDimNames(inSCE))) { |
|
36 |
+ stop(paste("\"useAssay\" (reducedDim) name: ", useAssay, " not found.")) |
|
36 | 37 |
} |
37 | 38 |
} else { |
38 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
39 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
39 |
+ if(!(useAssay %in% SummarizedExperiment::assayNames(inSCE))) { |
|
40 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
40 | 41 |
} |
41 | 42 |
} |
42 | 43 |
|
43 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
44 |
- stop(paste("\"batchKey name:", batchKey, "not found.")) |
|
44 |
+ if(!(batch %in% names(SummarizedExperiment::colData(inSCE)))){ |
|
45 |
+ stop(paste("\"batch name:", batch, "not found.")) |
|
45 | 46 |
} |
46 | 47 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
47 | 48 |
|
48 | 49 |
## Run algorithm |
49 |
- # Extract each batch |
|
50 |
- if(pcInput){ |
|
51 |
- mat <- SingleCellExperiment::reducedDim(inSCE, exprs) |
|
52 |
- } else { |
|
53 |
- mat <- SummarizedExperiment::assay(inSCE, exprs) |
|
54 |
- } |
|
55 | 50 |
batches <- list() |
56 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
57 |
- batchIndicator <- unique(batchCol) |
|
58 |
- for(i in batchIndicator){ |
|
59 |
- if(pcInput){ |
|
60 |
- batches[[i]] <- mat[batchCol == i,] |
|
61 |
- } else { |
|
62 |
- batches[[i]] <- mat[,batchCol == i] |
|
63 |
- } |
|
64 |
- |
|
65 |
- } |
|
51 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
52 |
+ batchFactor <- as.factor(batchCol) |
|
53 |
+ |
|
66 | 54 |
if(pcInput){ |
67 |
- mnn <- batchelor::fastMNN(batches[[1]], |
|
68 |
- batches[[2]], |
|
69 |
- pc.input = TRUE) |
|
55 |
+ mat <- SingleCellExperiment::reducedDim(inSCE, useAssay) |
|
56 |
+ redMNN <- batchelor::reducedMNN(mat, batch = batchFactor) |
|
57 |
+ newRedDim <- redMNN$corrected |
|
70 | 58 |
} else { |
71 |
- mnn <- batchelor::fastMNN(batches[[1]], |
|
72 |
- batches[[2]]) |
|
59 |
+ mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
60 |
+ mnnSCE <- batchelor::fastMNN(mat, batch = batchFactor) |
|
61 |
+ newRedDim <- SingleCellExperiment::reducedDim(mnnSCE, 'corrected') |
|
73 | 62 |
} |
74 |
- rn <- rownames(mat) |
|
75 |
- newMat <- mnn$corrected[rn,] |
|
76 |
- SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- newMat |
|
63 |
+ SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- newRedDim |
|
77 | 64 |
return(inSCE) |
78 | 65 |
} |
... | ... |
@@ -4,15 +4,15 @@ |
4 | 4 |
#' cells group by cell type rather than dataset-specific conditions. |
5 | 5 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
6 | 6 |
#' and analysis procedures. |
7 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
7 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
8 | 8 |
#' of the assay requiring batch correction in "inSCE", should exist in |
9 | 9 |
#' `assayNames(inSCE)`. |
10 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
10 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
11 | 11 |
#' field of `colData(inSCE)` that defines different batches. |
12 | 12 |
#' @param reducedDimName character, default `"HARMONY"`. The name for the |
13 | 13 |
#' corrected low-dimensional representation. |
14 | 14 |
#' @param pcInput bool, default `FALSE`. Whether to do correction on a low-dim |
15 |
-#' matrix. If `TRUE`, will look for `exprs` in `reducedDim(inSCE)`. |
|
15 |
+#' matrix. If `TRUE`, will look for `useAssay` in `reducedDim(inSCE)`. |
|
16 | 16 |
#' @param nComponents integer, default `20`. Number of principle components or |
17 | 17 |
#' dimensionality to generate in the resulting reducedDim. If `pcInput`, will |
18 | 18 |
#' directly follow the dimensionality of the specified reducedDim. |
... | ... |
@@ -25,46 +25,34 @@ |
25 | 25 |
#' @references Ilya Korsunsky, et al., 2019 |
26 | 26 |
#' @examples |
27 | 27 |
#' data('sceBatches', package = 'singleCellTK') |
28 |
-#' sceBatches |
|
29 |
-#' ## class: SingleCellExperiment |
|
30 |
-#' ## dim: 27610 1820 |
|
31 |
-#' ## metadata(0): |
|
32 |
-#' ## assays(3): normcounts logcounts |
|
33 |
-#' ## rownames(27610): GCG MALAT1 ... LOC102724004 LOC102724238 |
|
34 |
-#' ## rowData names(0): |
|
35 |
-#' ## colnames(1820): reads.12732 reads.12733 ... Sample_1598 Sample_1600 |
|
36 |
-#' ## colData names(2): cell_type1 batch |
|
37 |
-#' ## reducedDimNames(5): PCA |
|
38 |
-#' ## spikeNames(0): |
|
39 | 28 |
#' sceCorr <- runHarmony(sceBatches, nComponents = 10) |
40 |
-runHarmony <- function(inSCE, exprs = "logcounts", pcInput = FALSE, |
|
41 |
- batchKey = "batch", reducedDimName = "HARMONY", |
|
29 |
+runHarmony <- function(inSCE, useAssay = "logcounts", pcInput = FALSE, |
|
30 |
+ batch = "batch", reducedDimName = "HARMONY", |
|
42 | 31 |
nComponents = 50, theta = 5, nIter = 10){ |
43 | 32 |
## Input check |
44 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
45 |
- !class(inSCE) == "SCtkExperiment"){ |
|
33 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
46 | 34 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
47 | 35 |
} |
48 | 36 |
if(pcInput){ |
49 |
- if(!exprs %in% SingleCellExperiment::reducedDimNames(inSCE)) { |
|
50 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
37 |
+ if(!useAssay %in% SingleCellExperiment::reducedDimNames(inSCE)) { |
|
38 |
+ stop(paste("\"useAssay\" (reducedDim) name: ", useAssay, " not found.")) |
|
51 | 39 |
} |
52 | 40 |
} else { |
53 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
54 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
41 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
42 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
55 | 43 |
} |
56 | 44 |
} |
57 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
58 |
- stop(paste("\"batchKey\" name:", batchKey, "not found")) |
|
45 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
46 |
+ stop(paste("\"batch\" name:", batch, "not found")) |
|
59 | 47 |
} |
60 | 48 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
61 | 49 |
|
62 | 50 |
## Run algorithm |
63 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
51 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
64 | 52 |
if(pcInput){ |
65 |
- mat <- SingleCellExperiment::reducedDim(inSCE, exprs) |
|
53 |
+ mat <- SingleCellExperiment::reducedDim(inSCE, useAssay) |
|
66 | 54 |
} else{ |
67 |
- sceTmp <- scater::runPCA(inSCE, exprs_values = exprs, |
|
55 |
+ sceTmp <- scater::runPCA(inSCE, exprs_values = useAssay, |
|
68 | 56 |
ncomponents = nComponents) |
69 | 57 |
mat <- SingleCellExperiment::reducedDim(sceTmp, 'PCA') |
70 | 58 |
} |
... | ... |
@@ -4,10 +4,10 @@ |
4 | 4 |
#' shared and dataset-specific factors. |
5 | 5 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
6 | 6 |
#' and analysis procedures. |
7 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
7 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
8 | 8 |
#' of the assay requiring batch correction in "inSCE", should exist in |
9 | 9 |
#' `assayNames(inSCE)`. |
10 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
10 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
11 | 11 |
#' field of `colData(inSCE)` that defines different batches. |
12 | 12 |
#' @param reducedDimName character, default `"LIGER"`. The name for the |
13 | 13 |
#' corrected low-dimensional representation. |
... | ... |
@@ -28,31 +28,30 @@ |
28 | 28 |
#' data('sceBatches', package = 'singleCellTK') |
29 | 29 |
#' sceCorr <- runLIGER(sceBatches) |
30 | 30 |
#' } |
31 |
-runLIGER <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', |
|
31 |
+runLIGER <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
32 | 32 |
reducedDimName = 'LIGER', nComponents = 20L, lambda = 5.0, |
33 | 33 |
resolution = 1.0){ |
34 | 34 |
## Input check |
35 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
36 |
- !class(inSCE) == "SCtkExperiment"){ |
|
35 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
37 | 36 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
38 | 37 |
} |
39 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
40 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found")) |
|
38 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
39 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found")) |
|
41 | 40 |
} |
42 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
43 |
- stop(paste("\"batchKey\" name:", batchKey, "not found")) |
|
41 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
42 |
+ stop(paste("\"batch\" name:", batch, "not found")) |
|
44 | 43 |
} |
45 | 44 |
reducedDimName <- gsub(' ', '_', reducedDimName) |
46 | 45 |
|
47 | 46 |
## Run algorithm |
48 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
47 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
49 | 48 |
batches <- unique(batchCol) |
50 | 49 |
nBatch <- length(batches) |
51 | 50 |
batchMatrices <- list() |
52 | 51 |
for(i in 1:nBatch){ |
53 | 52 |
b <- batches[i] |
54 | 53 |
batchMatrices[[b]] <- |
55 |
- SummarizedExperiment::assay(inSCE, exprs)[,batchCol == b] |
|
54 |
+ SummarizedExperiment::assay(inSCE, useAssay)[,batchCol == b] |
|
56 | 55 |
} |
57 | 56 |
ligerex <- liger::createLiger(batchMatrices) |
58 | 57 |
ligerex <- liger::normalize(ligerex) |
... | ... |
@@ -4,10 +4,10 @@ |
4 | 4 |
#' removes the component due to the batch effects. |
5 | 5 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
6 | 6 |
#' and analysis procedures. |
7 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
7 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
8 | 8 |
#' of the assay requiring batch correction in "inSCE", should exist in |
9 | 9 |
#' `assayNames(inSCE)`. |
10 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
10 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
11 | 11 |
#' field of `colData(inSCE)` that defines different batches. |
12 | 12 |
#' @param assayName character, default `"LIMMA"`. The name for the corrected |
13 | 13 |
#' full-sized expression matrix. |
... | ... |
@@ -17,38 +17,26 @@ |
17 | 17 |
#' @references Gordon K Smyth, et al., 2003 |
18 | 18 |
#' @examples |
19 | 19 |
#' data('sceBatches', package = 'singleCellTK') |
20 |
-#' sceBatches |
|
21 |
-#' ## class: SingleCellExperiment |
|
22 |
-#' ## dim: 27610 1820 |
|
23 |
-#' ## metadata(0): |
|
24 |
-#' ## assays(3): normcounts logcounts |
|
25 |
-#' ## rownames(27610): GCG MALAT1 ... LOC102724004 LOC102724238 |
|
26 |
-#' ## rowData names(0): |
|
27 |
-#' ## colnames(1820): reads.12732 reads.12733 ... Sample_1598 Sample_1600 |
|
28 |
-#' ## colData names(2): cell_type1 batch |
|
29 |
-#' ## reducedDimNames(5): PCA |
|
30 |
-#' ## spikeNames(0): |
|
31 | 20 |
#' sceCorr <- runLimmaBC(sceBatches) |
32 |
-runLimmaBC <- function(inSCE, exprs = "logcounts", assayName = "LIMMA", |
|
33 |
- batchKey = "batch") { |
|
21 |
+runLimmaBC <- function(inSCE, useAssay = "logcounts", assayName = "LIMMA", |
|
22 |
+ batch = "batch") { |
|
34 | 23 |
## Input check |
35 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
36 |
- !class(inSCE) == "SCtkExperiment"){ |
|
24 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
37 | 25 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
38 | 26 |
} |
39 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
40 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
27 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
28 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
41 | 29 |
} |
42 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
43 |
- stop(paste("\"batchKey\" name:", batchKey, "not found.")) |
|
30 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
31 |
+ stop(paste("\"batch\" name:", batch, "not found.")) |
|
44 | 32 |
} |
45 | 33 |
|
46 | 34 |
assayName <- gsub(' ', '_', assayName) |
47 | 35 |
|
48 | 36 |
## Run algorithm |
49 | 37 |
## One more check for the batch names |
50 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
51 |
- mat <- SummarizedExperiment::assay(inSCE, exprs) |
|
38 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
39 |
+ mat <- SummarizedExperiment::assay(inSCE, useAssay) |
|
52 | 40 |
newMat <- limma::removeBatchEffect(mat, batch = batchCol) |
53 | 41 |
SummarizedExperiment::assay(inSCE, assayName) <- newMat |
54 | 42 |
return(inSCE) |
... | ... |
@@ -8,17 +8,13 @@ |
8 | 8 |
#' applying a Gaussian smoothing kernel with bandwidth `sigma`. |
9 | 9 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
10 | 10 |
#' and analysis procedures. |
11 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
11 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
12 | 12 |
#' of the assay requiring batch correction in "inSCE", should exist in |
13 | 13 |
#' `assayNames(inSCE)`. |
14 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
14 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
15 | 15 |
#' field of `colData(inSCE)` that defines different batches. |
16 |
-#' @param reducedDimName character, default `"MNN"`. The name for the |
|
17 |
-#' corrected low-dimensional representation. |
|
18 |
-#' @param nHVG integer, default `1000`. The number of top highly variable genes |
|
19 |
-#' to select per batch. Afterwards the intersection of all HVG sets will be |
|
20 |
-#' taken for running , thus the exact dimensionality of the resulting |
|
21 |
-#' reducedDim is not certain. |
|
16 |
+#' @param assayName character, default `"MNN"`. The name for the corrected |
|
17 |
+#' full-sized expression matrix. |
|
22 | 18 |
#' @param k integer, default `20`. Specifies the number of nearest neighbours to |
23 | 19 |
#' consider when defining MNN pairs. This should be interpreted as the minimum |
24 | 20 |
#' frequency of each cell type or state in each batch. Larger values will |
... | ... |
@@ -37,57 +33,26 @@ |
37 | 33 |
#' @examples |
38 | 34 |
#' data('sceBatches', package = 'singleCellTK') |
39 | 35 |
#' sceCorr <- runMNNCorrect(sceBatches) |
40 |
-runMNNCorrect <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', |
|
41 |
- reducedDimName = 'MNN', nHVG = 1000, |
|
42 |
- k = 20, sigma = 0.1){ |
|
36 |
+runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
37 |
+ assayName = 'MNN', k = 20, sigma = 0.1){ |
|
43 | 38 |
## Input check |
44 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
45 |
- !class(inSCE) == "SCtkExperiment"){ |
|
39 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
46 | 40 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
47 | 41 |
} |
48 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
49 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
42 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
43 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
50 | 44 |
} |
51 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
52 |
- stop(paste("\"batchKey name:", batchKey, "not found.")) |
|
45 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
46 |
+ stop(paste("\"batch name:", batch, "not found.")) |
|
53 | 47 |
} |
54 |
- reducedDimName <- gsub(' ', '_', reducedDimName) |
|
48 |
+ assayName <- gsub(' ', '_', assayName) |
|
55 | 49 |
|
56 | 50 |
## Run algorithm |
57 |
- # Split the batches |
|
58 |
- batches <- list() |
|
59 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
60 |
- uniqBatch <- unique(batchCol) |
|
61 |
- for(i in uniqBatch){ |
|
62 |
- batches[[i]] <- inSCE[, batchCol == i] |
|
63 |
- } |
|
64 |
- |
|
65 |
- # Select HVG |
|
66 |
- topVarGenesPerBatch <- list() |
|
67 |
- for(i in uniqBatch){ |
|
68 |
- if(nrow(batches[[i]]) <= nHVG){ |
|
69 |
- topVarGenesPerBatch[[i]] <- 1:nrow(batches[[i]]) |
|
70 |
- } else { |
|
71 |
- sce.var <- scran::modelGeneVar(batches[[i]]) |
|
72 |
- topVarGenesPerBatch[[i]] <- order(sce.var$bio,decreasing = TRUE)[seq(nHVG)] |
|
73 |
-# mvTrend <- scran::trendVar(batches[[i]], use.spikes=FALSE) |
|
74 |
-# decomposeTrend <- scran::decomposeVar(batches[[i]], mvTrend) |
|
75 |
-# topVarGenesPerBatch[[i]] <- order(decomposeTrend$bio, |
|
76 |
-# decreasing = TRUE)[1:nHVG] |
|
77 |
- } |
|
78 |
- } |
|
79 |
- selectedHVG <- BiocGenerics::Reduce(intersect, topVarGenesPerBatch) |
|
80 |
- |
|
81 |
- ## Run mnnCorrect |
|
82 |
- inputAssays <- list() |
|
83 |
- for(i in uniqBatch){ |
|
84 |
- inputAssays[[i]] <- |
|
85 |
- SummarizedExperiment::assay(batches[[i]], exprs)[selectedHVG,] |
|
86 |
- } |
|
87 |
- corrected <- BiocGenerics::do.call(batchelor::mnnCorrect, |
|
88 |
- c(inputAssays, list(k = k, sigma = sigma))) |
|
89 |
- corrected <- t(assay(corrected, 'corrected')[,colnames(inSCE)]) |
|
90 |
- SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- corrected |
|
91 |
- |
|
51 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
52 |
+ batchFactor <- as.factor(batchCol) |
|
53 |
+ mnnSCE <- batchelor::mnnCorrect(inSCE, batch = batchFactor, |
|
54 |
+ k = k, sigma = sigma) |
|
55 |
+ corrected <- SummarizedExperiment::assay(mnnSCE, 'corrected') |
|
56 |
+ SummarizedExperiment::assay(inSCE, assayName) <- corrected |
|
92 | 57 |
return(inSCE) |
93 | 58 |
} |
... | ... |
@@ -6,10 +6,10 @@ |
6 | 6 |
#' panorama. |
7 | 7 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
8 | 8 |
#' and analysis procedures. |
9 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
9 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
10 | 10 |
#' of the assay requiring batch correction in "inSCE", should exist in |
11 | 11 |
#' `assayNames(inSCE)`. |
12 |
-#' @param batchKey character, default `"batch"`. A string indicating the |
|
12 |
+#' @param batch character, default `"batch"`. A string indicating the |
|
13 | 13 |
#' field of `colData(inSCE)` that defines different batches. |
14 | 14 |
#' @param assayName character, default `"SCANORAMA"`. The name for the |
15 | 15 |
#' corrected full-sized expression matrix. |
... | ... |
@@ -24,37 +24,39 @@ |
24 | 24 |
#' @export |
25 | 25 |
#' @references Brian Hie et al, 2019 |
26 | 26 |
#' @examples |
27 |
+#' \dontrun{ |
|
27 | 28 |
#' data('sceBatches', package = 'singleCellTK') |
28 |
-#' sceBatches |
|
29 |
-#' ## class: SingleCellExperiment |
|
30 |
-#' ## dim: 27610 1820 |
|
31 |
-#' ## metadata(0): |
|
32 |
-#' ## assays(3): normcounts logcounts |
|
33 |
-#' ## rownames(27610): GCG MALAT1 ... LOC102724004 LOC102724238 |
|
34 |
-#' ## rowData names(0): |
|
35 |
-#' ## colnames(1820): reads.12732 reads.12733 ... Sample_1598 Sample_1600 |
|
36 |
-#' ## colData names(2): cell_type1 batch |
|
37 |
-#' ## reducedDimNames(5): PCA |
|
38 |
-#' ## spikeNames(0): |
|
39 | 29 |
#' sceCorr <- runSCANORAMA(sceBatches) |
40 |
-runSCANORAMA <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', |
|
30 |
+#' } |
|
31 |
+runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
41 | 32 |
assayName = 'SCANORAMA', SIGMA = 15, ALPHA = 0.1, |
42 | 33 |
KNN = 20L){ |
43 | 34 |
## Input check |
44 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
45 |
- !class(inSCE) == "SCtkExperiment"){ |
|
35 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
46 | 36 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
47 | 37 |
} |
48 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
49 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found")) |
|
38 |
+ if(!reticulate::py_module_available(module = "scanorama")){ |
|
39 |
+ warning("Cannot find python module 'scanorama', please install Conda and", |
|
40 |
+ " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().", |
|
41 |
+ "If one of these have been previously run to install the modules,", |
|
42 |
+ "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),", |
|
43 |
+ " respectively, if R has been restarted since the module installation.", |
|
44 |
+ " Alternatively, scanorama can be installed on the local machine", |
|
45 |
+ "with pip (e.g. pip install scanorama) and then the 'use_python()'", |
|
46 |
+ " function from the 'reticulate' package can be used to select the", |
|
47 |
+ " correct Python environment.") |
|
48 |
+ return(inSCE) |
|
50 | 49 |
} |
51 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
52 |
- stop(paste("\"batchKey\" name:", batchKey, "not found")) |
|
50 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
51 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found")) |
|
52 |
+ } |
|
53 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
54 |
+ stop(paste("\"batch\" name:", batch, "not found")) |
|
53 | 55 |
} |
54 | 56 |
assayName <- gsub(' ', '_', assayName) |
55 | 57 |
|
56 | 58 |
## Run algorithm |
57 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
59 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
58 | 60 |
batches <- unique(batchCol) |
59 | 61 |
nBatch <- length(batches) |
60 | 62 |
ixList <- list() |
... | ... |
@@ -63,7 +65,7 @@ runSCANORAMA <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', |
63 | 65 |
} |
64 | 66 |
exprsList <- list() |
65 | 67 |
for(i in 1:nBatch){ |
66 |
- exprsList[[i]] <- t(as.matrix(SummarizedExperiment::assay(inSCE[,ixList[[i]]], exprs))) |
|
68 |
+ exprsList[[i]] <- t(as.matrix(SummarizedExperiment::assay(inSCE[,ixList[[i]]], useAssay))) |
|
67 | 69 |
} |
68 | 70 |
geneList <- list() |
69 | 71 |
for(i in 1:nBatch){ |
... | ... |
@@ -9,12 +9,12 @@ |
9 | 9 |
#' cores. |
10 | 10 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
11 | 11 |
#' and analysis procedures. |
12 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
12 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
13 | 13 |
#' of the assay requiring batch correction in "inSCE", should exist in |
14 | 14 |
#' `assayNames(inSCE)`. |
15 |
-#' @param batchKey character, default `"batch"`. A string indicating the field |
|
15 |
+#' @param batch character, default `"batch"`. A string indicating the field |
|
16 | 16 |
#' of `colData(inSCE)` that defines different batches. |
17 |
-#' @param cellTypeKey character, default `"cell_type"`. A string indicating the |
|
17 |
+#' @param cellType character, default `"cell_type"`. A string indicating the |
|
18 | 18 |
#' field of `colData(inSCE)` that defines different cell types. |
19 | 19 |
#' @param assayName character, default `"SCGEN"`. The name for the corrected |
20 | 20 |
#' full-sized expression matrix. |
... | ... |
@@ -27,29 +27,40 @@ |
27 | 27 |
#' data('sceBatches', package = 'singleCellTK') |
28 | 28 |
#' sceCorr <- runSCGEN(sceBatches) |
29 | 29 |
#' } |
30 |
-runSCGEN <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', |
|
31 |
- cellTypeKey = "cell_type", assayName = 'SCGEN', |
|
30 |
+runSCGEN <- function(inSCE, useAssay = 'logcounts', batch = 'batch', |
|
31 |
+ cellType = "cell_type", assayName = 'SCGEN', |
|
32 | 32 |
nEpochs = 50L){ |
33 | 33 |
## Input check |
34 |
- if(!class(inSCE) == "SingleCellExperiment" |
|
35 |
- && !class(inSCE) == "SCtkExperiment"){ |
|
34 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
36 | 35 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
37 | 36 |
} |
38 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
39 |
- stop(paste("\"batchKey\" name:", batchKey, "not found")) |
|
37 |
+ if (!reticulate::py_module_available(module = "scgen")) { |
|
38 |
+ warning("Cannot find python module 'scgen', please install Conda and", |
|
39 |
+ " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().", |
|
40 |
+ "If one of these have been previously run to install the modules,", |
|
41 |
+ "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),", |
|
42 |
+ " respectively, if R has been restarted since the module installation.", |
|
43 |
+ " Alternatively, scgen can be installed on the local machine", |
|
44 |
+ "with pip (e.g. pip install scgen) and then the 'use_python()'", |
|
45 |
+ " function from the 'reticulate' package can be used to select the", |
|
46 |
+ " correct Python environment.") |
|
47 |
+ return(inSCE) |
|
40 | 48 |
} |
41 |
- if(!cellTypeKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
42 |
- stop(paste("\"cellTypeKey\" name:", batchKey, "not found")) |
|
49 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
50 |
+ stop(paste("\"batch\" name:", batch, "not found")) |
|
51 |
+ } |
|
52 |
+ if(!cellType %in% names(SummarizedExperiment::colData(inSCE))){ |
|
53 |
+ stop(paste("\"cellType\" name:", cellType, "not found")) |
|
43 | 54 |
} |
44 | 55 |
assayName <- gsub(' ', '_', assayName) |
45 | 56 |
nEpochs <- as.integer(nEpochs) |
46 | 57 |
|
47 | 58 |
## Run algorithm |
48 |
- adata <- .sce2adata(inSCE, mainAssay = exprs) |
|
59 |
+ adata <- .sce2adata(inSCE, mainAssay = useAssay) |
|
49 | 60 |
network = scgen$VAEArith(x_dimension = adata$n_vars) |
50 | 61 |
network$train(train_data = adata, n_epochs = nEpochs) |
51 |
- corrAdata <- scgen$batch_removal(network, adata, batch_key = batchKey, |
|
52 |
- cell_label_key = cellTypeKey) |
|
62 |
+ corrAdata <- scgen$batch_removal(network, adata, batch_key = batch, |
|
63 |
+ cell_label_key = cellType) |
|
53 | 64 |
corrMat <- t(corrAdata$X) |
54 | 65 |
SummarizedExperiment::assay(inSCE, assayName) <- corrMat |
55 | 66 |
return(inSCE) |
... | ... |
@@ -5,128 +5,103 @@ |
5 | 5 |
#' scRNA-Seq data. |
6 | 6 |
#' @param inSCE SingleCellExperiment object. An object that stores your dataset |
7 | 7 |
#' and analysis procedures. |
8 |
-#' @param exprs character, default `"logcounts"`. A string indicating the name |
|
8 |
+#' @param useAssay character, default `"logcounts"`. A string indicating the name |
|
9 | 9 |
#' of the assay requiring batch correction in "inSCE", should exist in |
10 | 10 |
#' `assayNames(inSCE)`. |
11 |
-#' @param batchKey character, default `"batch"`. A string indicating the field |
|
11 |
+#' @param batch character, default `"batch"`. A string indicating the field |
|
12 | 12 |
#' of `colData(inSCE)` that defines different batches. |
13 | 13 |
#' @param assayName character, default `"scMerge"`. The name for the corrected |
14 | 14 |
#' full-sized expression matrix. |
15 |
-#' @param kmeansK vector of int, default `NULL`. Indicates the kmeans's K for |
|
16 |
-#' each batch. The length of `kmeansK` needs to be the same as the number of |
|
17 |
-#' batches. If not given, this vector will be identified by counting cell types |
|
18 |
-#' in each batch. |
|
19 |
-#' @param cellTypeKey character, default `"cell_type"`. A string indicating the |
|
20 |
-#' field of `colData(inSCE)` that defines different cell types. Only needed |
|
21 |
-#' when `kmeansK` is left to `NULL`. |
|
22 |
-#' @param species character, default `NULL`. Choose from `{"human", "mouse"}`. |
|
23 |
-#' If given, the algorithm will take default species specific SEG (Stably |
|
24 |
-#' Expressed Genes) set as negative control. |
|
25 |
-#' @param seg character vecter, default `NULL`. A gene list that specifies SEG |
|
26 |
-#' (Stably Expressed Genes) set as negative control. If `species` set, will be |
|
27 |
-#' ignored; if both `species` and `seg` are `NULL`, will take some time to |
|
28 |
-#' automatically detect top 1000 SEGs from `assay(inSCE, exprs)`. |
|
29 |
-#' @param nHVG integer, default `1000`. The number of top highly variable genes |
|
30 |
-#' to select per batch. |
|
15 |
+#' @param kmeansK vector of int, default `NULL`. A vector indicating the |
|
16 |
+#' kmeans' K-value for each batch, in order to construct pseudo-replicates. The |
|
17 |
+#' length of `kmeansK` needs to be the same as the number of batches. |
|
18 |
+#' @param cellType character, default `"cell_type"`. A string indicating the |
|
19 |
+#' field of `colData(inSCE)` that defines different cell types. |
|
20 |
+#' @param seg array, default `NULL`. An array of gene names or indices that |
|
21 |
+#' specifies SEG (Stably Expressed Genes) set as negative control. Pre-defined |
|
22 |
+#' dataset with human and mouse SEG lists is available to user by running |
|
23 |
+#' `data('SEG')`. |
|
24 |
+#' @param nCores integer, default `parallel::detectCores()`. The number of |
|
25 |
+#' cores of processors to allocate for the task. By default it takes all the |
|
26 |
+#' cores available to the user. |
|
31 | 27 |
#' @return SingleCellExperiment object with `assay(inSCE, assayName)` updated |
32 | 28 |
#' with corrected full-sized expression matrix. |
33 | 29 |
#' @export |
34 | 30 |
#' @references Hoa, et al., 2020 |
35 | 31 |
#' @examples |
36 | 32 |
#' data('sceBatches', package = 'singleCellTK') |
37 |
-#' sceCorr <- runSCMerge(sceBatches, species = 'human') |
|
38 |
-runSCMerge <- function(inSCE, exprs = "logcounts", batchKey = 'batch', |
|
39 |
- assayName = "scMerge", kmeansK = NULL, |
|
40 |
- cellTypeKey = 'cell_type', species = NULL, |
|
41 |
- seg = NULL, nHVG = 1000){ |
|
33 |
+#' sceCorr <- runSCMerge(sceBatches) |
|
34 |
+runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', |
|
35 |
+ assayName = "scMerge", seg = NULL, kmeansK = NULL, |
|
36 |
+ cellType = 'cell_type', |
|
37 |
+ nCores = 1){ |
|
42 | 38 |
## Input check |
43 |
- if(!class(inSCE) == "SingleCellExperiment" && |
|
44 |
- !class(inSCE) == "SCtkExperiment"){ |
|
39 |
+ if(!inherits(inSCE, "SingleCellExperiment")){ |
|
45 | 40 |
stop("\"inSCE\" should be a SingleCellExperiment Object.") |
46 | 41 |
} |
47 |
- if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) { |
|
48 |
- stop(paste("\"exprs\" (assay) name: ", exprs, " not found.")) |
|
42 |
+ if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) { |
|
43 |
+ stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found.")) |
|
49 | 44 |
} |
50 |
- if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
51 |
- stop(paste("\"batchKey\" name:", batchKey, "not found")) |
|
45 |
+ if(!batch %in% names(SummarizedExperiment::colData(inSCE))){ |
|
46 |
+ stop(paste("\"batch\" name:", batch, "not found")) |
|
52 | 47 |
} |
53 |
- if(!cellTypeKey %in% names(SummarizedExperiment::colData(inSCE))){ |
|
54 |
- stop(paste("\"cellTypeKey\" name:", cellTypeKey, "not found")) |
|
48 |
+ if(is.null(cellType) & is.null(kmeansK)){ |
|
49 |
+ stop("\"cellType\" and \"kmeansK\" cannot be NULL at the same time") |
|
55 | 50 |
} |
56 |
- if (!is.null(species) && !is.null(seg)){ |
|
57 |
- stop("None or only one of the arguments \"species\" and \"seg\" should be applied.") |
|
58 |
- } |
|
59 |
- |
|
60 |
- assayName <- gsub(' ', '_', assayName) |
|
61 |
- |
|
62 |
- if(!is.null(species)){ |
|
63 |
- if(species %in% c('human', 'mouse')){ |
|
64 |
- # Automatic selection |
|
65 |
- genes <- rownames(inSCE) |
|
66 |
- if(all(startsWith(genes, 'ENG'))){ |
|
67 |
- message("All gene IDs start with \"ENG\", using Ensembl IDs") |
|
68 |
- ctl <- SEG[[species]][[1]] |
|
69 |
- } else { |
|
70 |
- message("Not all gene IDs start with \"ENG\", using gene symbols") |
|
71 |
- ctl <- SEG[[species]][[2]] |
|
72 |
- } |
|
73 |
- } else { |
|
74 |
- stop("Please choose \"species\" from {\"human\", \"mouse\"}") |
|
75 |
- } |
|
76 |
- } else if (!is.null(seg)){ |
|
77 |
- ctl <- seg |
|
78 |
- } else { |
|
79 |
- seg <- scMerge::scSEGIndex(SummarizedExperiment::assay(inSCE, exprs), |
|
80 |
- SummarizedExperiment::colData(inSCE)[[cellTypeKey]], |
|
81 |
- ncore = parallel::detectCores()) |
|
82 |
- ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[1:1000],]) |
|
51 |
+ if(!cellType %in% names(SummarizedExperiment::colData(inSCE))){ |
|
52 |
+ # If NULL, scMerge still works |
|
53 |
+ stop(paste("\"cellType\" name:", cellType, "not found")) |
|
83 | 54 |
} |
55 |
+ |
|
56 |
+ nCores <- min(as.integer(nCores), parallel::detectCores()) |
|
57 |
+ assayName <- gsub(' ', '_', assayName) |
|
84 | 58 |
|
85 | 59 |
## Run algorithm |
86 |
- |
|
87 |
- # Select HVG |
|
88 |
- batches <- list() |
|
89 |
- batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]] |
|
60 |
+ |
|
61 |
+ batchCol <- SummarizedExperiment::colData(inSCE)[[batch]] |
|
90 | 62 |
uniqBatch <- unique(batchCol) |
91 |
- for(i in uniqBatch){ |
|
92 |
- batches[[i]] <- inSCE[, batchCol == i] |
|
93 |
- } |
|
94 |
- topVarGenesPerBatch <- list() |
|
95 |
- for(i in uniqBatch){ |
|
96 |
- if(nrow(batches[[i]]) <= nHVG){ |
|
97 |
- topVarGenesPerBatch[[i]] <- 1:nrow(batches[[i]]) |
|
98 |
- } else { |
|
99 |
- sce.var <- scran::modelGeneVar(batches[[i]]) |
|
100 |
- topVarGenesPerBatch[[i]] <- order(sce.var$bio,decreasing = TRUE)[seq(nHVG)] |
|
101 |
- #mvTrend <- scran::trendVar(batches[[i]], use.spikes=FALSE) |
|
102 |
- #decomposeTrend <- scran::decomposeVar(batches[[i]], mvTrend) |
|
103 |
- #topVarGenesPerBatch[[i]] <- order(decomposeTrend$bio, |
|
104 |
- #decreasing = TRUE)[1:nHVG] |
|
105 |
- } |
|
106 |
- } |
|
107 | 63 |
|
108 |
- if(is.null(kmeansK)){ |
|
64 |
+ # Infer parameters |
|
65 |
+ if(is.null(cellType)){ |
|
66 |
+ cellTypeCol <- NULL |
|
67 |
+ } else { |
|
68 |
+ cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]] |
|
69 |
+ } |
|
70 |
+ ## kmeansK |
|
71 |
+ if(!is.null(cellType) && is.null(kmeansK)){ |
|
109 | 72 |
# If kmeansK not given, detect by cell type. |
110 |
- cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellTypeKey]] |
|
73 |
+ cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]] |
|
111 | 74 |
kmeansK <- c() |
112 | 75 |
for (i in 1:length(uniqBatch)){ |
113 | 76 |
cellTypePerBatch <- cellTypeCol[batchCol == uniqBatch[i]] |
114 | 77 |
kmeansK <- c(kmeansK, length(unique(cellTypePerBatch))) |
115 | 78 |
} |
116 |
- print("Detected kmeansK:") |
|
117 |
- print(kmeansK) |
|
79 |
+ cat("Detected kmeansK:\n") |
|
80 |
+ print(t(data.frame(K = kmeansK, row.names = uniqBatch))) |
|
81 |
+ } |
|
82 |
+ ## SEG |
|
83 |
+ if(is.null(seg)){ |
|
84 |
+ bpParam <- BiocParallel::MulticoreParam(workers = nCores) |
|
85 |
+ seg <- scMerge::scSEGIndex(SummarizedExperiment::assay(inSCE, useAssay), |
|
86 |
+ cell_type = cellTypeCol, |
|
87 |
+ BPPARAM = bpParam) |
|
88 |
+ ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[1:1000],]) |
|
89 |
+ } else { |
|
90 |
+ ctl <- seg |
|
118 | 91 |
} |