Browse code

Resolve conflict from upstream devel

rz2333 authored on 17/04/2020 18:43:57
Showing 85 changed files

... ...
@@ -7,13 +7,24 @@ sudo: false
7 7
 warnings_are_errors: false
8 8
 bioc_required: true
9 9
 
10
+# Current version of xgboost fails install
11
+# Need to force install older version using 'devtools::install_version'
12
+#    which is done in 'before_install' (see below) 
13
+# However 'remotes::dev_package_deps' will try to auto upgrade packages
14
+#    Therefore, a new install command is used with 'upgrade="never"'
15
+# These parts can be deleted once the xgboost error is fixed
16
+
10 17
 before_install:
11 18
   - pyenv install 3.6.1
12 19
   - pyenv global 3.6.1
13
-  - Rscript -e 'update.packages(ask = FALSE)'
20
+  - Rscript -e 'install.packages("devtools")'
21
+  - Rscript -e 'devtools::install_version("xgboost", version = "0.90.0.2", repos = "http://cran.us.r-project.org")'
14 22
   - pip install --upgrade pip
15 23
   - pip install scipy scrublet scanpy scanorama scgen bbknn
16 24
 
25
+install:
26
+- R -e 'devtools::install_deps(dep = TRUE, upgrade="never")'
27
+
17 28
 addons:
18 29
   apt:
19 30
     packages:
... ...
@@ -23,6 +34,7 @@ r_build_args: "--no-build-vignettes"
23 34
 r_check_args: "--no-vignettes --no-build-vignettes --as-cran"
24 35
 r_github_packages:
25 36
   - jimhester/lintr
37
+
26 38
 notifications:
27 39
   slack:
28 40
     secure: sw/He3WX7Fs61DTufgG9hx7fWDhwx57zv+dYB4sr30LgKyIJNbZYSU76ZN4oPwHFGAJ0eyk/22XCuKF8/seSB2ArQuc4kFlsaeWWg1CxDqW9iR0zs91MJL3lJoUOSayy5WcWbsUxD+x8GcB37FCgoTXcveN956bfgrDb2nGrFY6QMSVsnkWi/kiMdzhR4VPzOfmAUZUcr1aDU3WN4ROwbDg89zCfu9HbVKZwyQdYZk7L2Pgg8xGIcGZkaMN1MMD/r0Wc2aVYqCOcn5qO/9CaspEBf7cd6ejVzgAl/LM4dTIPlx9E6ekQGwJXpt1Qxyc0sqVSQWFZvFkWyV3wkqX8pfSXV6ojlP2cGJt+PSQXwLHj0TKWYx7yHbw+55da/b//SVLZVHl+P6VXxQBe2hmFD6sCk3V3uq2CkxzI52bK4Wv4RRncVKIZEpFZOc9P2ik5mf6pICmLHwttlbHeDjJbQ+raYftF704xb1sOxapLwuy+c2SWKPeqOiimfZNgOK+J7JbvQvpnaiHN+3CA3wQXs1auKBwI+swvsZPAzeKssnLv/0AQmX8ckVzvXJzUvl1J73c9V2zG2F44xmIsXwzruEp6leSbeXcLifrfbfQdmaHzq4M/Z83o/fwGRzdWfG7YXDyPIabik7DOMrSUvF+wLGVTjOart8HrA4eVEkKFnRE=
... ...
@@ -33,6 +33,7 @@ Imports:
33 33
     ape,
34 34
     batchelor,
35 35
     BiocGenerics,
36
+    BiocParallel,
36 37
     colourpicker,
37 38
     cowplot,
38 39
     cluster,
... ...
@@ -15,6 +15,7 @@ export(convertSeuratToSCE)
15 15
 export(createSCE)
16 16
 export(distinctColors)
17 17
 export(enrichRSCE)
18
+export(exportSCEtoAnnData)
18 19
 export(exportSCEtoFlatFile)
19 20
 export(filterSCData)
20 21
 export(generateSimulatedData)
... ...
@@ -31,6 +32,7 @@ export(importCellRangerV2)
31 32
 export(importCellRangerV2Sample)
32 33
 export(importCellRangerV3)
33 34
 export(importCellRangerV3Sample)
35
+export(importDropEst)
34 36
 export(importOptimus)
35 37
 export(importSEQC)
36 38
 export(importSTARsolo)
... ...
@@ -37,9 +37,9 @@
37 37
 #' Two batches of pancreas scRNAseq dataset are combined with their original
38 38
 #' counts. Cell types and batches are annotated in `colData(sceBatches)`.
39 39
 #' Two batches came from Wang, et al., 2016, annotated as `'w'`; and Xin, et
40
-#' al., 2016, annotated as `'x'`. Four common cell types, `'alpha'`, `'beta'`,
41
-#' `'gamma'`, and `'delta'` that could be found in both original study were kept
42
-#' for cleaner demonstration.
40
+#' al., 2016, annotated as `'x'`. Two common cell types, `'alpha'` and 
41
+#' `'beta'`, that could be found in both original studies with relatively 
42
+#' large population were kept for cleaner demonstration.
43 43
 #'
44 44
 #' @name sceBatches
45 45
 #' @docType data
... ...
@@ -49,3 +49,17 @@
49 49
 #' @examples
50 50
 #' data('sceBatches')
51 51
 "sceBatches"
52
+
53
+#' Stably Expressed Gene (SEG) list obect, with SEG sets for human and mouse.
54
+#' 
55
+#' The two gene sets came from dataset called `segList` of package `scMerge`.
56
+#' @name SEG
57
+#' @docType data
58
+#' @format list, with two entries `"human"` and `"mouse"`, each is a charactor
59
+#' array.
60
+#' @source `data('segList', package='scMerge')``
61
+#' @keywords datasets
62
+#' @examples 
63
+#' data('SEG')
64
+#' humanSEG <- SEG$human
65
+"SEG"
52 66
\ No newline at end of file
53 67
new file mode 100644
... ...
@@ -0,0 +1,63 @@
1
+#source: https://github.com/klmr/decorator
2
+
3
+`%@%` = function (decorator, f) UseMethod('%@%')
4
+
5
+`%@%.default` = function (decorator, f)
6
+  stop(deparse(substitute(decorator)), ' is not a decorator')
7
+
8
+`%@%.decorator` = function (decorator, f) {
9
+  pretty_decorators = as.list(match.call())[-1]
10
+  # Patch delayed decorator.
11
+  if (! is.null({pretty_patched = attr(decorator, 'calls')}))
12
+    pretty_decorators = c(pretty_patched, pretty_decorators[-1])
13
+  
14
+  # Handle operator precedence so that we can chain decorators.
15
+  if (inherits(f, 'decorator'))
16
+    .delayed_decorate(decorator, f, pretty_decorators)
17
+  else
18
+    prettify(decorator(f), f, pretty_decorators[-length(pretty_decorators)])
19
+}
20
+
21
+decorator = function (f)
22
+  structure(f, class = 'decorator')
23
+
24
+decorator = decorator(decorator)
25
+
26
+print.decorated = function (x, useSource = TRUE, ...) {
27
+  bare = function (f) {
28
+    bare = unclass(f)
29
+    attr(bare, 'decorators') = NULL
30
+    bare
31
+  }
32
+  
33
+  fun_def = capture.output(print.function(bare(x), useSource = useSource, ...))
34
+  for (decorator in attr(x, 'decorators'))
35
+    cat(deparse(decorator), '%@%\n')
36
+  cat(fun_def, sep = '\n')
37
+  invisible(x)
38
+}
39
+
40
+# modules::register_S3_method('print', 'decorated', print.decorated)
41
+
42
+prettify = function (f, original, decorator_calls) {
43
+  attr(f, 'srcref') = pretty_code(original)
44
+  attr(f, 'decorators') = decorator_calls
45
+  class(f) = c(class(f), 'decorated')
46
+  f
47
+}
48
+
49
+pretty_code = function (f) {
50
+  srcref = attr(f, 'srcref')
51
+  if (is.null(srcref)) body(f) else srcref
52
+}
53
+
54
+.delayed_decorate = function (d1, d2, decorator_calls)
55
+  structure(decorator(function (f) d1(d2(f))), calls = decorator_calls)
56
+
57
+# Console logging 
58
+simpleLog <- decorator %@% function(f) {
59
+  function(...) {
60
+    print(match.call())
61
+    f(...)
62
+  }
63
+}
0 64
\ No newline at end of file
... ...
@@ -1,18 +1,22 @@
1 1
 
2
-.runBarcodeRankDrops <- function(barcode.matrix, ...) {
3
-
2
+.runBarcodeRankDrops <- function(barcode.matrix, lower=100, 
3
+                                 fit.bounds=NULL, 
4
+                                 df=20) {
5
+  
4 6
   ## Convert to sparse matrix if not already in that format
5 7
   barcode.matrix <- .convertToMatrix(barcode.matrix)
6 8
   
7
-  output <- DropletUtils::barcodeRanks(m = barcode.matrix, ...)
8
-
9
+  output <- DropletUtils::barcodeRanks(m = barcode.matrix, lower=100, 
10
+                                       fit.bounds=NULL, 
11
+                                       df=20)
12
+  
9 13
   knee.ix <- as.integer(output@listData$total >= S4Vectors::metadata(output)$knee)
10 14
   inflection.ix <- as.integer(output@listData$total >= S4Vectors::metadata(output)$inflection)
11
-
15
+  
12 16
   result <- cbind(knee.ix, inflection.ix)
13 17
   colnames(result) <- c("dropletUtils_barcodeRank_knee",
14 18
                         "dropletUtils_barcodeRank_inflection")
15
-
19
+  
16 20
   return(result)
17 21
 }
18 22
 
... ...
@@ -48,9 +52,11 @@
48 52
 #' sce <- runBarcodeRankDrops(inSCE = emptyDropsSceExample)
49 53
 #' @export
50 54
 runBarcodeRankDrops <- function(inSCE,
51
-    sample = NULL,
52
-    useAssay = "counts",
53
-    ...
55
+                                sample = NULL,
56
+                                useAssay = "counts", 
57
+                                lower=100, 
58
+                                fit.bounds=NULL, 
59
+                                df=20
54 60
 ) {
55 61
   if(!is.null(sample)) {
56 62
     if(length(sample) != ncol(inSCE)) {
... ...
@@ -59,27 +65,35 @@ runBarcodeRankDrops <- function(inSCE,
59 65
   } else {
60 66
     sample = rep(1, ncol(inSCE))
61 67
   }
62
-
68
+  
63 69
   message(paste0(date(), " ... Running 'barcodeRanks'"))
64
-
70
+  
71
+  ##  Getting current arguments values
72
+  argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame()))
73
+  
65 74
   ## Define result matrix for all samples
66 75
   output <- S4Vectors::DataFrame(row.names = colnames(inSCE),
67
-            dropletUtils_BarcodeRank_Knee = integer(ncol(inSCE)),
68
-            dropletUtils_BarcodeRank_Inflection = integer(ncol(inSCE)))
69
-
76
+                                 dropletUtils_BarcodeRank_Knee = integer(ncol(inSCE)),
77
+                                 dropletUtils_BarcodeRank_Inflection = integer(ncol(inSCE)))
78
+  
70 79
   ## Loop through each sample and run barcodeRank
71 80
   samples <- unique(sample)
72 81
   for (i in seq_len(length(samples))) {
73 82
     sceSampleInd <- sample == samples[i]
74 83
     sceSample <- inSCE[, sceSampleInd]
75
-
84
+    
76 85
     mat <- SummarizedExperiment::assay(sceSample, i = useAssay)
77
-    result <- .runBarcodeRankDrops(barcode.matrix = mat, ...)
78
-
86
+    result <- .runBarcodeRankDrops(barcode.matrix = mat, lower=100,
87
+                                   fit.bounds=NULL, 
88
+                                   df=20)
89
+    
79 90
     output[sceSampleInd, ] <- result
80 91
   }
81
-
92
+  
82 93
   colData(inSCE) = cbind(colData(inSCE), output)
83
-
94
+  
95
+  inSCE@metadata$runBarcodeRankDrops <- argsList[-1]
96
+  inSCE@metadata$runBarcodeRankDrops$packageVersion <- packageDescription("DropletUtils")$Version
97
+  
84 98
   return(inSCE)
85 99
 }
... ...
@@ -1,10 +1,24 @@
1
-.runEmptyDrops <- function(barcode.matrix, ...) {
2
-
1
+.runEmptyDrops <- function(barcode.matrix, lower=100,
2
+                           niters=10000,
3
+                           test.ambient=FALSE,
4
+                           ignore=NULL, 
5
+                           alpha=NULL,
6
+                           retain=NULL,
7
+                           barcode.args=list(),
8
+                           BPPARAM=SerialParam()) {
9
+  
3 10
   barcode.matrix <- .convertToMatrix(barcode.matrix)
4
-
5
-  result <- DropletUtils::emptyDrops(m = barcode.matrix, ...)
11
+  
12
+  result <- DropletUtils::emptyDrops(m = barcode.matrix, lower=100,
13
+                                     niters=10000,
14
+                                     test.ambient=FALSE,
15
+                                     ignore=NULL, 
16
+                                     alpha=NULL,
17
+                                     retain=NULL,
18
+                                     barcode.args=list(),
19
+                                     BPPARAM=SerialParam())
6 20
   colnames(result) <- paste0("dropletUtils_emptyDrops_", colnames(result))
7
-
21
+  
8 22
   return(result)
9 23
 }
10 24
 
... ...
@@ -43,13 +57,20 @@
43 57
 #' @import DropletUtils
44 58
 #' @export
45 59
 runEmptyDrops <- function(inSCE,
46
-    sample = NULL,
47
-    useAssay = "counts",
48
-    ...
60
+                          sample = NULL,
61
+                          useAssay = "counts", 
62
+                          lower=100,
63
+                          niters=10000,
64
+                          test.ambient=FALSE,
65
+                          ignore=NULL, 
66
+                          alpha=NULL,
67
+                          retain=NULL,
68
+                          barcode.args=list(),
69
+                          BPPARAM=SerialParam()
49 70
 ) {
50 71
   # getting the current argument values
51
-  current_params <- as.list(sys.call())
52
-  metadata_params <- inSCE@metadata$QCParams
72
+  argsList <- as.list(formals(fun = sys.function(sys.parent()), envir = parent.frame()))
73
+  
53 74
   
54 75
   if(!is.null(sample)) {
55 76
     if(length(sample) != ncol(inSCE)) {
... ...
@@ -58,32 +79,43 @@ runEmptyDrops <- function(inSCE,
58 79
   } else {
59 80
     sample = rep(1, ncol(inSCE))
60 81
   }
61
-
82
+  
62 83
   message(date(), " ... Running 'emptyDrops'")
63
-
84
+  
64 85
   ## Define result matrix for all samples
65 86
   output <- S4Vectors::DataFrame(row.names = colnames(inSCE),
66
-                dropletUtils_emptyDrops_total = integer(ncol(inSCE)),
67
-                dropletUtils_emptyDrops_logprob = numeric(ncol(inSCE)),
68
-                dropletUtils_emptyDrops_pvalue = numeric(ncol(inSCE)),
69
-                dropletUtils_emptyDrops_limited = logical(ncol(inSCE)),
70
-                dropletUtils_emptyDrops_fdr = numeric(ncol(inSCE)))
71
-
87
+                                 dropletUtils_emptyDrops_total = integer(ncol(inSCE)),
88
+                                 dropletUtils_emptyDrops_logprob = numeric(ncol(inSCE)),
89
+                                 dropletUtils_emptyDrops_pvalue = numeric(ncol(inSCE)),
90
+                                 dropletUtils_emptyDrops_limited = logical(ncol(inSCE)),
91
+                                 dropletUtils_emptyDrops_fdr = numeric(ncol(inSCE)))
92
+  
72 93
   ## Loop through each sample and run barcodeRank
73 94
   samples <- unique(sample)
74 95
   for (i in seq_len(length(samples))) {
75 96
     sceSampleInd <- sample == samples[i]
76 97
     sceSample <- inSCE[, sceSampleInd]
77
-
98
+    
78 99
     mat <- SummarizedExperiment::assay(sceSample, i = useAssay)
79
-    result <- .runEmptyDrops(barcode.matrix = mat, ...)
80
-
100
+    result <- .runEmptyDrops(barcode.matrix = mat, lower=100,
101
+                             niters=10000,
102
+                             test.ambient=FALSE,
103
+                             ignore=NULL, 
104
+                             alpha=NULL,
105
+                             retain=NULL,
106
+                             barcode.args=list(),
107
+                             BPPARAM=SerialParam())
108
+    
109
+    
81 110
     output[sceSampleInd, ] <- result
111
+    metadata(output[sceSampleInd, ]) <- metadata(result)
82 112
   }
83 113
   
84 114
   colData(inSCE) = cbind(colData(inSCE), output)
85
-  inSCE@metadata$QCParams <- metadata_params
86
-  inSCE@metadata$QCParams$runEmptyDrops <- current_params
115
+  inSCE@metadata = metadata(output)
116
+  
117
+  inSCE@metadata$runEmptyDrops <- argsList[-1]
118
+  inSCE@metadata$runEmptyDrops$packageVersion <- packageDescription("DropletUtils")$Version
87 119
   
88 120
   return(inSCE)
89
-}
121
+}
90 122
\ No newline at end of file
91 123
new file mode 100644
... ...
@@ -0,0 +1,63 @@
1
+#' @title Export a \link[SingleCellExperiment]{SingleCellExperiment} R object as 
2
+#' Python annData object 
3
+#' @description Writes all assays, colData, rowData, reducedDims, and altExps objects in a
4
+#' \link[SingleCellExperiment]{SingleCellExperiment} to a Python annData object in the .h5ad format
5
+#' All parameters of Anndata.write_h5ad function (https://icb-anndata.readthedocs-hosted.com/en/stable/anndata.AnnData.write_h5ad.html)
6
+#' are available as parameters to this export function and set to defaults. Defaults can be
7
+#' overridden at function call.
8
+#' @param sce \link[SingleCellExperiment]{SingleCellExperiment} R object to be
9
+#'  exported.
10
+#'  @param useAssay Character, default `"counts"`. The name of assay of
11
+#' interests that will be set as the primary matrix of the output AnnData.
12
+#' @param outputDir Path to the directory where .h5ad outputs will be written
13
+#' @param overwrite Boolean. Default \code{TRUE}.
14
+#' @param compression Default \code{None}.If output file compression is required, this variable accepts
15
+#' 'gzip' or 'lzf' as inputs.
16
+#' @param compression_opts Integer. Default \code{NULL} Sets the compression level
17
+#' @param forceDense Default \code{False} Write sparse data as a dense matrix.
18
+#' Refer anndata.write_h5ad documentation for details
19
+#' @examples
20
+#' data(sce_chcl, package = "scds")
21
+#' exportSCEtoAnnData(sce=sce_chcl, compression="gzip")
22
+#'
23
+#' @export
24
+
25
+exportSCEtoAnnData <- function(sce, 
26
+                                useAssay='counts',
27
+                                outputDir="./",
28
+                                sample = "sample",
29
+                                overwrite=TRUE,
30
+                                compression= c('None','lzf','gzip'),
31
+                                compressionOpts = NULL,
32
+                                forceDense= c('False','True')){
33
+  compression <- match.arg(compression)
34
+  forceDense <- match.arg(forceDense)
35
+  if (compression == 'None'){
36
+    compression <- NULL
37
+  }
38
+
39
+  if (!reticulate::py_module_available(module = "scanpy")) {
40
+    warning("Cannot find python module 'scanpy', please install Conda and",
41
+            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
42
+            "If one of these have been previously run to install the modules,",
43
+            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
44
+            " respectively, if R has been restarted since the module installation.",
45
+            " Alternatively, Scrublet can be installed on the local machine",
46
+            "with pip (e.g. pip install --user scanpy) and then the 'use_python()'",
47
+            " function from the 'reticulate' package can be used to select the",
48
+            " correct Python environment.")
49
+    return(sce)}
50
+  
51
+  annData <- .sce2adata(sce,useAssay)
52
+  fileName <- paste0(sample,".h5ad")
53
+  filePath <- file.path(outputDir,fileName)
54
+  
55
+  if (file.exists(filePath) && !isTRUE(overwrite)) {
56
+    stop(paste0(path, " already exists. Change 'outputDir' or set 'overwrite' to TRUE."))
57
+    }
58
+
59
+  annData$write_h5ad(filePath,
60
+                     compression = compression, 
61
+                     compression_opts = compressionOpts,
62
+                     force_dense = forceDense)
63
+}
0 64
new file mode 100644
... ...
@@ -0,0 +1,143 @@
1
+
2
+.readDropEstFile <- function(sampleDir, dataType,rdsFileName){
3
+  dropEst_cell_counts <- file.path(sampleDir, paste(rdsFileName, '.rds', sep=''))
4
+  if (!file.exists(dropEst_cell_counts)){
5
+    stop("DropEst output not found at location specified. Please check path provided and/or filename.")
6
+  }
7
+  dropEst_rds <- readRDS(dropEst_cell_counts) 
8
+  
9
+  return(dropEst_rds)
10
+}
11
+
12
+.constructColdata <- function(dropEst_rds,counts_matrix, dataType){
13
+  coldata_fields <- c("mean_reads_per_umi","aligned_reads_per_cell","aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb")
14
+  coldata_df <-  list()
15
+  for (field in coldata_fields){
16
+    if (field %in% names(dropEst_rds)){
17
+      coldata_field_df <- data.frame(as.matrix(dropEst_rds[[field]]))
18
+      names(coldata_field_df)[1] <- field
19
+      coldata_field_df$cell <- row.names(coldata_field_df)
20
+      
21
+      coldata_df[[field]] <- coldata_field_df
22
+    }}
23
+  coldata_df_merged <- Reduce(function(x, y) merge(x, y, all=TRUE,by="cell"), coldata_df)
24
+  row.names(coldata_df_merged) <- coldata_df_merged$cell
25
+  coldata_df_merged <- S4Vectors::DataFrame(as.matrix(coldata_df_merged))
26
+  if (dataType == 'filtered'){
27
+    coldata_df_merged <- coldata_df_merged[coldata_df_merged$cell %in% colnames(counts_matrix),]
28
+  }
29
+  return(coldata_df_merged)
30
+}
31
+
32
+.extractMetadata <- function(dropEst_rds){
33
+  metadata_fields <- c("saturation_info","merge_targets","reads_per_umi_per_cell")
34
+  metadata <- c()
35
+  for (md in metadata_fields){
36
+    if (md %in% names(dropEst_rds)){
37
+      metadata[[md]] <- dropEst_rds[[md]]
38
+    }}
39
+  return(metadata)
40
+}
41
+
42
+.importDropEstSample <- function(sampleDir = './', 
43
+                                 dataType, 
44
+                                 rdsFileName, 
45
+                                 sampleName = 'sample',
46
+                                 delayedArray = delayedArrary){
47
+  ## Read DropEst RDS
48
+  dropEst_rds <- .readDropEstFile(sampleDir,dataType,rdsFileName)
49
+  if (dataType == 'filtered' && 'cm' %in% names(dropEst_rds)) {
50
+    counts_matrix <- dropEst_rds$cm
51
+  } else if (dataType == 'raw' && 'cm_raw' %in% names(dropEst_rds)) {
52
+    counts_matrix <- dropEst_rds$cm_raw
53
+  } else {
54
+    stop("No counts matrix found in the .rds provided! Exiting.")
55
+  }
56
+  
57
+  if (isTRUE(delayedArray)) {
58
+    counts_matrix <- DelayedArray::DelayedArray(counts_matrix)
59
+    }
60
+  ## Create SingleCellExperiment object
61
+  ## Add SCE ColData. If using filtered counts matrix, colData is subset to include filtered cells.
62
+  ## append sample name to cells in SCE 
63
+  sce <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = counts_matrix))
64
+  colnames(sce) <- paste0(sampleName,"_",colnames(sce)) 
65
+  sce_coldata <- .constructColdata(dropEst_rds, counts_matrix, dataType)
66
+  row.names(sce_coldata) <- paste0(sampleName,"_",row.names(sce_coldata))
67
+  
68
+  if (dim(counts_matrix)[2] == dim(sce_coldata)[1]){
69
+    SummarizedExperiment::colData(sce) <- sce_coldata
70
+  } else {
71
+    warning("Unable to add ColData to SCE. nCol of Counts Matrix not equal to nRow of ColData matrix.")
72
+  }
73
+  
74
+  ## Add SCE metadata
75
+  sce_metadata <- .extractMetadata(dropEst_rds)
76
+  metadata(sce) <- sce_metadata
77
+  
78
+  return(sce)
79
+}
80
+
81
+#' @name importDropEst
82
+#' @rdname importDropEst
83
+#' @title Create a SingleCellExperiment Object from DropEst output 
84
+#' @description imports the RDS file created by DropEst (https://github.com/hms-dbmi/dropEst) and
85
+#' create a SingleCellExperiment object from either the raw or filtered counts matrix.
86
+#' Additionally parse through the RDS to obtain appropriate feature annotations as 
87
+#' SCE coldata, in addition to any metadata.
88
+#' @param sampleDir  A path to the directory containing the data files. Default "./".
89
+#' @param sampleName A User-defined sample name. This will be prepended to all cell barcode IDs.
90
+#'  Default "sample".
91
+#'  @param dataType can be "filtered" or "raw". Default is "filtered"
92
+#' @param rdsFileName File name prefix of the DropEst RDS output. default is "cell.counts"
93
+#' @param delayedArray Boolean. Whether to read the expression matrix as
94
+#'  \link[DelayedArray]{DelayedArray} object or not. Default \code{TRUE}.
95
+#' @details
96
+#' \code{importDropEst} expects either raw counts matrix stored as "cm_raw" or filtered
97
+#' counts matrix stored as "cm" in the DropEst rds output. 
98
+#' ColData is obtained from the DropEst corresponding to "mean_reads_per_umi","aligned_reads_per_cell",
99
+#' "aligned_umis_per_cell","requested_umis_per_cb","requested_reads_per_cb"
100
+#' If using filtered counts matrix, the colData dataframe is 
101
+#' subset to contain features from the filtered counts matrix alone.
102
+#' If any annotations of ("saturation_info","merge_targets","reads_per_umi_per_cell") are 
103
+#' found in the DropEst rds, they will be added to the SCE metadata field
104
+#' @return A \code{SingleCellExperiment} object containing the count matrix,
105
+#'  the feature annotations from DropEst as ColData, and any metadata from DropEst
106
+#' @examples
107
+#' # Example #1
108
+#' Example DropEst outputs were downloaded from the DropEst Github 
109
+#' (http://pklab.med.harvard.edu/viktor/dropest_paper/dropest_0.8.5.zip). 
110
+#' To run the dropest import function with the example dataset, 
111
+#' set the sampleDirs variable to the example dropEst provided in SCTK as follows-
112
+#' sce <- importDropEst(sampleDirs = c('path/to/dropest/folder/'), 
113
+#'                      dataType='filtered', sampleNames=c('sample'))
114
+#' @export
115
+importDropEst <- function(sampleDirs = NULL, 
116
+                          dataType = c('filtered','raw'),
117
+                          rdsFileName = 'cell.counts',
118
+                          sampleNames = NULL,
119
+                          delayedArray = TRUE) {
120
+  dataType <- match.arg(dataType)
121
+  
122
+  if (length(sampleDirs)!=length(sampleNames)){
123
+    stop("Please provide sample names for all input directories")
124
+  }
125
+  
126
+  res <- vector("list", length = length(sampleDirs))
127
+  
128
+  for (i in seq_along(sampleDirs)){
129
+    scei <- .importDropEstSample(sampleDir = sampleDirs[[i]],
130
+                         sampleName = sampleNames[[i]],
131
+                         dataType = dataType,
132
+                         rdsFileName = rdsFileName,
133
+                         delayedArray = delayedArray)
134
+    res[[i]] <- scei
135
+  }
136
+  sce <- do.call(SingleCellExperiment::cbind, res)
137
+  return(sce)
138
+}
139
+  
140
+  
141
+
142
+
143
+
... ...
@@ -71,7 +71,7 @@ summarizeTable <- function(inSCE, useAssay="counts", expressionCutoff=1700){
71 71
 #' newSCE <- createSCE(assayFile = counts_mat, annotFile = sample_annot,
72 72
 #'                     featureFile = row_annot, assayName = "counts",
73 73
 #'                     inputDataFrames = TRUE, createLogCounts = TRUE)
74
-createSCE <- function(assayFile=NULL, annotFile=NULL, featureFile=NULL,
74
+createSCE <- simpleLog %@% function(assayFile=NULL, annotFile=NULL, featureFile=NULL,
75 75
                       assayName="counts", inputDataFrames=FALSE,
76 76
                       createLogCounts=TRUE){
77 77
   
... ...
@@ -265,7 +265,6 @@ distinctColors <- function(n, hues = c("red", "cyan", "orange", "blue",
265 265
   }
266 266
 }
267 267
 
268
-
269 268
 ## Convert a matrix to a sparse matrix and preserve column/row names
270 269
 .convertToMatrix <- function(x) {
271 270
   cn <- colnames(x)
... ...
@@ -278,15 +277,3 @@ distinctColors <- function(n, hues = c("red", "cyan", "orange", "blue",
278 277
   return(x)
279 278
 }
280 279
 
281
-`%@%` = function(decorator, f) {
282
-  decorator(f)
283
-}
284
-
285
-simpleLog <- function(f) {
286
-  function(...) {
287
-    print(match.call())
288
-    f(...)
289
-  }
290
-}
291
-
292
-
... ...
@@ -5,10 +5,10 @@
5 5
 #' across all batches of the data. 
6 6
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
7 7
 #' and analysis procedures.
8
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
8
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
9 9
 #' of the assay requiring batch correction in "inSCE", should exist in 
10 10
 #' `assayNames(inSCE)`.
11
-#' @param batchKey character, default `"batch"`. A string indicating the 
11
+#' @param batch character, default `"batch"`. A string indicating the 
12 12
 #' field of `colData(inSCE)` that defines different batches.
13 13
 #' @param reducedDimName character, default `"BBKNN"`. The name for the 
14 14
 #' corrected low-dimensional representation.
... ...
@@ -26,22 +26,33 @@
26 26
 #' data('sceBatches', package = 'singleCellTK')
27 27
 #' sceCorr <- runBBKNN(sceBatches)
28 28
 #' }
29
-runBBKNN <-function(inSCE, exprs = 'logcounts', batchKey = 'batch', 
29
+runBBKNN <-function(inSCE, useAssay = 'logcounts', batch = 'batch', 
30 30
                     reducedDimName = 'BBKNN', nComponents = 50L){
31 31
     ## Input check
32
-    if(!class(inSCE) == "SingleCellExperiment" && 
33
-       !class(inSCE) == "SCtkExperiment"){
32
+    if(!inherits(inSCE, "SingleCellExperiment")){
34 33
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
35 34
     }
36
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
37
-        stop(paste("\"batchKey\" name:", batchKey, "not found"))
35
+    if(!reticulate::py_module_available(module = "bbknn")){
36
+        warning("Cannot find python module 'bbknn', please install Conda and",
37
+            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
38
+            "If one of these have been previously run to install the modules,",
39
+            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
40
+            " respectively, if R has been restarted since the module installation.",
41
+            " Alternatively, bbknn can be installed on the local machine",
42
+            "with pip (e.g. pip install bbknn) and then the 'use_python()'",
43
+            " function from the 'reticulate' package can be used to select the",
44
+            " correct Python environment.")
45
+        return(inSCE)
46
+    }
47
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
48
+        stop(paste("\"batch\" name:", batch, "not found"))
38 49
     }
39 50
     reducedDimName <- gsub(' ', '_', reducedDimName)
40 51
     
41 52
     ## Run algorithm
42
-    adata <- .sce2adata(inSCE, mainAssay = exprs)
53
+    adata <- .sce2adata(inSCE, mainAssay = useAssay)
43 54
     sc$tl$pca(adata, n_comps = nComponents)
44
-    bbknn$bbknn(adata, batch_key = batchKey, n_pcs = nComponents)
55
+    bbknn$bbknn(adata, batch_key = batch, n_pcs = nComponents)
45 56
     sc$tl$umap(adata, n_components = nComponents)
46 57
     bbknnUmap <- adata$obsm[["X_umap"]]
47 58
     SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- bbknnUmap
... ...
@@ -5,74 +5,61 @@
5 5
 #' more robust performance. For introduction of MNN, see `runMNNCorrect`
6 6
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
7 7
 #' and analysis procedures.
8
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
8
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
9 9
 #' of the assay requiring batch correction in "inSCE", should exist in 
10 10
 #' `assayNames(inSCE)`. Alternatively, see `pcInput` parameter.
11
-#' @param batchKey character, default `"batch"`. A string indicating the 
11
+#' @param batch character, default `"batch"`. A string indicating the 
12 12
 #' field of `colData(inSCE)` that defines different batches.
13 13
 #' @param reducedDimName character, default `"fastMNN"`. The name for the 
14 14
 #' corrected low-dimensional representation.
15 15
 #' @param pcInput bool, default `FALSE`. Whether to use a reduceDim matrix for 
16
-#' batch effect correction. If TRUE, `exprs` should exist in 
16
+#' batch effect correction. If TRUE, `useAssay` should exist in 
17 17
 #' `reduceDimNames(inSCE)`. Note that more dimensions in precomputed reducedDim
18 18
 #' have shown better correction results.
19 19
 #' @return SingleCellExperiment object with `reducedDim(inSCE, reducedDimName)` 
20 20
 #' updated with corrected low-dimentional representation.
21 21
 #' @export
22 22
 #' @references Lun ATL, et al., 2016
23
-#' @examples  
23
+#' @examples
24
+#' \dontrun{
24 25
 #' data('sceBatches', package = 'singleCellTK')
25
-#' sceCorr <- runFastMNN(sceBatches, exprs = 'PCA', pcInput = TRUE)
26
-runFastMNN <- function(inSCE, exprs = "logcounts", reducedDimName = "MNN", 
27
-                       batchKey = 'batch', pcInput = FALSE){
26
+#' sceCorr <- runFastMNN(sceBatches, useAssay = 'logcounts', pcInput = FALSE)
27
+#' }
28
+runFastMNN <- function(inSCE, useAssay = "logcounts", reducedDimName = "MNN", 
29
+                       batch = 'batch', pcInput = FALSE){
28 30
     ## Input check
29
-    if(!class(inSCE) == "SingleCellExperiment" && 
30
-       !class(inSCE) == "SCtkExperiment"){
31
+    if(!inherits(inSCE, "SingleCellExperiment")){
31 32
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
32 33
     }
33
-    if(pcInput){
34
-        if(!exprs %in% SingleCellExperiment::reducedDimNames(inSCE)) {
35
-            stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
34
+    if(isTRUE(pcInput)){
35
+        if(!(useAssay %in% SingleCellExperiment::reducedDimNames(inSCE))) {
36
+            stop(paste("\"useAssay\" (reducedDim) name: ", useAssay, " not found."))
36 37
         }
37 38
     } else {
38
-        if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
39
-            stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
39
+        if(!(useAssay %in% SummarizedExperiment::assayNames(inSCE))) {
40
+            stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
40 41
         }
41 42
     }
42 43
     
43
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
44
-        stop(paste("\"batchKey name:", batchKey, "not found."))
44
+    if(!(batch %in% names(SummarizedExperiment::colData(inSCE)))){
45
+        stop(paste("\"batch name:", batch, "not found."))
45 46
     }
46 47
     reducedDimName <- gsub(' ', '_', reducedDimName)
47 48
     
48 49
     ## Run algorithm
49
-    # Extract each batch
50
-    if(pcInput){
51
-        mat <- SingleCellExperiment::reducedDim(inSCE, exprs)
52
-    } else {
53
-        mat <- SummarizedExperiment::assay(inSCE, exprs)
54
-    }
55 50
     batches <- list()
56
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
57
-    batchIndicator <- unique(batchCol)
58
-    for(i in batchIndicator){
59
-        if(pcInput){
60
-            batches[[i]] <- mat[batchCol == i,]
61
-        } else {
62
-            batches[[i]] <- mat[,batchCol == i]
63
-        }
64
-        
65
-    }
51
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
52
+    batchFactor <- as.factor(batchCol)
53
+    
66 54
     if(pcInput){
67
-        mnn <- batchelor::fastMNN(batches[[1]], 
68
-                              batches[[2]], 
69
-                              pc.input = TRUE)    
55
+        mat <- SingleCellExperiment::reducedDim(inSCE, useAssay)
56
+        redMNN <- batchelor::reducedMNN(mat, batch = batchFactor)
57
+        newRedDim <- redMNN$corrected
70 58
     } else {
71
-        mnn <- batchelor::fastMNN(batches[[1]], 
72
-                              batches[[2]])
59
+        mat <- SummarizedExperiment::assay(inSCE, useAssay)
60
+        mnnSCE <- batchelor::fastMNN(mat, batch = batchFactor)
61
+        newRedDim <- SingleCellExperiment::reducedDim(mnnSCE, 'corrected')
73 62
     }
74
-    rn <- rownames(mat)
75
-    newMat <- mnn$corrected[rn,]
76
-    SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- newMat
63
+    SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- newRedDim
77 64
     return(inSCE)
78 65
 }
... ...
@@ -4,15 +4,15 @@
4 4
 #' cells group by cell type rather than dataset-specific conditions. 
5 5
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
6 6
 #' and analysis procedures.
7
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
7
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
8 8
 #' of the assay requiring batch correction in "inSCE", should exist in 
9 9
 #' `assayNames(inSCE)`.
10
-#' @param batchKey character, default `"batch"`. A string indicating the 
10
+#' @param batch character, default `"batch"`. A string indicating the 
11 11
 #' field of `colData(inSCE)` that defines different batches.
12 12
 #' @param reducedDimName character, default `"HARMONY"`. The name for the 
13 13
 #' corrected low-dimensional representation.
14 14
 #' @param pcInput bool, default `FALSE`. Whether to do correction on a low-dim
15
-#' matrix. If `TRUE`, will look for `exprs` in `reducedDim(inSCE)`.
15
+#' matrix. If `TRUE`, will look for `useAssay` in `reducedDim(inSCE)`.
16 16
 #' @param nComponents integer, default `20`. Number of principle components or 
17 17
 #' dimensionality to generate in the resulting reducedDim. If `pcInput`, will
18 18
 #' directly follow the dimensionality of the specified reducedDim.
... ...
@@ -25,46 +25,34 @@
25 25
 #' @references Ilya Korsunsky, et al., 2019
26 26
 #' @examples  
27 27
 #' data('sceBatches', package = 'singleCellTK')
28
-#' sceBatches
29
-#' ## class: SingleCellExperiment 
30
-#' ## dim: 27610 1820 
31
-#' ## metadata(0):
32
-#' ## assays(3): normcounts logcounts
33
-#' ## rownames(27610): GCG MALAT1 ... LOC102724004 LOC102724238
34
-#' ## rowData names(0):
35
-#' ## colnames(1820): reads.12732 reads.12733 ... Sample_1598 Sample_1600
36
-#' ## colData names(2): cell_type1 batch
37
-#' ## reducedDimNames(5): PCA
38
-#' ## spikeNames(0):
39 28
 #' sceCorr <- runHarmony(sceBatches, nComponents = 10)
40
-runHarmony <- function(inSCE, exprs = "logcounts", pcInput = FALSE, 
41
-                       batchKey = "batch", reducedDimName = "HARMONY", 
29
+runHarmony <- function(inSCE, useAssay = "logcounts", pcInput = FALSE, 
30
+                       batch = "batch", reducedDimName = "HARMONY", 
42 31
                        nComponents = 50, theta = 5, nIter = 10){
43 32
     ## Input check
44
-    if(!class(inSCE) == "SingleCellExperiment" && 
45
-       !class(inSCE) == "SCtkExperiment"){
33
+    if(!inherits(inSCE, "SingleCellExperiment")){
46 34
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
47 35
     }    
48 36
     if(pcInput){
49
-        if(!exprs %in% SingleCellExperiment::reducedDimNames(inSCE)) {
50
-            stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
37
+        if(!useAssay %in% SingleCellExperiment::reducedDimNames(inSCE)) {
38
+            stop(paste("\"useAssay\" (reducedDim) name: ", useAssay, " not found."))
51 39
         }
52 40
     } else {
53
-        if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
54
-            stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
41
+        if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
42
+            stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
55 43
         }
56 44
     }
57
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
58
-        stop(paste("\"batchKey\" name:", batchKey, "not found"))
45
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
46
+        stop(paste("\"batch\" name:", batch, "not found"))
59 47
     }
60 48
     reducedDimName <- gsub(' ', '_', reducedDimName)
61 49
     
62 50
     ## Run algorithm
63
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
51
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
64 52
     if(pcInput){
65
-        mat <- SingleCellExperiment::reducedDim(inSCE, exprs)
53
+        mat <- SingleCellExperiment::reducedDim(inSCE, useAssay)
66 54
     } else{
67
-        sceTmp <- scater::runPCA(inSCE, exprs_values = exprs, 
55
+        sceTmp <- scater::runPCA(inSCE, exprs_values = useAssay, 
68 56
                                  ncomponents = nComponents)
69 57
         mat <- SingleCellExperiment::reducedDim(sceTmp, 'PCA')
70 58
     }
... ...
@@ -4,10 +4,10 @@
4 4
 #' shared and dataset-specific factors.
5 5
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
6 6
 #' and analysis procedures.
7
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
7
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
8 8
 #' of the assay requiring batch correction in "inSCE", should exist in 
9 9
 #' `assayNames(inSCE)`.
10
-#' @param batchKey character, default `"batch"`. A string indicating the 
10
+#' @param batch character, default `"batch"`. A string indicating the 
11 11
 #' field of `colData(inSCE)` that defines different batches.
12 12
 #' @param reducedDimName character, default `"LIGER"`. The name for the 
13 13
 #' corrected low-dimensional representation.
... ...
@@ -28,31 +28,30 @@
28 28
 #' data('sceBatches', package = 'singleCellTK')
29 29
 #' sceCorr <- runLIGER(sceBatches)
30 30
 #' }
31
-runLIGER <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', 
31
+runLIGER <- function(inSCE, useAssay = 'logcounts', batch = 'batch', 
32 32
                      reducedDimName = 'LIGER', nComponents = 20L, lambda = 5.0, 
33 33
                      resolution = 1.0){
34 34
     ## Input check
35
-    if(!class(inSCE) == "SingleCellExperiment" && 
36
-       !class(inSCE) == "SCtkExperiment"){
35
+    if(!inherits(inSCE, "SingleCellExperiment")){
37 36
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
38 37
     }
39
-    if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
40
-        stop(paste("\"exprs\" (assay) name: ", exprs, " not found"))
38
+    if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
39
+        stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found"))
41 40
     }
42
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
43
-        stop(paste("\"batchKey\" name:", batchKey, "not found"))
41
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
42
+        stop(paste("\"batch\" name:", batch, "not found"))
44 43
     }
45 44
     reducedDimName <- gsub(' ', '_', reducedDimName)
46 45
     
47 46
     ## Run algorithm
48
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
47
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
49 48
     batches <- unique(batchCol)
50 49
     nBatch <- length(batches)
51 50
     batchMatrices <- list()
52 51
     for(i in 1:nBatch){
53 52
         b <- batches[i]
54 53
         batchMatrices[[b]] <- 
55
-            SummarizedExperiment::assay(inSCE, exprs)[,batchCol == b]
54
+            SummarizedExperiment::assay(inSCE, useAssay)[,batchCol == b]
56 55
     }
57 56
     ligerex <- liger::createLiger(batchMatrices)
58 57
     ligerex <- liger::normalize(ligerex)
... ...
@@ -4,10 +4,10 @@
4 4
 #' removes the component due to the batch effects.
5 5
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
6 6
 #' and analysis procedures.
7
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
7
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
8 8
 #' of the assay requiring batch correction in "inSCE", should exist in 
9 9
 #' `assayNames(inSCE)`.
10
-#' @param batchKey character, default `"batch"`. A string indicating the 
10
+#' @param batch character, default `"batch"`. A string indicating the 
11 11
 #' field of `colData(inSCE)` that defines different batches.
12 12
 #' @param assayName character, default `"LIMMA"`. The name for the corrected 
13 13
 #' full-sized expression matrix.
... ...
@@ -17,38 +17,26 @@
17 17
 #' @references Gordon K Smyth, et al., 2003
18 18
 #' @examples  
19 19
 #' data('sceBatches', package = 'singleCellTK')
20
-#' sceBatches
21
-#' ## class: SingleCellExperiment 
22
-#' ## dim: 27610 1820 
23
-#' ## metadata(0):
24
-#' ## assays(3): normcounts logcounts
25
-#' ## rownames(27610): GCG MALAT1 ... LOC102724004 LOC102724238
26
-#' ## rowData names(0):
27
-#' ## colnames(1820): reads.12732 reads.12733 ... Sample_1598 Sample_1600
28
-#' ## colData names(2): cell_type1 batch
29
-#' ## reducedDimNames(5): PCA
30
-#' ## spikeNames(0):
31 20
 #' sceCorr <- runLimmaBC(sceBatches)
32
-runLimmaBC <- function(inSCE, exprs = "logcounts", assayName = "LIMMA", 
33
-                       batchKey = "batch") {
21
+runLimmaBC <- function(inSCE, useAssay = "logcounts", assayName = "LIMMA", 
22
+                       batch = "batch") {
34 23
     ## Input check
35
-    if(!class(inSCE) == "SingleCellExperiment" && 
36
-       !class(inSCE) == "SCtkExperiment"){
24
+    if(!inherits(inSCE, "SingleCellExperiment")){
37 25
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
38 26
     }
39
-    if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
40
-        stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
27
+    if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
28
+        stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
41 29
     }
42
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
43
-        stop(paste("\"batchKey\" name:", batchKey, "not found."))
30
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
31
+        stop(paste("\"batch\" name:", batch, "not found."))
44 32
     }
45 33
     
46 34
     assayName <- gsub(' ', '_', assayName)
47 35
     
48 36
     ## Run algorithm
49 37
     ## One more check for the batch names
50
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
51
-    mat <- SummarizedExperiment::assay(inSCE, exprs)
38
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
39
+    mat <- SummarizedExperiment::assay(inSCE, useAssay)
52 40
     newMat <- limma::removeBatchEffect(mat, batch = batchCol)
53 41
     SummarizedExperiment::assay(inSCE, assayName) <- newMat
54 42
     return(inSCE)
... ...
@@ -8,17 +8,13 @@
8 8
 #' applying a Gaussian smoothing kernel with bandwidth `sigma`.
9 9
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
10 10
 #' and analysis procedures.
11
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
11
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
12 12
 #' of the assay requiring batch correction in "inSCE", should exist in 
13 13
 #' `assayNames(inSCE)`.
14
-#' @param batchKey character, default `"batch"`. A string indicating the 
14
+#' @param batch character, default `"batch"`. A string indicating the 
15 15
 #' field of `colData(inSCE)` that defines different batches.
16
-#' @param reducedDimName character, default `"MNN"`. The name for the 
17
-#' corrected low-dimensional representation.
18
-#' @param nHVG integer, default `1000`. The number of top highly variable genes 
19
-#' to select per batch. Afterwards the intersection of all HVG sets will be 
20
-#' taken for running , thus the exact dimensionality of the resulting
21
-#' reducedDim is not certain. 
16
+#' @param assayName character, default `"MNN"`. The name for the corrected 
17
+#' full-sized expression matrix.
22 18
 #' @param k integer, default `20`. Specifies the number of nearest neighbours to 
23 19
 #' consider when defining MNN pairs. This should be interpreted as the minimum 
24 20
 #' frequency of each cell type or state in each batch. Larger values will 
... ...
@@ -37,57 +33,26 @@
37 33
 #' @examples  
38 34
 #' data('sceBatches', package = 'singleCellTK')
39 35
 #' sceCorr <- runMNNCorrect(sceBatches)
40
-runMNNCorrect <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', 
41
-                          reducedDimName = 'MNN', nHVG = 1000, 
42
-                          k = 20, sigma = 0.1){
36
+runMNNCorrect <- function(inSCE, useAssay = 'logcounts', batch = 'batch', 
37
+                          assayName = 'MNN', k = 20, sigma = 0.1){
43 38
     ## Input check
44
-    if(!class(inSCE) == "SingleCellExperiment" && 
45
-       !class(inSCE) == "SCtkExperiment"){
39
+    if(!inherits(inSCE, "SingleCellExperiment")){
46 40
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
47 41
     }
48
-    if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
49
-        stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
42
+    if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
43
+        stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
50 44
     }
51
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
52
-        stop(paste("\"batchKey name:", batchKey, "not found."))
45
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
46
+        stop(paste("\"batch name:", batch, "not found."))
53 47
     }
54
-    reducedDimName <- gsub(' ', '_', reducedDimName)
48
+    assayName <- gsub(' ', '_', assayName)
55 49
     
56 50
     ## Run algorithm
57
-    # Split the batches
58
-    batches <- list()
59
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
60
-    uniqBatch <- unique(batchCol)
61
-    for(i in uniqBatch){
62
-        batches[[i]] <- inSCE[, batchCol == i]
63
-    }
64
-    
65
-    # Select HVG
66
-    topVarGenesPerBatch <- list()
67
-    for(i in uniqBatch){
68
-        if(nrow(batches[[i]]) <= nHVG){
69
-            topVarGenesPerBatch[[i]] <- 1:nrow(batches[[i]])
70
-        } else {
71
-          sce.var <- scran::modelGeneVar(batches[[i]])
72
-          topVarGenesPerBatch[[i]] <- order(sce.var$bio,decreasing = TRUE)[seq(nHVG)]
73
-#            mvTrend <- scran::trendVar(batches[[i]], use.spikes=FALSE)
74
-#            decomposeTrend <- scran::decomposeVar(batches[[i]], mvTrend)
75
-#            topVarGenesPerBatch[[i]] <- order(decomposeTrend$bio, 
76
-#                                              decreasing = TRUE)[1:nHVG]
77
-        }    
78
-    }
79
-    selectedHVG <- BiocGenerics::Reduce(intersect, topVarGenesPerBatch)
80
-    
81
-    ## Run mnnCorrect
82
-    inputAssays <- list()
83
-    for(i in uniqBatch){
84
-        inputAssays[[i]] <- 
85
-            SummarizedExperiment::assay(batches[[i]], exprs)[selectedHVG,]
86
-    }
87
-    corrected <- BiocGenerics::do.call(batchelor::mnnCorrect, 
88
-                         c(inputAssays, list(k = k, sigma = sigma)))
89
-    corrected <- t(assay(corrected, 'corrected')[,colnames(inSCE)])
90
-    SingleCellExperiment::reducedDim(inSCE, reducedDimName) <- corrected
91
-    
51
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
52
+    batchFactor <- as.factor(batchCol)
53
+    mnnSCE <- batchelor::mnnCorrect(inSCE, batch = batchFactor, 
54
+                                    k = k, sigma = sigma)
55
+    corrected <- SummarizedExperiment::assay(mnnSCE, 'corrected')
56
+    SummarizedExperiment::assay(inSCE, assayName) <- corrected
92 57
     return(inSCE)
93 58
 }
... ...
@@ -6,10 +6,10 @@
6 6
 #' panorama. 
7 7
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
8 8
 #' and analysis procedures.
9
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
9
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
10 10
 #' of the assay requiring batch correction in "inSCE", should exist in 
11 11
 #' `assayNames(inSCE)`.
12
-#' @param batchKey character, default `"batch"`. A string indicating the 
12
+#' @param batch character, default `"batch"`. A string indicating the 
13 13
 #' field of `colData(inSCE)` that defines different batches.
14 14
 #' @param assayName character, default `"SCANORAMA"`. The name for the 
15 15
 #' corrected full-sized expression matrix.
... ...
@@ -24,37 +24,39 @@
24 24
 #' @export
25 25
 #' @references Brian Hie et al, 2019
26 26
 #' @examples 
27
+#' \dontrun{
27 28
 #' data('sceBatches', package = 'singleCellTK')
28
-#' sceBatches
29
-#' ## class: SingleCellExperiment 
30
-#' ## dim: 27610 1820 
31
-#' ## metadata(0):
32
-#' ## assays(3): normcounts logcounts
33
-#' ## rownames(27610): GCG MALAT1 ... LOC102724004 LOC102724238
34
-#' ## rowData names(0):
35
-#' ## colnames(1820): reads.12732 reads.12733 ... Sample_1598 Sample_1600
36
-#' ## colData names(2): cell_type1 batch
37
-#' ## reducedDimNames(5): PCA
38
-#' ## spikeNames(0):
39 29
 #' sceCorr <- runSCANORAMA(sceBatches)
40
-runSCANORAMA <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', 
30
+#' }
31
+runSCANORAMA <- function(inSCE, useAssay = 'logcounts', batch = 'batch', 
41 32
                          assayName = 'SCANORAMA', SIGMA = 15, ALPHA = 0.1, 
42 33
                          KNN = 20L){
43 34
     ## Input check
44
-    if(!class(inSCE) == "SingleCellExperiment" && 
45
-       !class(inSCE) == "SCtkExperiment"){
35
+    if(!inherits(inSCE, "SingleCellExperiment")){
46 36
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
47 37
     }
48
-    if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
49
-        stop(paste("\"exprs\" (assay) name: ", exprs, " not found"))
38
+    if(!reticulate::py_module_available(module = "scanorama")){
39
+        warning("Cannot find python module 'scanorama', please install Conda and",
40
+            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
41
+            "If one of these have been previously run to install the modules,",
42
+            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
43
+            " respectively, if R has been restarted since the module installation.",
44
+            " Alternatively, scanorama can be installed on the local machine",
45
+            "with pip (e.g. pip install scanorama) and then the 'use_python()'",
46
+            " function from the 'reticulate' package can be used to select the",
47
+            " correct Python environment.")
48
+        return(inSCE)
50 49
     }
51
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
52
-        stop(paste("\"batchKey\" name:", batchKey, "not found"))
50
+    if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
51
+        stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found"))
52
+    }
53
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
54
+        stop(paste("\"batch\" name:", batch, "not found"))
53 55
     }
54 56
     assayName <- gsub(' ', '_', assayName)
55 57
     
56 58
     ## Run algorithm
57
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
59
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
58 60
     batches <- unique(batchCol)
59 61
     nBatch <- length(batches)
60 62
     ixList <- list()
... ...
@@ -63,7 +65,7 @@ runSCANORAMA <- function(inSCE, exprs = 'logcounts', batchKey = 'batch',
63 65
     }
64 66
     exprsList <- list()
65 67
     for(i in 1:nBatch){
66
-        exprsList[[i]] <- t(as.matrix(SummarizedExperiment::assay(inSCE[,ixList[[i]]], exprs)))
68
+        exprsList[[i]] <- t(as.matrix(SummarizedExperiment::assay(inSCE[,ixList[[i]]], useAssay)))
67 69
     }
68 70
     geneList <- list()
69 71
     for(i in 1:nBatch){
... ...
@@ -9,12 +9,12 @@
9 9
 #' cores.
10 10
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
11 11
 #' and analysis procedures.
12
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
12
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
13 13
 #' of the assay requiring batch correction in "inSCE", should exist in 
14 14
 #' `assayNames(inSCE)`.
15
-#' @param batchKey character, default `"batch"`. A string indicating the field 
15
+#' @param batch character, default `"batch"`. A string indicating the field 
16 16
 #' of `colData(inSCE)` that defines different batches.
17
-#' @param cellTypeKey character, default `"cell_type"`. A string indicating the 
17
+#' @param cellType character, default `"cell_type"`. A string indicating the 
18 18
 #' field of `colData(inSCE)` that defines different cell types.
19 19
 #' @param assayName character, default `"SCGEN"`. The name for the corrected 
20 20
 #' full-sized expression matrix.
... ...
@@ -27,29 +27,40 @@
27 27
 #' data('sceBatches', package = 'singleCellTK')
28 28
 #' sceCorr <- runSCGEN(sceBatches)
29 29
 #' }
30
-runSCGEN <- function(inSCE, exprs = 'logcounts', batchKey = 'batch', 
31
-                     cellTypeKey = "cell_type", assayName = 'SCGEN', 
30
+runSCGEN <- function(inSCE, useAssay = 'logcounts', batch = 'batch', 
31
+                     cellType = "cell_type", assayName = 'SCGEN', 
32 32
                      nEpochs = 50L){
33 33
     ## Input check
34
-    if(!class(inSCE) == "SingleCellExperiment" 
35
-       && !class(inSCE) == "SCtkExperiment"){
34
+    if(!inherits(inSCE, "SingleCellExperiment")){
36 35
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
37 36
     }
38
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
39
-        stop(paste("\"batchKey\" name:", batchKey, "not found"))
37
+    if (!reticulate::py_module_available(module = "scgen")) {
38
+        warning("Cannot find python module 'scgen', please install Conda and",
39
+            " run sctkPythonInstallConda() or run sctkPythonInstallVirtualEnv().",
40
+            "If one of these have been previously run to install the modules,",
41
+            "make sure to run selectSCTKConda() or selectSCTKVirtualEnvironment(),",
42
+            " respectively, if R has been restarted since the module installation.",
43
+            " Alternatively, scgen can be installed on the local machine",
44
+            "with pip (e.g. pip install scgen) and then the 'use_python()'",
45
+            " function from the 'reticulate' package can be used to select the",
46
+            " correct Python environment.")
47
+        return(inSCE)
40 48
     }
41
-    if(!cellTypeKey %in% names(SummarizedExperiment::colData(inSCE))){
42
-        stop(paste("\"cellTypeKey\" name:", batchKey, "not found"))
49
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
50
+        stop(paste("\"batch\" name:", batch, "not found"))
51
+    }
52
+    if(!cellType %in% names(SummarizedExperiment::colData(inSCE))){
53
+        stop(paste("\"cellType\" name:", cellType, "not found"))
43 54
     }
44 55
     assayName <- gsub(' ', '_', assayName)
45 56
     nEpochs <- as.integer(nEpochs)
46 57
 
47 58
     ## Run algorithm
48
-    adata <- .sce2adata(inSCE, mainAssay = exprs)
59
+    adata <- .sce2adata(inSCE, mainAssay = useAssay)
49 60
     network = scgen$VAEArith(x_dimension = adata$n_vars)
50 61
     network$train(train_data = adata, n_epochs = nEpochs)
51
-    corrAdata <- scgen$batch_removal(network, adata, batch_key = batchKey, 
52
-                                     cell_label_key = cellTypeKey)
62
+    corrAdata <- scgen$batch_removal(network, adata, batch_key = batch, 
63
+                                     cell_label_key = cellType)
53 64
     corrMat <- t(corrAdata$X)
54 65
     SummarizedExperiment::assay(inSCE, assayName) <- corrMat
55 66
     return(inSCE)
... ...
@@ -5,128 +5,103 @@
5 5
 #' scRNA-Seq data. 
6 6
 #' @param inSCE SingleCellExperiment object. An object that stores your dataset
7 7
 #' and analysis procedures.
8
-#' @param exprs character, default `"logcounts"`. A string indicating the name 
8
+#' @param useAssay character, default `"logcounts"`. A string indicating the name 
9 9
 #' of the assay requiring batch correction in "inSCE", should exist in 
10 10
 #' `assayNames(inSCE)`.
11
-#' @param batchKey character, default `"batch"`. A string indicating the field 
11
+#' @param batch character, default `"batch"`. A string indicating the field 
12 12
 #' of `colData(inSCE)` that defines different batches.
13 13
 #' @param assayName character, default `"scMerge"`. The name for the corrected 
14 14
 #' full-sized expression matrix.
15
-#' @param kmeansK vector of int, default `NULL`. Indicates the kmeans's K for 
16
-#' each batch. The length of `kmeansK` needs to be the same as the number of 
17
-#' batches. If not given, this vector will be identified by counting cell types 
18
-#' in each batch.
19
-#' @param cellTypeKey character, default `"cell_type"`. A string indicating the 
20
-#' field of `colData(inSCE)` that defines different cell types. Only needed 
21
-#' when `kmeansK` is left to `NULL`. 
22
-#' @param species character, default `NULL`. Choose from `{"human", "mouse"}`. 
23
-#' If given, the algorithm will take default species specific SEG (Stably 
24
-#' Expressed Genes) set as negative control. 
25
-#' @param seg character vecter, default `NULL`. A gene list that specifies SEG 
26
-#' (Stably Expressed Genes) set as negative control. If `species` set, will be 
27
-#' ignored; if both `species` and `seg` are `NULL`, will take some time to 
28
-#' automatically detect top 1000 SEGs from `assay(inSCE, exprs)`.
29
-#' @param nHVG integer, default `1000`. The number of top highly variable genes 
30
-#' to select per batch.
15
+#' @param kmeansK vector of int, default `NULL`. A vector indicating the 
16
+#' kmeans' K-value for each batch, in order to construct pseudo-replicates. The 
17
+#' length of `kmeansK` needs to be the same as the number of batches.
18
+#' @param cellType character, default `"cell_type"`. A string indicating the 
19
+#' field of `colData(inSCE)` that defines different cell types.
20
+#' @param seg array, default `NULL`. An array of gene names or indices that 
21
+#' specifies SEG (Stably Expressed Genes) set as negative control. Pre-defined 
22
+#' dataset with human and mouse SEG lists is available to user by running 
23
+#' `data('SEG')`.
24
+#' @param nCores integer, default `parallel::detectCores()`. The number of 
25
+#' cores of processors to allocate for the task. By default it takes all the 
26
+#' cores available to the user. 
31 27
 #' @return SingleCellExperiment object with `assay(inSCE, assayName)` updated 
32 28
 #' with corrected full-sized expression matrix.
33 29
 #' @export
34 30
 #' @references Hoa, et al., 2020
35 31
 #' @examples 
36 32
 #' data('sceBatches', package = 'singleCellTK')
37
-#' sceCorr <- runSCMerge(sceBatches, species = 'human')
38
-runSCMerge <- function(inSCE, exprs = "logcounts", batchKey = 'batch', 
39
-                       assayName = "scMerge", kmeansK = NULL, 
40
-                       cellTypeKey = 'cell_type', species = NULL, 
41
-                       seg = NULL, nHVG = 1000){
33
+#' sceCorr <- runSCMerge(sceBatches)
34
+runSCMerge <- function(inSCE, useAssay = "logcounts", batch = 'batch', 
35
+                       assayName = "scMerge", seg = NULL, kmeansK = NULL, 
36
+                       cellType = 'cell_type', 
37
+                       nCores = 1){
42 38
     ## Input check
43
-    if(!class(inSCE) == "SingleCellExperiment" && 
44
-       !class(inSCE) == "SCtkExperiment"){
39
+    if(!inherits(inSCE, "SingleCellExperiment")){
45 40
         stop("\"inSCE\" should be a SingleCellExperiment Object.")
46 41
     }
47
-    if(!exprs %in% SummarizedExperiment::assayNames(inSCE)) {
48
-        stop(paste("\"exprs\" (assay) name: ", exprs, " not found."))
42
+    if(!useAssay %in% SummarizedExperiment::assayNames(inSCE)) {
43
+        stop(paste("\"useAssay\" (assay) name: ", useAssay, " not found."))
49 44
     }
50
-    if(!batchKey %in% names(SummarizedExperiment::colData(inSCE))){
51
-        stop(paste("\"batchKey\" name:", batchKey, "not found"))
45
+    if(!batch %in% names(SummarizedExperiment::colData(inSCE))){
46
+        stop(paste("\"batch\" name:", batch, "not found"))
52 47
     }
53
-    if(!cellTypeKey %in% names(SummarizedExperiment::colData(inSCE))){
54
-        stop(paste("\"cellTypeKey\" name:", cellTypeKey, "not found"))
48
+    if(is.null(cellType) & is.null(kmeansK)){
49
+        stop("\"cellType\" and \"kmeansK\" cannot be NULL at the same time")
55 50
     }
56
-    if (!is.null(species) && !is.null(seg)){
57
-        stop("None or only one of the arguments \"species\" and \"seg\" should be applied.")
58
-    } 
59
-    
60
-    assayName <- gsub(' ', '_', assayName)
61
-    
62
-    if(!is.null(species)){
63
-        if(species %in% c('human', 'mouse')){
64
-            # Automatic selection
65
-            genes <- rownames(inSCE)
66
-            if(all(startsWith(genes, 'ENG'))){
67
-                message("All gene IDs start with \"ENG\", using Ensembl IDs")
68
-                ctl <- SEG[[species]][[1]]
69
-            } else {
70
-                message("Not all gene IDs start with \"ENG\", using gene symbols")
71
-                ctl <- SEG[[species]][[2]]
72
-            }
73
-        } else {
74
-            stop("Please choose \"species\" from {\"human\", \"mouse\"}")
75
-        }
76
-    } else if (!is.null(seg)){
77
-        ctl <- seg
78
-    } else {
79
-        seg <- scMerge::scSEGIndex(SummarizedExperiment::assay(inSCE, exprs), 
80
-                            SummarizedExperiment::colData(inSCE)[[cellTypeKey]],
81
-                            ncore = parallel::detectCores())
82
-        ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[1:1000],])
51
+    if(!cellType %in% names(SummarizedExperiment::colData(inSCE))){
52
+        # If NULL, scMerge still works
53
+        stop(paste("\"cellType\" name:", cellType, "not found"))
83 54
     }
55
+
56
+    nCores <- min(as.integer(nCores), parallel::detectCores())
57
+    assayName <- gsub(' ', '_', assayName)
84 58
     
85 59
     ## Run algorithm
86
-    
87
-    # Select HVG
88
-    batches <- list()
89
-    batchCol <- SummarizedExperiment::colData(inSCE)[[batchKey]]
60
+
61
+    batchCol <- SummarizedExperiment::colData(inSCE)[[batch]]
90 62
     uniqBatch <- unique(batchCol)
91
-    for(i in uniqBatch){
92
-        batches[[i]] <- inSCE[, batchCol == i]
93
-    }
94
-    topVarGenesPerBatch <- list()
95
-    for(i in uniqBatch){
96
-        if(nrow(batches[[i]]) <= nHVG){
97
-            topVarGenesPerBatch[[i]] <- 1:nrow(batches[[i]])
98
-        } else {
99
-          sce.var <- scran::modelGeneVar(batches[[i]])
100
-          topVarGenesPerBatch[[i]] <- order(sce.var$bio,decreasing = TRUE)[seq(nHVG)]
101
-            #mvTrend <- scran::trendVar(batches[[i]], use.spikes=FALSE)
102
-            #decomposeTrend <- scran::decomposeVar(batches[[i]], mvTrend)
103
-            #topVarGenesPerBatch[[i]] <- order(decomposeTrend$bio, 
104
-                                              #decreasing = TRUE)[1:nHVG]
105
-        }    
106
-    }
107 63
     
108
-    if(is.null(kmeansK)){
64
+    # Infer parameters
65
+    if(is.null(cellType)){
66
+        cellTypeCol <- NULL
67
+    } else {
68
+        cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]]
69
+    }
70
+    ## kmeansK
71
+    if(!is.null(cellType) && is.null(kmeansK)){
109 72
         # If kmeansK not given, detect by cell type.
110
-        cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellTypeKey]]
73
+        cellTypeCol <- SummarizedExperiment::colData(inSCE)[[cellType]]
111 74
         kmeansK <- c()
112 75
         for (i in 1:length(uniqBatch)){
113 76
             cellTypePerBatch <- cellTypeCol[batchCol == uniqBatch[i]]
114 77
             kmeansK <- c(kmeansK, length(unique(cellTypePerBatch)))
115 78
         }
116
-        print("Detected kmeansK:")
117
-        print(kmeansK)
79
+        cat("Detected kmeansK:\n")
80
+        print(t(data.frame(K = kmeansK, row.names = uniqBatch)))
81
+    }
82
+    ## SEG
83
+    if(is.null(seg)){
84
+        bpParam <- BiocParallel::MulticoreParam(workers = nCores)
85
+        seg <- scMerge::scSEGIndex(SummarizedExperiment::assay(inSCE, useAssay),
86
+                                   cell_type = cellTypeCol,
87
+                                   BPPARAM = bpParam)
88
+        ctl <- rownames(seg[order(seg$segIdx, decreasing = TRUE)[1:1000],])
89
+    } else {
90
+        ctl <- seg
118 91
     }