Browse code

Added more unit tests

Yichen Wang authored on 28/06/2022 20:51:30
Showing 16 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,7 @@
1
+R/importOptimus.R
2
+R/scrublet_doubletDetection.R
3
+R/sce2adata.R
4
+R/exportSCEtoAnndata.R
5
+R/reticulate_setup.R
6
+R/importAnnData.R
7
+
... ...
@@ -1,12 +1,12 @@
1 1
 #' Run t-SNE embedding with Rtsne method
2
-#' @description T-Stochastic Neighbour Embedding (t-SNE) algorithm is commonly 
3
-#' for 2D visualization of single-cell data. This function wraps the 
4
-#' Rtsne \code{\link[Rtsne]{Rtsne}} function. 
5
-#' 
2
+#' @description T-Stochastic Neighbour Embedding (t-SNE) algorithm is commonly
3
+#' for 2D visualization of single-cell data. This function wraps the
4
+#' Rtsne \code{\link[Rtsne]{Rtsne}} function.
5
+#'
6 6
 #' With this funciton, users can create tSNE embedding directly from raw count
7
-#' matrix, with necessary preprocessing including normalization, scaling, 
8
-#' dimension reduction all automated. Yet we still recommend having the PCA as 
9
-#' input, so that the result can match with the clustering based on the same 
7
+#' matrix, with necessary preprocessing including normalization, scaling,
8
+#' dimension reduction all automated. Yet we still recommend having the PCA as
9
+#' input, so that the result can match with the clustering based on the same
10 10
 #' input PCA, and will be much faster.
11 11
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
12 12
 #' @param useAssay Assay to use for tSNE computation. If \code{useAltExp} is
... ...
@@ -21,33 +21,33 @@
21 21
 #' @param logNorm Whether the counts will need to be log-normalized prior to
22 22
 #' generating the tSNE via \code{\link{scaterlogNormCounts}}. Ignored when using
23 23
 #' \code{useReducedDim}. Default \code{FALSE}.
24
-#' @param useFeatureSubset Subset of feature to use for dimension reduction. A 
24
+#' @param useFeatureSubset Subset of feature to use for dimension reduction. A
25 25
 #' character string indicating a \code{rowData} variable that stores the logical
26
-#' vector of HVG selection, or a vector that can subset the rows of 
26
+#' vector of HVG selection, or a vector that can subset the rows of
27 27
 #' \code{inSCE}. Default \code{NULL}.
28 28
 #' @param nTop Automatically detect this number of variable features to use for
29
-#' dimension reduction. Ignored when using \code{useReducedDim} or using 
29
+#' dimension reduction. Ignored when using \code{useReducedDim} or using
30 30
 #' \code{useFeatureSubset}. Default \code{2000}.
31
-#' @param center Whether data should be centered before PCA is applied. Ignored 
31
+#' @param center Whether data should be centered before PCA is applied. Ignored
32 32
 #' when using \code{useReducedDim}. Default \code{TRUE}.
33
-#' @param scale Whether data should be scaled before PCA is applied. Ignored 
33
+#' @param scale Whether data should be scaled before PCA is applied. Ignored
34 34
 #' when using \code{useReducedDim}. Default \code{TRUE}.
35
-#' @param pca Whether an initial PCA step should be performed. Ignored when 
35
+#' @param pca Whether an initial PCA step should be performed. Ignored when
36 36
 #' using \code{useReducedDim}. Default \code{TRUE}.
37 37
 #' @param partialPCA Whether truncated PCA should be used to calculate principal
38
-#' components (requires the irlba package). This is faster for large input 
38
+#' components (requires the irlba package). This is faster for large input
39 39
 #' matrices. Ignored when using \code{useReducedDim}. Default \code{FALSE}.
40 40
 #' @param initialDims Number of dimensions from PCA to use as input in tSNE.
41 41
 #' Default \code{25}.
42
-#' @param theta Numeric value for speed/accuracy trade-off (increase for less 
42
+#' @param theta Numeric value for speed/accuracy trade-off (increase for less
43 43
 #' accuracy), set to \code{0.0} for exact TSNE. Default \code{0.5}.
44
-#' @param perplexity perplexity parameter. Should not be bigger than 
45
-#' \code{3 * perplexity < ncol(inSCE) - 1}. Default \code{30}. See 
46
-#' \code{\link[Rtsne]{Rtsne}} details for interpretation. 
44
+#' @param perplexity perplexity parameter. Should not be bigger than
45
+#' \code{3 * perplexity < ncol(inSCE) - 1}. Default \code{30}. See
46
+#' \code{\link[Rtsne]{Rtsne}} details for interpretation.
47 47
 #' @param nIterations maximum iterations. Default \code{1000}.
48
-#' @param numThreads Integer, number of threads to use using OpenMP, Default 
48
+#' @param numThreads Integer, number of threads to use using OpenMP, Default
49 49
 #' \code{1}. \code{0} corresponds to using all available cores.
50
-#' @param seed Random seed for reproducibility of tSNE results. 
50
+#' @param seed Random seed for reproducibility of tSNE results.
51 51
 #' Default \code{NULL} will use global seed in use by the R environment.
52 52
 #' @return A \linkS4class{SingleCellExperiment} object with tSNE computation
53 53
 #' updated in \code{reducedDim(inSCE, reducedDimName)}.
... ...
@@ -62,16 +62,16 @@
62 62
 #' # Run from PCA
63 63
 #' sce <- scaterlogNormCounts(sce, "logcounts")
64 64
 #' sce <- runModelGeneVar(sce)
65
-#' sce <- scaterPCA(sce, useAssay = "logcounts", 
65
+#' sce <- scaterPCA(sce, useAssay = "logcounts",
66 66
 #'                  useFeatureSubset = "HVG_modelGeneVar2000", scale = TRUE)
67 67
 #' sce <- getTSNE(sce, useReducedDim = "PCA")
68 68
 #' }
69 69
 #' @importFrom S4Vectors metadata<-
70
-getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL, 
71
-                    useAltExp = NULL, reducedDimName = "TSNE", logNorm = FALSE, 
72
-                    useFeatureSubset = NULL, nTop = 2000, center = TRUE, 
73
-                    scale = TRUE, pca = TRUE, partialPCA = FALSE, 
74
-                    initialDims = 25, theta = 0.5, perplexity = 30, 
70
+getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
71
+                    useAltExp = NULL, reducedDimName = "TSNE", logNorm = FALSE,
72
+                    useFeatureSubset = NULL, nTop = 2000, center = TRUE,
73
+                    scale = TRUE, pca = TRUE, partialPCA = FALSE,
74
+                    initialDims = 25, theta = 0.5, perplexity = 30,
75 75
                     nIterations = 1000, numThreads = 1, seed = NULL){
76 76
   params <- as.list(environment())
77 77
   params$inSCE <- NULL
... ...
@@ -80,11 +80,11 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
80 80
     stop("Please use a SingleCellExperiment object")
81 81
   }
82 82
   # matrix specification check
83
-  # When useReducedDim, never useAssay; 
83
+  # When useReducedDim, never useAssay;
84 84
   # when useAltExp, useAssay/reducedDim from there
85 85
   if (is.null(useAssay) && is.null(useReducedDim)) {
86 86
     stop("`useAssay` and `useReducedDim` cannot be NULL at the same time.")
87
-  } 
87
+  }
88 88
   if (!is.null(useAltExp)) {
89 89
     if (!useAltExp %in% SingleCellExperiment::altExpNames(inSCE)) {
90 90
       stop("Specified `useAltExp` not found.")
... ...
@@ -104,15 +104,15 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
104 104
       stop("Specified `useAssay` not found.")
105 105
     }
106 106
   }
107
-  
107
+
108 108
   if (!is.null(useAssay)) {
109 109
     if (isTRUE(logNorm)) {
110
-      sce <- scaterlogNormCounts(sce, "logcounts")
110
+      sce <- scaterlogNormCounts(sce, assayName = "logcounts", useAssay = useAssay)
111 111
       useAssay <- "logcounts"
112 112
     }
113 113
     if (!is.null(useFeatureSubset)) {
114
-      subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset, 
115
-                                           altExpObj = sce, 
114
+      subset_row <- .parseUseFeatureSubset(inSCE, useFeatureSubset,
115
+                                           altExpObj = sce,
116 116
                                            returnType = "logical")
117 117
       sce <- sce[subset_row,]
118 118
     } else {
... ...
@@ -120,7 +120,7 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
120 120
         suppressMessages({
121 121
           sce <- runFeatureSelection(sce, useAssay, method = "modelGeneVar")
122 122
         })
123
-        sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop, 
123
+        sce <- setTopHVG(sce, method = "modelGeneVar", hvgNumber = nTop,
124 124
                          featureSubsetName = "tsneHVG")
125 125
         sce <- subsetSCERows(sce, rowData = "tsneHVG", returnAsAltExp = FALSE)
126 126
       }
... ...
@@ -139,7 +139,7 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
139 139
       mat <- mat[,seq(initialDims)]
140 140
     }
141 141
   }
142
-  
142
+
143 143
   if (is.null(perplexity)){
144 144
     perplexity <- floor(ncol(inSCE) / 5)
145 145
   }
... ...
@@ -148,11 +148,11 @@ getTSNE <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
148 148
   message(paste0(date(), " ... Computing Rtsne."))
149 149
   .withSeed(seed, {
150 150
     tsneOut <- Rtsne::Rtsne(mat, pca_scale = scale, pca_center = center,
151
-                            pca = pca, partial_pca = partialPCA, 
151
+                            pca = pca, partial_pca = partialPCA,
152 152
                             perplexity = perplexity,
153 153
                             initial_dims = initialDims,
154
-                            max_iter = nIterations, theta = theta, 
155
-                            num_threads = numThreads) 
154
+                            max_iter = nIterations, theta = theta,
155
+                            num_threads = numThreads)
156 156
   })
157 157
   tsneOut <- tsneOut$Y
158 158
   rownames(tsneOut) <- colnames(inSCE)
... ...
@@ -44,7 +44,7 @@ importMultipleSources <- function(allImportEntries, delayedArray = FALSE) {
44 44
     } else if (entry$type == "starSolo") {
45 45
       newSce <- importSTARsolo(
46 46
         STARsoloDirs = entry$params$STARsoloDirs,
47
-        samples = entry$params$amples,
47
+        samples = entry$params$samples,
48 48
         delayedArray = delayedArray
49 49
       )
50 50
     } else if (entry$type == "busTools") {
... ...
@@ -96,7 +96,7 @@ importMultipleSources <- function(allImportEntries, delayedArray = FALSE) {
96 96
       }
97 97
 
98 98
     }
99
-    
99
+
100 100
     # Begin Set Tags
101 101
     if(entry$type %in% c("cellRanger2", "cellRanger3", "starSolo", "busTools", "seqc", "optimus", "example")){
102 102
       newSce <- expSetDataTag(
... ...
@@ -114,7 +114,7 @@ importMultipleSources <- function(allImportEntries, delayedArray = FALSE) {
114 114
             assayType = "raw",
115 115
             assays = "counts")
116 116
         }, silent = TRUE)
117
-        
117
+
118 118
         try({
119 119
           logcounts(newSce)
120 120
           newSce <- expSetDataTag(
... ...
@@ -122,7 +122,7 @@ importMultipleSources <- function(allImportEntries, delayedArray = FALSE) {
122 122
             assayType = "transformed",
123 123
             assays = "logcounts")
124 124
         }, silent = TRUE)
125
-        
125
+
126 126
         try({
127 127
           normcounts(newSce)
128 128
           newSce <- expSetDataTag(
... ...
@@ -130,7 +130,7 @@ importMultipleSources <- function(allImportEntries, delayedArray = FALSE) {
130 130
             assayType = "transformed",
131 131
             assays = "normcounts")
132 132
         }, silent = TRUE)
133
-        
133
+
134 134
         try({
135 135
           celda::decontXcounts(newSce)
136 136
           newSce <- expSetDataTag(
... ...
@@ -138,21 +138,21 @@ importMultipleSources <- function(allImportEntries, delayedArray = FALSE) {
138 138
             assayType = "raw",
139 139
             assays = "decontXcounts")
140 140
         }, silent = TRUE)
141
-        
141
+
142 142
         untaggedAssays <- SummarizedExperiment::assayNames(newSce)
143 143
         untaggedAssays <- untaggedAssays[! untaggedAssays %in% c('counts', 'logcounts', 'normcounts', 'decontX')]
144
-        
144
+
145 145
         newSce <- expSetDataTag(
146 146
           inSCE = newSce,
147 147
           assayType = "uncategorized",
148
-          assays = untaggedAssays) 
148
+          assays = untaggedAssays)
149 149
       }
150 150
       # End Set Tags
151 151
     }
152
-    
152
+
153 153
     sceObjs = c(sceObjs, list(newSce))
154 154
   }
155
-  
155
+
156 156
   return(combineSCE(sceList = sceObjs,
157 157
                     by.r = Reduce(base::intersect, lapply(sceObjs, function(x) { colnames(rowData(x))})),
158 158
                     by.c = Reduce(base::intersect, lapply(sceObjs, function(x) { colnames(colData(x))})),
... ...
@@ -69,7 +69,7 @@
69 69
     }
70 70
 
71 71
     sce <- do.call(SingleCellExperiment::cbind, res)
72
-    
72
+
73 73
     if (isTRUE(rowNamesDedup)) {
74 74
         if (any(duplicated(rownames(sce)))) {
75 75
             message("Duplicated gene names found, adding '-1', '-2', ",
... ...
@@ -77,7 +77,7 @@
77 77
         }
78 78
         sce <- dedupRowNames(sce)
79 79
     }
80
-    
80
+
81 81
     # Load metrics summary and store in sce
82 82
     metrics_summary <- .importMetricsStarSolo(STARsoloDirs, samples, "Gene", "Summary.csv")
83 83
     # sce <- setSampleSummaryStatsTable(sce, "starsolo", metrics_summary)
... ...
@@ -127,7 +127,7 @@
127 127
 #'  \link[base]{matrix} function). Default "Matrix".
128 128
 #' @param delayedArray Boolean. Whether to read the expression matrix as
129 129
 #'  \link{DelayedArray} object or not. Default \code{FALSE}.
130
-#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default 
130
+#' @param rowNamesDedup Boolean. Whether to deduplicate rownames. Default
131 131
 #'  \code{TRUE}.
132 132
 #' @return A \code{SingleCellExperiment} object containing the count
133 133
 #'  matrix, the gene annotation, and the cell annotation.
... ...
@@ -195,7 +195,7 @@ importSTARsolo <- function(
195 195
   if(!identical(length(samplePaths), length(sampleNames))){
196 196
     stop("Vectors samplePaths and sampleNames must be equal in length.")
197 197
   }
198
-  
198
+
199 199
   # Processing
200 200
   metrics_summary <- list()
201 201
   for(i in seq(samplePaths)){
... ...
@@ -210,7 +210,7 @@ importSTARsolo <- function(
210 210
       colnames(metrics_summary[[i]]) <- ms_colnames_union
211 211
     }
212 212
   }
213
-  
213
+
214 214
   # Merge StarSolo summary csv files from all/multiple samples into a single data.frame
215 215
   for(i in seq_along(metrics_summary)){
216 216
     metrics_summary[[i]] <- as.data.frame(t(metrics_summary[[i]]))
... ...
@@ -19,7 +19,7 @@
19 19
 #' plotPCA(mouseBrainSubsetSCE, colorBy = "level1class",
20 20
 #'         reducedDimName = "PCA_counts")
21 21
 #'
22
-plotPCA <- function(inSCE, colorBy="No Color", shape="No Shape", pcX="PC1",
22
+plotPCA <- function(inSCE, colorBy=NULL, shape=NULL, pcX="PC1",
23 23
                     pcY="PC2", reducedDimName="PCA", runPCA=FALSE,
24 24
                     useAssay="logcounts"){
25 25
   if(!(reducedDimName %in% names(SingleCellExperiment::reducedDims(inSCE)))){
... ...
@@ -40,17 +40,11 @@ plotPCA <- function(inSCE, colorBy="No Color", shape="No Shape", pcX="PC1",
40 40
     stop("pcY dimension ", pcY, " is not in the reducedDim data")
41 41
   }
42 42
 
43
-  
43
+
44 44
   # Need to add back in variances in the plot axis labels
45 45
   pcXlab <- pcX
46 46
   pcYlab <- pcY
47 47
 
48
-  if (colorBy == "No Color"){
49
-    colorBy <- NULL
50
-  }
51
-  if (shape == "No Shape"){
52
-    shape <- NULL
53
-  }
54 48
   if (!is.null(colorBy)){
55 49
     pcaDf$color <- SingleCellExperiment::colData(inSCE)[, colorBy]
56 50
   }
... ...
@@ -17,7 +17,7 @@
17 17
 #' plotTSNE(mouseBrainSubsetSCE, colorBy = "level1class",
18 18
 #'          reducedDimName = "TSNE_counts")
19 19
 #'
20
-plotTSNE <- function(inSCE, colorBy="No Color", shape="No Shape",
20
+plotTSNE <- function(inSCE, colorBy=NULL, shape=NULL,
21 21
                      reducedDimName="TSNE", runTSNE=FALSE,
22 22
                      useAssay="logcounts"){
23 23
   if(!(reducedDimName %in% names(SingleCellExperiment::reducedDims(inSCE)))){
... ...
@@ -38,12 +38,6 @@ plotTSNE <- function(inSCE, colorBy="No Color", shape="No Shape",
38 38
   colnames(tsneDf)[2] <- "tSNE2"
39 39
   xdim <- colnames(tsneDf)[1]
40 40
   ydim <- colnames(tsneDf)[2]
41
-  if (colorBy == "No Color"){
42
-    colorBy <- NULL
43
-  }
44
-  if (shape == "No Shape"){
45
-    shape <- NULL
46
-  }
47 41
   if (!is.null(colorBy)){
48 42
     tsneDf$color <- SingleCellExperiment::colData(inSCE)[, colorBy]
49 43
   }
... ...
@@ -20,7 +20,7 @@
20 20
 #' plotUMAP(sce, shape = "No Shape", reducedDimName = "UMAP",
21 21
 #'          runUMAP = TRUE, useAssay = "counts")
22 22
 #'
23
-plotUMAP <- function(inSCE, colorBy = "No Color", shape = "No Shape",
23
+plotUMAP <- function(inSCE, colorBy = NULL, shape = NULL,
24 24
                      reducedDimName = "UMAP", runUMAP = FALSE,
25 25
                      useAssay = "logcounts"){
26 26
   if(!(reducedDimName %in% names(SingleCellExperiment::reducedDims(inSCE)))){
... ...
@@ -41,12 +41,7 @@ plotUMAP <- function(inSCE, colorBy = "No Color", shape = "No Shape",
41 41
   colnames(UMAPDf)[2] <- "UMAP2"
42 42
   xdim <- colnames(UMAPDf)[1]
43 43
   ydim <- colnames(UMAPDf)[2]
44
-  if (colorBy == "No Color"){
45
-    colorBy <- NULL
46
-  }
47
-  if (shape == "No Shape"){
48
-    shape <- NULL
49
-  }
44
+
50 45
   if (!is.null(colorBy)){
51 46
     UMAPDf$color <- SingleCellExperiment::colData(inSCE)[, colorBy]
52 47
   }
... ...
@@ -45,44 +45,44 @@ reductions. Default \code{"TSNE"}.}
45 45
 generating the tSNE via \code{\link{scaterlogNormCounts}}. Ignored when using
46 46
 \code{useReducedDim}. Default \code{FALSE}.}
47 47
 
48
-\item{useFeatureSubset}{Subset of feature to use for dimension reduction. A 
48
+\item{useFeatureSubset}{Subset of feature to use for dimension reduction. A
49 49
 character string indicating a \code{rowData} variable that stores the logical
50
-vector of HVG selection, or a vector that can subset the rows of 
50
+vector of HVG selection, or a vector that can subset the rows of
51 51
 \code{inSCE}. Default \code{NULL}.}
52 52
 
53 53
 \item{nTop}{Automatically detect this number of variable features to use for
54
-dimension reduction. Ignored when using \code{useReducedDim} or using 
54
+dimension reduction. Ignored when using \code{useReducedDim} or using
55 55
 \code{useFeatureSubset}. Default \code{2000}.}
56 56
 
57
-\item{center}{Whether data should be centered before PCA is applied. Ignored 
57
+\item{center}{Whether data should be centered before PCA is applied. Ignored
58 58
 when using \code{useReducedDim}. Default \code{TRUE}.}
59 59
 
60
-\item{scale}{Whether data should be scaled before PCA is applied. Ignored 
60
+\item{scale}{Whether data should be scaled before PCA is applied. Ignored
61 61
 when using \code{useReducedDim}. Default \code{TRUE}.}
62 62
 
63
-\item{pca}{Whether an initial PCA step should be performed. Ignored when 
63
+\item{pca}{Whether an initial PCA step should be performed. Ignored when
64 64
 using \code{useReducedDim}. Default \code{TRUE}.}
65 65
 
66 66
 \item{partialPCA}{Whether truncated PCA should be used to calculate principal
67
-components (requires the irlba package). This is faster for large input 
67
+components (requires the irlba package). This is faster for large input
68 68
 matrices. Ignored when using \code{useReducedDim}. Default \code{FALSE}.}
69 69
 
70 70
 \item{initialDims}{Number of dimensions from PCA to use as input in tSNE.
71 71
 Default \code{25}.}
72 72
 
73
-\item{theta}{Numeric value for speed/accuracy trade-off (increase for less 
73
+\item{theta}{Numeric value for speed/accuracy trade-off (increase for less
74 74
 accuracy), set to \code{0.0} for exact TSNE. Default \code{0.5}.}
75 75
 
76
-\item{perplexity}{perplexity parameter. Should not be bigger than 
77
-\code{3 * perplexity < ncol(inSCE) - 1}. Default \code{30}. See 
76
+\item{perplexity}{perplexity parameter. Should not be bigger than
77
+\code{3 * perplexity < ncol(inSCE) - 1}. Default \code{30}. See
78 78
 \code{\link[Rtsne]{Rtsne}} details for interpretation.}
79 79
 
80 80
 \item{nIterations}{maximum iterations. Default \code{1000}.}
81 81
 
82
-\item{numThreads}{Integer, number of threads to use using OpenMP, Default 
82
+\item{numThreads}{Integer, number of threads to use using OpenMP, Default
83 83
 \code{1}. \code{0} corresponds to using all available cores.}
84 84
 
85
-\item{seed}{Random seed for reproducibility of tSNE results. 
85
+\item{seed}{Random seed for reproducibility of tSNE results.
86 86
 Default \code{NULL} will use global seed in use by the R environment.}
87 87
 }
88 88
 \value{
... ...
@@ -90,14 +90,14 @@ A \linkS4class{SingleCellExperiment} object with tSNE computation
90 90
 updated in \code{reducedDim(inSCE, reducedDimName)}.
91 91
 }
92 92
 \description{
93
-T-Stochastic Neighbour Embedding (t-SNE) algorithm is commonly 
94
-for 2D visualization of single-cell data. This function wraps the 
95
-Rtsne \code{\link[Rtsne]{Rtsne}} function. 
93
+T-Stochastic Neighbour Embedding (t-SNE) algorithm is commonly
94
+for 2D visualization of single-cell data. This function wraps the
95
+Rtsne \code{\link[Rtsne]{Rtsne}} function.
96 96
 
97 97
 With this funciton, users can create tSNE embedding directly from raw count
98
-matrix, with necessary preprocessing including normalization, scaling, 
99
-dimension reduction all automated. Yet we still recommend having the PCA as 
100
-input, so that the result can match with the clustering based on the same 
98
+matrix, with necessary preprocessing including normalization, scaling,
99
+dimension reduction all automated. Yet we still recommend having the PCA as
100
+input, so that the result can match with the clustering based on the same
101 101
 input PCA, and will be much faster.
102 102
 }
103 103
 \examples{
... ...
@@ -110,7 +110,7 @@ sce <- getTSNE(inSCE = sce, useAssay = "counts", logNorm = TRUE, nTop = 2000,
110 110
 # Run from PCA
111 111
 sce <- scaterlogNormCounts(sce, "logcounts")
112 112
 sce <- runModelGeneVar(sce)
113
-sce <- scaterPCA(sce, useAssay = "logcounts", 
113
+sce <- scaterPCA(sce, useAssay = "logcounts",
114 114
                  useFeatureSubset = "HVG_modelGeneVar2000", scale = TRUE)
115 115
 sce <- getTSNE(sce, useReducedDim = "PCA")
116 116
 }
... ...
@@ -60,7 +60,7 @@ object. Can be one of "Matrix" (as returned by
60 60
 \item{delayedArray}{Boolean. Whether to read the expression matrix as
61 61
 \link{DelayedArray} object or not. Default \code{FALSE}.}
62 62
 
63
-\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default 
63
+\item{rowNamesDedup}{Boolean. Whether to deduplicate rownames. Default
64 64
 \code{TRUE}.}
65 65
 }
66 66
 \value{
... ...
@@ -6,8 +6,8 @@
6 6
 \usage{
7 7
 plotPCA(
8 8
   inSCE,
9
-  colorBy = "No Color",
10
-  shape = "No Shape",
9
+  colorBy = NULL,
10
+  shape = NULL,
11 11
   pcX = "PC1",
12 12
   pcY = "PC2",
13 13
   reducedDimName = "PCA",
... ...
@@ -6,8 +6,8 @@
6 6
 \usage{
7 7
 plotTSNE(
8 8
   inSCE,
9
-  colorBy = "No Color",
10
-  shape = "No Shape",
9
+  colorBy = NULL,
10
+  shape = NULL,
11 11
   reducedDimName = "TSNE",
12 12
   runTSNE = FALSE,
13 13
   useAssay = "logcounts"
... ...
@@ -6,8 +6,8 @@
6 6
 \usage{
7 7
 plotUMAP(
8 8
   inSCE,
9
-  colorBy = "No Color",
10
-  shape = "No Shape",
9
+  colorBy = NULL,
10
+  shape = NULL,
11 11
   reducedDimName = "UMAP",
12 12
   runUMAP = FALSE,
13 13
   useAssay = "logcounts"
14 14
new file mode 100644
... ...
@@ -0,0 +1,32 @@
1
+# description
2
+library(singleCellTK)
3
+test_that(desc = "Testing description", {
4
+    a <- singleCellTK:::description_subsetSCECols()
5
+    testthat::expect_type(a, "list")
6
+    a <- singleCellTK:::descriptionBarcodeRank()
7
+    testthat::expect_type(a, "list")
8
+    a <- singleCellTK:::descriptionBCDS()
9
+    testthat::expect_type(a, "list")
10
+    a <- singleCellTK:::descriptionCXDS()
11
+    testthat::expect_type(a, "list")
12
+    a <- singleCellTK:::descriptionDecontX()
13
+    testthat::expect_type(a, "list")
14
+    a <- singleCellTK:::descriptionDoubletFinder()
15
+    testthat::expect_type(a, "list")
16
+    a <- singleCellTK:::descriptionEmptyDrops()
17
+    testthat::expect_type(a, "list")
18
+    a <- singleCellTK:::descriptionRunCellQC()
19
+    testthat::expect_type(a, "list")
20
+    a <- singleCellTK:::descriptionRunDropletQC()
21
+    testthat::expect_type(a, "list")
22
+    a <- singleCellTK:::descriptionRunPerCellQC()
23
+    testthat::expect_type(a, "list")
24
+    a <- singleCellTK:::descriptionScDblFinder()
25
+    testthat::expect_type(a, "list")
26
+    a <- singleCellTK:::descriptionScdsHybrid()
27
+    testthat::expect_type(a, "list")
28
+    a <- singleCellTK:::descriptionScrublet()
29
+    testthat::expect_type(a, "list")
30
+    a <- singleCellTK:::descriptionSoupX()
31
+    testthat::expect_type(a, "list")
32
+})
0 33
new file mode 100644
... ...
@@ -0,0 +1,129 @@
1
+# dimension reduction
2
+library(singleCellTK)
3
+library(testthat)
4
+context("Testing dimension reduction")
5
+data(scExample, package = "singleCellTK")
6
+sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'")
7
+sce <- scaterlogNormCounts(sce, "logcounts")
8
+sce <- runFeatureSelection(sce, useAssay = "counts")
9
+sce <- setTopHVG(sce, featureSubsetName = "hvg")
10
+sce <- setTopHVG(sce, featureSubsetName = "hvgAltExp", altExp = TRUE)
11
+
12
+test_that(desc = "Testing scaterPCA", {
13
+    sce <- scaterPCA(sce, useAssay = "logcounts", useFeatureSubset = "hvg",
14
+                     reducedDimName = "PCA1")
15
+    testthat::expect_true("PCA1" %in% reducedDimNames(sce))
16
+
17
+    sce <- scaterPCA(sce, useAssay = "hvgAltExplogcounts", useAltExp = "hvgAltExp",
18
+                     reducedDimName = "PCA2")
19
+    testthat::expect_true("PCA2" %in% reducedDimNames(sce))
20
+
21
+    expect_error({
22
+        scaterPCA(sce, useAssay = "null", useFeatureSubset = "hvg")
23
+    }, "Specified assay")
24
+    expect_error({
25
+        scaterPCA(sce, useAssay = "null", useAltExp = "null", useFeatureSubset = "hvg")
26
+    }, "Specified altExp ")
27
+    expect_error({
28
+        scaterPCA(sce, useAssay = "null", useAltExp = "hvgAltExp", useFeatureSubset = "hvg")
29
+    }, "not found in the specified altExp")
30
+
31
+    p1 <- plotPCA(sce, reducedDimName = "PCA1", colorBy = "type", shape = "type")
32
+    p2 <- plotPCA(sce, reducedDimName = "PCA3", runPCA = TRUE)
33
+    expect_is(p1, "ggplot")
34
+    expect_is(p2, "ggplot")
35
+    expect_error({
36
+        plotPCA(sce, pcX = "foo", reducedDimName = "PCA1")
37
+    }, regexp = "pcX dimension")
38
+    expect_error({
39
+        plotPCA(sce, pcY = "bar", reducedDimName = "PCA1")
40
+    }, regexp = "pcY dimension")
41
+    expect_error({
42
+        plotPCA(sce, reducedDimName = "UMAP")
43
+    }, regexp = "dimension not found")
44
+})
45
+
46
+test_that(desc = "Testing scater UMAP", {
47
+    sce <- scaterPCA(sce, useFeatureSubset = "hvg", seed = 12345, reducedDimName = "PCA1")
48
+    sce <- getUMAP(sce, useReducedDim = "PCA1", reducedDimName = "UMAP1")
49
+    testthat::expect_true("UMAP1" %in% reducedDimNames(sce))
50
+    sce <- getUMAP(sce, useAssay = "hvgAltExplogcounts", useAltExp = "hvgAltExp",
51
+                   reducedDimName = "UMAP2")
52
+    testthat::expect_true("UMAP2" %in% reducedDimNames(sce))
53
+    # TODO: Still some runable conditions
54
+    expect_error({
55
+        getUMAP(inSCE = 1)
56
+    }, "Please use a SingleCellExperiment object")
57
+    expect_error({
58
+        getUMAP(sce, useAssay = NULL, useReducedDim = NULL)
59
+    }, "`useAssay` and `useReducedDim` cannot be NULL")
60
+    expect_error({
61
+        getUMAP(sce, useAltExp = "altexp")
62
+    }, "Specified `useAltExp` not found.")
63
+    expect_error({
64
+        getUMAP(sce, useReducedDim = "TSNE")
65
+    }, "Specified `useReducedDim` not found.")
66
+    expect_error({
67
+        getUMAP(sce, useAssay = "TSNE")
68
+    }, regexp = "Specified `useAssay` not found.")
69
+    expect_error({
70
+        getUMAP(sce, sample = "batch")
71
+    }, regexp = "Given sample annotation ")
72
+    expect_error({
73
+        getUMAP(sce, sample = letters)
74
+    }, regexp = "'sample' must be the same length")
75
+
76
+    p1 <- plotUMAP(sce, reducedDimName = "UMAP1", colorBy = "type", shape = "type")
77
+    p2 <- plotUMAP(sce, reducedDimName = "UMAP3", runUMAP = TRUE)
78
+    expect_is(p1, "ggplot")
79
+    expect_is(p2, "ggplot")
80
+    expect_error({
81
+        plotUMAP(sce, reducedDimName = "TSNE")
82
+    }, regexp = "dimension not found")
83
+    reducedDim(sce, "UMAP4") <- cbind(reducedDim(sce, "UMAP1"), reducedDim(sce, "UMAP1"))
84
+    expect_warning({
85
+        plotUMAP(sce, reducedDimName = "UMAP4")
86
+    }, "More than two UMAP dimensions")
87
+})
88
+
89
+test_that(desc = "Testing Rtsne TSNE", {
90
+    sce <- scaterPCA(sce, useFeatureSubset = "hvg", seed = 12345, reducedDimName = "PCA1")
91
+    sce <- getTSNE(sce, useReducedDim = "PCA1", reducedDimName = "TSNE1")
92
+    testthat::expect_true("TSNE1" %in% reducedDimNames(sce))
93
+    sce <- getTSNE(sce, useAssay = "hvgAltExpcounts", useAltExp = "hvgAltExp",
94
+                   reducedDimName = "TSNE2", logNorm = TRUE, nTop = 50)
95
+    testthat::expect_true("TSNE2" %in% reducedDimNames(sce))
96
+    # TODO: Still some runable conditions
97
+    expect_error({
98
+        getTSNE(inSCE = 1)
99
+    }, "Please use a SingleCellExperiment object")
100
+    expect_error({
101
+        getTSNE(sce, useAssay = NULL, useReducedDim = NULL)
102
+    }, "`useAssay` and `useReducedDim` cannot be NULL")
103
+    expect_error({
104
+        getTSNE(sce, useAltExp = "altexp")
105
+    }, "Specified `useAltExp` not found.")
106
+    expect_error({
107
+        getUMAP(sce, useReducedDim = "TSNE")
108
+    }, "Specified `useReducedDim` not found.")
109
+    expect_error({
110
+        getUMAP(sce, useAssay = "TSNE")
111
+    }, regexp = "Specified `useAssay` not found.")
112
+
113
+    p1 <- plotTSNE(sce, reducedDimName = "TSNE1", colorBy = "type", shape = "type")
114
+    p2 <- plotTSNE(sce, reducedDimName = "TSNE3", runTSNE = TRUE)
115
+    expect_is(p1, "ggplot")
116
+    expect_is(p2, "ggplot")
117
+    expect_error({
118
+        plotTSNE(sce, reducedDimName = "UMAP")
119
+    }, regexp = "dimension not found")
120
+    reducedDim(sce, "TSNE4") <- cbind(reducedDim(sce, "TSNE1"), reducedDim(sce, "TSNE1"))
121
+    expect_warning({
122
+        plotTSNE(sce, reducedDimName = "TSNE4")
123
+    }, "More than two t-SNE dimensions")
124
+
125
+    p3 <- plotDimRed(sce, useReduction = "PCA1")
126
+    p4 <- plotDimRed(sce, useReduction = "PCA1", xAxisLabel = "PC_1", yAxisLabel = "PC_2")
127
+    expect_is(p3, "ggplot")
128
+    expect_is(p4, "ggplot")
129
+})
... ...
@@ -9,39 +9,37 @@ test_that(desc = "Testing FindHVG", {
9 9
     sce <- runModelGeneVar(sce, "seuratNormData")
10 10
     metricNames.mgv <- metadata(sce)$sctk$runFeatureSelection$modelGeneVar$rowData
11 11
     testthat::expect_true(all(metricNames.mgv %in% names(rowData(sce))))
12
-    
13
-    sce <- runSeuratFindHVG(sce, useAssay = "counts", method = "vst")
12
+
13
+    sce <- runFeatureSelection(sce, "counts", "vst")
14 14
     metricNames.vst <- metadata(sce)$sctk$runFeatureSelection$vst$rowData
15 15
     testthat::expect_true(all(metricNames.vst %in% names(rowData(sce))))
16
-    
17
-    sce <- runSeuratFindHVG(sce, useAssay = "seuratNormData", 
18
-                            method = "dispersion")
16
+
17
+    sce <- runFeatureSelection(sce, "seuratNormData", "dispersion")
19 18
     metricNames.disp <- metadata(sce)$sctk$runFeatureSelection$dispersion$rowData
20 19
     testthat::expect_true(all(metricNames.disp %in% names(rowData(sce))))
21
-    
22
-    sce <- runSeuratFindHVG(sce, useAssay = "seuratNormData", 
23
-                            method = "mean.var.plot")
20
+
21
+    sce <- runFeatureSelection(sce, "seuratNormData", "mean.var.plot")
24 22
     metricNames.mvp <- metadata(sce)$sctk$runFeatureSelection$mean.var.plot$rowData
25 23
     testthat::expect_true(all(metricNames.mvp %in% names(rowData(sce))))
26
-    
24
+
27 25
     # Test accessor functions
28 26
     sce <- setTopHVG(sce, "modelGeneVar", hvgNumber = 2000, altExp = TRUE)
29 27
     nHVG <- length(getTopHVG(sce, useFeatureSubset = "HVG_modelGeneVar2000"))
30 28
     testthat::expect_true(is.logical(rowData(sce)$HVG_modelGeneVar2000))
31 29
     testthat::expect_equal(nrow(altExp(sce, "HVG_modelGeneVar2000")), nHVG)
32
-    testthat::expect_equal(metadata(sce)$sctk$featureSubsets$HVG_modelGeneVar2000$useAssay, 
30
+    testthat::expect_equal(metadata(sce)$sctk$featureSubsets$HVG_modelGeneVar2000$useAssay,
33 31
                            "seuratNormData")
34
-    
35
-    hvgs <- getTopHVG(sce, "mean.var.plot", hvgNumber = 2000, 
32
+
33
+    hvgs <- getTopHVG(sce, "mean.var.plot", hvgNumber = 2000,
36 34
                       featureDisplay = "feature_name")
37 35
     testthat::expect_false(all(startsWith(hvgs, "ENSG00000")))
38 36
     hvgs <- getTopHVG(sce, "vst", hvgNumber = 2000)
39 37
     vm1 <- plotTopHVG(sce, "dispersion")
40
-    vm2 <- plotTopHVG(sce, "modelGeneVar", hvgNumber = 30, labelsCount = 10, 
38
+    vm2 <- plotTopHVG(sce, "modelGeneVar", hvgNumber = 30, labelsCount = 10,
41 39
                      featureDisplay = "feature_name")
42
-    vm3 <- plotTopHVG(sce, method = "mean.var.plot", 
40
+    vm3 <- plotTopHVG(sce, method = "mean.var.plot",
43 41
                       useFeatureSubset = "HVG_modelGeneVar2000")
44 42
     testthat::expect_true(inherits(vm1, "ggplot"))
45 43
     testthat::expect_true(inherits(vm2, "ggplot"))
46 44
     testthat::expect_true(inherits(vm3, "ggplot"))
47
-})
48 45
\ No newline at end of file
46
+})
... ...
@@ -11,20 +11,48 @@ context("Testing import functions")
11 11
 ## Importing scRNA-seq Data Functions
12 12
 #####################################
13 13
 
14
-test_that(desc = "Testing importBUStools", {
15
-  
16
-  sce <- importBUStools(BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", package = "singleCellTK"),
17
-                        samples = "PBMC_1k_v3_20x20")
18
-  expect_true(validObject(sce))
19
-})
20
-
21
-
22
-test_that(desc = "Testing importCellRanger", {
23
-  sce <- importCellRanger(cellRangerDirs = system.file("extdata",package = "singleCellTK"),
24
-                          sampleDirs = "hgmm_1k_v3_20x20",
25
-                          sampleNames = "hgmm1kv3",
26
-                          dataType = "filtered")
14
+test_that(desc = "Testing import single-cell data", {
15
+  allImportEntries <- list(
16
+    samples = list()
17
+  )
18
+  allImportEntries$samples$a <- list(
19
+    type = "busTools",
20
+    params = list(BUStoolsDirs = system.file("extdata/BUStools_PBMC_1k_v3_20x20/genecount/", package = "singleCellTK"),
21
+                  samples = "BUStools_20x20")
22
+  )
23
+  allImportEntries$samples$b <- list(
24
+    type = "cellRanger3",
25
+    params = list(cellRangerDirs = system.file("extdata",package = "singleCellTK"),
26
+                  sampleDirs = "hgmm_1k_v3_20x20",
27
+                  sampleNames = "cellRanger3_20x20",
28
+                  dataType = "filtered")
29
+  )
30
+  allImportEntries$samples$c <- list(
31
+    type = "seqc",
32
+    params = list(seqcDirs = system.file("extdata/pbmc_1k_50x50",package = "singleCellTK"),
33
+                  samples = "seqc_50x50",
34
+                  prefix = "pbmc_1k",
35
+                  combinedSample = FALSE)
36
+  )
37
+  allImportEntries$samples$d <- list(
38
+    type = "starSolo",
39
+    params = list(STARsoloDirs = system.file("extdata/STARsolo_PBMC_1k_v3_20x20",package = "singleCellTK"),
40
+                  samples = "STARsolo_20x20")
41
+  )
42
+  allImportEntries$samples$e <- list(
43
+    type = "files",
44
+    params = list(assayFile = system.file("extdata/pbmc_1k_50x50/pbmc_1k_sparse_molecule_counts.mtx",package = "singleCellTK"),
45
+                  annotFile = system.file("extdata/pbmc_1k_50x50/pbmc_1k_sparse_counts_barcodes.csv",package = "singleCellTK"),
46
+                  featureFile = system.file("extdata/pbmc_1k_50x50/pbmc_1k_sparse_counts_genes.csv",package = "singleCellTK"),
47
+                  assayName = "counts")
48
+  )
49
+  expect_warning({
50
+    sce <- importMultipleSources(allImportEntries)
51
+  }, "column names 'cell_barcode.x'")
27 52
   expect_true(validObject(sce))
53
+  summary <- as.data.frame(table(sce$sample))
54
+  expect_true(all(summary$Var1 %in% c("BUStools_20x20", "cellRanger3_20x20", "seqc_50x50", "STARsolo_20x20", "sample")))
55
+  expect_equal(summary$Freq, c(20, 20, 50, 50, 20))
28 56
 })
29 57
 
30 58
 test_that(desc = "Testing importDropEst", {
... ...
@@ -33,22 +61,7 @@ test_that(desc = "Testing importDropEst", {
33 61
   expect_true(validObject(sce))
34 62
 })
35 63
 
36
-
37
-test_that(desc = "Testing importSeqc", {
38
-  sce <- importSEQC(seqcDirs = system.file("extdata/pbmc_1k_50x50",package = "singleCellTK"),
39
-                    samples = "pbmc_1k_50x50",
40
-                    prefix = "pbmc_1k",
41
-                    combinedSample = FALSE)
42
-  expect_true(validObject(sce))
43
-})
44
-
45
-test_that(desc = "Testing importSTARSolo", {
46
-  sce <- importSTARsolo(STARsoloDirs = system.file("extdata/STARsolo_PBMC_1k_v3_20x20",package = "singleCellTK"),
47
-                        samples = "PBMC_1k_v3_20x20")
48
-  
49
-  expect_true(validObject(sce))
50
-})
51
-# 
64
+#
52 65
 # test_that(desc = "Testing importOptimus", {
53 66
 #   if (!reticulate::py_module_available("scipy.sparse") || (!reticulate::py_module_available("numpy"))){
54 67
 #     skip("scipy.sparse or numpy not available. Skipping testing importOptimus")
... ...
@@ -56,7 +69,7 @@ test_that(desc = "Testing importSTARSolo", {
56 69
 #   sce <- importOptimus(OptimusDirs = system.file("extdata/Optimus_20x1000",package = "singleCellTK"),
57 70
 #                        samples = "Optimus_20x1000")
58 71
 #   expect_true(validObject(sce))
59
-# }) 
72
+# })
60 73
 
61 74
 # test_that(desc = "Testing importAnnData", {
62 75
 #   if (!reticulate::py_module_available("anndata")){
... ...
@@ -69,17 +82,15 @@ test_that(desc = "Testing importSTARSolo", {
69 82
 ##################################
70 83
 ## Importing Gene Set Functions
71 84
 ##################################
72
-
85
+data(scExample)
73 86
 test_that(desc = "Testing importGeneSetFromGMT", {
74
-  data(scExample)
75 87
   gmt <- system.file("extdata/mito_subset.gmt", package = "singleCellTK")
76 88
   sce <- importGeneSetsFromGMT(inSCE = sce, file = gmt, by = NULL,
77 89
                                collectionName = "test")
78 90
   expect_true(inherits(sce@metadata$sctk$genesets$test[[1]], "GeneSet"))
79
-}) 
91
+})
80 92
 
81 93
 test_that(desc = "Testing importGeneSetFromList", {
82
-  data(scExample)
83 94
   gs1 <- rownames(sce)[1:10]
84 95
   gs2 <- rownames(sce)[11:20]
85 96
   gs <- list("geneset1" = gs1, "geneset2" = gs2)
... ...
@@ -88,10 +99,9 @@ test_that(desc = "Testing importGeneSetFromList", {
88 99
                                 collectionName = "test",
89 100
                                 by = "rownames")
90 101
   expect_true(inherits(sce@metadata$sctk$genesets$test[[1]], "GeneSet"))
91
-}) 
102
+})
92 103
 
93 104
 test_that(desc = "Testing importGeneSetFromCollection", {
94
-  data(scExample)
95 105
   gs1 <- GSEABase::GeneSet(setName = "geneset1", geneIds = rownames(sce)[1:10])
96 106
   gs2 <- GSEABase::GeneSet(setName = "geneset2", geneIds = rownames(sce)[11:20])
97 107
   gsc <- GSEABase::GeneSetCollection(list(gs1, gs2))
... ...
@@ -100,17 +110,26 @@ test_that(desc = "Testing importGeneSetFromCollection", {
100 110
                                       collectionName = "test",
101 111
                                       by = "rownames")
102 112
   expect_true(inherits(sce@metadata$sctk$genesets$test[[1]], "GeneSet"))
103
-}) 
113
+  gscList <- sctkListGeneSetCollections(sce)
114
+  expect_equal(gscList, "test")
115
+  gsList <- getGenesetNamesFromCollection(sce, "test")
116
+  expect_equal(gsList, c("geneset1", "geneset2"))
117
+})
104 118
 
105 119
 test_that(desc = "Testing importGeneSetFromMSigDB", {
106
-  data(scExample)
107 120
   sce <- importGeneSetsFromMSigDB(inSCE = sce,
108 121
                                   categoryIDs = "C2-CP",
109 122
                                   species = "Homo sapiens",
110 123
                                   mapping = "gene_symbol",
111 124
                                   by = "feature_name")
112 125
   expect_true(inherits(sce@metadata$sctk$genesets$"C2-CP"[[1]], "GeneSet"))
113
-}) 
114
-
126
+})
115 127
 
128
+data(scExample)
129
+test_that(desc = "Testing export", {
130
+  exportSCEtoFlatFile(sce)
131
+  expect_true(file.exists("./assays/SCE_counts.mtx.gz"))
132
+  expect_true(file.exists("./SCE_cellData.txt.gz"))
133
+  expect_true(file.exists("./SCE_featureData.txt.gz"))
134
+})
116 135