Browse code

Added subsampling parameters for perplexity calculation to recursive split functions

Joshua D. Campbell authored on 28/03/2022 12:00:25
Showing 12 changed files

... ...
@@ -125,6 +125,7 @@ import(grDevices)
125 125
 import(graphics)
126 126
 import(grid)
127 127
 import(uwot)
128
+importClassesFrom(Matrix,dgCMatrix)
128 129
 importClassesFrom(SingleCellExperiment,SingleCellExperiment)
129 130
 importFrom(MCMCprecision,fit_dirichlet)
130 131
 importFrom(Matrix,colSums)
... ...
@@ -353,7 +353,7 @@ setMethod(
353 353
 #'  matrix object.
354 354
 #' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts
355 355
 #' matrix will be resampled according to a multinomial distribution to introduce
356
-#' noise before caculating perplexity. Default \code{FALSE}.
356
+#' noise before calculating perplexity. Default \code{FALSE}.
357 357
 #' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from
358 358
 #' the original counts matrix will be randomly selected. Default \code{TRUE}.
359 359
 #' @param numResample Integer. The number of times to resample the counts matrix
... ...
@@ -134,6 +134,17 @@
134 134
 #' @param perplexity Logical. Whether to calculate perplexity for each model.
135 135
 #'  If FALSE, then perplexity can be calculated later with
136 136
 #'  \link{resamplePerplexity}. Default TRUE.
137
+#' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts
138
+#' matrix will be resampled according to a multinomial distribution to introduce
139
+#' noise before calculating perplexity. Default \code{FALSE}.
140
+#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from
141
+#' the original counts matrix will be randomly selected. Default \code{TRUE}.
142
+#' @param numResample Integer. The number of times to resample the counts matrix
143
+#' for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
144
+#' Default \code{5}.
145
+#' @param numSubsample Integer. The number of cells to sample from the
146
+#' counts matrix if \code{doSubsampling} is set to \code{TRUE}.
147
+#' Default \code{5000}.
137 148
 #' @param verbose Logical. Whether to print log messages. Default TRUE.
138 149
 #' @param logfile Character. Messages will be redirected to a file named
139 150
 #'  "logfile". If NULL, messages will be printed to stdout.  Default NULL.
... ...
@@ -162,6 +173,10 @@ setGeneric("recursiveSplitCell",
162 173
         reorder = TRUE,
163 174
         seed = 12345,
164 175
         perplexity = TRUE,
176
+        doResampling = FALSE,
177
+        doSubsampling = TRUE,
178
+        numResample = 5,
179
+        numSubsample = 5000,
165 180
         logfile = NULL,
166 181
         verbose = TRUE) {
167 182
     standardGeneric("recursiveSplitCell")})
... ...
@@ -205,6 +220,10 @@ setMethod("recursiveSplitCell",
205 220
         reorder = TRUE,
206 221
         seed = 12345,
207 222
         perplexity = TRUE,
223
+        doResampling = FALSE,
224
+        doSubsampling = TRUE,
225
+        numResample = 5,
226
+        numSubsample = 5000,
208 227
         logfile = NULL,
209 228
         verbose = TRUE) {
210 229
 
... ...
@@ -243,6 +262,10 @@ setMethod("recursiveSplitCell",
243 262
             reorder = reorder,
244 263
             seed = seed,
245 264
             perplexity = perplexity,
265
+            doResampling = doResampling,
266
+            doSubsampling = doSubsampling,
267
+            numResample = numResample,
268
+            numSubsample = numSubsample,
246 269
             logfile = logfile,
247 270
             verbose = verbose)
248 271
 
... ...
@@ -311,6 +334,10 @@ setMethod("recursiveSplitCell",
311 334
         reorder = TRUE,
312 335
         seed = 12345,
313 336
         perplexity = TRUE,
337
+        doResampling = FALSE,
338
+        doSubsampling = TRUE,
339
+        numResample = 5,
340
+        numSubsample = 5000,
314 341
         logfile = NULL,
315 342
         verbose = TRUE) {
316 343
 
... ...
@@ -340,6 +367,10 @@ setMethod("recursiveSplitCell",
340 367
             reorder = reorder,
341 368
             seed = seed,
342 369
             perplexity = perplexity,
370
+            doResampling = doResampling,
371
+            doSubsampling = doSubsampling,
372
+            numResample = numResample,
373
+            numSubsample = numSubsample,
343 374
             logfile = logfile,
344 375
             verbose = verbose)
345 376
 
... ...
@@ -383,6 +414,10 @@ setMethod("recursiveSplitCell",
383 414
     reorder,
384 415
     seed,
385 416
     perplexity,
417
+    doResampling,
418
+    doSubsampling,
419
+    numResample,
420
+    numSubsample,
386 421
     logfile,
387 422
     verbose) {
388 423
 
... ...
@@ -400,6 +435,10 @@ setMethod("recursiveSplitCell",
400 435
             minCell = minCell,
401 436
             reorder = reorder,
402 437
             perplexity = perplexity,
438
+            doResampling = doResampling,
439
+            doSubsampling = doSubsampling,
440
+            numResample = numResample,
441
+            numSubsample = numSubsample,
403 442
             logfile = logfile,
404 443
             verbose = verbose)
405 444
     } else {
... ...
@@ -418,6 +457,10 @@ setMethod("recursiveSplitCell",
418 457
                 minCell = minCell,
419 458
                 reorder = reorder,
420 459
                 perplexity = perplexity,
460
+                doResampling = doResampling,
461
+                doSubsampling = doSubsampling,
462
+                numResample = numResample,
463
+                numSubsample = numSubsample,
421 464
                 logfile = logfile,
422 465
                 verbose = verbose)
423 466
         )
... ...
@@ -440,6 +483,10 @@ setMethod("recursiveSplitCell",
440 483
                                minCell,
441 484
                                reorder,
442 485
                                perplexity,
486
+                               doResampling,
487
+                               doSubsampling,
488
+                               numResample,
489
+                               numSubsample,
443 490
                                logfile,
444 491
                                verbose) {
445 492
 
... ...
@@ -841,7 +888,11 @@ setMethod("recursiveSplitCell",
841 888
       verbose = verbose,
842 889
       logfile = NULL
843 890
     )
844
-    celdaRes <- resamplePerplexity(counts, celdaRes)
891
+    celdaRes <- resamplePerplexity(counts, celdaRes,
892
+                                   doResampling = doResampling,
893
+                                   doSubsampling = doSubsampling,
894
+                                   numResample = numResample,
895
+                                   numSubsample = numSubsample)
845 896
   }
846 897
   endTime <- Sys.time()
847 898
   .logMessages(
... ...
@@ -918,7 +969,18 @@ setMethod("recursiveSplitCell",
918 969
 #'  \link[withr]{with_seed} are made.
919 970
 #' @param perplexity Logical. Whether to calculate perplexity for each model.
920 971
 #'  If FALSE, then perplexity can be calculated later with
921
-#'  \link{resamplePerplexity}. Default TRUE.
972
+#'  \link{resamplePerplexity}. Default \code{TRUE}.
973
+#' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts
974
+#' matrix will be resampled according to a multinomial distribution to introduce
975
+#' noise before calculating perplexity. Default \code{FALSE}.
976
+#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from
977
+#' the original counts matrix will be randomly selected. Default \code{TRUE}.
978
+#' @param numResample Integer. The number of times to resample the counts matrix
979
+#' for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
980
+#' Default \code{5}.
981
+#' @param numSubsample Integer. The number of cells to sample from the
982
+#' counts matrix if \code{doSubsampling} is set to \code{TRUE}.
983
+#' Default \code{5000}.
922 984
 #' @param verbose Logical. Whether to print log messages. Default TRUE.
923 985
 #' @param logfile Character. Messages will be redirected to a file named
924 986
 #'  "logfile". If NULL, messages will be printed to stdout.  Default NULL.
... ...
@@ -947,6 +1009,10 @@ setGeneric("recursiveSplitModule",
947 1009
         reorder = TRUE,
948 1010
         seed = 12345,
949 1011
         perplexity = TRUE,
1012
+        doResampling = FALSE,
1013
+        doSubsampling = TRUE,
1014
+        numResample = 5,
1015
+        numSubsample = 5000,
950 1016
         verbose = TRUE,
951 1017
         logfile = NULL) {
952 1018
     standardGeneric("recursiveSplitModule")})
... ...
@@ -983,6 +1049,10 @@ setMethod("recursiveSplitModule",
983 1049
         reorder = TRUE,
984 1050
         seed = 12345,
985 1051
         perplexity = TRUE,
1052
+        doResampling = FALSE,
1053
+        doSubsampling = TRUE,
1054
+        numResample = 5,
1055
+        numSubsample = 5000,
986 1056
         verbose = TRUE,
987 1057
         logfile = NULL) {
988 1058
 
... ...
@@ -1021,6 +1091,10 @@ setMethod("recursiveSplitModule",
1021 1091
             reorder = reorder,
1022 1092
             seed = seed,
1023 1093
             perplexity = perplexity,
1094
+            doResampling = doResampling,
1095
+            doSubsampling = doSubsampling,
1096
+            numResample = numResample,
1097
+            numSubsample = numSubsample,
1024 1098
             verbose = verbose,
1025 1099
             logfile = logfile)
1026 1100
 
... ...
@@ -1082,6 +1156,10 @@ setMethod("recursiveSplitModule",
1082 1156
         reorder = TRUE,
1083 1157
         seed = 12345,
1084 1158
         perplexity = TRUE,
1159
+        doResampling = FALSE,
1160
+        doSubsampling = TRUE,
1161
+        numResample = 5,
1162
+        numSubsample = 5000,
1085 1163
         verbose = TRUE,
1086 1164
         logfile = NULL) {
1087 1165
 
... ...
@@ -1111,6 +1189,10 @@ setMethod("recursiveSplitModule",
1111 1189
             reorder = reorder,
1112 1190
             seed = seed,
1113 1191
             perplexity = perplexity,
1192
+            doResampling = doResampling,
1193
+            doSubsampling = doSubsampling,
1194
+            numResample = numResample,
1195
+            numSubsample = numSubsample,
1114 1196
             verbose = verbose,
1115 1197
             logfile = logfile)
1116 1198
 
... ...
@@ -1154,6 +1236,10 @@ setMethod("recursiveSplitModule",
1154 1236
     reorder,
1155 1237
     seed,
1156 1238
     perplexity,
1239
+    doResampling,
1240
+    doSubsampling,
1241
+    numResample,
1242
+    numSubsample,
1157 1243
     verbose,
1158 1244
     logfile) {
1159 1245
 
... ...
@@ -1173,7 +1259,11 @@ setMethod("recursiveSplitModule",
1173 1259
             reorder = reorder,
1174 1260
             perplexity = perplexity,
1175 1261
             verbose = verbose,
1176
-            logfile = logfile)
1262
+            logfile = logfile,
1263
+            doResampling = doResampling,
1264
+            doSubsampling = doSubsampling,
1265
+            numResample = numResample,
1266
+            numSubsample = numSubsample)
1177 1267
     } else {
1178 1268
         with_seed(seed,
1179 1269
             celdaList <- .recursiveSplitModule(
... ...
@@ -1191,7 +1281,11 @@ setMethod("recursiveSplitModule",
1191 1281
                 reorder = reorder,
1192 1282
                 perplexity = perplexity,
1193 1283
                 verbose = verbose,
1194
-                logfile = logfile)
1284
+                logfile = logfile,
1285
+                doResampling = doResampling,
1286
+                doSubsampling = doSubsampling,
1287
+                numResample = numResample,
1288
+                numSubsample = numSubsample)
1195 1289
         )
1196 1290
     }
1197 1291
 
... ...
@@ -1213,7 +1307,11 @@ setMethod("recursiveSplitModule",
1213 1307
                                  reorder = TRUE,
1214 1308
                                  perplexity = TRUE,
1215 1309
                                  verbose = TRUE,
1216
-                                 logfile = NULL) {
1310
+                                 logfile = NULL,
1311
+                                 doResampling = FALSE,
1312
+                                 doSubsampling = TRUE,
1313
+                                 numResample = 5,
1314
+                                 numSubsample = 5000) {
1217 1315
 
1218 1316
   .logMessages(paste(rep("=", 50), collapse = ""),
1219 1317
     logfile = logfile,
... ...
@@ -1573,7 +1671,11 @@ setMethod("recursiveSplitModule",
1573 1671
       verbose = verbose,
1574 1672
       logfile = NULL
1575 1673
     )
1576
-    celdaRes <- resamplePerplexity(counts, celdaRes)
1674
+    celdaRes <- resamplePerplexity(counts, celdaRes,
1675
+                                   doResampling = doResampling,
1676
+                                   doSubsampling = doSubsampling,
1677
+                                   numResample = numResample,
1678
+                                   numSubsample = numSubsample)
1577 1679
   }
1578 1680
 
1579 1681
   endTime <- Sys.time()
1580 1682
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/RcppExports.R
3
+\name{eigenMatMultInt}
4
+\alias{eigenMatMultInt}
5
+\title{Fast matrix multiplication for double x int}
6
+\usage{
7
+eigenMatMultInt(A, B)
8
+}
9
+\arguments{
10
+\item{A}{a double matrix}
11
+
12
+\item{B}{an integer matrix}
13
+}
14
+\value{
15
+An integer matrix representing the product of A and B
16
+}
17
+\description{
18
+Fast matrix multiplication for double x int
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/RcppExports.R
3
+\name{eigenMatMultNumeric}
4
+\alias{eigenMatMultNumeric}
5
+\title{Fast matrix multiplication for double x double}
6
+\usage{
7
+eigenMatMultNumeric(A, B)
8
+}
9
+\arguments{
10
+\item{A}{a double matrix}
11
+
12
+\item{B}{a double matrix}
13
+}
14
+\value{
15
+A double matrix representing the product of A and B
16
+}
17
+\description{
18
+Fast matrix multiplication for double x double
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/RcppExports.R
3
+\name{fastNormProp}
4
+\alias{fastNormProp}
5
+\title{Fast normalization for numeric matrix}
6
+\usage{
7
+fastNormProp(R_counts, R_alpha)
8
+}
9
+\arguments{
10
+\item{R_counts}{An integer matrix}
11
+
12
+\item{R_alpha}{A double value to be added to the matrix as a pseudocount}
13
+}
14
+\value{
15
+A numeric matrix where the columns have been normalized to proportions
16
+}
17
+\description{
18
+Fast normalization for numeric matrix
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/RcppExports.R
3
+\name{fastNormPropLog}
4
+\alias{fastNormPropLog}
5
+\title{Fast normalization for numeric matrix}
6
+\usage{
7
+fastNormPropLog(R_counts, R_alpha)
8
+}
9
+\arguments{
10
+\item{R_counts}{An integer matrix}
11
+
12
+\item{R_alpha}{A double value to be added to the matrix as a pseudocount}
13
+}
14
+\value{
15
+A numeric matrix where the columns have been normalized to proportions
16
+}
17
+\description{
18
+Fast normalization for numeric matrix
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/RcppExports.R
3
+\name{fastNormPropSqrt}
4
+\alias{fastNormPropSqrt}
5
+\title{Fast normalization for numeric matrix}
6
+\usage{
7
+fastNormPropSqrt(R_counts, R_alpha)
8
+}
9
+\arguments{
10
+\item{R_counts}{An integer matrix}
11
+
12
+\item{R_alpha}{A double value to be added to the matrix as a pseudocount}
13
+}
14
+\value{
15
+A numeric matrix where the columns have been normalized to proportions
16
+}
17
+\description{
18
+Fast normalization for numeric matrix
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/RcppExports.R
3
+\name{nonzero}
4
+\alias{nonzero}
5
+\title{Get row and column indices of non-zero elements in the matrix}
6
+\usage{
7
+nonzero(R_counts)
8
+}
9
+\arguments{
10
+\item{R_counts}{A matrix}
11
+}
12
+\value{
13
+An integer matrix where each row is a row, column indices pair
14
+}
15
+\description{
16
+Get row and column indices of non-zero elements in the matrix
17
+}
... ...
@@ -23,6 +23,10 @@ recursiveSplitCell(
23 23
   reorder = TRUE,
24 24
   seed = 12345,
25 25
   perplexity = TRUE,
26
+  doResampling = FALSE,
27
+  doSubsampling = TRUE,
28
+  numResample = 5,
29
+  numSubsample = 5000,
26 30
   logfile = NULL,
27 31
   verbose = TRUE
28 32
 )
... ...
@@ -44,6 +48,10 @@ recursiveSplitCell(
44 48
   reorder = TRUE,
45 49
   seed = 12345,
46 50
   perplexity = TRUE,
51
+  doResampling = FALSE,
52
+  doSubsampling = TRUE,
53
+  numResample = 5,
54
+  numSubsample = 5000,
47 55
   logfile = NULL,
48 56
   verbose = TRUE
49 57
 )
... ...
@@ -65,6 +73,10 @@ recursiveSplitCell(
65 73
   reorder = TRUE,
66 74
   seed = 12345,
67 75
   perplexity = TRUE,
76
+  doResampling = FALSE,
77
+  doSubsampling = TRUE,
78
+  numResample = 5,
79
+  numSubsample = 5000,
68 80
   logfile = NULL,
69 81
   verbose = TRUE
70 82
 )
... ...
@@ -135,6 +147,21 @@ a default value of 12345 is used. If NULL, no calls to
135 147
 If FALSE, then perplexity can be calculated later with
136 148
 \link{resamplePerplexity}. Default TRUE.}
137 149
 
150
+\item{doResampling}{Boolean. If \code{TRUE}, then each cell in the counts
151
+matrix will be resampled according to a multinomial distribution to introduce
152
+noise before calculating perplexity. Default \code{FALSE}.}
153
+
154
+\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from
155
+the original counts matrix will be randomly selected. Default \code{TRUE}.}
156
+
157
+\item{numResample}{Integer. The number of times to resample the counts matrix
158
+for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
159
+Default \code{5}.}
160
+
161
+\item{numSubsample}{Integer. The number of cells to sample from the
162
+counts matrix if \code{doSubsampling} is set to \code{TRUE}.
163
+Default \code{5000}.}
164
+
138 165
 \item{logfile}{Character. Messages will be redirected to a file named
139 166
 "logfile". If NULL, messages will be printed to stdout.  Default NULL.}
140 167
 
... ...
@@ -23,6 +23,10 @@ recursiveSplitModule(
23 23
   reorder = TRUE,
24 24
   seed = 12345,
25 25
   perplexity = TRUE,
26
+  doResampling = FALSE,
27
+  doSubsampling = TRUE,
28
+  numResample = 5,
29
+  numSubsample = 5000,
26 30
   verbose = TRUE,
27 31
   logfile = NULL
28 32
 )
... ...
@@ -44,6 +48,10 @@ recursiveSplitModule(
44 48
   reorder = TRUE,
45 49
   seed = 12345,
46 50
   perplexity = TRUE,
51
+  doResampling = FALSE,
52
+  doSubsampling = TRUE,
53
+  numResample = 5,
54
+  numSubsample = 5000,
47 55
   verbose = TRUE,
48 56
   logfile = NULL
49 57
 )
... ...
@@ -65,6 +73,10 @@ recursiveSplitModule(
65 73
   reorder = TRUE,
66 74
   seed = 12345,
67 75
   perplexity = TRUE,
76
+  doResampling = FALSE,
77
+  doSubsampling = TRUE,
78
+  numResample = 5,
79
+  numSubsample = 5000,
68 80
   verbose = TRUE,
69 81
   logfile = NULL
70 82
 )
... ...
@@ -128,7 +140,22 @@ a default value of 12345 is used. If NULL, no calls to
128 140
 
129 141
 \item{perplexity}{Logical. Whether to calculate perplexity for each model.
130 142
 If FALSE, then perplexity can be calculated later with
131
-\link{resamplePerplexity}. Default TRUE.}
143
+\link{resamplePerplexity}. Default \code{TRUE}.}
144
+
145
+\item{doResampling}{Boolean. If \code{TRUE}, then each cell in the counts
146
+matrix will be resampled according to a multinomial distribution to introduce
147
+noise before calculating perplexity. Default \code{FALSE}.}
148
+
149
+\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from
150
+the original counts matrix will be randomly selected. Default \code{TRUE}.}
151
+
152
+\item{numResample}{Integer. The number of times to resample the counts matrix
153
+for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
154
+Default \code{5}.}
155
+
156
+\item{numSubsample}{Integer. The number of cells to sample from the
157
+counts matrix if \code{doSubsampling} is set to \code{TRUE}.
158
+Default \code{5000}.}
132 159
 
133 160
 \item{verbose}{Logical. Whether to print log messages. Default TRUE.}
134 161
 
... ...
@@ -59,7 +59,7 @@ to use. Default "featureSubset".}
59 59
 
60 60
 \item{doResampling}{Boolean. If \code{TRUE}, then each cell in the counts
61 61
 matrix will be resampled according to a multinomial distribution to introduce
62
-noise before caculating perplexity. Default \code{FALSE}.}
62
+noise before calculating perplexity. Default \code{FALSE}.}
63 63
 
64 64
 \item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from
65 65
 the original counts matrix will be randomly selected. Default \code{TRUE}.}