Browse code

Removed the ability to subsample the counts matrix when calculating perplexity. Supporting subsampling properly will require a much larger update, in which theta (the probability of each cell population in each sample) is calculated for the new samples by predicting the population of each cell.

Joshua D. Campbell authored on 11/04/2022 18:48:36
Showing 5 changed files

... ...
@@ -354,14 +354,9 @@ setMethod(
354 354
 #' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts
355 355
 #' matrix will be resampled according to a multinomial distribution to introduce
356 356
 #' noise before calculating perplexity. Default \code{FALSE}.
357
-#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from
358
-#' the original counts matrix will be randomly selected. Default \code{TRUE}.
359 357
 #' @param numResample Integer. The number of times to resample the counts matrix
360
-#' for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
358
+#' for evaluating perplexity if \code{doResampling} is set to \code{TRUE}.
361 359
 #' Default \code{5}.
362
-#' @param numSubsample Integer. The number of cells to sample from the
363
-#' the counts matrix if \code{doSubsampling} is set to \code{TRUE}. 
364
-#' Default \code{5000}.
365 360
 #' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
366 361
 #'  a default value of \code{12345} is used. If \code{NULL}, no calls to
367 362
 #'  \link[withr]{with_seed} are made.
... ...
@@ -376,9 +371,7 @@ setGeneric("resamplePerplexity",
376 371
         useAssay = "counts",
377 372
         altExpName = "featureSubset",
378 373
         doResampling = FALSE,
379
-        doSubsampling = TRUE,
380 374
         numResample = 5,
381
-        numSubsample = 5000,
382 375
         seed = 12345) {
383 376
     standardGeneric("resamplePerplexity")})
384 377
 
... ...
@@ -395,9 +388,7 @@ setMethod("resamplePerplexity",
395 388
         useAssay = "counts",
396 389
         altExpName = "featureSubset",
397 390
         doResampling = FALSE,
398
-        doSubsampling = TRUE,
399 391
         numResample = 5,
400
-        numSubsample = 5000,
401 392
         seed = 12345) {
402 393
 
403 394
         altExp <- SingleCellExperiment::altExp(x, altExpName)
... ...
@@ -409,18 +400,14 @@ setMethod("resamplePerplexity",
409 400
                 counts = counts,
410 401
                 celdaList = celdaList,
411 402
                 doResampling = doResampling,
412
-                doSubsampling = doSubsampling,
413
-                numResample = numResample,
414
-                numSubsample = numSubsample)
403
+                numResample = numResample)
415 404
         } else {
416 405
             with_seed(seed,
417 406
                 res <- .resamplePerplexity(
418 407
                     counts = counts,
419 408
                     celdaList = celdaList,
420 409
                     doResampling = doResampling,
421
-                    doSubsampling = doSubsampling,
422
-                    numResample = numResample,
423
-                    numSubsample = numSubsample))
410
+                    numResample = numResample))
424 411
         }
425 412
 
426 413
         S4Vectors::metadata(altExp)$celda_grid_search <- res
... ...
@@ -444,9 +431,7 @@ setMethod("resamplePerplexity",
444 431
     function(x,
445 432
         celdaList,
446 433
         doResampling = FALSE,
447
-        doSubsampling = TRUE,
448 434
         numResample = 5,
449
-        numSubsample = 5000,
450 435
         seed = 12345) {
451 436
 
452 437
         if (is.null(seed)) {
... ...
@@ -454,18 +439,14 @@ setMethod("resamplePerplexity",
454 439
                 counts = x,
455 440
                 celdaList = celdaList,
456 441
                 doResampling = doResampling,
457
-                doSubsampling = doSubsampling,
458
-                numResample = numResample,
459
-                numSubsample = numSubsample)
442
+                numResample = numResample)
460 443
         } else {
461 444
             with_seed(seed,
462 445
                 res <- .resamplePerplexity(
463 446
                     counts = x,
464 447
                     celdaList = celdaList,
465 448
                     doResampling = doResampling,
466
-                    doSubsampling = doSubsampling,
467
-                    numResample = numResample,
468
-                    numSubsample = numSubsample))
449
+                    numResample = numResample))
469 450
         }
470 451
 
471 452
         return(res)
... ...
@@ -476,9 +457,7 @@ setMethod("resamplePerplexity",
476 457
 .resamplePerplexity <- function(counts,
477 458
     celdaList,
478 459
     doResampling = FALSE,
479
-    doSubsampling = TRUE,
480
-    numResample = 5,
481
-    numSubsample = 5000) {
460
+    numResample = 5) {
482 461
 
483 462
     if (!methods::is(celdaList, "celdaList")) {
484 463
         stop("celdaList parameter was not of class celdaList.")
... ...
@@ -486,31 +465,17 @@ setMethod("resamplePerplexity",
486 465
     if (!isTRUE(is.logical(doResampling))) {
487 466
         stop("The 'doResampling' parameter needs to be logical (TRUE/FALSE).")
488 467
     } 
489
-    if (!isTRUE(is.logical(doSubsampling))) {
490
-        stop("The 'doSubsampling' parameter needs to be logical (TRUE/FALSE).")
491
-    } 
492 468
     if (!isTRUE(doResampling) & (!is.numeric(numResample) || numResample < 1)) {
493 469
         stop("The 'numResample' parameter needs to be an integer greater ",
494 470
         "than 0.")
495 471
     }
496
-    if (!isTRUE(doSubsampling) & (!is.numeric(numSubsample) || numSubsample < 1)) {
497
-        stop("The 'numResample' parameter needs to be an integer between ",
498
-             "1 and the number of cells.")
499
-    }
500 472
 
501
-    if(isTRUE(doSubsampling) & numSubsample < ncol(counts)) {
502
-        ix <- sample(seq(ncol(counts)), size = numSubsample)
503
-        newCounts <- counts[,ix]
504
-        
505
-    } else {
506
-        newCounts <- counts
507
-    }
508 473
     if(isTRUE(doResampling)) {
509 474
         perpRes <- matrix(NA,
510 475
                           nrow = length(resList(celdaList)),
511 476
                           ncol = numResample)
512 477
         for (j in seq(numResample)) {
513
-            newCounts <- .resampleCountMatrix(newCounts)
478
+            newCounts <- .resampleCountMatrix(counts)
514 479
             for (i in seq(length(resList(celdaList)))) {
515 480
                 perpRes[i, j] <- perplexity(x = counts,
516 481
                     celdaMod = resList(celdaList)[[i]],
... ...
@@ -526,7 +491,7 @@ setMethod("resamplePerplexity",
526 491
         for (i in seq(length(resList(celdaList)))) {
527 492
             perpRes[i,1] <- perplexity(x = counts,
528 493
                                        celdaMod = resList(celdaList)[[i]],
529
-                                       newCounts = newCounts)
494
+                                       newCounts = counts)
530 495
         }
531 496
     }    
532 497
    
... ...
@@ -137,14 +137,9 @@
137 137
 #' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts
138 138
 #' matrix will be resampled according to a multinomial distribution to introduce
139 139
 #' noise before calculating perplexity. Default \code{FALSE}.
140
-#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from
141
-#' the original counts matrix will be randomly selected. Default \code{TRUE}.
142 140
 #' @param numResample Integer. The number of times to resample the counts matrix
143
-#' for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
141
+#' for evaluating perplexity if \code{doResampling} is set to \code{TRUE}.
144 142
 #' Default \code{5}.
145
-#' @param numSubsample Integer. The number of cells to sample from the
146
-#' the counts matrix if \code{doSubsampling} is set to \code{TRUE}. 
147
-#' Default \code{5000}.
148 143
 #' @param verbose Logical. Whether to print log messages. Default TRUE.
149 144
 #' @param logfile Character. Messages will be redirected to a file named
150 145
 #'  "logfile". If NULL, messages will be printed to stdout.  Default NULL.
... ...
@@ -174,9 +169,7 @@ setGeneric("recursiveSplitCell",
174 169
         seed = 12345,
175 170
         perplexity = TRUE,
176 171
         doResampling = FALSE,
177
-        doSubsampling = TRUE,
178 172
         numResample = 5,
179
-        numSubsample = 5000,
180 173
         logfile = NULL,
181 174
         verbose = TRUE) {
182 175
     standardGeneric("recursiveSplitCell")})
... ...
@@ -221,9 +214,7 @@ setMethod("recursiveSplitCell",
221 214
         seed = 12345,
222 215
         perplexity = TRUE,
223 216
         doResampling = FALSE,
224
-        doSubsampling = TRUE,
225 217
         numResample = 5,
226
-        numSubsample = 5000,
227 218
         logfile = NULL,
228 219
         verbose = TRUE) {
229 220
 
... ...
@@ -263,9 +254,7 @@ setMethod("recursiveSplitCell",
263 254
             seed = seed,
264 255
             perplexity = perplexity,
265 256
             doResampling = doResampling,
266
-            doSubsampling = doSubsampling,
267 257
             numResample = numResample,
268
-            numSubsample = numSubsample,
269 258
             logfile = logfile,
270 259
             verbose = verbose)
271 260
 
... ...
@@ -335,9 +324,7 @@ setMethod("recursiveSplitCell",
335 324
         seed = 12345,
336 325
         perplexity = TRUE,
337 326
         doResampling = FALSE,
338
-        doSubsampling = TRUE,
339 327
         numResample = 5,
340
-        numSubsample = 5000,
341 328
         logfile = NULL,
342 329
         verbose = TRUE) {
343 330
 
... ...
@@ -368,9 +355,7 @@ setMethod("recursiveSplitCell",
368 355
             seed = seed,
369 356
             perplexity = perplexity,
370 357
             doResampling = doResampling,
371
-            doSubsampling = doSubsampling,
372 358
             numResample = numResample,
373
-            numSubsample = numSubsample,
374 359
             logfile = logfile,
375 360
             verbose = verbose)
376 361
 
... ...
@@ -415,9 +400,7 @@ setMethod("recursiveSplitCell",
415 400
     seed,
416 401
     perplexity,
417 402
     doResampling,
418
-    doSubsampling,
419 403
     numResample,
420
-    numSubsample,
421 404
     logfile,
422 405
     verbose) {
423 406
 
... ...
@@ -436,9 +419,7 @@ setMethod("recursiveSplitCell",
436 419
             reorder = reorder,
437 420
             perplexity = perplexity,
438 421
             doResampling = doResampling,
439
-            doSubsampling = doSubsampling,
440 422
             numResample = numResample,
441
-            numSubsample = numSubsample,
442 423
             logfile = logfile,
443 424
             verbose = verbose)
444 425
     } else {
... ...
@@ -458,9 +439,7 @@ setMethod("recursiveSplitCell",
458 439
                 reorder = reorder,
459 440
                 perplexity = perplexity,
460 441
                 doResampling = doResampling,
461
-                doSubsampling = doSubsampling,
462 442
                 numResample = numResample,
463
-                numSubsample = numSubsample,
464 443
                 logfile = logfile,
465 444
                 verbose = verbose)
466 445
         )
... ...
@@ -484,9 +463,7 @@ setMethod("recursiveSplitCell",
484 463
                                reorder,
485 464
                                perplexity,
486 465
                                doResampling,
487
-                               doSubsampling,
488 466
                                numResample,
489
-                               numSubsample,
490 467
                                logfile,
491 468
                                verbose) {
492 469
 
... ...
@@ -890,9 +867,7 @@ setMethod("recursiveSplitCell",
890 867
     )
891 868
     celdaRes <- resamplePerplexity(counts, celdaRes,
892 869
                                    doResampling = doResampling,
893
-                                   doSubsampling = doSubsampling,
894
-                                   numResample = numResample,
895
-                                   numSubsample = numSubsample)
870
+                                   numResample = numResample)
896 871
   }
897 872
   endTime <- Sys.time()
898 873
   .logMessages(
... ...
@@ -973,14 +948,9 @@ setMethod("recursiveSplitCell",
973 948
 #' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts
974 949
 #' matrix will be resampled according to a multinomial distribution to introduce
975 950
 #' noise before calculating perplexity. Default \code{FALSE}.
976
-#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from
977
-#' the original counts matrix will be randomly selected. Default \code{TRUE}.
978 951
 #' @param numResample Integer. The number of times to resample the counts matrix
979
-#' for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
952
+#' for evaluating perplexity if \code{doResampling} is set to \code{TRUE}.
980 953
 #' Default \code{5}.
981
-#' @param numSubsample Integer. The number of cells to sample from the
982
-#' the counts matrix if \code{doSubsampling} is set to \code{TRUE}. 
983
-#' Default \code{5000}.
984 954
 #' @param verbose Logical. Whether to print log messages. Default TRUE.
985 955
 #' @param logfile Character. Messages will be redirected to a file named
986 956
 #'  "logfile". If NULL, messages will be printed to stdout.  Default NULL.
... ...
@@ -1010,9 +980,7 @@ setGeneric("recursiveSplitModule",
1010 980
         seed = 12345,
1011 981
         perplexity = TRUE,
1012 982
         doResampling = FALSE,
1013
-        doSubsampling = TRUE,
1014 983
         numResample = 5,
1015
-        numSubsample = 5000,
1016 984
         verbose = TRUE,
1017 985
         logfile = NULL) {
1018 986
     standardGeneric("recursiveSplitModule")})
... ...
@@ -1050,9 +1018,7 @@ setMethod("recursiveSplitModule",
1050 1018
         seed = 12345,
1051 1019
         perplexity = TRUE,
1052 1020
         doResampling = FALSE,
1053
-        doSubsampling = TRUE,
1054 1021
         numResample = 5,
1055
-        numSubsample = 5000,
1056 1022
         verbose = TRUE,
1057 1023
         logfile = NULL) {
1058 1024
 
... ...
@@ -1092,9 +1058,7 @@ setMethod("recursiveSplitModule",
1092 1058
             seed = seed,
1093 1059
             perplexity = perplexity,
1094 1060
             doResampling = doResampling,
1095
-            doSubsampling = doSubsampling,
1096 1061
             numResample = numResample,
1097
-            numSubsample = numSubsample,
1098 1062
             verbose = verbose,
1099 1063
             logfile = logfile)
1100 1064
 
... ...
@@ -1157,9 +1121,7 @@ setMethod("recursiveSplitModule",
1157 1121
         seed = 12345,
1158 1122
         perplexity = TRUE,
1159 1123
         doResampling = FALSE,
1160
-        doSubsampling = TRUE,
1161 1124
         numResample = 5,
1162
-        numSubsample = 5000,
1163 1125
         verbose = TRUE,
1164 1126
         logfile = NULL) {
1165 1127
 
... ...
@@ -1190,9 +1152,7 @@ setMethod("recursiveSplitModule",
1190 1152
             seed = seed,
1191 1153
             perplexity = perplexity,
1192 1154
             doResampling = doResampling,
1193
-            doSubsampling = doSubsampling,
1194 1155
             numResample = numResample,
1195
-            numSubsample = numSubsample,
1196 1156
             verbose = verbose,
1197 1157
             logfile = logfile)
1198 1158
 
... ...
@@ -1237,9 +1197,7 @@ setMethod("recursiveSplitModule",
1237 1197
     seed,
1238 1198
     perplexity,
1239 1199
     doResampling,
1240
-    doSubsampling,
1241 1200
     numResample,
1242
-    numSubsample,
1243 1201
     verbose,
1244 1202
     logfile) {
1245 1203
 
... ...
@@ -1261,9 +1219,7 @@ setMethod("recursiveSplitModule",
1261 1219
             verbose = verbose,
1262 1220
             logfile = logfile,
1263 1221
             doResampling = doResampling,
1264
-            doSubsampling = doSubsampling,
1265
-            numResample = numResample,
1266
-            numSubsample = numSubsample)
1222
+            numResample = numResample)
1267 1223
     } else {
1268 1224
         with_seed(seed,
1269 1225
             celdaList <- .recursiveSplitModule(
... ...
@@ -1283,9 +1239,7 @@ setMethod("recursiveSplitModule",
1283 1239
                 verbose = verbose,
1284 1240
                 logfile = logfile,
1285 1241
                 doResampling = doResampling,
1286
-                doSubsampling = doSubsampling,
1287
-                numResample = numResample,
1288
-                numSubsample = numSubsample)
1242
+                numResample = numResample)
1289 1243
         )
1290 1244
     }
1291 1245
 
... ...
@@ -1309,9 +1263,7 @@ setMethod("recursiveSplitModule",
1309 1263
                                  verbose = TRUE,
1310 1264
                                  logfile = NULL,
1311 1265
                                  doResampling = FALSE,
1312
-                                 doSubsampling = TRUE,
1313
-                                 numResample = 5,
1314
-                                 numSubsample = 5000) {
1266
+                                 numResample = 5) {
1315 1267
 
1316 1268
   .logMessages(paste(rep("=", 50), collapse = ""),
1317 1269
     logfile = logfile,
... ...
@@ -1673,9 +1625,7 @@ setMethod("recursiveSplitModule",
1673 1625
     )
1674 1626
     celdaRes <- resamplePerplexity(counts, celdaRes,
1675 1627
                                    doResampling = doResampling,
1676
-                                   doSubsampling = doSubsampling,
1677
-                                   numResample = numResample,
1678
-                                   numSubsample = numSubsample)
1628
+                                   numResample = numResample)
1679 1629
   }
1680 1630
 
1681 1631
   endTime <- Sys.time()
... ...
@@ -24,9 +24,7 @@ recursiveSplitCell(
24 24
   seed = 12345,
25 25
   perplexity = TRUE,
26 26
   doResampling = FALSE,
27
-  doSubsampling = TRUE,
28 27
   numResample = 5,
29
-  numSubsample = 5000,
30 28
   logfile = NULL,
31 29
   verbose = TRUE
32 30
 )
... ...
@@ -49,9 +47,7 @@ recursiveSplitCell(
49 47
   seed = 12345,
50 48
   perplexity = TRUE,
51 49
   doResampling = FALSE,
52
-  doSubsampling = TRUE,
53 50
   numResample = 5,
54
-  numSubsample = 5000,
55 51
   logfile = NULL,
56 52
   verbose = TRUE
57 53
 )
... ...
@@ -74,9 +70,7 @@ recursiveSplitCell(
74 70
   seed = 12345,
75 71
   perplexity = TRUE,
76 72
   doResampling = FALSE,
77
-  doSubsampling = TRUE,
78 73
   numResample = 5,
79
-  numSubsample = 5000,
80 74
   logfile = NULL,
81 75
   verbose = TRUE
82 76
 )
... ...
@@ -151,17 +145,10 @@ If FALSE, then perplexity can be calculated later with
151 145
 matrix will be resampled according to a multinomial distribution to introduce
152 146
 noise before calculating perplexity. Default \code{FALSE}.}
153 147
 
154
-\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from
155
-the original counts matrix will be randomly selected. Default \code{TRUE}.}
156
-
157 148
 \item{numResample}{Integer. The number of times to resample the counts matrix
158
-for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
149
+for evaluating perplexity if \code{doResampling} is set to \code{TRUE}.
159 150
 Default \code{5}.}
160 151
 
161
-\item{numSubsample}{Integer. The number of cells to sample from the
162
-the counts matrix if \code{doSubsampling} is set to \code{TRUE}. 
163
-Default \code{5000}.}
164
-
165 152
 \item{logfile}{Character. Messages will be redirected to a file named
166 153
 "logfile". If NULL, messages will be printed to stdout.  Default NULL.}
167 154
 
... ...
@@ -24,9 +24,7 @@ recursiveSplitModule(
24 24
   seed = 12345,
25 25
   perplexity = TRUE,
26 26
   doResampling = FALSE,
27
-  doSubsampling = TRUE,
28 27
   numResample = 5,
29
-  numSubsample = 5000,
30 28
   verbose = TRUE,
31 29
   logfile = NULL
32 30
 )
... ...
@@ -49,9 +47,7 @@ recursiveSplitModule(
49 47
   seed = 12345,
50 48
   perplexity = TRUE,
51 49
   doResampling = FALSE,
52
-  doSubsampling = TRUE,
53 50
   numResample = 5,
54
-  numSubsample = 5000,
55 51
   verbose = TRUE,
56 52
   logfile = NULL
57 53
 )
... ...
@@ -74,9 +70,7 @@ recursiveSplitModule(
74 70
   seed = 12345,
75 71
   perplexity = TRUE,
76 72
   doResampling = FALSE,
77
-  doSubsampling = TRUE,
78 73
   numResample = 5,
79
-  numSubsample = 5000,
80 74
   verbose = TRUE,
81 75
   logfile = NULL
82 76
 )
... ...
@@ -146,17 +140,10 @@ If FALSE, then perplexity can be calculated later with
146 140
 matrix will be resampled according to a multinomial distribution to introduce
147 141
 noise before calculating perplexity. Default \code{FALSE}.}
148 142
 
149
-\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from
150
-the original counts matrix will be randomly selected. Default \code{TRUE}.}
151
-
152 143
 \item{numResample}{Integer. The number of times to resample the counts matrix
153
-for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
144
+for evaluating perplexity if \code{doResampling} is set to \code{TRUE}.
154 145
 Default \code{5}.}
155 146
 
156
-\item{numSubsample}{Integer. The number of cells to sample from the
157
-the counts matrix if \code{doSubsampling} is set to \code{TRUE}. 
158
-Default \code{5000}.}
159
-
160 147
 \item{verbose}{Logical. Whether to print log messages. Default TRUE.}
161 148
 
162 149
 \item{logfile}{Character. Messages will be redirected to a file named
... ...
@@ -12,9 +12,7 @@ resamplePerplexity(
12 12
   useAssay = "counts",
13 13
   altExpName = "featureSubset",
14 14
   doResampling = FALSE,
15
-  doSubsampling = TRUE,
16 15
   numResample = 5,
17
-  numSubsample = 5000,
18 16
   seed = 12345
19 17
 )
20 18
 
... ...
@@ -23,9 +21,7 @@ resamplePerplexity(
23 21
   useAssay = "counts",
24 22
   altExpName = "featureSubset",
25 23
   doResampling = FALSE,
26
-  doSubsampling = TRUE,
27 24
   numResample = 5,
28
-  numSubsample = 5000,
29 25
   seed = 12345
30 26
 )
31 27
 
... ...
@@ -33,9 +29,7 @@ resamplePerplexity(
33 29
   x,
34 30
   celdaList,
35 31
   doResampling = FALSE,
36
-  doSubsampling = TRUE,
37 32
   numResample = 5,
38
-  numSubsample = 5000,
39 33
   seed = 12345
40 34
 )
41 35
 }
... ...
@@ -61,17 +55,10 @@ to use. Default "featureSubset".}
61 55
 matrix will be resampled according to a multinomial distribution to introduce
62 56
 noise before calculating perplexity. Default \code{FALSE}.}
63 57
 
64
-\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from
65
-the original counts matrix will be randomly selected. Default \code{TRUE}.}
66
-
67 58
 \item{numResample}{Integer. The number of times to resample the counts matrix
68
-for evaluating perplexity if \code{doSubsampling} is set to \code{TRUE}.
59
+for evaluating perplexity if \code{doResampling} is set to \code{TRUE}.
69 60
 Default \code{5}.}
70 61
 
71
-\item{numSubsample}{Integer. The number of cells to sample from the
72
-the counts matrix if \code{doSubsampling} is set to \code{TRUE}. 
73
-Default \code{5000}.}
74
-
75 62
 \item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
76 63
 a default value of \code{12345} is used. If \code{NULL}, no calls to
77 64
 \link[withr]{with_seed} are made.}