#### Added subsampling parameters for perplexity calculation to recursive split functions

Joshua D. Campbell authored on 28/03/2022 12:00:25
Showing 12 changed files

 ... ... @@ -125,6 +125,7 @@ import(grDevices) 125 125  import(graphics) 126 126  import(grid) 127 127  import(uwot) 128 +importClassesFrom(Matrix,dgCMatrix) 128 129  importClassesFrom(SingleCellExperiment,SingleCellExperiment) 129 130  importFrom(MCMCprecision,fit_dirichlet) 130 131  importFrom(Matrix,colSums)
 ... ... @@ -353,7 +353,7 @@ setMethod( 353 353  #' matrix object. 354 354  #' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts 355 355  #' matrix will be resampled according to a multinomial distribution to introduce 356 -#' noise before caculating perplexity. Default \code{FALSE}. 356 +#' noise before calculating perplexity. Default \code{FALSE}. 357 357  #' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from 358 358  #' the original counts matrix will be randomly selected. Default \code{TRUE}. 359 359  #' @param numResample Integer. The number of times to resample the counts matrix
 ... ... @@ -134,6 +134,17 @@ 134 134  #' @param perplexity Logical. Whether to calculate perplexity for each model. 135 135  #' If FALSE, then perplexity can be calculated later with 136 136  #' \link{resamplePerplexity}. Default TRUE. 137 +#' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts 138 +#' matrix will be resampled according to a multinomial distribution to introduce 139 +#' noise before calculating perplexity. Default \code{FALSE}. 140 +#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from 141 +#' the original counts matrix will be randomly selected. Default \code{TRUE}. 142 +#' @param numResample Integer. The number of times to resample the counts matrix 143 +#' for evaluating perplexity if \code{doResampling} is set to \code{TRUE}. 144 +#' Default \code{5}. 145 +#' @param numSubsample Integer. The number of cells to sample from the 146 +#' counts matrix if \code{doSubsampling} is set to \code{TRUE}.  147 +#' Default \code{5000}. 137 148  #' @param verbose Logical. Whether to print log messages. Default TRUE. 138 149  #' @param logfile Character. Messages will be redirected to a file named 139 150  #' "logfile". If NULL, messages will be printed to stdout. Default NULL. ... ... @@ -162,6 +173,10 @@ setGeneric("recursiveSplitCell", 162 173  reorder = TRUE, 163 174  seed = 12345, 164 175  perplexity = TRUE, 176 + doResampling = FALSE, 177 + doSubsampling = TRUE, 178 + numResample = 5, 179 + numSubsample = 5000, 165 180  logfile = NULL, 166 181  verbose = TRUE) { 167 182  standardGeneric("recursiveSplitCell")}) ... ... @@ -205,6 +220,10 @@ setMethod("recursiveSplitCell", 205 220  reorder = TRUE, 206 221  seed = 12345, 207 222  perplexity = TRUE, 223 + doResampling = FALSE, 224 + doSubsampling = TRUE, 225 + numResample = 5, 226 + numSubsample = 5000, 208 227  logfile = NULL, 209 228  verbose = TRUE) { 210 229   ... ... 
@@ -243,6 +262,10 @@ setMethod("recursiveSplitCell", 243 262  reorder = reorder, 244 263  seed = seed, 245 264  perplexity = perplexity, 265 + doResampling = doResampling, 266 + doSubsampling = doSubsampling, 267 + numResample = numResample, 268 + numSubsample = numSubsample, 246 269  logfile = logfile, 247 270  verbose = verbose) 248 271   ... ... @@ -311,6 +334,10 @@ setMethod("recursiveSplitCell", 311 334  reorder = TRUE, 312 335  seed = 12345, 313 336  perplexity = TRUE, 337 + doResampling = FALSE, 338 + doSubsampling = TRUE, 339 + numResample = 5, 340 + numSubsample = 5000, 314 341  logfile = NULL, 315 342  verbose = TRUE) { 316 343   ... ... @@ -340,6 +367,10 @@ setMethod("recursiveSplitCell", 340 367  reorder = reorder, 341 368  seed = seed, 342 369  perplexity = perplexity, 370 + doResampling = doResampling, 371 + doSubsampling = doSubsampling, 372 + numResample = numResample, 373 + numSubsample = numSubsample, 343 374  logfile = logfile, 344 375  verbose = verbose) 345 376   ... ... @@ -383,6 +414,10 @@ setMethod("recursiveSplitCell", 383 414  reorder, 384 415  seed, 385 416  perplexity, 417 + doResampling, 418 + doSubsampling, 419 + numResample, 420 + numSubsample, 386 421  logfile, 387 422  verbose) { 388 423   ... ... @@ -400,6 +435,10 @@ setMethod("recursiveSplitCell", 400 435  minCell = minCell, 401 436  reorder = reorder, 402 437  perplexity = perplexity, 438 + doResampling = doResampling, 439 + doSubsampling = doSubsampling, 440 + numResample = numResample, 441 + numSubsample = numSubsample, 403 442  logfile = logfile, 404 443  verbose = verbose) 405 444  } else { ... ... @@ -418,6 +457,10 @@ setMethod("recursiveSplitCell", 418 457  minCell = minCell, 419 458  reorder = reorder, 420 459  perplexity = perplexity, 460 + doResampling = doResampling, 461 + doSubsampling = doSubsampling, 462 + numResample = numResample, 463 + numSubsample = numSubsample, 421 464  logfile = logfile, 422 465  verbose = verbose) 423 466  ) ... ... 
@@ -440,6 +483,10 @@ setMethod("recursiveSplitCell", 440 483  minCell, 441 484  reorder, 442 485  perplexity, 486 + doResampling, 487 + doSubsampling, 488 + numResample, 489 + numSubsample, 443 490  logfile, 444 491  verbose) { 445 492   ... ... @@ -841,7 +888,11 @@ setMethod("recursiveSplitCell", 841 888  verbose = verbose, 842 889  logfile = NULL 843 890  ) 844 - celdaRes <- resamplePerplexity(counts, celdaRes) 891 + celdaRes <- resamplePerplexity(counts, celdaRes, 892 + doResampling = doResampling, 893 + doSubsampling = doSubsampling, 894 + numResample = numResample, 895 + numSubsample = numSubsample) 845 896  } 846 897  endTime <- Sys.time() 847 898  .logMessages( ... ... @@ -918,7 +969,18 @@ setMethod("recursiveSplitCell", 918 969  #' \link[withr]{with_seed} are made. 919 970  #' @param perplexity Logical. Whether to calculate perplexity for each model. 920 971  #' If FALSE, then perplexity can be calculated later with 921 -#' \link{resamplePerplexity}. Default TRUE. 972 +#' \link{resamplePerplexity}. Default \code{TRUE}. 973 +#' @param doResampling Boolean. If \code{TRUE}, then each cell in the counts 974 +#' matrix will be resampled according to a multinomial distribution to introduce 975 +#' noise before calculating perplexity. Default \code{FALSE}. 976 +#' @param doSubsampling Boolean. If \code{TRUE}, then a subset of cells from 977 +#' the original counts matrix will be randomly selected. Default \code{TRUE}. 978 +#' @param numResample Integer. The number of times to resample the counts matrix 979 +#' for evaluating perplexity if \code{doResampling} is set to \code{TRUE}. 980 +#' Default \code{5}. 981 +#' @param numSubsample Integer. The number of cells to sample from the 982 +#' counts matrix if \code{doSubsampling} is set to \code{TRUE}.  983 +#' Default \code{5000}. 922 984  #' @param verbose Logical. Whether to print log messages. Default TRUE. 923 985  #' @param logfile Character. 
Messages will be redirected to a file named 924 986  #' "logfile". If NULL, messages will be printed to stdout. Default NULL. ... ... @@ -947,6 +1009,10 @@ setGeneric("recursiveSplitModule", 947 1009  reorder = TRUE, 948 1010  seed = 12345, 949 1011  perplexity = TRUE, 1012 + doResampling = FALSE, 1013 + doSubsampling = TRUE, 1014 + numResample = 5, 1015 + numSubsample = 5000, 950 1016  verbose = TRUE, 951 1017  logfile = NULL) { 952 1018  standardGeneric("recursiveSplitModule")}) ... ... @@ -983,6 +1049,10 @@ setMethod("recursiveSplitModule", 983 1049  reorder = TRUE, 984 1050  seed = 12345, 985 1051  perplexity = TRUE, 1052 + doResampling = FALSE, 1053 + doSubsampling = TRUE, 1054 + numResample = 5, 1055 + numSubsample = 5000, 986 1056  verbose = TRUE, 987 1057  logfile = NULL) { 988 1058   ... ... @@ -1021,6 +1091,10 @@ setMethod("recursiveSplitModule", 1021 1091  reorder = reorder, 1022 1092  seed = seed, 1023 1093  perplexity = perplexity, 1094 + doResampling = doResampling, 1095 + doSubsampling = doSubsampling, 1096 + numResample = numResample, 1097 + numSubsample = numSubsample, 1024 1098  verbose = verbose, 1025 1099  logfile = logfile) 1026 1100   ... ... @@ -1082,6 +1156,10 @@ setMethod("recursiveSplitModule", 1082 1156  reorder = TRUE, 1083 1157  seed = 12345, 1084 1158  perplexity = TRUE, 1159 + doResampling = FALSE, 1160 + doSubsampling = TRUE, 1161 + numResample = 5, 1162 + numSubsample = 5000, 1085 1163  verbose = TRUE, 1086 1164  logfile = NULL) { 1087 1165   ... ... @@ -1111,6 +1189,10 @@ setMethod("recursiveSplitModule", 1111 1189  reorder = reorder, 1112 1190  seed = seed, 1113 1191  perplexity = perplexity, 1192 + doResampling = doResampling, 1193 + doSubsampling = doSubsampling, 1194 + numResample = numResample, 1195 + numSubsample = numSubsample, 1114 1196  verbose = verbose, 1115 1197  logfile = logfile) 1116 1198   ... ... 
@@ -1154,6 +1236,10 @@ setMethod("recursiveSplitModule", 1154 1236  reorder, 1155 1237  seed, 1156 1238  perplexity, 1239 + doResampling, 1240 + doSubsampling, 1241 + numResample, 1242 + numSubsample, 1157 1243  verbose, 1158 1244  logfile) { 1159 1245   ... ... @@ -1173,7 +1259,11 @@ setMethod("recursiveSplitModule", 1173 1259  reorder = reorder, 1174 1260  perplexity = perplexity, 1175 1261  verbose = verbose, 1176 - logfile = logfile) 1262 + logfile = logfile, 1263 + doResampling = doResampling, 1264 + doSubsampling = doSubsampling, 1265 + numResample = numResample, 1266 + numSubsample = numSubsample) 1177 1267  } else { 1178 1268  with_seed(seed, 1179 1269  celdaList <- .recursiveSplitModule( ... ... @@ -1191,7 +1281,11 @@ setMethod("recursiveSplitModule", 1191 1281  reorder = reorder, 1192 1282  perplexity = perplexity, 1193 1283  verbose = verbose, 1194 - logfile = logfile) 1284 + logfile = logfile, 1285 + doResampling = doResampling, 1286 + doSubsampling = doSubsampling, 1287 + numResample = numResample, 1288 + numSubsample = numSubsample) 1195 1289  ) 1196 1290  } 1197 1291   ... ... @@ -1213,7 +1307,11 @@ setMethod("recursiveSplitModule", 1213 1307  reorder = TRUE, 1214 1308  perplexity = TRUE, 1215 1309  verbose = TRUE, 1216 - logfile = NULL) { 1310 + logfile = NULL, 1311 + doResampling = FALSE, 1312 + doSubsampling = TRUE, 1313 + numResample = 5, 1314 + numSubsample = 5000) { 1217 1315   1218 1316  .logMessages(paste(rep("=", 50), collapse = ""), 1219 1317  logfile = logfile, ... ... @@ -1573,7 +1671,11 @@ setMethod("recursiveSplitModule", 1573 1671  verbose = verbose, 1574 1672  logfile = NULL 1575 1673  ) 1576 - celdaRes <- resamplePerplexity(counts, celdaRes) 1674 + celdaRes <- resamplePerplexity(counts, celdaRes, 1675 + doResampling = doResampling, 1676 + doSubsampling = doSubsampling, 1677 + numResample = numResample, 1678 + numSubsample = numSubsample) 1577 1679  } 1578 1680   1579 1681  endTime <- Sys.time()
 1580 1682 new file mode 100644 ... ... @@ -0,0 +1,19 @@ 1 +% Generated by roxygen2: do not edit by hand 2 +% Please edit documentation in R/RcppExports.R 3 +\name{eigenMatMultInt} 4 +\alias{eigenMatMultInt} 5 +\title{Fast matrix multiplication for double x int} 6 +\usage{ 7 +eigenMatMultInt(A, B) 8 +} 9 +\arguments{ 10 +\item{A}{a double matrix} 11 + 12 +\item{B}{an integer matrix} 13 +} 14 +\value{ 15 +An integer matrix representing the product of A and B 16 +} 17 +\description{ 18 +Fast matrix multiplication for double x int 19 +}
 0 20 new file mode 100644 ... ... @@ -0,0 +1,19 @@ 1 +% Generated by roxygen2: do not edit by hand 2 +% Please edit documentation in R/RcppExports.R 3 +\name{eigenMatMultNumeric} 4 +\alias{eigenMatMultNumeric} 5 +\title{Fast matrix multiplication for double x double} 6 +\usage{ 7 +eigenMatMultNumeric(A, B) 8 +} 9 +\arguments{ 10 +\item{A}{a double matrix} 11 + 12 +\item{B}{a double matrix} 13 +} 14 +\value{ 15 +A double matrix representing the product of A and B 16 +} 17 +\description{ 18 +Fast matrix multiplication for double x double 19 +}
 0 20 new file mode 100644 ... ... @@ -0,0 +1,19 @@ 1 +% Generated by roxygen2: do not edit by hand 2 +% Please edit documentation in R/RcppExports.R 3 +\name{fastNormProp} 4 +\alias{fastNormProp} 5 +\title{Fast normalization for numeric matrix} 6 +\usage{ 7 +fastNormProp(R_counts, R_alpha) 8 +} 9 +\arguments{ 10 +\item{R_counts}{An integer matrix} 11 + 12 +\item{R_alpha}{A double value to be added to the matrix as a pseudocount} 13 +} 14 +\value{ 15 +A numeric matrix where the columns have been normalized to proportions 16 +} 17 +\description{ 18 +Fast normalization for numeric matrix 19 +}
 0 20 new file mode 100644 ... ... @@ -0,0 +1,19 @@ 1 +% Generated by roxygen2: do not edit by hand 2 +% Please edit documentation in R/RcppExports.R 3 +\name{fastNormPropLog} 4 +\alias{fastNormPropLog} 5 +\title{Fast normalization for numeric matrix} 6 +\usage{ 7 +fastNormPropLog(R_counts, R_alpha) 8 +} 9 +\arguments{ 10 +\item{R_counts}{An integer matrix} 11 + 12 +\item{R_alpha}{A double value to be added to the matrix as a pseudocount} 13 +} 14 +\value{ 15 +A numeric matrix where the columns have been normalized to proportions 16 +} 17 +\description{ 18 +Fast normalization for numeric matrix 19 +}
 0 20 new file mode 100644 ... ... @@ -0,0 +1,19 @@ 1 +% Generated by roxygen2: do not edit by hand 2 +% Please edit documentation in R/RcppExports.R 3 +\name{fastNormPropSqrt} 4 +\alias{fastNormPropSqrt} 5 +\title{Fast normalization for numeric matrix} 6 +\usage{ 7 +fastNormPropSqrt(R_counts, R_alpha) 8 +} 9 +\arguments{ 10 +\item{R_counts}{An integer matrix} 11 + 12 +\item{R_alpha}{A double value to be added to the matrix as a pseudocount} 13 +} 14 +\value{ 15 +A numeric matrix where the columns have been normalized to proportions 16 +} 17 +\description{ 18 +Fast normalization for numeric matrix 19 +}
 0 20 new file mode 100644 ... ... @@ -0,0 +1,17 @@ 1 +% Generated by roxygen2: do not edit by hand 2 +% Please edit documentation in R/RcppExports.R 3 +\name{nonzero} 4 +\alias{nonzero} 5 +\title{Get row and column indices of nonzero elements in the matrix} 6 +\usage{ 7 +nonzero(R_counts) 8 +} 9 +\arguments{ 10 +\item{R_counts}{A matrix} 11 +} 12 +\value{ 13 +An integer matrix where each row is a (row, column) index pair 14 +} 15 +\description{ 16 +Get row and column indices of nonzero elements in the matrix 17 +}
 ... ... @@ -23,6 +23,10 @@ recursiveSplitCell( 23 23  reorder = TRUE, 24 24  seed = 12345, 25 25  perplexity = TRUE, 26 + doResampling = FALSE, 27 + doSubsampling = TRUE, 28 + numResample = 5, 29 + numSubsample = 5000, 26 30  logfile = NULL, 27 31  verbose = TRUE 28 32  ) ... ... @@ -44,6 +48,10 @@ recursiveSplitCell( 44 48  reorder = TRUE, 45 49  seed = 12345, 46 50  perplexity = TRUE, 51 + doResampling = FALSE, 52 + doSubsampling = TRUE, 53 + numResample = 5, 54 + numSubsample = 5000, 47 55  logfile = NULL, 48 56  verbose = TRUE 49 57  ) ... ... @@ -65,6 +73,10 @@ recursiveSplitCell( 65 73  reorder = TRUE, 66 74  seed = 12345, 67 75  perplexity = TRUE, 76 + doResampling = FALSE, 77 + doSubsampling = TRUE, 78 + numResample = 5, 79 + numSubsample = 5000, 68 80  logfile = NULL, 69 81  verbose = TRUE 70 82  ) ... ... @@ -135,6 +147,21 @@ a default value of 12345 is used. If NULL, no calls to 135 147  If FALSE, then perplexity can be calculated later with 136 148  \link{resamplePerplexity}. Default TRUE.} 137 149   150 +\item{doResampling}{Boolean. If \code{TRUE}, then each cell in the counts 151 +matrix will be resampled according to a multinomial distribution to introduce 152 +noise before calculating perplexity. Default \code{FALSE}.} 153 + 154 +\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from 155 +the original counts matrix will be randomly selected. Default \code{TRUE}.} 156 + 157 +\item{numResample}{Integer. The number of times to resample the counts matrix 158 +for evaluating perplexity if \code{doResampling} is set to \code{TRUE}. 159 +Default \code{5}.} 160 + 161 +\item{numSubsample}{Integer. The number of cells to sample from the 162 +counts matrix if \code{doSubsampling} is set to \code{TRUE}.  163 +Default \code{5000}.} 164 + 138 165  \item{logfile}{Character. Messages will be redirected to a file named 139 166  "logfile". If NULL, messages will be printed to stdout. Default NULL.} 140 167 
 ... ... @@ -23,6 +23,10 @@ recursiveSplitModule( 23 23  reorder = TRUE, 24 24  seed = 12345, 25 25  perplexity = TRUE, 26 + doResampling = FALSE, 27 + doSubsampling = TRUE, 28 + numResample = 5, 29 + numSubsample = 5000, 26 30  verbose = TRUE, 27 31  logfile = NULL 28 32  ) ... ... @@ -44,6 +48,10 @@ recursiveSplitModule( 44 48  reorder = TRUE, 45 49  seed = 12345, 46 50  perplexity = TRUE, 51 + doResampling = FALSE, 52 + doSubsampling = TRUE, 53 + numResample = 5, 54 + numSubsample = 5000, 47 55  verbose = TRUE, 48 56  logfile = NULL 49 57  ) ... ... @@ -65,6 +73,10 @@ recursiveSplitModule( 65 73  reorder = TRUE, 66 74  seed = 12345, 67 75  perplexity = TRUE, 76 + doResampling = FALSE, 77 + doSubsampling = TRUE, 78 + numResample = 5, 79 + numSubsample = 5000, 68 80  verbose = TRUE, 69 81  logfile = NULL 70 82  ) ... ... @@ -128,7 +140,22 @@ a default value of 12345 is used. If NULL, no calls to 128 140   129 141  \item{perplexity}{Logical. Whether to calculate perplexity for each model. 130 142  If FALSE, then perplexity can be calculated later with 131 -\link{resamplePerplexity}. Default TRUE.} 143 +\link{resamplePerplexity}. Default \code{TRUE}.} 144 + 145 +\item{doResampling}{Boolean. If \code{TRUE}, then each cell in the counts 146 +matrix will be resampled according to a multinomial distribution to introduce 147 +noise before calculating perplexity. Default \code{FALSE}.} 148 + 149 +\item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from 150 +the original counts matrix will be randomly selected. Default \code{TRUE}.} 151 + 152 +\item{numResample}{Integer. The number of times to resample the counts matrix 153 +for evaluating perplexity if \code{doResampling} is set to \code{TRUE}. 154 +Default \code{5}.} 155 + 156 +\item{numSubsample}{Integer. The number of cells to sample from the 157 +counts matrix if \code{doSubsampling} is set to \code{TRUE}.  158 +Default \code{5000}.} 132 159   133 160  \item{verbose}{Logical. 
Whether to print log messages. Default TRUE.} 134 161 
 ... ... @@ -59,7 +59,7 @@ to use. Default "featureSubset".} 59 59   60 60  \item{doResampling}{Boolean. If \code{TRUE}, then each cell in the counts 61 61  matrix will be resampled according to a multinomial distribution to introduce 62 -noise before caculating perplexity. Default \code{FALSE}.} 62 +noise before calculating perplexity. Default \code{FALSE}.} 63 63   64 64  \item{doSubsampling}{Boolean. If \code{TRUE}, then a subset of cells from 65 65  the original counts matrix will be randomly selected. Default \code{TRUE}.}