Browse code

Updated UMAP and tSNE functions. Enabled PCA on UMAP with celda_C results. Switched to 'uwot' package for UMAP generation

Joshua D. Campbell authored on 23/08/2019 02:37:35
Showing 11 changed files

... ...
@@ -93,6 +93,7 @@ import(grid)
93 93
 import(gridExtra, except = c(combine))
94 94
 import(magrittr)
95 95
 import(stats, except = c(start, end))
96
+import(uwot)
96 97
 importFrom(MAST,FromMatrix)
97 98
 importFrom(MAST,summary)
98 99
 importFrom(MAST,zlm)
... ...
@@ -140,8 +141,6 @@ importFrom(scales,brewer_pal)
140 141
 importFrom(scales,dscale)
141 142
 importFrom(scales,hue_pal)
142 143
 importFrom(stringi,stri_list2matrix)
143
-importFrom(umap,umap)
144
-importFrom(umap,umap.defaults)
145 144
 importFrom(withr,with_seed)
146 145
 useDynLib(celda,"_colSumByGroup")
147 146
 useDynLib(celda,"_colSumByGroupChange")
... ...
@@ -596,33 +596,26 @@ setGeneric("celdaTsne",
596 596
 #'  requires more memory. Default NULL.
597 597
 #' @param minClusterSize Integer. Do not subsample cell clusters below this
598 598
 #'  threshold. Default 100.
599
-#' @param initialDims Integer. PCA will be used to reduce the dimentionality
600
-#'  of the dataset. The top 'initialDims' principal components will be used
601
-#'  for umap. Default 20.
602 599
 #' @param modules Integer vector. Determines which features modules to use for
603 600
 #'  UMAP. If NULL, all modules will be used. Default NULL.
604 601
 #' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
605 602
 #'  a default value of 12345 is used. If NULL, no calls to
606 603
 #'  \link[withr]{with_seed} are made.
607
-#' @param umapConfig An object of class "umapConfig" specifying parameters to
608 604
 #'  the UMAP algorithm.
609
-#' @return Numeric Matrix of dimension `ncol(counts)` x 2, colums representing
610
-#'  the "X" and "Y" coordinates in the data's t-SNE represetation.
611
-#' @examples
605
+#' @param ... Additional parameters to `uwot::umap`
606
+#' @return A two column matrix of UMAP coordinates
+#' @examples
612 607
 #' data(celdaCGSim, celdaCGMod)
613
-#' tsneRes <- celdaUmap(celdaCGSim$counts, celdaCGMod)
614
-#' @importFrom umap umap.defaults
608
+#' umapRes <- celdaUmap(celdaCGSim$counts, celdaCGMod)
615 609
 #' @export
616 610
 setGeneric("celdaUmap",
617 611
     signature = "celdaMod",
618 612
     function(counts,
619 613
         celdaMod,
620
-        maxCells = 25000,
614
+        maxCells = NULL,
621 615
         minClusterSize = 100,
622
-        initialDims = 20,
623 616
         modules = NULL,
624 617
         seed = 12345,
625
-        umapConfig = umap::umap.defaults) {
618
+	    ...) {
626 619
         standardGeneric("celdaUmap")
627 620
     })
628 621
 
... ...
@@ -1042,8 +1042,6 @@ setMethod("celdaHeatmap", signature(celdaMod = "celda_C"),
1042 1042
 #' @param initialDims Integer. PCA will be used to reduce the dimensionality
1043 1043
 #'  of the dataset. The top 'initialDims' principal components will be used
1044 1044
 #'  for tSNE. Default 20.
1045
-#' @param modules Integer vector. Determines which features modules to use for
1046
-#'  tSNE. If NULL, all modules will be used. Default NULL.
1047 1045
 #' @param perplexity Numeric. Perplexity parameter for tSNE. Default 20.
1048 1046
 #' @param maxIter Integer. Maximum number of iterations in tSNE generation.
1049 1047
 #'  Default 2500.
... ...
@@ -1063,7 +1061,6 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"),
1063 1061
         maxCells = NULL,
1064 1062
         minClusterSize = 100,
1065 1063
         initialDims = 20,
1066
-        modules = NULL,
1067 1064
         perplexity = 20,
1068 1065
         maxIter = 2500,
1069 1066
         seed = 12345) {
... ...
@@ -1074,7 +1071,6 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"),
1074 1071
                 maxCells = maxCells,
1075 1072
                 minClusterSize = minClusterSize,
1076 1073
                 initialDims = initialDims,
1077
-                modules = modules,
1078 1074
                 perplexity = perplexity,
1079 1075
                 maxIter = maxIter)
1080 1076
         } else {
... ...
@@ -1084,7 +1080,6 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"),
1084 1080
                     maxCells = maxCells,
1085 1081
                     minClusterSize = minClusterSize,
1086 1082
                     initialDims = initialDims,
1087
-                    modules = modules,
1088 1083
                     perplexity = perplexity,
1089 1084
                     maxIter = maxIter))
1090 1085
         }
... ...
@@ -1098,15 +1093,13 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"),
1098 1093
     maxCells = NULL,
1099 1094
     minClusterSize = 100,
1100 1095
     initialDims = 20,
1101
-    modules = NULL,
1102 1096
     perplexity = 20,
1103 1097
     maxIter = 2500) {
1104 1098
 
1105 1099
     preparedCountInfo <- .prepareCountsForDimReductionCeldaC(counts,
1106 1100
         celdaMod,
1107 1101
         maxCells,
1108
-        minClusterSize,
1109
-        modules)
1102
+        minClusterSize)
1110 1103
 
1111 1104
     res <- .calculateTsne(preparedCountInfo$norm,
1112 1105
         perplexity = perplexity,
... ...
@@ -1117,7 +1110,7 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"),
1117 1110
     final <- matrix(NA, nrow = ncol(counts), ncol = 2)
1118 1111
     final[preparedCountInfo$cellIx, ] <- res
1119 1112
     rownames(final) <- colnames(counts)
1120
-    colnames(final) <- c("tsne_1", "tsne_2")
1113
+    colnames(final) <- c("tSNE1", "tSNE_2")
1121 1114
     return(final)
1122 1115
 }
1123 1116
 
... ...
@@ -1136,44 +1129,73 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"),
1136 1129
 #'  Default NULL.
1137 1130
 #' @param minClusterSize Integer. Do not subsample cell clusters below this
1138 1131
 #'  threshold. Default 100.
1139
-#' @param modules Integer vector. Determines which features modules to use for
1140
-#'  UMAP. If NULL, all modules will be used. Default NULL.
1141 1132
 #' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
1142 1133
 #'  a default value of 12345 is used. If NULL, no calls to
1143 1134
 #'  \link[withr]{with_seed} are made.
1144
-#' @param umapConfig An object of class "umap.config" specifying parameters to
1145
-#'  the UMAP algorithm.
1135
+#' @param nNeighbors The size of local neighborhood used for
1136
+#'   manifold approximation. Larger values result in more global
1137
+#'   views of the manifold, while smaller values result in more
1138
+#'   local data being preserved. Default 30. See `?uwot::umap` for more information.
1139
+#' @param minDist The effective minimum distance between embedded points.
1140
+#'          Smaller values will result in a more clustered/clumped
1141
+#'          embedding where nearby points on the manifold are drawn
1142
+#'          closer together, while larger values will result in a more
1143
+#'          even dispersal of points. Default 0.2. See `?uwot::umap` for more information.
1144
+#' @param spread The effective scale of embedded points. In combination with
1145
+#'          ‘min_dist’, this determines how clustered/clumped the
1146
+#'          embedded points are. Default 1. See `?uwot::umap` for more information.
1147
+#' @param pca Logical. Whether to perform
1148
+#' dimensionality reduction with PCA before UMAP.
1149
+#' @param initialDims Integer. Number of dimensions from PCA to use as
1150
+#' input in UMAP. Default 50.
1151
+#' @param nThreads Number of threads to use. Default 1.
1152
+#' @param ... Other parameters to pass to `uwot::umap`.
1146 1153
 #' @seealso `celda_C()` for clustering cells and `celdaHeatmap()` for displaying
1147 1154
 #'  expression.
1148 1155
 #' @examples
1149 1156
 #' data(celdaCSim, celdaCMod)
1150 1157
 #' umapRes <- celdaUmap(celdaCSim$counts, celdaCMod)
1151
-#' @return A two column matrix of umap coordinates
1158
+#' @return A two column matrix of UMAP coordinates
1152 1159
 #' @export
1153 1160
 setMethod("celdaUmap", signature(celdaMod = "celda_C"),
1154 1161
     function(counts,
1155 1162
         celdaMod,
1156 1163
         maxCells = NULL,
1157 1164
         minClusterSize = 100,
1158
-        modules = NULL,
1159 1165
         seed = 12345,
1160
-        umapConfig = umap::umap.defaults) {
1166
+        nNeighbors = 30,
1167
+        minDist = 0.2,
1168
+        spread = 1,
1169
+        pca = TRUE,
1170
+        initialDims = 50,
1171
+        nThreads = 1,        
1172
+        ...) {
1161 1173
 
1162 1174
         if (is.null(seed)) {
1163 1175
             res <- .celdaUmapC(counts = counts,
1164 1176
                 celdaMod = celdaMod,
1165 1177
                 maxCells = maxCells,
1166 1178
                 minClusterSize = minClusterSize,
1167
-                modules = modules,
1168
-                umapConfig = umapConfig)
1179
+                nNeighbors = nNeighbors,
1180
+                minDist = minDist,
1181
+                spread = spread,
1182
+                pca = pca,
1183
+                initialDims = initialDims,
1184
+                nThreads = nThreads,
1185
+                ...)
1169 1186
         } else {
1170 1187
             with_seed(seed,
1171 1188
                 res <- .celdaUmapC(counts = counts,
1172 1189
                     celdaMod = celdaMod,
1173 1190
                     maxCells = maxCells,
1174 1191
                     minClusterSize = minClusterSize,
1175
-                    modules = modules,
1176
-                    umapConfig = umapConfig))
1192
+                    nNeighbors = nNeighbors,
1193
+                    minDist = minDist,
1194
+                    spread = spread,
1195
+                    pca = pca,
1196
+                    initialDims = initialDims,
1197
+                    nThreads = nThreads,
1198
+                    ...))
1177 1199
         }
1178 1200
 
1179 1201
         return(res)
... ...
@@ -1184,19 +1206,31 @@ setMethod("celdaUmap", signature(celdaMod = "celda_C"),
1184 1206
     celdaMod,
1185 1207
     maxCells = NULL,
1186 1208
     minClusterSize = 100,
1187
-    modules = NULL,
1188
-    umapConfig = umap::umap.defaults) {
1209
+    nNeighbors = 30,
1210
+    minDist = 0.2,
1211
+    spread = 1,
1212
+    pca = TRUE,
1213
+    initialDims = 50,
1214
+    nThreads = 1,
1215
+    ...) {
1189 1216
 
1190 1217
     preparedCountInfo <- .prepareCountsForDimReductionCeldaC(counts,
1191 1218
         celdaMod,
1192 1219
         maxCells,
1193
-        minClusterSize,
1194
-        modules)
1195
-    res <- .calculateUmap(preparedCountInfo$norm, umapConfig)
1220
+        minClusterSize)
1221
+    umapRes <- .calculateUmap(preparedCountInfo$norm,
1222
+        nNeighbors = nNeighbors,
1223
+        minDist = minDist,
1224
+        spread = spread,
1225
+        pca = pca,
1226
+        initialDims = initialDims,        
1227
+        nThreads = nThreads,
1228
+        ...)
1229
+
1196 1230
     final <- matrix(NA, nrow = ncol(counts), ncol = 2)
1197
-    final[preparedCountInfo$cellIx, ] <- res
1231
+    final[preparedCountInfo$cellIx, ] <- umapRes
1198 1232
     rownames(final) <- colnames(counts)
1199
-    colnames(final) <- c("umap_1", "umap_2")
1233
+    colnames(final) <- c("UMAP_1", "UMAP_2")
1200 1234
     return(final)
1201 1235
 }
1202 1236
 
... ...
@@ -1204,8 +1238,7 @@ setMethod("celdaUmap", signature(celdaMod = "celda_C"),
1204 1238
 .prepareCountsForDimReductionCeldaC <- function(counts,
1205 1239
     celdaMod,
1206 1240
     maxCells = NULL,
1207
-    minClusterSize = 100,
1208
-    modules = NULL) {
1241
+    minClusterSize = 100) {
1209 1242
 
1210 1243
     counts <- .processCounts(counts)
1211 1244
     compareCountMatrix(counts, celdaMod)
... ...
@@ -1389,7 +1389,7 @@ setMethod("celdaTsne", signature(celdaMod = "celda_CG"),
1389 1389
     final <- matrix(NA, nrow = ncol(counts), ncol = 2)
1390 1390
     final[preparedCountInfo$cellIx, ] <- res
1391 1391
     rownames(final) <- colnames(counts)
1392
-    colnames(final) <- c("tsne_1", "tsne_2")
1392
+    colnames(final) <- c("tSNE_1", "tSNE_2")
1393 1393
     return(final)
1394 1394
 }
1395 1395
 
... ...
@@ -1411,12 +1411,24 @@ setMethod("celdaTsne", signature(celdaMod = "celda_CG"),
1411 1411
 #' @param minClusterSize Integer. Do not subsample cell clusters below this
1412 1412
 #'  threshold. Default 100.
1413 1413
 #' @param modules Integer vector. Determines which features modules to use for
1414
-#'  tSNE. If NULL, all modules will be used. Default NULL.
1414
+#'  UMAP. If NULL, all modules will be used. Default NULL.
1415 1415
 #' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
1416 1416
 #'  a default value of 12345 is used. If NULL, no calls to
1417 1417
 #'  \link[withr]{with_seed} are made.
1418
-#' @param umapConfig Object of class `umap.config`. Configures parameters for
1419
-#'  umap. Default `umap::umap.defaults`.
1418
+#' @param nNeighbors The size of local neighborhood used for
1419
+#'   manifold approximation. Larger values result in more global
1420
+#'   views of the manifold, while smaller values result in more
1421
+#'   local data being preserved. Default 30. See `?uwot::umap` for more information.
1422
+#' @param minDist The effective minimum distance between embedded points.
1423
+#'          Smaller values will result in a more clustered/clumped
1424
+#'          embedding where nearby points on the manifold are drawn
1425
+#'          closer together, while larger values will result in a more
1426
+#'          even dispersal of points. Default 0.2. See `?uwot::umap` for more information.
1427
+#' @param spread The effective scale of embedded points. In combination with
1428
+#'          ‘min_dist’, this determines how clustered/clumped the
1429
+#'          embedded points are. Default 1. See `?uwot::umap` for more information.
1430
+#' @param nThreads Number of threads to use. Default 1.
1431
+#' @param ... Other parameters to pass to `uwot::umap`.
1420 1432
 #' @seealso `celda_CG()` for clustering features and cells and `celdaHeatmap()`
1421 1433
 #'  for displaying expression.
1422 1434
 #' @examples
... ...
@@ -1431,7 +1443,11 @@ setMethod("celdaUmap",
1431 1443
         minClusterSize = 100,
1432 1444
         modules = NULL,
1433 1445
         seed = 12345,
1434
-        umapConfig = umap::umap.defaults) {
1446
+        nNeighbors = 30,
1447
+        minDist = 0.2,
1448
+        spread = 1,
1449
+        nThreads = 1,
1450
+        ...) {
1435 1451
 
1436 1452
         if (is.null(seed)) {
1437 1453
             res <- .celdaUmapCG(counts = counts,
... ...
@@ -1439,7 +1455,11 @@ setMethod("celdaUmap",
1439 1455
                 maxCells = maxCells,
1440 1456
                 minClusterSize = minClusterSize,
1441 1457
                 modules = modules,
1442
-                umapConfig = umapConfig)
1458
+                nNeighbors = nNeighbors,
1459
+                minDist = minDist,
1460
+                spread = spread,
1461
+                nThreads = nThreads,
1462
+                ...)
1443 1463
         } else {
1444 1464
             with_seed(seed,
1445 1465
                 res <- .celdaUmapCG(counts = counts,
... ...
@@ -1447,7 +1467,11 @@ setMethod("celdaUmap",
1447 1467
                     maxCells = maxCells,
1448 1468
                     minClusterSize = minClusterSize,
1449 1469
                     modules = modules,
1450
-                    umapConfig = umapConfig))
1470
+                    nNeighbors = nNeighbors,
1471
+                    minDist = minDist,
1472
+                    spread = spread,
1473
+                    nThreads = nThreads,
1474
+                    ...))
1451 1475
         }
1452 1476
 
1453 1477
         return(res)
... ...
@@ -1459,18 +1483,28 @@ setMethod("celdaUmap",
1459 1483
     maxCells = NULL,
1460 1484
     minClusterSize = 100,
1461 1485
     modules = NULL,
1462
-    umapConfig = umap::umap.defaults) {
1486
+    nNeighbors = nNeighbors,
1487
+    minDist = minDist,
1488
+    spread = spread,
1489
+    nThreads = nThreads,
1490
+    ...) {
1463 1491
 
1464 1492
     preparedCountInfo <- .prepareCountsForDimReductionCeldaCG(counts,
1465 1493
         celdaMod,
1466 1494
         maxCells,
1467 1495
         minClusterSize,
1468 1496
         modules)
1469
-    umapRes <- .calculateUmap(preparedCountInfo$norm, umapConfig)
1497
+    umapRes <- .calculateUmap(preparedCountInfo$norm,
1498
+        nNeighbors = nNeighbors,
1499
+        minDist = minDist,
1500
+        spread = spread,
1501
+        nThreads = nThreads,
1502
+        ...)
1503
+
1470 1504
     final <- matrix(NA, nrow = ncol(counts), ncol = 2)
1471 1505
     final[preparedCountInfo$cellIx, ] <- umapRes
1472 1506
     rownames(final) <- colnames(counts)
1473
-    colnames(final) <- c("umap_1", "umap_2")
1507
+    colnames(final) <- c("UMAP_1", "UMAP_2")
1474 1508
     return(final)
1475 1509
 }
1476 1510
 
... ...
@@ -1101,12 +1101,24 @@ setMethod("celdaTsne", signature(celdaMod = "celda_G"),
1101 1101
 #' @param minClusterSize Integer. Do not subsample cell clusters below this
1102 1102
 #'  threshold. Default 100.
1103 1103
 #' @param modules Integer vector. Determines which features modules to use for
1104
-#'  tSNE. If NULL, all modules will be used. Default NULL.
1104
+#'  UMAP. If NULL, all modules will be used. Default NULL.
1105 1105
 #' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
1106 1106
 #'  a default value of 12345 is used. If NULL, no calls to
1107 1107
 #'  \link[withr]{with_seed} are made.
1108
-#' @param umapConfig Object of class `umap.config`. Configures parameters for
1109
-#'  umap. Default `umap::umap.defaults`.
1108
+#' @param nNeighbors The size of local neighborhood used for
1109
+#'   manifold approximation. Larger values result in more global
1110
+#'   views of the manifold, while smaller values result in more
1111
+#'   local data being preserved. Default 30. See `?uwot::umap` for more information.
1112
+#' @param minDist The effective minimum distance between embedded points.
1113
+#'          Smaller values will result in a more clustered/clumped
1114
+#'          embedding where nearby points on the manifold are drawn
1115
+#'          closer together, while larger values will result in a more
1116
+#'          even dispersal of points. Default 0.2. See `?uwot::umap` for more information.
1117
+#' @param spread The effective scale of embedded points. In combination with
1118
+#'          ‘min_dist’, this determines how clustered/clumped the
1119
+#'          embedded points are. Default 1. See `?uwot::umap` for more information.
1120
+#' @param nThreads Number of threads to use. Default 1.
1121
+#' @param ... Other parameters to pass to `uwot::umap`.
1110 1122
 #' @seealso `celda_G()` for clustering features and cells  and `celdaHeatmap()`
1111 1123
 #'  for displaying expression
1112 1124
 #' @examples
... ...
@@ -1117,11 +1129,15 @@ setMethod("celdaTsne", signature(celdaMod = "celda_G"),
1117 1129
 setMethod("celdaUmap", signature(celdaMod = "celda_G"),
1118 1130
     function(counts,
1119 1131
         celdaMod,
1120
-        maxCells = 25000,
1132
+        maxCells = NULL,
1121 1133
         minClusterSize = 100,
1122 1134
         modules = NULL,
1123 1135
         seed = 12345,
1124
-        umapConfig = umap::umap.defaults) {
1136
+        nNeighbors = 30,
1137
+        minDist = 0.2,
1138
+        spread = 1,
1139
+        nThreads = 1,
1140
+        ...) {
1125 1141
 
1126 1142
         if (is.null(seed)) {
1127 1143
             res <- .celdaUmapG(counts = counts,
... ...
@@ -1129,7 +1145,11 @@ setMethod("celdaUmap", signature(celdaMod = "celda_G"),
1129 1145
                 maxCells = maxCells,
1130 1146
                 minClusterSize = minClusterSize,
1131 1147
                 modules = modules,
1132
-                umapConfig = umapConfig)
1148
+                nNeighbors = nNeighbors,
1149
+                minDist = minDist,
1150
+                spread = spread,
1151
+                nThreads = nThreads,
1152
+                ...)
1133 1153
         } else {
1134 1154
             with_seed(seed,
1135 1155
                 res <- .celdaUmapG(counts = counts,
... ...
@@ -1137,7 +1157,11 @@ setMethod("celdaUmap", signature(celdaMod = "celda_G"),
1137 1157
                     maxCells = maxCells,
1138 1158
                     minClusterSize = minClusterSize,
1139 1159
                     modules = modules,
1140
-                    umapConfig = umapConfig))
1160
+                    nNeighbors = nNeighbors,
1161
+                    minDist = minDist,
1162
+                    spread = spread,
1163
+                    nThreads = nThreads,
1164
+                    ...))
1141 1165
         }
1142 1166
 
1143 1167
         return(res)
... ...
@@ -1149,14 +1173,24 @@ setMethod("celdaUmap", signature(celdaMod = "celda_G"),
1149 1173
     maxCells = NULL,
1150 1174
     minClusterSize = 100,
1151 1175
     modules = NULL,
1152
-    umapConfig = umap::umap.defaults) {
1176
+    nNeighbors = nNeighbors,
1177
+    minDist = minDist,
1178
+    spread = spread,
1179
+    nThreads = nThreads,
1180
+    ...) {
1153 1181
 
1154 1182
     preparedCountInfo <- .prepareCountsForDimReductionCeldaCG(counts,
1155 1183
         celdaMod,
1156 1184
         maxCells,
1157 1185
         minClusterSize,
1158 1186
         modules)
1159
-    umapRes <- .calculateUmap(preparedCountInfo$norm, umapConfig)
1187
+    umapRes <- .calculateUmap(preparedCountInfo$norm,
1188
+        nNeighbors = nNeighbors,
1189
+        minDist = minDist,
1190
+        spread = spread,
1191
+        nThreads = nThreads,
1192
+        ...)
1193
+        
1160 1194
     final <- matrix(NA, nrow = ncol(counts), ncol = 2)
1161 1195
     final[preparedCountInfo$cellIx, ] <- umapRes
1162 1196
     rownames(final) <- colnames(counts)
... ...
@@ -445,14 +445,14 @@ plotDimReduceCluster <- function(dim1,
445 445
 # @param maxIter Numeric vector. Determines iterations for tsne. Default 1000.
446 446
 # @param doPca Logical. Whether to perform
447 447
 # dimensionality reduction with PCA before tSNE.
448
-# @param initialDims Integer.Number of dimensions from PCA to use as
449
-# input in tSNE.
448
+# @param initialDims Integer. Number of dimensions from PCA to use as
449
+# input in tSNE. Default 50.
450 450
 #' @importFrom Rtsne Rtsne
451 451
 .calculateTsne <- function(norm,
452 452
     perplexity = 20,
453 453
     maxIter = 2500,
454 454
     doPca = FALSE,
455
-    initialDims = 20) {
455
+    initialDims = 50) {
456 456
 
457 457
     res <- Rtsne::Rtsne(
458 458
         norm,
... ...
@@ -467,12 +467,35 @@ plotDimReduceCluster <- function(dim1,
467 467
 }
468 468
 
469 469
 
470
-# Run the umap algorithm for dimensionality reduction
470
+# Run the UMAP algorithm for dimensionality reduction
471 471
 # @param norm Normalized count matrix.
472
-# @param umapConfig An object of class umap.config,
473
-# containing configuration parameters to be passed to umap.
474
-# Default umap::umap.defualts.
475
-#' @importFrom umap umap
476
-.calculateUmap <- function(norm, umapConfig = umap::umap.defaults) {
477
-    return(umap::umap(norm, umapConfig)$layout)
472
+# @param nNeighbors The size of local neighborhood used for
473
+#   manifold approximation. Larger values result in more global
474
+#   views of the manifold, while smaller values result in more
475
+#   local data being preserved. Default 30. See `?uwot::umap` for more information.
476
+# @param minDist The effective minimum distance between embedded points.
477
+#          Smaller values will result in a more clustered/clumped
478
+#          embedding where nearby points on the manifold are drawn
479
+#          closer together, while larger values will result in a more
480
+#          even dispersal of points. Default 0.2. See `?uwot::umap` for more information.
481
+# @param spread The effective scale of embedded points. In combination with
482
+#          ‘min_dist’, this determines how clustered/clumped the
483
+#          embedded points are. Default 1. See `?uwot::umap` for more information.
484
+# @param pca Logical. Whether to perform
485
+# dimensionality reduction with PCA before UMAP.
486
+# @param initialDims Integer. Number of dimensions from PCA to use as
487
+# input in UMAP. Default 50.
488
+# @param nThreads Number of threads to use. Default 1.
489
+# @param ... Other parameters to pass to `uwot::umap`.
490
+#' @import uwot
491
+.calculateUmap <- function(norm, nNeighbors = 30, minDist = 0.2, spread = 1, pca=FALSE, initialDims=50, nThreads = 1, ...) {
492
+    if (isTRUE(pca)) {
493
+      doPCA <- initialDims
494
+    } else {
495
+      doPCA <- NULL
496
+    }
497
+    
498
+    return(uwot::umap(norm, n_neighbors=nNeighbors,
499
+    		min_dist = minDist, spread = spread,
500
+    		n_threads = nThreads, n_sgd_threads = 1, pca = doPCA, ...))
478 501
 }
... ...
@@ -6,8 +6,8 @@
6 6
 \title{tSNE for celda_C}
7 7
 \usage{
8 8
 \S4method{celdaTsne}{celda_C}(counts, celdaMod, maxCells = NULL,
9
-  minClusterSize = 100, initialDims = 20, modules = NULL,
10
-  perplexity = 20, maxIter = 2500, seed = 12345)
9
+  minClusterSize = 100, initialDims = 20, perplexity = 20,
10
+  maxIter = 2500, seed = 12345)
11 11
 }
12 12
 \arguments{
13 13
 \item{counts}{Integer matrix. Rows represent features and columns represent
... ...
@@ -28,9 +28,6 @@ threshold. Default 100.}
28 28
 of the dataset. The top 'initialDims' principal components will be used
29 29
 for tSNE. Default 20.}
30 30
 
31
-\item{modules}{Integer vector. Determines which features modules to use for
32
-tSNE. If NULL, all modules will be used. Default NULL.}
33
-
34 31
 \item{perplexity}{Numeric. Perplexity parameter for tSNE. Default 20.}
35 32
 
36 33
 \item{maxIter}{Integer. Maximum number of iterations in tSNE generation.
... ...
@@ -6,8 +6,9 @@
6 6
 \title{umap for celda_C}
7 7
 \usage{
8 8
 \S4method{celdaUmap}{celda_C}(counts, celdaMod, maxCells = NULL,
9
-  minClusterSize = 100, modules = NULL, seed = 12345,
10
-  umapConfig = umap::umap.defaults)
9
+  minClusterSize = 100, seed = 12345, nNeighbors = 30,
10
+  minDist = 0.2, spread = 1, pca = TRUE, initialDims = 50,
11
+  nThreads = 1, ...)
11 12
 }
12 13
 \arguments{
13 14
 \item{counts}{Integer matrix. Rows represent features and columns represent
... ...
@@ -24,18 +25,37 @@ Default NULL.}
24 25
 \item{minClusterSize}{Integer. Do not subsample cell clusters below this
25 26
 threshold. Default 100.}
26 27
 
27
-\item{modules}{Integer vector. Determines which features modules to use for
28
-UMAP. If NULL, all modules will be used. Default NULL.}
29
-
30 28
 \item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
31 29
 a default value of 12345 is used. If NULL, no calls to
32 30
 \link[withr]{with_seed} are made.}
33 31
 
34
-\item{umapConfig}{An object of class "umap.config" specifying parameters to
35
-the UMAP algorithm.}
32
+\item{nNeighbors}{The size of local neighborhood used for
33
+manifold approximation. Larger values result in more global
34
+views of the manifold, while smaller values result in more
35
+local data being preserved. Default 30. See `?uwot::umap` for more information.}
36
+
37
+\item{minDist}{The effective minimum distance between embedded points.
38
+Smaller values will result in a more clustered/clumped
39
+embedding where nearby points on the manifold are drawn
40
+closer together, while larger values will result in a more
41
+even dispersal of points. Default 0.2. See `?uwot::umap` for more information.}
42
+
43
+\item{spread}{The effective scale of embedded points. In combination with
44
+‘min_dist’, this determines how clustered/clumped the
45
+embedded points are. Default 1. See `?uwot::umap` for more information.}
46
+
47
+\item{pca}{Logical. Whether to perform
48
+dimensionality reduction with PCA before UMAP.}
49
+
50
+\item{initialDims}{Integer. Number of dimensions from PCA to use as
51
+input in UMAP. Default 50.}
52
+
53
+\item{nThreads}{Number of threads to use. Default 1.}
54
+
55
+\item{...}{Other parameters to pass to `uwot::umap`.}
36 56
 }
37 57
 \value{
38
-A two column matrix of umap coordinates
58
+A two column matrix of UMAP coordinates
39 59
 }
40 60
 \description{
41 61
 Embeds cells in two dimensions using umap based on a `celda_C`
... ...
@@ -7,7 +7,7 @@
7 7
 \usage{
8 8
 \S4method{celdaUmap}{celda_CG}(counts, celdaMod, maxCells = NULL,
9 9
   minClusterSize = 100, modules = NULL, seed = 12345,
10
-  umapConfig = umap::umap.defaults)
10
+  nNeighbors = 30, minDist = 0.2, spread = 1, nThreads = 1, ...)
11 11
 }
12 12
 \arguments{
13 13
 \item{counts}{Integer matrix. Rows represent features and columns represent
... ...
@@ -25,14 +25,30 @@ Default NULL.}
25 25
 threshold. Default 100.}
26 26
 
27 27
 \item{modules}{Integer vector. Determines which features modules to use for
28
-tSNE. If NULL, all modules will be used. Default NULL.}
28
+UMAP. If NULL, all modules will be used. Default NULL.}
29 29
 
30 30
 \item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
31 31
 a default value of 12345 is used. If NULL, no calls to
32 32
 \link[withr]{with_seed} are made.}
33 33
 
34
-\item{umapConfig}{Object of class `umap.config`. Configures parameters for
35
-umap. Default `umap::umap.defaults`.}
34
+\item{nNeighbors}{The size of local neighborhood used for
35
+manifold approximation. Larger values result in more global
36
+views of the manifold, while smaller values result in more
37
+local data being preserved. Default 30. See `?uwot::umap` for more information.}
38
+
39
+\item{minDist}{The effective minimum distance between embedded points.
40
+Smaller values will result in a more clustered/clumped
41
+embedding where nearby points on the manifold are drawn
42
+closer together, while larger values will result in a more
43
+even dispersal of points. Default 0.2. See `?uwot::umap` for more information.}
44
+
45
+\item{spread}{The effective scale of embedded points. In combination with
46
+‘min_dist’, this determines how clustered/clumped the
47
+embedded points are. Default 1. See `?uwot::umap` for more information.}
48
+
49
+\item{nThreads}{Number of threads to use. Default 1.}
50
+
51
+\item{...}{Other parameters to pass to `uwot::umap`.}
36 52
 }
37 53
 \value{
38 54
 A two column matrix of umap coordinates
... ...
@@ -5,9 +5,9 @@
5 5
 \alias{celdaUmap,celda_G-method}
6 6
 \title{umap for celda_G}
7 7
 \usage{
8
-\S4method{celdaUmap}{celda_G}(counts, celdaMod, maxCells = 25000,
8
+\S4method{celdaUmap}{celda_G}(counts, celdaMod, maxCells = NULL,
9 9
   minClusterSize = 100, modules = NULL, seed = 12345,
10
-  umapConfig = umap::umap.defaults)
10
+  nNeighbors = 30, minDist = 0.2, spread = 1, nThreads = 1, ...)
11 11
 }
12 12
 \arguments{
13 13
 \item{counts}{Integer matrix. Rows represent features and columns represent
... ...
@@ -25,14 +25,30 @@ Default NULL.}
25 25
 threshold. Default 100.}
26 26
 
27 27
 \item{modules}{Integer vector. Determines which features modules to use for
28
-tSNE. If NULL, all modules will be used. Default NULL.}
28
+UMAP. If NULL, all modules will be used. Default NULL.}
29 29
 
30 30
 \item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
31 31
 a default value of 12345 is used. If NULL, no calls to
32 32
 \link[withr]{with_seed} are made.}
33 33
 
34
-\item{umapConfig}{Object of class `umap.config`. Configures parameters for
35
-umap. Default `umap::umap.defaults`.}
34
+\item{nNeighbors}{The size of local neighborhood used for
35
+manifold approximation. Larger values result in more global
36
+views of the manifold, while smaller values result in more
37
+local data being preserved. Default 30. See `?uwot::umap` for more information.}
38
+
39
+\item{minDist}{The effective minimum distance between embedded points.
40
+Smaller values will result in a more clustered/clumped
41
+embedding where nearby points on the manifold are drawn
42
+closer together, while larger values will result in a more
43
+even dispersal of points. Default 0.2. See `?uwot::umap` for more information.}
44
+
45
+\item{spread}{The effective scale of embedded points. In combination with
46
+‘min_dist’, this determines how clustered/clumped the
47
+embedded points are. Default 1. See `?uwot::umap` for more information.}
48
+
49
+\item{nThreads}{Number of threads to use. Default 1.}
50
+
51
+\item{...}{Other parameters to pass to `uwot::umap`.}
36 52
 }
37 53
 \value{
38 54
 A two column matrix of umap coordinates
... ...
@@ -4,9 +4,8 @@
4 4
 \alias{celdaUmap}
5 5
 \title{Embeds cells in two dimensions using umap.}
6 6
 \usage{
7
-celdaUmap(counts, celdaMod, maxCells = 25000, minClusterSize = 100,
8
-  initialDims = 20, modules = NULL, seed = 12345,
9
-  umapConfig = umap::umap.defaults)
7
+celdaUmap(counts, celdaMod, maxCells = NULL, minClusterSize = 100,
8
+  modules = NULL, seed = 12345, ...)
10 9
 }
11 10
 \arguments{
12 11
 \item{counts}{Integer matrix. Rows represent features and columns represent
... ...
@@ -22,28 +21,21 @@ requires more memory. Default 25000.}
22 21
 \item{minClusterSize}{Integer. Do not subsample cell clusters below this
23 22
 threshold. Default 100.}
24 23
 
25
-\item{initialDims}{Integer. PCA will be used to reduce the dimentionality
26
-of the dataset. The top 'initialDims' principal components will be used
27
-for umap. Default 20.}
28
-
29 24
 \item{modules}{Integer vector. Determines which features modules to use for
30 25
 UMAP. If NULL, all modules will be used. Default NULL.}
31 26
 
32 27
 \item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility,
33 28
 a default value of 12345 is used. If NULL, no calls to
34
-\link[withr]{with_seed} are made.}
35
-
36
-\item{umapConfig}{An object of class "umapConfig" specifying parameters to
29
+\link[withr]{with_seed} are made.
37 30
 the UMAP algorithm.}
31
+
32
+\item{...}{Additional parameters to `uwot::umap`}
38 33
 }
39 34
 \value{
40
-Numeric Matrix of dimension `ncol(counts)` x 2, colums representing
41
- the "X" and "Y" coordinates in the data's t-SNE represetation.
35
+A two column matrix of UMAP coordinates
+}
+\examples{
36
+data(celdaCGSim, celdaCGMod)
37
+umapRes <- celdaUmap(celdaCGSim$counts, celdaCGMod)
42 38
 }
43 39
 \description{
44 40
 Embeds cells in two dimensions using umap.
45 41
 }
46
-\examples{
47
-data(celdaCGSim, celdaCGMod)
48
-tsneRes <- celdaUmap(celdaCGSim$counts, celdaCGMod)
49
-}