... | ... |
@@ -93,6 +93,7 @@ import(grid) |
93 | 93 |
import(gridExtra, except = c(combine)) |
94 | 94 |
import(magrittr) |
95 | 95 |
import(stats, except = c(start, end)) |
96 |
+import(uwot) |
|
96 | 97 |
importFrom(MAST,FromMatrix) |
97 | 98 |
importFrom(MAST,summary) |
98 | 99 |
importFrom(MAST,zlm) |
... | ... |
@@ -140,8 +141,6 @@ importFrom(scales,brewer_pal) |
140 | 141 |
importFrom(scales,dscale) |
141 | 142 |
importFrom(scales,hue_pal) |
142 | 143 |
importFrom(stringi,stri_list2matrix) |
143 |
-importFrom(umap,umap) |
|
144 |
-importFrom(umap,umap.defaults) |
|
145 | 144 |
importFrom(withr,with_seed) |
146 | 145 |
useDynLib(celda,"_colSumByGroup") |
147 | 146 |
useDynLib(celda,"_colSumByGroupChange") |
... | ... |
@@ -596,33 +596,26 @@ setGeneric("celdaTsne", |
596 | 596 |
#' requires more memory. Default NULL. |
597 | 597 |
#' @param minClusterSize Integer. Do not subsample cell clusters below this |
598 | 598 |
#' threshold. Default 100. |
599 |
-#' @param initialDims Integer. PCA will be used to reduce the dimentionality |
|
600 |
-#' of the dataset. The top 'initialDims' principal components will be used |
|
601 |
-#' for umap. Default 20. |
|
602 | 599 |
#' @param modules Integer vector. Determines which features modules to use for |
603 | 600 |
#' UMAP. If NULL, all modules will be used. Default NULL. |
604 | 601 |
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
605 | 602 |
#' a default value of 12345 is used. If NULL, no calls to |
606 | 603 |
#' \link[withr]{with_seed} are made. |
607 |
-#' @param umapConfig An object of class "umapConfig" specifying parameters to |
|
608 | 604 |
#' the UMAP algorithm. |
609 |
-#' @return Numeric Matrix of dimension `ncol(counts)` x 2, colums representing |
|
610 |
-#' the "X" and "Y" coordinates in the data's t-SNE represetation. |
|
611 |
-#' @examples |
|
605 |
+#' @param ... Additional parameters to `uwot::umap` |
|
606 |
+#' @return A two column matrix of UMAP coordinates |
+#' @examples |
|
612 | 607 |
#' data(celdaCGSim, celdaCGMod) |
613 |
-#' tsneRes <- celdaUmap(celdaCGSim$counts, celdaCGMod) |
|
614 |
-#' @importFrom umap umap.defaults |
|
608 |
+#' umapRes <- celdaUmap(celdaCGSim$counts, celdaCGMod) |
|
615 | 609 |
#' @export |
616 | 610 |
setGeneric(
    name = "celdaUmap",
    def = function(counts,
        celdaMod,
        maxCells = NULL,
        minClusterSize = 100,
        modules = NULL,
        seed = 12345,
        ...) {
        # Method selection is driven by the class of `celdaMod` only
        # (see `signature` below).
        standardGeneric("celdaUmap")
    },
    signature = "celdaMod")
628 | 621 |
|
... | ... |
@@ -1042,8 +1042,6 @@ setMethod("celdaHeatmap", signature(celdaMod = "celda_C"), |
1042 | 1042 |
#' @param initialDims Integer. PCA will be used to reduce the dimentionality |
1043 | 1043 |
#' of the dataset. The top 'initialDims' principal components will be used |
1044 | 1044 |
#' for tSNE. Default 20. |
1045 |
-#' @param modules Integer vector. Determines which features modules to use for |
|
1046 |
-#' tSNE. If NULL, all modules will be used. Default NULL. |
|
1047 | 1045 |
#' @param perplexity Numeric. Perplexity parameter for tSNE. Default 20. |
1048 | 1046 |
#' @param maxIter Integer. Maximum number of iterations in tSNE generation. |
1049 | 1047 |
#' Default 2500. |
... | ... |
@@ -1063,7 +1061,6 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"), |
1063 | 1061 |
maxCells = NULL, |
1064 | 1062 |
minClusterSize = 100, |
1065 | 1063 |
initialDims = 20, |
1066 |
- modules = NULL, |
|
1067 | 1064 |
perplexity = 20, |
1068 | 1065 |
maxIter = 2500, |
1069 | 1066 |
seed = 12345) { |
... | ... |
@@ -1074,7 +1071,6 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"), |
1074 | 1071 |
maxCells = maxCells, |
1075 | 1072 |
minClusterSize = minClusterSize, |
1076 | 1073 |
initialDims = initialDims, |
1077 |
- modules = modules, |
|
1078 | 1074 |
perplexity = perplexity, |
1079 | 1075 |
maxIter = maxIter) |
1080 | 1076 |
} else { |
... | ... |
@@ -1084,7 +1080,6 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"), |
1084 | 1080 |
maxCells = maxCells, |
1085 | 1081 |
minClusterSize = minClusterSize, |
1086 | 1082 |
initialDims = initialDims, |
1087 |
- modules = modules, |
|
1088 | 1083 |
perplexity = perplexity, |
1089 | 1084 |
maxIter = maxIter)) |
1090 | 1085 |
} |
... | ... |
@@ -1098,15 +1093,13 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"), |
1098 | 1093 |
maxCells = NULL, |
1099 | 1094 |
minClusterSize = 100, |
1100 | 1095 |
initialDims = 20, |
1101 |
- modules = NULL, |
|
1102 | 1096 |
perplexity = 20, |
1103 | 1097 |
maxIter = 2500) { |
1104 | 1098 |
|
1105 | 1099 |
preparedCountInfo <- .prepareCountsForDimReductionCeldaC(counts, |
1106 | 1100 |
celdaMod, |
1107 | 1101 |
maxCells, |
1108 |
- minClusterSize, |
|
1109 |
- modules) |
|
1102 |
+ minClusterSize) |
|
1110 | 1103 |
|
1111 | 1104 |
res <- .calculateTsne(preparedCountInfo$norm, |
1112 | 1105 |
perplexity = perplexity, |
... | ... |
@@ -1117,7 +1110,7 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"), |
1117 | 1110 |
final <- matrix(NA, nrow = ncol(counts), ncol = 2) |
1118 | 1111 |
final[preparedCountInfo$cellIx, ] <- res |
1119 | 1112 |
rownames(final) <- colnames(counts) |
1120 |
- colnames(final) <- c("tsne_1", "tsne_2") |
|
1113 |
+ colnames(final) <- c("tSNE_1", "tSNE_2") |
|
1121 | 1114 |
return(final) |
1122 | 1115 |
} |
1123 | 1116 |
|
... | ... |
@@ -1136,44 +1129,73 @@ setMethod("celdaTsne", signature(celdaMod = "celda_C"), |
1136 | 1129 |
#' Default NULL. |
1137 | 1130 |
#' @param minClusterSize Integer. Do not subsample cell clusters below this |
1138 | 1131 |
#' threshold. Default 100. |
1139 |
-#' @param modules Integer vector. Determines which features modules to use for |
|
1140 |
-#' UMAP. If NULL, all modules will be used. Default NULL. |
|
1141 | 1132 |
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
1142 | 1133 |
#' a default value of 12345 is used. If NULL, no calls to |
1143 | 1134 |
#' \link[withr]{with_seed} are made. |
1144 |
-#' @param umapConfig An object of class "umap.config" specifying parameters to |
|
1145 |
-#' the UMAP algorithm. |
|
1135 |
+#' @param nNeighbors The size of local neighborhood used for |
|
1136 |
+#' manifold approximation. Larger values result in more global |
|
1137 |
+#' views of the manifold, while smaller values result in more |
|
1138 |
+#' local data being preserved. Default 30. See `?uwot::umap` for more information. |
|
1139 |
+#' @param minDist The effective minimum distance between embedded points. |
|
1140 |
+#' Smaller values will result in a more clustered/clumped |
|
1141 |
+#' embedding where nearby points on the manifold are drawn |
|
1142 |
+#' closer together, while larger values will result in a more |
|
1143 |
+#' even dispersal of points. Default 0.2. See `?uwot::umap` for more information. |
|
1144 |
+#' @param spread The effective scale of embedded points. In combination with |
|
1145 |
+#' `minDist`, this determines how clustered/clumped the |
|
1146 |
+#' embedded points are. Default 1. See `?uwot::umap` for more information. |
|
1147 |
+#' @param pca Logical. Whether to perform |
|
1148 |
+#' dimensionality reduction with PCA before UMAP. Default TRUE. |
|
1149 |
+#' @param initialDims Integer. Number of dimensions from PCA to use as |
|
1150 |
+#' input in UMAP. Default 50. |
|
1151 |
+#' @param nThreads Number of threads to use. Default 1. |
|
1152 |
+#' @param ... Other parameters to pass to `uwot::umap`. |
|
1146 | 1153 |
#' @seealso `celda_C()` for clustering cells and `celdaHeatmap()` for displaying |
1147 | 1154 |
#' expression. |
1148 | 1155 |
#' @examples |
1149 | 1156 |
#' data(celdaCSim, celdaCMod) |
1150 | 1157 |
#' umapRes <- celdaUmap(celdaCSim$counts, celdaCMod) |
1151 |
-#' @return A two column matrix of umap coordinates |
|
1158 |
+#' @return A two column matrix of UMAP coordinates |
|
1152 | 1159 |
#' @export |
1153 | 1160 |
setMethod("celdaUmap", signature(celdaMod = "celda_C"), |
1154 | 1161 |
function(counts, |
1155 | 1162 |
celdaMod, |
1156 | 1163 |
maxCells = NULL, |
1157 | 1164 |
minClusterSize = 100, |
1158 |
- modules = NULL, |
|
1159 | 1165 |
seed = 12345, |
1160 |
- umapConfig = umap::umap.defaults) { |
|
1166 |
+ nNeighbors = 30, |
|
1167 |
+ minDist = 0.2, |
|
1168 |
+ spread = 1, |
|
1169 |
+ pca = TRUE, |
|
1170 |
+ initialDims = 50, |
|
1171 |
+ nThreads = 1, |
|
1172 |
+ ...) { |
|
1161 | 1173 |
|
1162 | 1174 |
if (is.null(seed)) { |
1163 | 1175 |
res <- .celdaUmapC(counts = counts, |
1164 | 1176 |
celdaMod = celdaMod, |
1165 | 1177 |
maxCells = maxCells, |
1166 | 1178 |
minClusterSize = minClusterSize, |
1167 |
- modules = modules, |
|
1168 |
- umapConfig = umapConfig) |
|
1179 |
+ nNeighbors = nNeighbors, |
|
1180 |
+ minDist = minDist, |
|
1181 |
+ spread = spread, |
|
1182 |
+ pca = pca, |
|
1183 |
+ initialDims = initialDims, |
|
1184 |
+ nThreads = nThreads, |
|
1185 |
+ ...) |
|
1169 | 1186 |
} else { |
1170 | 1187 |
with_seed(seed, |
1171 | 1188 |
res <- .celdaUmapC(counts = counts, |
1172 | 1189 |
celdaMod = celdaMod, |
1173 | 1190 |
maxCells = maxCells, |
1174 | 1191 |
minClusterSize = minClusterSize, |
1175 |
- modules = modules, |
|
1176 |
- umapConfig = umapConfig)) |
|
1192 |
+ nNeighbors = nNeighbors, |
|
1193 |
+ minDist = minDist, |
|
1194 |
+ spread = spread, |
|
1195 |
+ pca = pca, |
|
1196 |
+ initialDims = initialDims, |
|
1197 |
+ nThreads = nThreads, |
|
1198 |
+ ...)) |
|
1177 | 1199 |
} |
1178 | 1200 |
|
1179 | 1201 |
return(res) |
... | ... |
@@ -1184,19 +1206,31 @@ setMethod("celdaUmap", signature(celdaMod = "celda_C"), |
1184 | 1206 |
celdaMod, |
1185 | 1207 |
maxCells = NULL, |
1186 | 1208 |
minClusterSize = 100, |
1187 |
- modules = NULL, |
|
1188 |
- umapConfig = umap::umap.defaults) { |
|
1209 |
+ nNeighbors = 30, |
|
1210 |
+ minDist = 0.2, |
|
1211 |
+ spread = 1, |
|
1212 |
+ pca = TRUE, |
|
1213 |
+ initialDims = 50, |
|
1214 |
+ nThreads = 1, |
|
1215 |
+ ...) { |
|
1189 | 1216 |
|
1190 | 1217 |
preparedCountInfo <- .prepareCountsForDimReductionCeldaC(counts, |
1191 | 1218 |
celdaMod, |
1192 | 1219 |
maxCells, |
1193 |
- minClusterSize, |
|
1194 |
- modules) |
|
1195 |
- res <- .calculateUmap(preparedCountInfo$norm, umapConfig) |
|
1220 |
+ minClusterSize) |
|
1221 |
+ umapRes <- .calculateUmap(preparedCountInfo$norm, |
|
1222 |
+ nNeighbors = nNeighbors, |
|
1223 |
+ minDist = minDist, |
|
1224 |
+ spread = spread, |
|
1225 |
+ pca = pca, |
|
1226 |
+ initialDims = initialDims, |
|
1227 |
+ nThreads = nThreads, |
|
1228 |
+ ...) |
|
1229 |
+ |
|
1196 | 1230 |
final <- matrix(NA, nrow = ncol(counts), ncol = 2) |
1197 |
- final[preparedCountInfo$cellIx, ] <- res |
|
1231 |
+ final[preparedCountInfo$cellIx, ] <- umapRes |
|
1198 | 1232 |
rownames(final) <- colnames(counts) |
1199 |
- colnames(final) <- c("umap_1", "umap_2") |
|
1233 |
+ colnames(final) <- c("UMAP_1", "UMAP_2") |
|
1200 | 1234 |
return(final) |
1201 | 1235 |
} |
1202 | 1236 |
|
... | ... |
@@ -1204,8 +1238,7 @@ setMethod("celdaUmap", signature(celdaMod = "celda_C"), |
1204 | 1238 |
.prepareCountsForDimReductionCeldaC <- function(counts, |
1205 | 1239 |
celdaMod, |
1206 | 1240 |
maxCells = NULL, |
1207 |
- minClusterSize = 100, |
|
1208 |
- modules = NULL) { |
|
1241 |
+ minClusterSize = 100) { |
|
1209 | 1242 |
|
1210 | 1243 |
counts <- .processCounts(counts) |
1211 | 1244 |
compareCountMatrix(counts, celdaMod) |
... | ... |
@@ -1389,7 +1389,7 @@ setMethod("celdaTsne", signature(celdaMod = "celda_CG"), |
1389 | 1389 |
final <- matrix(NA, nrow = ncol(counts), ncol = 2) |
1390 | 1390 |
final[preparedCountInfo$cellIx, ] <- res |
1391 | 1391 |
rownames(final) <- colnames(counts) |
1392 |
- colnames(final) <- c("tsne_1", "tsne_2") |
|
1392 |
+ colnames(final) <- c("tSNE_1", "tSNE_2") |
|
1393 | 1393 |
return(final) |
1394 | 1394 |
} |
1395 | 1395 |
|
... | ... |
@@ -1411,12 +1411,24 @@ setMethod("celdaTsne", signature(celdaMod = "celda_CG"), |
1411 | 1411 |
#' @param minClusterSize Integer. Do not subsample cell clusters below this |
1412 | 1412 |
#' threshold. Default 100. |
1413 | 1413 |
#' @param modules Integer vector. Determines which features modules to use for |
1414 |
-#' tSNE. If NULL, all modules will be used. Default NULL. |
|
1414 |
+#' UMAP. If NULL, all modules will be used. Default NULL. |
|
1415 | 1415 |
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
1416 | 1416 |
#' a default value of 12345 is used. If NULL, no calls to |
1417 | 1417 |
#' \link[withr]{with_seed} are made. |
1418 |
-#' @param umapConfig Object of class `umap.config`. Configures parameters for |
|
1419 |
-#' umap. Default `umap::umap.defaults`. |
|
1418 |
+#' @param nNeighbors The size of local neighborhood used for |
|
1419 |
+#' manifold approximation. Larger values result in more global |
|
1420 |
+#' views of the manifold, while smaller values result in more |
|
1421 |
+#' local data being preserved. Default 30. See `?uwot::umap` for more information. |
|
1422 |
+#' @param minDist The effective minimum distance between embedded points. |
|
1423 |
+#' Smaller values will result in a more clustered/clumped |
|
1424 |
+#' embedding where nearby points on the manifold are drawn |
|
1425 |
+#' closer together, while larger values will result in a more |
|
1426 |
+#' even dispersal of points. Default 0.2. See `?uwot::umap` for more information. |
|
1427 |
+#' @param spread The effective scale of embedded points. In combination with |
|
1428 |
+#' `minDist`, this determines how clustered/clumped the |
|
1429 |
+#' embedded points are. Default 1. See `?uwot::umap` for more information. |
|
1430 |
+#' @param nThreads Number of threads to use. Default 1. |
|
1431 |
+#' @param ... Other parameters to pass to `uwot::umap`. |
|
1420 | 1432 |
#' @seealso `celda_CG()` for clustering features and cells and `celdaHeatmap()` |
1421 | 1433 |
#' for displaying expression. |
1422 | 1434 |
#' @examples |
... | ... |
@@ -1431,7 +1443,11 @@ setMethod("celdaUmap", |
1431 | 1443 |
minClusterSize = 100, |
1432 | 1444 |
modules = NULL, |
1433 | 1445 |
seed = 12345, |
1434 |
- umapConfig = umap::umap.defaults) { |
|
1446 |
+ nNeighbors = 30, |
|
1447 |
+ minDist = 0.2, |
|
1448 |
+ spread = 1, |
|
1449 |
+ nThreads = 1, |
|
1450 |
+ ...) { |
|
1435 | 1451 |
|
1436 | 1452 |
if (is.null(seed)) { |
1437 | 1453 |
res <- .celdaUmapCG(counts = counts, |
... | ... |
@@ -1439,7 +1455,11 @@ setMethod("celdaUmap", |
1439 | 1455 |
maxCells = maxCells, |
1440 | 1456 |
minClusterSize = minClusterSize, |
1441 | 1457 |
modules = modules, |
1442 |
- umapConfig = umapConfig) |
|
1458 |
+ nNeighbors = nNeighbors, |
|
1459 |
+ minDist = minDist, |
|
1460 |
+ spread = spread, |
|
1461 |
+ nThreads = nThreads, |
|
1462 |
+ ...) |
|
1443 | 1463 |
} else { |
1444 | 1464 |
with_seed(seed, |
1445 | 1465 |
res <- .celdaUmapCG(counts = counts, |
... | ... |
@@ -1447,7 +1467,11 @@ setMethod("celdaUmap", |
1447 | 1467 |
maxCells = maxCells, |
1448 | 1468 |
minClusterSize = minClusterSize, |
1449 | 1469 |
modules = modules, |
1450 |
- umapConfig = umapConfig)) |
|
1470 |
+ nNeighbors = nNeighbors, |
|
1471 |
+ minDist = minDist, |
|
1472 |
+ spread = spread, |
|
1473 |
+ nThreads = nThreads, |
|
1474 |
+ ...)) |
|
1451 | 1475 |
} |
1452 | 1476 |
|
1453 | 1477 |
return(res) |
... | ... |
@@ -1459,18 +1483,28 @@ setMethod("celdaUmap", |
1459 | 1483 |
maxCells = NULL, |
1460 | 1484 |
minClusterSize = 100, |
1461 | 1485 |
modules = NULL, |
1462 |
- umapConfig = umap::umap.defaults) { |
|
1486 |
+ nNeighbors = 30, |
|
1487 |
+ minDist = 0.2, |
|
1488 |
+ spread = 1, |
|
1489 |
+ nThreads = 1, |
|
1490 |
+ ...) { |
|
1463 | 1491 |
|
1464 | 1492 |
preparedCountInfo <- .prepareCountsForDimReductionCeldaCG(counts, |
1465 | 1493 |
celdaMod, |
1466 | 1494 |
maxCells, |
1467 | 1495 |
minClusterSize, |
1468 | 1496 |
modules) |
1469 |
- umapRes <- .calculateUmap(preparedCountInfo$norm, umapConfig) |
|
1497 |
+ umapRes <- .calculateUmap(preparedCountInfo$norm, |
|
1498 |
+ nNeighbors = nNeighbors, |
|
1499 |
+ minDist = minDist, |
|
1500 |
+ spread = spread, |
|
1501 |
+ nThreads = nThreads, |
|
1502 |
+ ...) |
|
1503 |
+ |
|
1470 | 1504 |
final <- matrix(NA, nrow = ncol(counts), ncol = 2) |
1471 | 1505 |
final[preparedCountInfo$cellIx, ] <- umapRes |
1472 | 1506 |
rownames(final) <- colnames(counts) |
1473 |
- colnames(final) <- c("umap_1", "umap_2") |
|
1507 |
+ colnames(final) <- c("UMAP_1", "UMAP_2") |
|
1474 | 1508 |
return(final) |
1475 | 1509 |
} |
1476 | 1510 |
|
... | ... |
@@ -1101,12 +1101,24 @@ setMethod("celdaTsne", signature(celdaMod = "celda_G"), |
1101 | 1101 |
#' @param minClusterSize Integer. Do not subsample cell clusters below this |
1102 | 1102 |
#' threshold. Default 100. |
1103 | 1103 |
#' @param modules Integer vector. Determines which features modules to use for |
1104 |
-#' tSNE. If NULL, all modules will be used. Default NULL. |
|
1104 |
+#' UMAP. If NULL, all modules will be used. Default NULL. |
|
1105 | 1105 |
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
1106 | 1106 |
#' a default value of 12345 is used. If NULL, no calls to |
1107 | 1107 |
#' \link[withr]{with_seed} are made. |
1108 |
-#' @param umapConfig Object of class `umap.config`. Configures parameters for |
|
1109 |
-#' umap. Default `umap::umap.defaults`. |
|
1108 |
+#' @param nNeighbors The size of local neighborhood used for |
|
1109 |
+#' manifold approximation. Larger values result in more global |
|
1110 |
+#' views of the manifold, while smaller values result in more |
|
1111 |
+#' local data being preserved. Default 30. See `?uwot::umap` for more information. |
|
1112 |
+#' @param minDist The effective minimum distance between embedded points. |
|
1113 |
+#' Smaller values will result in a more clustered/clumped |
|
1114 |
+#' embedding where nearby points on the manifold are drawn |
|
1115 |
+#' closer together, while larger values will result in a more |
|
1116 |
+#' even dispersal of points. Default 0.2. See `?uwot::umap` for more information. |
|
1117 |
+#' @param spread The effective scale of embedded points. In combination with |
|
1118 |
+#' `minDist`, this determines how clustered/clumped the |
|
1119 |
+#' embedded points are. Default 1. See `?uwot::umap` for more information. |
|
1120 |
+#' @param nThreads Number of threads to use. Default 1. |
|
1121 |
+#' @param ... Other parameters to pass to `uwot::umap`. |
|
1110 | 1122 |
#' @seealso `celda_G()` for clustering features and cells and `celdaHeatmap()` |
1111 | 1123 |
#' for displaying expression |
1112 | 1124 |
#' @examples |
... | ... |
@@ -1117,11 +1129,15 @@ setMethod("celdaTsne", signature(celdaMod = "celda_G"), |
1117 | 1129 |
setMethod("celdaUmap", signature(celdaMod = "celda_G"), |
1118 | 1130 |
function(counts, |
1119 | 1131 |
celdaMod, |
1120 |
- maxCells = 25000, |
|
1132 |
+ maxCells = NULL, |
|
1121 | 1133 |
minClusterSize = 100, |
1122 | 1134 |
modules = NULL, |
1123 | 1135 |
seed = 12345, |
1124 |
- umapConfig = umap::umap.defaults) { |
|
1136 |
+ nNeighbors = 30, |
|
1137 |
+ minDist = 0.2, |
|
1138 |
+ spread = 1, |
|
1139 |
+ nThreads = 1, |
|
1140 |
+ ...) { |
|
1125 | 1141 |
|
1126 | 1142 |
if (is.null(seed)) { |
1127 | 1143 |
res <- .celdaUmapG(counts = counts, |
... | ... |
@@ -1129,7 +1145,11 @@ setMethod("celdaUmap", signature(celdaMod = "celda_G"), |
1129 | 1145 |
maxCells = maxCells, |
1130 | 1146 |
minClusterSize = minClusterSize, |
1131 | 1147 |
modules = modules, |
1132 |
- umapConfig = umapConfig) |
|
1148 |
+ nNeighbors = nNeighbors, |
|
1149 |
+ minDist = minDist, |
|
1150 |
+ spread = spread, |
|
1151 |
+ nThreads = nThreads, |
|
1152 |
+ ...) |
|
1133 | 1153 |
} else { |
1134 | 1154 |
with_seed(seed, |
1135 | 1155 |
res <- .celdaUmapG(counts = counts, |
... | ... |
@@ -1137,7 +1157,11 @@ setMethod("celdaUmap", signature(celdaMod = "celda_G"), |
1137 | 1157 |
maxCells = maxCells, |
1138 | 1158 |
minClusterSize = minClusterSize, |
1139 | 1159 |
modules = modules, |
1140 |
- umapConfig = umapConfig)) |
|
1160 |
+ nNeighbors = nNeighbors, |
|
1161 |
+ minDist = minDist, |
|
1162 |
+ spread = spread, |
|
1163 |
+ nThreads = nThreads, |
|
1164 |
+ ...)) |
|
1141 | 1165 |
} |
1142 | 1166 |
|
1143 | 1167 |
return(res) |
... | ... |
@@ -1149,14 +1173,24 @@ setMethod("celdaUmap", signature(celdaMod = "celda_G"), |
1149 | 1173 |
maxCells = NULL, |
1150 | 1174 |
minClusterSize = 100, |
1151 | 1175 |
modules = NULL, |
1152 |
- umapConfig = umap::umap.defaults) { |
|
1176 |
+ nNeighbors = 30, |
|
1177 |
+ minDist = 0.2, |
|
1178 |
+ spread = 1, |
|
1179 |
+ nThreads = 1, |
|
1180 |
+ ...) { |
|
1153 | 1181 |
|
1154 | 1182 |
preparedCountInfo <- .prepareCountsForDimReductionCeldaCG(counts, |
1155 | 1183 |
celdaMod, |
1156 | 1184 |
maxCells, |
1157 | 1185 |
minClusterSize, |
1158 | 1186 |
modules) |
1159 |
- umapRes <- .calculateUmap(preparedCountInfo$norm, umapConfig) |
|
1187 |
+ umapRes <- .calculateUmap(preparedCountInfo$norm, |
|
1188 |
+ nNeighbors = nNeighbors, |
|
1189 |
+ minDist = minDist, |
|
1190 |
+ spread = spread, |
|
1191 |
+ nThreads = nThreads, |
|
1192 |
+ ...) |
|
1193 |
+ |
|
1160 | 1194 |
final <- matrix(NA, nrow = ncol(counts), ncol = 2) |
1161 | 1195 |
final[preparedCountInfo$cellIx, ] <- umapRes |
1162 | 1196 |
rownames(final) <- colnames(counts) |
... | ... |
@@ -445,14 +445,14 @@ plotDimReduceCluster <- function(dim1, |
445 | 445 |
# @param maxIter Numeric vector. Determines iterations for tsne. Default 2500. |
446 | 446 |
# @param doPca Logical. Whether to perform |
447 | 447 |
# dimensionality reduction with PCA before tSNE. |
448 |
-# @param initialDims Integer.Number of dimensions from PCA to use as |
|
449 |
-# input in tSNE. |
|
448 |
+# @param initialDims Integer. Number of dimensions from PCA to use as |
|
449 |
+# input in tSNE. Default 50. |
|
450 | 450 |
#' @importFrom Rtsne Rtsne |
451 | 451 |
.calculateTsne <- function(norm, |
452 | 452 |
perplexity = 20, |
453 | 453 |
maxIter = 2500, |
454 | 454 |
doPca = FALSE, |
455 |
- initialDims = 20) { |
|
455 |
+ initialDims = 50) { |
|
456 | 456 |
|
457 | 457 |
res <- Rtsne::Rtsne( |
458 | 458 |
norm, |
... | ... |
@@ -467,12 +467,35 @@ plotDimReduceCluster <- function(dim1, |
467 | 467 |
} |
468 | 468 |
|
469 | 469 |
|
470 |
-# Run the umap algorithm for dimensionality reduction |
|
470 |
+# Run the UMAP algorithm for dimensionality reduction |
|
471 | 471 |
# @param norm Normalized count matrix. |
472 |
-# @param umapConfig An object of class umap.config, |
|
473 |
-# containing configuration parameters to be passed to umap. |
|
474 |
-# Default umap::umap.defualts. |
|
475 |
-#' @importFrom umap umap |
|
476 |
-.calculateUmap <- function(norm, umapConfig = umap::umap.defaults) { |
|
477 |
- return(umap::umap(norm, umapConfig)$layout) |
|
472 |
+# @param nNeighbors The size of local neighborhood used for |
|
473 |
+# manifold approximation. Larger values result in more global |
|
474 |
+# views of the manifold, while smaller values result in more |
|
475 |
+# local data being preserved. Default 30. See `?uwot::umap` for more information. |
|
476 |
+# @param minDist The effective minimum distance between embedded points. |
|
477 |
+# Smaller values will result in a more clustered/clumped |
|
478 |
+# embedding where nearby points on the manifold are drawn |
|
479 |
+# closer together, while larger values will result in a more |
|
480 |
+# even dispersal of points. Default 0.2. See `?uwot::umap` for more information. |
|
481 |
+# @param spread The effective scale of embedded points. In combination with |
|
482 |
+# `minDist`, this determines how clustered/clumped the |
|
483 |
+# embedded points are. Default 1. See `?uwot::umap` for more information. |
|
484 |
+# @param pca Logical. Whether to perform |
|
485 |
+# dimensionality reduction with PCA before UMAP. Default FALSE. |
|
486 |
+# @param initialDims Integer. Number of dimensions from PCA to use as |
|
487 |
+# input in UMAP. Default 50. |
|
488 |
+# @param nThreads Number of threads to use. Default 1. |
|
489 |
+# @param ... Other parameters to pass to `uwot::umap`. |
|
490 |
+#' @import uwot |
|
491 |
.calculateUmap <- function(norm,
    nNeighbors = 30,
    minDist = 0.2,
    spread = 1,
    pca = FALSE,
    initialDims = 50,
    nThreads = 1,
    ...) {

    # The logical `pca` flag is translated into the value handed to
    # uwot::umap's `pca` argument: `initialDims` when PCA is requested,
    # NULL otherwise.
    nPcs <- if (isTRUE(pca)) initialDims else NULL

    embedding <- uwot::umap(norm,
        n_neighbors = nNeighbors,
        min_dist = minDist,
        spread = spread,
        n_threads = nThreads,
        n_sgd_threads = 1,
        pca = nPcs,
        ...)
    return(embedding)
}
... | ... |
@@ -6,8 +6,8 @@ |
6 | 6 |
\title{tSNE for celda_C} |
7 | 7 |
\usage{ |
8 | 8 |
\S4method{celdaTsne}{celda_C}(counts, celdaMod, maxCells = NULL, |
9 |
- minClusterSize = 100, initialDims = 20, modules = NULL, |
|
10 |
- perplexity = 20, maxIter = 2500, seed = 12345) |
|
9 |
+ minClusterSize = 100, initialDims = 20, perplexity = 20, |
|
10 |
+ maxIter = 2500, seed = 12345) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 | 13 |
\item{counts}{Integer matrix. Rows represent features and columns represent |
... | ... |
@@ -28,9 +28,6 @@ threshold. Default 100.} |
28 | 28 |
of the dataset. The top 'initialDims' principal components will be used |
29 | 29 |
for tSNE. Default 20.} |
30 | 30 |
|
31 |
-\item{modules}{Integer vector. Determines which features modules to use for |
|
32 |
-tSNE. If NULL, all modules will be used. Default NULL.} |
|
33 |
- |
|
34 | 31 |
\item{perplexity}{Numeric. Perplexity parameter for tSNE. Default 20.} |
35 | 32 |
|
36 | 33 |
\item{maxIter}{Integer. Maximum number of iterations in tSNE generation. |
... | ... |
@@ -6,8 +6,9 @@ |
6 | 6 |
\title{umap for celda_C} |
7 | 7 |
\usage{ |
8 | 8 |
\S4method{celdaUmap}{celda_C}(counts, celdaMod, maxCells = NULL, |
9 |
- minClusterSize = 100, modules = NULL, seed = 12345, |
|
10 |
- umapConfig = umap::umap.defaults) |
|
9 |
+ minClusterSize = 100, seed = 12345, nNeighbors = 30, |
|
10 |
+ minDist = 0.2, spread = 1, pca = TRUE, initialDims = 50, |
|
11 |
+ nThreads = 1, ...) |
|
11 | 12 |
} |
12 | 13 |
\arguments{ |
13 | 14 |
\item{counts}{Integer matrix. Rows represent features and columns represent |
... | ... |
@@ -24,18 +25,37 @@ Default NULL.} |
24 | 25 |
\item{minClusterSize}{Integer. Do not subsample cell clusters below this |
25 | 26 |
threshold. Default 100.} |
26 | 27 |
|
27 |
-\item{modules}{Integer vector. Determines which features modules to use for |
|
28 |
-UMAP. If NULL, all modules will be used. Default NULL.} |
|
29 |
- |
|
30 | 28 |
\item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
31 | 29 |
a default value of 12345 is used. If NULL, no calls to |
32 | 30 |
\link[withr]{with_seed} are made.} |
33 | 31 |
|
34 |
-\item{umapConfig}{An object of class "umap.config" specifying parameters to |
|
35 |
-the UMAP algorithm.} |
|
32 |
+\item{nNeighbors}{The size of local neighborhood used for |
|
33 |
+manifold approximation. Larger values result in more global |
|
34 |
+views of the manifold, while smaller values result in more |
|
35 |
+local data being preserved. Default 30. See `?uwot::umap` for more information.} |
|
36 |
+ |
|
37 |
+\item{minDist}{The effective minimum distance between embedded points. |
|
38 |
+Smaller values will result in a more clustered/clumped |
|
39 |
+embedding where nearby points on the manifold are drawn |
|
40 |
+closer together, while larger values will result on a more |
|
41 |
+even dispersal of points. Default 0.2. See `?uwot::umap` for more information.} |
|
42 |
+ |
|
43 |
+\item{spread}{The effective scale of embedded points. In combination with |
|
44 |
+‘min_dist’, this determines how clustered/clumped the |
|
45 |
+embedded points are. Default 1. See `?uwot::umap` for more information.} |
|
46 |
+ |
|
47 |
+\item{pca}{Logical. Whether to perform |
|
48 |
+dimensionality reduction with PCA before UMAP.} |
|
49 |
+ |
|
50 |
+\item{initialDims}{Integer. Number of dimensions from PCA to use as |
|
51 |
+input in UMAP. Default 50.} |
|
52 |
+ |
|
53 |
+\item{nThreads}{Number of threads to use. Default 1.} |
|
54 |
+ |
|
55 |
+\item{...}{Other parameters to pass to `uwot::umap`.} |
|
36 | 56 |
} |
37 | 57 |
\value{ |
38 |
-A two column matrix of umap coordinates |
|
58 |
+A two column matrix of UMAP coordinates |
|
39 | 59 |
} |
40 | 60 |
\description{ |
41 | 61 |
Embeds cells in two dimensions using umap based on a `celda_C` |
... | ... |
@@ -7,7 +7,7 @@ |
7 | 7 |
\usage{ |
8 | 8 |
\S4method{celdaUmap}{celda_CG}(counts, celdaMod, maxCells = NULL, |
9 | 9 |
minClusterSize = 100, modules = NULL, seed = 12345, |
10 |
- umapConfig = umap::umap.defaults) |
|
10 |
+ nNeighbors = 30, minDist = 0.2, spread = 1, nThreads = 1, ...) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 | 13 |
\item{counts}{Integer matrix. Rows represent features and columns represent |
... | ... |
@@ -25,14 +25,30 @@ Default NULL.} |
25 | 25 |
threshold. Default 100.} |
26 | 26 |
|
27 | 27 |
\item{modules}{Integer vector. Determines which features modules to use for |
28 |
-tSNE. If NULL, all modules will be used. Default NULL.} |
|
28 |
+UMAP. If NULL, all modules will be used. Default NULL.} |
|
29 | 29 |
|
30 | 30 |
\item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
31 | 31 |
a default value of 12345 is used. If NULL, no calls to |
32 | 32 |
\link[withr]{with_seed} are made.} |
33 | 33 |
|
34 |
-\item{umapConfig}{Object of class `umap.config`. Configures parameters for |
|
35 |
-umap. Default `umap::umap.defaults`.} |
|
34 |
+\item{nNeighbors}{The size of local neighborhood used for |
|
35 |
+manifold approximation. Larger values result in more global |
|
36 |
+views of the manifold, while smaller values result in more |
|
37 |
+local data being preserved. Default 30. See `?uwot::umap` for more information.} |
|
38 |
+ |
|
39 |
+\item{minDist}{The effective minimum distance between embedded points. |
|
40 |
+Smaller values will result in a more clustered/clumped |
|
41 |
+embedding where nearby points on the manifold are drawn |
|
42 |
+closer together, while larger values will result on a more |
|
43 |
+even dispersal of points. Default 0.2. See `?uwot::umap` for more information.} |
|
44 |
+ |
|
45 |
+\item{spread}{The effective scale of embedded points. In combination with |
|
46 |
+‘min_dist’, this determines how clustered/clumped the |
|
47 |
+embedded points are. Default 1. See `?uwot::umap` for more information.} |
|
48 |
+ |
|
49 |
+\item{nThreads}{Number of threads to use. Default 1.} |
|
50 |
+ |
|
51 |
+\item{...}{Other parameters to pass to `uwot::umap`.} |
|
36 | 52 |
} |
37 | 53 |
\value{ |
38 | 54 |
A two column matrix of umap coordinates |
... | ... |
@@ -5,9 +5,9 @@ |
5 | 5 |
\alias{celdaUmap,celda_G-method} |
6 | 6 |
\title{umap for celda_G} |
7 | 7 |
\usage{ |
8 |
-\S4method{celdaUmap}{celda_G}(counts, celdaMod, maxCells = 25000, |
|
8 |
+\S4method{celdaUmap}{celda_G}(counts, celdaMod, maxCells = NULL, |
|
9 | 9 |
minClusterSize = 100, modules = NULL, seed = 12345, |
10 |
- umapConfig = umap::umap.defaults) |
|
10 |
+ nNeighbors = 30, minDist = 0.2, spread = 1, nThreads = 1, ...) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 | 13 |
\item{counts}{Integer matrix. Rows represent features and columns represent |
... | ... |
@@ -25,14 +25,30 @@ Default NULL.} |
25 | 25 |
threshold. Default 100.} |
26 | 26 |
|
27 | 27 |
\item{modules}{Integer vector. Determines which features modules to use for |
28 |
-tSNE. If NULL, all modules will be used. Default NULL.} |
|
28 |
+UMAP. If NULL, all modules will be used. Default NULL.} |
|
29 | 29 |
|
30 | 30 |
\item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
31 | 31 |
a default value of 12345 is used. If NULL, no calls to |
32 | 32 |
\link[withr]{with_seed} are made.} |
33 | 33 |
|
34 |
-\item{umapConfig}{Object of class `umap.config`. Configures parameters for |
|
35 |
-umap. Default `umap::umap.defaults`.} |
|
34 |
+\item{nNeighbors}{The size of local neighborhood used for |
|
35 |
+manifold approximation. Larger values result in more global |
|
36 |
+views of the manifold, while smaller values result in more |
|
37 |
+local data being preserved. Default 30. See `?uwot::umap` for more information.} |
|
38 |
+ |
|
39 |
+\item{minDist}{The effective minimum distance between embedded points. |
|
40 |
+Smaller values will result in a more clustered/clumped |
|
41 |
+embedding where nearby points on the manifold are drawn |
|
42 |
+closer together, while larger values will result on a more |
|
43 |
+even dispersal of points. Default 0.2. See `?uwot::umap` for more information.} |
|
44 |
+ |
|
45 |
+\item{spread}{The effective scale of embedded points. In combination with |
|
46 |
+‘min_dist’, this determines how clustered/clumped the |
|
47 |
+embedded points are. Default 1. See `?uwot::umap` for more information.} |
|
48 |
+ |
|
49 |
+\item{nThreads}{Number of threads to use. Default 1.} |
|
50 |
+ |
|
51 |
+\item{...}{Other parameters to pass to `uwot::umap`.} |
|
36 | 52 |
} |
37 | 53 |
\value{ |
38 | 54 |
A two column matrix of umap coordinates |
... | ... |
@@ -4,9 +4,8 @@ |
4 | 4 |
\alias{celdaUmap} |
5 | 5 |
\title{Embeds cells in two dimensions using umap.} |
6 | 6 |
\usage{ |
7 |
-celdaUmap(counts, celdaMod, maxCells = 25000, minClusterSize = 100, |
|
8 |
- initialDims = 20, modules = NULL, seed = 12345, |
|
9 |
- umapConfig = umap::umap.defaults) |
|
7 |
+celdaUmap(counts, celdaMod, maxCells = NULL, minClusterSize = 100, |
|
8 |
+ modules = NULL, seed = 12345, ...) |
|
10 | 9 |
} |
11 | 10 |
\arguments{ |
12 | 11 |
\item{counts}{Integer matrix. Rows represent features and columns represent |
... | ... |
@@ -22,28 +21,21 @@ requires more memory. Default 25000.} |
22 | 21 |
\item{minClusterSize}{Integer. Do not subsample cell clusters below this |
23 | 22 |
threshold. Default 100.} |
24 | 23 |
|
25 |
-\item{initialDims}{Integer. PCA will be used to reduce the dimentionality |
|
26 |
-of the dataset. The top 'initialDims' principal components will be used |
|
27 |
-for umap. Default 20.} |
|
28 |
- |
|
29 | 24 |
\item{modules}{Integer vector. Determines which features modules to use for |
30 | 25 |
tSNE. If NULL, all modules will be used. Default NULL.} |
31 | 26 |
|
32 | 27 |
\item{seed}{Integer. Passed to \link[withr]{with_seed}. For reproducibility, |
33 | 28 |
a default value of 12345 is used. If NULL, no calls to |
34 |
-\link[withr]{with_seed} are made.} |
|
35 |
- |
|
36 |
-\item{umapConfig}{An object of class "umapConfig" specifying parameters to |
|
29 |
+\link[withr]{with_seed} are made. |
|
37 | 30 |
the UMAP algorithm.} |
31 |
+ |
|
32 |
+\item{...}{Additional parameters to `uwot::umap`} |
|
38 | 33 |
} |
39 | 34 |
\value{ |
40 |
-Numeric Matrix of dimension `ncol(counts)` x 2, colums representing |
|
41 |
- the "X" and "Y" coordinates in the data's t-SNE represetation. |
|
35 |
+A two column matrix of UMAP coordinates#' @examples |
|
36 |
+data(celdaCGSim, celdaCGMod) |
|
37 |
+umapRes <- celdaUmap(celdaCGSim$counts, celdaCGMod) |
|
42 | 38 |
} |
43 | 39 |
\description{ |
44 | 40 |
Embeds cells in two dimensions using umap. |
45 | 41 |
} |
46 |
-\examples{ |
|
47 |
-data(celdaCGSim, celdaCGMod) |
|
48 |
-tsneRes <- celdaUmap(celdaCGSim$counts, celdaCGMod) |
|
49 |
-} |