... | ... |
@@ -153,14 +153,8 @@ runTSCAN <- function(inSCE, |
153 | 153 |
sce <- runScranSNN(inSCE, useReducedDim = useReducedDim, seed = seed) |
154 | 154 |
cluster <- colData(sce)$scranSNN_cluster |
155 | 155 |
rm(sce) |
156 |
- } else if (length(cluster) == 1 & is.character(cluster)) { |
|
157 |
- if (!cluster %in% names(colData(inSCE))) { |
|
158 |
- stop("Specified cluster not found in `colData(inSCE)`.") |
|
159 |
- } |
|
160 |
- cluster <- colData(inSCE)[[cluster]] |
|
161 |
- } else if (length(cluster) != ncol(inSCE)) { |
|
162 |
- stop("`cluster` should either has length equal to `ncol(inSCE)` or be a ", |
|
163 |
- "colData variable.") |
|
156 |
+ } else { |
|
157 |
+ cluster <- .manageCellVar(inSCE, var = cluster) |
|
164 | 158 |
} |
165 | 159 |
if (length(unique(cluster)) == 1) { |
166 | 160 |
stop("Only one cluster found. Unable to calculate.") |
... | ... |
@@ -443,6 +443,7 @@ plotTSCANPseudotimeHeatmap <- function(inSCE, |
443 | 443 |
#' Use the viridis color scale to match with `plotTSCANResult` embedding color |
444 | 444 |
#' @param x numeric vector of pseudotime |
445 | 445 |
#' @return function object returned by circlize::colorRamp2 |
446 |
+#' @noRd |
|
446 | 447 |
.viridisPseudoTimeColor <- function(x) { |
447 | 448 |
a <- max(x, na.rm = TRUE) |
448 | 449 |
b <- min(x, na.rm = TRUE) |
... | ... |
@@ -862,6 +863,7 @@ plotTSCANDimReduceFeatures <- function( |
862 | 863 |
#' @param useCluster Vector of clusters, that are going to be the vertices for |
863 | 864 |
#' edge finding. |
864 | 865 |
#' @return data.frame, subset rows of line.data |
866 |
+#' @noRd |
|
865 | 867 |
.getClustersLineData <- function(line.data, useCluster) { |
866 | 868 |
idx <- vapply(useCluster, function(x){ |
867 | 869 |
grepl(paste0("^", x, "--"), line.data$edge) | |
... | ... |
@@ -442,6 +442,7 @@ plotTSCANPseudotimeHeatmap <- function(inSCE, |
442 | 442 |
#' Basing on pseudotime is presented within range from 0 to 100 |
443 | 443 |
#' Use the viridis color scale to match with `plotTSCANResult` embedding color |
444 | 444 |
#' @param x numeric vector of pseudotime |
445 |
+#' @return function object returned by circlize::colorRamp2 |
|
445 | 446 |
.viridisPseudoTimeColor <- function(x) { |
446 | 447 |
a <- max(x, na.rm = TRUE) |
447 | 448 |
b <- min(x, na.rm = TRUE) |
... | ... |
@@ -652,10 +652,7 @@ plotTSCANClusterPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
652 | 652 |
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters) |
653 | 653 |
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst, |
654 | 654 |
clusters = NULL, use.dimred = useReducedDim) |
655 |
- line.data.sub <- line.data[grepl(paste0("^", useCluster, "--"), |
|
656 |
- line.data$edge) | |
|
657 |
- grepl(paste0("--", useCluster, "$"), |
|
658 |
- line.data$edge),] |
|
655 |
+ line.data.sub <- .getClustersLineData(line.data, useCluster) |
|
659 | 656 |
|
660 | 657 |
# Get Branch pseudotime |
661 | 658 |
tscan.pseudo <- TSCAN::orderCells(results$maptscan, mst = results$mst, |
... | ... |
@@ -694,6 +691,9 @@ plotTSCANClusterPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
694 | 691 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
695 | 692 |
#' @param useCluster Choose a cluster used for identifying DEG with |
696 | 693 |
#' \code{\link{runTSCANClusterDEAnalysis}}. Required. |
694 |
+#' @param pathIndex Specifies one of the branching paths from \code{useCluster} |
|
695 |
+#' and plot the top DEGs on this path. Ususally presented by the terminal |
|
696 |
+#' cluster of a path. By default \code{NULL} plot top DEGs of all paths. |
|
697 | 697 |
#' @param useReducedDim A single character for the matrix of 2D embedding. |
698 | 698 |
#' Should exist in \code{reducedDims} slot. Default \code{"UMAP"}. |
699 | 699 |
#' @param topN Integer. Use top N genes identified. Default \code{9}. |
... | ... |
@@ -802,55 +802,50 @@ plotTSCANDimReduceFeatures <- function( |
802 | 802 |
combinePlot = c("all", "none")) |
803 | 803 |
{ |
804 | 804 |
results <- getTSCANResults(inSCE, analysisName = "Pseudotime") |
805 |
- if (!is.null(useCluster) && length(useCluster) != 1) |
|
806 |
- stop("`useCluster`: can only use one cluster.") |
|
807 | 805 |
combinePlot <- match.arg(combinePlot) |
808 | 806 |
clusters <- colData(inSCE)$TSCAN_clusters |
809 | 807 |
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters) |
810 | 808 |
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst, |
811 | 809 |
clusters = NULL, use.dimred = useReducedDim) |
812 | 810 |
if (!is.null(useCluster)) { |
813 |
- line.data <- line.data[grepl(paste0("^", useCluster, "--"), |
|
814 |
- line.data$edge) | |
|
815 |
- grepl(paste0("--", useCluster, "$"), |
|
816 |
- line.data$edge),] |
|
811 |
+ line.data <- .getClustersLineData(line.data, useCluster) |
|
817 | 812 |
inSCE <- inSCE[, clusters %in% useCluster] |
818 | 813 |
} |
819 | 814 |
if (by == "rownames") by <- NULL |
820 | 815 |
plotList <- list() |
821 |
- if (!is.na(features)) { |
|
822 |
- for (f in features) { |
|
823 |
- g <- plotSCEDimReduceFeatures(inSCE, feature = f, |
|
824 |
- useAssay = useAssay, |
|
825 |
- featureLocation = by,dim1 = 1, dim2 = 2, |
|
826 |
- featureDisplay = featureDisplay, |
|
827 |
- reducedDimName = useReducedDim, |
|
828 |
- title = f) + |
|
829 |
- ggplot2::geom_line(data = line.data, |
|
830 |
- ggplot2::aes_string(x = colnames(line.data)[2], |
|
831 |
- y = colnames(line.data)[3], |
|
832 |
- group = "edge"), |
|
833 |
- inherit.aes = FALSE) |
|
834 |
- # Text labeling code credit: scater |
|
835 |
- reddim <- as.data.frame(SingleCellExperiment::reducedDim(inSCE, |
|
836 |
- useReducedDim)) |
|
837 |
- text_out <- scater::retrieveCellInfo(inSCE, "TSCAN_clusters", |
|
838 |
- search = "colData") |
|
839 |
- by_text_x <- vapply(split(reddim[,1], text_out$val), |
|
840 |
- stats::median, FUN.VALUE = 0) |
|
841 |
- by_text_y <- vapply(split(reddim[,2], text_out$val), |
|
842 |
- stats::median, FUN.VALUE = 0) |
|
843 |
- g <- g + ggrepel::geom_text_repel( |
|
844 |
- data = data.frame(x = by_text_x, |
|
845 |
- y = by_text_y, |
|
846 |
- label = names(by_text_x)), |
|
847 |
- mapping = ggplot2::aes_string(x = "x", |
|
848 |
- y = "y", |
|
849 |
- label = "label"), |
|
850 |
- inherit.aes = FALSE |
|
851 |
- ) |
|
852 |
- plotList[[f]] <- g |
|
853 |
- } |
|
816 |
+ features <- stats::na.omit(features) |
|
817 |
+ for (f in features) { |
|
818 |
+ # Should not enter the loop if features is length zero after NA omit |
|
819 |
+ g <- plotSCEDimReduceFeatures(inSCE, feature = f, |
|
820 |
+ useAssay = useAssay, |
|
821 |
+ featureLocation = by,dim1 = 1, dim2 = 2, |
|
822 |
+ featureDisplay = featureDisplay, |
|
823 |
+ reducedDimName = useReducedDim, |
|
824 |
+ title = f) + |
|
825 |
+ ggplot2::geom_line(data = line.data, |
|
826 |
+ ggplot2::aes_string(x = colnames(line.data)[2], |
|
827 |
+ y = colnames(line.data)[3], |
|
828 |
+ group = "edge"), |
|
829 |
+ inherit.aes = FALSE) |
|
830 |
+ # Text labeling code credit: scater |
|
831 |
+ reddim <- as.data.frame(SingleCellExperiment::reducedDim(inSCE, |
|
832 |
+ useReducedDim)) |
|
833 |
+ text_out <- scater::retrieveCellInfo(inSCE, "TSCAN_clusters", |
|
834 |
+ search = "colData") |
|
835 |
+ by_text_x <- vapply(split(reddim[,1], text_out$val), |
|
836 |
+ stats::median, FUN.VALUE = 0) |
|
837 |
+ by_text_y <- vapply(split(reddim[,2], text_out$val), |
|
838 |
+ stats::median, FUN.VALUE = 0) |
|
839 |
+ g <- g + ggrepel::geom_text_repel( |
|
840 |
+ data = data.frame(x = by_text_x, |
|
841 |
+ y = by_text_y, |
|
842 |
+ label = names(by_text_x)), |
|
843 |
+ mapping = ggplot2::aes_string(x = "x", |
|
844 |
+ y = "y", |
|
845 |
+ label = "label"), |
|
846 |
+ inherit.aes = FALSE, na.rm = TRUE, |
|
847 |
+ ) |
|
848 |
+ plotList[[f]] <- g |
|
854 | 849 |
} |
855 | 850 |
if (combinePlot == "all") { |
856 | 851 |
if (length(plotList) > 0) |
... | ... |
@@ -858,3 +853,22 @@ plotTSCANDimReduceFeatures <- function( |
858 | 853 |
} |
859 | 854 |
return(plotList) |
860 | 855 |
} |
856 |
+ |
|
857 |
+#' Extract edges that connects to the clusters of interest |
|
858 |
+#' @param line.data data.frame of three columns. First column should be named |
|
859 |
+#' "edge", the other two are coordinates of one of the vertices that this |
|
860 |
+#' edge connects. |
|
861 |
+#' @param useCluster Vector of clusters, that are going to be the vertices for |
|
862 |
+#' edge finding. |
|
863 |
+#' @return data.frame, subset rows of line.data |
|
864 |
+.getClustersLineData <- function(line.data, useCluster) { |
|
865 |
+ idx <- vapply(useCluster, function(x){ |
|
866 |
+ grepl(paste0("^", x, "--"), line.data$edge) | |
|
867 |
+ grepl(paste0("--", x, "$"), line.data$edge) |
|
868 |
+ }, logical(nrow(line.data))) |
|
869 |
+ # `idx` returned was c("matrix", "array") class. Each column is a logical |
|
870 |
+ # vector for edge selection of each cluster. |
|
871 |
+ # Next, do "or" for each row. |
|
872 |
+ idx <- rowSums(idx) > 0 |
|
873 |
+ line.data[idx,] |
|
874 |
+} |
... | ... |
@@ -21,7 +21,7 @@ |
21 | 21 |
#' \code{listTSCANTerminalNodes}. |
22 | 22 |
#' |
23 | 23 |
#' When \code{analysisName = "ClusterDEAnalysis"}, returns the list result from |
24 |
-#' \code{\link{runTSCANBranchDEG}}. Here \code{pathName} needs to match |
|
24 |
+#' \code{\link{runTSCANClusterDEAnalysis}}. Here \code{pathName} needs to match |
|
25 | 25 |
#' with the \code{useCluster} argument when running the algorithm. |
26 | 26 |
#' @export |
27 | 27 |
#' @rdname getTSCANResults |
... | ... |
@@ -50,10 +50,11 @@ setMethod("getTSCANResults", signature(x = "SingleCellExperiment"), |
50 | 50 |
results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime |
51 | 51 |
else { |
52 | 52 |
if (is.null(pathName)) { |
53 |
- stop("Have to specify `pathName` for TSCAN DE analyses") |
|
53 |
+ results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]] |
|
54 |
+ } else { |
|
55 |
+ pathName <- as.character(pathName) |
|
56 |
+ results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]] |
|
54 | 57 |
} |
55 |
- pathName <- as.character(pathName) |
|
56 |
- results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]] |
|
57 | 58 |
} |
58 | 59 |
return(results) |
59 | 60 |
}) |
... | ... |
@@ -161,7 +162,9 @@ runTSCAN <- function(inSCE, |
161 | 162 |
stop("`cluster` should either has length equal to `ncol(inSCE)` or be a ", |
162 | 163 |
"colData variable.") |
163 | 164 |
} |
164 |
- |
|
165 |
+ if (length(unique(cluster)) == 1) { |
|
166 |
+ stop("Only one cluster found. Unable to calculate.") |
|
167 |
+ } |
|
165 | 168 |
message(date(), " ... Running TSCAN to estimate pseudotime") |
166 | 169 |
inSCE <- scran::computeSumFactors(inSCE, clusters = cluster) |
167 | 170 |
by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = cluster) |
... | ... |
@@ -205,12 +208,12 @@ runTSCAN <- function(inSCE, |
205 | 208 |
edgeList <- mst |
206 | 209 |
for (i in seq_along(unique(colData(inSCE)$TSCAN_clusters))){ |
207 | 210 |
degree <- sum(edgeList[i] != 0) |
208 |
- if (degree > 2) branchClusters <- c(branchClusters, i) |
|
211 |
+ if (degree > 1) branchClusters <- c(branchClusters, i) |
|
209 | 212 |
} |
210 | 213 |
|
211 | 214 |
## Save these results in a list and then make S4 accessor that passes the |
212 | 215 |
## entire list |
213 |
- result <- list(pseudo = tscan.pseudo, mst = mst, clusters = cluster, |
|
216 |
+ result <- list(pseudo = tscan.pseudo, mst = mst, |
|
214 | 217 |
maptscan = map.tscan, |
215 | 218 |
pathClusters = pathClusters, |
216 | 219 |
branchClusters = branchClusters, |
... | ... |
@@ -246,7 +249,8 @@ runTSCAN <- function(inSCE, |
246 | 249 |
plotTSCANResults <- function(inSCE, useReducedDim = "UMAP") { |
247 | 250 |
|
248 | 251 |
results <- getTSCANResults(inSCE, analysisName = "Pseudotime") |
249 |
- by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = results$clusters) |
|
252 |
+ clusters <- colData(inSCE)$TSCAN_clusters |
|
253 |
+ by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters) |
|
250 | 254 |
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst, |
251 | 255 |
clusters = NULL, use.dimred = useReducedDim) |
252 | 256 |
|
... | ... |
@@ -543,12 +547,12 @@ plotTSCANPseudotimeGenes <- function(inSCE, |
543 | 547 |
#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
544 | 548 |
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
545 | 549 |
#' useReducedDim = "PCA_logcounts") |
546 |
-#' mouseBrainSubsetSCE <- runTSCANBranchDEG(inSCE = mouseBrainSubsetSCE, |
|
550 |
+#' mouseBrainSubsetSCE <- runTSCANClusterDEAnalysis(inSCE = mouseBrainSubsetSCE, |
|
547 | 551 |
#' useCluster = 1) |
548 |
-runTSCANBranchDEG <- function(inSCE, |
|
549 |
- useCluster, |
|
550 |
- useAssay = "logcounts", |
|
551 |
- fdrThreshold = 0.05){ |
|
552 |
+runTSCANClusterDEAnalysis <- function(inSCE, |
|
553 |
+ useCluster, |
|
554 |
+ useAssay = "logcounts", |
|
555 |
+ fdrThreshold = 0.05) { |
|
552 | 556 |
results <- getTSCANResults(inSCE, analysisName = "Pseudotime") |
553 | 557 |
message(date(), " ... Finding DEG between TSCAN branches") |
554 | 558 |
tscan.pseudo <- TSCAN::orderCells(results$maptscan, mst = results$mst, |
... | ... |
@@ -637,14 +641,15 @@ runTSCANBranchDEG <- function(inSCE, |
637 | 641 |
#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
638 | 642 |
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
639 | 643 |
#' useReducedDim = "PCA_logcounts") |
640 |
-#' plotTSCANBranchPseudo(mouseBrainSubsetSCE, useCluster = 1, |
|
641 |
-#' useReducedDim = "TSNE_logcounts") |
|
642 |
-plotTSCANBranchPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
|
644 |
+#' plotTSCANClusterPseudo(mouseBrainSubsetSCE, useCluster = 1, |
|
645 |
+#' useReducedDim = "TSNE_logcounts") |
|
646 |
+plotTSCANClusterPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
|
643 | 647 |
combinePlot = c("all", "none")){ |
644 | 648 |
# Get the plotting info of edges |
645 | 649 |
results <- getTSCANResults(inSCE, analysisName = "Pseudotime") |
646 | 650 |
combinePlot <- match.arg(combinePlot) |
647 |
- by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = results$clusters) |
|
651 |
+ clusters <- colData(inSCE)$TSCAN_clusters |
|
652 |
+ by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters) |
|
648 | 653 |
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst, |
649 | 654 |
clusters = NULL, use.dimred = useReducedDim) |
650 | 655 |
line.data.sub <- line.data[grepl(paste0("^", useCluster, "--"), |
... | ... |
@@ -681,20 +686,20 @@ plotTSCANBranchPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
681 | 686 |
### plot gene of interest in branch cluster |
682 | 687 |
################################################### |
683 | 688 |
|
684 |
-#' @title Plot features identified by \code{\link{runTSCANBranchDEG}} on cell |
|
685 |
-#' 2D embedding with MST overlaid |
|
689 |
+#' @title Plot features identified by \code{\link{runTSCANClusterDEAnalysis}} on |
|
690 |
+#' cell 2D embedding with MST overlaid |
|
686 | 691 |
#' @description A wrapper function which plot the top features expression |
687 |
-#' identified by \code{\link{runTSCANBranchDEG}} on the 2D embedding of the |
|
688 |
-#' cells cluster used in the analysis. The related MST edges are overlaid. |
|
692 |
+#' identified by \code{\link{runTSCANClusterDEAnalysis}} on the 2D embedding of |
|
693 |
+#' the cells cluster used in the analysis. The related MST edges are overlaid. |
|
689 | 694 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
690 | 695 |
#' @param useCluster Choose a cluster used for identifying DEG with |
691 |
-#' \code{\link{runTSCANBranchDEG}}. Required. |
|
696 |
+#' \code{\link{runTSCANClusterDEAnalysis}}. Required. |
|
692 | 697 |
#' @param useReducedDim A single character for the matrix of 2D embedding. |
693 | 698 |
#' Should exist in \code{reducedDims} slot. Default \code{"UMAP"}. |
694 | 699 |
#' @param topN Integer. Use top N genes identified. Default \code{9}. |
695 | 700 |
#' @param useAssay A single character for the feature expression matrix. Should |
696 | 701 |
#' exist in \code{assayNames(inSCE)}. Default \code{NULL} for using the one used |
697 |
-#' in \code{\link{runTSCANBranchDEG}}. |
|
702 |
+#' in \code{\link{runTSCANClusterDEAnalysis}}. |
|
698 | 703 |
#' @param featureDisplay Specify the feature ID type to display. Users can set |
699 | 704 |
#' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or |
700 | 705 |
#' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character |
... | ... |
@@ -703,8 +708,8 @@ plotTSCANBranchPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
703 | 708 |
#' will combine plots of each feature into a single \code{.ggplot} object, |
704 | 709 |
#' while \code{"none"} will output a list of plots. Default \code{"all"}. |
705 | 710 |
#' @return A \code{.ggplot} object of cell scatter plot, colored by the |
706 |
-#' expression of a gene identified by \code{\link{runTSCANBranchDEG}}, with the |
|
707 |
-#' layer of trajectory. |
|
711 |
+#' expression of a gene identified by \code{\link{runTSCANClusterDEAnalysis}}, |
|
712 |
+#' with the layer of trajectory. |
|
708 | 713 |
#' @export |
709 | 714 |
#' @author Yichen Wang |
710 | 715 |
#' @importFrom S4Vectors metadata |
... | ... |
@@ -712,13 +717,14 @@ plotTSCANBranchPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP", |
712 | 717 |
#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
713 | 718 |
#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
714 | 719 |
#' useReducedDim = "PCA_logcounts") |
715 |
-#' mouseBrainSubsetSCE <- runTSCANBranchDEG(inSCE = mouseBrainSubsetSCE, |
|
716 |
-#' useCluster = 1) |
|
717 |
-#' plotTSCANBranchDEG(mouseBrainSubsetSCE, useCluster = 1, |
|
718 |
-#' useReducedDim = "TSNE_logcounts") |
|
719 |
-plotTSCANBranchDEG <- function( |
|
720 |
+#' mouseBrainSubsetSCE <- runTSCANClusterDEAnalysis(inSCE = mouseBrainSubsetSCE, |
|
721 |
+#' useCluster = 1) |
|
722 |
+#' plotTSCANClusterDEG(mouseBrainSubsetSCE, useCluster = 1, |
|
723 |
+#' useReducedDim = "TSNE_logcounts") |
|
724 |
+plotTSCANClusterDEG <- function( |
|
720 | 725 |
inSCE, |
721 | 726 |
useCluster, |
727 |
+ pathIndex = NULL, |
|
722 | 728 |
useReducedDim = "UMAP", |
723 | 729 |
topN = 9, |
724 | 730 |
useAssay = NULL, |
... | ... |
@@ -731,12 +737,17 @@ plotTSCANBranchDEG <- function( |
731 | 737 |
pathName = useCluster) |
732 | 738 |
if (is.null(DEResults)) { |
733 | 739 |
stop("Branch DE result of specified cluster not found. Run ", |
734 |
- "`runTSCANBranchDEG()` with the same `useCluster` first.") |
|
740 |
+ "`runTSCANClusterDEAnalysis()` with the same `useCluster` first.") |
|
735 | 741 |
} |
736 | 742 |
combinePlot <- match.arg(combinePlot) |
737 |
- deg <- do.call(rbind, DEResults$DEgenes) |
|
738 |
- deg <- deg[order(deg$FDR),] |
|
739 |
- features <- rownames(deg)[seq(min(topN, nrow(deg)))] |
|
743 |
+ if (is.null(pathIndex)) { |
|
744 |
+ deg <- do.call(rbind, DEResults$DEgenes) |
|
745 |
+ deg <- deg[order(deg$FDR),] |
|
746 |
+ } else { |
|
747 |
+ pathIndex <- as.character(pathIndex) |
|
748 |
+ deg <- DEResults$DEgenes[[pathIndex]] |
|
749 |
+ } |
|
750 |
+ features <- rownames(deg)[seq(min(topN, nrow(deg), na.rm = TRUE))] |
|
740 | 751 |
if (is.null(useAssay)) useAssay <- DEResults$useAssay |
741 | 752 |
plotTSCANDimReduceFeatures(inSCE = inSCE, features = features, |
742 | 753 |
useReducedDim = useReducedDim, |
... | ... |
@@ -794,7 +805,8 @@ plotTSCANDimReduceFeatures <- function( |
794 | 805 |
if (!is.null(useCluster) && length(useCluster) != 1) |
795 | 806 |
stop("`useCluster`: can only use one cluster.") |
796 | 807 |
combinePlot <- match.arg(combinePlot) |
797 |
- by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = results$clusters) |
|
808 |
+ clusters <- colData(inSCE)$TSCAN_clusters |
|
809 |
+ by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters) |
|
798 | 810 |
line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst, |
799 | 811 |
clusters = NULL, use.dimred = useReducedDim) |
800 | 812 |
if (!is.null(useCluster)) { |
... | ... |
@@ -802,44 +814,46 @@ plotTSCANDimReduceFeatures <- function( |
802 | 814 |
line.data$edge) | |
803 | 815 |
grepl(paste0("--", useCluster, "$"), |
804 | 816 |
line.data$edge),] |
805 |
- inSCE <- inSCE[, colData(inSCE)$TSCAN_clusters %in% useCluster] |
|
817 |
+ inSCE <- inSCE[, clusters %in% useCluster] |
|
806 | 818 |
} |
807 | 819 |
if (by == "rownames") by <- NULL |
808 |
- if (!is.null(featureDisplay) && |
|
809 |
- featureDisplay == "rownames") |
|
810 |
- featureDisplay <- NULL |
|
811 | 820 |
plotList <- list() |
812 |
- for (f in features) { |
|
813 |
- g <- plotSCEDimReduceFeatures(inSCE, feature = f, |
|
814 |
- featureLocation = by, |
|
815 |
- featureDisplay = featureDisplay, |
|
816 |
- reducedDimName = useReducedDim, |
|
817 |
- title = f) + |
|
818 |
- ggplot2::geom_line(data = line.data, |
|
819 |
- ggplot2::aes_string(x = colnames(line.data)[2], |
|
820 |
- y = colnames(line.data)[3]), |
|
821 |
- inherit.aes = FALSE) |
|
822 |
- # Text labeling code credit: scater |
|
823 |
- reddim <- as.data.frame(SingleCellExperiment::reducedDim(inSCE, |
|
824 |
- useReducedDim)) |
|
825 |
- text_out <- scater::retrieveCellInfo(inSCE, "TSCAN_clusters", |
|
826 |
- search = "colData") |
|
827 |
- by_text_x <- vapply(split(reddim[,1], text_out$val), |
|
828 |
- stats::median, FUN.VALUE = 0) |
|
829 |
- by_text_y <- vapply(split(reddim[,2], text_out$val), |
|
830 |
- stats::median, FUN.VALUE = 0) |
|
831 |
- g <- g + ggrepel::geom_text_repel( |
|
832 |
- data = data.frame(x = by_text_x, |
|
833 |
- y = by_text_y, |
|
834 |
- label = names(by_text_x)), |
|
835 |
- mapping = ggplot2::aes_string(x = "x", |
|
836 |
- y = "y", |
|
837 |
- label = "label"), |
|
838 |
- inherit.aes = FALSE |
|
839 |
- ) |
|
840 |
- plotList[[f]] <- g |
|
821 |
+ if (!is.na(features)) { |
|
822 |
+ for (f in features) { |
|
823 |
+ g <- plotSCEDimReduceFeatures(inSCE, feature = f, |
|
824 |
+ useAssay = useAssay, |
|
825 |
+ featureLocation = by,dim1 = 1, dim2 = 2, |
|
826 |
+ featureDisplay = featureDisplay, |
|
827 |
+ reducedDimName = useReducedDim, |
|
828 |
+ title = f) + |
|
829 |
+ ggplot2::geom_line(data = line.data, |
|
830 |
+ ggplot2::aes_string(x = colnames(line.data)[2], |
|
831 |
+ y = colnames(line.data)[3], |
|
832 |
+ group = "edge"), |
|
833 |
+ inherit.aes = FALSE) |
|
834 |
+ # Text labeling code credit: scater |
|
835 |
+ reddim <- as.data.frame(SingleCellExperiment::reducedDim(inSCE, |
|
836 |
+ useReducedDim)) |
|
837 |
+ text_out <- scater::retrieveCellInfo(inSCE, "TSCAN_clusters", |
|
838 |
+ search = "colData") |
|
839 |
+ by_text_x <- vapply(split(reddim[,1], text_out$val), |
|
840 |
+ stats::median, FUN.VALUE = 0) |
|
841 |
+ by_text_y <- vapply(split(reddim[,2], text_out$val), |
|
842 |
+ stats::median, FUN.VALUE = 0) |
|
843 |
+ g <- g + ggrepel::geom_text_repel( |
|
844 |
+ data = data.frame(x = by_text_x, |
|
845 |
+ y = by_text_y, |
|
846 |
+ label = names(by_text_x)), |
|
847 |
+ mapping = ggplot2::aes_string(x = "x", |
|
848 |
+ y = "y", |
|
849 |
+ label = "label"), |
|
850 |
+ inherit.aes = FALSE |
|
851 |
+ ) |
|
852 |
+ plotList[[f]] <- g |
|
853 |
+ } |
|
841 | 854 |
} |
842 | 855 |
if (combinePlot == "all") { |
856 |
+ if (length(plotList) > 0) |
|
843 | 857 |
plotList <- cowplot::plot_grid(plotlist = plotList) |
844 | 858 |
} |
845 | 859 |
return(plotList) |
... | ... |
@@ -1,61 +1,82 @@ |
1 | 1 |
################################################### |
2 | 2 |
### Setting accessor functions |
3 | 3 |
################################################### |
4 |
-#' @title getTSCANResults accessor function |
|
4 |
+#' @title getTSCANResults accessor function |
|
5 |
+#' @description SCTK allows user to access all TSCAN related results with |
|
6 |
+#' \code{"getTSCANResults"}. See details. |
|
5 | 7 |
#' @param x Input \linkS4class{SingleCellExperiment} object. |
6 |
-#' @param analysisName Algorithm name implemented |
|
7 |
-#' @param pathName Sub folder name within the \code{analysisName} |
|
8 |
-#' @param value Value to be stored within the \code{pathName} or |
|
8 |
+#' @param analysisName Algorithm name implemented, should be one of |
|
9 |
+#' \code{"Pseudotime"}, \code{"DEG"}, or \code{"ClusterDEAnalysis"}. |
|
10 |
+#' @param pathName Sub folder name within the \code{analysisName}. See details. |
|
11 |
+#' @param value Value to be stored within the \code{pathName} or |
|
9 | 12 |
#' \code{analysisName} |
13 |
+#' @details |
|
14 |
+#' When \code{analysisName = "Pseudotime"}, returns the list result from |
|
15 |
+#' \code{\link{runTSCAN}}, including the MST structure. |
|
16 |
+#' |
|
17 |
+#' When \code{analysisName = "DEG"}, returns the list result from |
|
18 |
+#' \code{\link{runTSCANDEG}}, including \code{DataFrame}s containing genes that |
|
19 |
+#' increase/decrease along each the pseudotime paths. \code{pathName} indicates |
|
20 |
+#' the path index, the available options of which can be listed by |
|
21 |
+#' \code{listTSCANTerminalNodes}. |
|
22 |
+#' |
|
23 |
+#' When \code{analysisName = "ClusterDEAnalysis"}, returns the list result from |
|
24 |
+#' \code{\link{runTSCANBranchDEG}}. Here \code{pathName} needs to match |
|
25 |
+#' with the \code{useCluster} argument when running the algorithm. |
|
10 | 26 |
#' @export |
11 | 27 |
#' @rdname getTSCANResults |
12 | 28 |
#' @return Get or set TSCAN results |
29 |
+#' @examples |
|
30 |
+#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
|
31 |
+#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
|
32 |
+#' useReducedDim = "PCA_logcounts") |
|
33 |
+#' results <- getTSCANResults(mouseBrainSubsetSCE, "Pseudotime") |
|
13 | 34 |
setGeneric("getTSCANResults", signature = "x", |
14 |
- function(x, analysisName=NULL, pathName = NULL) { |
|
35 |
+ function(x, analysisName = NULL, pathName = NULL) { |
|
15 | 36 |
standardGeneric("getTSCANResults") |
16 | 37 |
} |
17 | 38 |
) |
18 | 39 |
|
19 | 40 |
#' @export |
20 | 41 |
#' @rdname getTSCANResults |
21 |
-#' |
|
22 |
-setMethod("getTSCANResults", signature(x = "SingleCellExperiment"), |
|
23 |
- function(x, analysisName=NULL, pathName = NULL){ |
|
42 |
+#' |
|
43 |
+setMethod("getTSCANResults", signature(x = "SingleCellExperiment"), |
|
44 |
+ function(x, analysisName = NULL, pathName = NULL){ |
|
24 | 45 |
result.names <- listTSCANResults(x) |
25 | 46 |
if(!analysisName %in% result.names) { |
26 |
- stop("The analysis was not found in the results for tool 'Trajectory'") |
|
27 |
- } |
|
28 |
- if (is.null(pathName)){ |
|
29 |
- results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]] |
|
30 |
- |
|
47 |
+ stop("The analysis was not found for TSCAN results") |
|
31 | 48 |
} |
32 |
- else{ |
|
49 |
+ if (analysisName == "Pseudotime") |
|
50 |
+ results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime |
|
51 |
+ else { |
|
52 |
+ if (is.null(pathName)) { |
|
53 |
+ stop("Have to specify `pathName` for TSCAN DE analyses") |
|
54 |
+ } |
|
55 |
+ pathName <- as.character(pathName) |
|
33 | 56 |
results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]] |
34 |
- |
|
35 | 57 |
} |
36 | 58 |
return(results) |
37 | 59 |
}) |
38 | 60 |
|
39 | 61 |
#' @export |
40 | 62 |
#' @rdname getTSCANResults |
41 |
-#' |
|
42 |
-setGeneric("getTSCANResults<-", |
|
43 |
- function(x, analysisName, pathName = NULL, value) |
|
63 |
+#' |
|
64 |
+setGeneric("getTSCANResults<-", |
|
65 |
+ function(x, analysisName, pathName = NULL, value) |
|
44 | 66 |
standardGeneric("getTSCANResults<-") |
45 | 67 |
) |
46 | 68 |
|
47 | 69 |
#' @export |
48 | 70 |
#' @rdname getTSCANResults |
49 |
-setReplaceMethod("getTSCANResults", signature(x = "SingleCellExperiment"), |
|
71 |
+setReplaceMethod("getTSCANResults", signature(x = "SingleCellExperiment"), |
|
50 | 72 |
function(x, analysisName, pathName = NULL, value) { |
51 |
- |
|
52 |
- if (is.null(pathName)){ |
|
53 |
- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]] <- value |
|
54 |
- |
|
55 |
- } |
|
56 |
- else{ |
|
73 |
+ if (analysisName == "Pseudotime") |
|
74 |
+ S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime <- value |
|
75 |
+ else { |
|
76 |
+ if (is.null(pathName)) |
|
77 |
+ stop("Have to specify `pathName` for TSCAN DE analyses") |
|
78 |
+ pathName <- as.character(pathName) |
|
57 | 79 |
S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]] <- value |
58 |
- |
|
59 | 80 |
} |
60 | 81 |
return(x) |
61 | 82 |
}) |
... | ... |
@@ -70,810 +91,756 @@ setGeneric("listTSCANResults", signature = "x", |
70 | 91 |
#' @rdname getTSCANResults |
71 | 92 |
setMethod("listTSCANResults", "SingleCellExperiment", function(x){ |
72 | 93 |
all.results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN |
73 |
- if(is.null(all.results) || length(names(all.results)) == 0) { |
|
74 |
- stop("No results from 'Trajectory' are found. Please run `runTSCAN` first.") |
|
94 |
+ if (is.null(all.results) || |
|
95 |
+ !"Pseudotime" %in% names(all.results)) { |
|
96 |
+ stop("No TSCAN result found. Please run `runTSCAN` first.") |
|
75 | 97 |
} |
76 | 98 |
return(names(all.results)) |
77 | 99 |
}) |
78 | 100 |
|
79 | 101 |
#' @export |
80 | 102 |
#' @rdname getTSCANResults |
81 |
-#' |
|
82 | 103 |
setGeneric("listTSCANTerminalNodes", signature = "x", |
83 |
- function(x, analysisName=NULL, pathName = NULL) { |
|
104 |
+ function(x) { |
|
84 | 105 |
standardGeneric("listTSCANTerminalNodes") |
85 | 106 |
} |
86 | 107 |
) |
87 | 108 |
|
88 | 109 |
#' @export |
89 | 110 |
#' @rdname getTSCANResults |
90 |
-#' |
|
91 |
-setMethod("listTSCANTerminalNodes", signature(x = "SingleCellExperiment"), |
|
92 |
- function(x, analysisName=NULL, pathName = NULL){ |
|
93 |
- |
|
94 |
- result.names <- listTSCANResults(x) |
|
95 |
- if(!analysisName %in% result.names) { |
|
96 |
- stop("The analysis was not found in the results for tool 'Trajectory'") |
|
97 |
- } |
|
98 |
- |
|
99 |
- results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][["pathClusters"]] |
|
111 |
+setMethod("listTSCANTerminalNodes", signature(x = "SingleCellExperiment"), |
|
112 |
+ function(x){ |
|
113 |
+ if (is.null(S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime)) |
|
114 |
+ stop("No TSCAN result found. Please run `runTSCAN()` first.") |
|
115 |
+ results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime$pathClusters |
|
100 | 116 |
return(names(results)) |
101 |
- |
|
102 | 117 |
}) |
103 | 118 |
|
104 | 119 |
################################################### |
105 | 120 |
### STEP 1:: creating cluster and MST |
106 | 121 |
################################################### |
107 | 122 |
|
108 |
-#' @title Run runTSCAN function to obtain pseudotime values for cells |
|
109 |
-#' @description Wrapper for obtaining a pseudotime ordering of the cells by |
|
110 |
-#' projecting them onto the MST |
|
111 |
-#' |
|
123 |
+#' @title Run TSCAN to obtain pseudotime values for cells |
|
124 |
+#' @description Wrapper for obtaining a pseudotime ordering of the cells by |
|
125 |
+#' projecting them onto the minimum spanning tree (MST) |
|
112 | 126 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
113 |
-#' @param cluster Grouping for each cell in \code{inSCE}. A user may input a |
|
114 |
-#' vector equal length to the number of the samples in \code{inSCE}, or can be |
|
115 |
-#' retrieved from the \code{colData} slot. Default \code{NULL}. |
|
116 |
-#' @param seed An integer. Set the seed for random process that happens only in |
|
117 |
-#' "random" generation. Default \code{12345}. |
|
118 |
-#' @param useReducedDim Character. Saved dimension reduction name in |
|
119 |
-#' \code{inSCE} object. Required. Used for specifying which low-dimension |
|
120 |
-#' representation to perform the clustering algorithm and building nearest |
|
121 |
-#' neighbor graph on. Default \code{"PCA"} |
|
122 |
-#' |
|
123 |
-#' @return A \linkS4class{SingleCellExperiment} object with pseudotime ordering |
|
124 |
-#' of the cells along the paths |
|
125 |
-#' @export |
|
127 |
+#' @param useReducedDim Character. A low-dimension representation in |
|
128 |
+#' \code{reducedDims}, will be used for both clustering if \code{cluster} not |
|
129 |
+#' specified and MST construction. Default \code{"PCA"}. |
|
130 |
+#' @param cluster Grouping for each cell in \code{inSCE}. A vector with equal |
|
131 |
+#' length to the number of the cells in \code{inSCE}, or a single character for |
|
132 |
+#' retriving \code{colData} variable. Default \code{NULL}, will run |
|
133 |
+#' \code{runScranSNN} to obtain. |
|
134 |
+#' @param seed An integer. Random seed for clustering if \code{cluster} is not |
|
135 |
+#' specified. Default \code{12345}. |
|
136 |
+#' @return The input \code{inSCE} object with pseudotime ordering of the cells |
|
137 |
+#' along the paths and the cluster label stored in \code{colData}, and other |
|
138 |
+#' unstructured information in \code{metadata}. |
|
139 |
+#' @export |
|
126 | 140 |
#' @author Nida Pervaiz |
127 |
-#' @importFrom utils head |
|
128 | 141 |
#' @examples |
129 |
-#' data("scExample", package = "singleCellTK") |
|
130 |
-#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
131 |
-#' rowData(sce)$Symbol <- rowData(sce)$feature_name |
|
132 |
-#' rownames(sce) <- rowData(sce)$Symbol |
|
133 |
-#' sce <- runNormalization(sce, |
|
134 |
-#' normalizationMethod = "LogNormalize", |
|
135 |
-#' useAssay = "counts", |
|
136 |
-#' outAssayName = "logcounts") |
|
137 |
-#' sce <- runDimReduce(inSCE = sce, |
|
138 |
-#' method = "scaterPCA", |
|
139 |
-#' useAssay = "logcounts", |
|
140 |
-#' reducedDimName = "PCA") |
|
141 |
-#' sce <- runDimReduce(inSCE = sce, |
|
142 |
-#' method = "rTSNE", |
|
143 |
-#' useReducedDim = "PCA", |
|
144 |
-#' reducedDimName = "TSNE") |
|
145 |
-#' sce <- runTSCAN (inSCE = sce, |
|
146 |
-#' useReducedDim = "PCA", |
|
147 |
-#' seed = NULL) |
|
148 |
-runTSCAN <- function(inSCE, |
|
149 |
- useReducedDim, |
|
150 |
- cluster = NULL, |
|
151 |
- seed = 12345) { |
|
152 |
- |
|
153 |
- if (is.null(seed)) { |
|
154 |
- if(is.null(cluster)){ |
|
155 |
- inSCE <- runScranSNN(inSCE, useReducedDim = useReducedDim) |
|
156 |
- cluster <- colData(inSCE)$"scranSNN_cluster" |
|
142 |
+#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
|
143 |
+#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
|
144 |
+#' useReducedDim = "PCA_logcounts") |
|
145 |
+runTSCAN <- function(inSCE, |
|
146 |
+ useReducedDim = "PCA", |
|
147 |
+ cluster = NULL, |
|
148 |
+ seed = 12345) { |
|
149 |
+ if (is.null(cluster)) { |
|
150 |
+ # DON'T RETURN TO `inSCE` |
|
151 |
+ # It really overwrites existing cluster labels |
|
152 |
+ sce <- runScranSNN(inSCE, useReducedDim = useReducedDim, seed = seed) |
|
153 |
+ cluster <- colData(sce)$scranSNN_cluster |
|
154 |
+ rm(sce) |
|
155 |
+ } else if (length(cluster) == 1 & is.character(cluster)) { |
|
156 |
+ if (!cluster %in% names(colData(inSCE))) { |
|
157 |
+ stop("Specified cluster not found in `colData(inSCE)`.") |
|
157 | 158 |
} |
159 |
+ cluster <- colData(inSCE)[[cluster]] |
|
160 |
+ } else if (length(cluster) != ncol(inSCE)) { |
|
161 |
+ stop("`cluster` should either has length equal to `ncol(inSCE)` or be a ", |
|
162 |
+ "colData variable.") |
|
158 | 163 |
} |
159 |
- else{ |
|
160 |
- withr::with_seed( |
|
161 |
- seed, |
|
162 |
- if(is.null(cluster)){ |
|
163 |
- inSCE <- runScranSNN(inSCE, useReducedDim = useReducedDim) |
|
164 |
- cluster <- colData(inSCE)$"scranSNN_cluster" |
|
165 |
- }) |
|
166 |
- } |
|
164 |
+ |
|
165 |
+ message(date(), " ... Running TSCAN to estimate pseudotime") |
|
167 | 166 |
inSCE <- scran::computeSumFactors(inSCE, clusters = cluster) |
168 |
- by.cluster <- scuttle::aggregateAcrossCells(inSCE , ids = cluster) |
|
169 |
- centroids <- SingleCellExperiment::reducedDim(by.cluster, useReducedDim) |
|
170 |
- mst <- TSCAN::createClusterMST(centroids, clusters = NULL) |
|
171 |
- |
|
172 |
- # Map each cell to the closest edge on the MST, reporting also the distance to |
|
167 |
+ by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = cluster) |
|
168 |
+ centroids <- SingleCellExperiment::reducedDim(by.cluster, useReducedDim) |
|
169 |
+ mst <- TSCAN::createClusterMST(centroids, clusters = NULL) |
|
170 |
+ |
|
171 |
+ # Map each cell to the closest edge on the MST, reporting also the distance to |
|
173 | 172 |
# the corresponding vertices. |
174 |
- map.tscan <- TSCAN::mapCellsToEdges(inSCE , mst = mst, |
|
175 |
- use.dimred = useReducedDim , |
|
173 |
+ map.tscan <- TSCAN::mapCellsToEdges(inSCE , mst = mst, |
|
174 |
+ use.dimred = useReducedDim, |
|
176 | 175 |
clusters = cluster) |
177 |
- |
|
178 |
- # Compute a pseudotime for each cell lying on each path through the MST from a |
|
176 |
+ |
|
177 |
+ # Compute a pseudotime for each cell lying on each path through the MST from a |
|
179 | 178 |
# given starting node. |
180 | 179 |
tscan.pseudo <- TSCAN::orderCells(map.tscan, mst) |
181 |
- common.pseudo <- TrajectoryUtils::averagePseudotime(tscan.pseudo) |
|
182 |
- |
|
183 |
- colData(inSCE)$"TSCAN_clusters" <- cluster |
|
184 |
- colData(inSCE)$"TSCAN_pseudotime" <- common.pseudo |
|
185 |
- |
|
180 |
+ common.pseudo <- TrajectoryUtils::averagePseudotime(tscan.pseudo) |
|
181 |
+ |
|
182 |
+ colData(inSCE)$TSCAN_clusters <- cluster |
|
183 |
+ colData(inSCE)$TSCAN_pseudotime <- common.pseudo |
|
184 |
+ |
|
186 | 185 |
pathClusters <- list() |
187 | 186 |
pathList <- data.frame() |
188 | 187 |
for(i in seq(ncol(tscan.pseudo))){ |
189 | 188 |
pathIndex = as.character(colnames(tscan.pseudo)[i]) |
190 |
- inSCE[[paste0("Path_",pathIndex,"_pseudotime") ]] <- TrajectoryUtils::pathStat(tscan.pseudo)[,pathIndex] |
|
191 |
- nonNA <- inSCE[,!is.na(inSCE[[paste0("Path_",pathIndex,"_pseudotime") ]])] |
|
192 |
- pathClusters[[pathIndex]] <- (sort(unique(colData(nonNA)$TSCAN_clusters))) |
|
193 |
- |
|
189 |
+ colDataVarName <- paste0("Path_",pathIndex,"_pseudotime") |
|
190 |
+ inSCE[[colDataVarName]] <- TrajectoryUtils::pathStat(tscan.pseudo)[,pathIndex] |
|
191 |
+ nonNA.idx <- !is.na(colData(inSCE)[[colDataVarName]]) |
|
192 |
+ pathClusters[[pathIndex]] <- unique(colData(inSCE)$TSCAN_clusters[nonNA.idx]) |
|
194 | 193 |
pathList[i,1] <- pathIndex |
195 |
- pathList[i,2] <- toString (pathClusters[[pathIndex]]) |
|
196 |
- message("Cluster involved in path index ",pathIndex," are: ", pathClusters[i]) |
|
194 |
+ pathList[i,2] <- toString(pathClusters[[pathIndex]]) |
|
195 |
+ message(date(), " ... Clusters involved in path index ", pathIndex, |
|
196 |
+ " are: ", paste(pathClusters[[i]], collapse = ", ")) |
|
197 | 197 |
} |
198 |
- |
|
198 |
+ |
|
199 | 199 |
maxWidth <- max(stringr::str_length(pathList[, 1])) |
200 |
- choiceList <- pathList[, 1] |
|
201 |
- names(choiceList) <- paste0(stringr::str_pad(pathList[, 1], width=maxWidth, |
|
202 |
- side="right"), |
|
203 |
- "|", pathList[, 2]) |
|
204 |
- |
|
205 |
- branchClustersList <- c() |
|
200 |
+ choiceList <- paste0(stringr::str_pad(pathList[, 1], width=maxWidth, |
|
201 |
+ side="right"), |
|
202 |
+ "|", pathList[, 2]) |
|
203 |
+ |
|
204 |
+ branchClusters <- c() |
|
206 | 205 |
edgeList <- mst |
207 | 206 |
for (i in seq_along(unique(colData(inSCE)$TSCAN_clusters))){ |
208 |
- clustEdges <- sum(edgeList[i] != 0) |
|
209 |
- if(clustEdges > 1){ |
|
210 |
- branchClustersList <- sort(BiocGenerics::append(i,branchClustersList)) |
|
211 |
- } |
|
212 |
- |
|
207 |
+ degree <- sum(edgeList[i] != 0) |
|
208 |
+ if (degree > 2) branchClusters <- c(branchClusters, i) |
|
213 | 209 |
} |
214 |
- |
|
215 |
- ## Save these results in a list and then make S4 accessor that passes the |
|
210 |
+ |
|
211 |
+ ## Save these results in a list and then make S4 accessor that passes the |
|
216 | 212 |
## entire list |
217 |
- result <- list(pseudo = tscan.pseudo, mst = mst, clusters = cluster, |
|
218 |
- by.cluster = by.cluster, maptscan = map.tscan, |
|
219 |
- pathClusters = pathClusters, |
|
220 |
- branchClustersList = branchClustersList, |
|
221 |
- pathIndexList = names(choiceList)) |
|
222 |
- getTSCANResults(inSCE, analysisName = "Pseudotime") <- result |
|
223 |
- |
|
224 |
- message("Number of estimated paths is ", ncol(tscan.pseudo) ) |
|
225 |
- |
|
213 |
+ result <- list(pseudo = tscan.pseudo, mst = mst, clusters = cluster, |
|
214 |
+ maptscan = map.tscan, |
|
215 |
+ pathClusters = pathClusters, |
|
216 |
+ branchClusters = branchClusters, |
|
217 |
+ pathIndexList = choiceList) |
|
218 |
+ getTSCANResults(inSCE, analysisName = "Pseudotime") <- result |
|
219 |
+ |
|
220 |
+ message(date(), " ... Number of estimated paths is ", ncol(tscan.pseudo)) |
|
221 |
+ |
|
226 | 222 |
return (inSCE) |
227 |
-} |
|
223 |
+} |
|
228 | 224 |
|
229 | 225 |
################################################### |
230 | 226 |
### plot pseudotime values |
231 | 227 |
################################################### |
232 | 228 |
|
233 |
-#' @title Plot MST pseudotime values for cells |
|
234 |
-#' @description A wrapper function which visualizes outputs from the |
|
235 |
-#' \code{\link{runTSCAN}} function. Plots the pseudotime ordering of the cells |
|
236 |
-#' by projecting them onto the MST |
|
237 |
-#' |
|
229 |
+#' @title Plot MST pseudotime values on cell 2D embedding |
|
230 |
+#' @description A wrapper function which visualizes outputs from the |
|
231 |
+#' \code{\link{runTSCAN}} function. Plots the pseudotime ordering of the cells |
|
232 |
+#' and project them onto the MST. |
|
238 | 233 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
239 |
-#' @param useReducedDim Saved dimension reduction name in \code{inSCE} object. |
|
234 |
+#' @param useReducedDim Saved dimension reduction name in \code{inSCE} object. |
|
240 | 235 |
#' Required. |
241 |
-#' |
|
242 |
-#' @return A plot with the pseudotime ordering of the cells by projecting them |
|
243 |
-#' onto the MST. |
|
244 |
-#' @export |
|
236 |
+#' @return A \code{.ggplot} object with the pseudotime ordering of the cells |
|
237 |
+#' colored on a cell 2D embedding, and the MST path drawn on it. |
|
238 |
+#' @export |
|
245 | 239 |
#' @author Nida Pervaiz |
246 |
-#' @importFrom utils head |
|
247 | 240 |
#' @examples |
248 |
-#' data("scExample", package = "singleCellTK") |
|
249 |
-#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
250 |
-#' rowData(sce)$Symbol <- rowData(sce)$feature_name |
|
251 |
-#' rownames(sce) <- rowData(sce)$Symbol |
|
252 |
-#' sce <- runNormalization(sce, |
|
253 |
-#' normalizationMethod = "LogNormalize", |
|
254 |
-#' useAssay = "counts", |
|
255 |
-#' outAssayName = "logcounts") |
|
256 |
-#' sce <- runDimReduce(inSCE = sce, |
|
257 |
-#' method = "scaterPCA", |
|
258 |
-#' useAssay = "logcounts", |
|
259 |
-#' reducedDimName = "PCA") |
|
260 |
-#' sce <- runDimReduce(inSCE = sce, |
|
261 |
-#' method = "rTSNE", |
|
262 |
-#' useReducedDim = "PCA", |
|
263 |
-#' reducedDimName = "TSNE") |
|
264 |
-#' sce <- runTSCAN (inSCE = sce, |
|
265 |
-#' useReducedDim = "PCA", |
|
266 |
-#' seed = NULL) |
|
267 |
-#' plotTSCANResults(inSCE = sce, |
|
268 |
-#' useReducedDim = "TSNE") |
|
269 |
- |
|
270 |
-plotTSCANResults <- function(inSCE, |
|
271 |
- useReducedDim){ |
|
272 |
- |
|
241 |
+#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
|
242 |
+#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
|
243 |
+#' useReducedDim = "PCA_logcounts") |
|
244 |
+#' plotTSCANResults(inSCE = mouseBrainSubsetSCE, |
|
245 |
+#' useReducedDim = "TSNE_logcounts") |
|
246 |
+plotTSCANResults <- function(inSCE, useReducedDim = "UMAP") { |
|
247 |
+ |
|
273 | 248 |
results <- getTSCANResults(inSCE, analysisName = "Pseudotime") |
274 |
- line.data <- TSCAN::reportEdges(results$by.cluster, mst = results$mst, |
|
275 |
- clusters = NULL, use.dimred = useReducedDim) |
|
276 |
- |
|
277 |
- a <- b <- c <- NULL |
|
278 |
- a = unlist(line.data[,2]) |
|
279 |
- b = unlist(line.data[,3]) |
|
280 |
- c = unlist(line.data[,1]) |
|
281 |
- |
|
282 |
- scater::plotReducedDim(inSCE, dimred = useReducedDim, |
|
283 |
- colour_by = I(colData(inSCE)$"TSCAN_pseudotime"), |
|
284 |
- text_by = "TSCAN_clusters", text_colour = "black") + |
|
285 |
- ggplot2::geom_path(data = line.data, ggplot2::aes(x = a, y = b, group = c)) |
|
286 |
-} |
|
249 |
+ by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = results$clusters) |
|
250 |
+ line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst, |
|
251 |
+ clusters = NULL, use.dimred = useReducedDim) |
|
287 | 252 |
|
253 |
+ scater::plotReducedDim(inSCE, dimred = useReducedDim, |
|
254 |
+ colour_by = I(colData(inSCE)$TSCAN_pseudotime), |
|
255 |
+ text_by = "TSCAN_clusters", text_colour = "black") + |
|
256 |
+ suppressMessages(ggplot2::scale_colour_viridis_c(name = "Pseudotime")) + |
|
257 |
+ ggplot2::geom_path(data = line.data, |
|
258 |
+ ggplot2::aes_string(x = names(line.data)[2], |
|
259 |
+ y = names(line.data)[3], |
|
260 |
+ group = 'edge')) |
|
261 |
+} |
|
288 | 262 |
|
289 | 263 |
################################################### |
290 | 264 |
### STEP 2:: identify expressive genes |
291 | 265 |
################################################### |
292 |
-#' @title Run runTSCANDEG function to obtain changes along a trajectory |
|
293 |
-#' @description Wrapper for identifying genes with significant changes with |
|
294 |
-#' respect to one |
|
295 |
-#' of the TSCAN pseudotimes |
|
266 |
+#' @title Test gene expression changes along a TSCAN trajectory path |
|
267 |
+#' @description Wrapper for identifying genes with significant changes with |
|
268 |
+#' respect to one of the TSCAN pseudotime paths |
|
296 | 269 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
297 |
-#' @param pathIndex Path index for which the pseudotime values should be used. |
|
298 |
-#' PathIndex corresponds to the terminal node of specific path from the root |
|
299 |
-#' node to the terminal node. |
|
300 |
-#' @param useAssay Character. The name of the assay to use. This assay should |
|
301 |
-#' contain log normalized counts. |
|
302 |
-#' @param discardCluster Optional. Clusters which are not of use or masks other |
|
303 |
-#' interesting effects can be discarded. |
|
304 |
-#' @param log2fcThreshold Only output DEGs with the absolute values of log2FC |
|
305 |
-#' larger than this value. Default Zero |
|
306 |
-#' |
|
307 |
-#' @return A \linkS4class{SingleCellExperiment} object with genes that decrease |
|
308 |
-#' and increase in expression with increasing pseudotime along the path in the |
|
309 |
-#' MST. |
|
310 |
-#' @export |
|
270 |
+#' @param pathIndex Path index for which the pseudotime values should be used. |
|
271 |
+#' This corresponds to the terminal node of specific path from the root |
|
272 |
+#' node to the terminal node. Run \code{listTSCANTerminalNodes(inSCE)} for |
|
273 |
+#' available options. |
|
274 |
+#' @param useAssay Character. The name of the assay to use for testing the |
|
275 |
+#' expression change. Should be log-normalized. Default \code{"logcounts"} |
|
276 |
+#' @param discardCluster Cluster(s) which are not of use or masks other |
|
277 |
+#' interesting effects can be discarded. Default \code{NULL}. |
|
278 |
+#' @return The input \code{inSCE} with results updated in \code{metadata}. |
|
279 |
+#' @export |
|
311 | 280 |
#' @author Nida Pervaiz |
312 |
-#' |
|
313 | 281 |
#' @examples |
314 |
-#' data("scExample", package = "singleCellTK") |
|
315 |
-#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
316 |
-#' rowData(sce)$Symbol <- rowData(sce)$feature_name |
|
317 |
-#' rownames(sce) <- rowData(sce)$Symbol |
|
318 |
-#' sce <- runNormalization(sce, |
|
319 |
-#' normalizationMethod = "LogNormalize", |
|
320 |
-#' useAssay = "counts", |
|
321 |
-#' outAssayName = "logcounts") |
|
322 |
-#' sce <- runDimReduce(inSCE = sce, |
|
323 |
-#' method = "scaterPCA", |
|
324 |
-#' useAssay = "logcounts", |
|
325 |
-#' reducedDimName = "PCA") |
|
326 |
-#' sce <- runDimReduce(inSCE = sce, |
|
327 |
-#' method = "rTSNE", |
|
328 |
-#' useReducedDim = "PCA", |
|
329 |
-#' reducedDimName = "TSNE") |
|
330 |
-#' sce <- runTSCAN (inSCE = sce, |
|
331 |
-#' useReducedDim = "PCA", |
|
332 |
-#' seed = NULL) |
|
333 |
-#' terminalNodes <- listTSCANTerminalNodes(sce, analysisName = "Pseudotime") |
|
334 |
-#' sce <- runTSCANDEG(inSCE = sce, |
|
335 |
-#' pathIndex = terminalNodes[1]) |
|
336 |
- |
|
337 |
-runTSCANDEG <- function(inSCE, |
|
338 |
- pathIndex, |
|
339 |
- useAssay = "logcounts", |
|
340 |
- discardCluster = NULL, |
|
341 |
- log2fcThreshold = 0 ) { |
|
342 |
- |
|
343 |
- |
|
344 |
- if(!is.null(discardCluster)){ |
|
345 |
- for (i in seq_along(discardCluster)){ |
|
346 |
- if (i == 1){ |
|
347 |
- nx <- inSCE[, colData(inSCE)$"TSCAN_clusters" != discardCluster[i]] |
|
348 |
- } |
|
349 |
- else{ |
|
350 |
- nx <- nx[, colData(nx)$"TSCAN_clusters" != discardCluster[i]] |
|
351 |
- |
|
352 |
- } |
|
282 |
+#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
|
283 |
+#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
|
284 |
+#' useReducedDim = "PCA_logcounts") |
|
285 |
+#' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE) |
|
286 |
+#' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE, |
|
287 |
+#' pathIndex = terminalNodes[1]) |
|
288 |
+runTSCANDEG <- function(inSCE, |
|
289 |
+ pathIndex, |
|
290 |
+ useAssay = "logcounts", |
|
291 |
+ discardCluster = NULL) { |
|
292 |
+ nx <- inSCE |
|
293 |
+ if (!is.null(discardCluster)) { |
|
294 |
+ if (any(!discardCluster %in% unique(colData(nx)$TSCAN_clusters))) { |
|
295 |
+ stop("Not all `discardCluster` exist in TSCAN clusters") |
|
353 | 296 |
} |
354 |
- |
|
355 |
- } |
|
356 |
- else{ |
|
357 |
- nx <- inSCE |
|
297 |
+ nx <- nx[, !(colData(nx)$TSCAN_clusters %in% discardCluster)] |
|
358 | 298 |
} |
359 |
- |
|
360 |
- pseudo <- TSCAN::testPseudotime(nx, |
|
361 |
- pseudotime = nx[[paste0("Path_", pathIndex, |
|
362 |
- "_pseudotime")]], |
|
299 |
+ |
|
300 |
+ pseudo <- TSCAN::testPseudotime(nx, |
|
301 |
+ pseudotime = nx[[paste0("Path_", pathIndex, |
|
302 |
+ "_pseudotime")]], |
|
363 | 303 |
assay.type = useAssay) |
364 |
- pseudo$SYMBOL <- rowData(nx)$Symbol |
|
365 |
- sorted <- pseudo[order(pseudo$p.value),] |
|
366 |
- up.left <- sorted[sorted$logFC < log2fcThreshold,] |
|
367 |
- up.right <- sorted[sorted$logFC > log2fcThreshold,] |
|
368 |
- |
|
369 |
- ## Save these results in a list and then make S4 accessor that passes the |
|
304 |
+ pseudo <- pseudo[order(pseudo$FDR),] |
|
305 |
+ up.left <- pseudo[pseudo$logFC < 0,] |
|
306 |
+ up.right <- pseudo[pseudo$logFC > 0,] |
|
307 |
+ |
|
308 |
+ ## Save these results in a list and then make S4 accessor that passes the |
|
370 | 309 |
## entire list |
371 |
- pathresults <- list(discardClusters = discardCluster, upLeft = up.left, |
|
372 |
- upRight = up.right) |
|
373 |
- getTSCANResults(inSCE, analysisName = "DEG", |
|
310 |
+ pathresults <- list(discardClusters = discardCluster, upLeft = up.left, |
|
311 |
+ upRight = up.right, useAssay = useAssay) |
|
312 |
+ getTSCANResults(inSCE, analysisName = "DEG", |
|
374 | 313 |
pathName = as.character(pathIndex)) <- pathresults |
375 |
- |
|
314 |
+ |
|
376 | 315 |
return (inSCE) |
377 | 316 |
} |
378 | 317 |
|
379 | 318 |
################################################### |
380 | 319 |
### plot heatmap of top genes |
381 | 320 |
################################################### |
382 |
-#' @title Run plotTSCANPseudotimeHeatmap function to plot heatmap for top genes |
|
383 |
-#' @description A wrapper function which visualizes outputs from the |
|
384 |
-#' runTSCANDEG function. Plots the top genes that increase in expression with |
|
385 |
-#' increasing pseudotime along the path in the MST |
|
321 |
+#' @title Plot heatmap of genes with expression change along TSCAN pseudotime |
|
322 |
+#' @description A wrapper function which visualizes outputs from the |
|
323 |
+#' \code{\link{runTSCANDEG}} function. Plots the top genes that change in |
|
324 |
+#' expression with increasing pseudotime along the path in the MST. |
|
325 |
+#' \code{\link{runTSCANDEG}} has to be run in advance with using the same |
|
326 |
+#' \code{pathIndex} of interest. |
|
386 | 327 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
387 |
-#' @param pathIndex Path index for which the pseudotime values should be used. |
|
388 |
-#' PathIndex corresponds to the terminal node of specific path from the root |
|
389 |
-#' node to the terminal node. |
|
390 |
-#' @param topN An integer. Only to plot this number of top genes along the |
|
391 |
-#' path in the MST, in terms of log2FC value. Use NULL to cancel the top N |
|
392 |
-#' subscription. Default 50 |
|
393 |
-#' |
|
394 |
-#' @return A plot with the top genes that increase in expression with increasing |
|
395 |
-#' pseudotime along the path in the MST |
|
396 |
-#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
|
397 |
-#' @param pathIndex Path number for which the pseudotime values should be used. |
|
398 |
-#' PathIndex corresponds to one path from the root node to one of the terminal |
|
399 |
-#' nodes. |
|
400 |
-#' @param topN An integer. Only to plot this number of top genes along the path |
|
401 |
-#' in the MST, in terms of log2FC value. Use \code{NULL} to cancel the top N |
|
402 |
-#' subscription. Default \code{50}. |
|
403 |
-#' @return A plot with the top genes that increase in expression with increasing |
|
404 |
-#' pseudotime along the path in the MST. |
|
405 |
-#' @export |
|
328 |
+#' @param pathIndex Path index for which the pseudotime values should be used. |
|
329 |
+#' Should have being used in \code{\link{runTSCANDEG}}. |
|
330 |
+#' @param direction Should we show features with expression increasing or |
|
331 |
+#' decreeasing along the increase in TSCAN pseudotime? Choices are |
|
332 |
+#' \code{"both"}, \code{"increasing"} or \code{"decreasing"}. |
|
333 |
+#' @param topN An integer. Only to plot this number of top genes along the path |
|
334 |
+#' in the MST, in terms of FDR value. Use \code{NULL} to cancel the top N |
|
335 |
+#' subscription. Default \code{30}. |
|
336 |
+#' @param log2fcThreshold Only output DEGs with the absolute values of log2FC |
|
337 |
+#' larger than this value. Default \code{NULL}. |
|
338 |
+#' @param useAssay A single character to specify a feature expression matrix in |
|
339 |
+#' \code{assays} slot. The expression of top features from here will be |
|
340 |
+#' visualized. Default \code{NULL} use the one used for |
|
341 |
+#' \code{\link{runTSCANDEG}}. |
|
342 |
+#' @param featureDisplay Whether to display feature ID and what ID type to |
|
343 |
+#' display. Users can set default ID type by \code{\link{setSCTKDisplayRow}}. |
|
344 |
+#' \code{NULL} will display when number of features to display is less than 60. |
|
345 |
+#' \code{FALSE} for no display. Variable name in \code{rowData} to indicate ID |
|
346 |
+#' type. \code{"rownames"} or \code{TRUE} for using \code{rownames(inSCE)}. |
|
347 |
+#' @return A ComplexHeatmap in \code{.ggplot} class |
|
348 |
+#' @export |
|
406 | 349 |
#' @author Nida Pervaiz |
407 |
-#' @importFrom utils head |
|
350 |
+#' @importFrom S4Vectors metadata |
|
408 | 351 |
#' @examples |
409 |
-#' data("scExample", package = "singleCellTK") |
|
410 |
-#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
411 |
-#' rowData(sce)$Symbol <- rowData(sce)$feature_name |
|
412 |
-#' rownames(sce) <- rowData(sce)$Symbol |
|
413 |
-#' sce <- runNormalization(sce, |
|
414 |
-#' normalizationMethod = "LogNormalize", |
|
415 |
-#' useAssay = "counts", |
|
416 |
-#' outAssayName = "logcounts") |
|
417 |
-#' sce <- runDimReduce(inSCE = sce, |
|
418 |
-#' method = "scaterPCA", |
|
419 |
-#' useAssay = "logcounts", |
|
420 |
-#' reducedDimName = "PCA") |
|
421 |
-#' sce <- runDimReduce(inSCE = sce, |
|
422 |
-#' method = "rTSNE", |
|
423 |
-#' useReducedDim = "PCA", |
|
424 |
-#' reducedDimName = "TSNE") |
|
425 |
-#' sce <- runTSCAN (inSCE = sce, |
|
426 |
-#' useReducedDim = "PCA", |
|
427 |
-#' seed = NULL) |
|
428 |
-#' terminalNodes <- listTSCANTerminalNodes(sce, analysisName = "Pseudotime") |
|
429 |
-#' sce <- runTSCANDEG(inSCE = sce, |
|
430 |
-#' pathIndex = terminalNodes[1]) |
|
431 |
-#' plotTSCANPseudotimeHeatmap(inSCE = sce, |
|
432 |
-#' pathIndex = terminalNodes[1], |
|
433 |
-#' topN = 5) |
|
434 |
- |
|
435 |
-plotTSCANPseudotimeHeatmap <- function(inSCE, |
|
436 |
- pathIndex, |
|
437 |
- topN = 50){ |
|
438 |
- |
|
439 |
- pathIndex = as.character(pathIndex) |
|
440 |
- |
|
441 |
- results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex) |
|
352 |
+#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
|
353 |
+#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
|
354 |
+#' useReducedDim = "PCA_logcounts") |
|
355 |
+#' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE) |
|
356 |
+#' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE, |
|
357 |
+#' pathIndex = terminalNodes[1]) |
|
358 |
+#' plotTSCANPseudotimeHeatmap(mouseBrainSubsetSCE, |
|
359 |
+#' pathIndex = terminalNodes[1]) |
|
360 |
+plotTSCANPseudotimeHeatmap <- function(inSCE, |
|
361 |
+ pathIndex, |
|
362 |
+ direction = c("both", "increasing", |
|
363 |
+ "decreasing"), |
|
364 |
+ topN = 50, |
|
365 |
+ log2fcThreshold = NULL, |
|
366 |
+ useAssay = NULL, |
|
367 |
+ featureDisplay = metadata(inSCE)$featureDisplay){ |
|
368 |
+ results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex) |
|
369 |
+ direction <- match.arg(direction) |
|
370 |
+ # Organizing cells |
|
371 |
+ cell.idx <- rep(TRUE, ncol(inSCE)) |
|
442 | 372 |
if(!is.null(results$discardClusters)){ |
443 |
- for (i in seq_along(results$discardClusters)){ |
|
444 |
- if (i == 1){ |
|
445 |
- x <- inSCE[, colData(inSCE)$"TSCAN_clusters" != results$discardClusters[i]] |
|
446 |
- } |
|
447 |
- else{ |
|
448 |
- x <- x[, colData(x)$"TSCAN_clusters" != results$discardClusters[i]] |
|
449 |
- |
|
450 |
- } |
|
373 |
+ cell.idx <- cell.idx & |
|
374 |
+ (!colData(inSCE)$TSCAN_clusters %in% results$discardClusters) |
|
375 |
+ } |
|
376 |
+ colPathPseudo <- paste0("Path_", pathIndex, "_pseudotime") |
|
377 |
+ cell.idx <- cell.idx & !is.na(colData(inSCE)[[colPathPseudo]]) |
|
378 |
+ cell.order <- order(colData(inSCE)[[colPathPseudo]]) |
|
379 |
+ cell.order <- cell.order[cell.order %in% which(cell.idx)] |
|
380 |
+ |
|
381 |
+ # Organizing genes |
|
382 |
+ if (direction == "both") { |
|
383 |
+ degR <- results$upRight |
|
384 |
+ degL <- results$upLeft |
|
385 |
+ if (!is.null(log2fcThreshold)) { |
|
386 |
+ degR <- degR[abs(degR$logFC) > log2fcThreshold,] |
|
387 |
+ degL <- degL[abs(degL$logFC) > log2fcThreshold,] |
|
388 |
+ } |
|
389 |
+ genesR <- rownames(degR)[seq(min(topN, nrow(degR)))] |
|
390 |
+ genesL <- rownames(degL)[seq(min(topN, nrow(degL)))] |
|
391 |
+ genesR <- genesR[!is.na(genesR)] |
|
392 |
+ genesL <- genesL[!is.na(genesL)] |
|
393 |
+ genes <- c(genesL, genesR) |
|
394 |
+ if (length(genes) < 1) stop("No feature passed the filter.") |
|
395 |
+ direction.df <- data.frame( |
|
396 |
+ Direction = factor(c(rep("decreasing", length(genesL)), |
|
397 |
+ rep("increasing", length(genesR))), |
|
398 |
+ levels = c("increasing", "decreasing")), |
|
399 |
+ row.names = genes) |
|
400 |
+ } else { |
|
401 |
+ if (direction == "increasing") deg <- results$upRight |
|
402 |
+ if (direction == "decreasing") deg <- results$upLeft |
|
403 |
+ if (!is.null(log2fcThreshold)) { |
|
404 |
+ deg <- deg[abs(deg$logFC) > log2fcThreshold,] |
|
451 | 405 |
} |
452 |
- |
|
406 |
+ topN <- min(topN, nrow(deg)) |
|
407 |
+ if (topN < 1) stop("No feature passed the filter.") |
|
408 |
+ genes <- rownames(deg)[seq(topN)] |
|
409 |
+ direction.df <- data.frame(Direction = rep(direction, length(genes)), |
|
410 |
+ row.names = genes) |
|
453 | 411 |
} |
454 |
- else{ |
|
455 |
- x <- inSCE |
|
412 |
+ rowLabel <- featureDisplay |
|
413 |
+ if (is.null(featureDisplay)) { |
|
414 |
+ if (length(genes) <= 60) rowLabel <- TRUE else rowLabel <- FALSE |
|
415 |
+ } else if (featureDisplay == "rownames") { |
|
416 |
+ rowLabel <- TRUE |
|
456 | 417 |
} |
457 |
- |
|
458 |
- colPathPseudo <- paste0("Path_",pathIndex,"_pseudotime") |
|
459 |
- on.path <- !is.na(colData(x)[names(colData(x)) == colPathPseudo]) |
|
460 |
- on.path <- as.vector(on.path[,max(ncol(on.path))]) |
|
461 |
- |
|
462 |
- x <- x[,on.path] |
|
463 |
- plotSCEHeatmap(inSCE = x, |
|
464 |
- featureIndex = utils::head(results$upRight$SYMBOL, topN), |
|
465 |
- colDend = FALSE, |
|
418 |
+ cellAnnotationColor = list( |
|
419 |
+ .viridisPseudoTimeColor(colData(inSCE)[[colPathPseudo]]) |
|
420 |
+ ) |
|
421 |
+ names(cellAnnotationColor) <- colPathPseudo |
|
422 |
+ if (is.null(useAssay)) useAssay <- results$useAssay |
|
423 |
+ plotSCEHeatmap(inSCE = inSCE, |
|
424 |
+ useAssay = useAssay, |
|
425 |
+ cellIndex = cell.order, |
|
426 |
+ featureIndex = genes, |
|
427 |
+ colDend = FALSE, |
|
466 | 428 |
rowDend = FALSE, |
467 |
- cluster_columns = FALSE, |
|
468 |
- colDataName = c("TSCAN_clusters", colPathPseudo), |
|
469 |
- rowLabel = TRUE, |
|
470 |
- colSplitBy = colPathPseudo |
|
429 |
+ cluster_columns = FALSE, cluster_rows = TRUE, |
|
430 |
+ colDataName = c("TSCAN_clusters", colPathPseudo), |
|
431 |
+ rowLabel = rowLabel, |
|
432 |
+ featureAnnotations = direction.df, |
|
433 |
+ rowSplitBy = "Direction", |
|
434 |
+ cellAnnotationColor = cellAnnotationColor |
|
471 | 435 |
) |
472 | 436 |
} |
473 | 437 |
|
438 |
+#' Basing on pseudotime is presented within range from 0 to 100 |
|
439 |
+#' Use the viridis color scale to match with `plotTSCANResult` embedding color |
|
440 |
+#' @param x numeric vector of pseudotime |
|
441 |
+.viridisPseudoTimeColor <- function(x) { |
|
442 |
+ a <- max(x, na.rm = TRUE) |
|
443 |
+ b <- min(x, na.rm = TRUE) |
|
444 |
+ chunk <- (a - b) / 4 |
|
445 |
+ circlize::colorRamp2(seq(0,4) * chunk + b, |
|
446 |
+ c("#440154", "#3b528b", "#21918c", "#5ec962", "#fde725")) |
|
447 |
+} |
|
448 |
+ |
|
474 | 449 |
################################################### |
475 | 450 |
### plot expressive genes |
476 | 451 |
################################################### |
477 |
-#' @title Run plotTSCANPseudotimeGenes function to plot genes with significant |
|
478 |
-#' changes |
|
479 |
-#' @description A wrapper function which visualizes outputs from the |
|
452 |
+#' @title Plot expression changes of top features along a TSCAN pseudotime path |
|
453 |
+#' @description A wrapper function which visualizes outputs from the |
|
480 | 454 |
#' \code{\link{runTSCANDEG}} function. Plots the genes that increase or decrease |
481 | 455 |
#' in expression with increasing pseudotime along the path in the MST. |
456 |
+#' \code{\link{runTSCANDEG}} has to be run in advance with using the same |
|
457 |
+#' \code{pathIndex} of interest. |
|
482 | 458 |
#' @param inSCE Input \linkS4class{SingleCellExperiment} object. |
483 |
-#' @param pathIndex Path index for which the pseudotime values should be used. |
|
484 |
-#' PathIndex corresponds to the terminal node of specific path from the root |
|
485 |
-#' node to the terminal node. |
|
486 |
-#' @param direction Which direction to use. Choices are increasing or |
|
487 |
-#' decreasing. |
|
488 |
-#' @param n An integer. Only to plot this number of top genes that are |
|
489 |
-#' increasing/decreasing in expression with increasing pseudotime along |
|
459 |
+#' @param pathIndex Path index for which the pseudotime values should be used. |
|
460 |
+#' Should have being used in \code{\link{runTSCANDEG}}. |
|
461 |
+#' @param direction Should we show features with expression increasing or |
|
462 |
+#' decreeasing along the increase in TSCAN pseudotime? Choices are |
|
463 |
+#' \code{"increasing"} or \code{"decreasing"}. |
|
464 |
+#' @param topN An integer. Only to plot this number of top genes that are |
|
465 |
+#' increasing/decreasing in expression with increasing pseudotime along |
|
490 | 466 |
#' the path in the MST. Default 10 |
491 |
-#' |
|
492 |
-#' @return A plot with the top genes that increase/decrease in expression with |
|
493 |
-#' increasing pseudotime along the path in the MST |
|
494 |
-#' @export |
|
467 |
+#' @param useAssay A single character to specify a feature expression matrix in |
|
468 |
+#' \code{assays} slot. The expression of top features from here will be |
|
469 |
+#' visualized. Default \code{NULL} use the one used for |
|
470 |
+#' \code{\link{runTSCANDEG}}. |
|
471 |
+#' @param featureDisplay Specify the feature ID type to display. Users can set |
|
472 |
+#' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or |
|
473 |
+#' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character |
|
474 |
+#' values indicates \code{rowData} variable. |
|
475 |
+#' @return A \code{.ggplot} object with the facets of the top genes. Expression |
|
476 |
+#' on y-axis, pseudotime on x-axis. |
|
477 |
+#' @export |
|
495 | 478 |
#' @author Nida Pervaiz |
496 | 479 |
#' @importFrom utils head |
480 |
+#' @importFrom S4Vectors metadata |
|
481 |
+#' @importFrom SummarizedExperiment rowData |
|
497 | 482 |
#' @examples |
498 |
-#' data("scExample", package = "singleCellTK") |
|
499 |
-#' sce <- subsetSCECols(sce, colData = "type != 'EmptyDroplet'") |
|
500 |
-#' rowData(sce)$Symbol <- rowData(sce)$feature_name |
|
501 |
-#' rownames(sce) <- rowData(sce)$Symbol |
|
502 |
-#' sce <- runNormalization(sce, |
|
503 |
-#' normalizationMethod = "LogNormalize", |
|
504 |
-#' useAssay = "counts", |
|
505 |
-#' outAssayName = "logcounts") |
|
506 |
-#' sce <- runDimReduce(inSCE = sce, |
|
507 |
-#' method = "scaterPCA", |
|
508 |
-#' useAssay = "logcounts", |
|
509 |
-#' reducedDimName = "PCA") |
|
510 |
-#' sce <- runDimReduce(inSCE = sce, |
|
511 |
-#' method = "rTSNE", |
|
512 |
-#' useReducedDim = "PCA", |
|
513 |
-#' reducedDimName = "TSNE") |
|
514 |
-#' sce <- runTSCAN (inSCE = sce, |
|
515 |
-#' useReducedDim = "PCA", |
|
516 |
-#' seed = NULL) |
|
517 |
-#' terminalNodes <- listTSCANTerminalNodes(sce, analysisName = "Pseudotime") |
|
518 |
-#' sce <- runTSCANDEG(inSCE = sce, |
|
519 |
-#' pathIndex = terminalNodes[1]) |
|
520 |
-#' plotTSCANPseudotimeGenes(inSCE = sce, |
|
521 |
-#' pathIndex = terminalNodes[1], |
|
522 |
-#' direction = "increasing") |
|
523 |
- |
|
524 |
-plotTSCANPseudotimeGenes <- function (inSCE, |
|
525 |
- pathIndex, |
|
526 |
- direction = c("increasing", "decreasing"), |
|
527 |
- n = 10){ |
|
528 |
- |
|
529 |
- pathIndex = as.character(pathIndex) |
|
530 |
- results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex) |
|
531 |
- if(!is.null(results$discardClusters)){ |
|
532 |
- for (i in seq_along(results$discardClusters)){ |
|
533 |
- if (i == 1){ |
|
534 |
- y <- inSCE[, colData(inSCE)$"TSCAN_clusters" != results$discardClusters[i]] |
|
535 |
- } |
|
536 |
- else{ |
|
537 |
- y <- y[, colData(y)$"TSCAN_clusters" != results$discardClusters[i]] |
|
538 |
- |
|
539 |
- } |
|
540 |
- } |
|
541 |
- |
|
542 |
- } |
|
543 |
- else{ |
|
544 |
- y <- inSCE |
|
545 |
- } |
|
483 |
+#' data("mouseBrainSubsetSCE", package = "singleCellTK") |
|
484 |
+#' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE, |
|
485 |
+#' useReducedDim = "PCA_logcounts") |
|
486 |
+#' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE) |
|
487 |
+#' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE, |
|
488 |
+#' pathIndex = terminalNodes[1]) |
|
489 |
+#' plotTSCANPseudotimeGenes(mouseBrainSubsetSCE, |
|
490 |
+#' pathIndex = terminalNodes[1], |
|
491 |
+#' useAssay = "logcounts") |
|
492 |
+plotTSCANPseudotimeGenes <- function(inSCE, |
|
493 |
+ pathIndex, |
|
494 |
+ direction = c("increasing", "decreasing"), |
|
495 |
+ topN = 10, |
|
496 |
+ useAssay = NULL, |
|
497 |
+ featureDisplay = metadata(inSCE)$featureDisplay){ |
|
498 |
+ results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex) |
|
546 | 499 |
direction = match.arg(direction) |
547 |
- if (direction == "decreasing"){ |
|
548 |
- scater::plotExpression(y, |
|
549 |
- features = utils::head(results$upLeft$SYMBOL, n), |
|
550 |
- swap_rownames = "Symbol", |
|
551 |
- x = paste0("Path_",pathIndex,"_pseudotime"), |
|
552 |
- colour_by = "TSCAN_clusters") |
|
500 |
+ if (!is.null(featureDisplay) && |
|
501 |
+ featureDisplay == "rownames") |
|
502 |
+ featureDisplay <- NULL |
|
503 |
+ if(!is.null(results$discardClusters)){ |
|
504 |
+ inSCE <- inSCE[,!colData(inSCE)$TSCAN_clusters %in% results$discardClusters] |
|
553 | 505 |
} |
554 |
- else{ |
|
555 |
- scater::plotExpression(y, |
|
556 |
- features = utils::head(results$upRight$SYMBOL, n), |
|
557 |
- swap_rownames = "Symbol", |
|
558 |
- x = paste0("Path_",pathIndex,"_pseudotime"), |
|
559 |
- colour_by = "TSCAN_clusters") |
|
560 |
- |
|
506 |
+ if (direction == "decreasing") features <- rownames(results$upLeft) |
|
507 |
+ if (direction == "increasing") features <- rownames(results$upRight) |
|
508 |
+ features <- head(features, topN) |
|
509 |
+ if (!is.null(featureDisplay)) { |
|
510 |
+ features.idx <- featureIndex(features, inSCE) |
|
511 |
+ features <- rowData(inSCE)[[featureDisplay]][features.idx] |
|
561 | 512 |
} |
513 |
+ if (is.null(useAssay)) useAssay <- results$useAssay |
|
514 |
+ scater::plotExpression(inSCE, |
|
515 |
+ features = features, |
|
516 |
+ exprs_values = useAssay, |
|