R/runTSCAN.R
d4e1c876
 ###################################################
 ###  Setting accessor functions
 ###################################################
007a2c8d
 #' @title getTSCANResults accessor function
 #' @description SCTK allows user to access all TSCAN related results with
 #' \code{"getTSCANResults"}. See details.
662a501a
 #' @param x Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param analysisName Algorithm name implemented, should be one of
 #' \code{"Pseudotime"}, \code{"DEG"}, or \code{"ClusterDEAnalysis"}.
 #' @param pathName Sub folder name within the \code{analysisName}. See details.
 #' @param value Value to be stored within the \code{pathName} or
bbffe107
 #' \code{analysisName}
007a2c8d
 #' @details
 #' When \code{analysisName = "Pseudotime"}, returns the list result from
 #' \code{\link{runTSCAN}}, including the MST structure.
 #'
 #' When \code{analysisName = "DEG"}, returns the list result from
 #' \code{\link{runTSCANDEG}}, including \code{DataFrame}s containing genes that
 #' increase/decrease along each the pseudotime paths. \code{pathName} indicates
 #' the path index, the available options of which can be listed by
 #' \code{listTSCANTerminalNodes}.
 #'
 #' When \code{analysisName = "ClusterDEAnalysis"}, returns the list result from
d3b410fc
 #' \code{\link{runTSCANClusterDEAnalysis}}. Here \code{pathName} needs to match
007a2c8d
 #' with the \code{useCluster} argument when running the algorithm.
662a501a
 #' @export
 #' @rdname getTSCANResults
bbffe107
 #' @return Get or set TSCAN results
007a2c8d
 #' @examples
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 #' results <- getTSCANResults(mouseBrainSubsetSCE, "Pseudotime")
d4e1c876
 setGeneric("getTSCANResults", signature = "x",
007a2c8d
            function(x, analysisName = NULL, pathName = NULL) {
bbffe107
              standardGeneric("getTSCANResults")
            }
d4e1c876
 )
 
662a501a
 #' @export
 #' @rdname getTSCANResults
007a2c8d
 #'
 setMethod("getTSCANResults", signature(x = "SingleCellExperiment"),
           function(x, analysisName = NULL, pathName = NULL){
d4e1c876
   result.names <- listTSCANResults(x)
   if(!analysisName %in% result.names) {
007a2c8d
     stop("The analysis was not found for TSCAN results")
d4e1c876
   }
007a2c8d
   if (analysisName == "Pseudotime")
     results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime
   else {
     if (is.null(pathName)) {
d3b410fc
       results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]]
     } else {
       pathName <- as.character(pathName)
       results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]]
007a2c8d
     }
d4e1c876
   }
   return(results)
 })
 
662a501a
 #' @export
 #' @rdname getTSCANResults
007a2c8d
 #'
 setGeneric("getTSCANResults<-",
            function(x, analysisName, pathName = NULL, value)
bbffe107
              standardGeneric("getTSCANResults<-")
            )
d4e1c876
 
bbffe107
 #' @export
662a501a
 #' @rdname getTSCANResults
007a2c8d
 setReplaceMethod("getTSCANResults", signature(x = "SingleCellExperiment"),
bbffe107
                  function(x, analysisName, pathName = NULL, value) {
007a2c8d
   if (analysisName == "Pseudotime")
     S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime <- value
   else {
     if (is.null(pathName))
       stop("Have to specify `pathName` for TSCAN DE analyses")
     pathName <- as.character(pathName)
d4e1c876
     S4Vectors::metadata(x)$sctk$Traj$TSCAN[[analysisName]][[pathName]] <- value
   }
   return(x)
 })
 
662a501a
 #' @export
 #' @rdname getTSCANResults
d4e1c876
 setGeneric("listTSCANResults", signature = "x",
            function(x) {standardGeneric("listTSCANResults")}
 )
 
662a501a
 #' @export
 #' @rdname getTSCANResults
d4e1c876
 setMethod("listTSCANResults", "SingleCellExperiment", function(x){
   all.results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN
007a2c8d
   if (is.null(all.results) ||
       !"Pseudotime" %in% names(all.results)) {
     stop("No TSCAN result found. Please run `runTSCAN` first.")
d4e1c876
   }
   return(names(all.results))
 })
 
09a8d3ee
 #' @export
 #' @rdname getTSCANResults
 setGeneric("listTSCANTerminalNodes", signature = "x",
007a2c8d
            function(x) {
09a8d3ee
              standardGeneric("listTSCANTerminalNodes")
            }
 )
 
 #' @export
 #' @rdname getTSCANResults
007a2c8d
 setMethod("listTSCANTerminalNodes", signature(x = "SingleCellExperiment"),
           function(x){
   if (is.null(S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime))
     stop("No TSCAN result found. Please run `runTSCAN()` first.")
   results <- S4Vectors::metadata(x)$sctk$Traj$TSCAN$Pseudotime$pathClusters
09a8d3ee
   return(names(results))
   })
d4e1c876
 
 ###################################################
 ###  STEP 1:: creating cluster and MST
 ###################################################
 
007a2c8d
 #' @title Run TSCAN to obtain pseudotime values for cells
 #' @description Wrapper for obtaining a pseudotime ordering of the cells by
 #' projecting them onto the minimum spanning tree (MST)
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param useReducedDim Character. A low-dimension representation in
 #' \code{reducedDims}, will be used for both clustering if \code{cluster} not
 #' specified and MST construction. Default \code{"PCA"}.
 #' @param cluster Grouping for each cell in \code{inSCE}. A vector with equal
 #' length to the number of the cells in \code{inSCE}, or a single character for
 #' retriving \code{colData} variable. Default \code{NULL}, will run
 #' \code{runScranSNN} to obtain.
 #' @param seed An integer. Random seed for clustering if \code{cluster} is not
 #' specified. Default \code{12345}.
 #' @return The input \code{inSCE} object with pseudotime ordering of the cells
 #' along the paths and the cluster label stored in \code{colData}, and other
 #' unstructured information in \code{metadata}.
 #' @export
d4e1c876
 #' @author Nida Pervaiz
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 runTSCAN <- function(inSCE,
                      useReducedDim = "PCA",
                      cluster = NULL,
                      seed = 12345) {
   if (is.null(cluster)) {
     # DON'T RETURN TO `inSCE`
     # It really overwrites existing cluster labels
     sce <- runScranSNN(inSCE, useReducedDim = useReducedDim, seed = seed)
     cluster <- colData(sce)$scranSNN_cluster
     rm(sce)
93e2414e
   } else {
     cluster <- .manageCellVar(inSCE, var = cluster)
d4e1c876
   }
d3b410fc
   if (length(unique(cluster)) == 1) {
     stop("Only one cluster found. Unable to calculate.")
   }
007a2c8d
   message(date(), " ... Running TSCAN to estimate pseudotime")
59f77c88
   inSCE <- scran::computeSumFactors(inSCE, clusters = cluster)
007a2c8d
   by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = cluster)
   centroids <- SingleCellExperiment::reducedDim(by.cluster, useReducedDim)
   mst <- TSCAN::createClusterMST(centroids, clusters = NULL)
 
   # Map each cell to the closest edge on the MST, reporting also the distance to
bbffe107
   # the corresponding vertices.
007a2c8d
   map.tscan <- TSCAN::mapCellsToEdges(inSCE , mst = mst,
                                       use.dimred = useReducedDim,
bbffe107
                                       clusters = cluster)
007a2c8d
 
   # Compute a pseudotime for each cell lying on each path through the MST from a
bbffe107
   # given starting node.
d4e1c876
   tscan.pseudo <- TSCAN::orderCells(map.tscan, mst)
007a2c8d
   common.pseudo <- TrajectoryUtils::averagePseudotime(tscan.pseudo)
 
   colData(inSCE)$TSCAN_clusters <- cluster
   colData(inSCE)$TSCAN_pseudotime <- common.pseudo
 
849e6104
   pathClusters <- list()
   pathList <- data.frame()
   for(i in seq(ncol(tscan.pseudo))){
     pathIndex = as.character(colnames(tscan.pseudo)[i])
007a2c8d
     colDataVarName <- paste0("Path_",pathIndex,"_pseudotime")
     inSCE[[colDataVarName]] <- TrajectoryUtils::pathStat(tscan.pseudo)[,pathIndex]
     nonNA.idx <- !is.na(colData(inSCE)[[colDataVarName]])
     pathClusters[[pathIndex]] <- unique(colData(inSCE)$TSCAN_clusters[nonNA.idx])
849e6104
     pathList[i,1] <- pathIndex
007a2c8d
     pathList[i,2] <- toString(pathClusters[[pathIndex]])
     message(date(), " ...   Clusters involved in path index ", pathIndex,
             " are: ", paste(pathClusters[[i]], collapse = ", "))
849e6104
   }
007a2c8d
 
849e6104
   maxWidth <- max(stringr::str_length(pathList[, 1]))
007a2c8d
   choiceList <- paste0(stringr::str_pad(pathList[, 1], width=maxWidth,
                                         side="right"),
                        "|", pathList[, 2])
 
   branchClusters <- c()
849e6104
   edgeList <- mst
   for (i in seq_along(unique(colData(inSCE)$TSCAN_clusters))){
007a2c8d
     degree <- sum(edgeList[i] != 0)
d3b410fc
     if (degree > 1) branchClusters <- c(branchClusters, i)
849e6104
   }
007a2c8d
 
   ## Save these results in a list and then make S4 accessor that passes the
bbffe107
   ## entire list
d3b410fc
   result <- list(pseudo = tscan.pseudo, mst = mst,
007a2c8d
                  maptscan = map.tscan,
                  pathClusters = pathClusters,
                  branchClusters = branchClusters,
                  pathIndexList = choiceList)
   getTSCANResults(inSCE, analysisName = "Pseudotime") <- result
 
   message(date(), " ...   Number of estimated paths is ", ncol(tscan.pseudo))
 
d4e1c876
   return (inSCE)
007a2c8d
 }
d4e1c876
 
 ###################################################
 ###  plot pseudotime values
 ###################################################
 
007a2c8d
 #' @title Plot MST pseudotime values on cell 2D embedding
 #' @description A wrapper function which visualizes outputs from the
 #' \code{\link{runTSCAN}} function. Plots the pseudotime ordering of the cells
 #' and project them onto the MST.
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param useReducedDim Saved dimension reduction name in \code{inSCE} object.
bbffe107
 #' Required.
007a2c8d
 #' @return A \code{.ggplot} object with the pseudotime ordering of the cells
 #' colored on a cell 2D embedding, and the MST path drawn on it.
 #' @export
d4e1c876
 #' @author Nida Pervaiz
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 #' plotTSCANResults(inSCE = mouseBrainSubsetSCE,
 #'                  useReducedDim = "TSNE_logcounts")
 plotTSCANResults <- function(inSCE, useReducedDim = "UMAP") {
 
d4e1c876
   results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
d3b410fc
   clusters <- colData(inSCE)$TSCAN_clusters
   by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters)
007a2c8d
   line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst,
                                   clusters = NULL, use.dimred = useReducedDim)
d4e1c876
 
007a2c8d
   scater::plotReducedDim(inSCE, dimred = useReducedDim,
                          colour_by = I(colData(inSCE)$TSCAN_pseudotime),
                          text_by = "TSCAN_clusters", text_colour = "black") +
     suppressMessages(ggplot2::scale_colour_viridis_c(name = "Pseudotime")) +
     ggplot2::geom_path(data = line.data,
                        ggplot2::aes_string(x = names(line.data)[2],
                                            y = names(line.data)[3],
                                            group = 'edge'))
 }
d4e1c876
 
 ###################################################
 ###  STEP 2:: identify expressive genes
 ###################################################
007a2c8d
 #' @title Test gene expression changes along a TSCAN trajectory path
 #' @description Wrapper for identifying genes with significant changes with
 #' respect to one of the TSCAN pseudotime paths
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param pathIndex Path index for which the pseudotime values should be used.
 #' This corresponds to the terminal node of specific path from the root
 #' node to the terminal node. Run \code{listTSCANTerminalNodes(inSCE)} for
 #' available options.
 #' @param useAssay Character. The name of the assay to use for testing the
 #' expression change. Should be log-normalized. Default \code{"logcounts"}
 #' @param discardCluster Cluster(s) which are not of use or masks other
 #' interesting effects can be discarded. Default \code{NULL}.
 #' @return The input \code{inSCE} with results updated in \code{metadata}.
 #' @export
d4e1c876
 #' @author Nida Pervaiz
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 #' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE)
 #' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE,
 #'                                    pathIndex = terminalNodes[1])
 runTSCANDEG <- function(inSCE,
                         pathIndex,
                         useAssay = "logcounts",
                         discardCluster = NULL) {
   nx <- inSCE
   if (!is.null(discardCluster)) {
     if (any(!discardCluster %in% unique(colData(nx)$TSCAN_clusters))) {
       stop("Not all `discardCluster` exist in TSCAN clusters")
d4e1c876
     }
007a2c8d
     nx <- nx[, !(colData(nx)$TSCAN_clusters %in% discardCluster)]
d4e1c876
   }
007a2c8d
 
   pseudo <- TSCAN::testPseudotime(nx,
                                   pseudotime = nx[[paste0("Path_", pathIndex,
                                                           "_pseudotime")]],
bbffe107
                                   assay.type = useAssay)
007a2c8d
   pseudo <- pseudo[order(pseudo$FDR),]
   up.left <- pseudo[pseudo$logFC < 0,]
   up.right <- pseudo[pseudo$logFC > 0,]
 
   ## Save these results in a list and then make S4 accessor that passes the
bbffe107
   ## entire list
007a2c8d
   pathresults <- list(discardClusters = discardCluster, upLeft = up.left,
                       upRight = up.right, useAssay = useAssay)
   getTSCANResults(inSCE, analysisName = "DEG",
bbffe107
                   pathName = as.character(pathIndex)) <- pathresults
007a2c8d
 
d4e1c876
   return (inSCE)
 }
 
 ###################################################
 ###  plot heatmap of top genes
 ###################################################
007a2c8d
 #' @title Plot heatmap of genes with expression change along TSCAN pseudotime
 #' @description A wrapper function which visualizes outputs from the
 #' \code{\link{runTSCANDEG}} function. Plots the top genes that change in
 #' expression with increasing pseudotime along the path in the MST.
 #' \code{\link{runTSCANDEG}} has to be run in advance with using the same
 #' \code{pathIndex} of interest.
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param pathIndex Path index for which the pseudotime values should be used.
 #' Should have being used in \code{\link{runTSCANDEG}}.
 #' @param direction Should we show features with expression increasing or
 #' decreeasing along the increase in TSCAN pseudotime? Choices are
 #' \code{"both"}, \code{"increasing"} or \code{"decreasing"}.
 #' @param topN An integer. Only to plot this number of top genes along the path
 #' in the MST, in terms of FDR value. Use \code{NULL} to cancel the top N
 #' subscription. Default \code{30}.
 #' @param log2fcThreshold Only output DEGs with the absolute values of log2FC
 #' larger than this value. Default \code{NULL}.
 #' @param useAssay A single character to specify a feature expression matrix in
 #' \code{assays} slot. The expression of top features from here will be
 #' visualized. Default \code{NULL} use the one used for
 #' \code{\link{runTSCANDEG}}.
 #' @param featureDisplay Whether to display feature ID and what ID type to
 #' display. Users can set default ID type by \code{\link{setSCTKDisplayRow}}.
 #' \code{NULL} will display when number of features to display is less than 60.
 #' \code{FALSE} for no display. Variable name in \code{rowData} to indicate ID
 #' type. \code{"rownames"} or \code{TRUE} for using \code{rownames(inSCE)}.
 #' @return A ComplexHeatmap in \code{.ggplot} class
 #' @export
d4e1c876
 #' @author Nida Pervaiz
007a2c8d
 #' @importFrom S4Vectors metadata
d4e1c876
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 #' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE)
 #' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE,
 #'                                    pathIndex = terminalNodes[1])
 #' plotTSCANPseudotimeHeatmap(mouseBrainSubsetSCE,
 #'                            pathIndex = terminalNodes[1])
 plotTSCANPseudotimeHeatmap <- function(inSCE,
                                        pathIndex,
                                        direction = c("both", "increasing",
                                                      "decreasing"),
                                        topN = 50,
                                        log2fcThreshold = NULL,
                                        useAssay = NULL,
                                        featureDisplay = metadata(inSCE)$featureDisplay){
   results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex)
   direction <- match.arg(direction)
   # Organizing cells
   cell.idx <- rep(TRUE, ncol(inSCE))
bec1e741
   if(!is.null(results$discardClusters)){
007a2c8d
     cell.idx <- cell.idx &
       (!colData(inSCE)$TSCAN_clusters %in% results$discardClusters)
   }
   colPathPseudo <- paste0("Path_", pathIndex, "_pseudotime")
   cell.idx <- cell.idx & !is.na(colData(inSCE)[[colPathPseudo]])
   cell.order <- order(colData(inSCE)[[colPathPseudo]])
   cell.order <- cell.order[cell.order %in% which(cell.idx)]
 
   # Organizing genes
   if (direction == "both") {
     degR <- results$upRight
     degL <- results$upLeft
     if (!is.null(log2fcThreshold)) {
       degR <- degR[abs(degR$logFC) > log2fcThreshold,]
       degL <- degL[abs(degL$logFC) > log2fcThreshold,]
     }
     genesR <- rownames(degR)[seq(min(topN, nrow(degR)))]
     genesL <- rownames(degL)[seq(min(topN, nrow(degL)))]
     genesR <- genesR[!is.na(genesR)]
     genesL <- genesL[!is.na(genesL)]
     genes <- c(genesL, genesR)
     if (length(genes) < 1) stop("No feature passed the filter.")
     direction.df <- data.frame(
       Direction = factor(c(rep("decreasing", length(genesL)),
                            rep("increasing", length(genesR))),
                          levels = c("increasing", "decreasing")),
       row.names = genes)
   } else {
     if (direction == "increasing") deg <- results$upRight
     if (direction == "decreasing") deg <- results$upLeft
     if (!is.null(log2fcThreshold)) {
       deg <- deg[abs(deg$logFC) > log2fcThreshold,]
bec1e741
     }
007a2c8d
     topN <- min(topN, nrow(deg))
     if (topN < 1) stop("No feature passed the filter.")
     genes <- rownames(deg)[seq(topN)]
     direction.df <- data.frame(Direction = rep(direction, length(genes)),
                                row.names = genes)
d4e1c876
   }
007a2c8d
   rowLabel <- featureDisplay
   if (is.null(featureDisplay)) {
     if (length(genes) <= 60) rowLabel <- TRUE else rowLabel <- FALSE
   } else if (featureDisplay == "rownames") {
     rowLabel <- TRUE
d4e1c876
   }
007a2c8d
   cellAnnotationColor = list(
     .viridisPseudoTimeColor(colData(inSCE)[[colPathPseudo]])
     )
   names(cellAnnotationColor) <- colPathPseudo
   if (is.null(useAssay)) useAssay <- results$useAssay
   plotSCEHeatmap(inSCE = inSCE,
                  useAssay = useAssay,
                  cellIndex = cell.order,
                  featureIndex = genes,
                  colDend = FALSE,
bbffe107
                  rowDend = FALSE,
007a2c8d
                  cluster_columns = FALSE, cluster_rows = TRUE,
                  colDataName = c("TSCAN_clusters", colPathPseudo),
                  rowLabel = rowLabel,
                  featureAnnotations = direction.df,
                  rowSplitBy = "Direction",
                  cellAnnotationColor = cellAnnotationColor
bec1e741
   )
d4e1c876
 }
 
007a2c8d
 #' Basing on pseudotime is presented within range from 0 to 100
 #' Use the viridis color scale to match with `plotTSCANResult` embedding color
 #' @param x numeric vector of pseudotime
18a0f0e6
 #' @return function object returned by circlize::colorRamp2
7d46c6eb
 #' @noRd
007a2c8d
 .viridisPseudoTimeColor <- function(x) {
   a <- max(x, na.rm = TRUE)
   b <- min(x, na.rm = TRUE)
   chunk <- (a - b) / 4
   circlize::colorRamp2(seq(0,4) * chunk + b,
                        c("#440154", "#3b528b", "#21918c", "#5ec962", "#fde725"))
 }
 
d4e1c876
 ###################################################
 ###  plot expressive genes
 ###################################################
007a2c8d
 #' @title Plot expression changes of top features along a TSCAN pseudotime path
 #' @description A wrapper function which visualizes outputs from the
bbffe107
 #' \code{\link{runTSCANDEG}} function. Plots the genes that increase or decrease
 #' in expression with increasing pseudotime along the path in the MST.
007a2c8d
 #' \code{\link{runTSCANDEG}} has to be run in advance with using the same
 #' \code{pathIndex} of interest.
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param pathIndex Path index for which the pseudotime values should be used.
 #' Should have being used in \code{\link{runTSCANDEG}}.
 #' @param direction Should we show features with expression increasing or
 #' decreeasing along the increase in TSCAN pseudotime? Choices are
 #' \code{"increasing"} or \code{"decreasing"}.
 #' @param topN An integer. Only to plot this number of top genes that are
 #' increasing/decreasing in expression with increasing pseudotime along
09a8d3ee
 #' the path in the MST. Default 10
007a2c8d
 #' @param useAssay A single character to specify a feature expression matrix in
 #' \code{assays} slot. The expression of top features from here will be
 #' visualized. Default \code{NULL} use the one used for
 #' \code{\link{runTSCANDEG}}.
 #' @param featureDisplay Specify the feature ID type to display. Users can set
 #' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or
 #' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character
 #' values indicates \code{rowData} variable.
 #' @return A \code{.ggplot} object with the facets of the top genes. Expression
 #' on y-axis, pseudotime on x-axis.
 #' @export
d4e1c876
 #' @author Nida Pervaiz
9193246a
 #' @importFrom utils head
007a2c8d
 #' @importFrom S4Vectors metadata
 #' @importFrom SummarizedExperiment rowData
d4e1c876
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 #' terminalNodes <- listTSCANTerminalNodes(mouseBrainSubsetSCE)
 #' mouseBrainSubsetSCE <- runTSCANDEG(inSCE = mouseBrainSubsetSCE,
 #'                                    pathIndex = terminalNodes[1])
 #' plotTSCANPseudotimeGenes(mouseBrainSubsetSCE,
 #'                          pathIndex = terminalNodes[1],
 #'                          useAssay = "logcounts")
 plotTSCANPseudotimeGenes <- function(inSCE,
                                      pathIndex,
                                      direction = c("increasing", "decreasing"),
                                      topN = 10,
                                      useAssay = NULL,
                                      featureDisplay = metadata(inSCE)$featureDisplay){
   results <- getTSCANResults(inSCE, analysisName = "DEG", pathName = pathIndex)
d4e1c876
   direction = match.arg(direction)
007a2c8d
   if (!is.null(featureDisplay) &&
       featureDisplay == "rownames")
     featureDisplay <- NULL
   if(!is.null(results$discardClusters)){
     inSCE <- inSCE[,!colData(inSCE)$TSCAN_clusters %in% results$discardClusters]
d4e1c876
   }
007a2c8d
   if (direction == "decreasing") features <- rownames(results$upLeft)
   if (direction == "increasing") features <- rownames(results$upRight)
   features <- head(features, topN)
   if (!is.null(featureDisplay)) {
     features.idx <- featureIndex(features, inSCE)
     features <- rowData(inSCE)[[featureDisplay]][features.idx]
d4e1c876
   }
007a2c8d
   if (is.null(useAssay)) useAssay <- results$useAssay
   scater::plotExpression(inSCE,
                          features = features,
                          exprs_values = useAssay,
                          swap_rownames = featureDisplay,
                          x = paste0("Path_",pathIndex,"_pseudotime"),
                          colour_by = "TSCAN_clusters")
d4e1c876
 }
 
 ###################################################
007a2c8d
 ###  STEP3:: identify DE genes in branch cluster
d4e1c876
 ###################################################
007a2c8d
 #' @title Find DE genes between all TSCAN paths rooted from given cluster
 #' @description This function finds all paths that root from a given cluster
 #' \code{useCluster}, and performs tests to identify significant features for
 #' each path, and are not significant and/or changing in the opposite direction
 #' in the other paths. Using a branching cluster (i.e. a node with degree > 2)
 #' may highlight features which are responsible for the branching event. MST has
 #' to be pre-calculated with \code{\link{runTSCAN}}.
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param useCluster The cluster to be regarded as the root, has to existing in
 #' \code{colData(inSCE)$TSCAN_clusters}.
 #' @param useAssay Character. The name of the assay to use. This assay should
 #' contain log normalized counts. Default \code{"logcounts"}.
bbffe107
 #' @param fdrThreshold Only out put DEGs with FDR value smaller than this value.
 #' Default \code{0.05}.
007a2c8d
 #' @return The input \code{inSCE} with results updated in \code{metadata}.
 #' @export
d4e1c876
 #' @author Nida Pervaiz
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
d3b410fc
 #' mouseBrainSubsetSCE <- runTSCANClusterDEAnalysis(inSCE = mouseBrainSubsetSCE,
007a2c8d
 #'                                          useCluster = 1)
d3b410fc
 runTSCANClusterDEAnalysis <- function(inSCE,
                                       useCluster,
                                       useAssay = "logcounts",
                                       fdrThreshold = 0.05) {
d4e1c876
   results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
007a2c8d
   message(date(), " ... Finding DEG between TSCAN branches")
   tscan.pseudo <- TSCAN::orderCells(results$maptscan, mst = results$mst,
                                     start = useCluster)
9193246a
   nPaths <- colnames(tscan.pseudo)
007a2c8d
 
849e6104
   pathClusters <- list()
884944b6
   pathList <- data.frame()
007a2c8d
 
   x <- inSCE[,colData(inSCE)$TSCAN_clusters == useCluster]
   store <- list()
   genes <- list()
 
   for(i in seq(nPaths)){
     # Get Pseudotime of a path
849e6104
     pathIndex = as.character(nPaths[i])
007a2c8d
     branchPseudotime <- TrajectoryUtils::pathStat(tscan.pseudo)[,pathIndex]
     colDataVarName <- paste0("TSCAN_Cluster", useCluster,
                              "_Path_", pathIndex, "_pseudotime")
     colData(inSCE)[[colDataVarName]] <- branchPseudotime
     nonNA.idx <- !is.na(branchPseudotime)
     pathClusters[[pathIndex]] <- unique(colData(inSCE)$TSCAN_clusters[nonNA.idx])
884944b6
     pathList[i,1] <- pathIndex
007a2c8d
     pathList[i,2] <- toString(pathClusters[[pathIndex]])
     message(date(), " ...   Clusters involved in path index ", pathIndex,
             " are: ", paste(pathClusters[[i]], collapse = ", "))
     # Test along the path
     pseudo.df <- TSCAN::testPseudotime(
       x, df = 1,
       pseudotime = branchPseudotime[colData(inSCE)$TSCAN_clusters == useCluster],
       assay.type = useAssay)
     pseudo.df <- pseudo.df[order(pseudo.df$p.value),]
     store[[pathIndex]] <- pseudo.df
d4e1c876
   }
007a2c8d
   # Filter against all other paths
   genes <- list()
d4e1c876
   for(i in seq(nPaths)){
007a2c8d
     pseudo.df <- store[[nPaths[i]]]
d4e1c876
     paths <- setdiff(seq(nPaths), i)
007a2c8d
     idx <- pseudo.df$FDR < fdrThreshold
     for(j in seq_along(paths)){
       pseudo.otherPath <- store[[paths[j]]]
       idx <- idx & (pseudo.otherPath$p.value >= 0.05 |
                       sign(pseudo.otherPath$logFC) != sign(pseudo.df$logFC))
d4e1c876
     }
007a2c8d
     pseudo.df <- pseudo.df[which(idx), ]
     genes[[nPaths[i]]] <- pseudo.df[order(pseudo.df$FDR),]
d4e1c876
   }
2aa4daf2
 
007a2c8d
   maxWidth <- max(stringr::str_length(pathList[, 1]))
   choiceList <- paste0(stringr::str_pad(pathList[, 1], width=maxWidth,
                                         side="right"),
                        "|", pathList[, 2])
   expGenes <- list(DEgenes = genes, useAssay = useAssay,
                    terminalNodes = tscan.pseudo,
                    pathIndexList = choiceList)
   getTSCANResults(inSCE, analysisName = "ClusterDEAnalysis",
                   pathName = useCluster) <- expGenes
d4e1c876
   return(inSCE)
 }
 
 ###################################################
 ###  plot branch cluster
 ###################################################
007a2c8d
 #' @title Plot TSCAN pseudotime rooted from given cluster
 #' @description This function finds all paths that root from a given cluster
 #' \code{useCluster}. For each path, this function plots the recomputed
 #' pseudotime starting from the root on a scatter plot which contains cells only
 #' in this cluster. MST has to be pre-calculated with \code{\link{runTSCAN}}.
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param useCluster The cluster to be regarded as the root, has to existing in
 #' \code{colData(inSCE)$TSCAN_clusters}.
 #' @param useReducedDim Saved dimension reduction name in the
09a8d3ee
 #' SingleCellExperiment object. Required.
007a2c8d
 #' @param combinePlot Must be either \code{"all"} or \code{"none"}. \code{"all"}
 #' will combine plots of pseudotime along each path into a single \code{.ggplot}
 #' object, while \code{"none"} will output a list of plots. Default
 #' \code{"all"}.
 #' @return
 #' \item{combinePlot = "all"}{A \code{.ggplot} object}
 #' \item{combinePlot = "none"}{A list of \code{.ggplot}}
 #' @export
d4e1c876
 #' @author Nida Pervaiz
9193246a
 #' @importFrom utils head
d4e1c876
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
d3b410fc
 #' plotTSCANClusterPseudo(mouseBrainSubsetSCE, useCluster = 1,
 #'                        useReducedDim = "TSNE_logcounts")
 plotTSCANClusterPseudo <- function(inSCE, useCluster, useReducedDim = "UMAP",
007a2c8d
                                    combinePlot = c("all", "none")){
   # Get the plotting info of edges
d4e1c876
   results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
007a2c8d
   combinePlot <- match.arg(combinePlot)
d3b410fc
   clusters <- colData(inSCE)$TSCAN_clusters
   by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters)
007a2c8d
   line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst,
                                   clusters = NULL, use.dimred = useReducedDim)
bf4524f0
   line.data.sub <- .getClustersLineData(line.data, useCluster)
007a2c8d
 
   # Get Branch pseudotime
   tscan.pseudo <- TSCAN::orderCells(results$maptscan, mst = results$mst,
                                     start = useCluster)
   nPaths <- colnames(tscan.pseudo)
   x <- inSCE[,colData(inSCE)$TSCAN_clusters == useCluster]
   plotList <- list()
   for (i in seq(nPaths)) {
     branchPseudotime <- TrajectoryUtils::pathStat(tscan.pseudo)[,i]
     branchPseudotime <- branchPseudotime[colData(inSCE)$TSCAN_clusters == useCluster]
     x$pseudotime <- branchPseudotime
     plotList[[i]] <- scater::plotReducedDim(x, dimred = useReducedDim,
                                             colour_by = "pseudotime",
                                             text_by = "TSCAN_clusters",
                                             text_colour = "black") +
       ggplot2::geom_line(data = line.data.sub,
                          ggplot2::aes_string(x = colnames(line.data.sub)[2],
                                              y = colnames(line.data.sub)[3],
                                              group = "edge"))
d4e1c876
   }
007a2c8d
   if (combinePlot == "all") {
     plotList <- cowplot::plot_grid(plotlist = plotList)
d4e1c876
   }
007a2c8d
   return(plotList)
d4e1c876
 }
 
 ###################################################
 ###  plot gene of interest in branch cluster
 ###################################################
007a2c8d
 
d3b410fc
 #' @title Plot features identified by \code{\link{runTSCANClusterDEAnalysis}} on
 #' cell 2D embedding with MST overlaid
007a2c8d
 #' @description A wrapper function which plot the top features expression
d3b410fc
 #' identified by \code{\link{runTSCANClusterDEAnalysis}} on the 2D embedding of
 #' the cells cluster used in the analysis. The related MST edges are overlaid.
d4e1c876
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
007a2c8d
 #' @param useCluster Choose a cluster used for identifying DEG with
d3b410fc
 #' \code{\link{runTSCANClusterDEAnalysis}}. Required.
bf4524f0
 #' @param pathIndex Specifies one of the branching paths from \code{useCluster}
 #' and plot the top DEGs on this path. Ususally presented by the terminal
 #' cluster of a path. By default \code{NULL} plot top DEGs of all paths.
007a2c8d
 #' @param useReducedDim A single character for the matrix of 2D embedding.
 #' Should exist in \code{reducedDims} slot. Default \code{"UMAP"}.
 #' @param topN Integer. Use top N genes identified. Default \code{9}.
 #' @param useAssay A single character for the feature expression matrix. Should
 #' exist in \code{assayNames(inSCE)}. Default \code{NULL} for using the one used
d3b410fc
 #' in \code{\link{runTSCANClusterDEAnalysis}}.
007a2c8d
 #' @param featureDisplay Specify the feature ID type to display. Users can set
 #' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or
 #' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character
 #' values indicates \code{rowData} variable.
 #' @param combinePlot Must be either \code{"all"} or \code{"none"}. \code{"all"}
 #' will combine plots of each feature into a single \code{.ggplot} object,
 #' while \code{"none"} will output a list of plots. Default \code{"all"}.
 #' @return A \code{.ggplot} object of cell scatter plot, colored by the
d3b410fc
 #' expression of a gene identified by \code{\link{runTSCANClusterDEAnalysis}},
 #' with the layer of trajectory.
007a2c8d
 #' @export
 #' @author Yichen Wang
 #' @importFrom S4Vectors metadata
d4e1c876
 #' @examples
007a2c8d
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
d3b410fc
 #' mouseBrainSubsetSCE <- runTSCANClusterDEAnalysis(inSCE = mouseBrainSubsetSCE,
 #'                                                  useCluster = 1)
 #' plotTSCANClusterDEG(mouseBrainSubsetSCE, useCluster = 1,
 #'                     useReducedDim = "TSNE_logcounts")
 plotTSCANClusterDEG <- function(
007a2c8d
   inSCE,
   useCluster,
d3b410fc
   pathIndex = NULL,
007a2c8d
   useReducedDim = "UMAP",
   topN = 9,
   useAssay = NULL,
   featureDisplay = metadata(inSCE)$featureDisplay,
   combinePlot = c("all", "none")
 ) {
   MSTResults <- getTSCANResults(inSCE, analysisName = "Pseudotime")
   if (length(useCluster) != 1) stop("Can only specify one cluster")
   DEResults <- getTSCANResults(inSCE, analysisName = "ClusterDEAnalysis",
                                pathName = useCluster)
   if (is.null(DEResults)) {
     stop("Branch DE result of specified cluster not found. Run ",
d3b410fc
          "`runTSCANClusterDEAnalysis()` with the same `useCluster` first.")
007a2c8d
   }
   combinePlot <- match.arg(combinePlot)
d3b410fc
   if (is.null(pathIndex)) {
     deg <- do.call(rbind, DEResults$DEgenes)
     deg <- deg[order(deg$FDR),]
   } else {
     pathIndex <- as.character(pathIndex)
     deg <- DEResults$DEgenes[[pathIndex]]
   }
   features <- rownames(deg)[seq(min(topN, nrow(deg), na.rm = TRUE))]
007a2c8d
   if (is.null(useAssay)) useAssay <- DEResults$useAssay
   plotTSCANDimReduceFeatures(inSCE = inSCE, features = features,
                              useReducedDim = useReducedDim,
                              useAssay = useAssay, useCluster = useCluster,
                              featureDisplay = featureDisplay,
                              combinePlot = combinePlot)
 }
 
 #' @title Plot feature expression on cell 2D embedding with MST overlaid
 #' @description A wrapper function which plots all cells or cells in chosen
 #' cluster. Each point is a cell colored by the expression of a feature of
 #' interest, the relevant edges of the MST are overlaid on top.
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object.
 #' @param features Choose the feature of interest to explore the expression
 #' level on the trajectory. Required.
 #' @param useReducedDim A single character for the matrix of 2D embedding.
 #' Should exist in \code{reducedDims} slot. Default \code{"UMAP"}.
 #' @param useAssay A single character for the feature expression matrix. Should
 #' exist in \code{assayNames(inSCE)}. Default \code{"logcounts"}.
 #' @param by Where should \code{features} be found? \code{NULL},
 #' \code{"rownames"} for \code{rownames(inSCE)}, otherwise will be regarded as
 #' \code{rowData} variable.
 #' @param useCluster Choose specific clusters where gene expression needs to be
 #' visualized. By default \code{NULL}, all clusters are chosen.
 #' @param featureDisplay Specify the feature ID type to display. Users can set
 #' default value with \code{\link{setSCTKDisplayRow}}. \code{NULL} or
 #' \code{"rownames"} specifies the rownames of \code{inSCE}. Other character
 #' values indicates \code{rowData} variable.
 #' @param combinePlot Must be either \code{"all"} or \code{"none"}. \code{"all"}
 #' will combine plots of each feature into a single \code{.ggplot} object,
 #' while \code{"none"} will output a list of plots. Default \code{"all"}.
 #' @return A \code{.ggplot} object of cell scatter plot, colored by the
 #' expression of a gene of interest, with the layer of trajectory.
 #' @export
 #' @author Yichen Wang
 #' @importFrom S4Vectors metadata
 #' @examples
 #' data("mouseBrainSubsetSCE", package = "singleCellTK")
 #' mouseBrainSubsetSCE <- runTSCAN(inSCE = mouseBrainSubsetSCE,
 #'                                 useReducedDim = "PCA_logcounts")
 #' plotTSCANDimReduceFeatures(inSCE = mouseBrainSubsetSCE,
 #'                            features = "Tshz1",
 #'                            useReducedDim = "TSNE_logcounts")
 plotTSCANDimReduceFeatures <- function(
   inSCE,
   features,
   useReducedDim = "UMAP",
   useAssay = "logcounts",
   by = "rownames",
   useCluster = NULL,
   featureDisplay = metadata(inSCE)$featureDisplay,
   combinePlot = c("all", "none"))
 {
   results <- getTSCANResults(inSCE, analysisName = "Pseudotime")
   combinePlot <- match.arg(combinePlot)
d3b410fc
   clusters <- colData(inSCE)$TSCAN_clusters
   by.cluster <- scuttle::aggregateAcrossCells(inSCE, ids = clusters)
007a2c8d
   line.data <- TSCAN::reportEdges(by.cluster, mst = results$mst,
                                   clusters = NULL, use.dimred = useReducedDim)
   if (!is.null(useCluster)) {
bf4524f0
     line.data <- .getClustersLineData(line.data, useCluster)
d3b410fc
     inSCE <- inSCE[, clusters %in% useCluster]
007a2c8d
   }
   if (by == "rownames") by <- NULL
   plotList <- list()
bf4524f0
   features <- stats::na.omit(features)
   for (f in features) {
     # Should not enter the loop if features is length zero after NA omit
     g <- plotSCEDimReduceFeatures(inSCE, feature = f,
                                   useAssay = useAssay,
                                   featureLocation = by,dim1 = 1, dim2 = 2,
                                   featureDisplay = featureDisplay,
                                   reducedDimName = useReducedDim,
                                   title = f) +
       ggplot2::geom_line(data = line.data,
                          ggplot2::aes_string(x = colnames(line.data)[2],
                                              y = colnames(line.data)[3],
                                              group = "edge"),
                          inherit.aes = FALSE)
     # Text labeling code credit: scater
     reddim <- as.data.frame(SingleCellExperiment::reducedDim(inSCE,
                                                              useReducedDim))
     text_out <- scater::retrieveCellInfo(inSCE, "TSCAN_clusters",
                                          search = "colData")
     by_text_x <- vapply(split(reddim[,1], text_out$val),
                         stats::median, FUN.VALUE = 0)
     by_text_y <- vapply(split(reddim[,2], text_out$val),
                         stats::median, FUN.VALUE = 0)
     g <- g + ggrepel::geom_text_repel(
       data = data.frame(x = by_text_x,
                         y = by_text_y,
                         label = names(by_text_x)),
       mapping = ggplot2::aes_string(x = "x",
                                     y = "y",
                                     label = "label"),
       inherit.aes = FALSE, na.rm = TRUE,
     )
     plotList[[f]] <- g
d4e1c876
   }
007a2c8d
   if (combinePlot == "all") {
d3b410fc
     if (length(plotList) > 0)
007a2c8d
     plotList <- cowplot::plot_grid(plotlist = plotList)
849e6104
   }
007a2c8d
   return(plotList)
d4e1c876
 }
bf4524f0
 
 #' Extract edges that connects to the clusters of interest
 #' @param line.data data.frame of three columns. First column should be named
 #' "edge", the other two are coordinates of one of the vertices that this
 #' edge connects.
 #' @param useCluster Vector of clusters, that are going to be the vertices for
 #' edge finding.
 #' @return data.frame, subset rows of line.data
7d46c6eb
 #' @noRd
bf4524f0
 .getClustersLineData <- function(line.data, useCluster) {
   idx <- vapply(useCluster, function(x){
     grepl(paste0("^", x, "--"), line.data$edge) |
       grepl(paste0("--", x, "$"), line.data$edge)
   }, logical(nrow(line.data)))
   # `idx` returned was c("matrix", "array") class. Each column is a logical
   # vector for edge selection of each cluster.
   # Next, do "or" for each row.
   idx <- rowSums(idx) > 0
   line.data[idx,]
 }