R/plot_dr.R
16dba6f7
 #' @title Mapping the dimensionality reduction plot
 #' @description Creates a scatterplot given two dimensions from a data
 #'  dimensionality reduction tool (e.g tSNE) output.
0b3c7f9a
 #' @param dim1 Numeric vector. First dimension from data dimensionality
 #'  reduction output.
 #' @param dim2 Numeric vector. Second dimension from data dimensionality
 #'  reduction output.
 #' @param matrix Numeric matrix. Each row of the matrix will be plotted as
 #'  a separate facet.
16dba6f7
 #' @param size Numeric. Sets size of point on plot. Default 1.
 #' @param xlab Character vector. Label for the x-axis. Default 'Dimension_1'.
 #' @param ylab Character vector. Label for the y-axis. Default 'Dimension_2'.
b26d22a2
 #' @param colorLow Character. A color available from `colors()`.
16dba6f7
 #'  The color will be used to signify the lowest values on the scale.
 #'  Default 'grey'.
b26d22a2
 #' @param colorMid Character. A color available from `colors()`.
16dba6f7
 #'  The color will be used to signify the midpoint on the scale.
b26d22a2
 #' @param colorHigh Character. A color available from `colors()`.
16dba6f7
 #'  The color will be used to signify the highest values on the scale.
 #'   Default 'blue'.
b26d22a2
 #' @param varLabel Character vector. Title for the color legend.
d1154f0b
 #' @return The plot as a ggplot object
ff43602d
 #' @examples
 #' data(celdaCGSim, celdaCGMod)
 #' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
 #'     celdaMod = celdaCGMod)
 #' plotDimReduceGrid(celdaTsne[, 1],
 #'     celdaTsne[, 2],
 #'     matrix = celdaCGSim$counts,
 #'     xlab = "Dimension1",
 #'     ylab = "Dimension2",
 #'     varLabel = "tsne",
 #'     size = 1,
 #'     colorLow = "grey",
 #'     colorMid = NULL,
 #'     colorHigh = "blue")
d7196f24
 #' @importFrom reshape2 melt
bd77a8b7
 #' @export
0b3c7f9a
 plotDimReduceGrid <- function(dim1,
     dim2,
     matrix,
     size,
     xlab,
     ylab,
b26d22a2
     colorLow,
     colorMid,
     colorHigh,
     varLabel) {
ca5fb59d
 
0b3c7f9a
     df <- data.frame(dim1, dim2, t(as.data.frame(matrix)))
ca5fb59d
     naIx <- is.na(dim1) | is.na(dim2)
b6cf56ae
     df <- df[!naIx, ]
16dba6f7
 
0b3c7f9a
     m <- reshape2::melt(df, id.vars = c("dim1", "dim2"))
b26d22a2
     colnames(m) <- c(xlab, ylab, "facet", varLabel)
0b3c7f9a
 
     ggplot2::ggplot(m,
         ggplot2::aes_string(x = xlab, y = ylab)) +
         ggplot2::geom_point(stat = "identity",
2e877ffe
             size = size,
             ggplot2::aes_string(color = varLabel)) +
b6cf56ae
         ggplot2::facet_wrap(~ facet) +
0b3c7f9a
         ggplot2::theme_bw() +
b26d22a2
         ggplot2::scale_colour_gradient2(low = colorLow,
             high = colorHigh,
             mid = colorMid,
0b3c7f9a
             midpoint = (max(m[, 4]) + min(m[, 4])) / 2,
b26d22a2
             name = gsub("_", " ", varLabel)) +
0b3c7f9a
         ggplot2::theme(strip.background = ggplot2::element_blank(),
             panel.grid.major = ggplot2::element_blank(),
             panel.grid.minor = ggplot2::element_blank(),
             panel.spacing = unit(0, "lines"),
             panel.background = ggplot2::element_blank(),
             axis.line = ggplot2::element_line(colour = "black"))
2e877ffe
 }
bd77a8b7
 
ca5fb59d
 
9616c081
 #' @title Plotting feature expression on a dimensionality reduction plot
16dba6f7
 #' @description Create a scatterplot for each row of a normalized gene
 #'  expression matrix where x and y axis are from a data dimensionality
 #'  reduction tool. The cells are colored by expression of
 #'  the specified feature.
 #' @param dim1 Numeric vector. First dimension from data
 #'  dimensionality reduction output.
0b3c7f9a
 #' @param dim2 Numeric vector. Second dimension from data dimensionality
 #'  reduction output.
 #' @param counts Integer matrix. Rows represent features and columns
 #'  represent cells.
c5d41a04
 #' @param features Character vector. Uses these genes for plotting.
16dba6f7
 #' @param normalize Logical. Whether to normalize the columns of `counts`.
 #'  Default TRUE.
b26d22a2
 #' @param exactMatch Logical. Whether an exact match or a partial match using
0b3c7f9a
 #'  `grep()` is used to look up the feature in the rownames of the counts
 #'   matrix. Default TRUE.
16dba6f7
 #' @param trim Numeric vector. Vector of length two that specifies the lower
 #'  and upper bounds for the data. This threshold is applied after row scaling.
 #'  Set to NULL to disable. Default c(-2,2).
bd77a8b7
 #' @param size Numeric. Sets size of point on plot. Default 1.
 #' @param xlab Character vector. Label for the x-axis. Default "Dimension_1".
 #' @param ylab Character vector. Label for the y-axis. Default "Dimension_2".
b26d22a2
 #' @param colorLow Character. A color available from `colors()`. The color
0b3c7f9a
 #'  will be used to signify the lowest values on the scale. Default 'grey'.
b26d22a2
 #' @param colorMid Character. A color available from `colors()`. The color
0b3c7f9a
 #'  will be used to signify the midpoint on the scale.
b26d22a2
 #' @param colorHigh Character. A color available from `colors()`. The color
0b3c7f9a
 #'  will be used to signify the highest values on the scale. Default 'blue'.
d1154f0b
 #' @return The plot as a ggplot object
ff43602d
 #' @examples
 #' \donttest{
 #' data(celdaCGSim, celdaCGMod)
 #' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
 #'     celdaMod = celdaCGMod)
 #' plotDimReduceFeature(dim1 = celdaTsne[, 1],
 #'     dim2 = celdaTsne[, 2],
 #'     counts = celdaCGSim$counts,
 #'     features = c("Gene_99"),
 #'     exactMatch = TRUE)
 #' }
16dba6f7
 #' @export
0b3c7f9a
 plotDimReduceFeature <- function(dim1,
     dim2,
     counts,
     features,
     normalize = TRUE,
b26d22a2
     exactMatch = TRUE,
0b3c7f9a
     trim = c(-2, 2),
     size = 1,
     xlab = "Dimension_1",
     ylab = "Dimension_2",
b26d22a2
     colorLow = "grey",
     colorMid = NULL,
     colorHigh = "blue") {
0b3c7f9a
     if (isTRUE(normalize)) {
         counts <- normalizeCounts(counts,
2e877ffe
             transformationFun = sqrt,
             scaleFun = base::scale)
0b3c7f9a
     }
 
     if (is.null(features)) {
         stop("at least one feature is required to create a plot")
     }
 
     if (!is.null(trim)) {
         if (length(trim) != 2) {
2e877ffe
             stop("'trim' should be a 2 element vector",
                 "specifying the lower and upper boundaries")
16dba6f7
         }
0b3c7f9a
         trim <- sort(trim)
         counts[counts < trim[1]] <- trim[1]
         counts[counts > trim[2]] <- trim[2]
2e877ffe
         }
0b3c7f9a
 
b26d22a2
     varLabel <- "Expression"
     if (!isTRUE(exactMatch)) {
ca5fb59d
         featuresIndices <- c()
         notFound <- c()
0b3c7f9a
         for (gene in features) {
ca5fb59d
             featuresIndices <-
                 c(featuresIndices, grep(gene, rownames(counts)))
0b3c7f9a
             if (length(grep(gene, rownames(counts))) == 0) {
ca5fb59d
                 notFound <- c(notFound, gene)
16dba6f7
             }
         }
ca5fb59d
         counts <- counts[featuresIndices, , drop = FALSE]
         if (length(notFound) > 0) {
             if (length(notFound) == length(features)) {
2e877ffe
                 stop("None of the provided features had",
                     "matching rownames in the provided counts matrix.")
             }
             warning("The following features were not",
                 "present in the provided count matrix: ",
                 paste(notFound,
                     sep = "",
                     collapse = ","))
16dba6f7
             }
0b3c7f9a
     } else {
b26d22a2
         featuresNotFound <- setdiff(features,
0b3c7f9a
             intersect(features, rownames(counts)))
b26d22a2
         if (length(featuresNotFound) > 0) {
             if (length(featuresNotFound) == length(features)) {
2e877ffe
                 stop("None of the provided features had matching",
                     "rownames in the provided counts matrix.")
             }
             warning("The following features were not present in",
                 "the provided count matrix: ",
                 paste(featuresNotFound,
                     sep = "",
                     collapse = ","))
16dba6f7
             }
ca5fb59d
         featuresFound <- setdiff(features, featuresNotFound)
         counts <- counts[featuresFound, , drop = FALSE]
0b3c7f9a
     }
     plotDimReduceGrid(dim1,
         dim2,
         counts,
         size,
         xlab,
         ylab,
b26d22a2
         colorLow,
         colorMid,
         colorHigh,
         varLabel)
2e877ffe
 }
bd77a8b7
 
ca5fb59d
 
16dba6f7
 #' @title Plotting the Celda module probability on a
 #'  dimensionality reduction plot
 #' @description Create a scatterplot for each row of a normalized
 #'  gene expression matrix where x and y axis are from a data
 #'  dimensionality reduction tool.
 #'  The cells are colored by the module probability(s).
 #' @param dim1 Numeric vector.
 #'  First dimension from data dimensionality reduction output.
 #' @param dim2 Numeric vector.
 #'  Second dimension from data dimensionality reduction output.
 #' @param counts Integer matrix.
 #'  Rows represent features and columns represent cells.
b26d22a2
 #'  This matrix should be the same as the one used to generate `celdaMod`.
 #' @param celdaMod Celda object of class "celda_G" or "celda_CG".
62a6c4cf
 #' @param modules Character vector. Module(s) from celda model to be plotted.
e3638ca1
 #' e.g. c("1", "2").
16dba6f7
 #' @param rescale Logical.
2e877ffe
 #'  Whether rows of the matrix should be rescaled to [0, 1]. Default TRUE.
bd77a8b7
 #' @param size Numeric. Sets size of point on plot. Default 1.
 #' @param xlab Character vector. Label for the x-axis. Default "Dimension_1".
 #' @param ylab Character vector. Label for the y-axis. Default "Dimension_2".
b26d22a2
 #' @param colorLow Character. A color available from `colors()`.
16dba6f7
 #'  The color will be used to signify the lowest values on the scale.
 #'  Default 'grey'.
b26d22a2
 #' @param colorMid Character. A color available from `colors()`.
16dba6f7
 #'  The color will be used to signify the midpoint on the scale.
b26d22a2
 #' @param colorHigh Character. A color available from `colors()`.
16dba6f7
 #'  The color will be used to signify the highest values on the scale.
 #'  Default 'blue'.
d1154f0b
 #' @return The plot as a ggplot object
ff43602d
 #' @examples
 #' \donttest{
 #' data(celdaCGSim, celdaCGMod)
 #' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
 #'     celdaMod = celdaCGMod)
 #' plotDimReduceModule(
 #'     dim1 = celdaTsne[, 1], dim2 = celdaTsne[, 2],
 #'     counts = celdaCGSim$counts, celdaMod = celdaCGMod,
 #'     modules = c("1", "2"))
 #' }
16dba6f7
 #' @export
 plotDimReduceModule <-
     function(dim1,
         dim2,
         counts,
b26d22a2
         celdaMod,
16dba6f7
         modules = NULL,
         rescale = TRUE,
         size = 1,
         xlab = "Dimension_1",
         ylab = "Dimension_2",
b26d22a2
         colorLow = "grey",
         colorMid = NULL,
         colorHigh = "blue") {
e3638ca1
 
b26d22a2
         factorized <- factorizeMatrix(celdaMod = celdaMod,
             counts = counts)
16dba6f7
         matrix <- factorized$proportions$cell
0b3c7f9a
 
16dba6f7
         if (rescale == TRUE) {
2e877ffe
             for (x in seq(nrow(matrix))) {
                 matrix[x, ] <- matrix[x, ] - min(matrix[x, ])
                 matrix[x, ] <- matrix[x, ] / max(matrix[x, ])
b26d22a2
                 varLabel <- "Scaled_Probability"
16dba6f7
             }
         } else {
b26d22a2
             varLabel <- "Probability"
16dba6f7
         }
 
         rownames(matrix) <- gsub("L", "", rownames(matrix))
         if (!is.null(modules)) {
             if (length(rownames(matrix)[rownames(matrix) %in% modules]) < 1) {
                 stop("All modules selected do not exist in the model.")
             }
b26d22a2
             matrix <- matrix[which(rownames(matrix) %in% modules), ,
                 drop = FALSE]
             matrix <- matrix[match(rownames(matrix), modules), ,
                 drop = FALSE]
16dba6f7
         }
0b3c7f9a
 
16dba6f7
         rownames(matrix) <- paste0("L", rownames(matrix))
         plotDimReduceGrid(dim1,
             dim2,
             matrix,
             size,
             xlab,
             ylab,
b26d22a2
             colorLow,
             colorMid,
             colorHigh,
             varLabel)
c0963ca8
     }
bd77a8b7
 
ca5fb59d
 
918ae097
 # Labeling code adapted from Seurat (https://github.com/satijalab/seurat)
9616c081
 #' @title Plotting the cell labels on a dimensionality reduction plot
16dba6f7
 #' @description Create a scatterplot for each row of a normalized
 #'  gene expression matrix where x and y axis are from a
 #'  data dimensionality reduction tool.
 #'  The cells are colored by its given `cluster` label.
 #' @param dim1 Numeric vector. First dimension from data
 #'  dimensionality reduction output.
 #' @param dim2 Numeric vector. Second dimension from data
 #'  dimensionality reduction output.
 #' @param cluster Integer vector. Contains cluster labels for each cell.
bd77a8b7
 #' @param size Numeric. Sets size of point on plot. Default 1.
 #' @param xlab Character vector. Label for the x-axis. Default "Dimension_1".
 #' @param ylab Character vector. Label for the y-axis. Default "Dimension_2".
b26d22a2
 #' @param specificClusters Numeric vector.
16dba6f7
 #'  Only color cells in the specified clusters.
 #'  All other cells will be grey.
 #'  If NULL, all clusters will be colored. Default NULL.
b26d22a2
 #' @param labelClusters Logical. Whether the cluster labels are plotted.
16dba6f7
 #'  Default FALSE.
b26d22a2
 #' @param labelSize Numeric. Sets size of label if labelClusters is TRUE.
16dba6f7
 #'  Default 3.5.
d1154f0b
 #' @return The plot as a ggplot object
d7196f24
 #' @importFrom ggrepel geom_text_repel
ff43602d
 #' @examples
 #' \donttest{
 #' data(celdaCGSim, celdaCGMod)
 #' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
 #'     celdaMod = celdaCGMod)
 #' plotDimReduceCluster(dim1 = celdaTsne[, 1],
 #'     dim2 = celdaTsne[, 2],
 #'     cluster = as.factor(clusters(celdaCGMod)$z),
 #'     specificClusters = c(1, 2, 3))
 #' }
16dba6f7
 #' @export
b6cf56ae
 plotDimReduceCluster <- function(dim1,
0b3c7f9a
     dim2,
     cluster,
     size = 1,
     xlab = "Dimension_1",
     ylab = "Dimension_2",
b26d22a2
     specificClusters = NULL,
     labelClusters = FALSE,
     labelSize = 3.5) {
0b3c7f9a
     df <- data.frame(dim1, dim2, cluster)
     colnames(df) <- c(xlab, ylab, "Cluster")
ca5fb59d
     naIx <- is.na(dim1) | is.na(dim2)
     df <- df[!naIx, ]
0b3c7f9a
     df[3] <- as.factor(df[[3]])
b26d22a2
     clusterColors <- distinctColors(nlevels(as.factor(cluster)))
     if (!is.null(specificClusters)) {
         clusterColors[!levels(df[[3]]) %in% specificClusters] <- "gray92"
0b3c7f9a
     }
     g <- ggplot2::ggplot(df, ggplot2::aes_string(x = xlab, y = ylab)) +
         ggplot2::geom_point(stat = "identity",
             size = size,
             ggplot2::aes_string(color = "Cluster")) +
         ggplot2::theme(
             panel.grid.major = ggplot2::element_blank(),
             panel.grid.minor = ggplot2::element_blank(),
             panel.background = ggplot2::element_blank(),
             axis.line = ggplot2::element_line(color = "black")) +
b26d22a2
         ggplot2::scale_color_manual(values = clusterColors) +
0b3c7f9a
         ggplot2::guides(color =
                 ggplot2::guide_legend(override.aes = list(size = 1)))
 
b26d22a2
     if (labelClusters == TRUE) {
2e877ffe
         centroidList <- lapply(seq(length(unique(cluster))), function(x) {
b6cf56ae
             df.sub <- df[df$Cluster == x, ]
0b3c7f9a
             median.1 <- stats::median(df.sub$Dimension_1)
             median.2 <- stats::median(df.sub$Dimension_2)
             cbind(median.1, median.2, x)
         })
b26d22a2
         centroid <- do.call(rbind, centroidList)
0b3c7f9a
         centroid <- as.data.frame(centroid)
16dba6f7
 
0b3c7f9a
         colnames(centroid) <- c("Dimension_1", "Dimension_2", "Cluster")
         g <- g + ggplot2::geom_point(data = centroid,
2e877ffe
             mapping = ggplot2::aes_string(x = "Dimension_1",
                 y = "Dimension_2"),
             size = 0,
             alpha = 0) +
             ggrepel::geom_text_repel(data = centroid,
                 mapping = ggplot2::aes_string(label = "Cluster"),
                 size = labelSize)
16dba6f7
     }
2e877ffe
     return(g)
 }
bd77a8b7
 
ca5fb59d
 
e05116da
 # Run the t-SNE algorithm for dimensionality reduction
 # @param norm Normalized count matrix.
 # @param perplexity Numeric vector. Determines perplexity for tsne. Default 20.
b26d22a2
 # @param maxIter Numeric vector. Determines iterations for tsne. Default 1000.
af4c3cb8
 # @param doPca Logical. Whether to perform
16dba6f7
 # dimensionality reduction with PCA before tSNE.
b26d22a2
 # @param initialDims Integer.Number of dimensions from PCA to use as
0b3c7f9a
 # input in tSNE.
d7196f24
 #' @importFrom Rtsne Rtsne
b6cf56ae
 .calculateTsne <- function(norm,
0b3c7f9a
     perplexity = 20,
b26d22a2
     maxIter = 2500,
af4c3cb8
     doPca = FALSE,
b26d22a2
     initialDims = 20) {
bd77a8b7
 
2e877ffe
     res <- Rtsne::Rtsne(
         norm,
af4c3cb8
         pca = doPca,
2e877ffe
         max_iter = maxIter,
         perplexity = perplexity,
         check_duplicates = FALSE,
         is_distance = FALSE,
         initial_dims = initialDims)$Y
0b3c7f9a
 
2e877ffe
     return(res)
 }
bd77a8b7
 
ca5fb59d
 
8b60a355
 # Run the umap algorithm for dimensionality reduction
 # @param norm Normalized count matrix.
2e877ffe
 # @param umapConfig An object of class umap.config,
16dba6f7
 # containing configuration parameters to be passed to umap.
 # Default umap::umap.defualts.
d7196f24
 #' @importFrom umap umap
2e877ffe
 .calculateUmap <- function(norm, umapConfig = umap::umap.defaults) {
     return(umap::umap(norm, umapConfig)$layout)
8b60a355
 }