16dba6f7 |
#' @title Mapping the dimensionality reduction plot
#' @description Creates a scatterplot given two dimensions from a data
#' dimensionality reduction tool (e.g tSNE) output.
|
0b3c7f9a |
#' @param dim1 Numeric vector. First dimension from data dimensionality
#' reduction output.
#' @param dim2 Numeric vector. Second dimension from data dimensionality
#' reduction output.
#' @param matrix Numeric matrix. Each row of the matrix will be plotted as
#' a separate facet.
|
16dba6f7 |
#' @param size Numeric. Sets size of point on plot. Default 1.
#' @param xlab Character vector. Label for the x-axis. Default 'Dimension_1'.
#' @param ylab Character vector. Label for the y-axis. Default 'Dimension_2'.
|
b26d22a2 |
#' @param colorLow Character. A color available from `colors()`.
|
16dba6f7 |
#' The color will be used to signify the lowest values on the scale.
#' Default 'grey'.
|
b26d22a2 |
#' @param colorMid Character. A color available from `colors()`.
|
16dba6f7 |
#' The color will be used to signify the midpoint on the scale.
|
b26d22a2 |
#' @param colorHigh Character. A color available from `colors()`.
|
16dba6f7 |
#' The color will be used to signify the highest values on the scale.
#' Default 'blue'.
|
b26d22a2 |
#' @param varLabel Character vector. Title for the color legend.
|
d1154f0b |
#' @return The plot as a ggplot object
|
ff43602d |
#' @examples
#' data(celdaCGSim, celdaCGMod)
#' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
#' celdaMod = celdaCGMod)
#' plotDimReduceGrid(celdaTsne[, 1],
#' celdaTsne[, 2],
#' matrix = celdaCGSim$counts,
#' xlab = "Dimension1",
#' ylab = "Dimension2",
#' varLabel = "tsne",
#' size = 1,
#' colorLow = "grey",
#' colorMid = NULL,
#' colorHigh = "blue")
|
d7196f24 |
#' @importFrom reshape2 melt
|
bd77a8b7 |
#' @export
|
0b3c7f9a |
plotDimReduceGrid <- function(dim1,
dim2,
matrix,
size,
xlab,
ylab,
|
b26d22a2 |
colorLow,
colorMid,
colorHigh,
varLabel) {
|
ca5fb59d |
|
0b3c7f9a |
df <- data.frame(dim1, dim2, t(as.data.frame(matrix)))
|
ca5fb59d |
naIx <- is.na(dim1) | is.na(dim2)
|
b6cf56ae |
df <- df[!naIx, ]
|
16dba6f7 |
|
0b3c7f9a |
m <- reshape2::melt(df, id.vars = c("dim1", "dim2"))
|
b26d22a2 |
colnames(m) <- c(xlab, ylab, "facet", varLabel)
|
0b3c7f9a |
ggplot2::ggplot(m,
ggplot2::aes_string(x = xlab, y = ylab)) +
ggplot2::geom_point(stat = "identity",
|
2e877ffe |
size = size,
ggplot2::aes_string(color = varLabel)) +
|
b6cf56ae |
ggplot2::facet_wrap(~ facet) +
|
0b3c7f9a |
ggplot2::theme_bw() +
|
b26d22a2 |
ggplot2::scale_colour_gradient2(low = colorLow,
high = colorHigh,
mid = colorMid,
|
0b3c7f9a |
midpoint = (max(m[, 4]) + min(m[, 4])) / 2,
|
b26d22a2 |
name = gsub("_", " ", varLabel)) +
|
0b3c7f9a |
ggplot2::theme(strip.background = ggplot2::element_blank(),
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
panel.spacing = unit(0, "lines"),
panel.background = ggplot2::element_blank(),
axis.line = ggplot2::element_line(colour = "black"))
|
2e877ffe |
}
|
bd77a8b7 |
|
ca5fb59d |
|
9616c081 |
#' @title Plotting feature expression on a dimensionality reduction plot
|
16dba6f7 |
#' @description Create a scatterplot for each row of a normalized gene
#' expression matrix where x and y axis are from a data dimensionality
#' reduction tool. The cells are colored by expression of
#' the specified feature.
#' @param dim1 Numeric vector. First dimension from data
#' dimensionality reduction output.
|
0b3c7f9a |
#' @param dim2 Numeric vector. Second dimension from data dimensionality
#' reduction output.
#' @param counts Integer matrix. Rows represent features and columns
#' represent cells.
|
c5d41a04 |
#' @param features Character vector. Uses these genes for plotting.
|
16dba6f7 |
#' @param normalize Logical. Whether to normalize the columns of `counts`.
#' Default TRUE.
|
b26d22a2 |
#' @param exactMatch Logical. Whether an exact match or a partial match using
|
0b3c7f9a |
#' `grep()` is used to look up the feature in the rownames of the counts
#' matrix. Default TRUE.
|
16dba6f7 |
#' @param trim Numeric vector. Vector of length two that specifies the lower
#' and upper bounds for the data. This threshold is applied after row scaling.
#' Set to NULL to disable. Default c(-2,2).
|
bd77a8b7 |
#' @param size Numeric. Sets size of point on plot. Default 1.
#' @param xlab Character vector. Label for the x-axis. Default "Dimension_1".
#' @param ylab Character vector. Label for the y-axis. Default "Dimension_2".
|
b26d22a2 |
#' @param colorLow Character. A color available from `colors()`. The color
|
0b3c7f9a |
#' will be used to signify the lowest values on the scale. Default 'grey'.
|
b26d22a2 |
#' @param colorMid Character. A color available from `colors()`. The color
|
0b3c7f9a |
#' will be used to signify the midpoint on the scale.
|
b26d22a2 |
#' @param colorHigh Character. A color available from `colors()`. The color
|
0b3c7f9a |
#' will be used to signify the highest values on the scale. Default 'blue'.
|
d1154f0b |
#' @return The plot as a ggplot object
|
ff43602d |
#' @examples
#' \donttest{
#' data(celdaCGSim, celdaCGMod)
#' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
#' celdaMod = celdaCGMod)
#' plotDimReduceFeature(dim1 = celdaTsne[, 1],
#' dim2 = celdaTsne[, 2],
#' counts = celdaCGSim$counts,
#' features = c("Gene_99"),
#' exactMatch = TRUE)
#' }
|
16dba6f7 |
#' @export
|
0b3c7f9a |
plotDimReduceFeature <- function(dim1,
dim2,
counts,
features,
normalize = TRUE,
|
b26d22a2 |
exactMatch = TRUE,
|
0b3c7f9a |
trim = c(-2, 2),
size = 1,
xlab = "Dimension_1",
ylab = "Dimension_2",
|
b26d22a2 |
colorLow = "grey",
colorMid = NULL,
colorHigh = "blue") {
|
0b3c7f9a |
if (isTRUE(normalize)) {
counts <- normalizeCounts(counts,
|
2e877ffe |
transformationFun = sqrt,
scaleFun = base::scale)
|
0b3c7f9a |
}
if (is.null(features)) {
stop("at least one feature is required to create a plot")
}
if (!is.null(trim)) {
if (length(trim) != 2) {
|
2e877ffe |
stop("'trim' should be a 2 element vector",
"specifying the lower and upper boundaries")
|
16dba6f7 |
}
|
0b3c7f9a |
trim <- sort(trim)
counts[counts < trim[1]] <- trim[1]
counts[counts > trim[2]] <- trim[2]
|
2e877ffe |
}
|
0b3c7f9a |
|
b26d22a2 |
varLabel <- "Expression"
if (!isTRUE(exactMatch)) {
|
ca5fb59d |
featuresIndices <- c()
notFound <- c()
|
0b3c7f9a |
for (gene in features) {
|
ca5fb59d |
featuresIndices <-
c(featuresIndices, grep(gene, rownames(counts)))
|
0b3c7f9a |
if (length(grep(gene, rownames(counts))) == 0) {
|
ca5fb59d |
notFound <- c(notFound, gene)
|
16dba6f7 |
}
}
|
ca5fb59d |
counts <- counts[featuresIndices, , drop = FALSE]
if (length(notFound) > 0) {
if (length(notFound) == length(features)) {
|
2e877ffe |
stop("None of the provided features had",
"matching rownames in the provided counts matrix.")
}
warning("The following features were not",
"present in the provided count matrix: ",
paste(notFound,
sep = "",
collapse = ","))
|
16dba6f7 |
}
|
0b3c7f9a |
} else {
|
b26d22a2 |
featuresNotFound <- setdiff(features,
|
0b3c7f9a |
intersect(features, rownames(counts)))
|
b26d22a2 |
if (length(featuresNotFound) > 0) {
if (length(featuresNotFound) == length(features)) {
|
2e877ffe |
stop("None of the provided features had matching",
"rownames in the provided counts matrix.")
}
warning("The following features were not present in",
"the provided count matrix: ",
paste(featuresNotFound,
sep = "",
collapse = ","))
|
16dba6f7 |
}
|
ca5fb59d |
featuresFound <- setdiff(features, featuresNotFound)
counts <- counts[featuresFound, , drop = FALSE]
|
0b3c7f9a |
}
plotDimReduceGrid(dim1,
dim2,
counts,
size,
xlab,
ylab,
|
b26d22a2 |
colorLow,
colorMid,
colorHigh,
varLabel)
|
2e877ffe |
}
|
bd77a8b7 |
|
ca5fb59d |
|
16dba6f7 |
#' @title Plotting the Celda module probability on a
#' dimensionality reduction plot
#' @description Create a scatterplot for each row of a normalized
#' gene expression matrix where x and y axis are from a data
#' dimensionality reduction tool.
#' The cells are colored by the module probability(s).
#' @param dim1 Numeric vector.
#' First dimension from data dimensionality reduction output.
#' @param dim2 Numeric vector.
#' Second dimension from data dimensionality reduction output.
#' @param counts Integer matrix.
#' Rows represent features and columns represent cells.
|
b26d22a2 |
#' This matrix should be the same as the one used to generate `celdaMod`.
#' @param celdaMod Celda object of class "celda_G" or "celda_CG".
|
62a6c4cf |
#' @param modules Character vector. Module(s) from celda model to be plotted.
|
e3638ca1 |
#' e.g. c("1", "2").
|
16dba6f7 |
#' @param rescale Logical.
|
2e877ffe |
#' Whether rows of the matrix should be rescaled to [0, 1]. Default TRUE.
|
bd77a8b7 |
#' @param size Numeric. Sets size of point on plot. Default 1.
#' @param xlab Character vector. Label for the x-axis. Default "Dimension_1".
#' @param ylab Character vector. Label for the y-axis. Default "Dimension_2".
|
b26d22a2 |
#' @param colorLow Character. A color available from `colors()`.
|
16dba6f7 |
#' The color will be used to signify the lowest values on the scale.
#' Default 'grey'.
|
b26d22a2 |
#' @param colorMid Character. A color available from `colors()`.
|
16dba6f7 |
#' The color will be used to signify the midpoint on the scale.
|
b26d22a2 |
#' @param colorHigh Character. A color available from `colors()`.
|
16dba6f7 |
#' The color will be used to signify the highest values on the scale.
#' Default 'blue'.
|
d1154f0b |
#' @return The plot as a ggplot object
|
ff43602d |
#' @examples
#' \donttest{
#' data(celdaCGSim, celdaCGMod)
#' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
#' celdaMod = celdaCGMod)
#' plotDimReduceModule(
#' dim1 = celdaTsne[, 1], dim2 = celdaTsne[, 2],
#' counts = celdaCGSim$counts, celdaMod = celdaCGMod,
#' modules = c("1", "2"))
#' }
|
16dba6f7 |
#' @export
plotDimReduceModule <-
function(dim1,
dim2,
counts,
|
b26d22a2 |
celdaMod,
|
16dba6f7 |
modules = NULL,
rescale = TRUE,
size = 1,
xlab = "Dimension_1",
ylab = "Dimension_2",
|
b26d22a2 |
colorLow = "grey",
colorMid = NULL,
colorHigh = "blue") {
|
e3638ca1 |
|
b26d22a2 |
factorized <- factorizeMatrix(celdaMod = celdaMod,
counts = counts)
|
16dba6f7 |
matrix <- factorized$proportions$cell
|
0b3c7f9a |
|
16dba6f7 |
if (rescale == TRUE) {
|
2e877ffe |
for (x in seq(nrow(matrix))) {
matrix[x, ] <- matrix[x, ] - min(matrix[x, ])
matrix[x, ] <- matrix[x, ] / max(matrix[x, ])
|
b26d22a2 |
varLabel <- "Scaled_Probability"
|
16dba6f7 |
}
} else {
|
b26d22a2 |
varLabel <- "Probability"
|
16dba6f7 |
}
rownames(matrix) <- gsub("L", "", rownames(matrix))
if (!is.null(modules)) {
if (length(rownames(matrix)[rownames(matrix) %in% modules]) < 1) {
stop("All modules selected do not exist in the model.")
}
|
b26d22a2 |
matrix <- matrix[which(rownames(matrix) %in% modules), ,
drop = FALSE]
matrix <- matrix[match(rownames(matrix), modules), ,
drop = FALSE]
|
16dba6f7 |
}
|
0b3c7f9a |
|
16dba6f7 |
rownames(matrix) <- paste0("L", rownames(matrix))
plotDimReduceGrid(dim1,
dim2,
matrix,
size,
xlab,
ylab,
|
b26d22a2 |
colorLow,
colorMid,
colorHigh,
varLabel)
|
c0963ca8 |
}
|
bd77a8b7 |
|
ca5fb59d |
|
918ae097 |
# Labeling code adapted from Seurat (https://github.com/satijalab/seurat)
|
9616c081 |
#' @title Plotting the cell labels on a dimensionality reduction plot
|
16dba6f7 |
#' @description Create a scatterplot for each row of a normalized
#' gene expression matrix where x and y axis are from a
#' data dimensionality reduction tool.
#' The cells are colored by its given `cluster` label.
#' @param dim1 Numeric vector. First dimension from data
#' dimensionality reduction output.
#' @param dim2 Numeric vector. Second dimension from data
#' dimensionality reduction output.
#' @param cluster Integer vector. Contains cluster labels for each cell.
|
bd77a8b7 |
#' @param size Numeric. Sets size of point on plot. Default 1.
#' @param xlab Character vector. Label for the x-axis. Default "Dimension_1".
#' @param ylab Character vector. Label for the y-axis. Default "Dimension_2".
|
b26d22a2 |
#' @param specificClusters Numeric vector.
|
16dba6f7 |
#' Only color cells in the specified clusters.
#' All other cells will be grey.
#' If NULL, all clusters will be colored. Default NULL.
|
b26d22a2 |
#' @param labelClusters Logical. Whether the cluster labels are plotted.
|
16dba6f7 |
#' Default FALSE.
|
b26d22a2 |
#' @param labelSize Numeric. Sets size of label if labelClusters is TRUE.
|
16dba6f7 |
#' Default 3.5.
|
d1154f0b |
#' @return The plot as a ggplot object
|
d7196f24 |
#' @importFrom ggrepel geom_text_repel
|
ff43602d |
#' @examples
#' \donttest{
#' data(celdaCGSim, celdaCGMod)
#' celdaTsne <- celdaTsne(counts = celdaCGSim$counts,
#' celdaMod = celdaCGMod)
#' plotDimReduceCluster(dim1 = celdaTsne[, 1],
#' dim2 = celdaTsne[, 2],
#' cluster = as.factor(clusters(celdaCGMod)$z),
#' specificClusters = c(1, 2, 3))
#' }
|
16dba6f7 |
#' @export
|
b6cf56ae |
plotDimReduceCluster <- function(dim1,
|
0b3c7f9a |
dim2,
cluster,
size = 1,
xlab = "Dimension_1",
ylab = "Dimension_2",
|
b26d22a2 |
specificClusters = NULL,
labelClusters = FALSE,
labelSize = 3.5) {
|
0b3c7f9a |
df <- data.frame(dim1, dim2, cluster)
colnames(df) <- c(xlab, ylab, "Cluster")
|
ca5fb59d |
naIx <- is.na(dim1) | is.na(dim2)
df <- df[!naIx, ]
|
0b3c7f9a |
df[3] <- as.factor(df[[3]])
|
b26d22a2 |
clusterColors <- distinctColors(nlevels(as.factor(cluster)))
if (!is.null(specificClusters)) {
clusterColors[!levels(df[[3]]) %in% specificClusters] <- "gray92"
|
0b3c7f9a |
}
g <- ggplot2::ggplot(df, ggplot2::aes_string(x = xlab, y = ylab)) +
ggplot2::geom_point(stat = "identity",
size = size,
ggplot2::aes_string(color = "Cluster")) +
ggplot2::theme(
panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.line = ggplot2::element_line(color = "black")) +
|
b26d22a2 |
ggplot2::scale_color_manual(values = clusterColors) +
|
0b3c7f9a |
ggplot2::guides(color =
ggplot2::guide_legend(override.aes = list(size = 1)))
|
b26d22a2 |
if (labelClusters == TRUE) {
|
2e877ffe |
centroidList <- lapply(seq(length(unique(cluster))), function(x) {
|
b6cf56ae |
df.sub <- df[df$Cluster == x, ]
|
0b3c7f9a |
median.1 <- stats::median(df.sub$Dimension_1)
median.2 <- stats::median(df.sub$Dimension_2)
cbind(median.1, median.2, x)
})
|
b26d22a2 |
centroid <- do.call(rbind, centroidList)
|
0b3c7f9a |
centroid <- as.data.frame(centroid)
|
16dba6f7 |
|
0b3c7f9a |
colnames(centroid) <- c("Dimension_1", "Dimension_2", "Cluster")
g <- g + ggplot2::geom_point(data = centroid,
|
2e877ffe |
mapping = ggplot2::aes_string(x = "Dimension_1",
y = "Dimension_2"),
size = 0,
alpha = 0) +
ggrepel::geom_text_repel(data = centroid,
mapping = ggplot2::aes_string(label = "Cluster"),
size = labelSize)
|
16dba6f7 |
}
|
2e877ffe |
return(g)
}
|
bd77a8b7 |
|
ca5fb59d |
|
e05116da |
# Run the t-SNE algorithm for dimensionality reduction
# @param norm Normalized count matrix.
# @param perplexity Numeric vector. Determines perplexity for tsne. Default 20.
|
b26d22a2 |
# @param maxIter Numeric vector. Determines iterations for tsne. Default 1000.
|
af4c3cb8 |
# @param doPca Logical. Whether to perform
|
16dba6f7 |
# dimensionality reduction with PCA before tSNE.
|
b26d22a2 |
# @param initialDims Integer.Number of dimensions from PCA to use as
|
0b3c7f9a |
# input in tSNE.
|
d7196f24 |
#' @importFrom Rtsne Rtsne
|
b6cf56ae |
.calculateTsne <- function(norm,
|
0b3c7f9a |
perplexity = 20,
|
b26d22a2 |
maxIter = 2500,
|
af4c3cb8 |
doPca = FALSE,
|
b26d22a2 |
initialDims = 20) {
|
bd77a8b7 |
|
2e877ffe |
res <- Rtsne::Rtsne(
norm,
|
af4c3cb8 |
pca = doPca,
|
2e877ffe |
max_iter = maxIter,
perplexity = perplexity,
check_duplicates = FALSE,
is_distance = FALSE,
initial_dims = initialDims)$Y
|
0b3c7f9a |
|
2e877ffe |
return(res)
}
|
bd77a8b7 |
|
ca5fb59d |
|
8b60a355 |
# Run the umap algorithm for dimensionality reduction
# @param norm Normalized count matrix.
|
2e877ffe |
# @param umapConfig An object of class umap.config,
|
16dba6f7 |
# containing configuration parameters to be passed to umap.
# Default umap::umap.defualts.
|
d7196f24 |
#' @importFrom umap umap
|
2e877ffe |
.calculateUmap <- function(norm, umapConfig = umap::umap.defaults) {
return(umap::umap(norm, umapConfig)$layout)
|
8b60a355 |
}
|