#' Plot heatmap of using data stored in SingleCellExperiment Object #' @rdname plotSCEHeatmap #' @param inSCE \linkS4class{SingleCellExperiment} inherited object. #' @param useAssay character. A string indicating the assay name that #' provides the expression level to plot. Only for \code{plotSCEHeatmap}. #' @param useReducedDim character. A string indicating the reducedDim name that #' provides the expression level to plot. Only for \code{plotSCEDimReduceHeatmap}. #' @param doLog Logical scalar. Whether to do \code{log(assay + 1)} #' transformation on the assay indicated by \code{useAssay}. Default #' \code{FALSE}. #' @param featureIndex A vector that can subset the input SCE object by rows #' (features). Alternatively, it can be a vector identifying features in #' another feature list indicated by \code{featureIndexBy}. Default \code{NULL}. #' @param cellIndex A vector that can subset the input SCE object by columns #' (cells). Alternatively, it can be a vector identifying cells in another #' cell list indicated by \code{featureIndexBy}. Default \code{NULL}. #' @param scale Whether to perform z-score scaling on each row. Default #' \code{TRUE}. #' @param trim A 2-element numeric vector. Values outside of this range will be #' trimmed to their nearst bound. Default \code{c(-2, 2)} #' @param featureIndexBy A single character specifying a column name of #' \code{rowData(inSCE)}, or a vector of the same length as \code{nrow(inSCE)}, #' where we search for the non-rowname feature indices. Not applicable for #' \code{plotSCEDimReduceHeatmap}. Default \code{"rownames"}. #' @param cellIndexBy A single character specifying a column name of #' \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)}, #' where we search for the non-rowname cell indices. Default \code{"rownames"}. #' @param rowDataName character. The column name(s) in \code{rowData} that need #' to be added to the annotation. Not applicable for #' \code{plotSCEDimReduceHeatmap}. Default \code{NULL}. #' @param colDataName character. The column name(s) in \code{colData} that need #' to be added to the annotation. Default \code{NULL}. #' @param aggregateRow Feature variable for aggregating the heatmap by row. Can #' be a vector or a \code{rowData} column name for feature variable. Multiple #' variables are allowed. Default \code{NULL}. #' @param aggregateCol Cell variable for aggregating the heatmap by column. Can #' be a vector or a \code{colData} column name for cell variable. Multiple #' variables are allowed. Default \code{NULL}. #' @param featureAnnotations \code{data.frame}, with \code{rownames} containing #' all the features going to be plotted. Character columns should be factors. #' Default \code{NULL}. #' @param cellAnnotations \code{data.frame}, with \code{rownames} containing #' all the cells going to be plotted. Character columns should be factors. #' Default \code{NULL}. #' @param featureAnnotationColor A named list. Customized color settings for #' feature labeling. Should match the entries in the \code{featureAnnotations} #' or \code{rowDataName}. For each entry, there should be a list/vector of #' colors named with categories. Default \code{NULL}. #' @param cellAnnotationColor A named list. Customized color settings for #' cell labeling. Should match the entries in the \code{cellAnnotations} or #' \code{colDataName}. For each entry, there should be a list/vector of colors #' named with categories. Default \code{NULL}. #' @param palette Choose from \code{"ggplot"}, \code{"celda"} or \code{"random"} #' to generate unique category colors. #' @param rowSplitBy character. Do semi-heatmap based on the grouping of #' this(these) annotation(s). Should exist in either \code{rowDataName} or #' \code{names(featureAnnotations)}. Default \code{NULL}. #' @param colSplitBy character. Do semi-heatmap based on the grouping of #' this(these) annotation(s). Should exist in either \code{colDataName} or #' \code{names(cellAnnotations)}. Default \code{NULL}. #' @param rowLabel Use a logical for whether to display all the feature names, #' a single character to display a column of \code{rowData(inSCE)} annotation, #' a vector of the same length as full/subset \code{nrow(inSCE)} to display #' customized info. Default \code{FALSE}. #' @param colLabel Use a logical for whether to display all the cell names, a #' single character to display a column of \code{colData(inSCE)} annotation, #' a vector of the same length as full/subset \code{ncol(inSCE)} to display #' customized info. Default \code{FALSE}. #' @param rowLabelSize A number for the font size of feature names. Default #' \code{8} #' @param colLabelSize A number for the font size of cell names. Default #' \code{8} #' @param rowDend Whether to display row dendrogram. Default \code{TRUE}. #' @param colDend Whether to display column dendrogram. Default \code{TRUE}. #' @param title The main title of the whole plot. Default \code{NULL}. #' @param rowTitle The subtitle for the rows. Default \code{"Genes"}. #' @param colTitle The subtitle for the columns. Default \code{"Cells"}. #' @param rowGap A numeric value or a \code{\link[grid]{unit}} object. For the #' gap size between rows of the splitted heatmap. Default #' \code{grid::unit(0, 'mm')}. #' @param colGap A numeric value or a \code{\link[grid]{unit}} object. For the #' gap size between columns of the splitted heatmap. Default #' \code{grid::unit(0, 'mm')}. #' @param border A logical scalar. Whether to show the border of the heatmap or #' splitted heatmaps. Default \code{TRUE}. #' @param colorScheme function. A function that generates color code by giving #' a value. Can be generated by \code{\link[circlize]{colorRamp2}}. #' Default \code{NULL}. #' @param ... Other arguments passed to \code{\link[ComplexHeatmap]{Heatmap}}. #' @examples #' data(scExample, package = "singleCellTK") #' plotSCEHeatmap(sce[1:3,1:3], useAssay = "counts") #' @return A \code{\link[ggplot2]{ggplot}} object. #' @export #' @author Yichen Wang #' @importFrom scuttle aggregateAcrossCells aggregateAcrossFeatures #' @importFrom SingleCellExperiment SingleCellExperiment #' @importFrom SummarizedExperiment colData assayNames<- plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, doLog = FALSE, featureIndex = NULL, cellIndex = NULL, scale = TRUE, trim = c(-2, 2), featureIndexBy = 'rownames', cellIndexBy = 'rownames', rowDataName = NULL, colDataName = NULL, aggregateRow = NULL, aggregateCol = NULL, featureAnnotations = NULL, cellAnnotations = NULL, featureAnnotationColor = NULL, cellAnnotationColor = NULL, palette = c("ggplot", "celda", "random"), rowSplitBy = NULL, colSplitBy = NULL, rowLabel = FALSE, colLabel = FALSE, rowLabelSize = 6, colLabelSize = 6, rowDend = TRUE, colDend = TRUE, title = NULL, rowTitle = 'Features', colTitle = 'Cells', rowGap = grid::unit(0, 'mm'), colGap = grid::unit(0, 'mm'), border = FALSE, colorScheme = NULL, ...){ palette <- match.arg(palette) # STAGE 1: Create clean SCE object with only needed information #### ## .selectSCEMatrix, .manageCellVar and .manageFeatureVar perform checks useMat <- .selectSCEMatrix(inSCE, useAssay = useAssay, useReducedDim = useReducedDim, returnMatrix = TRUE, cellAsCol = TRUE) useAssay <- useMat$names$useAssay useReducedDim <- useMat$names$useReducedDim useData <- ifelse(!is.null(useAssay), useAssay, useReducedDim) ### cell annotation colDataName <- unique(c(colDataName, aggregateCol)) colDataAnns <- lapply(colDataName, function(x) .manageCellVar(inSCE, x)) if (length(colDataName) > 0) colDataAnns <- data.frame(colDataAnns, row.names = colnames(inSCE)) else colDataAnns <- data.frame(row.names = colnames(inSCE)) colnames(colDataAnns) <- colDataName cellAnnotations <- .mergeAnnotationDF(colDataAnns, cellAnnotations) if (!is.null(colSplitBy) && any(!colSplitBy %in% colnames(cellAnnotations))) stop('Specified `colSplitBy` variables not found.') if (isTRUE(colLabel)) { colLabelName <- colnames(inSCE) } else if (isFALSE(colLabel)) { colLabelName <- NULL } else { colLabelName <- .manageCellVar(inSCE, colLabel) colLabel <- TRUE } ### feature annotation rowDataAnns <- data.frame(row.names = rownames(useMat$mat)) if (!is.null(useAssay)) { # When using reducedDim, no rowData can be applied rowDataName <- unique(c(rowDataName, aggregateRow)) rowDataAnns <- lapply(rowDataName, function(x) .manageFeatureVar(inSCE, x)) if (length(rowDataName) > 0) rowDataAnns <- data.frame(rowDataAnns, row.names = rownames(inSCE)) else rowDataAnns <- data.frame(row.names = rownames(inSCE)) colnames(rowDataAnns) <- rowDataName } # But customized featureAnnotations should work featureAnnotations <- .mergeAnnotationDF(rowDataAnns, featureAnnotations) if (!is.null(rowSplitBy) && any(!rowSplitBy %in% colnames(featureAnnotations))) stop('Specified `rowSplitBy` variables not found.') if (isTRUE(rowLabel)) { rowLabelName <- rownames(useMat$mat) } else if (isFALSE(rowLabel)) { rowLabelName <- NULL } else { if (!is.null(useAssay)) { rowLabelName <- .manageFeatureVar(inSCE, rowLabel) rowLabel <- TRUE } else { # Using customized rowLabel for reducedDim if (length(rowLabel) != nrow(useMat$mat)) stop("Length of `rowLabel` does not match nrow of specified ", "`useReducedDim`") rowLabelName <- rowLabel rowLabel <- TRUE } } ### create SCE object SCE <- SingleCellExperiment(assay = list(useMat$mat), colData = cellAnnotations, rowData = featureAnnotations) assayNames(SCE) <- useData # STAGE 2: Subset SCE object as needed #### # Manage cell subsetting if(is.null(cellIndex)){ cellIndex <- seq(ncol(SCE)) } else if (is.character(cellIndex)) { # cellIndexBy not necessarily included in new "SCE" cellIndex <- retrieveSCEIndex(inSCE, cellIndex, axis = "col", by = cellIndexBy) } else if (is.logical(cellIndex)) { if (length(cellIndex) != ncol(inSCE)) { stop("Logical index length does not match ncol(inSCE)") } cellIndex <- which(cellIndex) } # Manage feature subsetting if(is.null(featureIndex)){ featureIndex <- seq(nrow(SCE)) } else if (is.character(featureIndex)) { if (!is.null(useAssay)) featureIndex <- retrieveSCEIndex(inSCE, featureIndex, axis = "row", by = featureIndexBy) else # When using reducedDim, can only go with "PC" names # or customized "by" featureIndex <- retrieveSCEIndex(SCE, featureIndex, axis = "row", by = featureIndexBy) } else if (is.logical(featureIndex)) { if (length(featureIndex) != nrow(SCE)) { stop("Logical index length does not match nrow(inSCE)") } featureIndex <- which(featureIndex) } colnames(SCE) <- colLabelName rownames(SCE) <- rowLabelName SCE <- SCE[featureIndex, cellIndex] ### Scaling should be done before aggregating if (isTRUE(doLog)) assay(SCE) <- log1p(assay(SCE)) if (isTRUE(scale)) assay(SCE) <- as.matrix(computeZScore(assay(SCE))) if (!is.null(trim)) assay(SCE) <- trimCounts(assay(SCE), trim) # STAGE 3: Aggregate As needed #### if (!is.null(aggregateCol)) { # TODO: whether to also aggregate numeric variable that users want # Might need to use "coldata.merge" in aggregate function colIDS <- colData(SCE)[, aggregateCol] origRowData <- rowData(SCE) SCE <- aggregateAcrossCells(SCE, ids = colIDS, use.assay.type = useData, store.number = NULL, statistics = "mean") # TODO: `aggregateAcrossCells` produce duplicated variables in colData # and unwanted "ncell" variable even if I set `store.number = NULL`. colData(SCE) <- colData(SCE)[,aggregateCol,drop=FALSE] newColnames <- do.call(paste, c(colData(SCE), list(sep = "_"))) colnames(SCE) <- newColnames rowData(SCE) <- origRowData } if (!is.null(aggregateRow)) { # `aggregateAcrossFeatures` doesn't work by with multi-var # Remake one single variable vector rowIDS <- rowData(SCE)[, aggregateRow, drop = FALSE] rowIDS <- do.call(paste, c(rowIDS, list(sep = "_"))) origColData <- colData(SCE) SCE <- aggregateAcrossFeatures(SCE, ids = rowIDS, average = TRUE, use.assay.type = useData) colData(SCE) <- origColData } # STAGE 4: Other minor preparation for plotting #### mat <- assay(SCE) if (is.null(colorScheme)) { if (!is.null(trim)) colorScheme <- circlize::colorRamp2(c(trim[1], 0, trim[2]), c('blue', 'white', 'red')) else colorScheme <- circlize::colorRamp2(c(min(mat), (max(mat) + min(mat))/2, max(mat)), c('blue', 'white', 'red')) } else { if (!is.function(colorScheme)) stop('`colorScheme` must be a function generated by ', 'circlize::colorRamp2') breaks <- attr(colorScheme, 'breaks') if (breaks[1] != min(trim) || breaks[length(breaks)] != max(trim)) stop('Breaks of `colorScheme` do not match with `trim`.') } ### Generate HeatmapAnnotation object ca <- NULL cellAnnotationColor <- .heatmapAnnColor(SCE, slot = "colData", custom = cellAnnotationColor, palette = palette) if(dim(cellAnnotations)[2] > 0) ca <- ComplexHeatmap::HeatmapAnnotation(df = colData(SCE), col = cellAnnotationColor) ra <- NULL featureAnnotationColor <- .heatmapAnnColor(SCE, slot = "rowData", custom = featureAnnotationColor, palette = palette) if(ncol(rowData(SCE)) > 0) ra <- ComplexHeatmap::rowAnnotation(df = rowData(SCE), col = featureAnnotationColor) ### Set split variable cs <- NULL if (!is.null(colSplitBy)) cs <- colData(SCE)[colSplitBy] rs <- NULL if (!is.null(rowSplitBy)) rs <- rowData(SCE)[rowSplitBy] ### if (!is.null(colGap)) { if (!inherits(colGap, "unit")) stop("`colGap` has to be 'unit' object. Try `grid::unit(", colGap, ", 'mm')`.") } else colGap <- grid::unit(0, 'mm') if (!is.null(rowGap)) { if (!inherits(rowGap, "unit")) stop("`rowGap` has to be 'unit' object. Try `grid::unit(", rowGap, ", 'mm')`.") } else rowGap <- grid::unit(0, 'mm') if (!is.null(useAssay)) name <- useAssay else name <- useReducedDim hm <- ComplexHeatmap::Heatmap(mat, name = name, left_annotation = ra, top_annotation = ca, col = colorScheme, row_split = rs, column_split = cs, row_title = rowTitle, column_title = colTitle, show_row_names = rowLabel, row_names_gp = grid::gpar(fontsize = rowLabelSize), show_row_dend = rowDend, show_column_names = colLabel, column_names_gp = grid::gpar(fontsize = colLabelSize), show_column_dend = colDend, row_gap = rowGap, column_gap = colGap, border = border, ...) return(hm) } .mergeAnnotationDF <- function(origin, external) { if (!is.null(external)) { external <- external[match(rownames(origin), rownames(external)), ,drop = FALSE] origin <- cbind(origin, external) } return(origin) } .heatmapAnnColor <- function(inSCE, slot = c("colData", "rowData"), custom = NULL, palette = palette) { slot <- match.arg(slot) if (!is.null(custom) && !is.list(custom)) stop("'cellAnnotationColor' or 'featureAnnotationColor' must be a list.") if (is.null(custom)) custom <- list() if (slot == "colData") data <- SummarizedExperiment::colData(inSCE) if (slot == "rowData") data <- SummarizedExperiment::rowData(inSCE) todoNames <- colnames(data) todoNames <- todoNames[!todoNames %in% names(custom)] newColor <- lapply(todoNames, function(n) { var <- data[[n]] if (is.factor(var)) categories <- levels(var) else categories <- unique(var) colors <- discreteColorPalette(length(categories), palette = palette) names(colors) <- categories return(colors) }) names(newColor) <- todoNames custom <- c(custom, newColor) return(custom) } # Test #logcounts(sceBatches) <- log1p(counts(sceBatches)) #plotSCEHeatmap2(sceBatches, "logcounts", # featureIndex = c("GCG1", "COX11", "INS1", "ND41"), # featureIndexBy = rowData(sceBatches)$feature_name, # cellIndex = c("reads.16087_", "Sample_1073_", # "reads.29330_", "Sample_801_"), # cellIndexBy = paste0(colnames(sceBatches), "_"), # rowLabel = "feature_name", rowDend = FALSE, # cluster_rows = FALSE, colLabel = TRUE, cluster_columns = FALSE, # colDataName = c("batch", "cell_type"), aggregateCol = c("cell_type", "batch")) #sce <-plotSCEHeatmap2(sceBatches, aggregateCol = "batch") #plotSCEHeatmap2(sceBatches, aggregateCol = c("cell_type", "batch")) #plotFindMarkerHeatmap(sce, log2fcThreshold = 0, minClustExprPerc = 0.4, # maxCtrlExprPerc = 0.5) #plotFindMarkerHeatmap(sce, log2fcThreshold = 0, minClustExprPerc = 0.4, # maxCtrlExprPerc = 0.5, # aggregateRow = "marker") #plotSCEDimReduceColData(sce, "cluster", "UMAP") CellVarColor <- function(inSCE, var, palette = c("ggplot", "random", "celda"), seed = 12345, ...) { var <- .manageCellVar(inSCE, var = var) palette <- match.arg(palette) if (is.factor(var)) uniqVar <- levels(var) else uniqVar <- unique(var) colors <- discreteColorPalette(length(uniqVar), palette = palette, seed = seed, ...) names(colors) <- uniqVar return(colors) }