R/feature_selection.R
ca5fb59d
 #' @title Identify features with the highest influence on clustering.
 #' @description topRank() can quickly identify the top `n` rows for each column
 #'  of a matrix. For example, this can be useful for identifying the top `n`
 #'  features per cell.
ac522b67
 #' @param matrix Numeric matrix.
 #' @param n Integer. Maximum number of items above `threshold` returned for each
 #'  ranked row or column.
 #' @param margin Integer. Dimension of `matrix` to rank, with 1 for rows, 2 for
 #'  columns. Default 2.
 #' @param threshold Numeric. Only return ranked rows or columns in the matrix
 #'  that are above this threshold. If NULL, then no threshold will be applied.
 #'  Default 1.
 #' @param decreasing Logical. Specifies if the rank should be decreasing.
 #'  Default TRUE.
 #' @return List. The `index` variable provides the top `n` row (feature) indices
ca5fb59d
 #'  contributing the most to each column (cell). The `names` variable provides
ac522b67
 #'  the rownames corresponding to these indexes.
6a6a4ea5
 #' @examples
a49fff03
 #' data(sampleCells)
ca5fb59d
 #' topRanksPerCell <- topRank(sampleCells, n = 5)
 #' topFeatureNamesForCell <- topRanksPerCell$names[1]
b0f460d6
 #' @export
ac522b67
 topRank <- function(matrix,
     n = 25,
     margin = 2,
     threshold = 0,
     decreasing = TRUE) {
ca5fb59d
 
ac522b67
     if (is.null(threshold) || is.na(threshold)) {
ca5fb59d
         threshold <- min(matrix) - 1
ac522b67
     }
ca5fb59d
 
ac522b67
     # Function to sort values in a vector and return 'n' top results
     # If there are not 'n' top results above 'thresh', then the
     # number of entries in 'v' that are above 'thresh' will be returned
     .topFunction <- function(v, n, thresh) {
ca5fb59d
         vAboveThresh <- sum(v > thresh)
         nToSelect <- min(vAboveThresh, n)
 
         h <- NA
         if (nToSelect > 0) {
             h <- utils::head(order(v, decreasing = decreasing), nToSelect)
         }
         return(h)
ac522b67
     }
ca5fb59d
 
ac522b67
     # Parse top ranked indices from matrix
     topIx <-
ca5fb59d
         base::apply(matrix, margin, .topFunction, thresh = threshold, n = n)
 
ac522b67
     # Convert to list if apply converted to a matrix because all
     # elements had the same length
     if (is.matrix(topIx)) {
92ca710a
         topIx <- lapply(seq(ncol(topIx)), function(i)
ca5fb59d
             topIx[, i])
         names(topIx) <- dimnames(matrix)[[margin]]
ac522b67
     }
ca5fb59d
 
ac522b67
     # Parse names from returned margin
     oppositeMargin <-
ca5fb59d
         ifelse(margin - 1 > 0, margin - 1, length(dim(matrix)))
ac522b67
     topNames <- NULL
     namesToParse <- dimnames(matrix)[[oppositeMargin]]
     if (!is.null(namesToParse) & all(!is.na(topIx))) {
92ca710a
         topNames <- lapply(seq(length(topIx)),
ca5fb59d
             function(i) {
                 ifelse(is.na(topIx[[i]]), NA, namesToParse[topIx[[i]]])
             })
         names(topNames) <- names(topIx)
ac522b67
     }
ca5fb59d
 
ac522b67
     return(list(index = topIx, names = topNames))
b0f460d6
 }