R/helper.R
664a1a3f
 #' @rdname get_params
c9cd07e1
 #'   
53885e45
 #' @param x an object of class \code{\link{SconeExperiment}}.
c9cd07e1
 #'   
e9aed5e1
 #' @return A data.frame containing workflow parameters for each scone workflow.
c9cd07e1
 #'   
664a1a3f
 #' @export
c9cd07e1
 #' 
664a1a3f
 setMethod(
   f = "get_params",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     return(x@scone_params)
   })
 
49694464
 #' @rdname get_scores
c9cd07e1
 #'   
53885e45
 #' @param x an object of class \code{\link{SconeExperiment}}.
c9cd07e1
 #'   
 #' @return \code{get_scores} returns a matrix with all (non-missing) scone 
 #'   scores, ordered by average score rank.
 #'   
49694464
 #' @export
c9cd07e1
 #' 
49694464
 setMethod(
   f = "get_scores",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
       scores <- t(na.omit(t(x@scone_scores[,-NCOL(x@scone_scores)])))
       return(scores)
 })
 
 #' @rdname get_scores
c9cd07e1
 #'   
e9aed5e1
 #' @return \code{get_score_ranks} returns a vector of average score ranks.
c9cd07e1
 #'   
49694464
 #' @export
c9cd07e1
 #' 
49694464
 setMethod(
   f = "get_score_ranks",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     return(x@scone_scores[,NCOL(x@scone_scores)])
   })
 
 
b31e581a
 #' @rdname get_negconruv
c9cd07e1
 #'   
53885e45
 #' @param x an object of class \code{\link{SconeExperiment}}.
c9cd07e1
 #'   
b31e581a
 #' @return NULL or a logical vector.
c9cd07e1
 #'   
 #' @return For \code{get_negconruv} the returned vector indicates which genes
 #'   are negative controls to be used for RUV.
53885e45
 #'   
 #' @export
 #' 
b31e581a
 setMethod(
   f = "get_negconruv",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     if(length(x@which_negconruv) == 0) {
       return(NULL)
     } else {
       return(rowData(x)[,x@which_negconruv])
     }
   }
 )
 
 #' @rdname get_negconruv
c9cd07e1
 #'   
 #' @return For \code{get_negconeval} the returned vector indicates which genes
 #'   are negative controls to be used for evaluation.
53885e45
 #'
 #' @export
 #' 
b31e581a
 setMethod(
   f = "get_negconeval",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     if(length(x@which_negconeval) == 0) {
       return(NULL)
     } else {
       return(rowData(x)[,x@which_negconeval])
     }
   }
 )
 
 #' @rdname get_negconruv
c9cd07e1
 #'   
 #' @return For \code{get_poscon} the returned vector indicates which genes are 
 #'   positive controls to be used for evaluation.
53885e45
 #'
 #' @export
 #'
b31e581a
 setMethod(
   f = "get_poscon",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     if(length(x@which_poscon) == 0) {
       return(NULL)
     } else {
       return(rowData(x)[,x@which_poscon])
     }
   }
 )
 
 #' @rdname get_qc
c9cd07e1
 #'   
53885e45
 #' @param x an object of class \code{\link{SconeExperiment}}.
c9cd07e1
 #'   
e9aed5e1
 #' @return NULL or the quality control (QC) metric matrix.
53885e45
 #'
 #' @export
 #'
b31e581a
 setMethod(
   f = "get_qc",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     if(length(x@which_qc) == 0) {
       return(NULL)
     } else {
       retval <- as.matrix(colData(x)[, x@which_qc, drop=FALSE])
       return(retval)
     }
   }
 )
 
 #' @rdname get_bio
c9cd07e1
 #'   
53885e45
 #' @param x an object of class \code{\link{SconeExperiment}}.
c9cd07e1
 #'   
e9aed5e1
 #' @return NULL or a factor containing bio or batch covariate.
53885e45
 #'
 #' @export
 #'
b31e581a
 setMethod(
   f = "get_bio",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     if(length(x@which_bio) == 0) {
       return(NULL)
     } else {
       return(colData(x)[,x@which_bio])
     }
   }
 )
 
 #' @rdname get_bio
53885e45
 #'
 #' @export
 #'
b31e581a
 setMethod(
   f = "get_batch",
   signature = signature(x = "SconeExperiment"),
   definition = function(x) {
     if(length(x@which_batch) == 0) {
       return(NULL)
     } else {
       return(colData(x)[,x@which_batch])
     }
   }
 )
 
 #' Parse rows
c9cd07e1
 #' 
 #' This function is used internally in scone to parse the variables used to
 #' generate the design matrices.
 #' 
 #' @param pars character. A vector of parameters corresponding to a row of
 #'   workflow parameters.
e9aed5e1
 #' @param bio factor. The biological covariate.
 #' @param batch factor. The batch covariate.
c9cd07e1
 #' @param ruv_factors list. A list containing the factors of unwanted variation
 #'   (RUVg) for all upstream workflows.
e9aed5e1
 #' @param qc matrix. The principal components of the QC metric matrix.
c9cd07e1
 #'   
963a5849
 #' @return A list with the variables to be passed to make_design.
c9cd07e1
 #'   
e9aed5e1
 #' @keywords internal
c9cd07e1
 #'   
e9aed5e1
 .parse_row <- function(pars, bio, batch, ruv_factors, qc) {
   
   # Define upstream workflow: imputation x scaling
963a5849
   sc_name <- paste(pars[1:2], collapse="_")
7a3bbeb2
 
963a5849
   W <- out_bio <- out_batch <- NULL
e9aed5e1
   
963a5849
   if(pars[3]!="no_uv") {
     parsed <- strsplit(as.character(pars[3]), "=")[[1]]
     if(grepl("ruv", parsed[1])) {
e9aed5e1
       W <- ruv_factors[[sc_name]][,seq_len(as.numeric(parsed[2]))]
963a5849
     } else {
c440ebf7
       W <- qc[,seq_len(as.numeric(parsed[2]))]
963a5849
     }
   }
7a3bbeb2
 
963a5849
   if(pars[4]=="bio") {
     out_bio <- bio
   }
7a3bbeb2
 
963a5849
   if(pars[5]=="batch") {
     out_batch <- batch
   }
7a3bbeb2
 
963a5849
   return(list(sc_name=sc_name, W=W, bio=out_bio, batch=out_batch))
 }
 
b31e581a
 #' Make a Design Matrix
c9cd07e1
 #' 
 #' This function builds a design matrix for the Adjustment Normalization Step,
 #' in which covariates are two (possibly nested) categorical factors and one or
 #' more continuous variables.
 #' 
 #' @details If nested=TRUE a nested design is used, i.e. the batch variable is
 #'   assumed to be nested within the bio variable. Here, nested means that each
 #'   batch is composed of samples from only *one* level of bio, while each
 #'   level of bio may contain multiple batches.
 #'   
cb7b3ea1
 #' @export
c9cd07e1
 #' 
e9aed5e1
 #' @param bio factor. The biological covariate.
 #' @param batch factor. The batch covariate.
c9cd07e1
 #' @param W numeric. Either a vector or matrix containing one or more
 #'   continuous covariates (e.g. RUVg factors).
 #' @param nested logical. Whether or not to consider a nested design
 #'   (see details).
 #'   
963a5849
 #' @return The design matrix.
c9cd07e1
 #'   
 #' @examples 
 #' 
 #' bio = as.factor(rep(c(1,2),each = 2))
 #' batch = as.factor(rep(c(1,2),2))
 #' design_mat = make_design(bio,batch, W = NULL)
 #' 
963a5849
 make_design <- function(bio, batch, W, nested=FALSE) {
e9aed5e1
   
963a5849
   if(nested & (is.null(bio) | is.null(batch))) {
     stop("Nested design can be used only if both batch and bio are specified.")
   }
e9aed5e1
   
963a5849
   if(!is.null(bio)) {
     if(class(bio)!="factor") {
       stop("bio must be a factor.")
     }
   }
e9aed5e1
   
963a5849
   if(!is.null(batch)){
     if(class(batch)!="factor") {
       stop("batch must be a factor.")
     }
   }
7a3bbeb2
 
963a5849
   f <- "~ 1"
   if(!is.null(bio)) {
     f <- paste(f, "bio", sep="+")
   }
   if(!is.null(batch)) {
     f <- paste(f, "batch", sep="+")
   }
   if(!is.null(W)) {
     f <- paste(f, "W", sep="+")
   }
7a3bbeb2
 
963a5849
   if(is.null(bio) & is.null(batch) & is.null(W)) {
     return(NULL)
   } else if (!is.null(bio) & !is.null(batch) & nested) {
e9aed5e1
     
963a5849
     n_vec <- tapply(batch, bio, function(x) nlevels(droplevels(x)))
7a3bbeb2
 
963a5849
     mat = matrix(0,nrow = sum(n_vec),ncol = sum(n_vec - 1))
     xi = 1
     yi = 1
     for(i in 1:length(n_vec)){
       if(n_vec[i] > 1){
         cs = contr.sum(n_vec[i])
         dd = dim(cs)
         mat[xi:(xi + dd[1] - 1),yi:(yi + dd[2] - 1)] = cs
         xi = xi + dd[1]
         yi = yi + dd[2]
       }else{
         xi = xi + 1
       }
     }
7a3bbeb2
 
c9cd07e1
     return(model.matrix(as.formula(f), 
                         contrasts=list(bio=contr.sum, batch=mat)))
963a5849
   } else {
7a3bbeb2
     return(model.matrix(as.formula(f)))
963a5849
   }
 }
 
e9aed5e1
 #' Linear Adjustment Normalization
c9cd07e1
 #' 
 #' Given a matrix with log expression values and a design matrix, this function
 #' fits a linear model and removes the effects of the batch factor
 #' as well as of the linear variables encoded in W.
 #' 
 #' @details The function assumes that the columns of the design matrix
 #'   corresponding to the variable for which expression needs to be adjusted,
 #'   start with either the word "batch" or the letter "W" (case sensitive). Any
 #'   other covariate (including the intercept) is kept.
 #'   
cb7b3ea1
 #' @importFrom limma lmFit
 #' @export
c9cd07e1
 #' 
 #' @param log_expr matrix. The log gene expression (genes in row, samples in
 #'   columns).
 #' @param design_mat matrix. The design matrix (usually the result of
 #'   make_design).
 #' @param batch factor. A factor with the batch information, identifying batch
 #'   effect to be removed.
cb7b3ea1
 #' @param weights matrix. A matrix of weights.
963a5849
 #' @return The corrected log gene expression.
c9cd07e1
 #'   
 #' @examples
 #' 
 #' set.seed(141)
 #' bio = as.factor(rep(c(1,2),each = 2))
 #' batch = as.factor(rep(c(1,2),2))
 #' design_mat = make_design(bio,batch, W = NULL)
 #' 
 #' log_expr = matrix(rnorm(20),ncol = 4)
 #' adjusted_log_expr = lm_adjust(log_expr = log_expr,
 #'   design_mat = design_mat,
 #'   batch = batch)
 #' 
cb7b3ea1
 lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
963a5849
   lm_object <- lmFit(log_expr, design = design_mat, weights = weights)
 
   uvind <- grep("^W", colnames(design_mat))
   bind <- grep("^batch", colnames(design_mat))
7a3bbeb2
 
963a5849
   if(length(uvind)) {
     uv_term <- t(design_mat[,uvind] %*% t(lm_object$coefficients[,uvind]))
   } else {
     uv_term <- 0
   }
7a3bbeb2
 
963a5849
   if(length(bind)) {
     if(is.character(attr(design_mat,"contrasts")$batch)) {
       contr <- get(attr(design_mat,"contrasts")$batch)(nlevels(batch))
     } else {
       contr <- attr(design_mat,"contrasts")$batch
     }
     batch_term <- t(contr %*% t(lm_object$coefficients[,bind]))[,batch]
   } else {
     batch_term <- 0
   }
7a3bbeb2
 
   return(log_expr - batch_term  - uv_term)
963a5849
 }