Browse code

Added stratified_rle

mbcole authored on 12/09/2017 22:19:03
Showing 4 changed files

... ...
@@ -43,6 +43,9 @@
43 43
 #' @param stratified_pam logical. If TRUE then maximum ASW is separately 
44 44
 #'   computed for each biological-cross-batch stratum (accepts NAs), and a 
45 45
 #'   weighted average silhouette width is returned as PAM_SIL. Default FALSE.
46
+#' @param stratified_rle logical. If TRUE then rle metrics are separately 
47
+#'   computed for each biological-cross-batch stratum (accepts NAs), and
48
+#'   weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.
46 49
 #'   
47 50
 #' @importFrom class knn
48 51
 #' @importFrom fpc pamk
... ...
@@ -61,8 +64,9 @@
61 64
 #'   correlation between expression pcs and negative control gene factors.} 
62 65
 #'   \item{EXP_WV_COR}{ Maximum squared spearman correlation between expression
63 66
 #'   pcs and positive control gene factors.} \item{RLE_MED}{ The mean squared 
64
-#'   median Relative Log Expression (RLE).} \item{RLE_IQR}{ The variance of the
65
-#'   inter-quartile range (IQR) of the RLE.} }
67
+#'   median Relative Log Expression (RLE) (see stratified_rle argument).}
68
+#'   \item{RLE_IQR}{ The variance of the inter-quartile range (IQR) of the RLE
69
+#'   (see stratified_rle argument).} }
66 70
 #'   
67 71
 #' @examples 
68 72
 #' 
... ...
@@ -83,7 +87,8 @@ score_matrix <- function(expr, eval_pcs = 3,
83 87
                          qc_factors = NULL,
84 88
                          uv_factors = NULL,
85 89
                          wv_factors = NULL,
86
-                         is_log=FALSE, stratified_pam = FALSE){
90
+                         is_log=FALSE, stratified_pam = FALSE,
91
+                         stratified_rle = FALSE){
87 92
   
88 93
   if(any(is.na(expr) | is.infinite(expr) | is.nan(expr))){
89 94
     stop("NA/Inf/NaN Expression Values.")
... ...
@@ -242,13 +247,36 @@ score_matrix <- function(expr, eval_pcs = 3,
242 247
   }
243 248
   
244 249
   ## ----- RLE Measures
245
-  rle <- expr - rowMedians(expr)
246
-  
247
-  # Non-Zero Median RLE
248
-  RLE_MED <- mean(colMedians(rle)^2) # Variance of the median
249
-  
250
-  # Variable IQR RLE
251
-  RLE_IQR <- var(colIQRs(rle)) # Variance of the IQR
250
+  if(stratified_rle){
251
+    
252
+    biobatch = as.factor(paste(bio,batch,sep = "_"))
253
+    RLE_MED <- RLE_IQR <- 0
254
+    
255
+    for (cond in levels(biobatch)) {
256
+      is_cond = which(biobatch == cond)
257
+      cond_w = length(is_cond)
258
+      rle <- expr[,is_cond] - rowMedians(expr[,is_cond])
259
+      
260
+      # Non-Zero Median RLE
261
+      RLE_MED <- RLE_MED + cond_w * mean(colMedians(rle) ^ 2) # Variance of the median
262
+      
263
+      # Variable IQR RLE
264
+      RLE_IQR <- RLE_IQR + cond_w * var(colIQRs(rle)) # Variance of the IQR
265
+      
266
+    }
267
+    
268
+    RLE_MED <- RLE_MED / length(biobatch)
269
+    RLE_IQR <- RLE_IQR / length(biobatch)
270
+    
271
+  } else{
272
+    rle <- expr - rowMedians(expr)
273
+    
274
+    # Non-Zero Median RLE
275
+    RLE_MED <- mean(colMedians(rle) ^ 2) # Variance of the median
276
+    
277
+    # Variable IQR RLE
278
+    RLE_IQR <- var(colIQRs(rle)) # Variance of the IQR
279
+  }
252 280
   
253 281
   scores = c(BIO_SIL, BATCH_SIL, PAM_SIL,
254 282
              EXP_QC_COR, EXP_UV_COR, EXP_WV_COR,
... ...
@@ -50,7 +50,9 @@
50 50
 #' @param stratified_pam logical. If TRUE then maximum ASW for PAM_SIL is
51 51
 #'   separately computed for each biological-cross-batch stratum (accepting
52 52
 #'   NAs), and a weighted average is returned as PAM_SIL.
53
-#'   
53
+#' @param stratified_rle logical. If TRUE then rle metrics are separately 
54
+#'   computed for each biological-cross-batch stratum (accepts NAs), and
55
+#'   weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.
54 56
 #' @param verbose logical. If TRUE some messagges are printed.
55 57
 #' @param run logical. If FALSE the normalization and evaluation are not run,
56 58
 #'   but normalization parameters are returned in the output object for 
... ...
@@ -169,6 +171,7 @@ setMethod(
169 171
                         eval_proj = NULL,
170 172
                         eval_proj_args = NULL, eval_kclust=2:10,
171 173
                         verbose=FALSE, stratified_pam = FALSE,
174
+                        stratified_rle = FALSE,
172 175
                         return_norm = c("no", "in_memory", "hdf5"),
173 176
                         hdf5file,
174 177
                         bpparam=BiocParallel::bpparam()) {
... ...
@@ -329,6 +332,12 @@ setMethod(
329 332
         stop("'eval_kclust' must be less than the number of samples.")
330 333
       }
331 334
       
335
+      if(stratified_rle) {
336
+        if(is.null(bio) & is.null(batch)){
337
+          stop("For stratified_rle, bio and/or batch must be specified")
338
+        }
339
+      }
340
+      
332 341
       if(!is.null(eval_kclust) & stratified_pam) {
333 342
         if(is.null(bio) & is.null(batch)){
334 343
           stop("For stratified_pam, bio and/or batch must be specified")
... ...
@@ -15,8 +15,8 @@ scone(x, ...)
15 15
   adjust_batch = c("no", "yes", "force"), run = TRUE, evaluate = TRUE,
16 16
   eval_pcs = 3, eval_proj = NULL, eval_proj_args = NULL,
17 17
   eval_kclust = 2:10, verbose = FALSE, stratified_pam = FALSE,
18
-  return_norm = c("no", "in_memory", "hdf5"), hdf5file,
19
-  bpparam = BiocParallel::bpparam())
18
+  stratified_rle = FALSE, return_norm = c("no", "in_memory", "hdf5"),
19
+  hdf5file, bpparam = BiocParallel::bpparam())
20 20
 }
21 21
 \arguments{
22 22
 \item{x}{a \code{\link{SconeExperiment}} object.}
... ...
@@ -77,6 +77,10 @@ If NULL, tightness will be returned NA.}
77 77
 separately computed for each biological-cross-batch stratum (accepting
78 78
 NAs), and a weighted average is returned as PAM_SIL.}
79 79
 
80
+\item{stratified_rle}{logical. If TRUE then rle metrics are separately 
81
+computed for each biological-cross-batch stratum (accepts NAs), and
82
+weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.}
83
+
80 84
 \item{return_norm}{character. If "no" the normalized values will not be
81 85
 returned with the output object. This will create a much smaller object
82 86
 and may be useful for large datasets and/or when many combinations are
... ...
@@ -7,7 +7,7 @@
7 7
 score_matrix(expr, eval_pcs = 3, eval_proj = NULL, eval_proj_args = NULL,
8 8
   eval_kclust = NULL, bio = NULL, batch = NULL, qc_factors = NULL,
9 9
   uv_factors = NULL, wv_factors = NULL, is_log = FALSE,
10
-  stratified_pam = FALSE)
10
+  stratified_pam = FALSE, stratified_rle = FALSE)
11 11
 }
12 12
 \arguments{
13 13
 \item{expr}{matrix. The expression data matrix (genes in rows, cells in 
... ...
@@ -50,6 +50,10 @@ transformation will not be carried out prior to projection. Default FALSE.}
50 50
 \item{stratified_pam}{logical. If TRUE then maximum ASW is separately 
51 51
 computed for each biological-cross-batch stratum (accepts NAs), and a 
52 52
 weighted average silhouette width is returned as PAM_SIL. Default FALSE.}
53
+
54
+\item{stratified_rle}{logical. If TRUE then rle metrics are separately 
55
+computed for each biological-cross-batch stratum (accepts NAs), and
56
+weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.}
53 57
 }
54 58
 \value{
55 59
 A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average
... ...
@@ -61,8 +65,9 @@ A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average
61 65
   correlation between expression pcs and negative control gene factors.} 
62 66
   \item{EXP_WV_COR}{ Maximum squared spearman correlation between expression
63 67
   pcs and positive control gene factors.} \item{RLE_MED}{ The mean squared 
64
-  median Relative Log Expression (RLE).} \item{RLE_IQR}{ The variance of the
65
-  inter-quartile range (IQR) of the RLE.} }
68
+  median Relative Log Expression (RLE) (see stratified_rle argument).}
69
+  \item{RLE_IQR}{ The variance of the inter-quartile range (IQR) of the RLE
70
+  (see stratified_rle argument).} }
66 71
 }
67 72
 \description{
68 73
 This function evaluates a (normalized) expression matrix using SCONE