Browse code

added stratified_cor and passed stratified_rle/cor via scone

mbcole authored on 12/09/2017 22:55:50
Showing 4 changed files

... ...
@@ -1,125 +1,145 @@
1 1
 #' SCONE Evaluation: Evaluate an Expression Matrix
2
-#' 
3
-#' This function evaluates a (normalized) expression matrix using SCONE 
4
-#' criteria, producing 8 metrics based on i) Clustering, ii) Correlations and 
2
+#'
3
+#' This function evaluates a (normalized) expression matrix using SCONE
4
+#' criteria, producing 8 metrics based on i) Clustering, ii) Correlations and
5 5
 #' iii) Relative Expression.
6
-#' 
6
+#'
7 7
 #' @details Users may specify their own eval_proj function that will be used to
8 8
 #'   compute Clustering and Correlation metrics. This eval_proj() function must
9
-#'   have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+ 
10
-#'   pseudocount) expression data (genes in rows, cells in columns).} 
9
+#'   have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+
10
+#'   pseudocount) expression data (genes in rows, cells in columns).}
11 11
 #'   \item{eval_proj_args}{ list. additional function arguments, e.g. prior
12 12
 #'   data weights.}} and it must output a matrix representation of the original
13 13
 #'   data (cells in rows, factors in columns). The value of eval_proj_args is
14 14
 #'   passed to the user-defined function from the eval_proj_args argument of
15 15
 #'   the main score_matrix() function call.
16
-#'   
17
-#' @param expr matrix. The expression data matrix (genes in rows, cells in 
16
+#'
17
+#' @param expr matrix. The expression data matrix (genes in rows, cells in
18 18
 #'   columns).
19 19
 #' @param eval_pcs numeric. The number of principal components to use for
20 20
 #'   evaluation (Default 3). Ignored if !is.null(eval_proj).
21 21
 #' @param eval_proj function. Projection function for evaluation (see Details).
22 22
 #'   If NULL, PCA is used for projection
23
-#' @param eval_proj_args list. List of arguments passed to projection function 
23
+#' @param eval_proj_args list. List of arguments passed to projection function
24 24
 #'   as eval_proj_args (see Details).
25
-#' @param eval_kclust numeric. The number of clusters (> 1) to be used for pam 
26
-#'   tightness (PAM_SIL) evaluation. If an array of integers, largest average 
25
+#' @param eval_kclust numeric. The number of clusters (> 1) to be used for pam
26
+#'   tightness (PAM_SIL) evaluation. If an array of integers, largest average
27 27
 #'   silhouette width (tightness) will be reported in PAM_SIL. If NULL, PAM_SIL
28 28
 #'   will be returned NA.
29 29
 #' @param bio factor. A known biological condition (variation to be preserved),
30 30
 #'   NA is allowed. If NULL, condition ASW, BIO_SIL, will be returned NA.
31 31
 #' @param batch factor. A known batch variable (variation to be removed), NA is
32 32
 #'   allowed. If NULL, batch ASW, BATCH_SIL, will be returned NA.
33
-#' @param qc_factors Factors of unwanted variation derived from quality 
33
+#' @param qc_factors Factors of unwanted variation derived from quality
34 34
 #'   metrics. If NULL, qc correlations, EXP_QC_COR, will be returned NA.
35
-#' @param uv_factors Factors of unwanted variation derived from negative 
35
+#' @param uv_factors Factors of unwanted variation derived from negative
36 36
 #'   control genes (evaluation set). If NULL, uv correlations, EXP_UV_COR,
37 37
 #'   will be returned NA.
38 38
 #' @param wv_factors Factors of wanted variation derived from positive
39 39
 #'   control genes (evaluation set). If NULL, wv correlations, EXP_WV_COR,
40 40
 #'   will be returned NA.
41
-#' @param is_log logical. If TRUE the expr matrix is already logged and log 
41
+#' @param is_log logical. If TRUE the expr matrix is already logged and log
42 42
 #'   transformation will not be carried out prior to projection. Default FALSE.
43
-#' @param stratified_pam logical. If TRUE then maximum ASW is separately 
44
-#'   computed for each biological-cross-batch stratum (accepts NAs), and a 
43
+#' @param stratified_pam logical. If TRUE then maximum ASW is separately
44
+#'   computed for each biological-cross-batch stratum (accepts NAs), and a
45 45
 #'   weighted average silhouette width is returned as PAM_SIL. Default FALSE.
46
-#' @param stratified_rle logical. If TRUE then rle metrics are separately 
46
+#' @param stratified_cor logical. If TRUE then cor metrics are separately
47
+#'   computed for each biological-cross-batch stratum (accepts NAs), and
48
+#'   weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR.
49
+#'   Default FALSE.
50
+#' @param stratified_rle logical. If TRUE then rle metrics are separately
47 51
 #'   computed for each biological-cross-batch stratum (accepts NAs), and
48 52
 #'   weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.
49
-#'   
53
+#'
50 54
 #' @importFrom class knn
51 55
 #' @importFrom fpc pamk
52 56
 #' @importFrom cluster silhouette
53 57
 #' @importFrom rARPACK svds
54 58
 #' @importFrom matrixStats rowMedians colMedians colIQRs
55
-#'   
59
+#'
56 60
 #' @export
57
-#' 
61
+#'
58 62
 #' @return A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average
59
-#'   silhouette width by biological condition.} \item{BATCH_SIL}{ Average 
60
-#'   silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average 
61
-#'   silhouette width from PAM clustering (see stratified_pam argument).} 
63
+#'   silhouette width by biological condition.} \item{BATCH_SIL}{ Average
64
+#'   silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average
65
+#'   silhouette width from PAM clustering (see stratified_pam argument).}
62 66
 #'   \item{EXP_QC_COR}{ Maximum squared spearman correlation between expression
63
-#'   pcs and quality factors.} \item{EXP_UV_COR}{ Maximum squared spearman 
64
-#'   correlation between expression pcs and negative control gene factors.} 
65
-#'   \item{EXP_WV_COR}{ Maximum squared spearman correlation between expression
66
-#'   pcs and positive control gene factors.} \item{RLE_MED}{ The mean squared 
67
-#'   median Relative Log Expression (RLE) (see stratified_rle argument).}
68
-#'   \item{RLE_IQR}{ The variance of the inter-quartile range (IQR) of the RLE
69
-#'   (see stratified_rle argument).} }
70
-#'   
71
-#' @examples 
72
-#' 
67
+#'   pcs and quality factors (see stratified_cor argument).} \item{EXP_UV_COR}{
68
+#'   Maximum squared spearman correlation between expression pcs and negative
69
+#'   control gene factors (see stratified_cor argument).} \item{EXP_WV_COR}{
70
+#'   Maximum squared spearman correlation between expression pcs and positive
71
+#'   control gene factors (see stratified_cor argument).} \item{RLE_MED}{ The
72
+#'   mean squared median Relative Log Expression (RLE) (see stratified_rle
73
+#'   argument).} \item{RLE_IQR}{ The variance of the inter-quartile range (IQR)
74
+#'   of the RLE (see stratified_rle argument).} }
75
+#'
76
+#' @examples
77
+#'
73 78
 #' set.seed(141)
74 79
 #' bio = as.factor(rep(c(1,2),each = 2))
75 80
 #' batch = as.factor(rep(c(1,2),2))
76 81
 #' log_expr = matrix(rnorm(20),ncol = 4)
77
-#' 
78
-#' scone_metrics = score_matrix(log_expr, 
82
+#'
83
+#' scone_metrics = score_matrix(log_expr,
79 84
 #'    bio = bio, batch = batch,
80 85
 #'    eval_kclust = 2, is_log = TRUE)
81
-#' 
86
+#'
82 87
 
83
-score_matrix <- function(expr, eval_pcs = 3,
84
-                         eval_proj = NULL, eval_proj_args = NULL,
88
+score_matrix <- function(expr,
89
+                         eval_pcs = 3,
90
+                         eval_proj = NULL,
91
+                         eval_proj_args = NULL,
85 92
                          eval_kclust = NULL,
86
-                         bio = NULL, batch = NULL,
93
+                         bio = NULL,
94
+                         batch = NULL,
87 95
                          qc_factors = NULL,
88 96
                          uv_factors = NULL,
89 97
                          wv_factors = NULL,
90
-                         is_log=FALSE, stratified_pam = FALSE,
91
-                         stratified_rle = FALSE){
92
-  
93
-  if(any(is.na(expr) | is.infinite(expr) | is.nan(expr))){
98
+                         is_log = FALSE,
99
+                         stratified_pam = FALSE,
100
+                         stratified_cor = FALSE,
101
+                         stratified_rle = FALSE) {
102
+  if (any(is.na(expr) | is.infinite(expr) | is.nan(expr))) {
94 103
     stop("NA/Inf/NaN Expression Values.")
95 104
   }
96 105
   
97
-  if(!is_log) {
106
+  if (!is_log) {
98 107
     expr <- log1p(expr)
99 108
   }
100 109
   
101
-  # The svd we do below on expr throws an exception if 
102
-  # expr created by one of the normalizations has a 
110
+  # The svd we do below on expr throws an exception if
111
+  # expr created by one of the normalizations has a
103 112
   # constant feature (=gene, i.e. row)
104
-  constantFeatures = apply(expr, 1, function(x) max(x)-min(x)) < 1e-3
105
-  if(any(constantFeatures)) {
106
-    warning(sprintf(paste0("scone_eval: expression matrix ",
107
-                           "contained %d constant features (rows) ",
108
-                           "---> excluding them"), sum(constantFeatures)))
109
-    expr = expr[!constantFeatures, ]
113
+  constantFeatures = apply(expr, 1, function(x)
114
+    max(x) - min(x)) < 1e-3
115
+  if (any(constantFeatures)) {
116
+    warning(sprintf(
117
+      paste0(
118
+        "scone_eval: expression matrix ",
119
+        "contained %d constant features (rows) ",
120
+        "---> excluding them"
121
+      ),
122
+      sum(constantFeatures)
123
+    ))
124
+    expr = expr[!constantFeatures,]
110 125
   }
111 126
   
112
-  if(is.null(eval_proj)){
127
+  if (is.null(eval_proj)) {
113 128
     proj = tryCatch({
114
-      svds(scale(t(expr), center = TRUE, scale = TRUE),
115
-           k=eval_pcs, nu=eval_pcs, nv=0)$u},
116
-      error = function(e) {
117
-        stop("scone_eval: svd failed")
118
-      })
129
+      svds(
130
+        scale(t(expr), center = TRUE, scale = TRUE),
131
+        k = eval_pcs,
132
+        nu = eval_pcs,
133
+        nv = 0
134
+      )$u
135
+    },
136
+    error = function(e) {
137
+      stop("scone_eval: svd failed")
138
+    })
119 139
     
120 140
     
121 141
   } else {
122
-    proj = eval_proj(expr,eval_proj_args = eval_proj_args)
142
+    proj = eval_proj(expr, eval_proj_args = eval_proj_args)
123 143
     eval_pcs = ncol(proj)
124 144
   }
125 145
   
... ...
@@ -128,17 +148,21 @@ score_matrix <- function(expr, eval_pcs = 3,
128 148
   
129 149
   # Biological Condition
130 150
   
131
-  if( !is.null(bio) ) {
132
-    if(!all(is.na(bio))) {
133
-      if(length(unique(bio)) > 1) {
151
+  if (!is.null(bio)) {
152
+    if (!all(is.na(bio))) {
153
+      if (length(unique(bio)) > 1) {
134 154
         BIO_SIL = summary(cluster::silhouette(as.numeric(na.omit(bio)),
135 155
                                               dd[!is.na(bio),
136 156
                                                  !is.na(bio)]))$avg.width
137 157
       } else {
138 158
         BIO_SIL = NA
139
-        warning(paste0("after exclusion of samples, ",
140
-                       "only one bio remains, BATCH_BIO",
141
-                       " is undefined"))
159
+        warning(
160
+          paste0(
161
+            "after exclusion of samples, ",
162
+            "only one bio remains, BATCH_BIO",
163
+            " is undefined"
164
+          )
165
+        )
142 166
       }
143 167
     } else {
144 168
       BIO_SIL = NA
... ...
@@ -150,17 +174,21 @@ score_matrix <- function(expr, eval_pcs = 3,
150 174
   
151 175
   # Batch Condition
152 176
   
153
-  if(!is.null(batch)) {
154
-    if(!all(is.na(batch))) {
155
-      if(length(unique(batch)) > 1) {
177
+  if (!is.null(batch)) {
178
+    if (!all(is.na(batch))) {
179
+      if (length(unique(batch)) > 1) {
156 180
         BATCH_SIL <- summary(cluster::silhouette(as.numeric(na.omit(batch)),
157 181
                                                  dd[!is.na(batch),
158 182
                                                     !is.na(batch)]))$avg.width
159 183
       } else {
160 184
         BATCH_SIL <- NA
161
-        warning(paste0("after exclusion of samples,",
162
-                       " only one batch remains, ",
163
-                       "BATCH_SIL is undefined"))
185
+        warning(
186
+          paste0(
187
+            "after exclusion of samples,",
188
+            " only one batch remains, ",
189
+            "BATCH_SIL is undefined"
190
+          )
191
+        )
164 192
       }
165 193
     } else{
166 194
       BATCH_SIL <- NA
... ...
@@ -172,96 +200,152 @@ score_matrix <- function(expr, eval_pcs = 3,
172 200
   
173 201
   ## ------ PAM Tightness -----
174 202
   
175
-  if ( !is.null(eval_kclust) ){
176
-    
203
+  if (!is.null(eval_kclust)) {
177 204
     # "Stratified" PAM
178 205
     
179
-    if(stratified_pam){
180
-      
181
-      biobatch = as.factor(paste(bio,batch,sep = "_"))
206
+    if (stratified_pam) {
207
+      biobatch = as.factor(paste(bio, batch, sep = "_"))
182 208
       PAM_SIL = 0
183 209
       
184 210
       # Max Average Sil Width per bio-cross-batch Condition
185
-      for (cond in levels(biobatch)){
211
+      for (cond in levels(biobatch)) {
186 212
         is_cond = which(biobatch == cond)
187 213
         cond_w = length(is_cond)
188
-        if(cond_w > max(eval_kclust)){
189
-          pamk_object = pamk(proj[is_cond,],krange = eval_kclust)
214
+        if (cond_w > max(eval_kclust)) {
215
+          pamk_object = pamk(proj[is_cond, ], krange = eval_kclust)
190 216
           
191
-          # Despite krange excluding nc = 1, 
217
+          # Despite krange excluding nc = 1,
192 218
           # if asw is negative, nc = 1 will be selected
193
-          if(is.null(pamk_object$pamobject$silinfo$avg.width) ){
219
+          if (is.null(pamk_object$pamobject$silinfo$avg.width)) {
194 220
             if (!1 %in% eval_kclust) {
195
-              pamk_object$pamobject$silinfo$avg.width = max(
196
-                pamk_object$crit[1:max(eval_kclust) %in% eval_kclust]
197
-              )
198
-            }else{
199
-              stop(paste0("nc = 1 was selected by Duda-Hart,",
200
-                          " exclude 1 from eval_kclust."))
221
+              pamk_object$pamobject$silinfo$avg.width = 
222
+                max(pamk_object$crit[1:max(eval_kclust) %in% eval_kclust])
223
+            } else{
224
+              stop(paste0(
225
+                "nc = 1 was selected by Duda-Hart,",
226
+                " exclude 1 from eval_kclust."
227
+              ))
201 228
             }
202 229
           }
203 230
           
204
-          PAM_SIL = PAM_SIL + cond_w*pamk_object$pamobject$silinfo$avg.width
205
-        }else{
206
-          stop(paste(paste0("Number of clusters 'k' must be ",
207
-                            "smaller than bio-cross-batch stratum size:"),
208
-                     paste(levels(biobatch),
209
-                           table(biobatch),
210
-                           sep = " = ",collapse = ", ")))
231
+          PAM_SIL = PAM_SIL + cond_w * pamk_object$pamobject$silinfo$avg.width
232
+        } else{
233
+          stop(paste(
234
+            paste0(
235
+              "Number of clusters 'k' must be ",
236
+              "smaller than bio-cross-batch stratum size:"
237
+            ),
238
+            paste(
239
+              levels(biobatch),
240
+              table(biobatch),
241
+              sep = " = ",
242
+              collapse = ", "
243
+            )
244
+          ))
211 245
         }
212 246
       }
213
-      PAM_SIL = PAM_SIL/length(biobatch)
247
+      PAM_SIL = PAM_SIL / length(biobatch)
214 248
       
215 249
       # Traditional PAM
216 250
       
217
-    }else{
218
-      pamk_object = pamk(proj,krange = eval_kclust)
251
+    } else{
252
+      pamk_object = pamk(proj, krange = eval_kclust)
219 253
       PAM_SIL = pamk_object$pamobject$silinfo$avg.width
220 254
     }
221 255
     
222
-  }else{
256
+  } else{
223 257
     PAM_SIL = NA
224 258
   }
225 259
   
226 260
   ## ------ Correlation with Factors -----
227
-  
228
-  # Max cor with quality factors.
229
-  if(!is.null(qc_factors)){
230
-    EXP_QC_COR = max(cor(proj,qc_factors,method = "spearman")^2)
231
-  }else{
232
-    EXP_QC_COR = NA
233
-  }
234
-  
235
-  # Max cor with UV factors.
236
-  if(!is.null(uv_factors)){
237
-    EXP_UV_COR = max(cor(proj,uv_factors,method = "spearman")^2)
238
-  }else{
239
-    EXP_UV_COR = NA
240
-  }
241
-  
242
-  # Max cor with WV factors.
243
-  if(!is.null(wv_factors)){
244
-    EXP_WV_COR = max(cor(proj,wv_factors,method = "spearman")^2)
245
-  }else{
246
-    EXP_WV_COR = NA
261
+  if (stratified_cor) {
262
+    biobatch = as.factor(paste(bio, batch, sep = "_"))
263
+    EXP_QC_COR <- EXP_UV_COR <- EXP_WV_COR <- 0
264
+    
265
+    for (cond in levels(biobatch)) {
266
+      is_cond = which(biobatch == cond)
267
+      cond_w = length(is_cond)
268
+      
269
+      # Max cor with quality factors.
270
+      if (!is.null(qc_factors)) {
271
+        EXP_QC_COR <- EXP_QC_COR + cond_w * max(cor(proj[is_cond, ],
272
+                                                    qc_factors[is_cond, ],
273
+                                                    method = "spearman") ^ 2)
274
+      }
275
+      
276
+      # Max cor with UV factors.
277
+      if (!is.null(uv_factors)) {
278
+        EXP_UV_COR  <- EXP_UV_COR + cond_w * max(cor(proj[is_cond, ],
279
+                                                     uv_factors[is_cond, ],
280
+                                                     method = "spearman") ^ 2)
281
+      }
282
+      
283
+      # Max cor with WV factors.
284
+      if (!is.null(wv_factors)) {
285
+        EXP_WV_COR <-  EXP_WV_COR + cond_w * max(cor(proj[is_cond, ],
286
+                                                     wv_factors[is_cond, ],
287
+                                                     method = "spearman") ^ 2)
288
+      }
289
+      
290
+    }
291
+    
292
+    if (!is.null(qc_factors)) {
293
+      EXP_QC_COR <- EXP_QC_COR / length(biobatch)
294
+    } else{
295
+      EXP_QC_COR <- NA
296
+    }
297
+    if (!is.null(uv_factors)) {
298
+      EXP_UV_COR <- EXP_UV_COR / length(biobatch)
299
+    } else{
300
+      EXP_UV_COR <- NA
301
+    }
302
+    if (!is.null(wv_factors)) {
303
+      EXP_WV_COR <- EXP_WV_COR / length(biobatch)
304
+    } else{
305
+      EXP_WV_COR <- NA
306
+    }
307
+    
308
+  } else{
309
+    # Max cor with quality factors.
310
+    if (!is.null(qc_factors)) {
311
+      EXP_QC_COR = max(cor(proj, qc_factors, method = "spearman") ^ 2)
312
+    } else{
313
+      EXP_QC_COR = NA
314
+    }
315
+    
316
+    # Max cor with UV factors.
317
+    if (!is.null(uv_factors)) {
318
+      EXP_UV_COR = max(cor(proj, uv_factors, method = "spearman") ^ 2)
319
+    } else{
320
+      EXP_UV_COR = NA
321
+    }
322
+    
323
+    # Max cor with WV factors.
324
+    if (!is.null(wv_factors)) {
325
+      EXP_WV_COR = max(cor(proj, wv_factors, method = "spearman") ^ 2)
326
+    } else{
327
+      EXP_WV_COR = NA
328
+    }
329
+    
247 330
   }
248 331
   
249 332
   ## ----- RLE Measures
250
-  if(stratified_rle){
251
-    
252
-    biobatch = as.factor(paste(bio,batch,sep = "_"))
333
+  if (stratified_rle) {
334
+    biobatch = as.factor(paste(bio, batch, sep = "_"))
253 335
     RLE_MED <- RLE_IQR <- 0
254 336
     
255 337
     for (cond in levels(biobatch)) {
256 338
       is_cond = which(biobatch == cond)
257 339
       cond_w = length(is_cond)
258
-      rle <- expr[,is_cond] - rowMedians(expr[,is_cond])
340
+      rle <- expr[, is_cond] - rowMedians(expr[, is_cond])
259 341
       
260 342
       # Non-Zero Median RLE
261
-      RLE_MED <- RLE_MED + cond_w * mean(colMedians(rle) ^ 2) # Variance of the median
343
+      RLE_MED <-
344
+        RLE_MED + cond_w * mean(colMedians(rle) ^ 2) # Var of the med
262 345
       
263 346
       # Variable IQR RLE
264
-      RLE_IQR <- RLE_IQR + cond_w * var(colIQRs(rle)) # Variance of the IQR
347
+      RLE_IQR <-
348
+        RLE_IQR + cond_w * var(colIQRs(rle)) # Variance of the IQR
265 349
       
266 350
     }
267 351
     
... ...
@@ -278,11 +362,23 @@ score_matrix <- function(expr, eval_pcs = 3,
278 362
     RLE_IQR <- var(colIQRs(rle)) # Variance of the IQR
279 363
   }
280 364
   
281
-  scores = c(BIO_SIL, BATCH_SIL, PAM_SIL,
282
-             EXP_QC_COR, EXP_UV_COR, EXP_WV_COR,
283
-             RLE_MED, RLE_IQR)
284
-  names(scores) = c("BIO_SIL", "BATCH_SIL", "PAM_SIL",
285
-                    "EXP_QC_COR", "EXP_UV_COR", "EXP_WV_COR",
286
-                    "RLE_MED", "RLE_IQR")
365
+  scores = c(BIO_SIL,
366
+             BATCH_SIL,
367
+             PAM_SIL,
368
+             EXP_QC_COR,
369
+             EXP_UV_COR,
370
+             EXP_WV_COR,
371
+             RLE_MED,
372
+             RLE_IQR)
373
+  names(scores) = c(
374
+    "BIO_SIL",
375
+    "BATCH_SIL",
376
+    "PAM_SIL",
377
+    "EXP_QC_COR",
378
+    "EXP_UV_COR",
379
+    "EXP_WV_COR",
380
+    "RLE_MED",
381
+    "RLE_IQR"
382
+  )
287 383
   return(scores)
288 384
 }
... ...
@@ -50,6 +50,10 @@
50 50
 #' @param stratified_pam logical. If TRUE then maximum ASW for PAM_SIL is
51 51
 #'   separately computed for each biological-cross-batch stratum (accepting
52 52
 #'   NAs), and a weighted average is returned as PAM_SIL.
53
+#' @param stratified_cor logical. If TRUE then cor metrics are separately
54
+#'   computed for each biological-cross-batch stratum (accepts NAs), and
55
+#'   weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR.
56
+#'   Default FALSE.
53 57
 #' @param stratified_rle logical. If TRUE then rle metrics are separately 
54 58
 #'   computed for each biological-cross-batch stratum (accepts NAs), and
55 59
 #'   weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.
... ...
@@ -171,6 +175,7 @@ setMethod(
171 175
                         eval_proj = NULL,
172 176
                         eval_proj_args = NULL, eval_kclust=2:10,
173 177
                         verbose=FALSE, stratified_pam = FALSE,
178
+                        stratified_cor = FALSE,
174 179
                         stratified_rle = FALSE,
175 180
                         return_norm = c("no", "in_memory", "hdf5"),
176 181
                         hdf5file,
... ...
@@ -338,6 +343,12 @@ setMethod(
338 343
         }
339 344
       }
340 345
       
346
+      if(stratified_cor) {
347
+        if(is.null(bio) & is.null(batch)){
348
+          stop("For stratified_cor, bio and/or batch must be specified")
349
+        }
350
+      }
351
+      
341 352
       if(!is.null(eval_kclust) & stratified_pam) {
342 353
         if(is.null(bio) & is.null(batch)){
343 354
           stop("For stratified_pam, bio and/or batch must be specified")
... ...
@@ -548,7 +559,10 @@ setMethod(
548 559
                               bio = bio, batch = batch,
549 560
                               qc_factors = qc_pcs, 
550 561
                               uv_factors = uv_factors, wv_factors = wv_factors,
551
-                              is_log = TRUE, stratified_pam = stratified_pam)
562
+                              stratified_pam = stratified_pam,
563
+                              stratified_cor = stratified_cor,
564
+                              stratified_rle = stratified_rle,
565
+                              is_log = TRUE)
552 566
       } else {
553 567
         score <- NULL
554 568
       }
... ...
@@ -15,8 +15,8 @@ scone(x, ...)
15 15
   adjust_batch = c("no", "yes", "force"), run = TRUE, evaluate = TRUE,
16 16
   eval_pcs = 3, eval_proj = NULL, eval_proj_args = NULL,
17 17
   eval_kclust = 2:10, verbose = FALSE, stratified_pam = FALSE,
18
-  stratified_rle = FALSE, return_norm = c("no", "in_memory", "hdf5"),
19
-  hdf5file, bpparam = BiocParallel::bpparam())
18
+  stratified_cor = FALSE, stratified_rle = FALSE, return_norm = c("no",
19
+  "in_memory", "hdf5"), hdf5file, bpparam = BiocParallel::bpparam())
20 20
 }
21 21
 \arguments{
22 22
 \item{x}{a \code{\link{SconeExperiment}} object.}
... ...
@@ -77,6 +77,11 @@ If NULL, tightness will be returned NA.}
77 77
 separately computed for each biological-cross-batch stratum (accepting
78 78
 NAs), and a weighted average is returned as PAM_SIL.}
79 79
 
80
+\item{stratified_cor}{logical. If TRUE then cor metrics are separately
81
+computed for each biological-cross-batch stratum (accepts NAs), and
82
+weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR.
83
+Default FALSE.}
84
+
80 85
 \item{stratified_rle}{logical. If TRUE then rle metrics are separately 
81 86
 computed for each biological-cross-batch stratum (accepts NAs), and
82 87
 weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.}
... ...
@@ -7,10 +7,10 @@
7 7
 score_matrix(expr, eval_pcs = 3, eval_proj = NULL, eval_proj_args = NULL,
8 8
   eval_kclust = NULL, bio = NULL, batch = NULL, qc_factors = NULL,
9 9
   uv_factors = NULL, wv_factors = NULL, is_log = FALSE,
10
-  stratified_pam = FALSE, stratified_rle = FALSE)
10
+  stratified_pam = FALSE, stratified_cor = FALSE, stratified_rle = FALSE)
11 11
 }
12 12
 \arguments{
13
-\item{expr}{matrix. The expression data matrix (genes in rows, cells in 
13
+\item{expr}{matrix. The expression data matrix (genes in rows, cells in
14 14
 columns).}
15 15
 
16 16
 \item{eval_pcs}{numeric. The number of principal components to use for
... ...
@@ -19,11 +19,11 @@ evaluation (Default 3). Ignored if !is.null(eval_proj).}
19 19
 \item{eval_proj}{function. Projection function for evaluation (see Details).
20 20
 If NULL, PCA is used for projection}
21 21
 
22
-\item{eval_proj_args}{list. List of arguments passed to projection function 
22
+\item{eval_proj_args}{list. List of arguments passed to projection function
23 23
 as eval_proj_args (see Details).}
24 24
 
25
-\item{eval_kclust}{numeric. The number of clusters (> 1) to be used for pam 
26
-tightness (PAM_SIL) evaluation. If an array of integers, largest average 
25
+\item{eval_kclust}{numeric. The number of clusters (> 1) to be used for pam
26
+tightness (PAM_SIL) evaluation. If an array of integers, largest average
27 27
 silhouette width (tightness) will be reported in PAM_SIL. If NULL, PAM_SIL
28 28
 will be returned NA.}
29 29
 
... ...
@@ -33,10 +33,10 @@ NA is allowed. If NULL, condition ASW, BIO_SIL, will be returned NA.}
33 33
 \item{batch}{factor. A known batch variable (variation to be removed), NA is
34 34
 allowed. If NULL, batch ASW, BATCH_SIL, will be returned NA.}
35 35
 
36
-\item{qc_factors}{Factors of unwanted variation derived from quality 
36
+\item{qc_factors}{Factors of unwanted variation derived from quality
37 37
 metrics. If NULL, qc correlations, EXP_QC_COR, will be returned NA.}
38 38
 
39
-\item{uv_factors}{Factors of unwanted variation derived from negative 
39
+\item{uv_factors}{Factors of unwanted variation derived from negative
40 40
 control genes (evaluation set). If NULL, uv correlations, EXP_UV_COR,
41 41
 will be returned NA.}
42 42
 
... ...
@@ -44,41 +44,47 @@ will be returned NA.}
44 44
 control genes (evaluation set). If NULL, wv correlations, EXP_WV_COR,
45 45
 will be returned NA.}
46 46
 
47
-\item{is_log}{logical. If TRUE the expr matrix is already logged and log 
47
+\item{is_log}{logical. If TRUE the expr matrix is already logged and log
48 48
 transformation will not be carried out prior to projection. Default FALSE.}
49 49
 
50
-\item{stratified_pam}{logical. If TRUE then maximum ASW is separately 
51
-computed for each biological-cross-batch stratum (accepts NAs), and a 
50
+\item{stratified_pam}{logical. If TRUE then maximum ASW is separately
51
+computed for each biological-cross-batch stratum (accepts NAs), and a
52 52
 weighted average silhouette width is returned as PAM_SIL. Default FALSE.}
53 53
 
54
-\item{stratified_rle}{logical. If TRUE then rle metrics are separately 
54
+\item{stratified_cor}{logical. If TRUE then cor metrics are separately
55
+computed for each biological-cross-batch stratum (accepts NAs), and
56
+weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR.
57
+Default FALSE.}
58
+
59
+\item{stratified_rle}{logical. If TRUE then rle metrics are separately
55 60
 computed for each biological-cross-batch stratum (accepts NAs), and
56 61
 weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.}
57 62
 }
58 63
 \value{
59 64
 A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average
60
-  silhouette width by biological condition.} \item{BATCH_SIL}{ Average 
61
-  silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average 
62
-  silhouette width from PAM clustering (see stratified_pam argument).} 
65
+  silhouette width by biological condition.} \item{BATCH_SIL}{ Average
66
+  silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average
67
+  silhouette width from PAM clustering (see stratified_pam argument).}
63 68
   \item{EXP_QC_COR}{ Maximum squared spearman correlation between expression
64
-  pcs and quality factors.} \item{EXP_UV_COR}{ Maximum squared spearman 
65
-  correlation between expression pcs and negative control gene factors.} 
66
-  \item{EXP_WV_COR}{ Maximum squared spearman correlation between expression
67
-  pcs and positive control gene factors.} \item{RLE_MED}{ The mean squared 
68
-  median Relative Log Expression (RLE) (see stratified_rle argument).}
69
-  \item{RLE_IQR}{ The variance of the inter-quartile range (IQR) of the RLE
70
-  (see stratified_rle argument).} }
69
+  pcs and quality factors (see stratified_cor argument).} \item{EXP_UV_COR}{
70
+  Maximum squared spearman correlation between expression pcs and negative
71
+  control gene factors (see stratified_cor argument).} \item{EXP_WV_COR}{
72
+  Maximum squared spearman correlation between expression pcs and positive
73
+  control gene factors (see stratified_cor argument).} \item{RLE_MED}{ The
74
+  mean squared median Relative Log Expression (RLE) (see stratified_rle
75
+  argument).} \item{RLE_IQR}{ The variance of the inter-quartile range (IQR)
76
+  of the RLE (see stratified_rle argument).} }
71 77
 }
72 78
 \description{
73
-This function evaluates a (normalized) expression matrix using SCONE 
74
-criteria, producing 8 metrics based on i) Clustering, ii) Correlations and 
79
+This function evaluates a (normalized) expression matrix using SCONE
80
+criteria, producing 8 metrics based on i) Clustering, ii) Correlations and
75 81
 iii) Relative Expression.
76 82
 }
77 83
 \details{
78 84
 Users may specify their own eval_proj function that will be used to
79 85
   compute Clustering and Correlation metrics. This eval_proj() function must
80
-  have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+ 
81
-  pseudocount) expression data (genes in rows, cells in columns).} 
86
+  have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+
87
+  pseudocount) expression data (genes in rows, cells in columns).}
82 88
   \item{eval_proj_args}{ list. additional function arguments, e.g. prior
83 89
   data weights.}} and it must output a matrix representation of the original
84 90
   data (cells in rows, factors in columns). The value of eval_proj_args is
... ...
@@ -92,7 +98,7 @@ bio = as.factor(rep(c(1,2),each = 2))
92 98
 batch = as.factor(rep(c(1,2),2))
93 99
 log_expr = matrix(rnorm(20),ncol = 4)
94 100
 
95
-scone_metrics = score_matrix(log_expr, 
101
+scone_metrics = score_matrix(log_expr,
96 102
    bio = bio, batch = batch,
97 103
    eval_kclust = 2, is_log = TRUE)
98 104