Browse code

EXP_.._COR methods now measure R^2

mbcole authored on 13/09/2017 23:48:47
Showing 2 changed files

... ...
@@ -63,11 +63,11 @@
63 63
 #'   silhouette width by biological condition.} \item{BATCH_SIL}{ Average
64 64
 #'   silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average
65 65
 #'   silhouette width from PAM clustering (see stratified_pam argument).}
66
-#'   \item{EXP_QC_COR}{ Maximum squared spearman correlation between expression
66
+#'   \item{EXP_QC_COR}{ Coefficient of determination between expression
67 67
 #'   pcs and quality factors (see stratified_cor argument).} \item{EXP_UV_COR}{
68
-#'   Maximum squared spearman correlation between expression pcs and negative
68
+#'   Coefficient of determination between expression pcs and negative
69 69
 #'   control gene factors (see stratified_cor argument).} \item{EXP_WV_COR}{
70
-#'   Maximum squared spearman correlation between expression pcs and positive
70
+#'   Coefficient of determination between expression pcs and positive
71 71
 #'   control gene factors (see stratified_cor argument).} \item{RLE_MED}{ The
72 72
 #'   mean squared median Relative Log Expression (RLE) (see stratified_rle
73 73
 #'   argument).} \item{RLE_IQR}{ The variance of the inter-quartile range (IQR)
... ...
@@ -268,23 +268,26 @@ score_matrix <- function(expr,
268 268
       
269 269
       # Max cor with quality factors.
270 270
       if (!is.null(qc_factors)) {
271
-        EXP_QC_COR <- EXP_QC_COR + cond_w * max(cor(proj[is_cond, ],
272
-                                                    qc_factors[is_cond, ],
273
-                                                    method = "spearman") ^ 2)
271
+        EXP_QC_COR <-
272
+          EXP_QC_COR + cond_w * (1 - sum(unlist(apply(proj, 2, function(y) {
273
+            lm(y ~ qc_factors)$residual
274
+          })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2))
274 275
       }
275 276
       
276 277
       # Max cor with UV factors.
277 278
       if (!is.null(uv_factors)) {
278
-        EXP_UV_COR  <- EXP_UV_COR + cond_w * max(cor(proj[is_cond, ],
279
-                                                     uv_factors[is_cond, ],
280
-                                                     method = "spearman") ^ 2)
279
+        EXP_UV_COR  <-
280
+          EXP_UV_COR + cond_w * (1 - sum(unlist(apply(proj, 2, function(y) {
281
+            lm(y ~ uv_factors)$residual
282
+          })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2))
281 283
       }
282 284
       
283 285
       # Max cor with WV factors.
284 286
       if (!is.null(wv_factors)) {
285
-        EXP_WV_COR <-  EXP_WV_COR + cond_w * max(cor(proj[is_cond, ],
286
-                                                     wv_factors[is_cond, ],
287
-                                                     method = "spearman") ^ 2)
287
+        EXP_WV_COR <-
288
+          EXP_WV_COR + cond_w * (1 - sum(unlist(apply(proj, 2, function(y) {
289
+            lm(y ~ wv_factors)$residual
290
+          })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2))
288 291
       }
289 292
       
290 293
     }
... ...
@@ -308,21 +311,27 @@ score_matrix <- function(expr,
308 311
   } else{
309 312
     # Max cor with quality factors.
310 313
     if (!is.null(qc_factors)) {
311
-      EXP_QC_COR = max(cor(proj, qc_factors, method = "spearman") ^ 2)
314
+      EXP_QC_COR <- 1 - sum(unlist(apply(proj, 2, function(y) {
315
+        lm(y ~ qc_factors)$residual
316
+        })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2)
312 317
     } else{
313 318
       EXP_QC_COR = NA
314 319
     }
315 320
     
316 321
     # Max cor with UV factors.
317 322
     if (!is.null(uv_factors)) {
318
-      EXP_UV_COR = max(cor(proj, uv_factors, method = "spearman") ^ 2)
323
+      EXP_UV_COR <- 1 - sum(unlist(apply(proj, 2, function(y) {
324
+        lm(y ~ uv_factors)$residual
325
+      })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2)
319 326
     } else{
320 327
       EXP_UV_COR = NA
321 328
     }
322 329
     
323 330
     # Max cor with WV factors.
324 331
     if (!is.null(wv_factors)) {
325
-      EXP_WV_COR = max(cor(proj, wv_factors, method = "spearman") ^ 2)
332
+      EXP_WV_COR <- 1 - sum(unlist(apply(proj, 2, function(y) {
333
+        lm(y ~ wv_factors)$residual
334
+      })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2)
326 335
     } else{
327 336
       EXP_WV_COR = NA
328 337
     }
... ...
@@ -65,11 +65,11 @@ A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average
65 65
   silhouette width by biological condition.} \item{BATCH_SIL}{ Average
66 66
   silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average
67 67
   silhouette width from PAM clustering (see stratified_pam argument).}
68
-  \item{EXP_QC_COR}{ Maximum squared spearman correlation between expression
68
+  \item{EXP_QC_COR}{ Coefficient of determination between expression
69 69
   pcs and quality factors (see stratified_cor argument).} \item{EXP_UV_COR}{
70
-  Maximum squared spearman correlation between expression pcs and negative
70
+  Coefficient of determination between expression pcs and negative
71 71
   control gene factors (see stratified_cor argument).} \item{EXP_WV_COR}{
72
-  Maximum squared spearman correlation between expression pcs and positive
72
+  Coefficient of determination between expression pcs and positive
73 73
   control gene factors (see stratified_cor argument).} \item{RLE_MED}{ The
74 74
   mean squared median Relative Log Expression (RLE) (see stratified_rle
75 75
   argument).} \item{RLE_IQR}{ The variance of the inter-quartile range (IQR)