... | ... |
@@ -60,6 +60,7 @@ importFrom(grDevices,pdf) |
60 | 60 |
importFrom(graphics,abline) |
61 | 61 |
importFrom(graphics,arrows) |
62 | 62 |
importFrom(graphics,barplot) |
63 |
+importFrom(graphics,boxplot) |
|
63 | 64 |
importFrom(graphics,hist) |
64 | 65 |
importFrom(graphics,legend) |
65 | 66 |
importFrom(graphics,lines) |
... | ... |
@@ -91,6 +92,7 @@ importFrom(stats,dist) |
91 | 92 |
importFrom(stats,dnbinom) |
92 | 93 |
importFrom(stats,fitted.values) |
93 | 94 |
importFrom(stats,glm) |
95 |
+importFrom(stats,lm) |
|
94 | 96 |
importFrom(stats,mad) |
95 | 97 |
importFrom(stats,median) |
96 | 98 |
importFrom(stats,model.matrix) |
... | ... |
@@ -101,7 +103,10 @@ importFrom(stats,prcomp) |
101 | 103 |
importFrom(stats,quantile) |
102 | 104 |
importFrom(stats,quasibinomial) |
103 | 105 |
importFrom(stats,sd) |
106 |
+importFrom(stats,setNames) |
|
107 |
+importFrom(stats,var) |
|
104 | 108 |
importFrom(utils,capture.output) |
105 | 109 |
importFrom(utils,head) |
106 | 110 |
importFrom(utils,sessionInfo) |
111 |
+importFrom(utils,write.csv) |
|
107 | 112 |
importFrom(utils,write.table) |
... | ... |
@@ -33,7 +33,8 @@ TMM_FN = function(ei) { |
33 | 33 |
return(eo) |
34 | 34 |
} |
35 | 35 |
|
36 |
-#' Relative log-expression (RLE; DESeq) scaling normalization wrapper function |
|
36 |
+#' Relative log-expression (RLE; DESeq) scaling normalization wrapper |
|
37 |
+#' function |
|
37 | 38 |
#' @importFrom edgeR calcNormFactors |
38 | 39 |
#' @details SCONE scaling wrapper for \code{\link[edgeR]{calcNormFactors}}). |
39 | 40 |
#' @export |
... | ... |
@@ -24,6 +24,9 @@ |
24 | 24 |
#' |
25 | 25 |
#' @importFrom RColorBrewer brewer.pal |
26 | 26 |
#' @importFrom rARPACK svds |
27 |
+#' @importFrom graphics boxplot |
|
28 |
+#' @importFrom utils write.csv |
|
29 |
+#' @importFrom stats setNames var |
|
27 | 30 |
#' @export |
28 | 31 |
#' |
29 | 32 |
#' @return An object that represents the SCONE report app. |
... | ... |
@@ -66,15 +69,15 @@ sconeReport = function(x, methods, |
66 | 69 |
stop("reshape2 package needed for sconeReport()") |
67 | 70 |
} |
68 | 71 |
|
69 |
- if (!require("plotly", quietly = TRUE)) { |
|
72 |
+ if (!requireNamespace("plotly", quietly = TRUE)) { |
|
70 | 73 |
stop("plotly package needed for sconeReport()") |
71 | 74 |
} |
72 | 75 |
|
73 |
- if (!require("visNetwork", quietly = TRUE)) { |
|
76 |
+ if (!requireNamespace("visNetwork", quietly = TRUE)) { |
|
74 | 77 |
stop("visNetwork package needed for sconeReport()") |
75 | 78 |
} |
76 | 79 |
|
77 |
- if (!require("ggplot2", quietly = TRUE)) { |
|
80 |
+ if (!requireNamespace("ggplot2", quietly = TRUE)) { |
|
78 | 81 |
stop("ggplot2 package needed for sconeReport()") |
79 | 82 |
} |
80 | 83 |
|
... | ... |
@@ -223,18 +226,18 @@ sconeReport = function(x, methods, |
223 | 226 |
), |
224 | 227 |
shiny::tabPanel("PCA", |
225 | 228 |
shiny::br(), |
226 |
- shiny::p(paste0("This panel shows principal ", |
|
229 |
+ shiny::p(paste0("This panel shows principal ", |
|
227 | 230 |
"component analysis ", |
228 | 231 |
"(PCA) results", |
229 | 232 |
" on different subsets ", |
230 | 233 |
"of genes.")), |
231 | 234 |
shiny::br(), |
232 |
- shiny::h6("Variance Explained (All Genes)"), |
|
235 |
+ shiny::h6("Variance Explained (All Genes)"), |
|
233 | 236 |
shiny::p(paste0("Use this plot", |
234 | 237 |
" to decide on the ", |
235 | 238 |
"dimensionality of the reduced", |
236 | 239 |
" spaced used for evaluation.")), |
237 |
- shiny::plotOutput("plot_scree",width = "650px", |
|
240 |
+ shiny::plotOutput("plot_scree",width = "650px", |
|
238 | 241 |
height = "400px"), |
239 | 242 |
shiny::br(), |
240 | 243 |
shiny::h6("2D (All Genes)"), |
... | ... |
@@ -264,7 +267,7 @@ sconeReport = function(x, methods, |
264 | 267 |
), |
265 | 268 |
shiny::tabPanel("QC", |
266 | 269 |
shiny::br(), |
267 |
- shiny::p(paste0("This panel shows the absolute", |
|
270 |
+ shiny::p(paste0("This panel shows the absolute", |
|
268 | 271 |
" correlations between", |
269 | 272 |
" Principal", |
270 | 273 |
" Components (PCs) of", |
... | ... |
@@ -326,7 +329,7 @@ sconeReport = function(x, methods, |
326 | 329 |
), |
327 | 330 |
shiny::tabPanel("Control Genes", |
328 | 331 |
shiny::br(), |
329 |
- shiny::p(paste0("Heatmap of control genes, ", |
|
332 |
+ shiny::p(paste0("Heatmap of control genes, ", |
|
330 | 333 |
"colored by all", |
331 | 334 |
" three categories.")), |
332 | 335 |
shiny::br(), |
... | ... |
@@ -625,7 +628,7 @@ sconeReport = function(x, methods, |
625 | 628 |
fill = cc[sort(unique(factor(strat_col())))]) |
626 | 629 |
}) |
627 | 630 |
|
628 |
- output$plot3d_base <- shiny::renderPlotly({ |
|
631 |
+ output$plot3d_base <- plotly::renderPlotly({ |
|
629 | 632 |
PC1 <- PC2 <- PC3 <- NULL |
630 | 633 |
df <- setNames(data.frame(pc_obj_base()$x[,1:3]), |
631 | 634 |
c("PC1", "PC2", "PC3")) |
... | ... |
@@ -665,7 +668,7 @@ sconeReport = function(x, methods, |
665 | 668 |
abs(cor(qc,pc_obj_base()$x)) |
666 | 669 |
}) |
667 | 670 |
|
668 |
- output$qccorPlot <- shiny::renderPlotly({ |
|
671 |
+ output$qccorPlot <- plotly::renderPlotly({ |
|
669 | 672 |
metric <- value <- PC <- NULL |
670 | 673 |
|
671 | 674 |
df = reshape2::melt(cor_qc()[,1:input$dim]) |
... | ... |
@@ -696,7 +699,7 @@ sconeReport = function(x, methods, |
696 | 699 |
fill = cc[sort(unique(factor(strat_col())))]) |
697 | 700 |
}) |
698 | 701 |
|
699 |
- output$plot3d_qc <- shiny::renderPlotly({ |
|
702 |
+ output$plot3d_qc <- plotly::renderPlotly({ |
|
700 | 703 |
if(ncol(pc_obj_qc()$x) >= 3){ |
701 | 704 |
PC1 <- PC2 <- PC3 <- NULL |
702 | 705 |
df <- setNames(data.frame(pc_obj_qc()$x[,1:3]), |
... | ... |
@@ -1,120 +1,146 @@ |
1 | 1 |
#' SCONE Evaluation: Evaluate an Expression Matrix |
2 |
-#' |
|
3 |
-#' This function evaluates a (normalized) expression matrix using SCONE |
|
4 |
-#' criteria, producing 8 metrics based on i) Clustering, ii) Correlations and |
|
2 |
+#' |
|
3 |
+#' This function evaluates a (normalized) expression matrix using SCONE |
|
4 |
+#' criteria, producing 8 metrics based on i) Clustering, ii) Correlations and |
|
5 | 5 |
#' iii) Relative Expression. |
6 |
-#' |
|
6 |
+#' |
|
7 | 7 |
#' @details Users may specify their own eval_proj function that will be used to |
8 | 8 |
#' compute Clustering and Correlation metrics. This eval_proj() function must |
9 |
-#' have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+ |
|
10 |
-#' pseudocount) expression data (genes in rows, cells in columns).} |
|
9 |
+#' have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+ |
|
10 |
+#' pseudocount) expression data (genes in rows, cells in columns).} |
|
11 | 11 |
#' \item{eval_proj_args}{ list. additional function arguments, e.g. prior |
12 | 12 |
#' data weights.}} and it must output a matrix representation of the original |
13 | 13 |
#' data (cells in rows, factors in columns). The value of eval_proj_args is |
14 | 14 |
#' passed to the user-defined function from the eval_proj_args argument of |
15 | 15 |
#' the main score_matrix() function call. |
16 |
-#' |
|
17 |
-#' @param expr matrix. The expression data matrix (genes in rows, cells in |
|
16 |
+#' |
|
17 |
+#' @param expr matrix. The expression data matrix (genes in rows, cells in |
|
18 | 18 |
#' columns). |
19 | 19 |
#' @param eval_pcs numeric. The number of principal components to use for |
20 | 20 |
#' evaluation (Default 3). Ignored if !is.null(eval_proj). |
21 | 21 |
#' @param eval_proj function. Projection function for evaluation (see Details). |
22 | 22 |
#' If NULL, PCA is used for projection |
23 |
-#' @param eval_proj_args list. List of arguments passed to projection function |
|
23 |
+#' @param eval_proj_args list. List of arguments passed to projection function |
|
24 | 24 |
#' as eval_proj_args (see Details). |
25 |
-#' @param eval_kclust numeric. The number of clusters (> 1) to be used for pam |
|
26 |
-#' tightness (PAM_SIL) evaluation. If an array of integers, largest average |
|
25 |
+#' @param eval_kclust numeric. The number of clusters (> 1) to be used for pam |
|
26 |
+#' tightness (PAM_SIL) evaluation. If an array of integers, largest average |
|
27 | 27 |
#' silhouette width (tightness) will be reported in PAM_SIL. If NULL, PAM_SIL |
28 | 28 |
#' will be returned NA. |
29 | 29 |
#' @param bio factor. A known biological condition (variation to be preserved), |
30 | 30 |
#' NA is allowed. If NULL, condition ASW, BIO_SIL, will be returned NA. |
31 | 31 |
#' @param batch factor. A known batch variable (variation to be removed), NA is |
32 | 32 |
#' allowed. If NULL, batch ASW, BATCH_SIL, will be returned NA. |
33 |
-#' @param qc_factors Factors of unwanted variation derived from quality |
|
33 |
+#' @param qc_factors Factors of unwanted variation derived from quality |
|
34 | 34 |
#' metrics. If NULL, qc correlations, EXP_QC_COR, will be returned NA. |
35 |
-#' @param uv_factors Factors of unwanted variation derived from negative |
|
35 |
+#' @param uv_factors Factors of unwanted variation derived from negative |
|
36 | 36 |
#' control genes (evaluation set). If NULL, uv correlations, EXP_UV_COR, |
37 | 37 |
#' will be returned NA. |
38 | 38 |
#' @param wv_factors Factors of wanted variation derived from positive |
39 | 39 |
#' control genes (evaluation set). If NULL, wv correlations, EXP_WV_COR, |
40 | 40 |
#' will be returned NA. |
41 |
-#' @param is_log logical. If TRUE the expr matrix is already logged and log |
|
41 |
+#' @param is_log logical. If TRUE the expr matrix is already logged and log |
|
42 | 42 |
#' transformation will not be carried out prior to projection. Default FALSE. |
43 |
-#' @param stratified_pam logical. If TRUE then maximum ASW is separately |
|
44 |
-#' computed for each biological-cross-batch stratum (accepts NAs), and a |
|
43 |
+#' @param stratified_pam logical. If TRUE then maximum ASW is separately |
|
44 |
+#' computed for each biological-cross-batch stratum (accepts NAs), and a |
|
45 | 45 |
#' weighted average silhouette width is returned as PAM_SIL. Default FALSE. |
46 |
-#' |
|
46 |
+#' @param stratified_cor logical. If TRUE then cor metrics are separately |
|
47 |
+#' computed for each biological-cross-batch stratum (accepts NAs), and |
|
48 |
+#' weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR. |
|
49 |
+#' Default FALSE. |
|
50 |
+#' @param stratified_rle logical. If TRUE then rle metrics are separately |
|
51 |
+#' computed for each biological-cross-batch stratum (accepts NAs), and |
|
52 |
+#' weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE. |
|
53 |
+#' |
|
47 | 54 |
#' @importFrom class knn |
48 | 55 |
#' @importFrom fpc pamk |
49 | 56 |
#' @importFrom cluster silhouette |
50 | 57 |
#' @importFrom rARPACK svds |
51 | 58 |
#' @importFrom matrixStats rowMedians colMedians colIQRs |
52 |
-#' |
|
59 |
+#' @importFrom stats lm |
|
60 |
+#' |
|
53 | 61 |
#' @export |
54 |
-#' |
|
62 |
+#' |
|
55 | 63 |
#' @return A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average |
56 |
-#' silhouette width by biological condition.} \item{BATCH_SIL}{ Average |
|
57 |
-#' silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average |
|
58 |
-#' silhouette width from PAM clustering (see stratified_pam argument).} |
|
59 |
-#' \item{EXP_QC_COR}{ Maximum squared spearman correlation between expression |
|
60 |
-#' pcs and quality factors.} \item{EXP_UV_COR}{ Maximum squared spearman |
|
61 |
-#' correlation between expression pcs and negative control gene factors.} |
|
62 |
-#' \item{EXP_WV_COR}{ Maximum squared spearman correlation between expression |
|
63 |
-#' pcs and positive control gene factors.} \item{RLE_MED}{ The mean squared |
|
64 |
-#' median Relative Log Expression (RLE).} \item{RLE_IQR}{ The variance of the |
|
65 |
-#' inter-quartile range (IQR) of the RLE.} } |
|
66 |
-#' |
|
67 |
-#' @examples |
|
68 |
-#' |
|
64 |
+#' silhouette width by biological condition.} \item{BATCH_SIL}{ Average |
|
65 |
+#' silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average |
|
66 |
+#' silhouette width from PAM clustering (see stratified_pam argument).} |
|
67 |
+#' \item{EXP_QC_COR}{ Coefficient of determination between expression |
|
68 |
+#' pcs and quality factors (see stratified_cor argument).} \item{EXP_UV_COR}{ |
|
69 |
+#' Coefficient of determination between expression pcs and negative |
|
70 |
+#' control gene factors (see stratified_cor argument).} \item{EXP_WV_COR}{ |
|
71 |
+#' Coefficient of determination between expression pcs and positive |
|
72 |
+#' control gene factors (see stratified_cor argument).} \item{RLE_MED}{ The |
|
73 |
+#' mean squared median Relative Log Expression (RLE) (see stratified_rle |
|
74 |
+#' argument).} \item{RLE_IQR}{ The variance of the inter-quartile range (IQR) |
|
75 |
+#' of the RLE (see stratified_rle argument).} } |
|
76 |
+#' |
|
77 |
+#' @examples |
|
78 |
+#' |
|
69 | 79 |
#' set.seed(141) |
70 | 80 |
#' bio = as.factor(rep(c(1,2),each = 2)) |
71 | 81 |
#' batch = as.factor(rep(c(1,2),2)) |
72 | 82 |
#' log_expr = matrix(rnorm(20),ncol = 4) |
73 |
-#' |
|
74 |
-#' scone_metrics = score_matrix(log_expr, |
|
83 |
+#' |
|
84 |
+#' scone_metrics = score_matrix(log_expr, |
|
75 | 85 |
#' bio = bio, batch = batch, |
76 | 86 |
#' eval_kclust = 2, is_log = TRUE) |
77 |
-#' |
|
87 |
+#' |
|
78 | 88 |
|
79 |
-score_matrix <- function(expr, eval_pcs = 3, |
|
80 |
- eval_proj = NULL, eval_proj_args = NULL, |
|
89 |
+score_matrix <- function(expr, |
|
90 |
+ eval_pcs = 3, |
|
91 |
+ eval_proj = NULL, |
|
92 |
+ eval_proj_args = NULL, |
|
81 | 93 |
eval_kclust = NULL, |
82 |
- bio = NULL, batch = NULL, |
|
94 |
+ bio = NULL, |
|
95 |
+ batch = NULL, |
|
83 | 96 |
qc_factors = NULL, |
84 | 97 |
uv_factors = NULL, |
85 | 98 |
wv_factors = NULL, |
86 |
- is_log=FALSE, stratified_pam = FALSE){ |
|
87 |
- |
|
88 |
- if(any(is.na(expr) | is.infinite(expr) | is.nan(expr))){ |
|
99 |
+ is_log = FALSE, |
|
100 |
+ stratified_pam = FALSE, |
|
101 |
+ stratified_cor = FALSE, |
|
102 |
+ stratified_rle = FALSE) { |
|
103 |
+ if (any(is.na(expr) | is.infinite(expr) | is.nan(expr))) { |
|
89 | 104 |
stop("NA/Inf/NaN Expression Values.") |
90 | 105 |
} |
91 | 106 |
|
92 |
- if(!is_log) { |
|
107 |
+ if (!is_log) { |
|
93 | 108 |
expr <- log1p(expr) |
94 | 109 |
} |
95 | 110 |
|
96 |
- # The svd we do below on expr throws an exception if |
|
97 |
- # expr created by one of the normalizations has a |
|
111 |
+ # The svd we do below on expr throws an exception if |
|
112 |
+ # expr created by one of the normalizations has a |
|
98 | 113 |
# constant feature (=gene, i.e. row) |
99 |
- constantFeatures = apply(expr, 1, function(x) max(x)-min(x)) < 1e-3 |
|
100 |
- if(any(constantFeatures)) { |
|
101 |
- warning(sprintf(paste0("scone_eval: expression matrix ", |
|
102 |
- "contained %d constant features (rows) ", |
|
103 |
- "---> excluding them"), sum(constantFeatures))) |
|
104 |
- expr = expr[!constantFeatures, ] |
|
114 |
+ constantFeatures = apply(expr, 1, function(x) |
|
115 |
+ max(x) - min(x)) < 1e-3 |
|
116 |
+ if (any(constantFeatures)) { |
|
117 |
+ warning(sprintf( |
|
118 |
+ paste0( |
|
119 |
+ "scone_eval: expression matrix ", |
|
120 |
+ "contained %d constant features (rows) ", |
|
121 |
+ "---> excluding them" |
|
122 |
+ ), |
|
123 |
+ sum(constantFeatures) |
|
124 |
+ )) |
|
125 |
+ expr = expr[!constantFeatures,] |
|
105 | 126 |
} |
106 | 127 |
|
107 |
- if(is.null(eval_proj)){ |
|
128 |
+ if (is.null(eval_proj)) { |
|
108 | 129 |
proj = tryCatch({ |
109 |
- svds(scale(t(expr), center = TRUE, scale = TRUE), |
|
110 |
- k=eval_pcs, nu=eval_pcs, nv=0)$u}, |
|
111 |
- error = function(e) { |
|
112 |
- stop("scone_eval: svd failed") |
|
113 |
- }) |
|
130 |
+ svds( |
|
131 |
+ scale(t(expr), center = TRUE, scale = TRUE), |
|
132 |
+ k = eval_pcs, |
|
133 |
+ nu = eval_pcs, |
|
134 |
+ nv = 0 |
|
135 |
+ )$u |
|
136 |
+ }, |
|
137 |
+ error = function(e) { |
|
138 |
+ stop("scone_eval: svd failed") |
|
139 |
+ }) |
|
114 | 140 |
|
115 | 141 |
|
116 | 142 |
} else { |
117 |
- proj = eval_proj(expr,eval_proj_args = eval_proj_args) |
|
143 |
+ proj = eval_proj(expr, eval_proj_args = eval_proj_args) |
|
118 | 144 |
eval_pcs = ncol(proj) |
119 | 145 |
} |
120 | 146 |
|
... | ... |
@@ -123,17 +149,21 @@ score_matrix <- function(expr, eval_pcs = 3, |
123 | 149 |
|
124 | 150 |
# Biological Condition |
125 | 151 |
|
126 |
- if( !is.null(bio) ) { |
|
127 |
- if(!all(is.na(bio))) { |
|
128 |
- if(length(unique(bio)) > 1) { |
|
152 |
+ if (!is.null(bio)) { |
|
153 |
+ if (!all(is.na(bio))) { |
|
154 |
+ if (length(unique(bio)) > 1) { |
|
129 | 155 |
BIO_SIL = summary(cluster::silhouette(as.numeric(na.omit(bio)), |
130 | 156 |
dd[!is.na(bio), |
131 | 157 |
!is.na(bio)]))$avg.width |
132 | 158 |
} else { |
133 | 159 |
BIO_SIL = NA |
134 |
- warning(paste0("after exclusion of samples, ", |
|
135 |
- "only one bio remains, BATCH_BIO", |
|
136 |
- " is undefined")) |
|
160 |
+ warning( |
|
161 |
+ paste0( |
|
162 |
+ "after exclusion of samples, ", |
|
163 |
+ "only one bio remains, BATCH_BIO", |
|
164 |
+ " is undefined" |
|
165 |
+ ) |
|
166 |
+ ) |
|
137 | 167 |
} |
138 | 168 |
} else { |
139 | 169 |
BIO_SIL = NA |
... | ... |
@@ -145,17 +175,21 @@ score_matrix <- function(expr, eval_pcs = 3, |
145 | 175 |
|
146 | 176 |
# Batch Condition |
147 | 177 |
|
148 |
- if(!is.null(batch)) { |
|
149 |
- if(!all(is.na(batch))) { |
|
150 |
- if(length(unique(batch)) > 1) { |
|
178 |
+ if (!is.null(batch)) { |
|
179 |
+ if (!all(is.na(batch))) { |
|
180 |
+ if (length(unique(batch)) > 1) { |
|
151 | 181 |
BATCH_SIL <- summary(cluster::silhouette(as.numeric(na.omit(batch)), |
152 | 182 |
dd[!is.na(batch), |
153 | 183 |
!is.na(batch)]))$avg.width |
154 | 184 |
} else { |
155 | 185 |
BATCH_SIL <- NA |
156 |
- warning(paste0("after exclusion of samples,", |
|
157 |
- " only one batch remains, ", |
|
158 |
- "BATCH_SIL is undefined")) |
|
186 |
+ warning( |
|
187 |
+ paste0( |
|
188 |
+ "after exclusion of samples,", |
|
189 |
+ " only one batch remains, ", |
|
190 |
+ "BATCH_SIL is undefined" |
|
191 |
+ ) |
|
192 |
+ ) |
|
159 | 193 |
} |
160 | 194 |
} else{ |
161 | 195 |
BATCH_SIL <- NA |
... | ... |
@@ -167,94 +201,197 @@ score_matrix <- function(expr, eval_pcs = 3, |
167 | 201 |
|
168 | 202 |
## ------ PAM Tightness ----- |
169 | 203 |
|
170 |
- if ( !is.null(eval_kclust) ){ |
|
171 |
- |
|
204 |
+ if (!is.null(eval_kclust)) { |
|
172 | 205 |
# "Stratified" PAM |
173 | 206 |
|
174 |
- if(stratified_pam){ |
|
175 |
- |
|
176 |
- biobatch = as.factor(paste(bio,batch,sep = "_")) |
|
207 |
+ if (stratified_pam) { |
|
208 |
+ biobatch = as.factor(paste(bio, batch, sep = "_")) |
|
177 | 209 |
PAM_SIL = 0 |
178 | 210 |
|
179 | 211 |
# Max Average Sil Width per bio-cross-batch Condition |
180 |
- for (cond in levels(biobatch)){ |
|
212 |
+ for (cond in levels(biobatch)) { |
|
181 | 213 |
is_cond = which(biobatch == cond) |
182 | 214 |
cond_w = length(is_cond) |
183 |
- if(cond_w > max(eval_kclust)){ |
|
184 |
- pamk_object = pamk(proj[is_cond,],krange = eval_kclust) |
|
215 |
+ if (cond_w > max(eval_kclust)) { |
|
216 |
+ pamk_object = pamk(proj[is_cond, ], krange = eval_kclust) |
|
185 | 217 |
|
186 |
- # Despite krange excluding nc = 1, |
|
218 |
+ # Despite krange excluding nc = 1, |
|
187 | 219 |
# if asw is negative, nc = 1 will be selected |
188 |
- if(is.null(pamk_object$pamobject$silinfo$avg.width) ){ |
|
220 |
+ if (is.null(pamk_object$pamobject$silinfo$avg.width)) { |
|
189 | 221 |
if (!1 %in% eval_kclust) { |
190 |
- pamk_object$pamobject$silinfo$avg.width = max( |
|
191 |
- pamk_object$crit[1:max(eval_kclust) %in% eval_kclust] |
|
192 |
- ) |
|
193 |
- }else{ |
|
194 |
- stop(paste0("nc = 1 was selected by Duda-Hart,", |
|
195 |
- " exclude 1 from eval_kclust.")) |
|
222 |
+ pamk_object$pamobject$silinfo$avg.width = |
|
223 |
+ max(pamk_object$crit[1:max(eval_kclust) %in% eval_kclust]) |
|
224 |
+ } else{ |
|
225 |
+ stop(paste0( |
|
226 |
+ "nc = 1 was selected by Duda-Hart,", |
|
227 |
+ " exclude 1 from eval_kclust." |
|
228 |
+ )) |
|
196 | 229 |
} |
197 | 230 |
} |
198 | 231 |
|
199 |
- PAM_SIL = PAM_SIL + cond_w*pamk_object$pamobject$silinfo$avg.width |
|
200 |
- }else{ |
|
201 |
- stop(paste(paste0("Number of clusters 'k' must be ", |
|
202 |
- "smaller than bio-cross-batch stratum size:"), |
|
203 |
- paste(levels(biobatch), |
|
204 |
- table(biobatch), |
|
205 |
- sep = " = ",collapse = ", "))) |
|
232 |
+ PAM_SIL = PAM_SIL + cond_w * pamk_object$pamobject$silinfo$avg.width |
|
233 |
+ } else{ |
|
234 |
+ stop(paste( |
|
235 |
+ paste0( |
|
236 |
+ "Number of clusters 'k' must be ", |
|
237 |
+ "smaller than bio-cross-batch stratum size:" |
|
238 |
+ ), |
|
239 |
+ paste( |
|
240 |
+ levels(biobatch), |
|
241 |
+ table(biobatch), |
|
242 |
+ sep = " = ", |
|
243 |
+ collapse = ", " |
|
244 |
+ ) |
|
245 |
+ )) |
|
206 | 246 |
} |
207 | 247 |
} |
208 |
- PAM_SIL = PAM_SIL/length(biobatch) |
|
248 |
+ PAM_SIL = PAM_SIL / length(biobatch) |
|
209 | 249 |
|
210 | 250 |
# Traditional PAM |
211 | 251 |
|
212 |
- }else{ |
|
213 |
- pamk_object = pamk(proj,krange = eval_kclust) |
|
252 |
+ } else{ |
|
253 |
+ pamk_object = pamk(proj, krange = eval_kclust) |
|
214 | 254 |
PAM_SIL = pamk_object$pamobject$silinfo$avg.width |
215 | 255 |
} |
216 | 256 |
|
217 |
- }else{ |
|
257 |
+ } else{ |
|
218 | 258 |
PAM_SIL = NA |
219 | 259 |
} |
220 | 260 |
|
221 | 261 |
## ------ Correlation with Factors ----- |
222 |
- |
|
223 |
- # Max cor with quality factors. |
|
224 |
- if(!is.null(qc_factors)){ |
|
225 |
- EXP_QC_COR = max(cor(proj,qc_factors,method = "spearman")^2) |
|
226 |
- }else{ |
|
227 |
- EXP_QC_COR = NA |
|
228 |
- } |
|
229 |
- |
|
230 |
- # Max cor with UV factors. |
|
231 |
- if(!is.null(uv_factors)){ |
|
232 |
- EXP_UV_COR = max(cor(proj,uv_factors,method = "spearman")^2) |
|
233 |
- }else{ |
|
234 |
- EXP_UV_COR = NA |
|
235 |
- } |
|
236 |
- |
|
237 |
- # Max cor with WV factors. |
|
238 |
- if(!is.null(wv_factors)){ |
|
239 |
- EXP_WV_COR = max(cor(proj,wv_factors,method = "spearman")^2) |
|
240 |
- }else{ |
|
241 |
- EXP_WV_COR = NA |
|
262 |
+ if (stratified_cor) { |
|
263 |
+ biobatch = as.factor(paste(bio, batch, sep = "_")) |
|
264 |
+ EXP_QC_COR <- EXP_UV_COR <- EXP_WV_COR <- 0 |
|
265 |
+ |
|
266 |
+ for (cond in levels(biobatch)) { |
|
267 |
+ is_cond = which(biobatch == cond) |
|
268 |
+ cond_w = length(is_cond) |
|
269 |
+ |
|
270 |
+ # Max cor with quality factors. |
|
271 |
+ if (!is.null(qc_factors)) { |
|
272 |
+ EXP_QC_COR <- |
|
273 |
+ EXP_QC_COR + cond_w * (1 - sum(unlist(apply(proj[is_cond,], 2, |
|
274 |
+ function(y) { |
|
275 |
+ lm(y ~ qc_factors[is_cond,])$residual |
|
276 |
+ })) ^ 2) / sum(scale(proj[is_cond,], scale = FALSE) ^ 2)) |
|
277 |
+ } |
|
278 |
+ |
|
279 |
+ # Max cor with UV factors. |
|
280 |
+ if (!is.null(uv_factors)) { |
|
281 |
+ EXP_UV_COR <- |
|
282 |
+ EXP_UV_COR + cond_w * (1 - sum(unlist(apply(proj[is_cond,], 2, |
|
283 |
+ function(y) { |
|
284 |
+ lm(y ~ uv_factors[is_cond,])$residual |
|
285 |
+ })) ^ 2) / sum(scale(proj[is_cond,], scale = FALSE) ^ 2)) |
|
286 |
+ } |
|
287 |
+ |
|
288 |
+ # Max cor with WV factors. |
|
289 |
+ if (!is.null(wv_factors)) { |
|
290 |
+ EXP_WV_COR <- |
|
291 |
+ EXP_WV_COR + cond_w * (1 - sum(unlist(apply(proj[is_cond,], 2, |
|
292 |
+ function(y) { |
|
293 |
+ lm(y ~ wv_factors[is_cond,])$residual |
|
294 |
+ })) ^ 2) / sum(scale(proj[is_cond,], scale = FALSE) ^ 2)) |
|
295 |
+ } |
|
296 |
+ |
|
297 |
+ } |
|
298 |
+ |
|
299 |
+ if (!is.null(qc_factors)) { |
|
300 |
+ EXP_QC_COR <- EXP_QC_COR / length(biobatch) |
|
301 |
+ } else{ |
|
302 |
+ EXP_QC_COR <- NA |
|
303 |
+ } |
|
304 |
+ if (!is.null(uv_factors)) { |
|
305 |
+ EXP_UV_COR <- EXP_UV_COR / length(biobatch) |
|
306 |
+ } else{ |
|
307 |
+ EXP_UV_COR <- NA |
|
308 |
+ } |
|
309 |
+ if (!is.null(wv_factors)) { |
|
310 |
+ EXP_WV_COR <- EXP_WV_COR / length(biobatch) |
|
311 |
+ } else{ |
|
312 |
+ EXP_WV_COR <- NA |
|
313 |
+ } |
|
314 |
+ |
|
315 |
+ } else{ |
|
316 |
+ # Max cor with quality factors. |
|
317 |
+ if (!is.null(qc_factors)) { |
|
318 |
+ EXP_QC_COR <- 1 - sum(unlist(apply(proj, 2, function(y) { |
|
319 |
+ lm(y ~ qc_factors)$residual |
|
320 |
+ })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2) |
|
321 |
+ } else{ |
|
322 |
+ EXP_QC_COR = NA |
|
323 |
+ } |
|
324 |
+ |
|
325 |
+ # Max cor with UV factors. |
|
326 |
+ if (!is.null(uv_factors)) { |
|
327 |
+ EXP_UV_COR <- 1 - sum(unlist(apply(proj, 2, function(y) { |
|
328 |
+ lm(y ~ uv_factors)$residual |
|
329 |
+ })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2) |
|
330 |
+ } else{ |
|
331 |
+ EXP_UV_COR = NA |
|
332 |
+ } |
|
333 |
+ |
|
334 |
+ # Max cor with WV factors. |
|
335 |
+ if (!is.null(wv_factors)) { |
|
336 |
+ EXP_WV_COR <- 1 - sum(unlist(apply(proj, 2, function(y) { |
|
337 |
+ lm(y ~ wv_factors)$residual |
|
338 |
+ })) ^ 2) / sum(scale(proj, scale = FALSE) ^ 2) |
|
339 |
+ } else{ |
|
340 |
+ EXP_WV_COR = NA |
|
341 |
+ } |
|
342 |
+ |
|
242 | 343 |
} |
243 | 344 |
|
244 | 345 |
## ----- RLE Measures |
245 |
- rle <- expr - rowMedians(expr) |
|
246 |
- |
|
247 |
- # Non-Zero Median RLE |
|
248 |
- RLE_MED <- mean(colMedians(rle)^2) # Variance of the median |
|
249 |
- |
|
250 |
- # Variable IQR RLE |
|
251 |
- RLE_IQR <- var(colIQRs(rle)) # Variance of the IQR |
|
346 |
+ if (stratified_rle) { |
|
347 |
+ biobatch = as.factor(paste(bio, batch, sep = "_")) |
|
348 |
+ RLE_MED <- RLE_IQR <- 0 |
|
349 |
+ |
|
350 |
+ for (cond in levels(biobatch)) { |
|
351 |
+ is_cond = which(biobatch == cond) |
|
352 |
+ cond_w = length(is_cond) |
|
353 |
+ rle <- expr[, is_cond] - rowMedians(expr[, is_cond]) |
|
354 |
+ |
|
355 |
+ # Non-Zero Median RLE |
|
356 |
+ RLE_MED <- |
|
357 |
+ RLE_MED + cond_w * mean(colMedians(rle) ^ 2) # Var of the med |
|
358 |
+ |
|
359 |
+ # Variable IQR RLE |
|
360 |
+ RLE_IQR <- |
|
361 |
+ RLE_IQR + cond_w * var(colIQRs(rle)) # Variance of the IQR |
|
362 |
+ |
|
363 |
+ } |
|
364 |
+ |
|
365 |
+ RLE_MED <- RLE_MED / length(biobatch) |
|
366 |
+ RLE_IQR <- RLE_IQR / length(biobatch) |
|
367 |
+ |
|
368 |
+ } else{ |
|
369 |
+ rle <- expr - rowMedians(expr) |
|
370 |
+ |
|
371 |
+ # Non-Zero Median RLE |
|
372 |
+ RLE_MED <- mean(colMedians(rle) ^ 2) # Variance of the median |
|
373 |
+ |
|
374 |
+ # Variable IQR RLE |
|
375 |
+ RLE_IQR <- var(colIQRs(rle)) # Variance of the IQR |
|
376 |
+ } |
|
252 | 377 |
|
253 |
- scores = c(BIO_SIL, BATCH_SIL, PAM_SIL, |
|
254 |
- EXP_QC_COR, EXP_UV_COR, EXP_WV_COR, |
|
255 |
- RLE_MED, RLE_IQR) |
|
256 |
- names(scores) = c("BIO_SIL", "BATCH_SIL", "PAM_SIL", |
|
257 |
- "EXP_QC_COR", "EXP_UV_COR", "EXP_WV_COR", |
|
258 |
- "RLE_MED", "RLE_IQR") |
|
378 |
+ scores = c(BIO_SIL, |
|
379 |
+ BATCH_SIL, |
|
380 |
+ PAM_SIL, |
|
381 |
+ EXP_QC_COR, |
|
382 |
+ EXP_UV_COR, |
|
383 |
+ EXP_WV_COR, |
|
384 |
+ RLE_MED, |
|
385 |
+ RLE_IQR) |
|
386 |
+ names(scores) = c( |
|
387 |
+ "BIO_SIL", |
|
388 |
+ "BATCH_SIL", |
|
389 |
+ "PAM_SIL", |
|
390 |
+ "EXP_QC_COR", |
|
391 |
+ "EXP_UV_COR", |
|
392 |
+ "EXP_WV_COR", |
|
393 |
+ "RLE_MED", |
|
394 |
+ "RLE_IQR" |
|
395 |
+ ) |
|
259 | 396 |
return(scores) |
260 | 397 |
} |
... | ... |
@@ -50,7 +50,13 @@ |
50 | 50 |
#' @param stratified_pam logical. If TRUE then maximum ASW for PAM_SIL is |
51 | 51 |
#' separately computed for each biological-cross-batch stratum (accepting |
52 | 52 |
#' NAs), and a weighted average is returned as PAM_SIL. |
53 |
-#' |
|
53 |
+#' @param stratified_cor logical. If TRUE then cor metrics are separately |
|
54 |
+#' computed for each biological-cross-batch stratum (accepts NAs), and |
|
55 |
+#' weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR. |
|
56 |
+#' Default FALSE. |
|
57 |
+#' @param stratified_rle logical. If TRUE then rle metrics are separately |
|
58 |
+#' computed for each biological-cross-batch stratum (accepts NAs), and |
|
59 |
+#' weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE. |
|
54 | 60 |
#' @param verbose logical. If TRUE some messagges are printed. |
55 | 61 |
#' @param run logical. If FALSE the normalization and evaluation are not run, |
56 | 62 |
#' but normalization parameters are returned in the output object for |
... | ... |
@@ -169,14 +175,16 @@ setMethod( |
169 | 175 |
eval_proj = NULL, |
170 | 176 |
eval_proj_args = NULL, eval_kclust=2:10, |
171 | 177 |
verbose=FALSE, stratified_pam = FALSE, |
178 |
+ stratified_cor = FALSE, |
|
179 |
+ stratified_rle = FALSE, |
|
172 | 180 |
return_norm = c("no", "in_memory", "hdf5"), |
173 | 181 |
hdf5file, |
174 | 182 |
bpparam=BiocParallel::bpparam()) { |
175 |
- |
|
176 |
- |
|
177 |
- rezero = (zero %in% c("preadjust","strong")) |
|
178 |
- fixzero = (zero %in% c("postadjust","strong")) |
|
179 |
- |
|
183 |
+ |
|
184 |
+ zero <- match.arg(zero) |
|
185 |
+ rezero <- (zero %in% c("preadjust","strong")) |
|
186 |
+ fixzero <- (zero %in% c("postadjust","strong")) |
|
187 |
+ |
|
180 | 188 |
if(x@is_log) { |
181 | 189 |
stop("At the moment, scone is implemented only for non-log counts.") |
182 | 190 |
} |
... | ... |
@@ -329,6 +337,18 @@ setMethod( |
329 | 337 |
stop("'eval_kclust' must be less than the number of samples.") |
330 | 338 |
} |
331 | 339 |
|
340 |
+ if(stratified_rle) { |
|
341 |
+ if(is.null(bio) & is.null(batch)){ |
|
342 |
+ stop("For stratified_rle, bio and/or batch must be specified") |
|
343 |
+ } |
|
344 |
+ } |
|
345 |
+ |
|
346 |
+ if(stratified_cor) { |
|
347 |
+ if(is.null(bio) & is.null(batch)){ |
|
348 |
+ stop("For stratified_cor, bio and/or batch must be specified") |
|
349 |
+ } |
|
350 |
+ } |
|
351 |
+ |
|
332 | 352 |
if(!is.null(eval_kclust) & stratified_pam) { |
333 | 353 |
if(is.null(bio) & is.null(batch)){ |
334 | 354 |
stop("For stratified_pam, bio and/or batch must be specified") |
... | ... |
@@ -539,7 +559,10 @@ setMethod( |
539 | 559 |
bio = bio, batch = batch, |
540 | 560 |
qc_factors = qc_pcs, |
541 | 561 |
uv_factors = uv_factors, wv_factors = wv_factors, |
542 |
- is_log = TRUE, stratified_pam = stratified_pam) |
|
562 |
+ stratified_pam = stratified_pam, |
|
563 |
+ stratified_cor = stratified_cor, |
|
564 |
+ stratified_rle = stratified_rle, |
|
565 |
+ is_log = TRUE) |
|
543 | 566 |
} else { |
544 | 567 |
score <- NULL |
545 | 568 |
} |
... | ... |
@@ -296,8 +296,8 @@ impute_null <- function(expression,impute_args) { |
296 | 296 |
#' \item{p_nodrop}{ 1 - the probability P(drop|Y), useful as weights in |
297 | 297 |
#' weighted PCA} \item{expected_state}{ the expected |
298 | 298 |
#' value E[Z] (1 = "on")} \item{loglik}{ the log-likelihood} |
299 |
-#' \item{convergence}{for all genes, 0 if the algorithm converged and 1 if maxiter was |
|
300 |
-#' reached} } |
|
299 |
+#' \item{convergence}{for all genes, 0 if the algorithm converged and |
|
300 |
+#' 1 if maxiter was reached} } |
|
301 | 301 |
#' |
302 | 302 |
#' @examples |
303 | 303 |
#' mat <- matrix(rpois(1000, lambda = 3), ncol=10) |
... | ... |
@@ -1,3 +1,10 @@ |
1 |
+Changes in version 1.1.3 (2018-10-19) |
|
2 |
+======================== |
|
3 |
+ |
|
4 |
+* Modified COR scores to reflect R^2 for regression on UV/WV/QC rather than max correlation. |
|
5 |
+* Bug fix for shiny report. |
|
6 |
+* Updated vignette. |
|
7 |
+ |
|
1 | 8 |
Changes in version 1.1.2 (2017-7-3) |
2 | 9 |
======================== |
3 | 10 |
|
... | ... |
@@ -2,7 +2,8 @@ |
2 | 2 |
% Please edit documentation in R/SCONE_DEFAULTS.R |
3 | 3 |
\name{DESEQ_FN} |
4 | 4 |
\alias{DESEQ_FN} |
5 |
-\title{Relative log-expression (RLE; DESeq) scaling normalization wrapper function} |
|
5 |
+\title{Relative log-expression (RLE; DESeq) scaling normalization wrapper |
|
6 |
+function} |
|
6 | 7 |
\usage{ |
7 | 8 |
DESEQ_FN(ei) |
8 | 9 |
} |
... | ... |
@@ -13,7 +14,8 @@ DESEQ_FN(ei) |
13 | 14 |
RLE normalized matrix. |
14 | 15 |
} |
15 | 16 |
\description{ |
16 |
-Relative log-expression (RLE; DESeq) scaling normalization wrapper function |
|
17 |
+Relative log-expression (RLE; DESeq) scaling normalization wrapper |
|
18 |
+function |
|
17 | 19 |
} |
18 | 20 |
\details{ |
19 | 21 |
SCONE scaling wrapper for \code{\link[edgeR]{calcNormFactors}}). |
... | ... |
@@ -40,8 +40,8 @@ a list with the following elements: \itemize{ \item{W}{ coefficients |
40 | 40 |
\item{p_nodrop}{ 1 - the probability P(drop|Y), useful as weights in |
41 | 41 |
weighted PCA} \item{expected_state}{ the expected |
42 | 42 |
value E[Z] (1 = "on")} \item{loglik}{ the log-likelihood} |
43 |
- \item{convergence}{for all genes, 0 if the algorithm converged and 1 if maxiter was |
|
44 |
- reached} } |
|
43 |
+ \item{convergence}{for all genes, 0 if the algorithm converged and |
|
44 |
+ 1 if maxiter was reached} } |
|
45 | 45 |
} |
46 | 46 |
\description{ |
47 | 47 |
This function implements Newton's method for solving zero of |
... | ... |
@@ -15,8 +15,8 @@ scone(x, ...) |
15 | 15 |
adjust_batch = c("no", "yes", "force"), run = TRUE, evaluate = TRUE, |
16 | 16 |
eval_pcs = 3, eval_proj = NULL, eval_proj_args = NULL, |
17 | 17 |
eval_kclust = 2:10, verbose = FALSE, stratified_pam = FALSE, |
18 |
- return_norm = c("no", "in_memory", "hdf5"), hdf5file, |
|
19 |
- bpparam = BiocParallel::bpparam()) |
|
18 |
+ stratified_cor = FALSE, stratified_rle = FALSE, return_norm = c("no", |
|
19 |
+ "in_memory", "hdf5"), hdf5file, bpparam = BiocParallel::bpparam()) |
|
20 | 20 |
} |
21 | 21 |
\arguments{ |
22 | 22 |
\item{x}{a \code{\link{SconeExperiment}} object.} |
... | ... |
@@ -77,6 +77,15 @@ If NULL, tightness will be returned NA.} |
77 | 77 |
separately computed for each biological-cross-batch stratum (accepting |
78 | 78 |
NAs), and a weighted average is returned as PAM_SIL.} |
79 | 79 |
|
80 |
+\item{stratified_cor}{logical. If TRUE then cor metrics are separately |
|
81 |
+computed for each biological-cross-batch stratum (accepts NAs), and |
|
82 |
+weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR. |
|
83 |
+Default FALSE.} |
|
84 |
+ |
|
85 |
+\item{stratified_rle}{logical. If TRUE then rle metrics are separately |
|
86 |
+computed for each biological-cross-batch stratum (accepts NAs), and |
|
87 |
+weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.} |
|
88 |
+ |
|
80 | 89 |
\item{return_norm}{character. If "no" the normalized values will not be |
81 | 90 |
returned with the output object. This will create a much smaller object |
82 | 91 |
and may be useful for large datasets and/or when many combinations are |
... | ... |
@@ -7,10 +7,10 @@ |
7 | 7 |
score_matrix(expr, eval_pcs = 3, eval_proj = NULL, eval_proj_args = NULL, |
8 | 8 |
eval_kclust = NULL, bio = NULL, batch = NULL, qc_factors = NULL, |
9 | 9 |
uv_factors = NULL, wv_factors = NULL, is_log = FALSE, |
10 |
- stratified_pam = FALSE) |
|
10 |
+ stratified_pam = FALSE, stratified_cor = FALSE, stratified_rle = FALSE) |
|
11 | 11 |
} |
12 | 12 |
\arguments{ |
13 |
-\item{expr}{matrix. The expression data matrix (genes in rows, cells in |
|
13 |
+\item{expr}{matrix. The expression data matrix (genes in rows, cells in |
|
14 | 14 |
columns).} |
15 | 15 |
|
16 | 16 |
\item{eval_pcs}{numeric. The number of principal components to use for |
... | ... |
@@ -19,11 +19,11 @@ evaluation (Default 3). Ignored if !is.null(eval_proj).} |
19 | 19 |
\item{eval_proj}{function. Projection function for evaluation (see Details). |
20 | 20 |
If NULL, PCA is used for projection} |
21 | 21 |
|
22 |
-\item{eval_proj_args}{list. List of arguments passed to projection function |
|
22 |
+\item{eval_proj_args}{list. List of arguments passed to projection function |
|
23 | 23 |
as eval_proj_args (see Details).} |
24 | 24 |
|
25 |
-\item{eval_kclust}{numeric. The number of clusters (> 1) to be used for pam |
|
26 |
-tightness (PAM_SIL) evaluation. If an array of integers, largest average |
|
25 |
+\item{eval_kclust}{numeric. The number of clusters (> 1) to be used for pam |
|
26 |
+tightness (PAM_SIL) evaluation. If an array of integers, largest average |
|
27 | 27 |
silhouette width (tightness) will be reported in PAM_SIL. If NULL, PAM_SIL |
28 | 28 |
will be returned NA.} |
29 | 29 |
|
... | ... |
@@ -33,10 +33,10 @@ NA is allowed. If NULL, condition ASW, BIO_SIL, will be returned NA.} |
33 | 33 |
\item{batch}{factor. A known batch variable (variation to be removed), NA is |
34 | 34 |
allowed. If NULL, batch ASW, BATCH_SIL, will be returned NA.} |
35 | 35 |
|
36 |
-\item{qc_factors}{Factors of unwanted variation derived from quality |
|
36 |
+\item{qc_factors}{Factors of unwanted variation derived from quality |
|
37 | 37 |
metrics. If NULL, qc correlations, EXP_QC_COR, will be returned NA.} |
38 | 38 |
|
39 |
-\item{uv_factors}{Factors of unwanted variation derived from negative |
|
39 |
+\item{uv_factors}{Factors of unwanted variation derived from negative |
|
40 | 40 |
control genes (evaluation set). If NULL, uv correlations, EXP_UV_COR, |
41 | 41 |
will be returned NA.} |
42 | 42 |
|
... | ... |
@@ -44,36 +44,47 @@ will be returned NA.} |
44 | 44 |
control genes (evaluation set). If NULL, wv correlations, EXP_WV_COR, |
45 | 45 |
will be returned NA.} |
46 | 46 |
|
47 |
-\item{is_log}{logical. If TRUE the expr matrix is already logged and log |
|
47 |
+\item{is_log}{logical. If TRUE the expr matrix is already logged and log |
|
48 | 48 |
transformation will not be carried out prior to projection. Default FALSE.} |
49 | 49 |
|
50 |
-\item{stratified_pam}{logical. If TRUE then maximum ASW is separately |
|
51 |
-computed for each biological-cross-batch stratum (accepts NAs), and a |
|
50 |
+\item{stratified_pam}{logical. If TRUE then maximum ASW is separately |
|
51 |
+computed for each biological-cross-batch stratum (accepts NAs), and a |
|
52 | 52 |
weighted average silhouette width is returned as PAM_SIL. Default FALSE.} |
53 |
+ |
|
54 |
+\item{stratified_cor}{logical. If TRUE then cor metrics are separately |
|
55 |
+computed for each biological-cross-batch stratum (accepts NAs), and |
|
56 |
+weighted averages are returned for EXP_QC_COR, EXP_UV_COR, & EXP_WV_COR. |
|
57 |
+Default FALSE.} |
|
58 |
+ |
|
59 |
+\item{stratified_rle}{logical. If TRUE then rle metrics are separately |
|
60 |
+computed for each biological-cross-batch stratum (accepts NAs), and |
|
61 |
+weighted averages are returned for RLE_MED & RLE_IQR. Default FALSE.} |
|
53 | 62 |
} |
54 | 63 |
\value{ |
55 | 64 |
A list with the following metrics: \itemize{ \item{BIO_SIL}{ Average |
56 |
- silhouette width by biological condition.} \item{BATCH_SIL}{ Average |
|
57 |
- silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average |
|
58 |
- silhouette width from PAM clustering (see stratified_pam argument).} |
|
59 |
- \item{EXP_QC_COR}{ Maximum squared spearman correlation between expression |
|
60 |
- pcs and quality factors.} \item{EXP_UV_COR}{ Maximum squared spearman |
|
61 |
- correlation between expression pcs and negative control gene factors.} |
|
62 |
- \item{EXP_WV_COR}{ Maximum squared spearman correlation between expression |
|
63 |
- pcs and positive control gene factors.} \item{RLE_MED}{ The mean squared |
|
64 |
- median Relative Log Expression (RLE).} \item{RLE_IQR}{ The variance of the |
|
65 |
- inter-quartile range (IQR) of the RLE.} } |
|
65 |
+ silhouette width by biological condition.} \item{BATCH_SIL}{ Average |
|
66 |
+ silhouette width by batch condition.} \item{PAM_SIL}{ Maximum average |
|
67 |
+ silhouette width from PAM clustering (see stratified_pam argument).} |
|
68 |
+ \item{EXP_QC_COR}{ Coefficient of determination between expression |
|
69 |
+ pcs and quality factors (see stratified_cor argument).} \item{EXP_UV_COR}{ |
|
70 |
+ Coefficient of determination between expression pcs and negative |
|
71 |
+ control gene factors (see stratified_cor argument).} \item{EXP_WV_COR}{ |
|
72 |
+ Coefficient of determination between expression pcs and positive |
|
73 |
+ control gene factors (see stratified_cor argument).} \item{RLE_MED}{ The |
|
74 |
+ mean squared median Relative Log Expression (RLE) (see stratified_rle |
|
75 |
+ argument).} \item{RLE_IQR}{ The variance of the inter-quartile range (IQR) |
|
76 |
+ of the RLE (see stratified_rle argument).} } |
|
66 | 77 |
} |
67 | 78 |
\description{ |
68 |
-This function evaluates a (normalized) expression matrix using SCONE |
|
69 |
-criteria, producing 8 metrics based on i) Clustering, ii) Correlations and |
|
79 |
+This function evaluates a (normalized) expression matrix using SCONE |
|
80 |
+criteria, producing 8 metrics based on i) Clustering, ii) Correlations and |
|
70 | 81 |
iii) Relative Expression. |
71 | 82 |
} |
72 | 83 |
\details{ |
73 | 84 |
Users may specify their own eval_proj function that will be used to |
74 | 85 |
compute Clustering and Correlation metrics. This eval_proj() function must |
75 |
- have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+ |
|
76 |
- pseudocount) expression data (genes in rows, cells in columns).} |
|
86 |
+ have 2 input arguments: \itemize{ \item{e}{ matrix. log-transformed (+ |
|
87 |
+ pseudocount) expression data (genes in rows, cells in columns).} |
|
77 | 88 |
\item{eval_proj_args}{ list. additional function arguments, e.g. prior |
78 | 89 |
data weights.}} and it must output a matrix representation of the original |
79 | 90 |
data (cells in rows, factors in columns). The value of eval_proj_args is |
... | ... |
@@ -87,7 +98,7 @@ bio = as.factor(rep(c(1,2),each = 2)) |
87 | 98 |
batch = as.factor(rep(c(1,2),2)) |
88 | 99 |
log_expr = matrix(rnorm(20),ncol = 4) |
89 | 100 |
|
90 |
-scone_metrics = score_matrix(log_expr, |
|
101 |
+scone_metrics = score_matrix(log_expr, |
|
91 | 102 |
bio = bio, batch = batch, |
92 | 103 |
eval_kclust = 2, is_log = TRUE) |
93 | 104 |
|
... | ... |
@@ -617,16 +617,16 @@ negative (lower is better) signature. |
617 | 617 |
expression PCs. |
618 | 618 |
Positive signature. |
619 | 619 |
* EXP_QC_COR: Removal of Alignment Artifacts. |
620 |
- Maximum squared Spearman correlation between first 3 expression PCs and |
|
620 |
+ R^2 measure for regression of first 3 expression PCs on |
|
621 | 621 |
first `k_qc` QPCs. |
622 | 622 |
Negative signature. |
623 | 623 |
* EXP_UV_COR: Removal of Expression Artifacts. |
624 |
- Maximum squared Spearman correlation between first 3 expression PCs and |
|
624 |
+ R^2 measure for regression of first 3 expression PCs on |
|
625 | 625 |
first 3 PCs of the negative control (specified by `eval_negcon` or |
626 | 626 |
`ruv_negcon` by default) sub-matrix of the original (raw) data. |
627 | 627 |
Negative signature. |
628 | 628 |
* EXP_WV_COR: Preservation of Biological Variance. |
629 |
- Maximum squared Spearman correlation between first 3 expression PCs and |
|
629 |
+ R^2 measure for regression of first 3 expression PCs on |
|
630 | 630 |
first 3 PCs of the positive control (specified by `eval_poscon`) |
631 | 631 |
sub-matrix of the original (raw) data. |
632 | 632 |
Positive signature. |
... | ... |
@@ -648,7 +648,7 @@ my_scone <- scone(my_scone, |
648 | 648 |
run=TRUE, |
649 | 649 |
eval_kclust = 2:6,stratified_pam = TRUE, |
650 | 650 |
return_norm = "in_memory", |
651 |
- zero = "preadjust") |
|
651 |
+ zero = "postadjust") |
|
652 | 652 |
|
653 | 653 |
``` |
654 | 654 |
|
... | ... |
@@ -665,9 +665,9 @@ In the call above, we have set the following parameter arguments: |
665 | 665 |
Store all normalized matrices in addition to evaluation data. |
666 | 666 |
Otherwise normalized data is not returned in the resulting |
667 | 667 |
object. |
668 |
-* zero = "preadjust". |
|
669 |
- Restore data entries that are originally zeroes back to zero |
|
670 |
- after the scaling step. |
|
668 |
+* zero = "postadjust". |
|
669 |
+ Restore data entries that are originally zeroes / negative after |
|
670 |
+ normalization to zero after the adjustment step. |
|
671 | 671 |
|
672 | 672 |
The output will contain various updated elements: |
673 | 673 |
|
... | ... |
@@ -695,7 +695,7 @@ be recomputed. |
695 | 695 |
# Step 3: Selecting a normalization for downstream analysis |
696 | 696 |
|
697 | 697 |
Based on our sorting criteria, it would appear that |
698 |
-`none,fq,no_uv,no_bio,no_batch` performs well compared to other normalization |
|
698 |
+`none,uq,ruv_k=1,no_bio,no_batch` performs well compared to other normalization |
|
699 | 699 |
workflows. A useful way to visualize this method with respect to others is the |
700 | 700 |
`biplot_color` function |
701 | 701 |
|