... | ... |
@@ -1,7 +1,9 @@ |
1 | 1 |
Package: scone |
2 | 2 |
Version: 0.99.2 |
3 | 3 |
Title: Single Cell Overview of Normalized Expression data |
4 |
-Description: SCONE is an R package for comparing and ranking the performance of different normalization schemes for single-cell RNA-seq and other high-throughput analyses. |
|
4 |
+Description: SCONE is an R package for comparing and ranking the performance of |
|
5 |
+ different normalization schemes for single-cell RNA-seq and other |
|
6 |
+ high-throughput analyses. |
|
5 | 7 |
Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com", |
6 | 8 |
role = c("aut", "cre", "cph")), |
7 | 9 |
person("Davide", "Risso", email = "risso.davide@gmail.com", |
... | ... |
@@ -43,7 +45,6 @@ Imports: |
43 | 45 |
ggplot2, |
44 | 46 |
plotly, |
45 | 47 |
reshape2, |
46 |
- scran, |
|
47 | 48 |
visNetwork |
48 | 49 |
Suggests: |
49 | 50 |
BiocStyle, |
... | ... |
@@ -60,6 +61,9 @@ biocViews: |
60 | 61 |
GeneExpression, |
61 | 62 |
RNASeq, |
62 | 63 |
Software, |
63 |
- Transcriptomics |
|
64 |
+ Transcriptomics, |
|
65 |
+ Sequencing, |
|
66 |
+ SingleCell, |
|
67 |
+ Coverage |
|
64 | 68 |
BugReports: https://github.com/YosefLab/scone/issues |
65 | 69 |
RoxygenNote: 5.0.1 |
... | ... |
@@ -4,7 +4,6 @@ export(DESEQ_FN) |
4 | 4 |
export(DESEQ_FN_POS) |
5 | 5 |
export(FQT_FN) |
6 | 6 |
export(FQ_FN) |
7 |
-export(SCRAN_FN) |
|
8 | 7 |
export(TMM_FN) |
9 | 8 |
export(UQ_FN) |
10 | 9 |
export(UQ_FN_POS) |
... | ... |
@@ -98,8 +97,6 @@ importFrom(rhdf5,h5ls) |
98 | 97 |
importFrom(rhdf5,h5read) |
99 | 98 |
importFrom(rhdf5,h5write) |
100 | 99 |
importFrom(rhdf5,h5write.default) |
101 |
-importFrom(scran,computeSumFactors) |
|
102 |
-importFrom(scran,quickCluster) |
|
103 | 100 |
importFrom(shiny,br) |
104 | 101 |
importFrom(shiny,brushedPoints) |
105 | 102 |
importFrom(shiny,column) |
... | ... |
@@ -1,71 +1,72 @@ |
1 | 1 |
#' Class SconeExperiment |
2 |
-#' |
|
3 |
-#' @description Objects of this class store, at minimum, a gene |
|
4 |
-#' expression matrix and a set of covariates (sample metadata) useful for |
|
5 |
-#' running \code{\link{scone}}. These include, the quality control (QC) |
|
6 |
-#' metrics, batch information, and biological classes of interest (if |
|
7 |
-#' available). |
|
8 |
-#' |
|
9 |
-#' @description The typical way of creating \code{SconeExperiment} objects is |
|
10 |
-#' via a call to the \code{\link{sconeExperiment}} function or to the |
|
11 |
-#' \code{\link{scone}} function. If the object is a result to a |
|
12 |
-#' \code{\link{scone}} call, it will contain the results, e.g., the |
|
13 |
-#' performance metrics, scores, and normalization workflow comparisons. (See |
|
2 |
+#' |
|
3 |
+#' @description Objects of this class store, at minimum, a gene expression |
|
4 |
+#' matrix and a set of covariates (sample metadata) useful for running |
|
5 |
+#' \code{\link{scone}}. These include, the quality control (QC) metrics, |
|
6 |
+#' batch information, and biological classes of interest (if available). |
|
7 |
+#' |
|
8 |
+#' @description The typical way of creating \code{SconeExperiment} objects is |
|
9 |
+#' via a call to the \code{\link{sconeExperiment}} function or to the |
|
10 |
+#' \code{\link{scone}} function. If the object is a result to a |
|
11 |
+#' \code{\link{scone}} call, it will contain the results, e.g., the |
|
12 |
+#' performance metrics, scores, and normalization workflow comparisons. (See |
|
14 | 13 |
#' Slots for a full list). |
15 |
-#' |
|
16 |
-#' @description This object extends the |
|
14 |
+#' |
|
15 |
+#' @description This object extends the |
|
17 | 16 |
#' \code{\linkS4class{SummarizedExperiment}} class. |
18 |
-#' |
|
19 |
-#' @details The QC matrix, biological class, and batch information are stored as |
|
20 |
-#' elements of the `colData` of the object. |
|
21 |
-#' @details The positive and negative control genes are stored as elements of |
|
22 |
-#' the `rowData` of the object. |
|
23 |
-#' |
|
17 |
+#' |
|
18 |
+#' @details The QC matrix, biological class, and batch information are |
|
19 |
+#' stored as elements of the `colData` of the object. |
|
20 |
+#' @details The positive and negative control genes are stored as |
|
21 |
+#' elements of the `rowData` of the object. |
|
22 |
+#' |
|
24 | 23 |
#' @import methods |
25 | 24 |
#' @import SummarizedExperiment |
26 | 25 |
#' @importClassesFrom SummarizedExperiment SummarizedExperiment |
27 |
-#' |
|
26 |
+#' |
|
28 | 27 |
#' @name SconeExperiment-class |
29 | 28 |
#' @import methods |
30 | 29 |
#' @aliases SconeExperiment |
31 |
-#' |
|
30 |
+#' |
|
32 | 31 |
#' @export |
33 |
-#' |
|
34 |
-#' @slot which_qc integer. Index of columns of `colData` that contain the QC |
|
35 |
-#' metrics. |
|
36 |
-#' @slot which_bio integer. Index of the column of `colData` that contains the |
|
37 |
-#' biological classes information (it must be a factor). |
|
38 |
-#' @slot which_batch integer. Index of the column of `colData` that contains the |
|
39 |
-#' batch information (it must be a factor). |
|
40 |
-#' @slot which_negconruv integer. Index of the column of `rowData` that contains |
|
41 |
-#' a logical vector indicating which genes to use as negative controls to |
|
42 |
-#' infer the factors of unwanted variation in RUV. |
|
43 |
-#' @slot which_negconeval integer. Index of the column of `rowData` that |
|
32 |
+#' |
|
33 |
+#' @slot which_qc integer. Index of columns of `colData` that contain the |
|
34 |
+#' QC metrics. |
|
35 |
+#' @slot which_bio integer. Index of the column of `colData` that contains |
|
36 |
+#' the biological classes information (it must be a factor). |
|
37 |
+#' @slot which_batch integer. Index of the column of `colData` |
|
38 |
+#' that contains the batch information (it must be a factor). |
|
39 |
+#' @slot which_negconruv integer. Index of the column of `rowData` that |
|
44 | 40 |
#' contains a logical vector indicating which genes to use as negative |
41 |
+#' controls to infer the factors of unwanted variation in RUV. |
|
42 |
+#' @slot which_negconeval integer. Index of the column of `rowData` that |
|
43 |
+#' contains a logical vector indicating which genes to use as negative |
|
44 |
+#' controls to evaluate the performance of the normalizations. |
|
45 |
+#' @slot which_poscon integer. Index of the column of `rowData` that |
|
46 |
+#' contains a logical vector indicating which genes to use as positive |
|
45 | 47 |
#' controls to evaluate the performance of the normalizations. |
46 |
-#' @slot which_poscon integer. Index of the column of `rowData` that contains a |
|
47 |
-#' logical vector indicating which genes to use as positive controls to |
|
48 |
-#' evaluate the performance of the normalizations. |
|
49 |
-#' @slot hdf5_pointer character. A string specifying to which file to write / |
|
50 |
-#' read the normalized data. |
|
51 |
-#' @slot imputation_fn list of functions used by scone for the imputation step. |
|
48 |
+#' @slot hdf5_pointer character. A string specifying to which |
|
49 |
+#' file to write / read the normalized data. |
|
50 |
+#' @slot imputation_fn list of functions used by scone for |
|
51 |
+#' the imputation step. |
|
52 | 52 |
#' @slot scaling_fn list of functions used by scone for the scaling step. |
53 |
-#' @slot scone_metrics matrix. Matrix containing the "raw" performance metrics. |
|
54 |
-#' See \code{\link{scone}} for a description of each metric. |
|
55 |
-#' @slot scone_scores matrix. Matrix containing the performance scores |
|
56 |
-#' (transformed metrics). See \code{\link{scone}} for a discussion on the |
|
53 |
+#' @slot scone_metrics matrix. Matrix containing the "raw" |
|
54 |
+#' performance metrics. See \code{\link{scone}} for a |
|
55 |
+#' description of each metric. |
|
56 |
+#' @slot scone_scores matrix. Matrix containing the performance scores |
|
57 |
+#' (transformed metrics). See \code{\link{scone}} for a discussion on the |
|
57 | 58 |
#' difference between scores and metrics. |
58 |
-#' @slot scone_params data.frame. A data frame containing the normalization |
|
59 |
-#' schemes applied to the data and compared. |
|
60 |
-#' @slot scone_run character. Whether \code{\link{scone}} was run and in |
|
61 |
-#' which mode ("no", "in_memory", "hdf5"). |
|
59 |
+#' @slot scone_params data.frame. A data frame containing |
|
60 |
+#' the normalization schemes applied to the data and compared. |
|
61 |
+#' @slot scone_run character. Whether \code{\link{scone}} was |
|
62 |
+#' run and in which mode ("no", "in_memory", "hdf5"). |
|
62 | 63 |
#' @slot is_log logical. Are the expression data in log scale? |
63 |
-#' @slot nested logical. Is batch nested within bio? (Automatically set by |
|
64 |
-#' \code{\link{scone}}). |
|
65 |
-#' @slot rezero logical. TRUE if \code{\link{scone}} was run with |
|
64 |
+#' @slot nested logical. Is batch nested within bio? |
|
65 |
+#' (Automatically set by \code{\link{scone}}). |
|
66 |
+#' @slot rezero logical. TRUE if \code{\link{scone}} was run with |
|
66 | 67 |
#' \code{rezero=TRUE}. |
67 | 68 |
#' @slot impute_args list. Arguments passed to all imputation functions. |
68 |
-#' |
|
69 |
+#' |
|
69 | 70 |
setClass( |
70 | 71 |
Class = "SconeExperiment", |
71 | 72 |
contains = "SummarizedExperiment", |
... | ... |
@@ -130,7 +131,8 @@ setValidity("SconeExperiment", function(object) { |
130 | 131 |
return("Only one set of negative controls for RUV can be specified.") |
131 | 132 |
} |
132 | 133 |
if(length(object@which_negconeval) > 1) { |
133 |
- return("Only one set of negative controls for evaluation can be specified.") |
|
134 |
+ return(paste0("Only one set of negative controls ", |
|
135 |
+ "for evaluation can be specified.")) |
|
134 | 136 |
} |
135 | 137 |
if(length(object@which_poscon) > 1) { |
136 | 138 |
return("Only one set of positive controls can be specified.") |
... | ... |
@@ -180,7 +182,8 @@ setValidity("SconeExperiment", function(object) { |
180 | 182 |
return(paste0("File ", object@hdf5_pointer, " not found.")) |
181 | 183 |
} |
182 | 184 |
if(object@scone_run == "no" && file.exists(object@hdf5_pointer)) { |
183 |
- return(paste0("File ", object@hdf5_pointer, " exists. Please specify a new file.")) |
|
185 |
+ return(paste0("File ", object@hdf5_pointer, |
|
186 |
+ " exists. Please specify a new file.")) |
|
184 | 187 |
} |
185 | 188 |
} |
186 | 189 |
|
... | ... |
@@ -191,17 +194,17 @@ setValidity("SconeExperiment", function(object) { |
191 | 194 |
## Constructor |
192 | 195 |
|
193 | 196 |
#' @rdname SconeExperiment-class |
194 |
-#' |
|
197 |
+#' |
|
195 | 198 |
#' @description The constructor \code{sconeExperiment} creates an object of the |
196 | 199 |
#' class \code{SconeExperiment}. |
197 |
-#' |
|
198 |
-#' @param object Either a matrix or a \code{\link{SummarizedExperiment}} |
|
200 |
+#' |
|
201 |
+#' @param object Either a matrix or a \code{\link{SummarizedExperiment}} |
|
199 | 202 |
#' containing the raw gene expression. |
200 | 203 |
#' @param ... see specific S4 methods for additional arguments. |
201 | 204 |
#' @export |
202 |
-#' |
|
205 |
+#' |
|
203 | 206 |
#' @examples |
204 |
-#' |
|
207 |
+#' |
|
205 | 208 |
#' nrows <- 200 |
206 | 209 |
#' ncols <- 6 |
207 | 210 |
#' counts <- matrix(rpois(nrows * ncols, lambda=10), nrows) |
... | ... |
@@ -209,12 +212,12 @@ setValidity("SconeExperiment", function(object) { |
209 | 212 |
#' coldata <- data.frame(bio=gl(2, 3)) |
210 | 213 |
#' se <- SummarizedExperiment(assays=SimpleList(counts=counts), |
211 | 214 |
#' rowData=rowdata, colData=coldata) |
212 |
-#' |
|
215 |
+#' |
|
213 | 216 |
#' scone1 <- sconeExperiment(assay(se), bio=coldata$bio, poscon=rowdata$poscon) |
214 |
-#' |
|
217 |
+#' |
|
215 | 218 |
#' scone2 <- sconeExperiment(se, which_bio=1L, which_poscon=1L) |
216 |
-#' |
|
217 |
-#' |
|
219 |
+#' |
|
220 |
+#' |
|
218 | 221 |
setGeneric( |
219 | 222 |
name = "sconeExperiment", |
220 | 223 |
def = function(object, ...) { |
... | ... |
@@ -223,22 +226,22 @@ setGeneric( |
223 | 226 |
) |
224 | 227 |
|
225 | 228 |
#' @rdname SconeExperiment-class |
226 |
-#' |
|
227 |
-#' @param which_qc index that specifies which columns of `colData` correspond to |
|
228 |
-#' QC measures. |
|
229 |
-#' @param which_bio index that specifies which column of `colData` corresponds |
|
230 |
-#' to `bio`. |
|
231 |
-#' @param which_batch index that specifies which column of `colData` corresponds |
|
232 |
-#' to `batch`. |
|
233 |
-#' @param which_negconruv index that specifies which column of `rowData` has |
|
234 |
-#' information on negative controls for RUV. |
|
235 |
-#' @param which_negconeval index that specifies which column of `rowData` has |
|
236 |
-#' information on negative controls for evaluation. |
|
237 |
-#' @param which_poscon index that specifies which column of `rowData` has |
|
229 |
+#' |
|
230 |
+#' @param which_qc index that specifies which columns of `colData` |
|
231 |
+#' correspond to QC measures. |
|
232 |
+#' @param which_bio index that specifies which column of `colData` |
|
233 |
+#' corresponds to `bio`. |
|
234 |
+#' @param which_batch index that specifies which column of `colData` |
|
235 |
+#' corresponds to `batch`. |
|
236 |
+#' @param which_negconruv index that specifies which column of `rowData` |
|
237 |
+#' has information on negative controls for RUV. |
|
238 |
+#' @param which_negconeval index that specifies which column of `rowData` |
|
239 |
+#' has information on negative controls for evaluation. |
|
240 |
+#' @param which_poscon index that specifies which column of `rowData` has |
|
238 | 241 |
#' information on positive controls. |
239 | 242 |
#' @param is_log are the expression data in log scale? |
240 | 243 |
#' @export |
241 |
-#' |
|
244 |
+#' |
|
242 | 245 |
setMethod( |
243 | 246 |
f = "sconeExperiment", |
244 | 247 |
signature = signature("SummarizedExperiment"), |
... | ... |
@@ -275,18 +278,21 @@ setMethod( |
275 | 278 |
|
276 | 279 |
|
277 | 280 |
#' @rdname SconeExperiment-class |
278 |
-#' |
|
281 |
+#' |
|
279 | 282 |
#' @param qc numeric matrix with the QC measures. |
280 | 283 |
#' @param bio factor with the biological class of interest. |
281 | 284 |
#' @param batch factor with the batch information. |
282 |
-#' @param negcon_ruv a logical vector indicating which genes to use as negative controls for RUV. |
|
283 |
-#' @param negcon_eval a logical vector indicating which genes to use as negative controls for evaluation. |
|
284 |
-#' @param poscon a logical vector indicating which genes to use as positive controls. |
|
285 |
-#' |
|
285 |
+#' @param negcon_ruv a logical vector indicating which genes to use as negative |
|
286 |
+#' controls for RUV. |
|
287 |
+#' @param negcon_eval a logical vector indicating which genes to use as |
|
288 |
+#' negative controls for evaluation. |
|
289 |
+#' @param poscon a logical vector indicating which genes to use as positive |
|
290 |
+#' controls. |
|
291 |
+#' |
|
286 | 292 |
#' @export |
287 | 293 |
#' |
288 | 294 |
#' @return A \code{\link{sconeExperiment}} object. |
289 |
-#' |
|
295 |
+#' |
|
290 | 296 |
setMethod( |
291 | 297 |
f = "sconeExperiment", |
292 | 298 |
signature = signature("matrix"), |
... | ... |
@@ -7,36 +7,37 @@ setGeneric( |
7 | 7 |
) |
8 | 8 |
|
9 | 9 |
#' Retrieve Normalized Matrix |
10 |
-#' |
|
11 |
-#' Given a \code{SconeExperiment} object created by a call to scone, it will |
|
10 |
+#' |
|
11 |
+#' Given a \code{SconeExperiment} object created by a call to scone, it will |
|
12 | 12 |
#' return a matrix of normalized counts (in log scale if \code{log=TRUE}). |
13 |
-#' |
|
13 |
+#' |
|
14 | 14 |
#' @details If \code{\link{scone}} was run with \code{return_norm="in_memory"}, |
15 |
-#' this function simply retrieves the normalized data from the \code{assays} |
|
15 |
+#' this function simply retrieves the normalized data from the \code{assays} |
|
16 | 16 |
#' slote of \code{object}. |
17 |
-#' |
|
17 |
+#' |
|
18 | 18 |
#' @details If \code{\link{scone}} was run with \code{return_norm="hdf5"}, this |
19 | 19 |
#' function will read the normalized matrix from the specified hdf5 file. |
20 |
-#' |
|
21 |
-#' @details If \code{\link{scone}} was run with \code{return_norm="no"}, this |
|
20 |
+#' |
|
21 |
+#' @details If \code{\link{scone}} was run with \code{return_norm="no"}, this |
|
22 | 22 |
#' function will compute the normalized matrix on the fly. |
23 |
-#' |
|
24 |
-#' @param x a \code{\link{sconeExperiment}} object containing the results of |
|
23 |
+#' |
|
24 |
+#' @param x a \code{\link{sconeExperiment}} object containing the results of |
|
25 | 25 |
#' \code{\link{scone}}. |
26 |
-#' @param method character or numeric. Either a string identifying the |
|
27 |
-#' normalization scheme to be retrieved, or a numeric index with the rank of |
|
28 |
-#' the normalization method to retrieve (according to scone ranking of |
|
26 |
+#' @param method character or numeric. Either a string identifying the |
|
27 |
+#' normalization scheme to be retrieved, or a numeric index with the rank of |
|
28 |
+#' the normalization method to retrieve (according to scone ranking of |
|
29 | 29 |
#' normalizations). |
30 | 30 |
#' @param ... additional arguments for specific methods. |
31 |
-#' |
|
31 |
+#' |
|
32 | 32 |
#' @return A matrix of normalized counts in log-scale. |
33 |
-#' |
|
33 |
+#' |
|
34 | 34 |
#' @examples |
35 | 35 |
#' mat <- matrix(rpois(500, lambda = 5), ncol=10) |
36 | 36 |
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="") |
37 | 37 |
#' obj <- sconeExperiment(mat) |
38 | 38 |
#' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN), |
39 |
-#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
39 |
+#' evaluate=TRUE, k_ruv=0, k_qc=0, |
|
40 |
+#' eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
40 | 41 |
#' norm = get_normalized(res,1) |
41 | 42 |
#' |
42 | 43 |
#' |
... | ... |
@@ -66,7 +67,8 @@ setGeneric( |
66 | 67 |
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="") |
67 | 68 |
#' obj <- sconeExperiment(mat) |
68 | 69 |
#' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN), |
69 |
-#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
70 |
+#' evaluate=TRUE, k_ruv=0, k_qc=0, |
|
71 |
+#' eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
70 | 72 |
#' null_design = get_design(res,1) |
71 | 73 |
#' |
72 | 74 |
setGeneric( |
... | ... |
@@ -77,28 +79,30 @@ setGeneric( |
77 | 79 |
) |
78 | 80 |
|
79 | 81 |
#' Get a subset of normalizations from a SconeExperiment object |
80 |
-#' |
|
82 |
+#' |
|
81 | 83 |
#' @description This method let a user extract a subset of normalizations. This |
82 |
-#' is useful when the original dataset is large and/or many normalization |
|
84 |
+#' is useful when the original dataset is large and/or many normalization |
|
83 | 85 |
#' schemes have been applied. |
84 |
-#' |
|
85 |
-#' @description In such cases, the user may want to run scone in mode |
|
86 |
-#' \code{return_norm = "no"}, explore the results, and then select the top |
|
86 |
+#' |
|
87 |
+#' @description In such cases, the user may want to run scone in mode |
|
88 |
+#' \code{return_norm = "no"}, explore the results, and then select the top |
|
87 | 89 |
#' performing methods for additional exploration. |
88 |
-#' |
|
90 |
+#' |
|
89 | 91 |
#' @param x a \code{SconeExperiment} object. |
90 |
-#' @param methods either character or numeric specifying the normalizations to select. |
|
91 |
-#' |
|
92 |
+#' @param methods either character or numeric specifying the normalizations to |
|
93 |
+#' select. |
|
94 |
+#' |
|
92 | 95 |
#' @return A \code{SconeExperiment} object with selected method data. |
93 |
-#' |
|
96 |
+#' |
|
94 | 97 |
#' @examples |
95 | 98 |
#' mat <- matrix(rpois(500, lambda = 5), ncol=10) |
96 | 99 |
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="") |
97 | 100 |
#' obj <- sconeExperiment(mat) |
98 | 101 |
#' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN), |
99 |
-#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
102 |
+#' evaluate=TRUE, k_ruv=0, k_qc=0, |
|
103 |
+#' eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
100 | 104 |
#' select_res = select_methods(res,1:2) |
101 |
-#' |
|
105 |
+#' |
|
102 | 106 |
setGeneric( |
103 | 107 |
name = "select_methods", |
104 | 108 |
def = function(x, methods) { |
... | ... |
@@ -107,8 +111,8 @@ setGeneric( |
107 | 111 |
) |
108 | 112 |
|
109 | 113 |
#' Get Negative and Positive Controls |
110 |
-#' |
|
111 |
-#' @aliases get_negconeval get_poscon get_negconruv,SconeExperiment-method |
|
114 |
+#' |
|
115 |
+#' @aliases get_negconeval get_poscon get_negconruv,SconeExperiment-method |
|
112 | 116 |
#' get_negconeval,SconeExperiment-method get_poscon,SconeExperiment-method |
113 | 117 |
setGeneric( |
114 | 118 |
name = "get_negconruv", |
... | ... |
@@ -134,7 +138,7 @@ setGeneric( |
134 | 138 |
) |
135 | 139 |
|
136 | 140 |
#' Get Quality Control Matrix |
137 |
-#' |
|
141 |
+#' |
|
138 | 142 |
#' @aliases get_qc,SconeExperiment-method |
139 | 143 |
setGeneric( |
140 | 144 |
name = "get_qc", |
... | ... |
@@ -163,8 +167,8 @@ setGeneric( |
163 | 167 |
) |
164 | 168 |
|
165 | 169 |
#' Extract scone scores |
166 |
-#' |
|
167 |
-#' @aliases get_scores get_score,SconeExperiment-method get_score_ranks |
|
170 |
+#' |
|
171 |
+#' @aliases get_scores get_score,SconeExperiment-method get_score_ranks |
|
168 | 172 |
#' get_score_ranks,SconeExperiment-method |
169 | 173 |
#' |
170 | 174 |
#' @examples |
... | ... |
@@ -172,7 +176,8 @@ setGeneric( |
172 | 176 |
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="") |
173 | 177 |
#' obj <- sconeExperiment(mat) |
174 | 178 |
#' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN), |
175 |
-#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
179 |
+#' evaluate=TRUE, k_ruv=0, k_qc=0, |
|
180 |
+#' eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
176 | 181 |
#' scores = get_scores(res) |
177 | 182 |
#' score_ranks = get_score_ranks(res) |
178 | 183 |
#' |
... | ... |
@@ -192,7 +197,7 @@ setGeneric( |
192 | 197 |
) |
193 | 198 |
|
194 | 199 |
#' Extract scone parameters |
195 |
-#' |
|
200 |
+#' |
|
196 | 201 |
#' @aliases get_params get_params,SconeExperiment-method |
197 | 202 |
#' @examples |
198 | 203 |
#' mat <- matrix(rpois(500, lambda = 5), ncol=10) |
... | ... |
@@ -1,10 +1,11 @@ |
1 | 1 |
#' Upper-quartile normalization wrapper. |
2 | 2 |
#' @importFrom EDASeq betweenLaneNormalization |
3 |
-#' @details SCONE scaling wrapper for \code{\link[EDASeq]{betweenLaneNormalization}}). |
|
3 |
+#' @details SCONE scaling wrapper for |
|
4 |
+#' \code{\link[EDASeq]{betweenLaneNormalization}}). |
|
4 | 5 |
#' @export |
5 | 6 |
#' @param ei Numerical matrix. (rows = genes, cols = samples). |
6 | 7 |
#' @return Upper-quartile normalized matrix. |
7 |
-#' |
|
8 |
+#' |
|
8 | 9 |
#' @examples |
9 | 10 |
#' ei <- matrix(0:20,nrow = 7) |
10 | 11 |
#' eo <- UQ_FN(ei) |
... | ... |
@@ -15,11 +16,12 @@ UQ_FN = function(ei){ |
15 | 16 |
} |
16 | 17 |
|
17 | 18 |
#' Upper-quartile normalization derived from positive data. |
18 |
-#' @details SCONE scaling function scales expression data by upper quartile of positive data. |
|
19 |
+#' @details SCONE scaling function scales expression data by upper quartile of |
|
20 |
+#' positive data. |
|
19 | 21 |
#' @export |
20 | 22 |
#' @param ei Numerical matrix. (rows = genes, cols = samples). |
21 | 23 |
#' @return Upper-quartile (positive) normalized matrix. |
22 |
-#' |
|
24 |
+#' |
|
23 | 25 |
#' @examples |
24 | 26 |
#' ei <- matrix(0:20,nrow = 7) |
25 | 27 |
#' ei[1:3,] <- 0 |
... | ... |
@@ -40,11 +42,12 @@ UQ_FN_POS = function(ei){ |
40 | 42 |
|
41 | 43 |
#' Full-quantile normalization wrapper. |
42 | 44 |
#' @importFrom aroma.light normalizeQuantileRank.matrix |
43 |
-#' @details SCONE "scaling" wrapper for \code{\link[aroma.light]{normalizeQuantileRank.matrix}}). |
|
45 |
+#' @details SCONE "scaling" wrapper for |
|
46 |
+#' \code{\link[aroma.light]{normalizeQuantileRank.matrix}}). |
|
44 | 47 |
#' @export |
45 | 48 |
#' @param ei Numerical matrix. (rows = genes, cols = samples). |
46 | 49 |
#' @return Full-quantile normalized matrix. |
47 |
-#' |
|
50 |
+#' |
|
48 | 51 |
#' @examples |
49 | 52 |
#' ei <- matrix(0:20,nrow = 7) |
50 | 53 |
#' eo <- FQ_FN(ei) |
... | ... |
@@ -55,7 +58,8 @@ FQ_FN = function(ei){ |
55 | 58 |
} |
56 | 59 |
|
57 | 60 |
#' @rdname FQ_FN |
58 |
-#' @details FQT_FN handles ties carefully (see \code{\link[limma]{normalizeQuantiles}}). |
|
61 |
+#' @details FQT_FN handles ties carefully (see |
|
62 |
+#' \code{\link[limma]{normalizeQuantiles}}). |
|
59 | 63 |
#' @export |
60 | 64 |
#' |
61 | 65 |
#' @examples |
... | ... |
@@ -69,11 +73,12 @@ FQT_FN = function(ei){ |
69 | 73 |
|
70 | 74 |
#' DESeq size factor normalization wrapper. |
71 | 75 |
#' @importFrom DESeq estimateSizeFactorsForMatrix |
72 |
-#' @details SCONE scaling wrapper for \code{\link[DESeq]{estimateSizeFactorsForMatrix}}). |
|
76 |
+#' @details SCONE scaling wrapper for |
|
77 |
+#' \code{\link[DESeq]{estimateSizeFactorsForMatrix}}). |
|
73 | 78 |
#' @export |
74 | 79 |
#' @param ei Numerical matrix. (rows = genes, cols = samples). |
75 | 80 |
#' @return DESeq size factor normalized matrix. |
76 |
-#' |
|
81 |
+#' |
|
77 | 82 |
#' @examples |
78 | 83 |
#' ei <- matrix(0:20,nrow = 7) |
79 | 84 |
#' eo <- DESEQ_FN(ei) |
... | ... |
@@ -85,11 +90,12 @@ DESEQ_FN = function(ei){ |
85 | 90 |
} |
86 | 91 |
|
87 | 92 |
#' DESeq size factor normalization derived from positive data. |
88 |
-#' @details SCONE scaling function scales expression data by DESeq size factor derived from positive data. |
|
93 |
+#' @details SCONE scaling function scales expression data by DESeq size factor |
|
94 |
+#' derived from positive data. |
|
89 | 95 |
#' @export |
90 | 96 |
#' @param ei Numerical matrix. (rows = genes, cols = samples). |
91 | 97 |
#' @return DESeq size factor (positive) normalized matrix. |
92 |
-#' |
|
98 |
+#' |
|
93 | 99 |
#' @examples |
94 | 100 |
#' ei <- matrix(0:20,nrow = 7) |
95 | 101 |
#' ei[1:3,] <- 0 |
... | ... |
@@ -101,7 +107,10 @@ DESEQ_FN_POS = function(ei){ |
101 | 107 |
if(!is.null(dim(ei))){ |
102 | 108 |
y = ei |
103 | 109 |
y[y == 0] = NA # Matrix with zeroes replaced w/ NA |
104 |
- geom_mean = exp(apply(log(y),1,sum,na.rm = TRUE)/rowSums(!is.na(y))) # Compute Geometric Mean of Expression for Each Gene (Use positive data only) |
|
110 |
+ geom_mean = exp(apply(log(y),1,sum,na.rm = TRUE)/rowSums(!is.na(y))) |
|
111 |
+ # Compute Geometric Mean of Expression |
|
112 |
+ # for Each Gene (Use positive data only) |
|
113 |
+ |
|
105 | 114 |
}else{stop("Null imput matrix dimension.")} |
106 | 115 |
if(!any(geom_mean > 0)){stop("Geometric mean non-positive for all genes.")} |
107 | 116 |
|
... | ... |
@@ -128,7 +137,7 @@ DESEQ_FN_POS = function(ei){ |
128 | 137 |
#' @export |
129 | 138 |
#' @param ei Numerical matrix. (rows = genes, cols = samples). |
130 | 139 |
#' @return TMM normalized matrix. |
131 |
-#' |
|
140 |
+#' |
|
132 | 141 |
#' @examples |
133 | 142 |
#' ei <- matrix(0:20,nrow = 7) |
134 | 143 |
#' eo <- TMM_FN(ei) |
... | ... |
@@ -137,23 +146,4 @@ TMM_FN = function(ei){ |
137 | 146 |
size_fac = calcNormFactors(ei,method = "TMM") |
138 | 147 |
eo = t(t(ei)/size_fac) |
139 | 148 |
return(eo) |
140 |
-} |
|
141 |
- |
|
142 |
-#' Pooled Sample normalization wrapper. |
|
143 |
-#' |
|
144 |
-#' @description SCONE scaling wrapper for \code{\link[scran]{computeSumFactors}} |
|
145 |
-#' with clusters from \code{\link[scran]{quickCluster}}. |
|
146 |
-#' |
|
147 |
-#' @importFrom scran computeSumFactors |
|
148 |
-#' @importFrom scran quickCluster |
|
149 |
-#' @export |
|
150 |
-#' @param ei Numerical matrix. (rows = genes, cols = samples). |
|
151 |
-#' @return SCRAN size factor normalized matrix. |
|
152 |
-#' |
|
153 |
- |
|
154 |
-SCRAN_FN = function(ei){ |
|
155 |
- clusters <- quickCluster(ei, min.size = 20) |
|
156 |
- size_fac = computeSumFactors(ei, cluster=clusters) |
|
157 |
- eo = t(t(ei)/size_fac) |
|
158 |
- return(eo) |
|
159 |
-} |
|
149 |
+} |
|
160 | 150 |
\ No newline at end of file |
... | ... |
@@ -1,33 +1,38 @@ |
1 |
-#' Function for biplotting with no point labels and with points color-coded according |
|
2 |
-#' to a quantitative variable. For example: the rank of normalization performance. |
|
3 |
-#' |
|
1 |
+#' Function for biplotting with no point labels and with |
|
2 |
+#' points color-coded according to a quantitative variable. |
|
3 |
+#' For example: the rank of normalization performance. |
|
4 |
+#' |
|
4 | 5 |
#' This function implements biplot for \code{\link[stats]{prcomp}} objects. |
5 |
-#' |
|
6 |
+#' |
|
6 | 7 |
#' @param x \code{\link[stats]{prcomp}} object. |
7 |
-#' @param y numeric. Quantitative values used to color the points. |
|
8 |
-#' If rank is FALSE, all values must be positive integers and less than or equal to the length of y. |
|
9 |
-#' @param rank logical. If TRUE (default) y will be transformed by the rank() function |
|
8 |
+#' @param y numeric. Quantitative values used to color the points. If rank is |
|
9 |
+#' FALSE, all values must be positive integers and less than or equal to the |
|
10 |
+#' length of y. |
|
11 |
+#' @param rank logical. If TRUE (default) y will be transformed by the rank() |
|
12 |
+#' function |
|
10 | 13 |
#' @param ties_method character. ties.method used by the rank() function |
11 |
-#' @param choices numeric. 2 principal components to plot. Default to first two PCs. |
|
12 |
-#' @param expand numeric. value used to adjust the spread of the arrows relative |
|
13 |
-#' to the points. |
|
14 |
+#' @param choices numeric. 2 principal components to plot. Default to first two |
|
15 |
+#' PCs. |
|
16 |
+#' @param expand numeric. value used to adjust the spread of the arrows |
|
17 |
+#' relative to the points. |
|
14 | 18 |
#' @param ... arguments passed to plot. |
15 |
-#' |
|
19 |
+#' |
|
16 | 20 |
#' @importFrom grDevices colorRampPalette |
17 | 21 |
#' @export |
18 | 22 |
#' |
19 | 23 |
#' @return Invisibly returns scaled point coordinates used in plot. |
20 |
-#' |
|
24 |
+#' |
|
21 | 25 |
#' @examples |
22 | 26 |
#' mat <- matrix(rnorm(1000), ncol=10) |
23 | 27 |
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="") |
24 |
-#' |
|
28 |
+#' |
|
25 | 29 |
#' pc <- prcomp(mat) |
26 |
-#' |
|
30 |
+#' |
|
27 | 31 |
#' biplot_color(pc, rank(pc$x[,1])) |
28 |
-#' |
|
32 |
+#' |
|
29 | 33 |
biplot_color <- function(x, y, rank = TRUE, |
30 |
- ties_method = c("max", "min", "first", "last", "random"), |
|
34 |
+ ties_method = c("max", "min", |
|
35 |
+ "first", "last", "random"), |
|
31 | 36 |
choices = 1:2, expand = 1, ...) { |
32 | 37 |
|
33 | 38 |
if(rank){ |
... | ... |
@@ -1,28 +1,32 @@ |
1 | 1 |
#' Interactive biplot |
2 |
-#' |
|
3 |
-#' This is a wrapper around \code{\link{biplot_color}}, creating a shiny |
|
4 |
-#' gadget to allow the user to select specific points in the graph. |
|
5 |
-#' |
|
6 |
-#' @details Since this is based on the shiny gadget feature, it will not work in |
|
7 |
-#' static documents, such as vignettes or markdown / knitr documents. |
|
8 |
-#' See \code{biplot_color} for more details on the internals. |
|
9 |
-#' |
|
2 |
+#' |
|
3 |
+#' This is a wrapper around \code{\link{biplot_color}}, creating a shiny gadget |
|
4 |
+#' to allow the user to select specific points in the graph. |
|
5 |
+#' |
|
6 |
+#' @details Since this is based on the shiny gadget feature, it will not work |
|
7 |
+#' in static documents, such as vignettes or markdown / knitr documents. See |
|
8 |
+#' \code{biplot_color} for more details on the internals. |
|
9 |
+#' |
|
10 | 10 |
#' @param x a \code{\link{sconeExperiment}} object. |
11 | 11 |
#' @param ... passed to \code{\link{biplot_color}}. |
12 |
-#' |
|
12 |
+#' |
|
13 | 13 |
#' @importFrom miniUI gadgetTitleBar miniContentPanel miniPage gadgetTitleBar |
14 |
-#' @importFrom shiny plotOutput renderPlot observeEvent brushedPoints runGadget verbatimTextOutput stopApp renderText |
|
15 |
-#' |
|
14 |
+#' @importFrom shiny plotOutput renderPlot observeEvent brushedPoints runGadget |
|
15 |
+#' verbatimTextOutput stopApp renderText |
|
16 |
+#' |
|
16 | 17 |
#' @export |
17 | 18 |
#' |
18 |
-#' @return A \code{\link{sconeExperiment}} object representing selected methods. |
|
19 |
-#' |
|
19 |
+#' @return A \code{\link{sconeExperiment}} object representing |
|
20 |
+#' selected methods. |
|
21 |
+#' |
|
20 | 22 |
#' @examples |
21 | 23 |
#' mat <- matrix(rpois(1000, lambda = 5), ncol=10) |
22 | 24 |
#' colnames(mat) <- paste("X", 1:ncol(mat), sep="") |
23 | 25 |
#' obj <- sconeExperiment(mat) |
24 |
-#' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN, fq=FQT_FN), |
|
25 |
-#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam()) |
|
26 |
+#' res <- scone(obj, scaling=list(none=identity, |
|
27 |
+#' uq=UQ_FN, deseq=DESEQ_FN, fq=FQT_FN), |
|
28 |
+#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, |
|
29 |
+#' bpparam = BiocParallel::SerialParam()) |
|
26 | 30 |
#' \dontrun{ |
27 | 31 |
#' biplot_interactive(res) |
28 | 32 |
#' } |
... | ... |
@@ -69,7 +73,10 @@ biplot_interactive <- function(x, ...) { |
69 | 73 |
# Handle the Done button being pressed. |
70 | 74 |
observeEvent(input$done, { |
71 | 75 |
# Return the brushed points. See ?shiny::brushedPoints. |
72 |
- names <- rownames(brushedPoints(data_out, input$plot_brush, xvar="PC1", yvar="PC2")) |
|
76 |
+ names <- rownames(brushedPoints(data_out, |
|
77 |
+ input$plot_brush, |
|
78 |
+ xvar="PC1", |
|
79 |
+ yvar="PC2")) |
|
73 | 80 |
out <- select_methods(x, names) |
74 | 81 |
stopApp(invisible(out)) |
75 | 82 |
}) |
... | ... |
@@ -1,67 +1,66 @@ |
1 | 1 |
#' Data: Positive and Negative Control Genes |
2 |
-#' |
|
3 |
-#' Sets of "positive" and "negative" control genes, useful arguments for |
|
2 |
+#' |
|
3 |
+#' Sets of "positive" and "negative" control genes, useful arguments for |
|
4 | 4 |
#' \code{\link{scone}}. |
5 |
-#' |
|
6 |
-#' These gene sets can be used as negative or positive controls, either |
|
7 |
-#' for RUV factor normalization or for evaluation and ranking of the |
|
8 |
-#' normalization workflows. |
|
9 |
-#' |
|
10 |
-#' @details Gene set datasets are in the form of \code{data.frame}, with |
|
11 |
-#' the first column containing the gene symbols and an (optional) second |
|
12 |
-#' column containing additional information (such as cortical layer or |
|
13 |
-#' cell cycle phase). |
|
14 |
-#' |
|
15 |
-#' @details Note that the gene symbols follow the mouse conventions (i.e. |
|
16 |
-#' capitalized) or the human conventions (i.e, all upper-case), based on |
|
17 |
-#' the original publication. One can use the \code{\link[base]{toupper}}, |
|
18 |
-#' \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} |
|
19 |
-#' functions to alter symbol conventions. |
|
20 |
-#' |
|
21 |
-#' @details Mouse gene symbols in \code{cortical_markers} are transcribed |
|
22 |
-#' from Figure 3 of Molyneaux et al. (2007): "laminar-specific expression |
|
23 |
-#' of 66 genes within the neocortex." |
|
24 | 5 |
#' |
25 |
-#' @details Human gene symbols in \code{housekeeping} are derived from the |
|
26 |
-#' list of "housekeeping" (HK) genes from the cDNA microarray analysis of |
|
27 |
-#' Eisenberg and Levanon (2003): "[HK genes] belong to the class of genes |
|
28 |
-#' that are EXPRESSED in all tissues." "... from 47 different human tissues |
|
29 |
-#' and cell lines." |
|
30 |
-#' |
|
31 |
-#' @details Human gene symbols in \code{housekeeping_revised} are from |
|
32 |
-#' Eisenberg and Levanon (2013): "This list provided ... is based on |
|
33 |
-#' analysis of next-generation sequencing (RNA-seq) data. At least one |
|
34 |
-#' variant of these genes is expressed in all tissues uniformly... The |
|
35 |
-#' RefSeq transcript according to which we deemed the gene |
|
36 |
-#' 'housekeeping' is given." Housekeeping exons satisfy "(i) expression |
|
37 |
-#' observed in all tissues; (ii) low variance over tissues: |
|
38 |
-#' standard-deviation [log2(RPKM)]<1; and (iii) no exceptional expression |
|
39 |
-#' in any single tissue; that is, no log-expression value differed from |
|
40 |
-#' the averaged log2(RPKM) by two (fourfold) or more." "We define a |
|
41 |
-#' housekeeping gene as a gene for which at least one RefSeq transcript |
|
42 |
-#' has more than half of its exons meeting the previous criteria (thus |
|
43 |
-#' being housekeeping exons)." |
|
6 |
+#' These gene sets can be used as negative or positive controls, either for RUV |
|
7 |
+#' factor normalization or for evaluation and ranking of the normalization |
|
8 |
+#' workflows. |
|
9 |
+#' |
|
10 |
+#' @details Gene set datasets are in the form of \code{data.frame}, with the |
|
11 |
+#' first column containing the gene symbols and an (optional) second column |
|
12 |
+#' containing additional information (such as cortical layer or cell cycle |
|
13 |
+#' phase). |
|
14 |
+#' |
|
15 |
+#' @details Note that the gene symbols follow the mouse conventions (i.e. |
|
16 |
+#' capitalized) or the human conventions (i.e, all upper-case), based on the |
|
17 |
+#' original publication. One can use the \code{\link[base]{toupper}}, |
|
18 |
+#' \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} |
|
19 |
+#' functions to alter symbol conventions. |
|
20 |
+#' |
|
21 |
+#' @details Mouse gene symbols in \code{cortical_markers} are transcribed from |
|
22 |
+#' Figure 3 of Molyneaux et al. (2007): "laminar-specific expression of 66 |
|
23 |
+#' genes within the neocortex." |
|
24 |
+#' |
|
25 |
+#' @details Human gene symbols in \code{housekeeping} are derived from the list |
|
26 |
+#' of "housekeeping" genes from the cDNA microarray analysis of Eisenberg |
|
27 |
+#' and Levanon (2003): "[HK genes] belong to the class of genes that are |
|
28 |
+#' EXPRESSED in all tissues." "... from 47 different human tissues and cell |
|
29 |
+#' lines." |
|
44 | 30 |
#' |
45 |
-#' @details Human gene symbols in \code{cellcycle_genes} are from Macosko et al. |
|
46 |
-#' (2015) and represent a set of genes marking G1/S, S, G2/M, M, and M/G1 |
|
31 |
+#' @details Human gene symbols in \code{housekeeping_revised} from Eisenberg |
|
32 |
+#' and Levanon (2013): "This list provided ... is based on analysis of |
|
33 |
+#' next-generation sequencing (RNA-seq) data. At least one variant of these |
|
34 |
+#' genes is expressed in all tissues uniformly... The RefSeq transcript |
|
35 |
+#' according to which we deemed the gene 'housekeeping' is given." |
|
36 |
+#' Housekeeping exons satisfy "(i) expression observed in all tissues; (ii) |
|
37 |
+#' low variance over tissues: standard-deviation [log2(RPKM)]<1; and (iii) no |
|
38 |
+#' exceptional expression in any single tissue; that is, no log-expression |
|
39 |
+#' value differed from the averaged log2(RPKM) by two (fourfold) or more." |
|
40 |
+#' "We define a housekeeping gene as a gene for which at least one RefSeq |
|
41 |
+#' transcript has more than half of its exons meeting the previous criteria |
|
42 |
+#' (thus being housekeeping exons)." |
|
43 |
+#' |
|
44 |
+#' @details Human gene symbols in \code{cellcycle_genes} from Macosko et al. |
|
45 |
+#' (2015) and represent a set of genes marking G1/S, S, G2/M, M, and M/G1 |
|
47 | 46 |
#' phases. |
48 |
-#' |
|
49 |
-#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D.. |
|
50 |
-#' Neuronal subtype specification in the cerebral cortex. Nature Reviews |
|
47 |
+#' |
|
48 |
+#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D.. |
|
49 |
+#' Neuronal subtype specification in the cerebral cortex. Nature Reviews |
|
51 | 50 |
#' Neuroscience, 2007, 8(6):427-437. |
52 |
-#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact. |
|
51 |
+#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact. |
|
53 | 52 |
#' Trends in Genetics, 2003, 19(7):362-5. |
54 |
-#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited. |
|
53 |
+#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited. |
|
55 | 54 |
#' Trends in Genetics, 2013, 29(10):569-74. |
56 |
-#' @references Macosko, E. Z., et al. Highly parallel genome-wide expression |
|
57 |
-#' profiling of individual cells using nanoliter droplets. Cell, 2015, |
|
55 |
+#' @references Macosko, E. Z., et al. Highly parallel genome-wide expression |
|
56 |
+#' profiling of individual cells using nanoliter droplets. Cell, 2015, |
|
58 | 57 |
#' 161.5:1202-1214. |
59 |
-#' |
|
58 |
+#' |
|
60 | 59 |
#' @name control_genes |
61 |
-#' |
|
60 |
+#' |
|
62 | 61 |
#' @docType data |
63 | 62 |
#' @aliases cortical_markers housekeeping housekeeping_revised cellcycle_genes |
64 |
-#' |
|
63 |
+#' |
|
65 | 64 |
#' @examples |
66 | 65 |
#' data(housekeeping) |
67 | 66 |
#' data(housekeeping_revised) |
... | ... |
@@ -1,11 +1,13 @@ |
1 |
-#' @describeIn get_design If \code{method} is a character, it will return the |
|
2 |
-#' design matrix corresponding to the normalization scheme specified by the |
|
3 |
-#' character string. The string must be one of the \code{row.names} of the |
|
4 |
-#' slot \code{scone_params}. |
|
5 |
-#' |
|
1 |
+#' @describeIn get_design |
|
2 |
+#' If |
|
3 |
+#' \code{method} is a character, it will return the design |
|
4 |
+#' matrix corresponding to the normalization scheme specified |
|
5 |
+#' by the character string. The string must be one of the |
|
6 |
+#' \code{row.names} of the slot \code{scone_params}. |
|
7 |
+#' |
|
6 | 8 |
#' @export |
7 |
-#' |
|
8 |
-#' |
|
9 |
+#' |
|
10 |
+#' |
|
9 | 11 |
setMethod( |
10 | 12 |
f = "get_design", |
11 | 13 |
signature = signature(x = "SconeExperiment", method = "character"), |
... | ... |
@@ -45,26 +47,32 @@ setMethod( |
45 | 47 |
|
46 | 48 |
} |
47 | 49 |
|
48 |
- parsed <- .parse_row(params, get_bio(x), get_batch(x), ruv_factors, qc_factors) |
|
50 |
+ parsed <- .parse_row(params, get_bio(x), get_batch(x), |
|
51 |
+ ruv_factors, qc_factors) |
|
49 | 52 |
design_mat <- make_design(parsed$bio, parsed$batch, parsed$W, |
50 |
- nested=(x@nested & !is.null(parsed$bio) & !is.null(parsed$batch))) |
|
53 |
+ nested=(x@nested & |
|
54 |
+ !is.null(parsed$bio) & |
|
55 |
+ !is.null(parsed$batch))) |
|
51 | 56 |
|
52 | 57 |
return(design_mat) |
53 | 58 |
} |
54 | 59 |
) |
55 | 60 |
|
56 |
-#' @describeIn get_design If \code{method} is a numeric, it will return the |
|
57 |
-#' design matrix according to the scone ranking. |
|
58 |
-#' |
|
59 |
-#' @details The numeric method will always return the design matrix |
|
60 |
-#' corresponding to row \code{method} of the \code{scone_params} slot. This |
|
61 |
-#' means that if \code{\link{scone}} was run with \code{eval=TRUE}, |
|
62 |
-#' \code{get_design(x, 1)} will return the top ranked method. If |
|
63 |
-#' \code{\link{scone}} was run with \code{eval=FALSE}, \code{get_design(x, 1)} |
|
64 |
-#' will return the first normalization in the order saved by scone. |
|
65 |
-#' |
|
61 |
+#' @describeIn get_design |
|
62 |
+#' If |
|
63 |
+#' \code{method} is a numeric, it will return the design matrix |
|
64 |
+#' according to the scone ranking. |
|
65 |
+#' |
|
66 |
+#' @details The numeric method will always return the design matrix |
|
67 |
+#' corresponding to row \code{method} of the \code{scone_params} |
|
68 |
+#' slot. This means that if \code{\link{scone}} was run with |
|
69 |
+#' \code{eval=TRUE}, \code{get_design(x, 1)} will return the top |
|
70 |
+#' ranked method. If \code{\link{scone}} was run with |
|
71 |
+#' \code{eval=FALSE}, \code{get_design(x, 1)} will return the first |
|
72 |
+#' normalization in the order saved by scone. |
|
73 |
+#' |
|
66 | 74 |
#' @export |
67 |
-#' |
|
75 |
+#' |
|
68 | 76 |
setMethod( |
69 | 77 |
f = "get_design", |
70 | 78 |
signature = signature(x = "SconeExperiment", method = "numeric"), |
... | ... |
@@ -1,13 +1,15 @@ |
1 |
-#' @describeIn get_normalized If \code{method} is a character, it will return |
|
2 |
-#' the normalized matrix corresponding to the normalization scheme specified |
|
3 |
-#' by the character string. The string must be one of the \code{row.names} of |
|
4 |
-#' the slot \code{scone_params}. |
|
5 |
-#' |
|
1 |
+#' @describeIn get_normalized |
|
2 |
+#' If |
|
3 |
+#' \code{method} is a character, it will return the normalized |
|
4 |
+#' matrix corresponding to the normalization scheme specified |
|
5 |
+#' by the character string.The string must be one of the |
|
6 |
+#' \code{row.names} of the slot \code{scone_params}. |
|
7 |
+#' |
|
6 | 8 |
#' @importFrom rhdf5 h5ls h5read |
7 | 9 |
#' @export |
8 |
-#' |
|
10 |
+#' |
|
9 | 11 |
#' @param log logical. Should the data be returned in log-scale |
10 |
-#' |
|
12 |
+#' |
|
11 | 13 |
setMethod( |
12 | 14 |
f = "get_normalized", |
13 | 15 |
signature = signature(x = "SconeExperiment", method = "character"), |
... | ... |
@@ -61,9 +63,12 @@ setMethod( |
61 | 63 |
|
62 | 64 |
} |
63 | 65 |
|
64 |
- parsed <- .parse_row(params, get_bio(x), get_batch(x), ruv_factors, qc_factors) |
|
66 |
+ parsed <- .parse_row(params, get_bio(x), |
|
67 |
+ get_batch(x), ruv_factors, qc_factors) |
|
65 | 68 |
design_mat <- make_design(parsed$bio, parsed$batch, parsed$W, |
66 |
- nested=(x@nested & !is.null(parsed$bio) & !is.null(parsed$batch))) |
|
69 |
+ nested=(x@nested & |
|
70 |
+ !is.null(parsed$bio) & |
|
71 |
+ !is.null(parsed$batch))) |
|
67 | 72 |
adjusted <- lm_adjust(log1p(scaled), design_mat, get_batch(x)) |
68 | 73 |
retval <- expm1(adjusted) |
69 | 74 |
} |
... | ... |
@@ -77,18 +82,21 @@ setMethod( |
77 | 82 |
) |
78 | 83 |
|
79 | 84 |
|
80 |
-#' @describeIn get_normalized If \code{method} is a numeric, it will return the |
|
81 |
-#' normalized matrix according to the scone ranking. |
|
82 |
-#' |
|
83 |
-#' @details The numeric method will always return the normalization |
|
85 |
+#' @describeIn get_normalized |
|
86 |
+#' If |
|
87 |
+#' \code{method} is a numeric, it will return the normalized |
|
88 |
+#' matrix according to the scone ranking. |
|
89 |
+#' |
|
90 |
+#' @details The numeric method will always return the normalization |
|
84 | 91 |
#' corresponding to row \code{method} of the \code{scone_params} slot. This |
85 |
-#' means that if \code{\link{scone}} was run with \code{eval=TRUE}, |
|
86 |
-#' \code{get_normalized(x, 1)} will return the top ranked method. If |
|
87 |
-#' \code{\link{scone}} was run with \code{eval=FALSE}, \code{get_normalized(x, |
|
88 |
-#' 1)} will return the first normalization in the order saved by scone. |
|
89 |
-#' |
|
92 |
+#' means that if \code{\link{scone}} was run with \code{eval=TRUE}, |
|
93 |
+#' \code{get_normalized(x, 1)} will return the top ranked method. If |
|
94 |
+#' \code{\link{scone}} was run with \code{eval=FALSE}, |
|
95 |
+#' \code{get_normalized(x,1)} will return the first normalization |
|
96 |
+#' in the order saved by scone. |
|
97 |
+#' |
|
90 | 98 |
#' @export |
91 |
-#' |
|
99 |
+#' |
|
92 | 100 |
setMethod( |
93 | 101 |
f = "get_normalized", |
94 | 102 |
signature = signature(x = "SconeExperiment", method = "numeric"), |
... | ... |
@@ -1,11 +1,11 @@ |
1 | 1 |
#' @rdname get_params |
2 |
-#' |
|
2 |
+#' |
|
3 | 3 |
#' @param x an object of class \code{\link{sconeExperiment}}. |
4 |
-#' |
|
4 |
+#' |
|
5 | 5 |
#' @return A data.frame containing workflow parameters for each scone workflow. |
6 |
-#' |
|
6 |
+#' |
|
7 | 7 |
#' @export |
8 |
-#' |
|
8 |
+#' |
|
9 | 9 |
setMethod( |
10 | 10 |
f = "get_params", |
11 | 11 |
signature = signature(x = "SconeExperiment"), |
... | ... |
@@ -14,13 +14,14 @@ setMethod( |
14 | 14 |
}) |
15 | 15 |
|
16 | 16 |
#' @rdname get_scores |
17 |
-#' |
|
17 |
+#' |
|
18 | 18 |
#' @param x an object of class \code{\link{sconeExperiment}}. |
19 |
-#' |
|
20 |
-#' @return \code{get_scores} returns a matrix with all (non-missing) scone scores, ordered by average score rank. |
|
21 |
-#' |
|
19 |
+#' |
|
20 |
+#' @return \code{get_scores} returns a matrix with all (non-missing) scone |
|
21 |
+#' scores, ordered by average score rank. |
|
22 |
+#' |
|
22 | 23 |
#' @export |
23 |
-#' |
|
24 |
+#' |
|
24 | 25 |
setMethod( |
25 | 26 |
f = "get_scores", |
26 | 27 |
signature = signature(x = "SconeExperiment"), |
... | ... |
@@ -30,11 +31,11 @@ setMethod( |
30 | 31 |
}) |
31 | 32 |
|
32 | 33 |
#' @rdname get_scores |
33 |
-#' |
|
34 |
+#' |
|
34 | 35 |
#' @return \code{get_score_ranks} returns a vector of average score ranks. |
35 |
-#' |
|
36 |
+#' |
|
36 | 37 |
#' @export |
37 |
-#' |
|
38 |
+#' |
|
38 | 39 |
setMethod( |
39 | 40 |
f = "get_score_ranks", |
40 | 41 |
signature = signature(x = "SconeExperiment"), |
... | ... |
@@ -44,13 +45,13 @@ setMethod( |
44 | 45 |
|
45 | 46 |
|
46 | 47 |
#' @rdname get_negconruv |
47 |
-#' |
|
48 |
+#' |
|
48 | 49 |
#' @param x an object of class \code{\link{sconeExperiment}}. |
49 |
-#' |
|
50 |
+#' |
|
50 | 51 |
#' @return NULL or a logical vector. |
51 |
-#' |
|
52 |
-#' @return For \code{get_negconruv} the returned vector indicates which genes are negative controls |
|
53 |
-#' to be used for RUV. |
|
52 |
+#' |
|
53 |
+#' @return For \code{get_negconruv} the returned vector indicates which genes |
|
54 |
+#' are negative controls to be used for RUV. |
|
54 | 55 |
setMethod( |
55 | 56 |
f = "get_negconruv", |
56 | 57 |
signature = signature(x = "SconeExperiment"), |
... | ... |
@@ -64,9 +65,9 @@ setMethod( |
64 | 65 |
) |
65 | 66 |
|
66 | 67 |
#' @rdname get_negconruv |
67 |
-#' |
|
68 |
-#' @return For \code{get_negconeval} the returned vector indicates which genes are negative controls |
|
69 |
-#' to be used for evaluation. |
|
68 |
+#' |
|
69 |
+#' @return For \code{get_negconeval} the returned vector indicates which genes |
|
70 |
+#' are negative controls to be used for evaluation. |
|
70 | 71 |
setMethod( |
71 | 72 |
f = "get_negconeval", |
72 | 73 |
signature = signature(x = "SconeExperiment"), |
... | ... |
@@ -80,9 +81,9 @@ setMethod( |
80 | 81 |
) |
81 | 82 |
|
82 | 83 |
#' @rdname get_negconruv |
83 |
-#' |
|
84 |
-#' @return For \code{get_poscon} the returned vector indicates which genes are positive controls |
|
85 |
-#' to be used for evaluation. |
|
84 |
+#' |
|
85 |
+#' @return For \code{get_poscon} the returned vector indicates which genes are |
|
86 |
+#' positive controls to be used for evaluation. |
|
86 | 87 |
setMethod( |
87 | 88 |
f = "get_poscon", |
88 | 89 |
signature = signature(x = "SconeExperiment"), |
... | ... |
@@ -96,9 +97,9 @@ setMethod( |
96 | 97 |
) |
97 | 98 |
|
98 | 99 |
#' @rdname get_qc |
99 |
-#' |
|
100 |
+#' |
|
100 | 101 |
#' @param x an object of class \code{\link{sconeExperiment}}. |
101 |
-#' |
|
102 |
+#' |
|
102 | 103 |
#' @return NULL or the quality control (QC) metric matrix. |
103 | 104 |
setMethod( |
104 | 105 |
f = "get_qc", |
... | ... |
@@ -114,9 +115,9 @@ setMethod( |
114 | 115 |
) |
115 | 116 |
|
116 | 117 |
#' @rdname get_bio |
117 |
-#' |
|
118 |
+#' |
|
118 | 119 |
#' @param x an object of class \code{\link{sconeExperiment}}. |
119 |
-#' |
|
120 |
+#' |
|
120 | 121 |
#' @return NULL or a factor containing bio or batch covariate. |
121 | 122 |
setMethod( |
122 | 123 |
f = "get_bio", |
... | ... |
@@ -144,19 +145,22 @@ setMethod( |
144 | 145 |
) |
145 | 146 |
|
146 | 147 |
#' Parse rows |
147 |
-#' |
|
148 |
-#' This function is used internally in scone to parse the variables used to generate the design matrices. |
|
149 |
-#' |
|
150 |
-#' @param pars character. A vector of parameters corresponding to a row of workflow parameters. |
|
148 |
+#' |
|
149 |
+#' This function is used internally in scone to parse the variables used to |
|
150 |
+#' generate the design matrices. |
|
151 |
+#' |
|
152 |
+#' @param pars character. A vector of parameters corresponding to a row of |
|
153 |
+#' workflow parameters. |
|
151 | 154 |
#' @param bio factor. The biological covariate. |
152 | 155 |
#' @param batch factor. The batch covariate. |
153 |
-#' @param ruv_factors list. A list containing the factors of unwanted variation (RUVg) for all upstream workflows. |
|
156 |
+#' @param ruv_factors list. A list containing the factors of unwanted variation |
|
157 |
+#' (RUVg) for all upstream workflows. |
|
154 | 158 |
#' @param qc matrix. The principal components of the QC metric matrix. |
155 |
-#' |
|
159 |
+#' |
|
156 | 160 |
#' @return A list with the variables to be passed to make_design. |
157 |
-#' |
|
161 |
+#' |
|
158 | 162 |
#' @keywords internal |
159 |
-#' |
|
163 |
+#' |
|
160 | 164 |
.parse_row <- function(pars, bio, batch, ruv_factors, qc) { |
161 | 165 |
|
162 | 166 |
# Define upstream workflow: imputation x scaling |
... | ... |
@@ -185,22 +189,33 @@ setMethod( |
185 | 189 |
} |
186 | 190 |
|
187 | 191 |
#' Make a Design Matrix |
188 |
-#' |
|
189 |
-#' This function builds a design matrix for the Adjustment Normalization Step, in which |
|
190 |
-#' covariates are two (possibly nested) categorical factors and one or more continuous variables. |
|
191 |
-#' |
|
192 |
-#' @details If nested=TRUE a nested design is used, i.e. the batch variable is assumed to be nested within |
|
193 |
-#' the bio variable. Here, nested means that each batch is composed of samples from only *one* level of bio, |
|
194 |
-#' while each level of bio may contain multiple batches. |
|
195 |
-#' |
|
192 |
+#' |
|
193 |
+#' This function builds a design matrix for the Adjustment Normalization Step, |
|
194 |
+#' in which covariates are two (possibly nested) categorical factors and one or |
|
195 |
+#' more continuous variables. |
|
196 |
+#' |
|
197 |
+#' @details If nested=TRUE a nested design is used, i.e. the batch variable is |
|
198 |
+#' assumed to be nested within the bio variable. Here, nested means that each |
|
199 |
+#' batch is composed of samples from only *one* level of bio, while each |
|
200 |
+#' level of bio may contain multiple batches. |
|
201 |
+#' |
|
196 | 202 |
#' @export |
197 |
-#' |
|
203 |
+#' |
|
198 | 204 |
#' @param bio factor. The biological covariate. |
199 | 205 |
#' @param batch factor. The batch covariate. |
200 |
-#' @param W numeric. Either a vector or matrix containing one or more continuous covariates (e.g. RUVg factors). |
|
201 |
-#' @param nested logical. Whether or not to consider a nested design (see details). |
|
202 |
-#' |
|
206 |
+#' @param W numeric. Either a vector or matrix containing one or more |
|
207 |
+#' continuous covariates (e.g. RUVg factors). |
|
208 |
+#' @param nested logical. Whether or not to consider a nested design |
|
209 |
+#' (see details). |
|
210 |
+#' |
|
203 | 211 |
#' @return The design matrix. |
212 |
+#' |
|
213 |
+#' @examples |
|
214 |
+#' |
|
215 |
+#' bio = as.factor(rep(c(1,2),each = 2)) |
|
216 |
+#' batch = as.factor(rep(c(1,2),2)) |
|
217 |
+#' design_mat = make_design(bio,batch, W = NULL) |
|
218 |
+#' |
|
204 | 219 |
make_design <- function(bio, batch, W, nested=FALSE) { |
205 | 220 |
|
206 | 221 |
if(nested & (is.null(bio) | is.null(batch))) { |
... | ... |
@@ -251,29 +266,48 @@ make_design <- function(bio, batch, W, nested=FALSE) { |
251 | 266 |
} |
252 | 267 |
} |
253 | 268 |
|
254 |
- return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat))) |
|
269 |
+ return(model.matrix(as.formula(f), |
|
270 |
+ contrasts=list(bio=contr.sum, batch=mat))) |
|
255 | 271 |
} else { |
256 | 272 |
return(model.matrix(as.formula(f))) |
257 | 273 |
} |
258 | 274 |
} |
259 | 275 |
|
260 | 276 |
#' Linear Adjustment Normalization |
261 |
-#' |
|
262 |
-#' Given a matrix with log expression values and a design matrix, this function fits a linear model |
|
263 |
-#' and removes the effects of the batch factor as well as of the linear variables encoded in W. |
|
264 |
-#' |
|
265 |
-#' @details The function assumes that the columns of the design matrix corresponding to the variable |
|
266 |
-#' for which expression needs to be adjusted, start with either the word "batch" or the letter "W" (case sensitive). |
|
267 |
-#' Any other covariate (including the intercept) is kept. |
|
268 |
-#' |
|
277 |
+#' |
|
278 |
+#' Given a matrix with log expression values and a design matrix, this function |
|
279 |
+#' fits a linear model and removes the effects of the batch factor |
|
280 |
+#' as well as of the linear variables encoded in W. |
|
281 |
+#' |
|
282 |
+#' @details The function assumes that the columns of the design matrix |
|
283 |
+#' corresponding to the variable for which expression needs to be adjusted, |
|
284 |
+#' start with either the word "batch" or the letter "W" (case sensitive). Any |
|
285 |
+#' other covariate (including the intercept) is kept. |
|
286 |
+#' |
|
269 | 287 |
#' @importFrom limma lmFit |
270 | 288 |
#' @export |
271 |
-#' |
|
272 |
-#' @param log_expr matrix. The log gene expression (genes in row, samples in columns). |
|
273 |
-#' @param design_mat matrix. The design matrix (usually the result of make_design). |
|
274 |
-#' @param batch factor. A factor with the batch information, identifying batch effect to be removed. |
|
289 |
+#' |
|
290 |
+#' @param log_expr matrix. The log gene expression (genes in row, samples in |
|
291 |
+#' columns). |
|
292 |
+#' @param design_mat matrix. The design matrix (usually the result of |
|
293 |
+#' make_design). |
|
294 |
+#' @param batch factor. A factor with the batch information, identifying batch |
|
295 |
+#' effect to be removed. |
|
275 | 296 |
#' @param weights matrix. A matrix of weights. |
276 | 297 |
#' @return The corrected log gene expression. |
298 |
+#' |
|
299 |
+#' @examples |
|
300 |
+#' |
|
301 |
+#' set.seed(141) |
|
302 |
+#' bio = as.factor(rep(c(1,2),each = 2)) |
|
303 |
+#' batch = as.factor(rep(c(1,2),2)) |
|
304 |
+#' design_mat = make_design(bio,batch, W = NULL) |
|
305 |
+#' |
|
306 |
+#' log_expr = matrix(rnorm(20),ncol = 4) |
|
307 |
+#' adjusted_log_expr = lm_adjust(log_expr = log_expr, |
|
308 |
+#' design_mat = design_mat, |
|
309 |
+#' batch = batch) |
|
310 |
+#' |
|
277 | 311 |
lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) { |
278 | 312 |
lm_object <- lmFit(log_expr, design = design_mat, weights = weights) |
279 | 313 |
|
... | ... |
@@ -1,20 +1,20 @@ |
1 |
-#' Fit Logistic Regression Model of FNR against set of positive control |
|
1 |
+#' Fit Logistic Regression Model of FNR against set of positive control |
|
2 | 2 |
#' (ubiquitously expressed) genes |
3 |
-#' |
|
4 |
-#' @details logit(Probability of False Negative) ~ a + b*(median log-expression) |
|
5 | 3 |
#' |
6 |
-#' @param expr matrix The data matrix in transcript-proportional units (genes in |
|
7 |
-#' rows, cells in columns). |
|
8 |
-#' @param pos_controls A logical, numeric, or character vector indicating |
|
9 |
-#' positive control genes that will be used to compute false-negative rate |
|
4 |
+#' @details logit(Probability of False Negative) ~ a + b*(median log-expr) |
|
5 |
+#' |
|
6 |
+#' @param expr matrix A matrix of transcript-proportional units (genes in rows, |
|
7 |
+#' cells in columns). |
|
8 |
+#' @param pos_controls A logical, numeric, or character vector indicating |
|
9 |
+#' positive control genes that will be used to compute false-negative rate |
|
10 | 10 |
#' characteristics. User must provide at least 2 positive control genes. |
11 |
-#' @param fn_tresh Inclusive threshold for negative detection. Default 0.01. |
|
11 |
+#' @param fn_tresh Inclusive threshold for negative detection. Default 0.01. |
|
12 | 12 |
#' fn_tresh must be non-negative. |
13 |
-#' |
|
14 |
-#' @return A matrix of logistic regression coefficients corresponding to glm |
|
15 |
-#' fits in each sample (a and b in columns 1 and 2 respectively). If the a & b |
|
16 |
-#' fit does not converge, b is set to zero and only a is estimated. |
|
17 |
-#' |
|
13 |
+#' |
|
14 |
+#' @return A matrix of logistic regression coefficients corresponding to glm |
|
15 |
+#' fits in each sample (a and b in columns 1 and 2 respectively). If the |
|
16 |
+#' a & b fit does not converge, b is set to zero and only a is estimated. |
|
17 |
+#' |
|
18 | 18 |
#' @importFrom boot logit |
19 | 19 |
#' @importFrom matrixStats rowMedians |
20 | 20 |
#' @export |
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
#' mat <- matrix(rpois(1000, lambda = 3), ncol=10) |
24 | 24 |
#' mat = mat * matrix(1-rbinom(1000, size = 1, prob = .01), ncol=10) |
25 | 25 |
#' fnr_out = simple_FNR_params(mat,pos_controls = 1:10) |
26 |
-#' |
|
26 |
+#' |
|
27 | 27 |
simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){ |
28 | 28 |
|
29 | 29 |
stopifnot(!any(is.na(pos_controls))) |
... | ... |
@@ -53,13 +53,16 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){ |
53 | 53 |
# Median log-expression in positive observations |
54 | 54 |
mu_obs = log(rowMedians(pos_expr,na.rm = TRUE)) |
55 | 55 |
if(any(is.na(mu_obs))){ |
56 |
- stop("Median log-expression in positive observations NA for some positive control gene/s") |
|
56 |
+ stop(paste0("Median log-expression in positive ", |
|
57 |
+ "observations NA for some positive control gene/s")) |
|
57 | 58 |
} |
58 | 59 |
|
59 | 60 |
# Logistic Regression Model of FNR |
60 | 61 |
logistic_coef = matrix(0,ncol(drop_outs),2) |
61 | 62 |
for (si in seq_len(ncol(drop_outs))){ |
62 |
- fit = suppressWarnings(glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,family=binomial(logit))) |
|
63 |
+ fit = suppressWarnings(glm(cbind(drop_outs[,si], |
|
64 |
+ 1 - drop_outs[,si]) ~ mu_obs, |
|
65 |
+ family=binomial(logit))) |
|
63 | 66 |
if(fit$converged){ |
64 | 67 |
logistic_coef[si,] = fit$coefficients |
65 | 68 |
} else { |
... | ... |
@@ -69,72 +72,74 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){ |
69 | 72 |
return(logistic_coef) |
70 | 73 |
} |
71 | 74 |
|
72 |
-#'metric-based sample filtering: function to filter single-cell RNA-Seq |
|
75 |
+#'metric-based sample filtering: function to filter single-cell RNA-Seq |
|
73 | 76 |
#'libraries. |
74 | 77 |
#' |
75 |
-#'This function returns a sample-filtering report for each cell in the input |
|
78 |
+#'This function returns a sample-filtering report for each cell in the input |
|
76 | 79 |
#'expression matrix, describing which filtering criteria are satisfied. |
77 | 80 |
#' |
78 |
-#'@details For each primary criterion (metric), a sample is evaluated based on 4 |
|
79 |
-#' sub-criteria: 1) Hard (encoded) threshold 2) Adaptive thresholding via sd's |
|
80 |
-#' from the mean 3) Adaptive thresholding via mad's from the median 4) Adaptive |
|
81 |
-#' thresholding via sd's from the mean (after mixture modeling) A sample must |
|
82 |
-#' pass all sub-criteria to pass the primary criterion. |
|
83 |
-#' |
|
81 |
+#'@details For each primary criterion (metric), a sample is evaluated |
|
82 |
+#' based on 4 sub-criteria: 1) Hard (encoded) threshold 2) Adaptive |
|
83 |
+#' thresholding via sd's from the mean 3) Adaptive thresholding via |
|
84 |
+#' mad's from the median 4) Adaptive thresholding via sd's from the |
|
85 |
+#' mean (after mixture modeling) A sample must pass all sub-criteria |
|
86 |
+#' to pass the primary criterion. |
|
87 |
+#' |
|
84 | 88 |
#'@param expr matrix The data matrix (genes in rows, cells in columns). |
85 |
-#'@param nreads A numeric vector representing number of reads in each library. |
|
86 |
-#' Default to `colSums` of `expr`. |
|
87 |
-#'@param ralign A numeric vector representing the proportion of reads aligned to |
|
88 |
-#' the reference genome in each library. If NULL, filtered_ralign will be |
|
89 |
-#' returned NA. |
|
90 |
-#'@param gene_filter A logical vector indexing genes that will be used to |
|
91 |
-#' compute library transcriptome breadth. If NULL, filtered_breadth will be |
|
89 |
+#'@param nreads A numeric vector representing number of reads in each |
|
90 |
+#' library. Default to `colSums` of `expr`. |
|
91 |
+#'@param ralign A numeric vector representing the proportion of reads |
|
92 |
+#' aligned to the reference genome in each library. If NULL, filtered_ralign |
|
93 |
+#' will be returned NA. |
|
94 |
+#'@param gene_filter A logical vector indexing genes that will be used to |
|
95 |
+#' compute library transcriptome breadth. If NULL, filtered_breadth will be |
|
92 | 96 |
#' returned NA. |
93 |
-#'@param pos_controls A logical, numeric, or character vector indicating |
|
94 |
-#' positive control genes that will be used to compute false-negative rate |
|
97 |
+#'@param pos_controls A logical, numeric, or character vector indicating |
|
98 |
+#' positive control genes that will be used to compute false-negative rate |
|
95 | 99 |
#' characteristics. If NULL, filtered_fnr will be returned NA. |
96 |
-#'@param scale. logical. Will expression be scaled by total expression for FNR |
|
100 |
+#'@param scale. logical. Will expression be scaled by total expression for FNR |
|
97 | 101 |
#' computation? Default = FALSE |
98 |
-#'@param glen Gene lengths for gene-length normalization (normalized data used |
|
102 |
+#'@param glen Gene lengths for gene-length normalization (normalized data used |
|
99 | 103 |
#' in FNR computation). |
100 |
-#'@param AUC_range An array of two values, representing range over which FNR AUC |
|
101 |
-#' will be computed (log(expr_units)). Default c(0,15) |
|
102 |
-#'@param zcut A numeric value determining threshold Z-score for sd, mad, and |
|
104 |
+#'@param AUC_range An array of two values, representing range over which FNR |
|
105 |
+#' AUC will be computed (log(expr_units)). Default c(0,15) |
|
106 |
+#'@param zcut A numeric value determining threshold Z-score for sd, mad, and |
|
103 | 107 |
#' mixture sub-criteria. Default 1. If NULL, only hard threshold sub-criteria |
104 | 108 |
#' will be applied. |
105 |
-#'@param mixture A logical value determining whether mixture modeling |
|
106 |
-#' sub-criterion will be applied per primary criterion (metric). If true, a dip |
|
107 |
-#' test will be applied to each metric. If a metric is multimodal, it is fit to |
|
108 |
-#' a two-component normal mixture model. Samples deviating zcut sd's from |
|
109 |
-#' optimal mean (in the inferior direction), have failed this sub-criterion. |
|
110 |
-#'@param dip_thresh A numeric value determining dip test p-value threshold. |
|
109 |
+#'@param mixture A logical value determining whether mixture modeling |
|
110 |
+#' sub-criterion will be applied per primary criterion (metric). If true, |
|
111 |
+#' a dip test will be applied to each metric. If a metric is multimodal, |
|
112 |
+#' it is fit to a two-component normal mixture model. Samples deviating zcut |
|
113 |
+#' sd's from optimal mean (in the inferior direction), have failed this |
|
114 |
+#' sub-criterion. |
|
115 |
+#'@param dip_thresh A numeric value determining dip test p-value threshold. |
|
111 | 116 |
#' Default 0.05. |
112 |
-#'@param hard_nreads numeric. Hard (lower bound on) nreads threshold. Default |
|
117 |
+#'@param hard_nreads numeric. Hard (lower bound on) nreads threshold. Default |
|
113 | 118 |
#' 25000. |
114 |
-#'@param hard_ralign numeric. Hard (lower bound on) ralign threshold. Default |
|
119 |
+#'@param hard_ralign numeric. Hard (lower bound on) ralign threshold. Default |
|
115 | 120 |
#' 15. |
116 |
-#'@param hard_breadth numeric. Hard (lower bound on) breadth threshold. Default |
|
117 |
-#' 0.2. |
|
121 |
+#'@param hard_breadth numeric. Hard (lower bound on) breadth threshold. |
|
122 |
+#' Default 0.2. |
|
118 | 123 |
#'@param hard_auc numeric. Hard (upper bound on) fnr auc threshold. Default 10. |
119 |
-#'@param suff_nreads numeric. If not null, serves as an overriding upper bound |
|
124 |
+#'@param suff_nreads numeric. If not null, serves as an overriding upper bound |
|
120 | 125 |
#' on nreads threshold. |
121 |
-#'@param suff_ralign numeric. If not null, serves as an overriding upper bound |
|
126 |
+#'@param suff_ralign numeric. If not null, serves as an overriding upper bound |
|
122 | 127 |
#' on ralign threshold. |
123 |
-#'@param suff_breadth numeric. If not null, serves as an overriding upper bound |
|
124 |
-#' on breadth threshold. |
|
125 |
-#'@param suff_auc numeric. If not null, serves as an overriding lower bound on |
|
128 |
+#'@param suff_breadth numeric. If not null, serves as an overriding upper |
|
129 |
+#' bound on breadth threshold. |
|
130 |
+#'@param suff_auc numeric. If not null, serves as an overriding lower bound on |
|
126 | 131 |
#' fnr auc threshold. |
127 | 132 |
#'@param plot logical. Should a plot be produced? |
128 | 133 |
#'@param hist_breaks hist() breaks argument. Ignored if `plot=FALSE`. |
129 | 134 |
#'@param ... Arguments to be passed to methods. |
130 |
-#' |
|
131 |
-#' |
|
135 |
+#' |
|
136 |
+#' |
|
132 | 137 |
#'@return A list with the following elements: \itemize{ \item{filtered_nreads}{ |
133 | 138 |
#' Logical. Sample has too few reads.} \item{filtered_ralign}{ Logical. Sample |
134 | 139 |
#' has too few reads aligned.} \item{filtered_breadth}{ Logical. Samples has |
135 | 140 |
#' too few genes detected (low breadth).} \item{filtered_fnr}{ Logical. Sample |
136 | 141 |
#' has a high FNR AUC.} } |
137 |
-#' |
|
142 |
+#' |
|
138 | 143 |
#'@importFrom mixtools normalmixEM |
139 | 144 |
#'@importFrom diptest dip.test |
140 | 145 |
#'@importFrom boot inv.logit |
... | ... |
@@ -146,14 +151,18 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){ |
146 | 151 |
#' qc = as.matrix(cbind(colSums(mat),colSums(mat > 0))) |
147 | 152 |
#' rownames(qc) = colnames(mat) |
148 | 153 |
#' colnames(qc) = c("NCOUNTS","NGENES") |
149 |
-#' mfilt = metric_sample_filter(expr = mat,nreads = qc[,"NCOUNTS"], plot = TRUE, hard_nreads = 0) |
|
154 |
+#' mfilt = metric_sample_filter(expr = mat,nreads = qc[,"NCOUNTS"], |
|