Browse code

Reformat lines and fixed bugs in easy wrapper

mbcole authored on 22/11/2016 22:48:59
Showing 46 changed files

... ...
@@ -1,7 +1,9 @@
1 1
 Package: scone
2 2
 Version: 0.99.2
3 3
 Title: Single Cell Overview of Normalized Expression data
4
-Description: SCONE is an R package for comparing and ranking the performance of different normalization schemes for single-cell RNA-seq and other high-throughput analyses.
4
+Description: SCONE is an R package for comparing and ranking the performance of
5
+	different normalization schemes for single-cell RNA-seq and other 
6
+	high-throughput analyses.
5 7
 Authors@R: c(person("Michael", "Cole", email = "mbeloc@gmail.com",
6 8
 	          role = c("aut", "cre", "cph")),
7 9
 	     person("Davide", "Risso", email = "risso.davide@gmail.com",
... ...
@@ -43,7 +45,6 @@ Imports:
43 45
   ggplot2,
44 46
   plotly,
45 47
   reshape2,
46
-  scran,
47 48
   visNetwork
48 49
 Suggests:
49 50
   BiocStyle,
... ...
@@ -60,6 +61,9 @@ biocViews:
60 61
   GeneExpression,
61 62
   RNASeq,
62 63
   Software,
63
-  Transcriptomics
64
+  Transcriptomics,
65
+  Sequencing,
66
+  SingleCell,
67
+  Coverage
64 68
 BugReports: https://github.com/YosefLab/scone/issues
65 69
 RoxygenNote: 5.0.1
... ...
@@ -4,7 +4,6 @@ export(DESEQ_FN)
4 4
 export(DESEQ_FN_POS)
5 5
 export(FQT_FN)
6 6
 export(FQ_FN)
7
-export(SCRAN_FN)
8 7
 export(TMM_FN)
9 8
 export(UQ_FN)
10 9
 export(UQ_FN_POS)
... ...
@@ -98,8 +97,6 @@ importFrom(rhdf5,h5ls)
98 97
 importFrom(rhdf5,h5read)
99 98
 importFrom(rhdf5,h5write)
100 99
 importFrom(rhdf5,h5write.default)
101
-importFrom(scran,computeSumFactors)
102
-importFrom(scran,quickCluster)
103 100
 importFrom(shiny,br)
104 101
 importFrom(shiny,brushedPoints)
105 102
 importFrom(shiny,column)
... ...
@@ -1,71 +1,72 @@
1 1
 #' Class SconeExperiment
2
-#'
3
-#' @description Objects of this class store, at minimum, a gene
4
-#'   expression matrix and a set of covariates (sample metadata) useful for
5
-#'   running \code{\link{scone}}. These include, the quality control (QC)
6
-#'   metrics, batch information, and biological classes of interest (if
7
-#'   available).
8
-#'
9
-#' @description The typical way of creating \code{SconeExperiment} objects is
10
-#'   via a call to the \code{\link{sconeExperiment}} function or to the
11
-#'   \code{\link{scone}} function. If the object is a result to a
12
-#'   \code{\link{scone}} call, it will contain the results, e.g., the
13
-#'   performance metrics, scores, and normalization workflow comparisons. (See
2
+#' 
3
+#' @description Objects of this class store, at minimum, a gene expression 
4
+#'   matrix and a set of covariates (sample metadata) useful for running 
5
+#'   \code{\link{scone}}. These include, the quality control (QC) metrics,
6
+#'   batch information, and biological classes of interest (if available).
7
+#'   
8
+#' @description The typical way of creating \code{SconeExperiment} objects is 
9
+#'   via a call to the \code{\link{sconeExperiment}} function or to the 
10
+#'   \code{\link{scone}} function. If the object is a result to a 
11
+#'   \code{\link{scone}} call, it will contain the results, e.g., the 
12
+#'   performance metrics, scores, and normalization workflow comparisons. (See 
14 13
 #'   Slots for a full list).
15
-#'
16
-#' @description This object extends the
14
+#'   
15
+#' @description This object extends the 
17 16
 #'   \code{\linkS4class{SummarizedExperiment}} class.
18
-#'
19
-#' @details The QC matrix, biological class, and batch information are stored as
20
-#'   elements of the `colData` of the object.
21
-#' @details The positive and negative control genes are stored as elements of
22
-#'   the `rowData` of the object.
23
-#'
17
+#'   
18
+#' @details The QC matrix, biological class, and batch information are 
19
+#'   stored as elements of the `colData` of the object.
20
+#' @details The positive and negative control genes are stored as 
21
+#'   elements of the `rowData` of the object.
22
+#'   
24 23
 #' @import methods
25 24
 #' @import SummarizedExperiment
26 25
 #' @importClassesFrom SummarizedExperiment SummarizedExperiment
27
-#'
26
+#'   
28 27
 #' @name SconeExperiment-class
29 28
 #' @import methods
30 29
 #' @aliases SconeExperiment
31
-#'
30
+#'   
32 31
 #' @export
33
-#'
34
-#' @slot which_qc integer. Index of columns of `colData` that contain the QC
35
-#'   metrics.
36
-#' @slot which_bio integer. Index of the column of `colData` that contains the
37
-#'   biological classes information (it must be a factor).
38
-#' @slot which_batch integer. Index of the column of `colData` that contains the
39
-#'   batch information (it must be a factor).
40
-#' @slot which_negconruv integer. Index of the column of `rowData` that contains
41
-#'   a logical vector indicating which genes to use as negative controls to
42
-#'   infer the factors of unwanted variation in RUV.
43
-#' @slot which_negconeval integer. Index of the column of `rowData` that
32
+#' 
33
+#' @slot which_qc integer. Index of columns of `colData` that contain the
34
+#'   QC metrics.
35
+#' @slot which_bio integer. Index of the column of `colData` that contains
36
+#'   the biological classes information (it must be a factor).
37
+#' @slot which_batch integer. Index of the column of `colData`
38
+#'   that contains the batch information (it must be a factor).
39
+#' @slot which_negconruv integer. Index of the column of `rowData` that
44 40
 #'   contains a logical vector indicating which genes to use as negative
41
+#'   controls to infer the factors of unwanted variation in RUV.
42
+#' @slot which_negconeval integer. Index of the column of `rowData` that 
43
+#'   contains a logical vector indicating which genes to use as negative 
44
+#'   controls to evaluate the performance of the normalizations.
45
+#' @slot which_poscon integer. Index of the column of `rowData` that
46
+#'   contains a logical vector indicating which genes to use as positive
45 47
 #'   controls to evaluate the performance of the normalizations.
46
-#' @slot which_poscon integer. Index of the column of `rowData` that contains a
47
-#'   logical vector indicating which genes to use as positive controls to
48
-#'   evaluate the performance of the normalizations.
49
-#' @slot hdf5_pointer character. A string specifying to which file to write /
50
-#'   read the normalized data.
51
-#' @slot imputation_fn list of functions used by scone for the imputation step.
48
+#' @slot hdf5_pointer character. A string specifying to which 
49
+#'   file to write / read the normalized data.
50
+#' @slot imputation_fn list of functions used by scone for 
51
+#'   the imputation step.
52 52
 #' @slot scaling_fn list of functions used by scone for the scaling step.
53
-#' @slot scone_metrics matrix. Matrix containing the "raw" performance metrics.
54
-#'   See \code{\link{scone}} for a description of each metric.
55
-#' @slot scone_scores matrix. Matrix containing the performance scores
56
-#'   (transformed metrics). See \code{\link{scone}} for a discussion on the
53
+#' @slot scone_metrics matrix. Matrix containing the "raw" 
54
+#'   performance metrics. See \code{\link{scone}} for a 
55
+#'   description of each metric.
56
+#' @slot scone_scores matrix. Matrix containing the performance scores 
57
+#'   (transformed metrics). See \code{\link{scone}} for a discussion on the 
57 58
 #'   difference between scores and metrics.
58
-#' @slot scone_params data.frame. A data frame containing the normalization
59
-#'   schemes applied to the data and compared.
60
-#' @slot scone_run character. Whether \code{\link{scone}} was run and in
61
-#'   which mode ("no", "in_memory", "hdf5").
59
+#' @slot scone_params data.frame. A data frame containing
60
+#'   the normalization schemes applied to the data and compared.
61
+#' @slot scone_run character. Whether \code{\link{scone}} was 
62
+#'   run and in which mode ("no", "in_memory", "hdf5").
62 63
 #' @slot is_log logical. Are the expression data in log scale?
63
-#' @slot nested logical. Is batch nested within bio? (Automatically set by
64
-#'   \code{\link{scone}}).
65
-#' @slot rezero logical. TRUE if \code{\link{scone}} was run with
64
+#' @slot nested logical. Is batch nested within bio? 
65
+#'   (Automatically set by \code{\link{scone}}).
66
+#' @slot rezero logical. TRUE if \code{\link{scone}} was run with 
66 67
 #'   \code{rezero=TRUE}.
67 68
 #' @slot impute_args list. Arguments passed to all imputation functions.
68
-#'
69
+#'   
69 70
 setClass(
70 71
   Class = "SconeExperiment",
71 72
   contains = "SummarizedExperiment",
... ...
@@ -130,7 +131,8 @@ setValidity("SconeExperiment", function(object) {
130 131
     return("Only one set of negative controls for RUV can be specified.")
131 132
   }
132 133
   if(length(object@which_negconeval) > 1) {
133
-    return("Only one set of negative controls for evaluation can be specified.")
134
+    return(paste0("Only one set of negative controls ",
135
+                  "for evaluation can be specified."))
134 136
   }
135 137
   if(length(object@which_poscon) > 1) {
136 138
     return("Only one set of positive controls can be specified.")
... ...
@@ -180,7 +182,8 @@ setValidity("SconeExperiment", function(object) {
180 182
       return(paste0("File ", object@hdf5_pointer, " not found."))
181 183
     }
182 184
     if(object@scone_run == "no" && file.exists(object@hdf5_pointer)) {
183
-      return(paste0("File ", object@hdf5_pointer, " exists. Please specify a new file."))
185
+      return(paste0("File ", object@hdf5_pointer,
186
+                    " exists. Please specify a new file."))
184 187
     }
185 188
   }
186 189
 
... ...
@@ -191,17 +194,17 @@ setValidity("SconeExperiment", function(object) {
191 194
 ## Constructor
192 195
 
193 196
 #' @rdname SconeExperiment-class
194
-#'
197
+#'   
195 198
 #' @description The constructor \code{sconeExperiment} creates an object of the
196 199
 #'   class \code{SconeExperiment}.
197
-#'
198
-#' @param object Either a matrix or a \code{\link{SummarizedExperiment}}
200
+#'   
201
+#' @param object Either a matrix or a \code{\link{SummarizedExperiment}} 
199 202
 #'   containing the raw gene expression.
200 203
 #' @param ... see specific S4 methods for additional arguments.
201 204
 #' @export
202
-#'
205
+#' 
203 206
 #' @examples
204
-#'
207
+#' 
205 208
 #' nrows <- 200
206 209
 #' ncols <- 6
207 210
 #' counts <- matrix(rpois(nrows * ncols, lambda=10), nrows)
... ...
@@ -209,12 +212,12 @@ setValidity("SconeExperiment", function(object) {
209 212
 #' coldata <- data.frame(bio=gl(2, 3))
210 213
 #' se <- SummarizedExperiment(assays=SimpleList(counts=counts),
211 214
 #'                           rowData=rowdata, colData=coldata)
212
-#'
215
+#' 
213 216
 #' scone1 <- sconeExperiment(assay(se), bio=coldata$bio, poscon=rowdata$poscon)
214
-#'
217
+#' 
215 218
 #' scone2 <- sconeExperiment(se, which_bio=1L, which_poscon=1L)
216
-#'
217
-#'
219
+#' 
220
+#' 
218 221
 setGeneric(
219 222
   name = "sconeExperiment",
220 223
   def = function(object, ...) {
... ...
@@ -223,22 +226,22 @@ setGeneric(
223 226
 )
224 227
 
225 228
 #' @rdname SconeExperiment-class
226
-#'
227
-#' @param which_qc index that specifies which columns of `colData` correspond to
228
-#'   QC measures.
229
-#' @param which_bio index that specifies which column of `colData` corresponds
230
-#'   to `bio`.
231
-#' @param which_batch index that specifies which column of `colData` corresponds
232
-#'   to `batch`.
233
-#' @param which_negconruv index that specifies which column of `rowData` has
234
-#'   information on negative controls for RUV.
235
-#' @param which_negconeval index that specifies which column of `rowData` has
236
-#'   information on negative controls for evaluation.
237
-#' @param which_poscon index that specifies which column of `rowData` has
229
+#'   
230
+#' @param which_qc index that specifies which columns of `colData` 
231
+#'   correspond to QC measures.
232
+#' @param which_bio index that specifies which column of `colData`
233
+#'   corresponds to `bio`.
234
+#' @param which_batch index that specifies which column of `colData`
235
+#'   corresponds to `batch`.
236
+#' @param which_negconruv index that specifies which column of `rowData`
237
+#'   has information on negative controls for RUV.
238
+#' @param which_negconeval index that specifies which column of `rowData`
239
+#'   has information on negative controls for evaluation.
240
+#' @param which_poscon index that specifies which column of `rowData` has 
238 241
 #'   information on positive controls.
239 242
 #' @param is_log are the expression data in log scale?
240 243
 #' @export
241
-#'
244
+#' 
242 245
 setMethod(
243 246
   f = "sconeExperiment",
244 247
   signature = signature("SummarizedExperiment"),
... ...
@@ -275,18 +278,21 @@ setMethod(
275 278
 
276 279
 
277 280
 #' @rdname SconeExperiment-class
278
-#'
281
+#'   
279 282
 #' @param qc numeric matrix with the QC measures.
280 283
 #' @param bio factor with the biological class of interest.
281 284
 #' @param batch factor with the batch information.
282
-#' @param negcon_ruv a logical vector indicating which genes to use as negative controls for RUV.
283
-#' @param negcon_eval a logical vector indicating which genes to use as negative controls for evaluation.
284
-#' @param poscon a logical vector indicating which genes to use as positive controls.
285
-#'
285
+#' @param negcon_ruv a logical vector indicating which genes to use as negative
286
+#'   controls for RUV.
287
+#' @param negcon_eval a logical vector indicating which genes to use as 
288
+#'   negative controls for evaluation.
289
+#' @param poscon a logical vector indicating which genes to use as positive
290
+#'   controls.
291
+#'   
286 292
 #' @export
287 293
 #' 
288 294
 #' @return A \code{\link{sconeExperiment}} object.
289
-#'
295
+#'   
290 296
 setMethod(
291 297
   f = "sconeExperiment",
292 298
   signature = signature("matrix"),
... ...
@@ -7,36 +7,37 @@ setGeneric(
7 7
 )
8 8
 
9 9
 #' Retrieve Normalized Matrix
10
-#'
11
-#' Given a \code{SconeExperiment} object created by a call to scone, it will
10
+#' 
11
+#' Given a \code{SconeExperiment} object created by a call to scone, it will 
12 12
 #' return a matrix of normalized counts (in log scale if \code{log=TRUE}).
13
-#'
13
+#' 
14 14
 #' @details If \code{\link{scone}} was run with \code{return_norm="in_memory"},
15
-#'   this function simply retrieves the normalized data from the \code{assays}
15
+#'   this function simply retrieves the normalized data from the \code{assays} 
16 16
 #'   slote of \code{object}.
17
-#'
17
+#'   
18 18
 #' @details If \code{\link{scone}} was run with \code{return_norm="hdf5"}, this
19 19
 #'   function will read the normalized matrix from the specified hdf5 file.
20
-#'
21
-#' @details If \code{\link{scone}} was run with \code{return_norm="no"}, this
20
+#'   
21
+#' @details If \code{\link{scone}} was run with \code{return_norm="no"}, this 
22 22
 #'   function will compute the normalized matrix on the fly.
23
-#'
24
-#' @param x a \code{\link{sconeExperiment}} object containing the results of
23
+#'   
24
+#' @param x a \code{\link{sconeExperiment}} object containing the results of 
25 25
 #'   \code{\link{scone}}.
26
-#' @param method character or numeric. Either a string identifying the
27
-#'   normalization scheme to be retrieved, or a numeric index with the rank of
28
-#'   the normalization method to retrieve (according to scone ranking of
26
+#' @param method character or numeric. Either a string identifying the 
27
+#'   normalization scheme to be retrieved, or a numeric index with the rank of 
28
+#'   the normalization method to retrieve (according to scone ranking of 
29 29
 #'   normalizations).
30 30
 #' @param ... additional arguments for specific methods.
31
-#'
31
+#'   
32 32
 #' @return A matrix of normalized counts in log-scale.
33
-#' 
33
+#'   
34 34
 #' @examples
35 35
 #' mat <- matrix(rpois(500, lambda = 5), ncol=10)
36 36
 #' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
37 37
 #' obj <- sconeExperiment(mat)
38 38
 #' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN),
39
-#'            evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam())
39
+#'            evaluate=TRUE, k_ruv=0, k_qc=0, 
40
+#'            eval_kclust=2, bpparam = BiocParallel::SerialParam())
40 41
 #' norm = get_normalized(res,1)
41 42
 #'            
42 43
 #' 
... ...
@@ -66,7 +67,8 @@ setGeneric(
66 67
 #' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
67 68
 #' obj <- sconeExperiment(mat)
68 69
 #' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN),
69
-#'            evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam())
70
+#'            evaluate=TRUE, k_ruv=0, k_qc=0, 
71
+#'            eval_kclust=2, bpparam = BiocParallel::SerialParam())
70 72
 #' null_design = get_design(res,1)
71 73
 #' 
72 74
 setGeneric(
... ...
@@ -77,28 +79,30 @@ setGeneric(
77 79
 )
78 80
 
79 81
 #' Get a subset of normalizations from a SconeExperiment object
80
-#'
82
+#' 
81 83
 #' @description This method let a user extract a subset of normalizations. This
82
-#'   is useful when the original dataset is large and/or many normalization
84
+#'   is useful when the original dataset is large and/or many normalization 
83 85
 #'   schemes have been applied.
84
-#'
85
-#' @description In such cases, the user may want to run scone in mode
86
-#'   \code{return_norm = "no"}, explore the results, and then select the top
86
+#'   
87
+#' @description In such cases, the user may want to run scone in mode 
88
+#'   \code{return_norm = "no"}, explore the results, and then select the top 
87 89
 #'   performing methods for additional exploration.
88
-#'
90
+#'   
89 91
 #' @param x a \code{SconeExperiment} object.
90
-#' @param methods either character or numeric specifying the normalizations to select.
91
-#' 
92
+#' @param methods either character or numeric specifying the normalizations to
93
+#'   select.
94
+#'   
92 95
 #' @return A \code{SconeExperiment} object with selected method data.
93
-#' 
96
+#'   
94 97
 #' @examples
95 98
 #' mat <- matrix(rpois(500, lambda = 5), ncol=10)
96 99
 #' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
97 100
 #' obj <- sconeExperiment(mat)
98 101
 #' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN),
99
-#'            evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam())
102
+#'            evaluate=TRUE, k_ruv=0, k_qc=0, 
103
+#'            eval_kclust=2, bpparam = BiocParallel::SerialParam())
100 104
 #' select_res = select_methods(res,1:2)
101
-#'
105
+#' 
102 106
 setGeneric(
103 107
   name = "select_methods",
104 108
   def = function(x, methods) {
... ...
@@ -107,8 +111,8 @@ setGeneric(
107 111
 )
108 112
 
109 113
 #' Get Negative and Positive Controls
110
-#'
111
-#' @aliases get_negconeval get_poscon get_negconruv,SconeExperiment-method
114
+#' 
115
+#' @aliases get_negconeval get_poscon get_negconruv,SconeExperiment-method 
112 116
 #'   get_negconeval,SconeExperiment-method get_poscon,SconeExperiment-method
113 117
 setGeneric(
114 118
   name = "get_negconruv",
... ...
@@ -134,7 +138,7 @@ setGeneric(
134 138
 )
135 139
 
136 140
 #' Get Quality Control Matrix
137
-#'
141
+#' 
138 142
 #' @aliases get_qc,SconeExperiment-method
139 143
 setGeneric(
140 144
   name = "get_qc",
... ...
@@ -163,8 +167,8 @@ setGeneric(
163 167
 )
164 168
 
165 169
 #' Extract scone scores
166
-#'
167
-#' @aliases get_scores get_score,SconeExperiment-method get_score_ranks
170
+#' 
171
+#' @aliases get_scores get_score,SconeExperiment-method get_score_ranks 
168 172
 #'   get_score_ranks,SconeExperiment-method
169 173
 #'   
170 174
 #' @examples
... ...
@@ -172,7 +176,8 @@ setGeneric(
172 176
 #' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
173 177
 #' obj <- sconeExperiment(mat)
174 178
 #' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN),
175
-#'            evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam())
179
+#'            evaluate=TRUE, k_ruv=0, k_qc=0, 
180
+#'            eval_kclust=2, bpparam = BiocParallel::SerialParam())
176 181
 #' scores = get_scores(res)
177 182
 #' score_ranks = get_score_ranks(res)
178 183
 #' 
... ...
@@ -192,7 +197,7 @@ setGeneric(
192 197
 )
193 198
 
194 199
 #' Extract scone parameters
195
-#'
200
+#' 
196 201
 #' @aliases get_params get_params,SconeExperiment-method
197 202
 #' @examples
198 203
 #' mat <- matrix(rpois(500, lambda = 5), ncol=10)
... ...
@@ -1,10 +1,11 @@
1 1
 #' Upper-quartile normalization wrapper.
2 2
 #' @importFrom EDASeq betweenLaneNormalization
3
-#' @details SCONE scaling wrapper for \code{\link[EDASeq]{betweenLaneNormalization}}).
3
+#' @details SCONE scaling wrapper for
4
+#'   \code{\link[EDASeq]{betweenLaneNormalization}}).
4 5
 #' @export
5 6
 #' @param ei Numerical matrix. (rows = genes, cols = samples).
6 7
 #' @return Upper-quartile normalized matrix.
7
-#' 
8
+#'   
8 9
 #' @examples
9 10
 #' ei <- matrix(0:20,nrow = 7)
10 11
 #' eo <- UQ_FN(ei)
... ...
@@ -15,11 +16,12 @@ UQ_FN = function(ei){
15 16
 }
16 17
 
17 18
 #' Upper-quartile normalization derived from positive data.
18
-#' @details SCONE scaling function scales expression data by upper quartile of positive data.
19
+#' @details SCONE scaling function scales expression data by upper quartile of
20
+#'   positive data.
19 21
 #' @export
20 22
 #' @param ei Numerical matrix. (rows = genes, cols = samples).
21 23
 #' @return Upper-quartile (positive) normalized matrix.
22
-#' 
24
+#'   
23 25
 #' @examples
24 26
 #' ei <- matrix(0:20,nrow = 7)
25 27
 #' ei[1:3,] <- 0
... ...
@@ -40,11 +42,12 @@ UQ_FN_POS = function(ei){
40 42
 
41 43
 #' Full-quantile normalization wrapper.
42 44
 #' @importFrom aroma.light normalizeQuantileRank.matrix
43
-#' @details SCONE "scaling" wrapper for \code{\link[aroma.light]{normalizeQuantileRank.matrix}}).
45
+#' @details SCONE "scaling" wrapper for
46
+#'   \code{\link[aroma.light]{normalizeQuantileRank.matrix}}).
44 47
 #' @export
45 48
 #' @param ei Numerical matrix. (rows = genes, cols = samples).
46 49
 #' @return Full-quantile normalized matrix.
47
-#' 
50
+#'   
48 51
 #' @examples
49 52
 #' ei <- matrix(0:20,nrow = 7)
50 53
 #' eo <- FQ_FN(ei)
... ...
@@ -55,7 +58,8 @@ FQ_FN = function(ei){
55 58
 }
56 59
 
57 60
 #' @rdname FQ_FN
58
-#' @details FQT_FN handles ties carefully (see \code{\link[limma]{normalizeQuantiles}}).
61
+#' @details FQT_FN handles ties carefully (see
62
+#'   \code{\link[limma]{normalizeQuantiles}}).
59 63
 #' @export
60 64
 #' 
61 65
 #' @examples
... ...
@@ -69,11 +73,12 @@ FQT_FN = function(ei){
69 73
 
70 74
 #' DESeq size factor normalization wrapper.
71 75
 #' @importFrom DESeq estimateSizeFactorsForMatrix
72
-#' @details SCONE scaling wrapper for \code{\link[DESeq]{estimateSizeFactorsForMatrix}}).
76
+#' @details SCONE scaling wrapper for
77
+#'   \code{\link[DESeq]{estimateSizeFactorsForMatrix}}).
73 78
 #' @export
74 79
 #' @param ei Numerical matrix. (rows = genes, cols = samples).
75 80
 #' @return DESeq size factor normalized matrix.
76
-#' 
81
+#'   
77 82
 #' @examples
78 83
 #' ei <- matrix(0:20,nrow = 7)
79 84
 #' eo <- DESEQ_FN(ei)
... ...
@@ -85,11 +90,12 @@ DESEQ_FN = function(ei){
85 90
 }
86 91
 
87 92
 #' DESeq size factor normalization derived from positive data.
88
-#' @details SCONE scaling function scales expression data by DESeq size factor derived from positive data.
93
+#' @details SCONE scaling function scales expression data by DESeq size factor
94
+#'   derived from positive data.
89 95
 #' @export
90 96
 #' @param ei Numerical matrix. (rows = genes, cols = samples).
91 97
 #' @return DESeq size factor (positive) normalized matrix.
92
-#' 
98
+#'   
93 99
 #' @examples
94 100
 #' ei <- matrix(0:20,nrow = 7)
95 101
 #' ei[1:3,] <- 0
... ...
@@ -101,7 +107,10 @@ DESEQ_FN_POS = function(ei){
101 107
   if(!is.null(dim(ei))){
102 108
     y = ei
103 109
     y[y == 0] = NA # Matrix with zeroes replaced w/ NA
104
-    geom_mean = exp(apply(log(y),1,sum,na.rm = TRUE)/rowSums(!is.na(y))) # Compute Geometric Mean of Expression for Each Gene (Use positive data only)
110
+    geom_mean = exp(apply(log(y),1,sum,na.rm = TRUE)/rowSums(!is.na(y)))
111
+    # Compute Geometric Mean of Expression
112
+    # for Each Gene (Use positive data only)
113
+    
105 114
   }else{stop("Null imput matrix dimension.")}
106 115
   if(!any(geom_mean > 0)){stop("Geometric mean non-positive for all genes.")}
107 116
 
... ...
@@ -128,7 +137,7 @@ DESEQ_FN_POS = function(ei){
128 137
 #' @export
129 138
 #' @param ei Numerical matrix. (rows = genes, cols = samples).
130 139
 #' @return TMM normalized matrix.
131
-#' 
140
+#'   
132 141
 #' @examples
133 142
 #' ei <- matrix(0:20,nrow = 7)
134 143
 #' eo <- TMM_FN(ei)
... ...
@@ -137,23 +146,4 @@ TMM_FN = function(ei){
137 146
   size_fac = calcNormFactors(ei,method = "TMM")
138 147
   eo = t(t(ei)/size_fac)
139 148
   return(eo)
140
-}
141
-
142
-#' Pooled Sample normalization wrapper.
143
-#'
144
-#' @description SCONE scaling wrapper for \code{\link[scran]{computeSumFactors}}
145
-#'   with clusters from \code{\link[scran]{quickCluster}}.
146
-#'
147
-#' @importFrom scran computeSumFactors
148
-#' @importFrom scran quickCluster
149
-#' @export
150
-#' @param ei Numerical matrix. (rows = genes, cols = samples).
151
-#' @return SCRAN size factor normalized matrix.
152
-#' 
153
-
154
-SCRAN_FN = function(ei){
155
-  clusters <- quickCluster(ei, min.size = 20)
156
-  size_fac = computeSumFactors(ei, cluster=clusters)
157
-  eo = t(t(ei)/size_fac)
158
-  return(eo)
159
-}
149
+}
160 150
\ No newline at end of file
... ...
@@ -1,33 +1,38 @@
1
-#' Function for biplotting with no point labels and with points color-coded according 
2
-#' to a quantitative variable. For example: the rank of normalization performance.
3
-#'
1
+#' Function for biplotting with no point labels and with
2
+#' points color-coded according to a quantitative variable.
3
+#' For example: the rank of normalization performance.
4
+#' 
4 5
 #' This function implements biplot for \code{\link[stats]{prcomp}} objects.
5
-#'
6
+#' 
6 7
 #' @param x \code{\link[stats]{prcomp}} object.
7
-#' @param y numeric. Quantitative values used to color the points. 
8
-#' If rank is FALSE, all values must be positive integers and less than or equal to the length of y.
9
-#' @param rank logical. If TRUE (default) y will be transformed by the rank() function
8
+#' @param y numeric. Quantitative values used to color the points. If rank is 
9
+#'   FALSE, all values must be positive integers and less than or equal to the 
10
+#'   length of y.
11
+#' @param rank logical. If TRUE (default) y will be transformed by the rank() 
12
+#'   function
10 13
 #' @param ties_method character. ties.method used by the rank() function
11
-#' @param choices numeric. 2 principal components to plot. Default to first two PCs.
12
-#' @param expand numeric. value used to adjust the spread of the arrows relative
13
-#'   to the points.
14
+#' @param choices numeric. 2 principal components to plot. Default to first two
15
+#'   PCs.
16
+#' @param expand numeric. value used to adjust the spread of the arrows
17
+#'   relative to the points.
14 18
 #' @param ... arguments passed to plot.
15
-#'
19
+#'   
16 20
 #' @importFrom grDevices colorRampPalette
17 21
 #' @export
18 22
 #' 
19 23
 #' @return Invisibly returns scaled point coordinates used in plot.
20
-#'
24
+#'   
21 25
 #' @examples
22 26
 #' mat <- matrix(rnorm(1000), ncol=10)
23 27
 #' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
24
-#'
28
+#' 
25 29
 #' pc <- prcomp(mat)
26
-#'
30
+#' 
27 31
 #' biplot_color(pc, rank(pc$x[,1]))
28
-#'
32
+#' 
29 33
 biplot_color <- function(x, y, rank = TRUE, 
30
-                         ties_method = c("max", "min", "first", "last", "random"),  
34
+                         ties_method = c("max", "min", 
35
+                                         "first", "last", "random"),
31 36
                          choices = 1:2, expand = 1, ...) {
32 37
 
33 38
   if(rank){
... ...
@@ -1,28 +1,32 @@
1 1
 #' Interactive biplot
2
-#'
3
-#' This is a wrapper around \code{\link{biplot_color}}, creating a shiny
4
-#' gadget to allow the user to select specific points in the graph.
5
-#'
6
-#' @details Since this is based on the shiny gadget feature, it will not work in
7
-#'   static documents, such as vignettes or markdown / knitr documents.
8
-#'   See \code{biplot_color} for more details on the internals.
9
-#'
2
+#' 
3
+#' This is a wrapper around \code{\link{biplot_color}}, creating a shiny gadget
4
+#' to allow the user to select specific points in the graph.
5
+#' 
6
+#' @details Since this is based on the shiny gadget feature, it will not work
7
+#'   in static documents, such as vignettes or markdown / knitr documents. See
8
+#'   \code{biplot_color} for more details on the internals.
9
+#'   
10 10
 #' @param x a \code{\link{sconeExperiment}} object.
11 11
 #' @param ... passed to \code{\link{biplot_color}}.
12
-#'
12
+#'   
13 13
 #' @importFrom miniUI gadgetTitleBar miniContentPanel miniPage gadgetTitleBar
14
-#' @importFrom shiny plotOutput renderPlot observeEvent brushedPoints runGadget verbatimTextOutput stopApp renderText
15
-#'
14
+#' @importFrom shiny plotOutput renderPlot observeEvent brushedPoints runGadget
15
+#'   verbatimTextOutput stopApp renderText
16
+#'   
16 17
 #' @export
17 18
 #' 
18
-#' @return A \code{\link{sconeExperiment}} object representing selected methods.
19
-#'
19
+#' @return A \code{\link{sconeExperiment}} object representing 
20
+#'   selected methods.
21
+#'   
20 22
 #' @examples
21 23
 #' mat <- matrix(rpois(1000, lambda = 5), ncol=10)
22 24
 #' colnames(mat) <- paste("X", 1:ncol(mat), sep="")
23 25
 #' obj <- sconeExperiment(mat)
24
-#' res <- scone(obj, scaling=list(none=identity, uq=UQ_FN, deseq=DESEQ_FN,  fq=FQT_FN),
25
-#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2, bpparam = BiocParallel::SerialParam())
26
+#' res <- scone(obj, scaling=list(none=identity,
27
+#'    uq=UQ_FN, deseq=DESEQ_FN,  fq=FQT_FN),
28
+#' evaluate=TRUE, k_ruv=0, k_qc=0, eval_kclust=2,
29
+#'    bpparam = BiocParallel::SerialParam())
26 30
 #' \dontrun{
27 31
 #' biplot_interactive(res)
28 32
 #' }
... ...
@@ -69,7 +73,10 @@ biplot_interactive <- function(x, ...) {
69 73
     # Handle the Done button being pressed.
70 74
     observeEvent(input$done, {
71 75
       # Return the brushed points. See ?shiny::brushedPoints.
72
-      names <- rownames(brushedPoints(data_out, input$plot_brush, xvar="PC1", yvar="PC2"))
76
+      names <- rownames(brushedPoints(data_out, 
77
+                                      input$plot_brush, 
78
+                                      xvar="PC1",
79
+                                      yvar="PC2"))
73 80
       out <- select_methods(x, names)
74 81
       stopApp(invisible(out))
75 82
     })
... ...
@@ -1,67 +1,66 @@
1 1
 #' Data: Positive and Negative Control Genes
2
-#'
3
-#' Sets of "positive" and "negative" control genes, useful arguments for
2
+#' 
3
+#' Sets of "positive" and "negative" control genes, useful arguments for 
4 4
 #' \code{\link{scone}}.
5
-#'
6
-#' These gene sets can be used as negative or positive controls, either 
7
-#' for RUV factor normalization or for evaluation and ranking of the 
8
-#' normalization workflows.
9
-#'
10
-#' @details Gene set datasets are in the form of \code{data.frame}, with 
11
-#'  the first column containing the gene symbols and an (optional) second 
12
-#'  column containing additional information (such as cortical layer or 
13
-#'  cell cycle phase).
14
-#'
15
-#' @details Note that the gene symbols follow the mouse conventions (i.e.
16
-#'  capitalized) or the human conventions (i.e, all upper-case), based on
17
-#'  the original publication. One can use the \code{\link[base]{toupper}},
18
-#'  \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} 
19
-#'  functions to alter symbol conventions.
20
-#'
21
-#' @details Mouse gene symbols in \code{cortical_markers} are transcribed
22
-#'  from Figure 3 of Molyneaux et al. (2007): "laminar-specific expression
23
-#'  of 66 genes within the neocortex." 
24 5
 #' 
25
-#' @details Human gene symbols in \code{housekeeping} are derived from the
26
-#'  list of "housekeeping" (HK) genes from the cDNA microarray analysis of
27
-#'  Eisenberg and Levanon (2003): "[HK genes] belong to the class of genes
28
-#'  that are EXPRESSED in all tissues." "... from 47 different human tissues
29
-#'  and cell lines."
30
-#'     
31
-#' @details Human gene symbols in \code{housekeeping_revised} are from 
32
-#'  Eisenberg and Levanon (2013): "This list provided ... is based on 
33
-#'  analysis of next-generation sequencing (RNA-seq) data. At least one 
34
-#'  variant of these genes is expressed in all tissues uniformly... The 
35
-#'  RefSeq transcript according to which we deemed the gene 
36
-#'  'housekeeping' is given." Housekeeping exons satisfy "(i) expression
37
-#'  observed in all tissues; (ii) low variance over tissues: 
38
-#'  standard-deviation [log2(RPKM)]<1; and (iii) no exceptional expression
39
-#'  in any single tissue; that is, no log-expression value differed from
40
-#'  the averaged log2(RPKM) by two (fourfold) or more." "We define a 
41
-#'  housekeeping gene as a gene for which at least one RefSeq transcript
42
-#'  has more than half of its exons meeting the previous criteria (thus
43
-#'  being housekeeping exons)."
6
+#' These gene sets can be used as negative or positive controls, either for RUV
7
+#' factor normalization or for evaluation and ranking of the normalization
8
+#' workflows.
9
+#' 
10
+#' @details Gene set datasets are in the form of \code{data.frame}, with the
11
+#'   first column containing the gene symbols and an (optional) second column
12
+#'   containing additional information (such as cortical layer or cell cycle
13
+#'   phase).
14
+#'   
15
+#' @details Note that the gene symbols follow the mouse conventions (i.e. 
16
+#'   capitalized) or the human conventions (i.e, all upper-case), based on the
17
+#'   original publication. One can use the \code{\link[base]{toupper}}, 
18
+#'   \code{\link[base]{tolower}}, and \code{\link[tools]{toTitleCase}} 
19
+#'   functions to alter symbol conventions.
20
+#'   
21
+#' @details Mouse gene symbols in \code{cortical_markers} are transcribed from
22
+#'   Figure 3 of Molyneaux et al. (2007): "laminar-specific expression of 66
23
+#'   genes within the neocortex."
24
+#'   
25
+#' @details Human gene symbols in \code{housekeeping} are derived from the list
26
+#'   of "housekeeping" genes from the cDNA microarray analysis of Eisenberg
27
+#'   and Levanon (2003): "[HK genes] belong to the class of genes that are
28
+#'   EXPRESSED in all tissues." "... from 47 different human tissues and cell
29
+#'   lines."
44 30
 #'   
45
-#' @details Human gene symbols in \code{cellcycle_genes} are from Macosko et al.
46
-#'   (2015) and represent a set of genes marking G1/S, S, G2/M, M, and M/G1
31
+#' @details Human gene symbols in \code{housekeeping_revised} from Eisenberg
32
+#'   and Levanon (2013): "This list provided ... is based on analysis of
33
+#'   next-generation sequencing (RNA-seq) data. At least one variant of these
34
+#'   genes is expressed in all tissues uniformly... The RefSeq transcript
35
+#'   according to which we deemed the gene 'housekeeping' is given."
36
+#'   Housekeeping exons satisfy "(i) expression observed in all tissues; (ii)
37
+#'   low variance over tissues: standard-deviation [log2(RPKM)]<1; and (iii) no
38
+#'   exceptional expression in any single tissue; that is, no log-expression
39
+#'   value differed from the averaged log2(RPKM) by two (fourfold) or more."
40
+#'   "We define a housekeeping gene as a gene for which at least one RefSeq
41
+#'   transcript has more than half of its exons meeting the previous criteria
42
+#'   (thus being housekeeping exons)."
43
+#'   
44
+#' @details Human gene symbols in \code{cellcycle_genes} from Macosko et al.
45
+#'   (2015) and represent a set of genes marking G1/S, S, G2/M, M, and M/G1 
47 46
 #'   phases.
48
-#'
49
-#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D..
50
-#'   Neuronal subtype specification in the cerebral cortex. Nature Reviews
47
+#'   
48
+#' @references Molyneaux, B.J., Arlotta, P., Menezes, J.R. and Macklis, J.D.. 
49
+#'   Neuronal subtype specification in the cerebral cortex. Nature Reviews 
51 50
 #'   Neuroscience, 2007, 8(6):427-437.
52
-#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact.
51
+#' @references Eisenberg E, Levanon EY. Human housekeeping genes are compact. 
53 52
 #'   Trends in Genetics, 2003, 19(7):362-5.
54
-#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited.
53
+#' @references Eisenberg E, Levanon EY. Human housekeeping genes, revisited. 
55 54
 #'   Trends in Genetics, 2013, 29(10):569-74.
56
-#' @references Macosko, E. Z., et al. Highly parallel genome-wide expression
57
-#'   profiling of individual cells using nanoliter droplets. Cell, 2015,
55
+#' @references Macosko, E. Z., et al. Highly parallel genome-wide expression 
56
+#'   profiling of individual cells using nanoliter droplets. Cell, 2015, 
58 57
 #'   161.5:1202-1214.
59
-#'
58
+#'   
60 59
 #' @name control_genes
61
-#'
60
+#'   
62 61
 #' @docType data
63 62
 #' @aliases cortical_markers housekeeping housekeeping_revised cellcycle_genes
64
-#'
63
+#'   
65 64
 #' @examples
66 65
 #' data(housekeeping)
67 66
 #' data(housekeeping_revised)
... ...
@@ -1,11 +1,13 @@
1
-#' @describeIn get_design If \code{method} is a character, it will return the
2
-#'   design matrix corresponding to the normalization scheme specified by the
3
-#'   character string. The string must be one of the \code{row.names} of the
4
-#'   slot \code{scone_params}.
5
-#'
1
+#' @describeIn get_design 
2
+#'   If 
3
+#'   \code{method} is a character, it will return the design
4
+#'   matrix corresponding to the normalization scheme specified
5
+#'   by the character string. The string must be one of the
6
+#'   \code{row.names} of the slot \code{scone_params}.
7
+#'   
6 8
 #' @export
7
-#'
8
-#'
9
+#' 
10
+#' 
9 11
 setMethod(
10 12
   f = "get_design",
11 13
   signature = signature(x = "SconeExperiment", method = "character"),
... ...
@@ -45,26 +47,32 @@ setMethod(
45 47
 
46 48
     }
47 49
 
48
-    parsed <- .parse_row(params, get_bio(x), get_batch(x), ruv_factors, qc_factors)
50
+    parsed <- .parse_row(params, get_bio(x), get_batch(x), 
51
+                         ruv_factors, qc_factors)
49 52
     design_mat <- make_design(parsed$bio, parsed$batch, parsed$W,
50
-                              nested=(x@nested & !is.null(parsed$bio) & !is.null(parsed$batch)))
53
+                              nested=(x@nested &
54
+                                        !is.null(parsed$bio) &
55
+                                        !is.null(parsed$batch)))
51 56
 
52 57
     return(design_mat)
53 58
   }
54 59
 )
55 60
 
56
-#' @describeIn get_design If \code{method} is a numeric, it will return the
57
-#'   design matrix according to the scone ranking.
58
-#'
59
-#' @details The numeric method will always return the design matrix
60
-#'   corresponding to row \code{method} of the \code{scone_params} slot. This
61
-#'   means that if \code{\link{scone}} was run with \code{eval=TRUE},
62
-#'   \code{get_design(x, 1)} will return the top ranked method. If
63
-#'   \code{\link{scone}} was run with \code{eval=FALSE}, \code{get_design(x, 1)}
64
-#'   will return the first normalization in the order saved by scone.
65
-#'
61
+#' @describeIn get_design
62
+#'   If
63
+#'   \code{method} is a numeric, it will return the design matrix
64
+#'   according to the scone ranking.
65
+#'   
66
+#' @details The numeric method will always return the design matrix 
67
+#'   corresponding to row \code{method} of the \code{scone_params} 
68
+#'   slot. This means that if \code{\link{scone}} was run with 
69
+#'   \code{eval=TRUE}, \code{get_design(x, 1)} will return the top
70
+#'   ranked method. If \code{\link{scone}} was run with 
71
+#'   \code{eval=FALSE}, \code{get_design(x, 1)} will return the first
72
+#'   normalization in the order saved by scone.
73
+#'   
66 74
 #' @export
67
-#'
75
+#' 
68 76
 setMethod(
69 77
   f = "get_design",
70 78
   signature = signature(x = "SconeExperiment", method = "numeric"),
... ...
@@ -1,13 +1,15 @@
1
-#' @describeIn get_normalized If \code{method} is a character, it will return
2
-#'   the normalized matrix corresponding to the normalization scheme specified
3
-#'   by the character string. The string must be one of the \code{row.names} of
4
-#'   the slot \code{scone_params}.
5
-#'
1
+#' @describeIn get_normalized 
2
+#'   If 
3
+#'   \code{method} is a character, it will return the normalized
4
+#'   matrix corresponding to the normalization scheme specified
5
+#'   by the character string.The string must be one of the 
6
+#'   \code{row.names} of the slot \code{scone_params}.
7
+#'   
6 8
 #' @importFrom rhdf5 h5ls h5read
7 9
 #' @export
8
-#'
10
+#' 
9 11
 #' @param log logical. Should the data be returned in log-scale
10
-#'
12
+#'   
11 13
 setMethod(
12 14
   f = "get_normalized",
13 15
   signature = signature(x = "SconeExperiment", method = "character"),
... ...
@@ -61,9 +63,12 @@ setMethod(
61 63
 
62 64
       }
63 65
 
64
-      parsed <- .parse_row(params, get_bio(x), get_batch(x), ruv_factors, qc_factors)
66
+      parsed <- .parse_row(params, get_bio(x), 
67
+                           get_batch(x), ruv_factors, qc_factors)
65 68
       design_mat <- make_design(parsed$bio, parsed$batch, parsed$W,
66
-                                nested=(x@nested & !is.null(parsed$bio) & !is.null(parsed$batch)))
69
+                                nested=(x@nested &
70
+                                          !is.null(parsed$bio) &
71
+                                          !is.null(parsed$batch)))
67 72
       adjusted <- lm_adjust(log1p(scaled), design_mat, get_batch(x))
68 73
       retval <- expm1(adjusted)
69 74
     }
... ...
@@ -77,18 +82,21 @@ setMethod(
77 82
 )
78 83
 
79 84
 
80
-#' @describeIn get_normalized If \code{method} is a numeric, it will return the
81
-#'   normalized matrix according to the scone ranking.
82
-#'
83
-#' @details The numeric method will always return the normalization
85
+#' @describeIn get_normalized 
86
+#'   If 
87
+#'   \code{method} is a numeric, it will return the normalized
88
+#'   matrix according to the scone ranking.
89
+#'   
90
+#' @details The numeric method will always return the normalization 
84 91
 #'   corresponding to row \code{method} of the \code{scone_params} slot. This
85
-#'   means that if \code{\link{scone}} was run with \code{eval=TRUE},
86
-#'   \code{get_normalized(x, 1)} will return the top ranked method. If
87
-#'   \code{\link{scone}} was run with \code{eval=FALSE}, \code{get_normalized(x,
88
-#'   1)} will return the first normalization in the order saved by scone.
89
-#'
92
+#'   means that if \code{\link{scone}} was run with \code{eval=TRUE}, 
93
+#'   \code{get_normalized(x, 1)} will return the top ranked method. If 
94
+#'   \code{\link{scone}} was run with \code{eval=FALSE}, 
95
+#'   \code{get_normalized(x,1)} will return the first normalization
96
+#'   in the order saved by scone.
97
+#'   
90 98
 #' @export
91
-#'
99
+#' 
92 100
 setMethod(
93 101
   f = "get_normalized",
94 102
   signature = signature(x = "SconeExperiment", method = "numeric"),
... ...
@@ -1,11 +1,11 @@
1 1
 #' @rdname get_params
2
-#'
2
+#'   
3 3
 #' @param x an object of class \code{\link{sconeExperiment}}.
4
-#'
4
+#'   
5 5
 #' @return A data.frame containing workflow parameters for each scone workflow.
6
-#'
6
+#'   
7 7
 #' @export
8
-#'
8
+#' 
9 9
 setMethod(
10 10
   f = "get_params",
11 11
   signature = signature(x = "SconeExperiment"),
... ...
@@ -14,13 +14,14 @@ setMethod(
14 14
   })
15 15
 
16 16
 #' @rdname get_scores
17
-#'
17
+#'   
18 18
 #' @param x an object of class \code{\link{sconeExperiment}}.
19
-#'
20
-#' @return \code{get_scores} returns a matrix with all (non-missing) scone scores, ordered by average score rank.
21
-#'
19
+#'   
20
+#' @return \code{get_scores} returns a matrix with all (non-missing) scone 
21
+#'   scores, ordered by average score rank.
22
+#'   
22 23
 #' @export
23
-#'
24
+#' 
24 25
 setMethod(
25 26
   f = "get_scores",
26 27
   signature = signature(x = "SconeExperiment"),
... ...
@@ -30,11 +31,11 @@ setMethod(
30 31
 })
31 32
 
32 33
 #' @rdname get_scores
33
-#' 
34
+#'   
34 35
 #' @return \code{get_score_ranks} returns a vector of average score ranks.
35
-#'
36
+#'   
36 37
 #' @export
37
-#'
38
+#' 
38 39
 setMethod(
39 40
   f = "get_score_ranks",
40 41
   signature = signature(x = "SconeExperiment"),
... ...
@@ -44,13 +45,13 @@ setMethod(
44 45
 
45 46
 
46 47
 #' @rdname get_negconruv
47
-#'
48
+#'   
48 49
 #' @param x an object of class \code{\link{sconeExperiment}}.
49
-#'
50
+#'   
50 51
 #' @return NULL or a logical vector.
51
-#'
52
-#' @return For \code{get_negconruv} the returned vector indicates which genes are negative controls
53
-#'  to be used for RUV.
52
+#'   
53
+#' @return For \code{get_negconruv} the returned vector indicates which genes
54
+#'   are negative controls to be used for RUV.
54 55
 setMethod(
55 56
   f = "get_negconruv",
56 57
   signature = signature(x = "SconeExperiment"),
... ...
@@ -64,9 +65,9 @@ setMethod(
64 65
 )
65 66
 
66 67
 #' @rdname get_negconruv
67
-#'
68
-#' @return For \code{get_negconeval} the returned vector indicates which genes are negative controls
69
-#'  to be used for evaluation.
68
+#'   
69
+#' @return For \code{get_negconeval} the returned vector indicates which genes
70
+#'   are negative controls to be used for evaluation.
70 71
 setMethod(
71 72
   f = "get_negconeval",
72 73
   signature = signature(x = "SconeExperiment"),
... ...
@@ -80,9 +81,9 @@ setMethod(
80 81
 )
81 82
 
82 83
 #' @rdname get_negconruv
83
-#'
84
-#' @return For \code{get_poscon} the returned vector indicates which genes are positive controls
85
-#'  to be used for evaluation.
84
+#'   
85
+#' @return For \code{get_poscon} the returned vector indicates which genes are 
86
+#'   positive controls to be used for evaluation.
86 87
 setMethod(
87 88
   f = "get_poscon",
88 89
   signature = signature(x = "SconeExperiment"),
... ...
@@ -96,9 +97,9 @@ setMethod(
96 97
 )
97 98
 
98 99
 #' @rdname get_qc
99
-#'
100
+#'   
100 101
 #' @param x an object of class \code{\link{sconeExperiment}}.
101
-#'
102
+#'   
102 103
 #' @return NULL or the quality control (QC) metric matrix.
103 104
 setMethod(
104 105
   f = "get_qc",
... ...
@@ -114,9 +115,9 @@ setMethod(
114 115
 )
115 116
 
116 117
 #' @rdname get_bio
117
-#'
118
+#'   
118 119
 #' @param x an object of class \code{\link{sconeExperiment}}.
119
-#'
120
+#'   
120 121
 #' @return NULL or a factor containing bio or batch covariate.
121 122
 setMethod(
122 123
   f = "get_bio",
... ...
@@ -144,19 +145,22 @@ setMethod(
144 145
 )
145 146
 
146 147
 #' Parse rows
147
-#'
148
-#' This function is used internally in scone to parse the variables used to generate the design matrices.
149
-#'
150
-#' @param pars character. A vector of parameters corresponding to a row of workflow parameters.
148
+#' 
149
+#' This function is used internally in scone to parse the variables used to
150
+#' generate the design matrices.
151
+#' 
152
+#' @param pars character. A vector of parameters corresponding to a row of
153
+#'   workflow parameters.
151 154
 #' @param bio factor. The biological covariate.
152 155
 #' @param batch factor. The batch covariate.
153
-#' @param ruv_factors list. A list containing the factors of unwanted variation (RUVg) for all upstream workflows.
156
+#' @param ruv_factors list. A list containing the factors of unwanted variation
157
+#'   (RUVg) for all upstream workflows.
154 158
 #' @param qc matrix. The principal components of the QC metric matrix.
155
-#'
159
+#'   
156 160
 #' @return A list with the variables to be passed to make_design.
157
-#' 
161
+#'   
158 162
 #' @keywords internal
159
-#' 
163
+#'   
160 164
 .parse_row <- function(pars, bio, batch, ruv_factors, qc) {
161 165
   
162 166
   # Define upstream workflow: imputation x scaling
... ...
@@ -185,22 +189,33 @@ setMethod(
185 189
 }
186 190
 
187 191
 #' Make a Design Matrix
188
-#'
189
-#' This function builds a design matrix for the Adjustment Normalization Step, in which 
190
-#' covariates are two (possibly nested) categorical factors and one or more continuous variables.
191
-#'
192
-#' @details If nested=TRUE a nested design is used, i.e. the batch variable is assumed to be nested within
193
-#' the bio variable. Here, nested means that each batch is composed of samples from only *one* level of bio,
194
-#' while each level of bio may contain multiple batches.
195
-#'
192
+#' 
193
+#' This function builds a design matrix for the Adjustment Normalization Step,
194
+#' in which covariates are two (possibly nested) categorical factors and one or
195
+#' more continuous variables.
196
+#' 
197
+#' @details If nested=TRUE a nested design is used, i.e. the batch variable is
198
+#'   assumed to be nested within the bio variable. Here, nested means that each
199
+#'   batch is composed of samples from only *one* level of bio, while each
200
+#'   level of bio may contain multiple batches.
201
+#'   
196 202
 #' @export
197
-#'
203
+#' 
198 204
 #' @param bio factor. The biological covariate.
199 205
 #' @param batch factor. The batch covariate.
200
-#' @param W numeric. Either a vector or matrix containing one or more continuous covariates (e.g. RUVg factors).
201
-#' @param nested logical. Whether or not to consider a nested design (see details).
202
-#'
206
+#' @param W numeric. Either a vector or matrix containing one or more
207
+#'   continuous covariates (e.g. RUVg factors).
208
+#' @param nested logical. Whether or not to consider a nested design
209
+#'   (see details).
210
+#'   
203 211
 #' @return The design matrix.
212
+#'   
213
+#' @examples 
214
+#' 
215
+#' bio = as.factor(rep(c(1,2),each = 2))
216
+#' batch = as.factor(rep(c(1,2),2))
217
+#' design_mat = make_design(bio,batch, W = NULL)
218
+#' 
204 219
 make_design <- function(bio, batch, W, nested=FALSE) {
205 220
   
206 221
   if(nested & (is.null(bio) | is.null(batch))) {
... ...
@@ -251,29 +266,48 @@ make_design <- function(bio, batch, W, nested=FALSE) {
251 266
       }
252 267
     }
253 268
 
254
-    return(model.matrix(as.formula(f), contrasts=list(bio=contr.sum, batch=mat)))
269
+    return(model.matrix(as.formula(f), 
270
+                        contrasts=list(bio=contr.sum, batch=mat)))
255 271
   } else {
256 272
     return(model.matrix(as.formula(f)))
257 273
   }
258 274
 }
259 275
 
260 276
 #' Linear Adjustment Normalization
261
-#'
262
-#' Given a matrix with log expression values and a design matrix, this function fits a linear model
263
-#' and removes the effects of the batch factor as well as of the linear variables encoded in W.
264
-#'
265
-#' @details The function assumes that the columns of the design matrix corresponding to the variable
266
-#' for which expression needs to be adjusted, start with either the word "batch" or the letter "W" (case sensitive).
267
-#' Any other covariate (including the intercept) is kept.
268
-#'
277
+#' 
278
+#' Given a matrix with log expression values and a design matrix, this function
279
+#' fits a linear model and removes the effects of the batch factor
280
+#' as well as of the linear variables encoded in W.
281
+#' 
282
+#' @details The function assumes that the columns of the design matrix
283
+#'   corresponding to the variable for which expression needs to be adjusted,
284
+#'   start with either the word "batch" or the letter "W" (case sensitive). Any
285
+#'   other covariate (including the intercept) is kept.
286
+#'   
269 287
 #' @importFrom limma lmFit
270 288
 #' @export
271
-#'
272
-#' @param log_expr matrix. The log gene expression (genes in row, samples in columns).
273
-#' @param design_mat matrix. The design matrix (usually the result of make_design).
274
-#' @param batch factor. A factor with the batch information, identifying batch effect to be removed.
289
+#' 
290
+#' @param log_expr matrix. The log gene expression (genes in row, samples in
291
+#'   columns).
292
+#' @param design_mat matrix. The design matrix (usually the result of
293
+#'   make_design).
294
+#' @param batch factor. A factor with the batch information, identifying batch
295
+#'   effect to be removed.
275 296
 #' @param weights matrix. A matrix of weights.
276 297
 #' @return The corrected log gene expression.
298
+#'   
299
+#' @examples
300
+#' 
301
+#' set.seed(141)
302
+#' bio = as.factor(rep(c(1,2),each = 2))
303
+#' batch = as.factor(rep(c(1,2),2))
304
+#' design_mat = make_design(bio,batch, W = NULL)
305
+#' 
306
+#' log_expr = matrix(rnorm(20),ncol = 4)
307
+#' adjusted_log_expr = lm_adjust(log_expr = log_expr,
308
+#'   design_mat = design_mat,
309
+#'   batch = batch)
310
+#' 
277 311
 lm_adjust <- function(log_expr, design_mat, batch=NULL, weights=NULL) {
278 312
   lm_object <- lmFit(log_expr, design = design_mat, weights = weights)
279 313
 
... ...
@@ -1,20 +1,20 @@
1
-#' Fit Logistic Regression Model of FNR against set of positive control
1
+#' Fit Logistic Regression Model of FNR against set of positive control 
2 2
 #' (ubiquitously expressed) genes
3
-#'
4
-#' @details logit(Probability of False Negative) ~ a + b*(median log-expression)
5 3
 #' 
6
-#' @param expr matrix The data matrix in transcript-proportional units (genes in
7
-#'   rows, cells in columns).
8
-#' @param pos_controls A logical, numeric, or character vector indicating
9
-#'   positive control genes that will be used to compute false-negative rate
4
+#' @details logit(Probability of False Negative) ~ a + b*(median log-expr)
5
+#'   
6
+#' @param expr matrix A matrix of transcript-proportional units (genes in rows,
7
+#'   cells in columns).
8
+#' @param pos_controls A logical, numeric, or character vector indicating 
9
+#'   positive control genes that will be used to compute false-negative rate 
10 10
 #'   characteristics. User must provide at least 2 positive control genes.
11
-#' @param fn_tresh Inclusive threshold for negative detection. Default 0.01.
11
+#' @param fn_tresh Inclusive threshold for negative detection. Default 0.01. 
12 12
 #'   fn_tresh must be non-negative.
13
-#'
14
-#' @return A matrix of logistic regression coefficients corresponding to glm
15
-#'   fits in each sample (a and b in columns 1 and 2 respectively). If the a & b
16
-#'   fit does not converge, b is set to zero and only a is estimated.
17
-#'
13
+#'   
14
+#' @return A matrix of logistic regression coefficients corresponding to glm 
15
+#'   fits in each sample (a and b in columns 1 and 2 respectively). If the 
16
+#'   a & b fit does not converge, b is set to zero and only a is estimated.
17
+#'   
18 18
 #' @importFrom boot logit
19 19
 #' @importFrom matrixStats rowMedians
20 20
 #' @export
... ...
@@ -23,7 +23,7 @@
23 23
 #' mat <- matrix(rpois(1000, lambda = 3), ncol=10)
24 24
 #' mat = mat * matrix(1-rbinom(1000, size = 1, prob = .01), ncol=10)
25 25
 #' fnr_out = simple_FNR_params(mat,pos_controls = 1:10)
26
-#'
26
+#' 
27 27
 simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
28 28
 
29 29
   stopifnot(!any(is.na(pos_controls)))
... ...
@@ -53,13 +53,16 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
53 53
   # Median log-expression in positive observations
54 54
   mu_obs = log(rowMedians(pos_expr,na.rm = TRUE))
55 55
   if(any(is.na(mu_obs))){
56
-    stop("Median log-expression in positive observations NA for some positive control gene/s")
56
+    stop(paste0("Median log-expression in positive ",
57
+                "observations NA for some positive control gene/s"))
57 58
   }
58 59
 
59 60
   # Logistic Regression Model of FNR
60 61
   logistic_coef = matrix(0,ncol(drop_outs),2)
61 62
   for (si in seq_len(ncol(drop_outs))){
62
-    fit = suppressWarnings(glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,family=binomial(logit)))
63
+    fit = suppressWarnings(glm(cbind(drop_outs[,si],
64
+                                     1 - drop_outs[,si]) ~ mu_obs,
65
+                               family=binomial(logit)))
63 66
     if(fit$converged){
64 67
       logistic_coef[si,] = fit$coefficients
65 68
     } else {
... ...
@@ -69,72 +72,74 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
69 72
   return(logistic_coef)
70 73
 }
71 74
 
72
-#'metric-based sample filtering: function to filter single-cell RNA-Seq
75
+#'metric-based sample filtering: function to filter single-cell RNA-Seq 
73 76
 #'libraries.
74 77
 #'
75
-#'This function returns a sample-filtering report for each cell in the input
78
+#'This function returns a sample-filtering report for each cell in the input 
76 79
 #'expression matrix, describing which filtering criteria are satisfied.
77 80
 #'
78
-#'@details For each primary criterion (metric), a sample is evaluated based on 4
79
-#'  sub-criteria: 1) Hard (encoded) threshold 2) Adaptive thresholding via sd's
80
-#'  from the mean 3) Adaptive thresholding via mad's from the median 4) Adaptive
81
-#'  thresholding via sd's from the mean (after mixture modeling) A sample must
82
-#'  pass all sub-criteria to pass the primary criterion.
83
-#'
81
+#'@details For each primary criterion (metric), a sample is evaluated 
82
+#'  based on 4 sub-criteria: 1) Hard (encoded) threshold 2) Adaptive
83
+#'  thresholding via sd's from the mean 3) Adaptive thresholding via 
84
+#'  mad's from the median 4) Adaptive thresholding via sd's from the
85
+#'  mean (after mixture modeling) A sample must pass all sub-criteria
86
+#'  to pass the primary criterion.
87
+#'  
84 88
 #'@param expr matrix The data matrix (genes in rows, cells in columns).
85
-#'@param nreads A numeric vector representing number of reads in each library.
86
-#'  Default to `colSums` of `expr`.
87
-#'@param ralign A numeric vector representing the proportion of reads aligned to
88
-#'  the reference genome in each library. If NULL, filtered_ralign will be
89
-#'  returned NA.
90
-#'@param gene_filter A logical vector indexing genes that will be used to
91
-#'  compute library transcriptome breadth. If NULL, filtered_breadth will be
89
+#'@param nreads A numeric vector representing number of reads in each 
90
+#'  library. Default to `colSums` of `expr`.
91
+#'@param ralign A numeric vector representing the proportion of reads
92
+#'  aligned to the reference genome in each library. If NULL, filtered_ralign
93
+#'  will be returned NA.
94
+#'@param gene_filter A logical vector indexing genes that will be used to 
95
+#'  compute library transcriptome breadth. If NULL, filtered_breadth will be 
92 96
 #'  returned NA.
93
-#'@param pos_controls A logical, numeric, or character vector indicating
94
-#'  positive control genes that will be used to compute false-negative rate
97
+#'@param pos_controls A logical, numeric, or character vector indicating 
98
+#'  positive control genes that will be used to compute false-negative rate 
95 99
 #'  characteristics. If NULL, filtered_fnr will be returned NA.
96
-#'@param scale. logical. Will expression be scaled by total expression for FNR
100
+#'@param scale. logical. Will expression be scaled by total expression for FNR 
97 101
 #'  computation? Default = FALSE
98
-#'@param glen Gene lengths for gene-length normalization (normalized data used
102
+#'@param glen Gene lengths for gene-length normalization (normalized data used 
99 103
 #'  in FNR computation).
100
-#'@param AUC_range An array of two values, representing range over which FNR AUC
101
-#'  will be computed (log(expr_units)). Default c(0,15)
102
-#'@param zcut A numeric value determining threshold Z-score for sd, mad, and
104
+#'@param AUC_range An array of two values, representing range over which FNR 
105
+#'  AUC will be computed (log(expr_units)). Default c(0,15)
106
+#'@param zcut A numeric value determining threshold Z-score for sd, mad, and 
103 107
 #'  mixture sub-criteria. Default 1. If NULL, only hard threshold sub-criteria
104 108
 #'  will be applied.
105
-#'@param mixture A logical value determining whether mixture modeling
106
-#'  sub-criterion will be applied per primary criterion (metric). If true, a dip
107
-#'  test will be applied to each metric. If a metric is multimodal, it is fit to
108
-#'  a two-component normal mixture model. Samples deviating zcut sd's from
109
-#'  optimal mean (in the inferior direction), have failed this sub-criterion.
110
-#'@param dip_thresh A numeric value determining dip test p-value threshold.
109
+#'@param mixture A logical value determining whether mixture modeling 
110
+#'  sub-criterion will be applied per primary criterion (metric). If true, 
111
+#'  a dip test will be applied to each metric. If a metric is multimodal, 
112
+#'  it is fit to a two-component normal mixture model. Samples deviating zcut
113
+#'  sd's from optimal mean (in the inferior direction), have failed this 
114
+#'  sub-criterion.
115
+#'@param dip_thresh A numeric value determining dip test p-value threshold. 
111 116
 #'  Default 0.05.
112
-#'@param hard_nreads numeric. Hard (lower bound on) nreads threshold. Default
117
+#'@param hard_nreads numeric. Hard (lower bound on) nreads threshold. Default 
113 118
 #'  25000.
114
-#'@param hard_ralign numeric. Hard (lower bound on) ralign threshold. Default
119
+#'@param hard_ralign numeric. Hard (lower bound on) ralign threshold. Default 
115 120
 #'  15.
116
-#'@param hard_breadth numeric. Hard (lower bound on) breadth threshold. Default
117
-#'  0.2.
121
+#'@param hard_breadth numeric. Hard (lower bound on) breadth threshold. 
122
+#'  Default 0.2.
118 123
 #'@param hard_auc numeric. Hard (upper bound on) fnr auc threshold. Default 10.
119
-#'@param suff_nreads numeric. If not null, serves as an overriding upper bound
124
+#'@param suff_nreads numeric. If not null, serves as an overriding upper bound 
120 125
 #'  on nreads threshold.
121
-#'@param suff_ralign numeric. If not null, serves as an overriding upper bound
126
+#'@param suff_ralign numeric. If not null, serves as an overriding upper bound 
122 127
 #'  on ralign threshold.
123
-#'@param suff_breadth numeric. If not null, serves as an overriding upper bound
124
-#'  on breadth threshold.
125
-#'@param suff_auc numeric. If not null, serves as an overriding lower bound on
128
+#'@param suff_breadth numeric. If not null, serves as an overriding upper 
129
+#'  bound on breadth threshold.
130
+#'@param suff_auc numeric. If not null, serves as an overriding lower bound on 
126 131
 #'  fnr auc threshold.
127 132
 #'@param plot logical. Should a plot be produced?
128 133
 #'@param hist_breaks hist() breaks argument. Ignored if `plot=FALSE`.
129 134
 #'@param ... Arguments to be passed to methods.
130
-#'
131
-#'
135
+#'  
136
+#'  
132 137
 #'@return A list with the following elements: \itemize{ \item{filtered_nreads}{
133 138
 #'  Logical. Sample has too few reads.} \item{filtered_ralign}{ Logical. Sample
134 139
 #'  has too few reads aligned.} \item{filtered_breadth}{ Logical. Samples has
135 140
 #'  too few genes detected (low breadth).} \item{filtered_fnr}{ Logical. Sample
136 141
 #'  has a high FNR AUC.} }
137
-#'
142
+#'  
138 143
 #'@importFrom mixtools normalmixEM
139 144
 #'@importFrom diptest dip.test
140 145
 #'@importFrom boot inv.logit
... ...
@@ -146,14 +151,18 @@ simple_FNR_params = function(expr, pos_controls, fn_tresh = 0.01){
146 151
 #' qc = as.matrix(cbind(colSums(mat),colSums(mat > 0)))
147 152
 #' rownames(qc) = colnames(mat)
148 153
 #' colnames(qc) = c("NCOUNTS","NGENES")
149
-#' mfilt = metric_sample_filter(expr = mat,nreads = qc[,"NCOUNTS"], plot = TRUE, hard_nreads = 0)
154
+#' mfilt = metric_sample_filter(expr = mat,nreads = qc[,"NCOUNTS"],