... | ... |
@@ -3,8 +3,8 @@ Type: Package |
3 | 3 |
Title: A framework for cross-validated classification problems, with |
4 | 4 |
applications to differential variability and differential |
5 | 5 |
distribution testing |
6 |
-Version: 3.1.12 |
|
7 |
-Date: 2022-08-02 |
|
6 |
+Version: 3.1.13 |
|
7 |
+Date: 2022-08-21 |
|
8 | 8 |
Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang |
9 | 9 |
Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au> |
10 | 10 |
VignetteBuilder: knitr |
... | ... |
@@ -1,8 +1,14 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 | 3 |
export(ClassifyResult) |
4 |
+export(CrossValParams) |
|
4 | 5 |
export(FeatureSetCollection) |
6 |
+export(ModellingParams) |
|
7 |
+export(PredictParams) |
|
5 | 8 |
export(ROCplot) |
9 |
+export(SelectParams) |
|
10 |
+export(TrainParams) |
|
11 |
+export(TransformParams) |
|
6 | 12 |
export(actualOutcome) |
7 | 13 |
export(allFeatureNames) |
8 | 14 |
export(available) |
... | ... |
@@ -24,18 +30,30 @@ export(plotFeatureClasses) |
24 | 30 |
export(predictions) |
25 | 31 |
export(prepareData) |
26 | 32 |
export(rankingPlot) |
33 |
+export(runTest) |
|
34 |
+export(runTests) |
|
27 | 35 |
export(sampleNames) |
28 | 36 |
export(samplesMetricMap) |
29 | 37 |
export(selectionPlot) |
30 | 38 |
export(totalPredictions) |
31 | 39 |
export(tunedParameters) |
32 | 40 |
exportClasses(ClassifyResult) |
41 |
+exportClasses(CrossValParams) |
|
33 | 42 |
exportClasses(FeatureSetCollection) |
43 |
+exportClasses(ModellingParams) |
|
44 |
+exportClasses(PredictParams) |
|
45 |
+exportClasses(SelectParams) |
|
46 |
+exportClasses(TrainParams) |
|
47 |
+exportClasses(TransformParams) |
|
34 | 48 |
exportMethods("[") |
35 | 49 |
exportMethods("[[") |
36 | 50 |
exportMethods(ClassifyResult) |
37 | 51 |
exportMethods(FeatureSetCollection) |
52 |
+exportMethods(PredictParams) |
|
38 | 53 |
exportMethods(ROCplot) |
54 |
+exportMethods(SelectParams) |
|
55 |
+exportMethods(TrainParams) |
|
56 |
+exportMethods(TransformParams) |
|
39 | 57 |
exportMethods(actualOutcome) |
40 | 58 |
exportMethods(allFeatureNames) |
41 | 59 |
exportMethods(calcCVperformance) |
... | ... |
@@ -55,6 +73,8 @@ exportMethods(predict) |
55 | 73 |
exportMethods(predictions) |
56 | 74 |
exportMethods(prepareData) |
57 | 75 |
exportMethods(rankingPlot) |
76 |
+exportMethods(runTest) |
|
77 |
+exportMethods(runTests) |
|
58 | 78 |
exportMethods(sampleNames) |
59 | 79 |
exportMethods(samplesMetricMap) |
60 | 80 |
exportMethods(selectionPlot) |
... | ... |
@@ -65,6 +85,7 @@ import(BiocParallel) |
65 | 85 |
import(grid) |
66 | 86 |
import(utils) |
67 | 87 |
importFrom(S4Vectors,as.data.frame) |
88 |
+importFrom(S4Vectors,do.call) |
|
68 | 89 |
importFrom(dplyr,mutate) |
69 | 90 |
importFrom(dplyr,n) |
70 | 91 |
importFrom(rlang,sym) |
... | ... |
@@ -5,14 +5,14 @@ |
5 | 5 |
#' known classes and a vector of predicted classes determined outside of the |
6 | 6 |
#' ClassifyR package, a single metric value is calculated. If |
7 | 7 |
#' \code{calcCVperformance} is used, annotates the results of calling |
8 |
-#' \code{\link{crossValidate}} with one of the user-specified performance measures. |
|
8 |
+#' \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}} with one of the user-specified performance measures. |
|
9 | 9 |
#' |
10 | 10 |
#' All metrics except Matthews Correlation Coefficient are suitable for |
11 | 11 |
#' evaluating classification scenarios with more than two classes and are |
12 | 12 |
#' reimplementations of those available from Intel DAAL. |
13 | 13 |
#' |
14 |
-#' If \code{\link{crossValidate}} was run in resampling mode, one performance |
|
15 |
-#' measure is produced for every resampling. If the leave-k-out mode was used, |
|
14 |
+#' If \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}} was run in resampling mode, one performance |
|
15 |
+#' measure is produced for every resampling. Otherwise, if the leave-k-out mode was used, |
|
16 | 16 |
#' then the predictions are concatenated, and one performance measure is |
17 | 17 |
#' calculated for all classifications. |
18 | 18 |
#' |
... | ... |
@@ -53,9 +53,8 @@ setOldClass("rfsrc") |
53 | 53 |
# Union of A Function and NULL |
54 | 54 |
setClassUnion("functionOrNULL", c("function", "NULL")) |
55 | 55 |
|
56 |
-# Union of Functions and List of Functions. Useful for allowing ensemble feature selection. |
|
57 |
-setClassUnion("functionOrList", c("function", "list")) |
|
58 |
- |
|
56 |
+# Union of a Function, a Character and a List of Functions. Useful for allowing ensemble feature selection. |
|
57 |
+setClassUnion("functionOrChraracterOrList", c("function", "character", "list")) |
|
59 | 58 |
|
60 | 59 |
# Union of A Numeric Value and NULL |
61 | 60 |
setClassUnion("numericOrNULL", c("numeric", "NULL")) |
... | ... |
@@ -63,7 +62,7 @@ setClassUnion("numericOrNULL", c("numeric", "NULL")) |
63 | 62 |
# Union of a Character and a DataFrame |
64 | 63 |
setClassUnion("characterOrDataFrame", c("character", "DataFrame")) |
65 | 64 |
|
66 |
-# Union of a Surv class and a factor |
|
65 |
+# Union of a Surv class and a factor for flexibility with sample outcome |
|
67 | 66 |
setClassUnion("factorOrSurv", c("factor", "Surv")) |
68 | 67 |
|
69 | 68 |
# Union of a List and NULL |
... | ... |
@@ -82,7 +81,56 @@ setClassUnion("DataFrameOrNULL", c("DataFrame", "NULL")) |
82 | 81 |
##### CrossValParams ##### |
83 | 82 |
|
84 | 83 |
# Parameters for Cross-validation Specification |
85 |
- |
|
84 |
+#' Parameters for Cross-validation Specification |
|
85 |
+#' |
|
86 |
+#' Collects and checks necessary parameters required for cross-validation by |
|
87 |
+#' \code{\link{runTests}}. |
|
88 |
+#' |
|
89 |
+#' |
|
90 |
+#' @name CrossValParams |
|
91 |
+#' @rdname CrossValParams-class |
|
92 |
+#' @aliases CrossValParams CrossValParams-class |
|
93 |
+#' @docType class |
|
94 |
+#' |
|
95 |
+#' @param samplesSplits Default: "Permute k-Fold". A character value |
|
96 |
+#' specifying what kind of sample splitting to do. |
|
97 |
+#' @param permutations Default: 100. Number of times to permute the |
|
98 |
+#' data set before it is split into training and test sets. Only relevant if |
|
99 |
+#' \code{samplesSplits} is either \code{"Permute k-Fold"} or \code{"Permute |
|
100 |
+#' Percentage Split"}. |
|
101 |
+#' @param percentTest The percentage of the data |
|
102 |
+#' set to assign to the test set, with the remainder of the samples belonging |
|
103 |
+#' to the training set. Only relevant if \code{samplesSplits} is \code{"Permute |
|
104 |
+#' Percentage Split"}. |
|
105 |
+#' @param folds The number of approximately equal-sized folds to partition |
|
106 |
+#' the samples into. Only relevant if \code{samplesSplits} is \code{"Permute k-Fold"} |
|
107 |
+#' or \code{"k-Fold"}. |
|
108 |
+#' @param leave The number of samples to generate all possible |
|
109 |
+#' combinations of and use as the test set. Only relevant if \code{samplesSplits} is |
|
110 |
+#' \code{"Leave-k-Out"}. If set to 1, it is the traditional leave-one-out cross-validation, |
|
111 |
+#' sometimes written as LOOCV. |
|
112 |
+#' @param tuneMode Default: Resubstitution. The scheme to use for selecting any tuning parameters. |
|
113 |
+#' @param adaptiveResamplingDelta Default: \code{NULL}. If not null, adaptive resampling of training |
|
114 |
+#' samples is performed and this number is the difference in consecutive iterations that the |
|
115 |
+#' class probability or risk of all samples must change less than for the iterative process to stop. 0.01 |
|
116 |
+#' was used in the original publication. |
|
117 |
+#' @param parallelParams An instance of \code{\link{BiocParallelParam}} specifying |
|
118 |
+#' the kind of parallelisation to use. Default is to use two cores less than the total number of |
|
119 |
+#' cores the computer has, if it has four or more cores, otherwise one core, as is the |
|
120 |
+#' default of \code{\link{bpparam}}. To make results fully reproducible, please |
|
121 |
+#' choose a specific back-end depending on your operating system and also set |
|
122 |
+#' \code{RNGseed} to a number. |
|
123 |
+#' |
|
124 |
+#' @author Dario Strbenac |
|
125 |
+#' @examples |
|
126 |
+#' |
|
127 |
+#' CrossValParams() # Default is 100 permutations and 5 folds of each. |
|
128 |
+#' snow <- SnowParam(workers = 4, RNGseed = 999) |
|
129 |
+#' CrossValParams("Leave-k-Out", leave = 2, parallelParams = snow) |
|
130 |
+#' # Fully reproducible Leave-2-out cross-validation on 4 cores, |
|
131 |
+#' # even if feature selection or classifier use random sampling. |
|
132 |
+#' |
|
133 |
+#' @exportClass CrossValParams |
|
86 | 134 |
setClass("CrossValParams", representation( |
87 | 135 |
samplesSplits = "character", |
88 | 136 |
permutations = "numericOrNULL", |
... | ... |
@@ -96,6 +144,8 @@ setClass("CrossValParams", representation( |
96 | 144 |
) |
97 | 145 |
|
98 | 146 |
# CrossValParams constructor is an ordinary function and not S4 method for performance reasons. |
147 |
+#' @export |
|
148 |
+#' @rdname CrossValParams-class |
|
99 | 149 |
CrossValParams <- function(samplesSplits = c("Permute k-Fold", "Permute Percentage Split", "Leave-k-Out", "k-Fold"), |
100 | 150 |
permutations = 100, percentTest = 25, folds = 5, leave = 2, |
101 | 151 |
tuneMode = c("Resubstitution", "Nested CV", "none"), adaptiveResamplingDelta = NULL, parallelParams = bpparam()) |
... | ... |
@@ -138,6 +188,7 @@ setClassUnion("StageParamsOrMissingOrNULL", c("StageParams", "missing", "NULL")) |
138 | 188 |
|
139 | 189 |
|
140 | 190 |
##### TransformParams ##### |
191 |
+#' @exportClass TransformParams |
|
141 | 192 |
setClass("TransformParams", representation( |
142 | 193 |
transform = "function", |
143 | 194 |
characteristics = "DataFrame", |
... | ... |
@@ -145,17 +196,67 @@ setClass("TransformParams", representation( |
145 | 196 |
otherParams = "list"), contains = "StageParams" |
146 | 197 |
) |
147 | 198 |
|
148 |
-# Union of a TransformParams pbject and NULL. |
|
199 |
+# Union of a TransformParams object and NULL. |
|
149 | 200 |
setClassUnion("TransformParamsOrNULL", c("TransformParams", "NULL")) |
150 | 201 |
|
151 | 202 |
# Parameters for Data Transformation within CV. |
152 |
- |
|
203 |
+#' Parameters for Data Transformation |
|
204 |
+#' |
|
205 |
+#' Collects and checks necessary parameters required for transformation within CV. |
|
206 |
+#' |
|
207 |
+#' |
|
208 |
+#' @name TransformParams |
|
209 |
+#' @rdname TransformParams-class |
|
210 |
+#' @aliases TransformParams TransformParams-class TransformParams,ANY-method |
|
211 |
+#' TransformParams,character-method show,TransformParams-method |
|
212 |
+#' @docType class |
|
213 |
+#' @usage NULL |
|
214 |
+#' @section Constructor: |
|
215 |
+#' \describe{ |
|
216 |
+#' \item{}{ |
|
217 |
+#' \code{TransformParams(transform, characteristics = DataFrame(), intermediate = character(0), ...)} |
|
218 |
+#' Creates a \code{TransformParams} object which stores the function which will do the |
|
219 |
+#' transformation and parameters that the function will use. |
|
220 |
+#' \describe{ |
|
221 |
+#' \item{\code{transform}}{A character keyword referring to a registered transformation function. See \code{\link{available}} |
|
222 |
+#' for valid keywords.} |
|
223 |
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the |
|
224 |
+#' characteristics of data transformation to be done. First column must be |
|
225 |
+#' named \code{"characteristic"} and second column must be named \code{"value"}. |
|
226 |
+#' If using wrapper functions for data transformation in this package, the data |
|
227 |
+#' transformation name will automatically be generated and therefore it is not |
|
228 |
+#' necessary to specify it.} |
|
229 |
+#' \item{\code{intermediate}}{Character vector. Names of any variables created in |
|
230 |
+#' prior stages by \code{\link{runTest}} that need to be passed to the transformation |
|
231 |
+#' function.} |
|
232 |
+#' \item{\code{...}}{Other named parameters which will be used by the transformation function.} |
|
233 |
+#' } } } |
|
234 |
+#' |
|
235 |
+#' @section Summary: |
|
236 |
+#' \code{transformParams} is a \code{TransformParams} object. |
|
237 |
+#' \describe{ |
|
238 |
+#' \item{}{ |
|
239 |
+#' \code{show(transformParams)}: Prints a short summary of what \code{transformParams} contains. |
|
240 |
+#' }} |
|
241 |
+#' |
|
242 |
+#' @author Dario Strbenac |
|
243 |
+#' @examples |
|
244 |
+#' |
|
245 |
+#' transformParams <- TransformParams("diffLoc", location = "median") |
|
246 |
+#' # Subtract all values from training set median, to obtain absolute deviations. |
|
247 |
+#' |
|
248 |
+#' @export |
|
249 |
+#' @usage NULL |
|
153 | 250 |
setGeneric("TransformParams", function(transform, ...) |
154 | 251 |
standardGeneric("TransformParams")) |
155 | 252 |
|
156 |
-setMethod("TransformParams", "function", |
|
253 |
+#' @rdname TransformParams-class |
|
254 |
+#' @usage NULL |
|
255 |
+#' @export |
|
256 |
+setMethod("TransformParams", "character", |
|
157 | 257 |
function(transform, characteristics = S4Vectors::DataFrame(), intermediate = character(0), ...) |
158 | 258 |
{ |
259 |
+ transform <- .transformKeywordToFunction(transform) |
|
159 | 260 |
if(ncol(characteristics) == 0 || !"Transform Name" %in% characteristics[, "characteristic"]) |
160 | 261 |
{ |
161 | 262 |
characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Transform Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(transform, "name"), "name"])) |
... | ... |
@@ -164,10 +265,7 @@ setMethod("TransformParams", "function", |
164 | 265 |
intermediate = intermediate, otherParams = list(...)) |
165 | 266 |
}) |
166 | 267 |
|
167 |
-#' Inspect Data Transformation Details |
|
168 |
-#' |
|
169 |
-#' @rdname TransformParams-class |
|
170 |
-#' @param object An object of class \code{TransformParams} to inspect. |
|
268 |
+#' @usage NULL |
|
171 | 269 |
#' @export |
172 | 270 |
setMethod("show", "TransformParams", |
173 | 271 |
function(object) |
... | ... |
@@ -374,6 +472,7 @@ setMethod("show", "FeatureSetCollection", |
374 | 472 |
) |
375 | 473 |
|
376 | 474 |
#' @export |
475 |
+#' @usage NULL |
|
377 | 476 |
setMethod("[", c("FeatureSetCollection", "numeric", "missing", "ANY"), |
378 | 477 |
function(x, i, j, ..., drop = TRUE) |
379 | 478 |
{ |
... | ... |
@@ -381,6 +480,7 @@ setMethod("[", c("FeatureSetCollection", "numeric", "missing", "ANY"), |
381 | 480 |
}) |
382 | 481 |
|
383 | 482 |
#' @export |
483 |
+#' @usage NULL |
|
384 | 484 |
setMethod("[[", c("FeatureSetCollection", "ANY", "missing"), |
385 | 485 |
function(x, i, j, ...) |
386 | 486 |
{ |
... | ... |
@@ -388,9 +488,12 @@ setMethod("[[", c("FeatureSetCollection", "ANY", "missing"), |
388 | 488 |
}) |
389 | 489 |
|
390 | 490 |
setClassUnion("FeatureSetCollectionOrNULL", c("FeatureSetCollection", "NULL")) |
491 |
+setClassUnion("functionOrList", c("function", "list")) |
|
492 |
+setClassUnion("characterOrList", c("character", "list")) |
|
391 | 493 |
|
392 | 494 |
##### SelectParams ##### |
393 | 495 |
|
496 |
+#' @exportClass SelectParams |
|
394 | 497 |
setClass("SelectParams", representation( |
395 | 498 |
featureRanking = "functionOrList", |
396 | 499 |
characteristics = "DataFrame", |
... | ... |
@@ -404,22 +507,76 @@ setClass("SelectParams", representation( |
404 | 507 |
setClassUnion("SelectParamsOrNULL", c("SelectParams", "NULL")) |
405 | 508 |
|
406 | 509 |
# Parameters for Feature Selection |
510 |
+#' Parameters for Feature Selection |
|
511 |
+#' |
|
512 |
+#' Collects and checks necessary parameters required for feature selection. |
|
513 |
+#' Either one function is specified or a list of functions to perform ensemble |
|
514 |
+#' feature selection. The empty constructor is provided for convenience. |
|
515 |
+#' |
|
516 |
+#' |
|
517 |
+#' @name SelectParams |
|
518 |
+#' @rdname SelectParams-class |
|
519 |
+#' @aliases SelectParams SelectParams-class SelectParams,missing-method |
|
520 |
+#' SelectParams,characterOrList-method |
|
521 |
+#' @docType class |
|
522 |
+#' @section Constructor: |
|
523 |
+#' \describe{ |
|
524 |
+#' \item{}{\preformatted{SelectParams(featureRanking, characteristics = DataFrame(), minPresence = 1, intermediate = character(0), |
|
525 |
+#' subsetToSelections = TRUE, tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"), ...)} Creates a \code{SelectParams} |
|
526 |
+#' object which stores the function(s) which will do the selection and parameters that the |
|
527 |
+#' function will use. |
|
528 |
+#' \describe{\item{\code{featureRanking}}{A character keyword referring to a registered feature ranking function. See \code{\link{available}} |
|
529 |
+#' for valid keywords.} |
|
530 |
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the characteristics |
|
531 |
+#' of feature selection to be done. First column must be named \code{"characteristic"} and |
|
532 |
+#' second column must be named \code{"value"}. If using wrapper functions for feature |
|
533 |
+#' selection in this package, the feature selection name will automatically be |
|
534 |
+#' generated and therefore it is not necessary to specify it.} |
|
535 |
+#' \item{\code{minPresence}}{If a list of functions was provided, how many of |
|
536 |
+#' those must a feature have been selected by to be used in classification. 1 |
|
537 |
+#' is equivalent to a set union and a number the same length as |
|
538 |
+#' \code{featureRanking} is equivalent to set intersection.} |
|
539 |
+#' \item{\code{intermediate}}{Character vector. Names of any variables created |
|
540 |
+#' in prior stages by \code{\link{runTest}} that need to be passed to a feature |
|
541 |
+#' selection function.} |
|
542 |
+#' \item{\code{subsetToSelections}}{Whether to subset the data table(s), after feature selection has been done.} |
|
543 |
+#' \item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of |
|
544 |
+#' the list are the names of the parameters and the vectors are the values of the parameters to try. All possible |
|
545 |
+#' combinations are generated. Two elements named \code{nFeatures} and \code{performanceType} are mandatory, to |
|
546 |
+#' define the performance metric which will be used to select features and how many top-ranked features to try.} |
|
547 |
+#' \item{\code{...}}{Other named parameters which will be used by the |
|
548 |
+#' selection function. If \code{featureRanking} was a list of functions, |
|
549 |
+#' this must be a list of lists, as long as \code{featureRanking}.} } } } |
|
550 |
+#' @section Summary: |
|
551 |
+#' \code{selectParams} is a \code{SelectParams} object. |
|
552 |
+#' \describe{ |
|
553 |
+#' \item{}{ |
|
554 |
+#' \code{show(selectParams)}: Prints a short summary of what \code{selectParams} contains. |
|
555 |
+#' }} |
|
556 |
+#' @author Dario Strbenac |
|
557 |
+#' @examples |
|
558 |
+#' |
|
559 |
+#' #if(require(sparsediscrim)) |
|
560 |
+#' #{ |
|
561 |
+#' SelectParams("KS") |
|
562 |
+#' |
|
563 |
+#' # Ensemble feature selection. |
|
564 |
+#' SelectParams(list("Bartlett", "Levene")) |
|
565 |
+#' #} |
|
566 |
+#' |
|
567 |
+#' @export |
|
568 |
+#' @usage NULL |
|
407 | 569 |
setGeneric("SelectParams", function(featureRanking, ...) |
408 | 570 |
standardGeneric("SelectParams")) |
409 | 571 |
|
410 |
-# Default constructor. |
|
411 |
-setMethod("SelectParams", "missing", function() |
|
412 |
-{ |
|
413 |
- new("SelectParams", featureRanking = differentMeansRanking, |
|
414 |
- characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "Difference in Means"), |
|
415 |
- minPresence = 1, intermediate = character(0), subsetToSelections = TRUE, |
|
416 |
- tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error")) |
|
417 |
-}) |
|
418 |
- |
|
419 |
-setMethod("SelectParams", c("functionOrList"), |
|
572 |
+#' @rdname SelectParams-class |
|
573 |
+#' @usage NULL |
|
574 |
+#' @export |
|
575 |
+setMethod("SelectParams", c("characterOrList"), |
|
420 | 576 |
function(featureRanking, characteristics = DataFrame(), minPresence = 1, |
421 | 577 |
intermediate = character(0), subsetToSelections = TRUE, tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"), ...) |
422 | 578 |
{ |
579 |
+ if(is.character(featureRanking)) featureRanking <- .selectionKeywordToFunction(featureRanking) else featureRanking <- lapply(featureRanking, .selectionKeywordToFunction) |
|
423 | 580 |
if(!is.list(featureRanking) && (ncol(characteristics) == 0 || !"Selection Name" %in% characteristics[, "characteristic"])) |
424 | 581 |
{ |
425 | 582 |
characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Selection Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(featureRanking, "name"), "name"])) |
... | ... |
@@ -437,10 +594,8 @@ setMethod("SelectParams", c("functionOrList"), |
437 | 594 |
tuneParams = tuneParams, otherParams = others) |
438 | 595 |
}) |
439 | 596 |
|
440 |
-#' Container for Storing Details of Feature Selection Function(s) |
|
441 |
-#' |
|
597 |
+#' @usage NULL |
|
442 | 598 |
#' @rdname SelectParams-class |
443 |
-#' @param object An object of class \code{SelectParams} to inspect. |
|
444 | 599 |
#' @export |
445 | 600 |
setMethod("show", "SelectParams", |
446 | 601 |
function(object) |
... | ... |
@@ -463,11 +618,9 @@ setMethod("show", "SelectParams", |
463 | 618 |
} |
464 | 619 |
}) |
465 | 620 |
|
466 |
- |
|
467 |
- |
|
468 |
- |
|
469 | 621 |
##### TrainParams ##### |
470 | 622 |
|
623 |
+#' @exportClass TrainParams |
|
471 | 624 |
setClass("TrainParams", representation( |
472 | 625 |
classifier = "function", |
473 | 626 |
characteristics = "DataFrame", |
... | ... |
@@ -476,19 +629,65 @@ setClass("TrainParams", representation( |
476 | 629 |
otherParams = "listOrNULL", |
477 | 630 |
getFeatures = "functionOrNULL"), contains = "StageParams") |
478 | 631 |
|
479 |
-# Parameters for Classifier Training |
|
632 |
+#' Parameters for Classifier Training |
|
633 |
+#' |
|
634 |
+#' Collects and checks necessary parameters required for classifier training. |
|
635 |
+#' The empty constructor is provided for convenience. |
|
636 |
+#' |
|
637 |
+#' @name TrainParams |
|
638 |
+#' @rdname TrainParams-class |
|
639 |
+#' @aliases TrainParams TrainParams-class TrainParams,missing-method |
|
640 |
+#' TrainParams,characterOrFunction-method show,TrainParams-method |
|
641 |
+#' @docType class |
|
642 |
+#' @section Constructor: |
|
643 |
+#' \describe{ |
|
644 |
+#' \item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(), |
|
645 |
+#' intermediate = character(0), getFeatures = NULL, ...)} |
|
646 |
+#' Creates a \code{TrainParams} object which stores the function which will do the |
|
647 |
+#' classifier building and parameters that the function will use. |
|
648 |
+#' \describe{ |
|
649 |
+#' \item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}} |
|
650 |
+#' for valid keywords.} |
|
651 |
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the |
|
652 |
+#' characteristics of the classifier used. First column must be named \code{"characteristic"} |
|
653 |
+#' and second column must be named \code{"value"}. If using wrapper functions for classifiers |
|
654 |
+#' in this package, a classifier name will automatically be generated and |
|
655 |
+#' therefore it is not necessary to specify it.} |
|
656 |
+#' \item{\code{intermediate}}{Character vector. Names of any variables created |
|
657 |
+#' in prior stages by \code{\link{runTest}} that need to be passed to |
|
658 |
+#' \code{classifier}.} |
|
659 |
+#' \item{\code{getFeatures}}{A function may be specified that extracts the selected |
|
660 |
+#' features from the trained model. This is relevant if using a classifier that does |
|
661 |
+#' feature selection within training (e.g. random forest). The function must return a |
|
662 |
+#' list of two vectors. The first vector contains the ranked features (or empty if the |
|
663 |
+#' training algorithm doesn't produce rankings) and the second vector contains the selected |
|
664 |
+#' features.} |
|
665 |
+#' \item{\code{...}}{Other named parameters which will be used by the classifier.} } } } |
|
666 |
+#' @section Summary: |
|
667 |
+#' \code{trainParams} is a \code{TrainParams} object. |
|
668 |
+#' \describe{ |
|
669 |
+#' \item{}{ |
|
670 |
+#' \code{show(trainParams)}: Prints a short summary of what \code{trainParams} contains. |
|
671 |
+#' }} |
|
672 |
+#' @author Dario Strbenac |
|
673 |
+#' @examples |
|
674 |
+#' |
|
675 |
+#' #if(require(sparsediscrim)) |
|
676 |
+#' trainParams <- TrainParams("DLDA") |
|
677 |
+#' |
|
678 |
+#' @usage NULL |
|
679 |
+#' @export |
|
480 | 680 |
setGeneric("TrainParams", function(classifier, ...) standardGeneric("TrainParams")) |
681 |
+setClassUnion("characterOrFunction", c("character", "function")) |
|
481 | 682 |
|
482 |
-setMethod("TrainParams", "missing", function() |
|
483 |
-{ |
|
484 |
- new("TrainParams", classifier = DLDAtrainInterface, |
|
485 |
- characteristics = S4Vectors::DataFrame(characteristic = "Classifier Name", value = "Diagonal LDA"), |
|
486 |
- intermediate = character(0), getFeatures = NULL) |
|
487 |
-}) |
|
488 |
- |
|
489 |
-setMethod("TrainParams", c("function"), |
|
683 |
+#' @usage NULL |
|
684 |
+#' @rdname TrainParams-class |
|
685 |
+#' @export |
|
686 |
+setMethod("TrainParams", c("characterOrFunction"), |
|
490 | 687 |
function(classifier, balancing = c("downsample", "upsample", "none"), characteristics = DataFrame(), intermediate = character(0), tuneParams = NULL, getFeatures = NULL, ...) |
491 | 688 |
{ |
689 |
+ if(is.character(classifier)) |
|
690 |
+ classifier <- .classifierKeywordToParams(classifier)[[1]]@classifier # Training function. |
|
492 | 691 |
if(ncol(characteristics) == 0 || !"Classifier Name" %in% characteristics[, "characteristic"]) |
493 | 692 |
{ |
494 | 693 |
characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Classifier Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(classifier, "name"), "name"])) |
... | ... |
@@ -498,11 +697,7 @@ setMethod("TrainParams", c("function"), |
498 | 697 |
otherParams = list(...)) |
499 | 698 |
}) |
500 | 699 |
|
501 |
-#' Inspect Model Training Details |
|
502 |
-#' |
|
503 |
-#' @rdname TrainParams-class |
|
504 |
-#' @param object An object of class \code{TrainParams} to inspect. |
|
505 |
-#' @export |
|
700 |
+#' @usage NULL |
|
506 | 701 |
setMethod("show", "TrainParams", |
507 | 702 |
function(object) |
508 | 703 |
{ |
... | ... |
@@ -523,39 +718,77 @@ setMethod("show", "TrainParams", |
523 | 718 |
|
524 | 719 |
##### PredictParams ##### |
525 | 720 |
|
721 |
+#' @exportClass PredictParams |
|
526 | 722 |
setClass("PredictParams", representation( |
527 |
- predictor = "functionOrNULL", |
|
723 |
+ predictor = "function", |
|
528 | 724 |
characteristics = "DataFrame", |
529 | 725 |
intermediate = "character", |
530 | 726 |
otherParams = "listOrNULL"), contains = "StageParams" |
531 | 727 |
) |
532 | 728 |
|
729 |
+#' Parameters for Classifier Prediction |
|
730 |
+#' |
|
731 |
+#' Collects the function to be used for making predictions and any associated |
|
732 |
+#' parameters. |
|
733 |
+#' |
|
734 |
+#' The function specified must return either a factor vector of class |
|
735 |
+#' predictions, or a numeric vector of scores for the second class, according |
|
736 |
+#' to the levels of the class vector of the input data set, or a data frame |
|
737 |
+#' which has two columns named class and score. |
|
738 |
+#' |
|
739 |
+#' |
|
740 |
+#' @name PredictParams |
|
741 |
+#' @rdname PredictParams-class |
|
742 |
+#' @aliases PredictParams PredictParams-class PredictParams,missing-method |
|
743 |
+#' PredictParams,characterOrFunction-method show,PredictParams-method |
|
744 |
+#' @docType class |
|
745 |
+#' @section Constructor: \describe{\item{}{ |
|
746 |
+#' \code{PredictParams(predictor, characteristics = DataFrame(), intermediate = |
|
747 |
+#' character(0), ...)} Creates a PredictParams object which stores the function |
|
748 |
+#' which will do the class prediction, if required, and parameters that the |
|
749 |
+#' function will use. If the training function also makes predictions, this |
|
750 |
+#' must be set to \code{NULL}.} |
|
751 |
+#' \describe{ \item{\code{predictor}}{A character keyword referring to a registered classifier. See \code{\link{available}} |
|
752 |
+#' for valid keywords.} |
|
753 |
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing |
|
754 |
+#' the characteristics of the predictor function used. First column must be |
|
755 |
+#' named \code{"characteristic"} and second column must be named \code{"value"}.} |
|
756 |
+#' \item{\code{intermediate}}{Character vector. Names of any |
|
757 |
+#' variables created in prior stages in \code{\link{runTest}} that need to be |
|
758 |
+#' passed to the prediction function.} |
|
759 |
+#' \item{\code{...}}{Other arguments that \code{predictor} may use.} } } |
|
760 |
+#' @section Summary: |
|
761 |
+#' \code{predictParams} is a \code{PredictParams} object. |
|
762 |
+#' \describe{ |
|
763 |
+#' \item{}{ |
|
764 |
+#' \code{show(predictParams)}: Prints a short summary of what \code{predictParams} contains. |
|
765 |
+#' }} |
|
766 |
+#' @author Dario Strbenac |
|
767 |
+#' @examples |
|
768 |
+#' |
|
769 |
+#' # For prediction by trained object created by DLDA training function. |
|
770 |
+#' predictParams <- PredictParams("DLDA") |
|
771 |
+#' |
|
772 |
+#' @export |
|
773 |
+#' @usage NULL |
|
533 | 774 |
setGeneric("PredictParams", function(predictor, ...) |
534 | 775 |
standardGeneric("PredictParams")) |
535 | 776 |
|
536 |
-setMethod("PredictParams", "missing", function() |
|
537 |
-{ |
|
538 |
- new("PredictParams", predictor = DLDApredictInterface, |
|
539 |
- characteristics = S4Vectors::DataFrame(characteristic = "Predictor Name", value = "Diagonal LDA"), |
|
540 |
- intermediate = character(0), otherParams = NULL) |
|
541 |
-}) |
|
542 |
- |
|
543 |
-setMethod("PredictParams", c("functionOrNULL"), |
|
777 |
+#' @usage NULL |
|
778 |
+#' @rdname PredictParams-class |
|
779 |
+#' @export |
|
780 |
+setMethod("PredictParams", c("characterOrFunction"), |
|
544 | 781 |
function(predictor, characteristics = DataFrame(), intermediate = character(0), ...) |
545 | 782 |
{ |
546 |
- if(missing(predictor)) |
|
547 |
- stop("Either a function or NULL must be specified by 'predictor'.") |
|
783 |
+ if(is.character(predictor)) |
|
784 |
+ predictor <- .classifierKeywordToParams(predictor)[[2]]@predictor # Prediction function. |
|
548 | 785 |
others <- list(...) |
549 | 786 |
if(length(others) == 0) others <- NULL |
550 | 787 |
new("PredictParams", predictor = predictor, characteristics = characteristics, |
551 | 788 |
intermediate = intermediate, otherParams = others) |
552 | 789 |
}) |
553 | 790 |
|
554 |
-#' Inspect Prediction Function Details |
|
555 |
-#' |
|
556 |
-#' @rdname PredictParams-class |
|
557 |
-#' @param object An object of class \code{TrainParams} to inspect. |
|
558 |
-#' @export |
|
791 |
+#' @usage NULL |
|
559 | 792 |
setMethod("show", "PredictParams", |
560 | 793 |
function(object) |
561 | 794 |
{ |
... | ... |
@@ -581,6 +814,7 @@ setMethod("show", "PredictParams", |
581 | 814 |
|
582 | 815 |
setClassUnion("PredictParamsOrNULL", c("PredictParams", "NULL")) |
583 | 816 |
|
817 |
+#' @exportClass ModellingParams |
|
584 | 818 |
setClass("ModellingParams", representation( |
585 | 819 |
balancing = "character", |
586 | 820 |
transformParams = "TransformParamsOrNULL", |
... | ... |
@@ -590,9 +824,48 @@ setClass("ModellingParams", representation( |
590 | 824 |
doImportance = "logical" |
591 | 825 |
)) |
592 | 826 |
|
827 |
+##### ModellingParams ##### |
|
828 |
+ |
|
829 |
+#' Parameters for Data Modelling Specification |
|
830 |
+#' |
|
831 |
+#' Collects and checks necessary parameters required for data modelling. Apart |
|
832 |
+#' from data transformation that needs to be done within cross-validation (e.g. |
|
833 |
+#' subtracting each observation from training set mean), feature selection, model training and |
|
834 |
+#' prediction, this container also stores a setting for class imbalance |
|
835 |
+#' rebalancing. |
|
836 |
+#' |
|
837 |
+#' @name ModellingParams |
|
838 |
+#' @rdname ModellingParams-class |
|
839 |
+#' @aliases ModellingParams ModellingParams-class |
|
840 |
+#' @docType class |
|
841 |
+#' @param balancing Default: "downsample". A character value specifying what kind |
|
842 |
+#' of class balancing to do, if any. |
|
843 |
+#' @param transformParams Parameters used for feature transformation inside of C.V. |
|
844 |
+#' specified by a \code{\link{TransformParams}} instance. Optional, can be \code{NULL}. |
|
845 |
+#' @param selectParams Parameters used during feature selection specified |
|
846 |
+#' by a \code{\link{SelectParams}} instance. By default, parameters for selection |
|
847 |
+#' based on differences in means of numeric data. Optional, can be \code{NULL}. |
|
848 |
+#' @param trainParams Parameters for model training specified by a \code{\link{TrainParams}} instance. |
|
849 |
+#' By default, uses diagonal LDA. |
|
850 |
+#' @param predictParams Parameters for model prediction specified by a \code{\link{PredictParams}} instance. |
|
851 |
+#' By default, uses diagonal LDA. |
|
852 |
+#' @param doImportance Default: \code{FALSE}. Whether or not to carry out removal of each feature, one at a time, which |
|
853 |
+#' was chosen and then retrain the model and predict the test set, to measure the change in performance metric. Can |
|
854 |
+#' also be set to TRUE, if required. Modelling run time will be noticeably longer. |
|
855 |
+#' @author Dario Strbenac |
|
856 |
+#' @examples |
|
857 |
+#' |
|
858 |
+#' #if(require(sparsediscrim)) |
|
859 |
+#' #{ |
|
860 |
+#' ModellingParams() # Default is differences in means selection and DLDA. |
|
861 |
+#' ModellingParams(selectParams = NULL, # No feature selection before training. |
|
862 |
+#' trainParams = TrainParams("randomForest"), |
|
863 |
+#' predictParams = PredictParams("randomForest")) |
|
864 |
+#' #} |
|
865 |
+#' @export |
|
593 | 866 |
ModellingParams <- function(balancing = c("downsample", "upsample", "none"), |
594 |
- transformParams = NULL, selectParams = SelectParams(), |
|
595 |
- trainParams = TrainParams(), predictParams = PredictParams(), |
|
867 |
+ transformParams = NULL, selectParams = SelectParams("t-test"), |
|
868 |
+ trainParams = TrainParams("DLDA"), predictParams = PredictParams("DLDA"), |
|
596 | 869 |
doImportance = FALSE) |
597 | 870 |
{ |
598 | 871 |
balancing <- match.arg(balancing) |
... | ... |
@@ -612,7 +885,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
612 | 885 |
#' classes, the identifiers of features selected for each fold of each |
613 | 886 |
#' permutation or each hold-out classification, and performance metrics such as |
614 | 887 |
#' error rates. This class is not intended to be created by the user. It is |
615 |
-#' created by \code{\link{crossValidate}}. |
|
888 |
+#' created by \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}}. |
|
616 | 889 |
#' |
617 | 890 |
#' @name ClassifyResult |
618 | 891 |
#' @rdname ClassifyResult-class |
... | ... |
@@ -731,6 +1004,8 @@ setMethod("ClassifyResult", c("DataFrame", "character"), |
731 | 1004 |
predictions = predictions, actualOutcome = actualOutcome, importance = importance, modellingParams = modellingParams, finalModel = finalModel) |
732 | 1005 |
}) |
733 | 1006 |
|
1007 |
+#' @usage NULL |
|
1008 |
+#' @export |
|
734 | 1009 |
setMethod("show", "ClassifyResult", function(object) |
735 | 1010 |
{ |
736 | 1011 |
cat("An object of class 'ClassifyResult'.\n") |
... | ... |
@@ -20,7 +20,8 @@ |
20 | 20 |
# Nice-looking names for feature selection and classification functions, to automatically use |
21 | 21 |
# in a variety of performance plots. |
22 | 22 |
.ClassifyRenvir[["functionsTable"]] <- matrix( |
23 |
- c("bartlettRanking", "Bartlett Test", |
|
23 |
+ c("subtractFromLocation", "Subtraction From Training Set Location", |
|
24 |
+ "bartlettRanking", "Bartlett Test", |
|
24 | 25 |
"classifyInterface", "Poisson LDA", |
25 | 26 |
"differentMeansRanking", "Difference in Means", |
26 | 27 |
"DLDAtrainInterface", "Diagonal LDA", |
... | ... |
@@ -43,7 +44,6 @@ |
43 | 44 |
"previousSelection", "Previous Selection", |
44 | 45 |
"previousTrained", "Previous Trained", |
45 | 46 |
"randomForestTrainInterface", "Random Forest", |
46 |
- "subtractFromLocation", "Location Subtraction", |
|
47 | 47 |
"SVMtrainInterface", "Support Vector Machine", |
48 | 48 |
"coxphTrainInterface", "Cox Proportional Hazards", |
49 | 49 |
"coxphRanking", "Cox Proportional Hazards", |
... | ... |
@@ -641,52 +641,22 @@ generateModellingParams <- function(assayIDs, |
641 | 641 |
if(!classifier %in% classifiers) |
642 | 642 |
stop(paste("Classifier must exactly match of these (be careful of case):", paste(classifiers, collapse = ", "))) |
643 | 643 |
|
644 |
- classifier <- switch( |
|
645 |
- classifier, |
|
646 |
- "randomForest" = RFparams(), |
|
647 |
- "randomSurvivalForest" = RSFparams(), |
|
648 |
- "GLM" = GLMparams(), |
|
649 |
- "elasticNetGLM" = elasticNetGLMparams(), |
|
650 |
- "SVM" = SVMparams(), |
|
651 |
- "DLDA" = DLDAparams(), |
|
652 |
- "naiveBayes" = naiveBayesParams(), |
|
653 |
- "mixturesNormals" = mixModelsParams(), |
|
654 |
- "kNN" = kNNparams(), |
|
655 |
- "CoxPH" = coxphParams(), |
|
656 |
- "CoxNet" = coxnetParams() |
|
657 |
- ) |
|
644 |
+ classifierParams <- .classifierKeywordToParams(classifier) |
|
658 | 645 |
|
659 | 646 |
selectionMethod <- unlist(selectionMethod) |
660 | 647 |
|
661 |
- selectionMethod <- ifelse(is.null(selectionMethod), |
|
662 |
- "none", |
|
663 |
- selectionMethod) |
|
664 |
- |
|
665 |
- selectionMethodParam <- switch( |
|
666 |
- selectionMethod, |
|
667 |
- "none" = NULL, |
|
668 |
- "t-test" = differentMeansRanking, |
|
669 |
- "limma" = limmaRanking, |
|
670 |
- "edgeR" = edgeRranking, |
|
671 |
- "Bartlett" = bartlettRanking, |
|
672 |
- "Levene" = leveneRanking, |
|
673 |
- "DMD" = DMDranking, |
|
674 |
- "likelihoodRatio" = likelihoodRatioRanking, |
|
675 |
- "KS" = KolmogorovSmirnovRanking, |
|
676 |
- "KL" = KullbackLeiblerRanking, |
|
677 |
- "CoxPH" = coxphRanking |
|
678 |
- ) |
|
648 |
+ selectionMethod <- ifelse(is.null(selectionMethod), "none", selectionMethod) |
|
679 | 649 |
|
680 | 650 |
selectParams = SelectParams( |
681 |
- selectionMethodParam, |
|
651 |
+ selectionMethod, |
|
682 | 652 |
tuneParams = list(nFeatures = nFeatures, performanceType = performanceType) |
683 | 653 |
) |
684 | 654 |
|
685 | 655 |
params <- ModellingParams( |
686 | 656 |
balancing = "none", |
687 | 657 |
selectParams = selectParams, |
688 |
- trainParams = classifier$trainParams, |
|
689 |
- predictParams = classifier$predictParams |
|
658 |
+ trainParams = classifierParams$trainParams, |
|
659 |
+ predictParams = classifierParams$predictParams |
|
690 | 660 |
) |
691 | 661 |
|
692 | 662 |
#if(multiViewMethod != "none") stop("I haven't implemented multiview yet.") |
... | ... |
@@ -743,7 +713,7 @@ generateMultiviewParams <- function(assayIDs, |
743 | 713 |
multiViewMethod = "none") |
744 | 714 |
|
745 | 715 |
# Update selectParams to use |
746 |
- params@selectParams <- SelectParams(selectMulti, |
|
716 |
+ params@selectParams <- SelectParams("selectMulti", |
|
747 | 717 |
params = paramsassays, |
748 | 718 |
characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "merge"), |
749 | 719 |
tuneParams = list(nFeatures = nFeatures[[1]], |
... | ... |
@@ -356,12 +356,12 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment", |
356 | 356 |
{ |
357 | 357 |
if(missing(useFeatures)) |
358 | 358 |
stop("'useFeatures' must be specified by the user.") |
359 |
- if(!all(useFeatures[, 1] %in% c(names(measurements), "sampleInfo"))) |
|
360 |
- stop("Some table names in 'useFeatures' are not assay names in 'measurements' or \"sampleInfo\".") |
|
359 |
+ if(!all(useFeatures[, 1] %in% c(names(measurements), "clinical"))) |
|
360 |
+ stop("Some table names in 'useFeatures' are not assay names in 'measurements' or \"clinical\".") |
|
361 | 361 |
|
362 |
- assaysuseFeatures <- useFeatures[useFeatures[, 1] != "sampleInfo", ] |
|
363 |
- sampleInfouseFeatures <- useFeatures[useFeatures[, 1] == "sampleInfo", ] |
|
364 |
- measurements <- measurements[assaysuseFeatures[, 2], , assaysuseFeatures[, 1]] |
|
362 |
+ assaysUseFeatures <- useFeatures[useFeatures[, 1] != "clinical", ] |
|
363 |
+ clinicalUseFeatures <- useFeatures[useFeatures[, 1] == "clinical", ] |
|
364 |
+ measurements <- measurements[assaysUseFeatures[, 2], , assaysUseFeatures[, 1]] |
|
365 | 365 |
classes <- MultiAssayExperiment::colData(measurements)[, classesColumn] |
366 | 366 |
|
367 | 367 |
if(!is.null(groupBy)) |
... | ... |
@@ -369,7 +369,7 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment", |
369 | 369 |
if(is.null(groupingName)) |
370 | 370 |
groupingName <- groupBy[2] |
371 | 371 |
groupingTable <- groupBy[1] |
372 |
- if(groupingTable == "sampleInfo") |
|
372 |
+ if(groupingTable == "clinical") |
|
373 | 373 |
{ |
374 | 374 |
groupBy <- MultiAssayExperiment::colData(measurements)[, groupBy[2]] |
375 | 375 |
} else { # One of the omics tables. |
... | ... |
@@ -387,7 +387,7 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment", |
387 | 387 |
MultiAssayExperiment::colData(measurements) <- MultiAssayExperiment::colData(measurements)[colnames(MultiAssayExperiment::colData(measurements)) %in% sampleInfouseFeatures[, 2]] |
388 | 388 |
measurements <- MultiAssayExperiment::wideFormat(measurements, colDataCols = seq_along(MultiAssayExperiment::colData(measurements)), check.names = FALSE, collapse = ':') |
389 | 389 |
measurements <- measurements[, -1, drop = FALSE] # Remove sample IDs. |
390 |
- S4Vectors::mcols(measurements)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(measurements)[, "sourceName"]) |
|
390 |
+ S4Vectors::mcols(measurements)[, "sourceName"] <- gsub("colDataCols", "clinical", S4Vectors::mcols(measurements)[, "sourceName"]) |
|
391 | 391 |
colnames(S4Vectors::mcols(measurements))[1] <- "assay" |
392 | 392 |
S4Vectors::mcols(measurements)[, "feature"] <- S4Vectors::mcols(measurements)[, "rowname"] |
393 | 393 |
missingIndices <- is.na(S4Vectors::mcols(measurements)[, "feature"]) |
... | ... |
@@ -1,8 +1,81 @@ |
1 |
-# Perform a Single Classification |
|
2 |
- |
|
1 |
+#' Perform a Single Classification |
|
2 |
+#' |
|
3 |
+#' For a data set of features and samples, the classification process is run. |
|
4 |
+#' It consists of data transformation, feature selection, classifier training |
|
5 |
+#' and testing. |
|
6 |
+#' |
|
7 |
+#' This function only performs one classification and prediction. See |
|
8 |
+#' \code{\link{runTests}} for a driver function that enables a number of |
|
9 |
+#' different cross-validation schemes to be applied and uses this function to |
|
10 |
+#' perform each iteration. |
|
11 |
+#' |
|
12 |
+#' @aliases runTest runTest,matrix-method runTest,DataFrame-method |
|
13 |
+#' runTest,MultiAssayExperiment-method |
|
14 |
+#' @param measurementsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
15 |
+#' or \code{\link{MultiAssayExperiment}} containing the training data. For a |
|
16 |
+#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features. |
|
17 |
+#' @param outcomeTrain Either a factor vector of classes, a \code{\link{Surv}} object, or |
|
18 |
+#' a character string, or vector of such strings, containing column name(s) of column(s) |
|
19 |
+#' containing either classes or time and event information about survival. |
|
20 |
+#' @param measurementsTest Same data type as \code{measurementsTrain}, but only the test |
|
21 |
+#' samples. |
|
22 |
+#' @param outcomeTest Same data type as \code{outcomeTrain}, but only the test |
|
23 |
+#' samples. |
|
24 |
+#' @param crossValParams An object of class \code{\link{CrossValParams}}, |
|
25 |
+#' specifying the kind of cross-validation to be done, if nested |
|
26 |
+#' cross-validation is used to tune any parameters. |
|
27 |
+#' @param modellingParams An object of class \code{\link{ModellingParams}}, |
|
28 |
+#' specifying the class rebalancing, transformation (if any), feature selection |
|
29 |
+#' (if any), training and prediction to be done on the data set. |
|
30 |
+#' @param targets If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the |
|
31 |
+#' names of the data tables to be used. \code{"clinical"} is also a valid value |
|
32 |
+#' and specifies that numeric variables from the clinical data table will be |
|
33 |
+#' used. |
|
34 |
+#' @param outcomeColumns If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the |
|
35 |
+#' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)} |
|
36 |
+#' that contain(s) the samples' outcome to use for prediction. |
|
37 |
+#' @param ... Variables not used by the \code{matrix} nor the |
|
38 |
+#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
39 |
+#' \code{DataFrame} method. |
|
40 |
+#' @param characteristics A \code{\link{DataFrame}} describing the |
|
41 |
+#' characteristics of the classification used. First column must be named |
|
42 |
+#' \code{"characteristic"} and second column must be named \code{"value"}. |
|
43 |
+#' Useful for automated plot annotation by plotting functions within this |
|
44 |
+#' package. Transformation, selection and prediction functions provided by |
|
45 |
+#' this package will cause the characteristics to be automatically determined |
|
46 |
+#' and this can be left blank. |
|
47 |
+#' @param verbose Default: 1. A number between 0 and 3 for the amount of |
|
48 |
+#' progress messages to give. A higher number will produce more messages as |
|
49 |
+#' more lower-level functions print messages. |
|
50 |
+#' @param .iteration Not to be set by a user. This value is used to keep track |
|
51 |
+#' of the cross-validation iteration, if called by \code{\link{runTests}}. |
|
52 |
+#' @return If called directly by the user rather than being used internally by |
|
53 |
+#' \code{\link{runTests}}, a \code{\link{ClassifyResult}} object. Otherwise a |
|
54 |
+#' list of different aspects of the result which is passed back to \code{\link{runTests}}. |
|
55 |
+#' @author Dario Strbenac |
|
56 |
+#' @examples |
|
57 |
+#' |
|
58 |
+#' #if(require(sparsediscrim)) |
|
59 |
+#' #{ |
|
60 |
+#' data(asthma) |
|
61 |
+#' tuneList <- list(nFeatures = seq(5, 25, 5), performanceType = "Balanced Error") |
|
62 |
+#' selectParams <- SelectParams(limmaRanking, tuneParams = tuneList) |
|
63 |
+#' modellingParams <- ModellingParams(selectParams = selectParams) |
|
64 |
+#' trainIndices <- seq(1, nrow(measurements), 2) |
|
65 |
+#' testIndices <- seq(2, nrow(measurements), 2) |
|
66 |
+#' |
|
67 |
+#' runTest(measurements[trainIndices, ], classes[trainIndices], |
|
68 |
+#' measurements[testIndices, ], classes[testIndices], modellingParams = modellingParams) |
|
69 |
+#' #} |
|
70 |
+#' |
|
71 |
+#' @importFrom S4Vectors do.call |
|
72 |
+#' @usage NULL |
|
73 |
+#' @export |
|
3 | 74 |
setGeneric("runTest", function(measurementsTrain, ...) |
4 | 75 |
standardGeneric("runTest")) |
5 | 76 |
|
77 |
+#' @rdname runTest |
|
78 |
+#' @export |
|
6 | 79 |
setMethod("runTest", "matrix", # Matrix of numeric measurements. |
7 | 80 |
function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, ...) |
8 | 81 |
{ |
... | ... |
@@ -13,6 +86,8 @@ setMethod("runTest", "matrix", # Matrix of numeric measurements. |
13 | 86 |
...) |
14 | 87 |
}) |
15 | 88 |
|
89 |
+#' @rdname runTest |
|
90 |
+#' @export |
|
16 | 91 |
setMethod("runTest", "DataFrame", # Sample information data or one of the other inputs, transformed. |
17 | 92 |
function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, |
18 | 93 |
crossValParams = CrossValParams(), # crossValParams might be used for tuning optimisation. |
... | ... |
@@ -256,6 +331,8 @@ input data. Autmomatically reducing to smaller number.") |
256 | 331 |
} |
257 | 332 |
}) |
258 | 333 |
|
334 |
+#' @rdname runTest |
|
335 |
+#' @export |
|
259 | 336 |
setMethod("runTest", c("MultiAssayExperiment"), |
260 | 337 |
function(measurementsTrain, measurementsTest, targets = names(measurements), outcomeColumns, ...) |
261 | 338 |
{ |
... | ... |
@@ -1,7 +1,67 @@ |
1 |
-# Reproducibly Run Various Kinds of Cross-Validation |
|
2 |
- |
|
1 |
+#' Reproducibly Run Various Kinds of Cross-Validation |
|
2 |
+#' |
|
3 |
+#' Enables doing classification schemes such as ordinary 10-fold, 100 |
|
4 |
+#' permutations 5-fold, and leave one out cross-validation. Processing in |
|
5 |
+#' parallel is possible by leveraging the package \code{\link{BiocParallel}}. |
|
6 |
+#' |
|
7 |
+#' |
|
8 |
+#' @aliases runTests runTests,matrix-method runTests,DataFrame-method |
|
9 |
+#' runTests,MultiAssayExperiment-method |
|
10 |
+#' @param measurements Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
11 |
+#' or \code{\link{MultiAssayExperiment}} containing all of the data. For a |
|
12 |
+#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns |
|
13 |
+#' are features. |
|
14 |
+#' @param outcome Either a factor vector of classes, a \code{\link{Surv}} object, or |
|
15 |
+#' a character string, or vector of such strings, containing column name(s) of column(s) |
|
16 |
+#' containing either classes or time and event information about survival. |
|
17 |
+#' @param crossValParams An object of class \code{\link{CrossValParams}}, |
|
18 |
+#' specifying the kind of cross-validation to be done. |
|
19 |
+#' @param modellingParams An object of class \code{\link{ModellingParams}}, |
|
20 |
+#' specifying the class rebalancing, transformation (if any), feature selection |
|
21 |
+#' (if any), training and prediction to be done on the data set. |
|
22 |
+#' @param characteristics A \code{\link{DataFrame}} describing the |
|
23 |
+#' characteristics of the classification used. First column must be named |
|
24 |
+#' \code{"characteristic"} and second column must be named \code{"value"}. |
|
25 |
+#' Useful for automated plot annotation by plotting functions within this |
|
26 |
+#' package. Transformation, selection and prediction functions provided by |
|
27 |
+#' this package will cause the characteristics to be automatically determined |
|
28 |
+#' and this can be left blank. |
|
29 |
+#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the |
|
30 |
+#' names of the data tables to be used. \code{"clinical"} is also a valid value |
|
31 |
+#' and specifies that the clinical data table will be used. |
|
32 |
+#' @param outcomeColumns If \code{measurements} is a \code{MultiAssayExperiment}, the |
|
33 |
+#' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)} |
|
34 |
+#' that contain(s) the samples' outcome to use for prediction. |
|
35 |
+#' @param ... Variables not used by the \code{matrix} nor the |
|
36 |
+#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
37 |
+#' \code{DataFrame} method. |
|
38 |
+#' @param verbose Default: 1. A number between 0 and 3 for the amount of |
|
39 |
+#' progress messages to give. A higher number will produce more messages as |
|
40 |
+#' more lower-level functions print messages. |
|
41 |
+#' @return An object of class \code{\link{ClassifyResult}}. |
|
42 |
+#' @author Dario Strbenac |
|
43 |
+#' @examples |
|
44 |
+#' |
|
45 |
+#' #if(require(sparsediscrim)) |
|
46 |
+#' #{ |
|
47 |
+#' data(asthma) |
|
48 |
+#' |
|
49 |
+#' CVparams <- CrossValParams(permutations = 5) |
|
50 |
+#' tuneList <- list(nFeatures = seq(5, 25, 5), performanceType = "Balanced Error") |
|
51 |
+#' selectParams <- SelectParams(differentMeansRanking, tuneParams = tuneList) |
|
52 |
+#' modellingParams <- ModellingParams(selectParams = selectParams) |
|
53 |
+#' runTests(measurements, classes, CVparams, modellingParams, |
|
54 |
+#' DataFrame(characteristic = c("Assay Name", "Classifier Name"), |
|
55 |
+#' value = c("Asthma", "Different Means")) |
|
56 |
+#' ) |
|
57 |
+#' #} |
|
58 |
+#' |
|
59 |
+#' @export |
|
60 |
+#' @usage NULL |
|
3 | 61 |
setGeneric("runTests", function(measurements, ...) standardGeneric("runTests")) |
4 | 62 |
|
63 |
+#' @rdname runTests |
|
64 |
+#' @export |
|
5 | 65 |
setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix of numeric measurements. |
6 | 66 |
{ |
7 | 67 |
if(is.null(rownames(measurements))) |
... | ... |
@@ -9,6 +69,8 @@ setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix |
9 | 69 |
runTests(S4Vectors::DataFrame(measurements, check.names = FALSE), outcome, ...) |
10 | 70 |
}) |
11 | 71 |
|
72 |
+#' @rdname runTests |
|
73 |
+#' @export |
|
12 | 74 |
setMethod("runTests", "DataFrame", function(measurements, outcome, crossValParams = CrossValParams(), modellingParams = ModellingParams(), |
13 | 75 |
characteristics = S4Vectors::DataFrame(), verbose = 1) |
14 | 76 |
{ |
... | ... |
@@ -115,6 +177,8 @@ input data. Autmomatically reducing to smaller number.") |
115 | 177 |
lapply(results, "[[", "models"), tuneList, predictionsTable, outcome, importance, modellingParams) |
116 | 178 |
}) |
117 | 179 |
|
180 |
+#' @rdname runTests |
|
181 |
+#' @export |
|
118 | 182 |
setMethod("runTests", c("MultiAssayExperiment"), |
119 | 183 |
function(measurements, targets = names(measurements), outcomeColumns, ...) |
120 | 184 |
{ |
... | ... |
@@ -6,7 +6,7 @@ RFparams <- function() { |
6 | 6 |
return(list(trainParams = trainParams, predictParams = predictParams)) |
7 | 7 |
} |
8 | 8 |
|
9 |
-# Random Surival Forest |
|
9 |
+# Random Survival Forest |
|
10 | 10 |
RSFparams <- function() { |
11 | 11 |
trainParams <- TrainParams(rfsrcTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100))) |
12 | 12 |
predictParams <- PredictParams(rfsrcPredictInterface) |
... | ... |
@@ -33,4 +33,5 @@ subtractFromLocation <- function(measurementsTrain, measurementsTest, location = |
33 | 33 |
{if(absolute == TRUE) " and absolute transformation"}, " completed.") |
34 | 34 |
|
35 | 35 |
list(transformedTrain, transformedTest) |
36 |
-} |
|
37 | 36 |
\ No newline at end of file |
37 |
+} |
|
38 |
+attr(subtractFromLocation, "name") <- "subtractFromLocation" |
|
38 | 39 |
\ No newline at end of file |
... | ... |
@@ -529,6 +529,52 @@ |
529 | 529 |
list(measurementsTrain = measurementsTrain, classesTrain = classesTrain) |
530 | 530 |
} |
531 | 531 |
|
532 |
+.transformKeywordToFunction <- function(keyword) |
|
533 |
+{ |
|
534 |
+ switch( |
|
535 |
+ keyword, |
|
536 |
+ "none" = NULL, |
|
537 |
+ "diffLoc" = subtractFromLocation |
|
538 |
+ ) |
|
539 |
+} |
|
540 |
+ |
|
541 |
+.selectionKeywordToFunction <- function(keyword) |
|
542 |
+{ |
|
543 |
+ switch( |
|
544 |
+ keyword, |
|
545 |
+ "none" = NULL, |
|
546 |
+ "t-test" = differentMeansRanking, |
|
547 |
+ "limma" = limmaRanking, |
|
548 |
+ "edgeR" = edgeRranking, |
|
549 |
+ "Bartlett" = bartlettRanking, |
|
550 |
+ "Levene" = leveneRanking, |
|
551 |
+ "DMD" = DMDranking, |
|
552 |
+ "likelihoodRatio" = likelihoodRatioRanking, |
|
553 |
+ "KS" = KolmogorovSmirnovRanking, |
|
554 |
+ "KL" = KullbackLeiblerRanking, |
|
555 |
+ "CoxPH" = coxphRanking, |
|
556 |
+ "selectMulti" = selectMulti |
|
557 |
+ ) |
|
558 |
+} |
|
559 |
+ |
|
560 |
+.classifierKeywordToParams <- function(keyword) |
|
561 |
+{ |
|
562 |
+ switch( |
|
563 |
+ keyword, |
|
564 |
+ "randomForest" = RFparams(), |
|
565 |
+ "randomSurvivalForest" = RSFparams(), |
|
566 |
+ "GLM" = GLMparams(), |
|
567 |
+ "elasticNetGLM" = elasticNetGLMparams(), |
|
568 |
+ "SVM" = SVMparams(), |
|
569 |
+ "DLDA" = DLDAparams(), |
|
570 |
+ "naiveBayes" = naiveBayesParams(), |
|
571 |
+ "mixturesNormals" = mixModelsParams(), |
|
572 |
+ "kNN" = kNNparams(), |
|
573 |
+ "CoxPH" = coxphParams(), |
|
574 |
+ "CoxNet" = coxnetParams() |
|
575 |
+ ) |
|
576 |
+} |
|
577 |
+ |
|
532 | 578 |
.dlda <- function(x, y, prior = NULL){ # Remove this once sparsediscrim is reinstated to CRAN. |
533 | 579 |
obj <- list() |
534 | 580 |
obj$labels <- y |
... | ... |
@@ -33,7 +33,7 @@ Contains a list of models, table of actual sample classes and predicted |
33 | 33 |
classes, the identifiers of features selected for each fold of each |
34 | 34 |
permutation or each hold-out classification, and performance metrics such as |
35 | 35 |
error rates. This class is not intended to be created by the user. It is |
36 |
-created by \code{\link{crossValidate}}. |
|
36 |
+created by \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}}. |
|
37 | 37 |
} |
38 | 38 |
\section{Constructor}{ |
39 | 39 |
|
40 | 40 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,73 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/classes.R |
|
3 |
+\docType{class} |
|
4 |
+\name{CrossValParams} |
|
5 |
+\alias{CrossValParams} |
|
6 |
+\alias{CrossValParams-class} |
|
7 |
+\title{Parameters for Cross-validation Specification} |
|
8 |
+\usage{ |
|
9 |
+CrossValParams( |
|
10 |
+ samplesSplits = c("Permute k-Fold", "Permute Percentage Split", "Leave-k-Out", |
|
11 |
+ "k-Fold"), |
|
12 |
+ permutations = 100, |
|
13 |
+ percentTest = 25, |
|
14 |
+ folds = 5, |
|
15 |
+ leave = 2, |
|
16 |
+ tuneMode = c("Resubstitution", "Nested CV", "none"), |
|
17 |
+ adaptiveResamplingDelta = NULL, |
|
18 |
+ parallelParams = bpparam() |
|
19 |
+) |
|
20 |
+} |
|
21 |
+\arguments{ |
|
22 |
+\item{samplesSplits}{Default: "Permute k-Fold". A character value |
|
23 |
+specifying what kind of sample splitting to do.} |
|
24 |
+ |
|
25 |
+\item{permutations}{Default: 100. Number of times to permute the |
|
26 |
+data set before it is split into training and test sets. Only relevant if |
|
27 |
+\code{samplesSplits} is either \code{"Permute k-Fold"} or \code{"Permute |
|
28 |
+Percentage Split"}.} |
|
29 |
+ |
|
30 |
+\item{percentTest}{The percentage of the data |
|
31 |
+set to assign to the test set, with the remainder of the samples belonging |
|
32 |
+to the training set. Only relevant if \code{samplesSplits} is \code{"Permute |
|
33 |
+Percentage Split"}.} |
|
34 |
+ |
|
35 |
+\item{folds}{The number of approximately equal-sized folds to partition |
|
36 |
+the samples into. Only relevant if \code{samplesSplits} is \code{"Permute k-Fold"} |
|
37 |
+or \code{"k-Fold"}.} |
|
38 |
+ |
|
39 |
+\item{leave}{The number of samples to generate all possible |
|
40 |
+combination of and use as the test set. Only relevant if \code{samplesSplits} is |
|
41 |
+\code{"Leave-k-Out"}. If set to 1, it is the traditional leave-one-out cross-validation, |
|
42 |
+sometimes written as LOOCV.} |
|
43 |
+ |
|
44 |
+\item{tuneMode}{Default: Resubstitution. The scheme to use for selecting any tuning parameters.} |
|
45 |
+ |
|
46 |
+\item{adaptiveResamplingDelta}{Default: \code{NULL}. If not null, adaptive resampling of training |
|
47 |
+samples is performed and this number is the difference in consecutive iterations that the |
|
48 |
+class probability or risk of all samples must change less than for the iterative process to stop. 0.01 |
|
49 |
+was used in the original publication.} |
|
50 |
+ |
|
51 |
+\item{parallelParams}{An instance of \code{\link{BiocParallelParam}} specifying |
|
52 |
+the kind of parallelisation to use. Default is to use two cores less than the total number of |
|
53 |
+cores the computer has, if it has four or more cores, otherwise one core, as is the |
|
54 |
+default of \code{\link{bpparam}}. To make results fully reproducible, please |
|
55 |
+choose a specific back-end depending on your operating system and also set |
|
56 |
+\code{RNGseed} to a number.} |
|
57 |
+} |
|
58 |
+\description{ |
|
59 |
+Collects and checks necessary parameters required for cross-validation by |
|
60 |
+\code{\link{runTests}}. |
|
61 |
+} |
|
62 |
+\examples{ |
|
63 |
+ |
|
64 |
+ CrossValParams() # Default is 100 permutations and 5 folds of each. |
|
65 |
+ snow <- SnowParam(workers = 4, RNGseed = 999) |
|
66 |
+ CrossValParams("Leave-k-Out", leave = 2, parallelParams = snow) |
|
67 |
+ # Fully reproducible Leave-2-out cross-validation on 4 cores, |
|
68 |
+ # even if feature selection or classifier use random sampling. |
|
69 |
+ |
|
70 |
+} |
|
71 |
+\author{ |
|
72 |
+Dario Strbenac |
|
73 |
+} |
0 | 74 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,58 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/classes.R |
|
3 |
+\docType{class} |
|
4 |
+\name{ModellingParams} |
|
5 |
+\alias{ModellingParams} |
|
6 |
+\alias{ModellingParams-class} |
|
7 |
+\title{Parameters for Data Modelling Specification} |
|
8 |
+\usage{ |
|
9 |
+ModellingParams( |
|
10 |
+ balancing = c("downsample", "upsample", "none"), |
|
11 |
+ transformParams = NULL, |
|
12 |
+ selectParams = SelectParams("t-test"), |
|
13 |
+ trainParams = TrainParams("DLDA"), |
|
14 |
+ predictParams = PredictParams("DLDA"), |
|
15 |
+ doImportance = FALSE |
|
16 |
+) |
|
17 |
+} |
|
18 |
+\arguments{ |
|
19 |
+\item{balancing}{Default: "downsample". A character value specifying what kind |
|
20 |
+of class balancing to do, if any.} |
|
21 |
+ |
|
22 |
+\item{transformParams}{Parameters used for feature transformation inside of C.V. |
|
23 |
+specified by a \code{\link{TransformParams}} instance. Optional, can be \code{NULL}.} |
|
24 |
+ |
|
25 |
+\item{selectParams}{Parameters used during feature selection specified |
|
26 |
+by a \code{\link{SelectParams}} instance. By default, parameters for selection |
|
27 |
+based on differences in means of numeric data. Optional, can be \code{NULL}.} |
|
28 |
+ |
|
29 |
+\item{trainParams}{Parameters for model training specified by a \code{\link{TrainParams}} instance. |
|
30 |
+By default, uses diagonal LDA.} |
|
31 |
+ |
|
32 |
+\item{predictParams}{Parameters for model prediction specified by a \code{\link{PredictParams}} instance. |
|
33 |
+By default, uses diagonal LDA.} |
|
34 |
+ |
|
35 |
+\item{doImportance}{Default: \code{FALSE}. Whether or not to carry out removal of each feature, one at a time, which |
|
36 |
+was chosen and then retrain the model and predict the test set, to measure the change in performance metric. Can |
|
37 |
+also be set to TRUE, if required. Modelling run time will be noticeably longer.} |
|
38 |
+} |
|
39 |
+\description{ |
|
40 |
+Collects and checks necessary parameters required for data modelling. Apart |
|
41 |
+from data transformation that needs to be done within cross-validation (e.g. |
|
42 |
+subtracting each observation from training set mean), feature selection, model training and |
|
43 |
+prediction, this container also stores a setting for class imbalance |
|
44 |
+rebalancing. |
|
45 |
+} |
|
46 |
+\examples{ |
|
47 |
+ |
|
48 |
+ #if(require(sparsediscrim)) |
|
49 |
+ #{ |
|
50 |
+ ModellingParams() # Default is differences in means selection and DLDA. |
|
51 |
+ ModellingParams(selectParams = NULL, # No feature selection before training. |
|
52 |
+ trainParams = TrainParams("randomForest"), |
|
53 |
+ predictParams = PredictParams("randomForest")) |
|
54 |
+ #} |
|
55 |
+} |
|
56 |
+\author{ |
|
57 |
+Dario Strbenac |
|
58 |
+} |
... | ... |
@@ -1,14 +1,56 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/classes.R |
3 |
-\name{show,PredictParams-method} |
|
3 |
+\docType{class} |
|
4 |
+\name{PredictParams} |
|
5 |
+\alias{PredictParams} |
|
6 |
+\alias{PredictParams-class} |
|
7 |
+\alias{PredictParams,missing-method} |
|
8 |
+\alias{PredictParams,characterOrFunction-method} |
|
4 | 9 |
\alias{show,PredictParams-method} |
5 |
-\title{Inspect Prediction Function Details} |
|
6 |
-\usage{ |
|
7 |
-\S4method{show}{PredictParams}(object) |
|
10 |
+\title{Parameters for Classifier Prediction} |
|
11 |
+\description{ |
|
12 |
+Collects the function to be used for making predictions and any associated |
|
13 |
+parameters. |
|
8 | 14 |
} |
9 |
-\arguments{ |
|
10 |
-\item{object}{An object of class \code{TrainParams} to inspect.} |
|
15 |
+\details{ |
|
16 |
+The function specified must return either a factor vector of class |
|
17 |
+predictions, or a numeric vector of scores for the second class, according |
|
18 |
+to the levels of the class vector of the input data set, or a data frame |
|
19 |
+which has two columns named class and score. |
|
11 | 20 |
} |
12 |
-\description{ |
|
13 |
-Inspect Prediction Function Details |
|
21 |
+\section{Constructor}{ |
|
22 |
+ \describe{\item{}{ |
|
23 |
+\code{PredictParams(predictor, characteristics = DataFrame(), intermediate = |
|
24 |
+character(0), ...)} Creates a PredictParams object which stores the function |
|
25 |
+which will do the class prediction, if required, and parameters that the |
|
26 |
+function will use. If the training function also makes predictions, this |
|
27 |
+must be set to \code{NULL}.} |
|
28 |
+\describe{ \item{\code{predictor}}{A character keyword referring to a registered classifier. See \code{\link{available}} |
|
29 |
+for valid keywords.} |
|
30 |
+\item{\code{characteristics}}{A \code{\link{DataFrame}} describing |
|
31 |
+the characteristics of the predictor function used. First column must be |
|
32 |
+named \code{"characteristic"} and second column must be named \code{"value"}.} |
|
33 |
+\item{\code{intermediate}}{Character vector. Names of any |
|
34 |
+variables created in prior stages in \code{\link{runTest}} that need to be |
|
35 |
+passed to the prediction function.} |
|
36 |
+\item{\code{...}}{Other arguments that \code{predictor} may use.} } } |
|
37 |
+} |
|
38 |
+ |
|
39 |
+\section{Summary}{ |
|
40 |
+ |
|
41 |
+\code{predictParams} is a \code{PredictParams} object. |
|
42 |
+\describe{ |
|
43 |
+\item{}{ |
|
44 |
+ \code{show(predictParams)}: Prints a short summary of what \code{predictParams} contains. |
|
45 |
+}} |
|
46 |
+} |
|
47 |
+ |
|
48 |
+\examples{ |
|
49 |
+ |
|
50 |
+# For prediction by trained object created by DLDA training function. |
|
51 |
+predictParams <- PredictParams("DLDA") |
|
52 |
+ |
|
53 |
+} |
|
54 |
+\author{ |
|
55 |
+Dario Strbenac |
|
14 | 56 |
} |
... | ... |
@@ -1,14 +1,69 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/classes.R |
3 |
-\name{show,SelectParams-method} |
|
3 |
+\docType{class} |
|
4 |
+\name{SelectParams} |
|
5 |
+\alias{SelectParams} |
|
6 |
+\alias{SelectParams-class} |
|
7 |
+\alias{SelectParams,missing-method} |
|
8 |
+\alias{SelectParams,characterOrList-method} |
|
4 | 9 |
\alias{show,SelectParams-method} |
5 |
-\title{Container for Storing Details of Feature Selection Function(s)} |
|
6 |
-\usage{ |
|
7 |
-\S4method{show}{SelectParams}(object) |
|
10 |
+\title{Parameters for Feature Selection} |
|
11 |
+\description{ |
|
12 |
+Collects and checks necessary parameters required for feature selection. |
|
13 |
+Either one function is specified or a list of functions to perform ensemble |
|
14 |
+feature selection. The empty constructor is provided for convenience. |
|
8 | 15 |
} |
9 |
-\arguments{ |
|
10 |
-\item{object}{An object of class \code{SelectParams} to inspect.} |
|
16 |
+\section{Constructor}{ |
|
17 |
+ |
|
18 |
+\describe{ |
|
19 |
+\item{}{\preformatted{SelectParams(featureRanking, characteristics = DataFrame(), minPresence = 1, intermediate = character(0), |
|
20 |
+subsetToSelections = TRUE, tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"), ...)} Creates a \code{SelectParams} |
|
21 |
+object which stores the function(s) which will do the selection and parameters that the |
|
22 |
+function will use. |
|
23 |
+\describe{\item{\code{featureRanking}}{A character keyword referring to a registered feature ranking function. See \code{\link{available}} |
|
24 |
+for valid keywords.} |
|
25 |
+\item{\code{characteristics}}{A \code{\link{DataFrame}} describing the characteristics |
|
26 |
+of feature selection to be done. First column must be named \code{"characteristic"} and |
|
27 |
+second column must be named \code{"value"}. If using wrapper functions for feature |
|
28 |
+selection in this package, the feature selection name will automatically be |
|
29 |
+generated and therefore it is not necessary to specify it.} |
|
30 |
+\item{\code{minPresence}}{If a list of functions was provided, how many of |
|
31 |
+those must a feature have been selected by to be used in classification. 1 |
|
32 |
+is equivalent to a set union and a number the same length as |
|
33 |
+\code{featureRanking} is equivalent to set intersection.} |
|
34 |
+\item{\code{intermediate}}{Character vector. Names of any variables created |
|
35 |
+in prior stages by \code{\link{runTest}} that need to be passed to a feature |
|
36 |
+selection function.} |
|
37 |
+\item{\code{subsetToSelections}}{Whether to subset the data table(s), after feature selection has been done.} |
|
38 |
+\item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of |
|
39 |
+the list are the names of the parameters and the vectors are the values of the parameters to try. All possible |
|
40 |
+combinations are generated. Two elements named \code{nFeatures} and \code{performanceType} are mandatory, to |
|
41 |
+define the performance metric which will be used to select features and how many top-ranked features to try.} |
|
42 |
+\item{\code{...}}{Other named parameters which will be used by the |
|
43 |
+selection function. If \code{featureRanking} was a list of functions, |
|
44 |
+this must be a list of lists, as long as \code{featureRanking}.} } } } |
|
11 | 45 |
} |
12 |
-\description{ |
|
13 |
-Container for Storing Details of Feature Selection Function(s) |
|
46 |
+ |
|
47 |
+\section{Summary}{ |
|
48 |
+ |
|
49 |
+\code{selectParams} is a \code{SelectParams} object. |
|
50 |
+\describe{ |
|
51 |
+\item{}{ |
|
52 |
+ \code{show(selectParams)}: Prints a short summary of what \code{selectParams} contains. |
|
53 |
+}} |
|
54 |
+} |
|
55 |
+ |
|
56 |
+\examples{ |
|
57 |
+ |
|
58 |
+ #if(require(sparsediscrim)) |
|
59 |
+ #{ |
|
60 |
+ SelectParams("KS") |
|
61 |
+ |
|
62 |
+ # Ensemble feature selection. |
|
63 |
+ SelectParams(list("Bartlett", "Levene")) |
|
64 |
+ #} |
|
65 |
+ |
|
66 |
+} |
|
67 |
+\author{ |
|
68 |
+Dario Strbenac |
|
14 | 69 |
} |
... | ... |
@@ -1,14 +1,59 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/classes.R |
3 |
-\name{show,TrainParams-method} |
|
3 |
+\docType{class} |
|
4 |
+\name{TrainParams} |
|
5 |
+\alias{TrainParams} |
|
6 |
+\alias{TrainParams-class} |
|
7 |
+\alias{TrainParams,missing-method} |
|
8 |
+\alias{TrainParams,characterOrFunction-method} |
|
4 | 9 |
\alias{show,TrainParams-method} |
5 |
-\title{Inspect Model Training Details} |
|
6 |
-\usage{ |
|
7 |
-\S4method{show}{TrainParams}(object) |
|
10 |
+\title{Parameters for Classifier Training} |
|
11 |
+\description{ |
|
12 |
+Collects and checks necessary parameters required for classifier training. |
|
13 |
+The empty constructor is provided for convenience. |
|
8 | 14 |
} |
9 |
-\arguments{ |
|
10 |
-\item{object}{An object of class \code{TrainParams} to inspect.} |
|
15 |
+\section{Constructor}{ |
|
16 |
+ |
|
17 |
+\describe{ |
|
18 |
+\item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(), |
|
19 |
+intermediate = character(0), getFeatures = NULL, ...)} |
|
20 |
+Creates a \code{TrainParams} object which stores the function which will do the |
|
21 |
+classifier building and parameters that the function will use. |
|
22 |
+\describe{ |
|
23 |
+\item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}} |
|
24 |
+for valid keywords.} |
|
25 |
+\item{\code{characteristics}}{A \code{\link{DataFrame}} describing the |
|
26 |
+characteristics of the classifier used. First column must be named \code{"characteristic"} |
|
27 |
+and second column must be named \code{"value"}. If using wrapper functions for classifiers |
|
28 |
+in this package, a classifier name will automatically be generated and |
|
29 |
+therefore it is not necessary to specify it.} |
|
30 |
+\item{\code{intermediate}}{Character vector. Names of any variables created |
|
31 |
+in prior stages by \code{\link{runTest}} that need to be passed to |
|
32 |
+\code{classifier}.} |
|
33 |
+\item{\code{getFeatures}}{A function may be specified that extracts the selected |
|
34 |
+features from the trained model. This is relevant if using a classifier that does |
|
35 |
+feature selection within training (e.g. random forest). The function must return a |