Browse code

- Restored runTest, runTests, ModellingParams, CrossValParams as public documented functions. Vignette also restored to explain them. - Constructors for params now expect a character keyword which is then converted into a function internally.

Dario Strbenac authored on 21/08/2022 16:07:38
Showing 24 changed files

... ...
@@ -3,8 +3,8 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.1.12
7
-Date: 2022-08-02
6
+Version: 3.1.13
7
+Date: 2022-08-21
8 8
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
9 9
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
... ...
@@ -1,8 +1,14 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(ClassifyResult)
4
+export(CrossValParams)
4 5
 export(FeatureSetCollection)
6
+export(ModellingParams)
7
+export(PredictParams)
5 8
 export(ROCplot)
9
+export(SelectParams)
10
+export(TrainParams)
11
+export(TransformParams)
6 12
 export(actualOutcome)
7 13
 export(allFeatureNames)
8 14
 export(available)
... ...
@@ -24,18 +30,30 @@ export(plotFeatureClasses)
24 30
 export(predictions)
25 31
 export(prepareData)
26 32
 export(rankingPlot)
33
+export(runTest)
34
+export(runTests)
27 35
 export(sampleNames)
28 36
 export(samplesMetricMap)
29 37
 export(selectionPlot)
30 38
 export(totalPredictions)
31 39
 export(tunedParameters)
32 40
 exportClasses(ClassifyResult)
41
+exportClasses(CrossValParams)
33 42
 exportClasses(FeatureSetCollection)
43
+exportClasses(ModellingParams)
44
+exportClasses(PredictParams)
45
+exportClasses(SelectParams)
46
+exportClasses(TrainParams)
47
+exportClasses(TransformParams)
34 48
 exportMethods("[")
35 49
 exportMethods("[[")
36 50
 exportMethods(ClassifyResult)
37 51
 exportMethods(FeatureSetCollection)
52
+exportMethods(PredictParams)
38 53
 exportMethods(ROCplot)
54
+exportMethods(SelectParams)
55
+exportMethods(TrainParams)
56
+exportMethods(TransformParams)
39 57
 exportMethods(actualOutcome)
40 58
 exportMethods(allFeatureNames)
41 59
 exportMethods(calcCVperformance)
... ...
@@ -55,6 +73,8 @@ exportMethods(predict)
55 73
 exportMethods(predictions)
56 74
 exportMethods(prepareData)
57 75
 exportMethods(rankingPlot)
76
+exportMethods(runTest)
77
+exportMethods(runTests)
58 78
 exportMethods(sampleNames)
59 79
 exportMethods(samplesMetricMap)
60 80
 exportMethods(selectionPlot)
... ...
@@ -65,6 +85,7 @@ import(BiocParallel)
65 85
 import(grid)
66 86
 import(utils)
67 87
 importFrom(S4Vectors,as.data.frame)
88
+importFrom(S4Vectors,do.call)
68 89
 importFrom(dplyr,mutate)
69 90
 importFrom(dplyr,n)
70 91
 importFrom(rlang,sym)
... ...
@@ -5,14 +5,14 @@
5 5
 #' known classes and a vector of predicted classes determined outside of the
6 6
 #' ClassifyR package, a single metric value is calculated. If
7 7
 #' \code{calcCVperformance} is used, annotates the results of calling
8
-#' \code{\link{crossValidate}} with one of the user-specified performance measures.
8
+#' \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}} with one of the user-specified performance measures.
9 9
 #' 
10 10
 #' All metrics except Matthews Correlation Coefficient are suitable for
11 11
 #' evaluating classification scenarios with more than two classes and are
12 12
 #' reimplementations of those available from Intel DAAL.
13 13
 #' 
14
-#' If \code{\link{crossValidate}} was run in resampling mode, one performance
15
-#' measure is produced for every resampling. If the leave-k-out mode was used,
14
+#' If \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}} was run in resampling mode, one performance
15
+#' measure is produced for every resampling. Otherwise, if the leave-k-out mode was used,
16 16
 #' then the predictions are concatenated, and one performance measure is
17 17
 #' calculated for all classifications.
18 18
 #' 
... ...
@@ -53,9 +53,8 @@ setOldClass("rfsrc")
53 53
 # Union of A Function and NULL
54 54
 setClassUnion("functionOrNULL", c("function", "NULL"))
55 55
 
56
-# Union of Functions and List of Functions. Useful for allowing ensemble feature selection.
57
-setClassUnion("functionOrList", c("function", "list"))
58
-
56
+# Union of a Function, a Character and a List of Functions. Useful for allowing ensemble feature selection.
57
+setClassUnion("functionOrChraracterOrList", c("function", "character", "list"))
59 58
 
60 59
 # Union of A Numeric Value and NULL
61 60
 setClassUnion("numericOrNULL", c("numeric", "NULL"))
... ...
@@ -63,7 +62,7 @@ setClassUnion("numericOrNULL", c("numeric", "NULL"))
63 62
 # Union of a Character and a DataFrame
64 63
 setClassUnion("characterOrDataFrame", c("character", "DataFrame"))
65 64
 
66
-# Union of a Surv class and a factor
65
+# Union of a Surv class and a factor for flexibility with sample outcome
67 66
 setClassUnion("factorOrSurv", c("factor", "Surv"))
68 67
 
69 68
 # Union of a List and NULL
... ...
@@ -82,7 +81,56 @@ setClassUnion("DataFrameOrNULL", c("DataFrame", "NULL"))
82 81
 ##### CrossValParams #####
83 82
 
84 83
 # Parameters for Cross-validation Specification
85
-
84
+#' Parameters for Cross-validation Specification
85
+#' 
86
+#' Collects and checks necessary parameters required for cross-validation by
87
+#' \code{\link{runTests}}.
88
+#' 
89
+#' 
90
+#' @name CrossValParams
91
+#' @rdname CrossValParams-class
92
+#' @aliases CrossValParams CrossValParams-class
93
+#' @docType class
94
+#' 
95
+#' @param samplesSplits Default: "Permute k-Fold". A character value
96
+#' specifying what kind of sample splitting to do.
97
+#' @param permutations Default: 100. Number of times to permute the
98
+#' data set before it is split into training and test sets. Only relevant if
99
+#' \code{samplesSplits} is either \code{"Permute k-Fold"} or \code{"Permute
100
+#' Percentage Split"}.
101
+#' @param percentTest The percentage of the data
102
+#' set to assign to the test set, with the remainder of the samples belonging
103
+#' to the training set. Only relevant if \code{samplesSplits} is \code{"Permute
104
+#' Percentage Split"}.
105
+#' @param folds The number of approximately equal-sized folds to partition
106
+#' the samples into. Only relevant if \code{samplesSplits} is \code{"Permute k-Fold"}
107
+#' or \code{"k-Fold"}.
108
+#' @param leave The number of samples to generate all possible
109
+#' combinations of and use as the test set. Only relevant if \code{samplesSplits} is
110
+#' \code{"Leave-k-Out"}. If set to 1, it is the traditional leave-one-out cross-validation,
111
+#' sometimes written as LOOCV.
112
+#' @param tuneMode Default: Resubstitution. The scheme to use for selecting any tuning parameters.
113
+#' @param adaptiveResamplingDelta Default: \code{NULL}. If not null, adaptive resampling of training
114
+#' samples is performed and this number is the difference in consecutive iterations that the
115
+#' class probability or risk of all samples must change less than for the iterative process to stop. 0.01
116
+#' was used in the original publication.
117
+#' @param parallelParams An instance of \code{\link{BiocParallelParam}} specifying
118
+#' the kind of parallelisation to use. Default is to use two cores less than the total number of
119
+#' cores the computer has, if it has four or more cores, otherwise one core, as is the
120
+#' default of \code{\link{bpparam}}. To make results fully reproducible, please
121
+#' choose a specific back-end depending on your operating system and also set
122
+#' \code{RNGseed} to a number.
123
+#' 
124
+#' @author Dario Strbenac
125
+#' @examples
126
+#' 
127
+#'   CrossValParams() # Default is 100 permutations and 5 folds of each.
128
+#'   snow <- SnowParam(workers = 4, RNGseed = 999)
129
+#'   CrossValParams("Leave-k-Out", leave = 2, parallelParams = snow)
130
+#'   # Fully reproducible Leave-2-out cross-validation on 4 cores,
131
+#'   # even if feature selection or classifier use random sampling.
132
+#' 
133
+#' @exportClass CrossValParams
86 134
 setClass("CrossValParams", representation(
87 135
     samplesSplits = "character",
88 136
     permutations = "numericOrNULL",
... ...
@@ -96,6 +144,8 @@ setClass("CrossValParams", representation(
96 144
 )
97 145
 
98 146
 # CrossValParams constructor is an ordinary function and not S4 method for performance reasons.
147
+#' @export
148
+#' @rdname CrossValParams-class
99 149
 CrossValParams <- function(samplesSplits = c("Permute k-Fold", "Permute Percentage Split", "Leave-k-Out", "k-Fold"),
100 150
                            permutations = 100, percentTest = 25, folds = 5, leave = 2,
101 151
                            tuneMode = c("Resubstitution", "Nested CV", "none"), adaptiveResamplingDelta = NULL, parallelParams = bpparam())
... ...
@@ -138,6 +188,7 @@ setClassUnion("StageParamsOrMissingOrNULL", c("StageParams", "missing", "NULL"))
138 188
 
139 189
 
140 190
 ##### TransformParams #####
191
+#' @exportClass TransformParams
141 192
 setClass("TransformParams", representation(
142 193
   transform = "function",
143 194
   characteristics = "DataFrame",
... ...
@@ -145,17 +196,67 @@ setClass("TransformParams", representation(
145 196
   otherParams = "list"), contains = "StageParams"
146 197
 )
147 198
 
148
-# Union of a TransformParams pbject and NULL. 
199
+# Union of a TransformParams object and NULL. 
149 200
 setClassUnion("TransformParamsOrNULL", c("TransformParams", "NULL"))
150 201
 
151 202
 # Parameters for Data Transformation within CV.
152
-
203
+#' Parameters for Data Transformation
204
+#' 
205
+#' Collects and checks necessary parameters required for transformation within CV.
206
+#' 
207
+#' 
208
+#' @name TransformParams
209
+#' @rdname TransformParams-class
210
+#' @aliases TransformParams TransformParams-class TransformParams,ANY-method
211
+#' TransformParams,character-method show,TransformParams-method
212
+#' @docType class
213
+#' @usage NULL
214
+#' @section Constructor:
215
+#' \describe{
216
+#' \item{}{
217
+#' \code{TransformParams(transform, characteristics = DataFrame(), intermediate = character(0), ...)} 
218
+#' Creates a \code{TransformParams} object which stores the function which will do the
219
+#' transformation and parameters that the function will use.
220
+#' \describe{
221
+#' \item{\code{transform}}{A character keyword referring to a registered transformation function. See \code{\link{available}}
222
+#' for valid keywords.}
223
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
224
+#' characteristics of data transformation to be done. First column must be
225
+#' named \code{"characteristic"} and second column must be named \code{"value"}.
226
+#' If using wrapper functions for data transformation in this package, the data
227
+#' transformation name will automatically be generated and therefore it is not
228
+#' necessary to specify it.}
229
+#' \item{\code{intermediate}}{Character vector. Names of any variables created in
230
+#' prior stages by \code{\link{runTest}} that need to be passed to the transformation
231
+#' function.}
232
+#' \item{\code{...}}{Other named parameters which will be used by the transformation function.}
233
+#' } } }
234
+#' 
235
+#' @section Summary:
236
+#' \code{transformParams} is a \code{TransformParams} object.
237
+#' \describe{
238
+#' \item{}{
239
+#'     \code{show(transformParams)}: Prints a short summary of what \code{transformParams} contains.
240
+#'  }}
241
+#' 
242
+#' @author Dario Strbenac
243
+#' @examples
244
+#' 
245
+#'   transformParams <- TransformParams("diffLoc", location = "median")
246
+#'   # Subtract all values from training set median, to obtain absolute deviations.
247
+#' 
248
+#' @export
249
+#' @usage NULL
153 250
 setGeneric("TransformParams", function(transform, ...)
154 251
 standardGeneric("TransformParams"))
155 252
 
156
-setMethod("TransformParams", "function",
253
+#' @rdname TransformParams-class
254
+#' @usage NULL
255
+#' @export
256
+setMethod("TransformParams", "character",
157 257
           function(transform, characteristics = S4Vectors::DataFrame(), intermediate = character(0), ...)
158 258
           {
259
+            transform <- .transformKeywordToFunction(transform)
159 260
             if(ncol(characteristics) == 0 || !"Transform Name" %in% characteristics[, "characteristic"])
160 261
             {
161 262
               characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Transform Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(transform, "name"), "name"]))
... ...
@@ -164,10 +265,7 @@ setMethod("TransformParams", "function",
164 265
                 intermediate = intermediate, otherParams = list(...))
165 266
           })
166 267
 
167
-#' Inspect Data Transformation Details
168
-#'
169
-#' @rdname TransformParams-class
170
-#' @param object An object of class \code{TransformParams} to inspect.
268
+#' @usage NULL
171 269
 #' @export
172 270
 setMethod("show", "TransformParams",
173 271
           function(object)
... ...
@@ -374,6 +472,7 @@ setMethod("show", "FeatureSetCollection",
374 472
 )
375 473
 
376 474
 #' @export
475
+#' @usage NULL
377 476
 setMethod("[", c("FeatureSetCollection", "numeric", "missing", "ANY"),
378 477
     function(x, i, j, ..., drop = TRUE)
379 478
 {
... ...
@@ -381,6 +480,7 @@ setMethod("[", c("FeatureSetCollection", "numeric", "missing", "ANY"),
381 480
 })
382 481
 
383 482
 #' @export
483
+#' @usage NULL
384 484
 setMethod("[[", c("FeatureSetCollection", "ANY", "missing"),
385 485
     function(x, i, j, ...)
386 486
 {
... ...
@@ -388,9 +488,12 @@ setMethod("[[", c("FeatureSetCollection", "ANY", "missing"),
388 488
 })
389 489
 
390 490
 setClassUnion("FeatureSetCollectionOrNULL", c("FeatureSetCollection", "NULL"))
491
+setClassUnion("functionOrList", c("function", "list"))
492
+setClassUnion("characterOrList", c("character", "list"))
391 493
 
392 494
 ##### SelectParams #####
393 495
 
496
+#' @exportClass SelectParams
394 497
 setClass("SelectParams", representation(
395 498
   featureRanking = "functionOrList",
396 499
   characteristics = "DataFrame",
... ...
@@ -404,22 +507,76 @@ setClass("SelectParams", representation(
404 507
 setClassUnion("SelectParamsOrNULL", c("SelectParams", "NULL"))
405 508
 
406 509
 # Parameters for Feature Selection
510
+#' Parameters for Feature Selection
511
+#' 
512
+#' Collects and checks necessary parameters required for feature selection.
513
+#' Either one function is specified or a list of functions to perform ensemble
514
+#' feature selection. The empty constructor is provided for convenience.
515
+#' 
516
+#' 
517
+#' @name SelectParams
518
+#' @rdname SelectParams-class
519
+#' @aliases SelectParams SelectParams-class SelectParams,missing-method
520
+#' SelectParams,characterOrList-method
521
+#' @docType class
522
+#' @section Constructor:
523
+#' \describe{
524
+#' \item{}{\preformatted{SelectParams(featureRanking, characteristics = DataFrame(), minPresence = 1, intermediate = character(0),
525
+#' subsetToSelections = TRUE, tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"), ...)} Creates a \code{SelectParams}
526
+#' object which stores the function(s) which will do the selection and parameters that the
527
+#' function will use.
528
+#' \describe{\item{\code{featureRanking}}{A character keyword referring to a registered feature ranking function. See \code{\link{available}}
529
+#' for valid keywords.}
530
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the characteristics
531
+#' of feature selection to be done. First column must be named \code{"characteristic"} and
532
+#' second column must be named \code{"value"}. If using wrapper functions for feature
533
+#' selection in this package, the feature selection name will automatically be
534
+#' generated and therefore it is not necessary to specify it.}
535
+#' \item{\code{minPresence}}{If a list of functions was provided, how many of
536
+#' those must a feature have been selected by to be used in classification. 1
537
+#' is equivalent to a set union and a number the same length as
538
+#' \code{featureRanking} is equivalent to set intersection.}
539
+#' \item{\code{intermediate}}{Character vector. Names of any variables created
540
+#' in prior stages by \code{\link{runTest}} that need to be passed to a feature
541
+#' selection function.}
542
+#' \item{\code{subsetToSelections}}{Whether to subset the data table(s), after feature selection has been done.}
543
+#' \item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of
544
+#' the list are the names of the parameters and the vectors are the values of the parameters to try. All possible
545
+#' combinations are generated. Two elements named \code{nFeatures} and \code{performanceType} are mandatory, to
546
+#' define the performance metric which will be used to select features and how many top-ranked features to try.}
547
+#' \item{\code{...}}{Other named parameters which will be used by the
548
+#' selection function. If \code{featureRanking} was a list of functions,
549
+#' this must be a list of lists, as long as \code{featureRanking}.} } } }
550
+#' @section Summary:
551
+#' \code{selectParams} is a \code{SelectParams} object.
552
+#' \describe{
553
+#' \item{}{
554
+#'   \code{show(selectParams)}: Prints a short summary of what \code{selectParams} contains.
555
+#' }}
556
+#' @author Dario Strbenac
557
+#' @examples
558
+#' 
559
+#'   #if(require(sparsediscrim))
560
+#'   #{
561
+#'     SelectParams("KS")
562
+#'     
563
+#'     # Ensemble feature selection.
564
+#'     SelectParams(list("Bartlett", "Levene"))
565
+#'   #}
566
+#' 
567
+#' @export
568
+#' @usage NULL
407 569
 setGeneric("SelectParams", function(featureRanking, ...)
408 570
 standardGeneric("SelectParams"))
409 571
 
410
-# Default constructor.
411
-setMethod("SelectParams", "missing", function()
412
-{
413
-  new("SelectParams", featureRanking = differentMeansRanking,
414
-      characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "Difference in Means"),
415
-      minPresence = 1, intermediate = character(0), subsetToSelections = TRUE,
416
-      tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"))
417
-})
418
-
419
-setMethod("SelectParams", c("functionOrList"),
572
+#' @rdname SelectParams-class
573
+#' @usage NULL
574
+#' @export
575
+setMethod("SelectParams", c("characterOrList"),
420 576
           function(featureRanking, characteristics = DataFrame(), minPresence = 1, 
421 577
                    intermediate = character(0), subsetToSelections = TRUE, tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"), ...)
422 578
           {
579
+            if(is.character(featureRanking)) featureRanking <- .selectionKeywordToFunction(featureRanking) else featureRanking <- lapply(featureRanking, .selectionKeywordToFunction)
423 580
             if(!is.list(featureRanking) && (ncol(characteristics) == 0 || !"Selection Name" %in% characteristics[, "characteristic"]))
424 581
             {
425 582
               characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Selection Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(featureRanking, "name"), "name"]))
... ...
@@ -437,10 +594,8 @@ setMethod("SelectParams", c("functionOrList"),
437 594
                 tuneParams = tuneParams, otherParams = others)
438 595
           })
439 596
 
440
-#' Container for Storing Details of Feature Selection Function(s)
441
-#' 
597
+#' @usage NULL 
442 598
 #' @rdname SelectParams-class
443
-#' @param object An object of class \code{SelectParams} to inspect.
444 599
 #' @export
445 600
 setMethod("show", "SelectParams",
446 601
           function(object)
... ...
@@ -463,11 +618,9 @@ setMethod("show", "SelectParams",
463 618
             }
464 619
           })
465 620
 
466
-
467
-
468
-
469 621
 ##### TrainParams #####
470 622
 
623
+#' @exportClass TrainParams
471 624
 setClass("TrainParams", representation(
472 625
   classifier = "function",
473 626
   characteristics = "DataFrame",
... ...
@@ -476,19 +629,65 @@ setClass("TrainParams", representation(
476 629
   otherParams = "listOrNULL",
477 630
   getFeatures = "functionOrNULL"), contains = "StageParams")
478 631
 
479
-# Parameters for Classifier Training
632
+#' Parameters for Classifier Training
633
+#' 
634
+#' Collects and checks necessary parameters required for classifier training.
635
+#' The empty constructor is provided for convenience.
636
+#' 
637
+#' @name TrainParams
638
+#' @rdname TrainParams-class
639
+#' @aliases TrainParams TrainParams-class TrainParams,missing-method
640
+#' TrainParams,characterOrFunction-method show,TrainParams-method
641
+#' @docType class
642
+#' @section Constructor:
643
+#' \describe{
644
+#' \item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(),
645
+#' intermediate = character(0), getFeatures = NULL, ...)}
646
+#' Creates a \code{TrainParams} object which stores the function which will do the
647
+#' classifier building and parameters that the function will use.
648
+#' \describe{
649
+#' \item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}}
650
+#' for valid keywords.}
651
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
652
+#' characteristics of the classifier used. First column must be named \code{"characteristic"}
653
+#' and second column must be named \code{"value"}. If using wrapper functions for classifiers
654
+#' in this package, a classifier name will automatically be generated and
655
+#' therefore it is not necessary to specify it.}
656
+#' \item{\code{intermediate}}{Character vector. Names of any variables created
657
+#' in prior stages by \code{\link{runTest}} that need to be passed to
658
+#' \code{classifier}.}
659
+#' \item{\code{getFeatures}}{A function may be specified that extracts the selected
660
+#' features from the trained model. This is relevant if using a classifier that does
661
+#' feature selection within training (e.g. random forest). The function must return a
662
+#' list of two vectors. The first vector contains the ranked features (or empty if the
663
+#' training algorithm doesn't produce rankings) and the second vector contains the selected
664
+#' features.}
665
+#' \item{\code{...}}{Other named parameters which will be used by the classifier.} } } }
666
+#' @section Summary:
667
+#' \code{trainParams} is a \code{TrainParams} object.
668
+#' \describe{
669
+#' \item{}{
670
+#'   \code{show(trainParams)}: Prints a short summary of what \code{trainParams} contains.
671
+#' }}
672
+#' @author Dario Strbenac
673
+#' @examples
674
+#' 
675
+#' #if(require(sparsediscrim))
676
+#'   trainParams <- TrainParams("DLDA")
677
+#' 
678
+#' @usage NULL
679
+#' @export
480 680
 setGeneric("TrainParams", function(classifier, ...) standardGeneric("TrainParams"))
681
+setClassUnion("characterOrFunction", c("character", "function"))
481 682
 
482
-setMethod("TrainParams", "missing", function()
483
-{
484
-  new("TrainParams", classifier = DLDAtrainInterface,
485
-      characteristics = S4Vectors::DataFrame(characteristic = "Classifier Name", value = "Diagonal LDA"),
486
-      intermediate = character(0), getFeatures = NULL)
487
-})
488
-
489
-setMethod("TrainParams", c("function"),
683
+#' @usage NULL
684
+#' @rdname TrainParams-class
685
+#' @export
686
+setMethod("TrainParams", c("characterOrFunction"),
490 687
           function(classifier, balancing = c("downsample", "upsample", "none"), characteristics = DataFrame(), intermediate = character(0), tuneParams = NULL, getFeatures = NULL, ...)
491 688
           {
689
+            if(is.character(classifier))              
690
+              classifier <- .classifierKeywordToParams(classifier)[[1]]@classifier # Training function.              
492 691
             if(ncol(characteristics) == 0 || !"Classifier Name" %in% characteristics[, "characteristic"])
493 692
             {
494 693
               characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Classifier Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(classifier, "name"), "name"]))
... ...
@@ -498,11 +697,7 @@ setMethod("TrainParams", c("function"),
498 697
                 otherParams = list(...))
499 698
           })
500 699
 
501
-#' Inspect Model Training Details
502
-#'
503
-#' @rdname TrainParams-class
504
-#' @param object An object of class \code{TrainParams} to inspect.
505
-#' @export
700
+#' @usage NULL
506 701
 setMethod("show", "TrainParams",
507 702
           function(object)
508 703
           {
... ...
@@ -523,39 +718,77 @@ setMethod("show", "TrainParams",
523 718
 
524 719
 ##### PredictParams #####
525 720
 
721
+#' @exportClass PredictParams
526 722
 setClass("PredictParams", representation(
527
-  predictor = "functionOrNULL",
723
+  predictor = "function",
528 724
   characteristics = "DataFrame",  
529 725
   intermediate = "character",
530 726
   otherParams = "listOrNULL"), contains = "StageParams"
531 727
 )
532 728
 
729
+#' Parameters for Classifier Prediction
730
+#' 
731
+#' Collects the function to be used for making predictions and any associated
732
+#' parameters.
733
+#' 
734
+#' The function specified must return either a factor vector of class
735
+#' predictions, or a numeric vector of scores for the second class, according
736
+#' to the levels of the class vector of the input data set, or a data frame
737
+#' which has two columns named class and score.
738
+#' 
739
+#' 
740
+#' @name PredictParams
741
+#' @rdname PredictParams-class
742
+#' @aliases PredictParams PredictParams-class PredictParams,missing-method
743
+#' PredictParams,characterOrFunction-method show,PredictParams-method
744
+#' @docType class
745
+#' @section Constructor: \describe{\item{}{
746
+#' \code{PredictParams(predictor, characteristics = DataFrame(), intermediate =
747
+#' character(0), ...)} Creates a PredictParams object which stores the function
748
+#' which will do the class prediction, if required, and parameters that the
749
+#' function will use. If the training function also makes predictions, this
750
+#' must be set to \code{NULL}.}
751
+#' \describe{ \item{\code{predictor}}{A character keyword referring to a registered classifier. See \code{\link{available}}
752
+#' for valid keywords.}
753
+#' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing
754
+#' the characteristics of the predictor function used. First column must be
755
+#' named \code{"characteristic"} and second column must be named \code{"value"}.}
756
+#' \item{\code{intermediate}}{Character vector. Names of any
757
+#' variables created in prior stages in \code{\link{runTest}} that need to be
758
+#' passed to the prediction function.}
759
+#' \item{\code{...}}{Other arguments that \code{predictor} may use.} } }
760
+#' @section Summary:
761
+#' \code{predictParams} is a \code{PredictParams} object.
762
+#' \describe{
763
+#' \item{}{
764
+#'   \code{show(predictParams)}: Prints a short summary of what \code{predictParams} contains.
765
+#' }}
766
+#' @author Dario Strbenac
767
+#' @examples
768
+#' 
769
+#' # For prediction by trained object created by DLDA training function.
770
+#' predictParams <- PredictParams("DLDA")
771
+#' 
772
+#' @export
773
+#' @usage NULL
533 774
 setGeneric("PredictParams", function(predictor, ...)
534 775
 standardGeneric("PredictParams"))
535 776
 
536
-setMethod("PredictParams", "missing", function()
537
-{
538
-  new("PredictParams", predictor = DLDApredictInterface,
539
-      characteristics = S4Vectors::DataFrame(characteristic = "Predictor Name", value = "Diagonal LDA"),
540
-      intermediate = character(0), otherParams = NULL)
541
-})
542
-
543
-setMethod("PredictParams", c("functionOrNULL"),
777
+#' @usage NULL
778
+#' @rdname PredictParams-class
779
+#' @export
780
+setMethod("PredictParams", c("characterOrFunction"),
544 781
           function(predictor, characteristics = DataFrame(), intermediate = character(0), ...)
545 782
           {
546
-            if(missing(predictor))
547
-              stop("Either a function or NULL must be specified by 'predictor'.")
783
+            if(is.character(predictor))              
784
+              predictor <- .classifierKeywordToParams(predictor)[[2]]@predictor # Prediction function.
548 785
             others <- list(...)
549 786
             if(length(others) == 0) others <- NULL
550 787
             new("PredictParams", predictor = predictor, characteristics = characteristics,
551 788
                 intermediate = intermediate, otherParams = others)
552 789
           })
553 790
 
554
-#' Inspect Prediction Function Details
555
-#'
556
-#' @rdname PredictParams-class
557
-#' @param object An object of class \code{TrainParams} to inspect.
558
-#' @export
791
+#' @usage NULL
559 792
 setMethod("show", "PredictParams",
560 793
           function(object)
561 794
           {
... ...
@@ -581,6 +814,7 @@ setMethod("show", "PredictParams",
581 814
 
582 815
 setClassUnion("PredictParamsOrNULL", c("PredictParams", "NULL"))
583 816
 
817
+#' @exportClass ModellingParams
584 818
 setClass("ModellingParams", representation(
585 819
   balancing = "character",
586 820
   transformParams = "TransformParamsOrNULL",
... ...
@@ -590,9 +824,48 @@ setClass("ModellingParams", representation(
590 824
   doImportance = "logical"
591 825
 ))
592 826
 
827
+##### ModellingParams #####
828
+
829
+#' Parameters for Data Modelling Specification
830
+#' 
831
+#' Collects and checks necessary parameters required for data modelling. Apart
832
+#' from data transformation that needs to be done within cross-validation (e.g.
833
+#' subtracting each observation from training set mean), feature selection, model training and
834
+#' prediction, this container also stores a setting for class imbalance
835
+#' rebalancing.
836
+#' 
837
+#' @name ModellingParams
838
+#' @rdname ModellingParams-class
839
+#' @aliases ModellingParams ModellingParams-class
840
+#' @docType class
841
+#' @param balancing Default: "downsample". A character value specifying what kind
842
+#' of class balancing to do, if any.
843
+#' @param transformParams Parameters used for feature transformation inside of C.V.
844
+#' specified by a \code{\link{TransformParams}} instance. Optional, can be \code{NULL}.
845
+#' @param selectParams Parameters used during feature selection specified
846
+#'   by a \code{\link{SelectParams}} instance.  By default, parameters for selection
847
+#'   based on differences in means of numeric data. Optional, can be \code{NULL}.
848
+#' @param trainParams Parameters for model training specified by a \code{\link{TrainParams}} instance.
849
+#'   By default, uses diagonal LDA.
850
+#' @param predictParams Parameters for model prediction specified by a \code{\link{PredictParams}} instance.
851
+#' By default, uses diagonal LDA.
852
+#' @param doImportance Default: \code{FALSE}. Whether or not to carry out removal of each feature, one at a time, which
853
+#' was chosen and then retrain the model and predict the test set, to measure the change in performance metric. Can
854
+#' also be set to TRUE, if required. Modelling run time will be noticeably longer.
855
+#' @author Dario Strbenac
856
+#' @examples
857
+#' 
858
+#'   #if(require(sparsediscrim))
859
+#'   #{
860
+#'      ModellingParams() # Default is differences in means selection and DLDA.
861
+#'      ModellingParams(selectParams = NULL, # No feature selection before training.
862
+#'                      trainParams = TrainParams("randomForest"),
863
+#'                      predictParams = PredictParams("randomForest"))
864
+#'   #}
865
+#' @export
593 866
 ModellingParams <- function(balancing = c("downsample", "upsample", "none"),
594
-                            transformParams = NULL, selectParams = SelectParams(),
595
-                            trainParams = TrainParams(), predictParams = PredictParams(),
867
+                            transformParams = NULL, selectParams = SelectParams("t-test"),
868
+                            trainParams = TrainParams("DLDA"), predictParams = PredictParams("DLDA"),
596 869
                             doImportance = FALSE)
597 870
 {
598 871
   balancing <- match.arg(balancing)
... ...
@@ -612,7 +885,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
612 885
 #' classes, the identifiers of features selected for each fold of each
613 886
 #' permutation or each hold-out classification, and performance metrics such as
614 887
 #' error rates. This class is not intended to be created by the user. It is
615
-#' created by \code{\link{crossValidate}}.
888
+#' created by \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}}.
616 889
 #' 
617 890
 #' @name ClassifyResult
618 891
 #' @rdname ClassifyResult-class
... ...
@@ -731,6 +1004,8 @@ setMethod("ClassifyResult", c("DataFrame", "character"),
731 1004
                 predictions = predictions, actualOutcome = actualOutcome, importance = importance, modellingParams = modellingParams, finalModel = finalModel)
732 1005
           })
733 1006
 
1007
+#' @usage NULL
1008
+#' @export
734 1009
 setMethod("show", "ClassifyResult", function(object)
735 1010
           {
736 1011
             cat("An object of class 'ClassifyResult'.\n")
... ...
@@ -20,7 +20,8 @@
20 20
 # Nice-looking names for feature selection and classification functions, to automatically use
21 21
 # in a variety of performance plots.
22 22
 .ClassifyRenvir[["functionsTable"]] <- matrix(
23
-  c("bartlettRanking", "Bartlett Test",
23
+  c("subtractFromLocation", "Subtraction From Training Set Location",
24
+    "bartlettRanking", "Bartlett Test",
24 25
     "classifyInterface", "Poisson LDA",
25 26
     "differentMeansRanking", "Difference in Means",
26 27
     "DLDAtrainInterface", "Diagonal LDA",
... ...
@@ -43,7 +44,6 @@
43 44
     "previousSelection", "Previous Selection", 
44 45
     "previousTrained", "Previous Trained",
45 46
     "randomForestTrainInterface", "Random Forest",
46
-    "subtractFromLocation", "Location Subtraction",
47 47
     "SVMtrainInterface", "Support Vector Machine",
48 48
     "coxphTrainInterface", "Cox Proportional Hazards",
49 49
     "coxphRanking", "Cox Proportional Hazards",
... ...
@@ -641,52 +641,22 @@ generateModellingParams <- function(assayIDs,
641 641
     if(!classifier %in% classifiers)
642 642
         stop(paste("Classifier must exactly match of these (be careful of case):", paste(classifiers, collapse = ", ")))
643 643
     
644
-    classifier <- switch(
645
-        classifier,
646
-        "randomForest" = RFparams(),
647
-        "randomSurvivalForest" = RSFparams(),
648
-        "GLM" = GLMparams(),
649
-        "elasticNetGLM" = elasticNetGLMparams(),
650
-        "SVM" = SVMparams(),
651
-        "DLDA" = DLDAparams(),
652
-        "naiveBayes" = naiveBayesParams(),
653
-        "mixturesNormals" = mixModelsParams(),
654
-        "kNN" = kNNparams(),
655
-        "CoxPH" = coxphParams(),
656
-        "CoxNet" = coxnetParams()
657
-    )
644
+    classifierParams <- .classifierKeywordToParams(classifier)
658 645
 
659 646
     selectionMethod <- unlist(selectionMethod)
660 647
 
661
-    selectionMethod <- ifelse(is.null(selectionMethod),
662
-                              "none",
663
-                              selectionMethod)
664
-
665
-    selectionMethodParam <- switch(
666
-        selectionMethod,
667
-        "none" = NULL,
668
-        "t-test" = differentMeansRanking,
669
-        "limma" = limmaRanking,
670
-        "edgeR" = edgeRranking,
671
-        "Bartlett" = bartlettRanking,
672
-        "Levene" = leveneRanking,
673
-        "DMD" = DMDranking,
674
-        "likelihoodRatio" = likelihoodRatioRanking,
675
-        "KS" = KolmogorovSmirnovRanking,
676
-        "KL" = KullbackLeiblerRanking,
677
-        "CoxPH" = coxphRanking
678
-    )
648
+    selectionMethod <- ifelse(is.null(selectionMethod), "none", selectionMethod)
679 649
 
680 650
     selectParams = SelectParams(
681
-        selectionMethodParam,
651
+        selectionMethod,
682 652
         tuneParams = list(nFeatures = nFeatures, performanceType = performanceType)
683 653
         )
684 654
 
685 655
     params <- ModellingParams(
686 656
         balancing = "none",
687 657
         selectParams = selectParams,
688
-        trainParams = classifier$trainParams,
689
-        predictParams = classifier$predictParams
658
+        trainParams = classifierParams$trainParams,
659
+        predictParams = classifierParams$predictParams
690 660
     )
691 661
 
692 662
     #if(multiViewMethod != "none") stop("I haven't implemented multiview yet.")
... ...
@@ -743,7 +713,7 @@ generateMultiviewParams <- function(assayIDs,
743 713
                                           multiViewMethod = "none")
744 714
 
745 715
         # Update selectParams to use
746
-        params@selectParams <- SelectParams(selectMulti,
716
+        params@selectParams <- SelectParams("selectMulti",
747 717
                                             params = paramsassays,
748 718
                                             characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "merge"),
749 719
                                             tuneParams = list(nFeatures = nFeatures[[1]],
... ...
@@ -356,12 +356,12 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment",
356 356
           {
357 357
             if(missing(useFeatures))
358 358
               stop("'useFeatures' must be specified by the user.")
359
-            if(!all(useFeatures[, 1] %in% c(names(measurements), "sampleInfo")))
360
-              stop("Some table names in 'useFeatures' are not assay names in 'measurements' or \"sampleInfo\".")  
359
+            if(!all(useFeatures[, 1] %in% c(names(measurements), "clinical")))
360
+              stop("Some table names in 'useFeatures' are not assay names in 'measurements' or \"clinical\".")  
361 361
             
362
-            assaysuseFeatures <- useFeatures[useFeatures[, 1] != "sampleInfo", ]
363
-            sampleInfouseFeatures <- useFeatures[useFeatures[, 1] == "sampleInfo", ]
364
-            measurements <- measurements[assaysuseFeatures[, 2], , assaysuseFeatures[, 1]]
362
+            assaysUseFeatures <- useFeatures[useFeatures[, 1] != "clinical", ]
363
+            clinicalUseFeatures <- useFeatures[useFeatures[, 1] == "clinical", ]
364
+            measurements <- measurements[assaysUseFeatures[, 2], , assaysUseFeatures[, 1]]
365 365
             classes <- MultiAssayExperiment::colData(measurements)[, classesColumn]
366 366
             
367 367
             if(!is.null(groupBy))
... ...
@@ -369,7 +369,7 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment",
369 369
               if(is.null(groupingName))
370 370
                 groupingName <- groupBy[2]
371 371
               groupingTable <- groupBy[1]
372
-              if(groupingTable == "sampleInfo")
372
+              if(groupingTable == "clinical")
373 373
               {
374 374
                 groupBy <- MultiAssayExperiment::colData(measurements)[, groupBy[2]]
375 375
               } else { # One of the omics tables.
... ...
@@ -387,7 +387,7 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment",
387 387
             MultiAssayExperiment::colData(measurements) <- MultiAssayExperiment::colData(measurements)[colnames(MultiAssayExperiment::colData(measurements)) %in% sampleInfouseFeatures[, 2]]
388 388
             measurements <- MultiAssayExperiment::wideFormat(measurements, colDataCols = seq_along(MultiAssayExperiment::colData(measurements)), check.names = FALSE, collapse = ':')
389 389
             measurements <- measurements[, -1, drop = FALSE] # Remove sample IDs.
390
-            S4Vectors::mcols(measurements)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(measurements)[, "sourceName"])
390
+            S4Vectors::mcols(measurements)[, "sourceName"] <- gsub("colDataCols", "clinical", S4Vectors::mcols(measurements)[, "sourceName"])
391 391
             colnames(S4Vectors::mcols(measurements))[1] <- "assay"
392 392
             S4Vectors::mcols(measurements)[, "feature"] <- S4Vectors::mcols(measurements)[, "rowname"]
393 393
             missingIndices <- is.na(S4Vectors::mcols(measurements)[, "feature"])
... ...
@@ -1,8 +1,81 @@
1
-# Perform a Single Classification
2
-
1
+#' Perform a Single Classification
2
+#' 
3
+#' For a data set of features and samples, the classification process is run.
4
+#' It consists of data transformation, feature selection, classifier training
5
+#' and testing.
6
+#' 
7
+#' This function only performs one classification and prediction. See
8
+#' \code{\link{runTests}} for a driver function that enables a number of
9
+#' different cross-validation schemes to be applied and uses this function to
10
+#' perform each iteration.
11
+#' 
12
+#' @aliases runTest runTest,matrix-method runTest,DataFrame-method
13
+#' runTest,MultiAssayExperiment-method
14
+#' @param measurementsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}}
15
+#' or \code{\link{MultiAssayExperiment}} containing the training data. For a
16
+#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features.
17
+#' @param outcomeTrain Either a factor vector of classes, a \code{\link{Surv}} object, or
18
+#' a character string, or vector of such strings, containing column name(s) of column(s)
19
+#' containing either classes or time and event information about survival.
20
+#' @param measurementsTest Same data type as \code{measurementsTrain}, but only the test
21
+#' samples.
22
+#' @param outcomeTest Same data type as \code{outcomeTrain}, but only the test
23
+#' samples.
24
+#' @param crossValParams An object of class \code{\link{CrossValParams}},
25
+#' specifying the kind of cross-validation to be done, if nested
26
+#' cross-validation is used to tune any parameters.
27
+#' @param modellingParams An object of class \code{\link{ModellingParams}},
28
+#' specifying the class rebalancing, transformation (if any), feature selection
29
+#' (if any), training and prediction to be done on the data set.
30
+#' @param targets If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
31
+#' names of the data tables to be used. \code{"clinical"} is also a valid value
32
+#' and specifies that numeric variables from the clinical data table will be
33
+#' used.
34
+#' @param outcomeColumns If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
35
+#' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
36
+#' that contain(s) the samples' outcome to use for prediction.
37
+#' @param ... Variables not used by the \code{matrix} nor the
38
+#' \code{MultiAssayExperiment} method which are passed into and used by the
39
+#' \code{DataFrame} method.
40
+#' @param characteristics A \code{\link{DataFrame}} describing the
41
+#' characteristics of the classification used. First column must be named
42
+#' \code{"characteristic"} and second column must be named \code{"value"}.
43
+#' Useful for automated plot annotation by plotting functions within this
44
+#' package. Transformation, selection and prediction functions provided by
45
+#' this package will cause the characteristics to be automatically determined
46
+#' and this can be left blank.
47
+#' @param verbose Default: 1. A number between 0 and 3 for the amount of
48
+#' progress messages to give.  A higher number will produce more messages as
49
+#' more lower-level functions print messages.
50
+#' @param .iteration Not to be set by a user. This value is used to keep track
51
+#' of the cross-validation iteration, if called by \code{\link{runTests}}.
52
+#' @return If called directly by the user rather than being used internally by
53
+#' \code{\link{runTests}}, a \code{\link{ClassifyResult}} object. Otherwise a
54
+#' list of different aspects of the result which is passed back to \code{\link{runTests}}.
55
+#' @author Dario Strbenac
56
+#' @examples
57
+#' 
58
+#'   #if(require(sparsediscrim))
59
+#'   #{
60
+#'     data(asthma)
61
+#'     tuneList <- list(nFeatures = seq(5, 25, 5), performanceType = "Balanced Error")
62
+#'     selectParams <- SelectParams("limma", tuneParams = tuneList)
63
+#'     modellingParams <- ModellingParams(selectParams = selectParams)
64
+#'     trainIndices <- seq(1, nrow(measurements), 2)
65
+#'     testIndices <- seq(2, nrow(measurements), 2)
66
+#'     
67
+#'     runTest(measurements[trainIndices, ], classes[trainIndices],
68
+#'             measurements[testIndices, ], classes[testIndices], modellingParams = modellingParams)
69
+#'   #}
70
+#' 
71
+#' @importFrom S4Vectors do.call
72
+#' @usage NULL
73
+#' @export
3 74
 setGeneric("runTest", function(measurementsTrain, ...)
4 75
            standardGeneric("runTest"))
5 76
 
77
+#' @rdname runTest
78
+#' @export
6 79
 setMethod("runTest", "matrix", # Matrix of numeric measurements.
7 80
   function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, ...)
8 81
 {
... ...
@@ -13,6 +86,8 @@ setMethod("runTest", "matrix", # Matrix of numeric measurements.
13 86
           ...)
14 87
 })
15 88
 
89
+#' @rdname runTest
90
+#' @export
16 91
 setMethod("runTest", "DataFrame", # Sample information data or one of the other inputs, transformed.
17 92
 function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest,
18 93
          crossValParams = CrossValParams(), # crossValParams might be used for tuning optimisation.
... ...
@@ -256,6 +331,8 @@ input data. Autmomatically reducing to smaller number.")
256 331
   }  
257 332
 })
258 333
 
334
+#' @rdname runTest
335
+#' @export
259 336
 setMethod("runTest", c("MultiAssayExperiment"),
260 337
           function(measurementsTrain, measurementsTest, targets = names(measurements), outcomeColumns, ...)
261 338
 {
... ...
@@ -1,7 +1,67 @@
1
-# Reproducibly Run Various Kinds of Cross-Validation
2
-
1
+#' Reproducibly Run Various Kinds of Cross-Validation
2
+#' 
3
+#' Enables doing classification schemes such as ordinary 10-fold, 100
4
+#' permutations 5-fold, and leave one out cross-validation. Processing in
5
+#' parallel is possible by leveraging the package \code{\link{BiocParallel}}.
6
+#' 
7
+#' 
8
+#' @aliases runTests runTests,matrix-method runTests,DataFrame-method
9
+#' runTests,MultiAssayExperiment-method
10
+#' @param measurements Either a \code{\link{matrix}}, \code{\link{DataFrame}}
11
+#' or \code{\link{MultiAssayExperiment}} containing all of the data. For a
12
+#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns
13
+#' are features.
14
+#' @param outcome Either a factor vector of classes, a \code{\link{Surv}} object, or
15
+#' a character string, or vector of such strings, containing column name(s) of column(s)
16
+#' containing either classes or time and event information about survival.
17
+#' @param crossValParams An object of class \code{\link{CrossValParams}},
18
+#' specifying the kind of cross-validation to be done.
19
+#' @param modellingParams An object of class \code{\link{ModellingParams}},
20
+#' specifying the class rebalancing, transformation (if any), feature selection
21
+#' (if any), training and prediction to be done on the data set.
22
+#' @param characteristics A \code{\link{DataFrame}} describing the
23
+#' characteristics of the classification used. First column must be named
24
+#' \code{"characteristic"} and second column must be named \code{"value"}.
25
+#' Useful for automated plot annotation by plotting functions within this
26
+#' package.  Transformation, selection and prediction functions provided by
27
+#' this package will cause the characteristics to be automatically determined
28
+#' and this can be left blank.
29
+#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the
30
+#' names of the data tables to be used. \code{"clinical"} is also a valid value
31
+#' and specifies that the clinical data table will be used.
32
+#' @param outcomeColumns If \code{measurements} is a \code{MultiAssayExperiment}, the
33
+#' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
34
+#' that contain(s) the samples' outcome to use for prediction.
35
+#' @param ... Variables not used by the \code{matrix} nor the
36
+#' \code{MultiAssayExperiment} method which are passed into and used by the
37
+#' \code{DataFrame} method.
38
+#' @param verbose Default: 1. A number between 0 and 3 for the amount of
39
+#' progress messages to give.  A higher number will produce more messages as
40
+#' more lower-level functions print messages.
41
+#' @return An object of class \code{\link{ClassifyResult}}.
42
+#' @author Dario Strbenac
43
+#' @examples
44
+#' 
45
+#'   #if(require(sparsediscrim))
46
+#'   #{
47
+#'     data(asthma)
48
+#'     
49
+#'     CVparams <- CrossValParams(permutations = 5)
50
+#'     tuneList <- list(nFeatures = seq(5, 25, 5), performanceType = "Balanced Error")
51
+#'     selectParams <- SelectParams("t-test", tuneParams = tuneList)
52
+#'     modellingParams <- ModellingParams(selectParams = selectParams)
53
+#'     runTests(measurements, classes, CVparams, modellingParams,
54
+#'              DataFrame(characteristic = c("Assay Name", "Classifier Name"),
55
+#'                        value = c("Asthma", "Different Means"))
56
+#'              )
57
+#'   #}
58
+#'
59
+#' @export
60
+#' @usage NULL
3 61
 setGeneric("runTests", function(measurements, ...) standardGeneric("runTests"))
4 62
 
63
+#' @rdname runTests
64
+#' @export
5 65
 setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix of numeric measurements.
6 66
 {
7 67
   if(is.null(rownames(measurements)))
... ...
@@ -9,6 +69,8 @@ setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix
9 69
   runTests(S4Vectors::DataFrame(measurements, check.names = FALSE), outcome, ...)
10 70
 })
11 71
 
72
+#' @rdname runTests
73
+#' @export
12 74
 setMethod("runTests", "DataFrame", function(measurements, outcome, crossValParams = CrossValParams(), modellingParams = ModellingParams(),
13 75
            characteristics = S4Vectors::DataFrame(), verbose = 1)
14 76
 {
... ...
@@ -115,6 +177,8 @@ input data. Autmomatically reducing to smaller number.")
115 177
                  lapply(results, "[[", "models"), tuneList, predictionsTable, outcome, importance, modellingParams)
116 178
 })
117 179
 
180
+#' @rdname runTests
181
+#' @export
118 182
 setMethod("runTests", c("MultiAssayExperiment"),
119 183
           function(measurements, targets = names(measurements), outcomeColumns, ...)
120 184
 {
... ...
@@ -6,7 +6,7 @@ RFparams <- function() {
6 6
     return(list(trainParams = trainParams, predictParams = predictParams))
7 7
 }
8 8
 
9
-# Random Surival Forest
9
+# Random Survival Forest
10 10
 RSFparams <- function() {
11 11
     trainParams <- TrainParams(rfsrcTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)))
12 12
     predictParams <- PredictParams(rfsrcPredictInterface)
... ...
@@ -33,4 +33,5 @@ subtractFromLocation <- function(measurementsTrain, measurementsTest, location =
33 33
             {if(absolute == TRUE) " and absolute transformation"}, " completed.")
34 34
   
35 35
   list(transformedTrain, transformedTest)
36
-}
37 36
\ No newline at end of file
37
+}
38
+attr(subtractFromLocation, "name") <- "subtractFromLocation"
38 39
\ No newline at end of file
... ...
@@ -529,6 +529,52 @@
529 529
   list(measurementsTrain = measurementsTrain, classesTrain = classesTrain)
530 530
 }
531 531
 
532
+.transformKeywordToFunction <- function(keyword)
533
+{
534
+  switch(
535
+        keyword,
536
+        "none" = NULL,
537
+        "diffLoc" = subtractFromLocation
538
+    )
539
+}
540
+
541
+.selectionKeywordToFunction <- function(keyword)
542
+{
543
+  switch(
544
+        keyword,
545
+        "none" = NULL,
546
+        "t-test" = differentMeansRanking,
547
+        "limma" = limmaRanking,
548
+        "edgeR" = edgeRranking,
549
+        "Bartlett" = bartlettRanking,
550
+        "Levene" = leveneRanking,
551
+        "DMD" = DMDranking,
552
+        "likelihoodRatio" = likelihoodRatioRanking,
553
+        "KS" = KolmogorovSmirnovRanking,
554
+        "KL" = KullbackLeiblerRanking,
555
+        "CoxPH" = coxphRanking,
556
+        "selectMulti" = selectMulti
557
+    )
558
+}
559
+
560
+.classifierKeywordToParams <- function(keyword)
561
+{
562
+    switch(
563
+        keyword,
564
+        "randomForest" = RFparams(),
565
+        "randomSurvivalForest" = RSFparams(),
566
+        "GLM" = GLMparams(),
567
+        "elasticNetGLM" = elasticNetGLMparams(),
568
+        "SVM" = SVMparams(),
569
+        "DLDA" = DLDAparams(),
570
+        "naiveBayes" = naiveBayesParams(),
571
+        "mixturesNormals" = mixModelsParams(),
572
+        "kNN" = kNNparams(),
573
+        "CoxPH" = coxphParams(),
574
+        "CoxNet" = coxnetParams()
575
+    )    
576
+}
577
+
532 578
 .dlda <- function(x, y, prior = NULL){ # Remove this once sparsediscrim is reinstated to CRAN.
533 579
   obj <- list()
534 580
   obj$labels <- y
... ...
@@ -33,7 +33,7 @@ Contains a list of models, table of actual sample classes and predicted
33 33
 classes, the identifiers of features selected for each fold of each
34 34
 permutation or each hold-out classification, and performance metrics such as
35 35
 error rates. This class is not intended to be created by the user. It is
36
-created by \code{\link{crossValidate}}.
36
+created by \code{\link{crossValidate}}, \code{\link{runTests}} or \code{\link{runTest}}.
37 37
 }
38 38
 \section{Constructor}{
39 39
 
40 40
new file mode 100644
... ...
@@ -0,0 +1,73 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/classes.R
3
+\docType{class}
4
+\name{CrossValParams}
5
+\alias{CrossValParams}
6
+\alias{CrossValParams-class}
7
+\title{Parameters for Cross-validation Specification}
8
+\usage{
9
+CrossValParams(
10
+  samplesSplits = c("Permute k-Fold", "Permute Percentage Split", "Leave-k-Out",
11
+    "k-Fold"),
12
+  permutations = 100,
13
+  percentTest = 25,
14
+  folds = 5,
15
+  leave = 2,
16
+  tuneMode = c("Resubstitution", "Nested CV", "none"),
17
+  adaptiveResamplingDelta = NULL,
18
+  parallelParams = bpparam()
19
+)
20
+}
21
+\arguments{
22
+\item{samplesSplits}{Default: "Permute k-Fold". A character value
23
+specifying what kind of sample splitting to do.}
24
+
25
+\item{permutations}{Default: 100. Number of times to permute the
26
+data set before it is split into training and test sets. Only relevant if
27
+\code{samplesSplits} is either \code{"Permute k-Fold"} or \code{"Permute
28
+Percentage Split"}.}
29
+
30
+\item{percentTest}{The percentage of the data
31
+set to assign to the test set, with the remainder of the samples belonging
32
+to the training set. Only relevant if \code{samplesSplits} is \code{"Permute
33
+Percentage Split"}.}
34
+
35
+\item{folds}{The number of approximately equal-sized folds to partition
36
+the samples into. Only relevant if \code{samplesSplits} is \code{"Permute k-Fold"}
37
+or \code{"k-Fold"}.}
38
+
39
+\item{leave}{The number of samples to generate all possible
40
+combination of and use as the test set.  Only relevant if \code{samplesSplits} is
41
+\code{"Leave-k-Out"}. If set to 1, it is the traditional leave-one-out cross-validation,
42
+sometimes written as LOOCV.}
43
+
44
+\item{tuneMode}{Default: Resubstitution. The scheme to use for selecting any tuning parameters.}
45
+
46
+\item{adaptiveResamplingDelta}{Default: \code{NULL}. If not null, adaptive resampling of training
47
+samples is performed and this number is the difference in consecutive iterations that the
48
+class probability or risk of all samples must change less than for the iterative process to stop. 0.01
49
+was used in the original publication.}
50
+
51
+\item{parallelParams}{An instance of \code{\link{BiocParallelParam}} specifying
52
+the kind of parallelisation to use. Default is to use two cores less than the total number of
53
+cores the computer has, if it has four or more cores, otherwise one core, as is the
54
+default of \code{\link{bpparam}}. To make results fully reproducible, please
55
+choose a specific back-end depending on your operating system and also set
56
+\code{RNGseed} to a number.}
57
+}
58
+\description{
59
+Collects and checks necessary parameters required for cross-validation by
60
+\code{\link{runTests}}.
61
+}
62
+\examples{
63
+
64
+  CrossValParams() # Default is 100 permutations and 5 folds of each.
65
+  snow <- SnowParam(workers = 4, RNGseed = 999)
66
+  CrossValParams("Leave-k-Out", leave = 2, parallelParams = snow)
67
+  # Fully reproducible Leave-2-out cross-validation on 4 cores,
68
+  # even if feature selection or classifier use random sampling.
69
+
70
+}
71
+\author{
72
+Dario Strbenac
73
+}
0 74
new file mode 100644
... ...
@@ -0,0 +1,58 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/classes.R
3
+\docType{class}
4
+\name{ModellingParams}
5
+\alias{ModellingParams}
6
+\alias{ModellingParams-class}
7
+\title{Parameters for Data Modelling Specification}
8
+\usage{
9
+ModellingParams(
10
+  balancing = c("downsample", "upsample", "none"),
11
+  transformParams = NULL,
12
+  selectParams = SelectParams("t-test"),
13
+  trainParams = TrainParams("DLDA"),
14
+  predictParams = PredictParams("DLDA"),
15
+  doImportance = FALSE
16
+)
17
+}
18
+\arguments{
19
+\item{balancing}{Default: "downsample". A character value specifying what kind
20
+of class balancing to do, if any.}
21
+
22
+\item{transformParams}{Parameters used for feature transformation inside of C.V.
23
+specified by a \code{\link{TransformParams}} instance. Optional, can be \code{NULL}.}
24
+
25
+\item{selectParams}{Parameters used during feature selection specified
26
+by a \code{\link{SelectParams}} instance.  By default, parameters for selection
27
+based on differences in means of numeric data. Optional, can be \code{NULL}.}
28
+
29
+\item{trainParams}{Parameters for model training specified by a \code{\link{TrainParams}} instance.
30
+By default, uses diagonal LDA.}
31
+
32
+\item{predictParams}{Parameters for model prediction specified by a \code{\link{PredictParams}} instance.
33
+By default, uses diagonal LDA.}
34
+
35
+\item{doImportance}{Default: \code{FALSE}. Whether or not to carry out removal of each feature, one at a time, which
36
+was chosen and then retrain the model and predict the test set, to measure the change in performance metric. Can
37
+also be set to TRUE, if required. Modelling run time will be noticeably longer.}
38
+}
39
+\description{
40
+Collects and checks necessary parameters required for data modelling. Apart
41
+from data transformation that needs to be done within cross-validation (e.g.
42
+subtracting each observation from training set mean), feature selection, model training and
43
+prediction, this container also stores a setting for class imbalance
44
+rebalancing.
45
+}
46
+\examples{
47
+
48
+  #if(require(sparsediscrim))
49
+  #{
50
+     ModellingParams() # Default is differences in means selection and DLDA.
51
+     ModellingParams(selectParams = NULL, # No feature selection before training.
52
+                     trainParams = TrainParams("randomForest"),
53
+                     predictParams = PredictParams("randomForest"))
54
+  #}
55
+}
56
+\author{
57
+Dario Strbenac
58
+}
... ...
@@ -1,14 +1,56 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/classes.R
3
-\name{show,PredictParams-method}
3
+\docType{class}
4
+\name{PredictParams}
5
+\alias{PredictParams}
6
+\alias{PredictParams-class}
7
+\alias{PredictParams,missing-method}
8
+\alias{PredictParams,characterOrFunction-method}
4 9
 \alias{show,PredictParams-method}
5
-\title{Inspect Prediction Function Details}
6
-\usage{
7
-\S4method{show}{PredictParams}(object)
10
+\title{Parameters for Classifier Prediction}
11
+\description{
12
+Collects the function to be used for making predictions and any associated
13
+parameters.
8 14
 }
9
-\arguments{
10
-\item{object}{An object of class \code{TrainParams} to inspect.}
15
+\details{
16
+The function specified must return either a factor vector of class
17
+predictions, or a numeric vector of scores for the second class, according
18
+to the levels of the class vector of the input data set, or a data frame
19
+which has two columns named class and score.
11 20
 }
12
-\description{
13
-Inspect Prediction Function Details
21
+\section{Constructor}{
22
+ \describe{\item{}{
23
+\code{PredictParams(predictor, characteristics = DataFrame(), intermediate =
24
+character(0), ...)} Creates a PredictParams object which stores the function
25
+which will do the class prediction, if required, and parameters that the
26
+function will use. If the training function also makes predictions, this
27
+must be set to \code{NULL}.}
28
+\describe{ \item{\code{predictor}}{A character keyword referring to a registered classifier. See \code{\link{available}}
29
+for valid keywords.}
30
+\item{\code{characteristics}}{A \code{\link{DataFrame}} describing
31
+the characteristics of the predictor function used. First column must be
32
+named \code{"characteristic"} and second column must be named \code{"value"}.}
33
+\item{\code{intermediate}}{Character vector. Names of any
34
+variables created in prior stages in \code{\link{runTest}} that need to be
35
+passed to the prediction function.}
36
+\item{\code{...}}{Other arguments that \code{predictor} may use.} } }
37
+}
38
+
39
+\section{Summary}{
40
+
41
+\code{predictParams} is a \code{PredictParams} object.
42
+\describe{
43
+\item{}{
44
+  \code{show(predictParams)}: Prints a short summary of what \code{predictParams} contains.
45
+}}
46
+}
47
+
48
+\examples{
49
+
50
+# For prediction by trained object created by DLDA training function.
51
+predictParams <- PredictParams("DLDA")
52
+
53
+}
54
+\author{
55
+Dario Strbenac
14 56
 }
... ...
@@ -1,14 +1,69 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/classes.R
3
-\name{show,SelectParams-method}
3
+\docType{class}
4
+\name{SelectParams}
5
+\alias{SelectParams}
6
+\alias{SelectParams-class}
7
+\alias{SelectParams,missing-method}
8
+\alias{SelectParams,characterOrList-method}
4 9
 \alias{show,SelectParams-method}
5
-\title{Container for Storing Details of Feature Selection Function(s)}
6
-\usage{
7
-\S4method{show}{SelectParams}(object)
10
+\title{Parameters for Feature Selection}
11
+\description{
12
+Collects and checks necessary parameters required for feature selection.
13
+Either one function is specified or a list of functions to perform ensemble
14
+feature selection. The empty constructor is provided for convenience.
8 15
 }
9
-\arguments{
10
-\item{object}{An object of class \code{SelectParams} to inspect.}
16
+\section{Constructor}{
17
+
18
+\describe{
19
+\item{}{\preformatted{SelectParams(featureRanking, characteristics = DataFrame(), minPresence = 1, intermediate = character(0),
20
+subsetToSelections = TRUE, tuneParams = list(nFeatures = seq(10, 100, 10), performanceType = "Balanced Error"), ...)} Creates a \code{SelectParams}
21
+object which stores the function(s) which will do the selection and parameters that the
22
+function will use.
23
+\describe{\item{\code{featureRanking}}{A character keyword referring to a registered feature ranking function. See \code{\link{available}}
24
+for valid keywords.}
25
+\item{\code{characteristics}}{A \code{\link{DataFrame}} describing the characteristics
26
+of feature selection to be done. First column must be named \code{"characteristic"} and
27
+second column must be named \code{"value"}. If using wrapper functions for feature
28
+selection in this package, the feature selection name will automatically be
29
+generated and therefore it is not necessary to specify it.}
30
+\item{\code{minPresence}}{If a list of functions was provided, the minimum number
31
+of them that must have selected a feature for it to be used in classification. 1
32
+is equivalent to a set union and a number equal to the length of
33
+\code{featureRanking} is equivalent to set intersection.}
34
+\item{\code{intermediate}}{Character vector. Names of any variables created
35
+in prior stages by \code{\link{runTest}} that need to be passed to a feature
36
+selection function.}
37
+\item{\code{subsetToSelections}}{Whether to subset the data table(s), after feature selection has been done.}
38
+\item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of
39
+the list are the names of the parameters and the vectors are the values of the parameters to try. All possible
40
+combinations are generated. Two elements named \code{nFeatures} and \code{performanceType} are mandatory, to
41
+define the performance metric which will be used to select features and how many top-ranked features to try.}
42
+\item{\code{...}}{Other named parameters which will be used by the
43
+selection function. If \code{featureRanking} was a list of functions,
44
+this must be a list of lists, as long as \code{featureRanking}.} } } }
11 45
 }
12
-\description{
13
-Container for Storing Details of Feature Selection Function(s)
46
+
47
+\section{Summary}{
48
+
49
+\code{selectParams} is a \code{SelectParams} object.
50
+\describe{
51
+\item{}{
52
+  \code{show(selectParams)}: Prints a short summary of what \code{selectParams} contains.
53
+}}
54
+}
55
+
56
+\examples{
57
+
58
+  #if(require(sparsediscrim))
59
+  #{
60
+    SelectParams("KS")
61
+    
62
+    # Ensemble feature selection.
63
+    SelectParams(list("Bartlett", "Levene"))
64
+  #}
65
+
66
+}
67
+\author{
68
+Dario Strbenac
14 69
 }
... ...
@@ -1,14 +1,59 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/classes.R
3
-\name{show,TrainParams-method}
3
+\docType{class}
4
+\name{TrainParams}
5
+\alias{TrainParams}
6
+\alias{TrainParams-class}
7
+\alias{TrainParams,missing-method}
8
+\alias{TrainParams,characterOrFunction-method}
4 9
 \alias{show,TrainParams-method}
5
-\title{Inspect Model Training Details}
6
-\usage{
7
-\S4method{show}{TrainParams}(object)
10
+\title{Parameters for Classifier Training}
11
+\description{
12
+Collects and checks necessary parameters required for classifier training.
13
+The empty constructor is provided for convenience.
8 14
 }
9
-\arguments{
10
-\item{object}{An object of class \code{TrainParams} to inspect.}
15
+\section{Constructor}{
16
+
17
+\describe{
18
+\item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(),
19
+intermediate = character(0), getFeatures = NULL, ...)}
20
+Creates a \code{TrainParams} object which stores the function which will do the
21
+classifier building and parameters that the function will use.
22
+\describe{
23
+\item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}}
24
+for valid keywords.}
25
+\item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
26
+characteristics of the classifier used. First column must be named \code{"characteristic"}
27
+and second column must be named \code{"value"}. If using wrapper functions for classifiers
28
+in this package, a classifier name will automatically be generated and
29
+therefore it is not necessary to specify it.}
30
+\item{\code{intermediate}}{Character vector. Names of any variables created
31
+in prior stages by \code{\link{runTest}} that need to be passed to
32
+\code{classifier}.}
33
+\item{\code{getFeatures}}{A function may be specified that extracts the selected
34
+features from the trained model. This is relevant if using a classifier that does
35
+feature selection within training (e.g. random forest). The function must return a