
- getFeatures functions added to simple params settings, to extract the selected features from within the trained model where relevant.
- Nearest Shrunken Centroid added as a simple params function and a classifier keyword option.

Dario Strbenac authored on 25/08/2022 05:15:03
Showing 16 changed files
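
The additions can be tried end-to-end with the package's example data. A minimal sketch, assuming crossValidate accepts a classifier keyword that is routed through the switch statement extended in this commit:

    library(ClassifyR)
    data(asthma)
    # Nearest Shrunken Centroid via its new "NSC" keyword; five repeats, as in
    # the updated documentation examples.
    classified <- crossValidate(measurements, classes, classifier = "NSC", nRepeats = 5)
    class(classified)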

... ...
@@ -3,8 +3,8 @@ Type: Package
 Title: A framework for cross-validated classification problems, with
        applications to differential variability and differential
        distribution testing
-Version: 3.1.13
-Date: 2022-08-21
+Version: 3.1.14
+Date: 2022-08-25
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
 VignetteBuilder: knitr
... ...
@@ -1,7 +1,7 @@
 #' Plot Receiver Operating Curve Graphs for Classification Results
 #' 
 #' Creates one ROC plot or multiple ROC plots for a list of ClassifyResult
-#' objects.  One plot is created if the data set has two classes and multiple
+#' objects. One plot is created if the data set has two classes and multiple
 #' plots are created if the data set has three or more classes.
 #' 
 #' The scores stored in the results should be higher if the sample is more
... ...
@@ -21,13 +21,13 @@
 #' averaged ROC curve.
 #' @param interval Default: 95 (percent). The percent confidence interval to
 #' draw around the averaged ROC curve, if mode is \code{"each"}.
-#' @param comparison The aspect of the experimental design to compare. Can be
+#' @param comparison Default: \code{"auto"}. The aspect of the experimental design to compare. Can be
 #' any characteristic that all results share. If the data set has two classes,
 #' then the slot name with factor levels to be used for colouring the lines.
 #' Otherwise, it specifies the variable used for plot facetting.
-#' @param lineColours A vector of colours for different levels of the
+#' @param lineColours Default: \code{"auto"}. A vector of colours for different levels of the
 #' comparison parameter, or if there are three or more classes, the classes.
-#' If \code{NULL}, a default colour palette is automatically generated.
+#' If \code{"auto"}, a default colour palette is automatically generated.
 #' @param lineWidth A single number controlling the thickness of lines drawn.
 #' @param fontSizes A vector of length 5. The first number is the size of the
 #' title.  The second number is the size of the axes titles and AUC text, if it
... ...
@@ -66,7 +66,7 @@
 #'   predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60)
 #'   predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40)
 #'   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
-#'                                       value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")),
+#'                                       value = c("Melanoma", "Bartlett Test", "Differential Variability", "2-fold")),
 #'                             LETTERS[1:20], paste("Gene", LETTERS[1:10]), list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
 #'                             list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
 #'                             list(function(oracle){}), NULL, predicted, actual)
... ...
@@ -80,7 +80,7 @@ setGeneric("ROCplot", function(results, ...) standardGeneric("ROCplot"))
 #' @export
 setMethod("ROCplot", "list", 
           function(results, mode = c("merge", "average"), interval = 95,
-                   comparison = "Classifier Name", lineColours = NULL,
+                   comparison = "auto", lineColours = "auto",
                    lineWidth = 1, fontSizes = c(24, 16, 12, 12, 12), labelPositions = seq(0.0, 1.0, 0.2),
                    plotTitle = "ROC", legendTitle = NULL, xLabel = "False Positive Rate", yLabel = "True Positive Rate", showAUC = TRUE)
 {
... ...
@@ -89,7 +89,15 @@ setMethod("ROCplot", "list",
   if(!requireNamespace("scales", quietly = TRUE))
     stop("The package 'scales' could not be found. Please install it.")
   mode <- match.arg(mode)
-                      
+  characteristicsCounts <- table(unlist(lapply(results, function(result) result@characteristics[["characteristic"]])))
+  if(comparison == "auto")
+  {
+    if(max(characteristicsCounts) == length(results))
+      comparison <- names(characteristicsCounts)[characteristicsCounts == max(characteristicsCounts)][1]
+    else
+      stop("No characteristic is present for all results but must be.")
+  }
+               
   ggplot2::theme_set(ggplot2::theme_classic() + ggplot2::theme(panel.border = ggplot2::element_rect(fill = NA)))
   distinctClasses <- levels(actualOutcome(results[[1]]))
   numberDistinctClasses <- length(distinctClasses)
... ...
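
With the change above, ROCplot no longer requires a comparison to be named: the characteristics of all results are tallied and the first characteristic present in every result is chosen, while the absence of any shared characteristic is an error. A sketch, assuming result1 and result2 are ClassifyResult objects like those constructed in the documentation example:

    # comparison and lineColours both resolve from their new "auto" defaults.
    ROCplot(list(result1, result2))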
@@ -181,8 +189,8 @@ setMethod("ROCplot", "list",
   else
     lineColour <- comparison
   
-  if(is.null(lineColours))
-      lineColours <- scales::hue_pal()(ifelse(lineColour == "class", numberDistinctClasses, length(unique(comparisonValues))))
+  if(lineColours == "auto")
+      lineColours <- scales::hue_pal()(ifelse(lineColour == "class", numberDistinctClasses, max(characteristicsCounts)))
   if(is.null(legendTitle))
     legendTitle <- ifelse(lineColour == "class", "Class", comparisonName)
   
... ...
@@ -207,8 +215,6 @@
   comparison <- rlang::sym(comparison)
   ROCplots <- lapply(plotDataSets, function(plotData)
             {
-
-
                ROCplot <- ggplot2::ggplot(plotData, ggplot2::aes(x = FPR, y = TPR, colour = !!lineColour)) +
                           ggplot2::geom_line(size = lineWidth) + ggplot2::xlab(NULL) + ggplot2::ylab(NULL) + ggplot2::labs(colour = legendTitle) + ggplot2::geom_segment(x = 0, y = 0, xend = 1, yend = 1, size = lineWidth, colour = "black") + ggplot2::scale_x_continuous(breaks = labelPositions, limits = c(0, 1)) +  ggplot2::scale_y_continuous(breaks = labelPositions, limits = c(0, 1)) +
                           ggplot2::theme(axis.text = ggplot2::element_text(colour = "black", size = fontSizes[3]), legend.position = c(1, 0), legend.justification = c(1, 0), legend.background = ggplot2::element_rect(fill = "transparent"), legend.title = ggplot2::element_text(size = fontSizes[4], hjust = 0), legend.text = ggplot2::element_text(size = fontSizes[5])) + ggplot2::guides(colour = ggplot2::guide_legend(title.hjust = 0.5)) + ggplot2::scale_colour_manual(values = lineColours)
... ...
@@ -641,13 +641,15 @@ setClass("TrainParams", representation(
 #' @docType class
 #' @section Constructor:
 #' \describe{
-#' \item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(),
-#' intermediate = character(0), getFeatures = NULL, ...)}
+#' \item{}{\preformatted{TrainParams(classifier, balancing = c("downsample", "upsample", "none"), characteristics = DataFrame(),
+#' intermediate = character(0), tuneParams = NULL, getFeatures = NULL, ...)}
 #' Creates a \code{TrainParams} object which stores the function which will do the
 #' classifier building and parameters that the function will use.
 #' \describe{
 #' \item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}}
 #' for valid keywords.}
+#' \item{\code{balancing}}{Default: \code{"downsample"}. A keyword specifying how to handle class imbalance for data sets with categorical outcome.
+#' Valid values are \code{"downsample"}, \code{"upsample"} and \code{"none"}.}
 #' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
 #' characteristics of the classifier used. First column must be named \code{"charateristic"}
 #' and second column must be named \code{"value"}. If using wrapper functions for classifiers
... ...
@@ -656,6 +658,9 @@ setClass("TrainParams", representation(
 #' \item{\code{intermediate}}{Character vector. Names of any variables created
 #' in prior stages by \code{\link{runTest}} that need to be passed to
 #' \code{classifier}.}
+#' \item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of
+#' the list are the names of the parameters and the vectors are the values of the parameters to try. All possible
+#' combinations are generated.}
 #' \item{\code{getFeatures}}{A function may be specified that extracts the selected
 #' features from the trained model. This is relevant if using a classifier that does
 #' feature selection within training (e.g. random forest). The function must return a
... ...
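
The two new constructor arguments documented above are used together by the random forest settings later in this commit. A minimal sketch along those lines, with a reduced tuning grid for brevity:

    # All combinations of mTryProportion and ntree are tried; forestFeatures
    # extracts ranked and selected features from the trained forest.
    trainParams <- TrainParams(randomForestTrainInterface,
                               tuneParams = list(mTryProportion = c(0.25, 0.50), ntree = c(100, 200)),
                               getFeatures = forestFeatures)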
@@ -964,7 +969,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
 #'   #if(require(sparsediscrim))
 #'   #{
 #'     data(asthma)
-#'     classified <- crossValidate(measurements, classes)
+#'     classified <- crossValidate(measurements, classes, nRepeats = 5)
 #'     class(classified)
 #'   #}
 #'   
... ...
@@ -76,6 +76,7 @@
     "GLM", "Logistic regression.",
     "elasticNetGLM", "Elastic net GLM multinomial regression.",
     "SVM", "Support Vector Machine.",
+    "NSC", "Nearest Shrunken Centroids.",
     "naiveBayes", "Naive Bayes kernel feature voting classifier.",
     "mixturesNormals", "Mixture of normals feature voting classifier.",
     "CoxPH", "Cox proportional hazards.",
... ...
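
The keyword table above is what the package's available function displays, so the new entry can be confirmed interactively:

    library(ClassifyR)
    available()   # Now lists NSC, Nearest Shrunken Centroids.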
@@ -36,6 +36,6 @@ randomForestPredictInterface <- function(forest, measurementsTest, ..., returnTy
 forestFeatures <- function(forest)
                   {
                     rankedFeaturesIndices <- order(randomForest::importance(forest), decreasing = TRUE)
-                    selectedFeaturesIndices <- which(randomForest::varUsed(forest) > 0)
+                    selectedFeaturesIndices <- randomForest::varUsed(forest, count = FALSE)
                     list(rankedFeaturesIndices, selectedFeaturesIndices)
                   }
\ No newline at end of file
... ...
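
forestFeatures above shows the contract that any getFeatures function follows: given a trained model, return a list of ranked feature indices and selected feature indices. A minimal sketch of a custom extractor, where model$importances is a hypothetical slot used purely for illustration:

    myModelFeatures <- function(model)
    {
      rankedFeaturesIndices <- order(model$importances, decreasing = TRUE)
      selectedFeaturesIndices <- which(model$importances > 0)
      list(rankedFeaturesIndices, selectedFeaturesIndices)
    }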
@@ -15,7 +15,7 @@
 #' \code{characteristicsList['x']} to aggregate to a single number by taking
 #' the mean. This is particularly meaningful when the cross-validation is
 #' leave-k-out, when k is small.
-#' @param performanceName Default: "auto". The name of the
+#' @param performanceName Default: \code{"auto"}. The name of the
 #' performance measure or "auto". If the results are classification then
 #' balanced accuracy will be displayed. Otherwise, the results would be survival risk
 #' predictions and then C-index will be displayed. This is one of the names printed
... ...
@@ -70,11 +70,11 @@
 #' \code{FALSE}, the x-axis labels are hidden.
 #' @param showYtickLabels Logical. Default: \code{TRUE}. If set to
 #' \code{FALSE}, the y-axis labels are hidden.
-#' @param xLabelPositions Either "auto" or a vector of values. The positions of
-#' labels on the x-axis.  If "auto", the placement of labels is automatically
+#' @param xLabelPositions Either \code{"auto"} or a vector of values. The positions of
+#' labels on the x-axis.  If \code{"auto"}, the placement of labels is automatically
 #' calculated.
-#' @param yLabelPositions Either "auto" or a vector of values. The positions of
-#' labels on the y-axis.  If "auto", the placement of labels is automatically
+#' @param yLabelPositions Either \code{"auto"} or a vector of values. The positions of
+#' labels on the y-axis.  If \code{"auto"}, the placement of labels is automatically
 #' calculated.
 #' @param fontSizes A vector of length 5. The first number is the size of the
 #' title.  The second number is the size of the axes titles. The third number
... ...
@@ -167,7 +167,7 @@ input data. Autmomatically reducing to smaller number.")
       modellingParams@selectParams <- .addIntermediates(modellingParams@selectParams)
 
     topFeatures <- tryCatch(.doSelection(measurementsTrain, outcomeTrain, crossValParams, modellingParams, verbose),
-                            error = function(error) error[["message"]]) 
+                            error = function(error) error[["message"]])
     if(is.character(topFeatures)) return(topFeatures) # An error occurred.
    
     rankedFeaturesIndices <- topFeatures[[1]] # Extract for result object.
... ...
@@ -186,10 +186,11 @@ input data. Autmomatically reducing to smaller number.")
     modellingParams@trainParams <- .addIntermediates(modellingParams@trainParams)
   if(!is.null(tuneDetailsSelect))
   {
-    avoidTune <- match(colnames(tuneDetailsSelect), names(modellingParams@trainParams@tuneParams))
+    tuneDetailsSelectUse <- tuneDetailsSelect[["tuneCombinations"]][tuneDetailsSelect[["bestIndex"]], , drop = FALSE]
+    avoidTune <- match(colnames(tuneDetailsSelectUse), names(modellingParams@trainParams@tuneParams))
     if(any(!is.na(avoidTune)))
     {
-      modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, tuneDetailsSelect[!is.na(avoidTune)])
+      modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, tuneDetailsSelectUse[!is.na(avoidTune)])
       modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-na.omit(avoidTune)]
       if(length(modellingParams@trainParams@tuneParams) == 0) modellingParams@trainParams@tuneParams <- NULL
     }
... ...
@@ -209,7 +210,7 @@ input data. Autmomatically reducing to smaller number.")
     if(length(extras) > 0)
       extrasList <- mget(setdiff(names(extras), "..."))

-    rankedChosenList <- do.call(modellingParams@trainParams@getFeatures, c(trained[1], extrasList))
+    rankedChosenList <- do.call(modellingParams@trainParams@getFeatures, c(unname(trained[1]), extrasList))
     rankedFeaturesIndices <- rankedChosenList[[1]]
     selectedFeaturesIndices <- rankedChosenList[[2]]
   }
... ...
@@ -304,8 +305,8 @@ input data. Autmomatically reducing to smaller number.")
   } else { # runTest executed by the end user. Create a ClassifyResult object.
     # Only one training, so only one tuning choice, which can be summarised in characteristics.
     modParamsList <- list(modellingParams@transformParams, modellingParams@selectParams, modellingParams@trainParams, modellingParams@predictParams)
-    if(!is.null(tuneDetails)) characteristics <- rbind(characteristics, data.frame(characteristic = colnames(tuneDetails),
-                                                                                   value = unlist(tuneDetails)))
+    if(!is.null(tuneDetails)) characteristics <- rbind(characteristics, data.frame(characteristic = colnames(tuneDetails[["tuneCombinations"]]),
+                                                                                   value = unlist(tuneDetails[["tuneCombinations"]][tuneDetails[["bestIndex"]], ])))
     autoCharacteristics <- do.call(rbind, lapply(modParamsList, function(stageParams) if(!is.null(stageParams) && !is(stageParams, "PredictParams")) stageParams@characteristics))
     characteristics <- .filterCharacteristics(characteristics, autoCharacteristics)
     characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Cross-validation", value = "Independent Set"))
... ...
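
The changes above all follow from the new shape of the tuning details: rather than a single chosen row, a list of the full table of tried combinations and the index of the best row is now passed around, so the winning combination can be written into the result's characteristics. An illustrative sketch with invented values:

    tuneDetails <- list(tuneCombinations = data.frame(topN = c(10, 20, 10, 20),
                                                      mTryProportion = c(0.25, 0.25, 0.50, 0.50),
                                                      "Balanced Accuracy" = c(0.71, 0.74, 0.69, 0.73),
                                                      check.names = FALSE),
                        bestIndex = 2)
    # The winning row, as extracted by the code above:
    tuneDetails[["tuneCombinations"]][tuneDetails[["bestIndex"]], , drop = FALSE]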
@@ -1,6 +1,7 @@
 # Random Forest
 RFparams <- function() {
-    trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)))
+    trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)),
+                               getFeatures = forestFeatures)
     predictParams <- PredictParams(randomForestPredictInterface)
    
     return(list(trainParams = trainParams, predictParams = predictParams))
... ...
@@ -31,7 +32,7 @@ GLMparams <- function() {

 # Elastic net GLM
 elasticNetGLMparams <- function() {
-    trainParams <- TrainParams(elasticNetGLMtrainInterface)
+    trainParams <- TrainParams(elasticNetGLMtrainInterface, getFeatures = elasticNetFeatures)
     predictParams <- PredictParams(elasticNetGLMpredictInterface)
    
     return(list(trainParams = trainParams, predictParams = predictParams))
... ...
@@ -45,6 +46,14 @@ SVMparams = function() {
     return(list(trainParams = trainParams, predictParams = predictParams))
 }

+# Nearest Shrunken Centroid
+NSCparams = function() {
+    trainParams <- TrainParams(NSCtrainInterface, getFeatures = NSCfeatures)
+    predictParams <- PredictParams(NSCpredictInterface)
+    
+    return(list(trainParams = trainParams, predictParams = predictParams))
+}
+
 # Diagonal Linear Discriminant Analysis
 DLDAparams = function() {
     trainParams <- TrainParams(DLDAtrainInterface)
... ...
@@ -193,14 +193,14 @@
     if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
       return(list(NULL, rankings[[1]], NULL))
    
-    if(tuneMode == "none") # Actually selection not ranking.
+    if(tuneMode == "none") # No parameters to choose between.
        return(list(NULL, rankings[[1]], NULL))
    
     tuneParamsTrain <- list(topN = topNfeatures)
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
     modellingParams@trainParams@tuneParams <- NULL
-    bestPerformers <- sapply(rankings, function(rankingsVariety)
+    allPerformanceTables <- lapply(rankings, function(rankingsVariety)
     {
       # Creates a matrix. Columns are top n features, rows are varieties (one row if None).
       performances <- sapply(1:nrow(tuneCombosTrain), function(rowIndex)
... ...
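
The grid that drives this selection loop is built by crossing the candidate top-N cutoffs with any classifier tuning parameters, matching the tuneParams documentation earlier in this commit. A standalone illustration with invented values:

    tuneParamsTrain <- list(topN = c(10, 20), mTryProportion = c(0.25, 0.50))
    tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)
    tuneCombosTrain   # Four rows; one candidate model is assessed per row.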
@@ -240,20 +240,22 @@
       })

        bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
-        c(bestOne, performances[bestOne])
+        list(data.frame(tuneCombosTrain, performance = performances), bestOne)
      })

-      tunePick <- ifelse(betterValues == "lower", which.min(bestPerformers[2, ])[1], which.max(bestPerformers[2, ])[1])
+      tablesBestMetrics <- sapply(allPerformanceTables, function(tableIndexPair) tableIndexPair[[1]][tableIndexPair[[2]], "performance"])
+      tunePick <- ifelse(betterValues == "lower", which.min(tablesBestMetrics)[1], which.max(tablesBestMetrics)[1])
      
      if(verbose == 3)
         message("Features selected.")
      
-      tuneRow <- tuneCombosTrain[bestPerformers[1, tunePick], , drop  = FALSE]
-      if(ncol(tuneRow) > 1) tuneDetails <- tuneRow[, -1, drop = FALSE] else tuneDetails <- NULL
+      tuneDetails <- allPerformanceTables[[tunePick]] # List of length 2.
      
      rankingUse <- rankings[[tunePick]]
-      selectionIndices <- rankingUse[1:tuneRow[, "topN"]]
+      selectionIndices <- rankingUse[1:(tuneDetails[[1]][tuneDetails[[2]], "topN"])]
      
+      names(tuneDetails) <- c("tuneCombinations", "bestIndex")
+      colnames(tuneDetails[[1]])[ncol(tuneDetails[[1]])] <- performanceType
      list(ranked = rankingUse, selected = selectionIndices, tune = tuneDetails)
    } else if(is.list(featureRanking)) { # It is a list of functions for ensemble selection.
      featuresIndiciesLists <- mapply(function(selector, selParams)
... ...
@@ -296,8 +298,7 @@
      
      list(NULL, selectionIndices, NULL)
    } else { # Previous selection
-      selectedFeatures <- 
-      list(NULL, selectionIndices, NULL)
+      selectedFeatures <- list(NULL, selectionIndices, NULL)
    }
 }
 
... ...
@@ -315,7 +316,7 @@
 # within the same function, so test samples are also passed in case they are needed.
 .doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose)
 {
-  tuneChosen <- NULL
+  tuneDetails <- NULL
   if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams))
   {
     performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
... ...
@@ -346,9 +347,14 @@
         median(performances(result)[[performanceType]])
       }
     })
+    allPerformanceTable <- data.frame(tuneCombos, performances)
+    colnames(allPerformanceTable)[ncol(allPerformanceTable)] <- performanceType
+    
     betterValues <- .ClassifyRenvir[["performanceInfoTable"]][.ClassifyRenvir[["performanceInfoTable"]][, "type"] == performanceType, "better"]
     bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
     tuneChosen <- tuneCombos[bestOne, , drop = FALSE]
+    tuneDetails <- list(tuneCombos, bestOne)
+    names(tuneDetails) <- c("tuneCombinations", "bestIndex")
     modellingParams@trainParams@otherParams <- tuneChosen
   }

... ...
@@ -367,7 +373,7 @@
   if(verbose >= 2)
     message("Training completed.")  
   
-  list(model = trained, tune = tuneChosen)
+  list(model = trained, tune = tuneDetails)
 }
 
 # Creates a function call to a prediction function.
... ...
@@ -566,6 +572,7 @@
         "GLM" = GLMparams(),
         "elasticNetGLM" = elasticNetGLMparams(),
         "SVM" = SVMparams(),
+        "NSC" = NSCparams(),
         "DLDA" = DLDAparams(),
         "naiveBayes" = naiveBayesParams(),
         "mixturesNormals" = mixModelsParams(),
... ...
@@ -104,7 +104,7 @@ of predictions made during the cross-validation procedure.}}
   #if(require(sparsediscrim))
   #{
     data(asthma)
-    classified <- crossValidate(measurements, classes)
+    classified <- crossValidate(measurements, classes, nRepeats = 5)
     class(classified)
   #}
   
... ...
@@ -9,8 +9,8 @@
   results,
   mode = c("merge", "average"),
   interval = 95,
-  comparison = "Classifier Name",
-  lineColours = NULL,
+  comparison = "auto",
+  lineColours = "auto",
   lineWidth = 1,
   fontSizes = c(24, 16, 12, 12, 12),
   labelPositions = seq(0, 1, 0.2),
... ...
@@ -33,14 +33,14 @@ averaged ROC curve.}
 \item{interval}{Default: 95 (percent). The percent confidence interval to
 draw around the averaged ROC curve, if mode is \code{"each"}.}

-\item{comparison}{The aspect of the experimental design to compare. Can be
+\item{comparison}{Default: \code{"auto"}. The aspect of the experimental design to compare. Can be
 any characteristic that all results share. If the data set has two classes,
 then the slot name with factor levels to be used for colouring the lines.
 Otherwise, it specifies the variable used for plot facetting.}

-\item{lineColours}{A vector of colours for different levels of the
+\item{lineColours}{Default: \code{"auto"}. A vector of colours for different levels of the
 comparison parameter, or if there are three or more classes, the classes.
-If \code{NULL}, a default colour palette is automatically generated.}
+If \code{"auto"}, a default colour palette is automatically generated.}

 \item{lineWidth}{A single number controlling the thickness of lines drawn.}

... ...
@@ -71,7 +71,7 @@ device, if \code{plot} is \code{TRUE}.
 }
 \description{
 Creates one ROC plot or multiple ROC plots for a list of ClassifyResult
-objects.  One plot is created if the data set has two classes and multiple
+objects. One plot is created if the data set has two classes and multiple
 plots are created if the data set has three or more classes.
 }
 \details{
... ...
@@ -103,7 +103,7 @@ considered simultaneously, to calculate one curve per classification.
   predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60)
   predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40)
   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
-                                      value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")),
+                                      value = c("Melanoma", "Bartlett Test", "Differential Variability", "2-fold")),
                             LETTERS[1:20], paste("Gene", LETTERS[1:10]), list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
                             list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
                             list(function(oracle){}), NULL, predicted, actual)
... ...
@@ -15,13 +15,15 @@ The empty constructor is provided for convenience.
 \section{Constructor}{

 \describe{
-\item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(),
-intermediate = character(0), getFeatures = NULL, ...)}
+\item{}{\preformatted{TrainParams(classifier, balancing = c("downsample", "upsample", "none"), characteristics = DataFrame(),
+intermediate = character(0), tuneParams = NULL, getFeatures = NULL, ...)}
 Creates a \code{TrainParams} object which stores the function which will do the
 classifier building and parameters that the function will use.
 \describe{
 \item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}}
 for valid keywords.}
+\item{\code{balancing}}{Default: \code{"downsample"}. A keyword specifying how to handle class imbalance for data sets with categorical outcome.
+Valid values are \code{"downsample"}, \code{"upsample"} and \code{"none"}.}
 \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
 characteristics of the classifier used. First column must be named \code{"charateristic"}
 and second column must be named \code{"value"}. If using wrapper functions for classifiers
... ...
@@ -30,6 +32,9 @@ therefore it is not necessary to specify it.}
 \item{\code{intermediate}}{Character vector. Names of any variables created
 in prior stages by \code{\link{runTest}} that need to be passed to
 \code{classifier}.}
+\item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of
+the list are the names of the parameters and the vectors are the values of the parameters to try. All possible
+combinations are generated.}
 \item{\code{getFeatures}}{A function may be specified that extracts the selected
 features from the trained model. This is relevant if using a classifier that does
 feature selection within training (e.g. random forest). The function must return a
... ...
@@ -29,7 +29,7 @@

 \item{...}{Not used by end user.}

-\item{performanceName}{Default: "auto". The name of the
+\item{performanceName}{Default: \code{"auto"}. The name of the
 performance measure or "auto". If the results are classification then
 balanced accuracy will be displayed. Otherwise, the results would be survival risk
 predictions and then C-index will be displayed. This is one of the names printed
... ...
@@ -114,12 +114,12 @@ plot. Ignored if the feature's measurements are categorical.}
 \item{showYtickLabels}{Logical. Default: \code{TRUE}. If set to
 \code{FALSE}, the y-axis labels are hidden.}

-\item{xLabelPositions}{Either "auto" or a vector of values. The positions of
-labels on the x-axis.  If "auto", the placement of labels is automatically
+\item{xLabelPositions}{Either \code{"auto"} or a vector of values. The positions of
+labels on the x-axis.  If \code{"auto"}, the placement of labels is automatically
 calculated.}

-\item{yLabelPositions}{Either "auto" or a vector of values. The positions of
-labels on the y-axis.  If "auto", the placement of labels is automatically
+\item{yLabelPositions}{Either \code{"auto"} or a vector of values. The positions of
+labels on the y-axis.  If \code{"auto"}, the placement of labels is automatically
 calculated.}

 \item{fontSizes}{A vector of length 5. The first number is the size of the
... ...
@@ -15,8 +15,8 @@
   referenceLevel = NULL,
   characteristicsList = list(),
   orderingList = list(),
-  sizesList = list(lineWidth = 1, pointSize = 2, legendLinesPointsSize = 1, fonts = c(24,
-    16, 12, 12, 12, 16)),
+  sizesList = list(lineWidth = 1, pointSize = 2, legendLinesPointsSize = 1, fonts =
+    c(24, 16, 12, 12, 12, 16)),
   lineColours = NULL,
   xLabelPositions = seq(10, 100, 10),
   yMax = 100,