...
@@ -3,8 +3,8 @@ Type: Package
 Title: A framework for cross-validated classification problems, with
        applications to differential variability and differential
        distribution testing
-Version: 3.1.13
-Date: 2022-08-21
+Version: 3.1.14
+Date: 2022-08-25
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
 VignetteBuilder: knitr
...
@@ -1,7 +1,7 @@
 #' Plot Receiver Operating Curve Graphs for Classification Results
 #'
 #' Creates one ROC plot or multiple ROC plots for a list of ClassifyResult
-#' objects. One plot is created if the data set has two classes and multiple
+#' objects. One plot is created if the data set has two classes and multiple
 #' plots are created if the data set has three or more classes.
 #'
 #' The scores stored in the results should be higher if the sample is more
...
@@ -21,13 +21,13 @@
 #' averaged ROC curve.
 #' @param interval Default: 95 (percent). The percent confidence interval to
 #' draw around the averaged ROC curve, if mode is \code{"each"}.
-#' @param comparison The aspect of the experimental design to compare. Can be
+#' @param comparison Default: \code{"auto"}. The aspect of the experimental design to compare. Can be
 #' any characteristic that all results share. If the data set has two classes,
 #' then the slot name with factor levels to be used for colouring the lines.
 #' Otherwise, it specifies the variable used for plot facetting.
-#' @param lineColours A vector of colours for different levels of the
+#' @param lineColours Default: \code{"auto"}. A vector of colours for different levels of the
 #' comparison parameter, or if there are three or more classes, the classes.
-#' If \code{NULL}, a default colour palette is automatically generated.
+#' If \code{"auto"}, a default colour palette is automatically generated.
 #' @param lineWidth A single number controlling the thickness of lines drawn.
 #' @param fontSizes A vector of length 5. The first number is the size of the
 #' title. The second number is the size of the axes titles and AUC text, if it
...
@@ -66,7 +66,7 @@
 #' predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60)
 #' predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40)
 #' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
-#'                           value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")),
+#'                           value = c("Melanoma", "Bartlett Test", "Differential Variability", "2-fold")),
 #' LETTERS[1:20], paste("Gene", LETTERS[1:10]), list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
 #' list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
 #' list(function(oracle){}), NULL, predicted, actual)
...
@@ -80,7 +80,7 @@ setGeneric("ROCplot", function(results, ...) standardGeneric("ROCplot"))
 #' @export
 setMethod("ROCplot", "list",
           function(results, mode = c("merge", "average"), interval = 95,
-                   comparison = "Classifier Name", lineColours = NULL,
+                   comparison = "auto", lineColours = "auto",
                    lineWidth = 1, fontSizes = c(24, 16, 12, 12, 12), labelPositions = seq(0.0, 1.0, 0.2),
                    plotTitle = "ROC", legendTitle = NULL, xLabel = "False Positive Rate", yLabel = "True Positive Rate", showAUC = TRUE)
 {
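With these defaults, a list of results can now be plotted without naming a shared characteristic. A hedged usage sketch, reusing the result1 and result2 objects constructed in the documentation example above:

# "auto" resolves comparison to a characteristic that every result carries,
# e.g. "Classifier Name" here (sketch; result1 and result2 as in the example).
ROCplot(list(result1, result2))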
...
@@ -89,7 +89,15 @@ setMethod("ROCplot", "list",
     if(!requireNamespace("scales", quietly = TRUE))
         stop("The package 'scales' could not be found. Please install it.")
     mode <- match.arg(mode)
-
+    characteristicsCounts <- table(unlist(lapply(results, function(result) result@characteristics[["characteristic"]])))
+    if(comparison == "auto")
+    {
+        if(max(characteristicsCounts) == length(results))
+            comparison <- names(characteristicsCounts)[characteristicsCounts == max(characteristicsCounts)][1]
+        else
+            stop("No characteristic is present for all results but must be.")
+    }
+
     ggplot2::theme_set(ggplot2::theme_classic() + ggplot2::theme(panel.border = ggplot2::element_rect(fill = NA)))
     distinctClasses <- levels(actualOutcome(results[[1]]))
     numberDistinctClasses <- length(distinctClasses)
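The "auto" resolution tallies characteristic names across all results and picks one that every result carries, stopping otherwise. A minimal standalone sketch of that logic, with plain character vectors standing in for ClassifyResult characteristics:

# Toy stand-ins for each result's characteristic names.
resultsCharacteristics <- list(c("Data Set", "Classifier Name"),
                               c("Data Set", "Selection Name"))
characteristicsCounts <- table(unlist(resultsCharacteristics))
if(max(characteristicsCounts) == length(resultsCharacteristics)) {
  # Pick the first characteristic present in every result.
  comparison <- names(characteristicsCounts)[characteristicsCounts == max(characteristicsCounts)][1]
} else stop("No characteristic is present for all results but must be.")
comparison # "Data Set", the only characteristic shared by both results.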
...
@@ -181,8 +189,8 @@
         else
             lineColour <- comparison
 
-        if(is.null(lineColours))
-            lineColours <- scales::hue_pal()(ifelse(lineColour == "class", numberDistinctClasses, length(unique(comparisonValues))))
+        if(lineColours == "auto")
+            lineColours <- scales::hue_pal()(ifelse(lineColour == "class", numberDistinctClasses, max(characteristicsCounts)))
 
         if(is.null(legendTitle))
             legendTitle <- ifelse(lineColour == "class", "Class", comparisonName)
 
...
@@ -207,8 +215,6 @@ setMethod("ROCplot", "list",
         comparison <- rlang::sym(comparison)
         ROCplots <- lapply(plotDataSets, function(plotData)
         {
-
-
             ROCplot <- ggplot2::ggplot(plotData, ggplot2::aes(x = FPR, y = TPR, colour = !!lineColour)) +
             ggplot2::geom_line(size = lineWidth) + ggplot2::xlab(NULL) + ggplot2::ylab(NULL) + ggplot2::labs(colour = legendTitle) + ggplot2::geom_segment(x = 0, y = 0, xend = 1, yend = 1, size = lineWidth, colour = "black") + ggplot2::scale_x_continuous(breaks = labelPositions, limits = c(0, 1)) + ggplot2::scale_y_continuous(breaks = labelPositions, limits = c(0, 1)) +
             ggplot2::theme(axis.text = ggplot2::element_text(colour = "black", size = fontSizes[3]), legend.position = c(1, 0), legend.justification = c(1, 0), legend.background = ggplot2::element_rect(fill = "transparent"), legend.title = ggplot2::element_text(size = fontSizes[4], hjust = 0), legend.text = ggplot2::element_text(size = fontSizes[5])) + ggplot2::guides(colour = ggplot2::guide_legend(title.hjust = 0.5)) + ggplot2::scale_colour_manual(values = lineColours)
...
@@ -641,13 +641,15 @@ setClass("TrainParams", representation(
 #' @docType class
 #' @section Constructor:
 #' \describe{
-#' \item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(),
-#' intermediate = character(0), getFeatures = NULL, ...)}
+#' \item{}{\preformatted{TrainParams(classifier, balancing = c("downsample", "upsample", "none"), characteristics = DataFrame(),
+#' intermediate = character(0), tuneParams = NULL, getFeatures = NULL, ...)}
 #' Creates a \code{TrainParams} object which stores the function which will do the
 #' classifier building and parameters that the function will use.
 #' \describe{
 #' \item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}}
 #' for valid keywords.}
+#' \item{\code{balancing}}{Default: \code{"downsample"}. A keyword specifying how to handle class imbalance for data sets with categorical outcome.
+#' Valid values are \code{"downsample"}, \code{"upsample"} and \code{"none"}.}
 #' \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
 #' characteristics of the classifier used. First column must be named \code{"charateristic"}
 #' and second column must be named \code{"value"}. If using wrapper functions for classifiers
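A hedged construction sketch using the new arguments. The character-keyword form of classifier follows the documentation above; "randomForest" is used purely as an illustrative keyword and the argument values are invented:

# Skip class-imbalance handling and define a tuning grid.
trainParams <- TrainParams("randomForest", balancing = "none",
                           tuneParams = list(mTryProportion = c(0.25, 0.50),
                                             ntree = c(100, 200)))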
...
@@ -656,6 +658,9 @@ setClass("TrainParams", representation(
 #' \item{\code{intermediate}}{Character vector. Names of any variables created
 #' in prior stages by \code{\link{runTest}} that need to be passed to
 #' \code{classifier}.}
+#' \item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of
+#' the list are the names of the parameters and the vectors are the values of the parameters to try. All possible
+#' combinations are generated.}
 #' \item{\code{getFeatures}}{A function may be specified that extracts the selected
 #' features from the trained model. This is relevant if using a classifier that does
 #' feature selection within training (e.g. random forest). The function must return a
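"All possible combinations are generated" means the tuning grid is the Cartesian product of the supplied vectors, which base R's expand.grid (used by .doSelection in this patch) produces:

tuneParams <- list(mTryProportion = c(0.25, 0.50), ntree = c(100, 200))
expand.grid(tuneParams, stringsAsFactors = FALSE)
#   mTryProportion ntree
# 1           0.25   100
# 2           0.50   100
# 3           0.25   200
# 4           0.50   200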
...
@@ -964,7 +969,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
 #' #if(require(sparsediscrim))
 #' #{
 #' data(asthma)
-#' classified <- crossValidate(measurements, classes)
+#' classified <- crossValidate(measurements, classes, nRepeats = 5)
 #' class(classified)
 #' #}
 #'
...
@@ -76,6 +76,7 @@
   "GLM", "Logistic regression.",
   "elasticNetGLM", "Elastic net GLM multinomial regression.",
   "SVM", "Support Vector Machine.",
+  "NSC", "Nearest Shrunken Centroids.",
   "naiveBayes", "Naive Bayes kernel feature voting classifier.",
   "mixturesNormals", "Mixture of normals feature voting classifier.",
   "CoxPH", "Cox proportional hazards.",
...
@@ -36,6 +36,6 @@ randomForestPredictInterface <- function(forest, measurementsTest, ..., returnTy
 forestFeatures <- function(forest)
 {
   rankedFeaturesIndices <- order(randomForest::importance(forest), decreasing = TRUE)
-  selectedFeaturesIndices <- which(randomForest::varUsed(forest) > 0)
+  selectedFeaturesIndices <- randomForest::varUsed(forest, count = FALSE)
   list(rankedFeaturesIndices, selectedFeaturesIndices)
 }
\ No newline at end of file
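Both forms identify the variables that appear in at least one tree; count = FALSE asks randomForest::varUsed for the indices directly rather than thresholding the per-variable use counts. A quick equivalence check, assuming the randomForest package is installed:

library(randomForest)
forest <- randomForest(Species ~ ., data = iris, ntree = 10)
# The two expressions pick out the same set of variable indices.
setequal(varUsed(forest, count = FALSE), which(varUsed(forest) > 0))  # TRUE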
...
@@ -15,7 +15,7 @@
 #' \code{characteristicsList['x']} to aggregate to a single number by taking
 #' the mean. This is particularly meaningful when the cross-validation is
 #' leave-k-out, when k is small.
-#' @param performanceName Default: "auto". The name of the
+#' @param performanceName Default: \code{"auto"}. The name of the
 #' performance measure or "auto". If the results are classification then
 #' balanced accuracy will be displayed. Otherwise, the results would be survival risk
 #' predictions and then C-index will be displayed. This is one of the names printed
...
@@ -70,11 +70,11 @@
 #' \code{FALSE}, the x-axis labels are hidden.
 #' @param showYtickLabels Logical. Default: \code{TRUE}. If set to
 #' \code{FALSE}, the y-axis labels are hidden.
-#' @param xLabelPositions Either "auto" or a vector of values. The positions of
-#' labels on the x-axis. If "auto", the placement of labels is automatically
+#' @param xLabelPositions Either \code{"auto"} or a vector of values. The positions of
+#' labels on the x-axis. If \code{"auto"}, the placement of labels is automatically
 #' calculated.
-#' @param yLabelPositions Either "auto" or a vector of values. The positions of
-#' labels on the y-axis. If "auto", the placement of labels is automatically
+#' @param yLabelPositions Either \code{"auto"} or a vector of values. The positions of
+#' labels on the y-axis. If \code{"auto"}, the placement of labels is automatically
 #' calculated.
 #' @param fontSizes A vector of length 5. The first number is the size of the
 #' title. The second number is the size of the axes titles. The third number
...
@@ -167,7 +167,7 @@ input data. Autmomatically reducing to smaller number.")
       modellingParams@selectParams <- .addIntermediates(modellingParams@selectParams)
 
       topFeatures <- tryCatch(.doSelection(measurementsTrain, outcomeTrain, crossValParams, modellingParams, verbose),
-                        error = function(error) error[["message"]])
+                              error = function(error) error[["message"]])
       if(is.character(topFeatures)) return(topFeatures) # An error occurred.
 
       rankedFeaturesIndices <- topFeatures[[1]] # Extract for result object.
...
@@ -186,10 +186,11 @@ input data. Autmomatically reducing to smaller number.")
       modellingParams@trainParams <- .addIntermediates(modellingParams@trainParams)
       if(!is.null(tuneDetailsSelect))
       {
-        avoidTune <- match(colnames(tuneDetailsSelect), names(modellingParams@trainParams@tuneParams))
+        tuneDetailsSelectUse <- tuneDetailsSelect[["tuneCombinations"]][tuneDetailsSelect[["bestIndex"]], , drop = FALSE]
+        avoidTune <- match(colnames(tuneDetailsSelectUse), names(modellingParams@trainParams@tuneParams))
         if(any(!is.na(avoidTune)))
         {
-          modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, tuneDetailsSelect[!is.na(avoidTune)])
+          modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, tuneDetailsSelectUse[!is.na(avoidTune)])
           modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-na.omit(avoidTune)]
           if(length(modellingParams@trainParams@tuneParams) == 0) modellingParams@trainParams@tuneParams <- NULL
         }
...
@@ -209,7 +210,7 @@ input data. Autmomatically reducing to smaller number.")
         if(length(extras) > 0)
           extrasList <- mget(setdiff(names(extras), "..."))
 
-        rankedChosenList <- do.call(modellingParams@trainParams@getFeatures, c(trained[1], extrasList))
+        rankedChosenList <- do.call(modellingParams@trainParams@getFeatures, c(unname(trained[1]), extrasList))
         rankedFeaturesIndices <- rankedChosenList[[1]]
         selectedFeaturesIndices <- rankedChosenList[[2]]
       }
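The unname guards against do.call's argument matching: a named first element is bound to a formal argument of the same name in getFeatures, or rejected as an unused argument, instead of being passed positionally. A toy illustration of the failure mode, with a hypothetical extractor:

# Hypothetical extractor whose only parameter is named "forest".
featuresFromModel <- function(forest) class(forest)
trained <- list(model = lm(dist ~ speed, cars))
# do.call(featuresFromModel, trained[1])        # Error: unused argument (model = ...).
do.call(featuresFromModel, unname(trained[1]))  # "lm", matched positionally.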
...
@@ -304,8 +305,8 @@ input data. Autmomatically reducing to smaller number.")
   } else { # runTest executed by the end user. Create a ClassifyResult object.
     # Only one training, so only one tuning choice, which can be summarised in characteristics.
     modParamsList <- list(modellingParams@transformParams, modellingParams@selectParams, modellingParams@trainParams, modellingParams@predictParams)
-    if(!is.null(tuneDetails)) characteristics <- rbind(characteristics, data.frame(characteristic = colnames(tuneDetails),
-                                                                                   value = unlist(tuneDetails)))
+    if(!is.null(tuneDetails)) characteristics <- rbind(characteristics, data.frame(characteristic = colnames(tuneDetails[["tuneCombinations"]]),
+                                                                                   value = unlist(tuneDetails[["tuneCombinations"]][tuneDetails[["bestIndex"]], ])))
     autoCharacteristics <- do.call(rbind, lapply(modParamsList, function(stageParams) if(!is.null(stageParams) && !is(stageParams, "PredictParams")) stageParams@characteristics))
     characteristics <- .filterCharacteristics(characteristics, autoCharacteristics)
     characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Cross-validation", value = "Independent Set"))
...
@@ -1,6 +1,7 @@
 # Random Forest
 RFparams <- function() {
-  trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)))
+  trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)),
+                             getFeatures = forestFeatures)
   predictParams <- PredictParams(randomForestPredictInterface)
 
   return(list(trainParams = trainParams, predictParams = predictParams))
...
@@ -31,7 +32,7 @@ GLMparams <- function() {
 
 # Elastic net GLM
 elasticNetGLMparams <- function() {
-  trainParams <- TrainParams(elasticNetGLMtrainInterface)
+  trainParams <- TrainParams(elasticNetGLMtrainInterface, getFeatures = elasticNetFeatures)
   predictParams <- PredictParams(elasticNetGLMpredictInterface)
 
   return(list(trainParams = trainParams, predictParams = predictParams))
...
@@ -45,6 +46,14 @@ SVMparams = function() {
   return(list(trainParams = trainParams, predictParams = predictParams))
 }
 
+# Nearest Shrunken Centroid
+NSCparams = function() {
+  trainParams <- TrainParams(NSCtrainInterface, getFeatures = NSCfeatures)
+  predictParams <- PredictParams(NSCpredictInterface)
+
+  return(list(trainParams = trainParams, predictParams = predictParams))
+}
+
 # Diagonal Linear Discriminant Analysis
 DLDAparams = function() {
   trainParams <- TrainParams(DLDAtrainInterface)
...
@@ -193,14 +193,14 @@
     if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
       return(list(NULL, rankings[[1]], NULL))
 
-    if(tuneMode == "none") # Actually selection not ranking.
+    if(tuneMode == "none") # No parameters to choose between.
      return(list(NULL, rankings[[1]], NULL))
 
     tuneParamsTrain <- list(topN = topNfeatures)
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)
     modellingParams@trainParams@tuneParams <- NULL
-    bestPerformers <- sapply(rankings, function(rankingsVariety)
+    allPerformanceTables <- lapply(rankings, function(rankingsVariety)
     {
       # Creates a matrix. Columns are top n features, rows are varieties (one row if None).
       performances <- sapply(1:nrow(tuneCombosTrain), function(rowIndex)
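The switch from sapply to lapply matters because each iteration now returns a two-element list (a performance table and the index of its best row) rather than a numeric pair, and sapply would simplify those into a matrix of list cells. A tiny illustration:

# sapply flattens same-shape list results into a matrix of list cells ...
str(sapply(1:2, function(i) list(data.frame(n = i), i)))
# ... whereas lapply keeps one well-formed two-element list per iteration.
str(lapply(1:2, function(i) list(data.frame(n = i), i)))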
...
@@ -240,20 +240,22 @@
       })
 
       bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
-      c(bestOne, performances[bestOne])
+      list(data.frame(tuneCombosTrain, performance = performances), bestOne)
     })
 
-    tunePick <- ifelse(betterValues == "lower", which.min(bestPerformers[2, ])[1], which.max(bestPerformers[2, ])[1])
+    tablesBestMetrics <- sapply(allPerformanceTables, function(tableIndexPair) tableIndexPair[[1]][tableIndexPair[[2]], "performance"])
+    tunePick <- ifelse(betterValues == "lower", which.min(tablesBestMetrics)[1], which.max(tablesBestMetrics)[1])
 
     if(verbose == 3)
       message("Features selected.")
 
-    tuneRow <- tuneCombosTrain[bestPerformers[1, tunePick], , drop = FALSE]
-    if(ncol(tuneRow) > 1) tuneDetails <- tuneRow[, -1, drop = FALSE] else tuneDetails <- NULL
+    tuneDetails <- allPerformanceTables[[tunePick]] # List of length 2.
 
     rankingUse <- rankings[[tunePick]]
-    selectionIndices <- rankingUse[1:tuneRow[, "topN"]]
+    selectionIndices <- rankingUse[1:(tuneDetails[[1]][tuneDetails[[2]], "topN"])]
 
+    names(tuneDetails) <- c("tuneCombinations", "bestIndex")
+    colnames(tuneDetails[[1]])[ncol(tuneDetails[[1]])] <- performanceType
     list(ranked = rankingUse, selected = selectionIndices, tune = tuneDetails)
   } else if(is.list(featureRanking)) { # It is a list of functions for ensemble selection.
     featuresIndiciesLists <- mapply(function(selector, selParams)
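After this change the tune element is a named two-element list covering every evaluated combination, rather than just the winning row. A mock-up of the assumed shape and how calling code recovers the best combination (all values invented for illustration):

tuneDetails <- list(tuneCombinations = data.frame(topN = c(10, 20, 10, 20),
                                                  mTryProportion = c(0.25, 0.25, 0.5, 0.5),
                                                  "Balanced Accuracy" = c(0.71, 0.74, 0.69, 0.73),
                                                  check.names = FALSE),
                    bestIndex = 2)
# The selected combination, as the calling code now extracts it:
tuneDetails[["tuneCombinations"]][tuneDetails[["bestIndex"]], , drop = FALSE]
#   topN mTryProportion Balanced Accuracy
# 2   20           0.25              0.74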
...
@@ -296,8 +298,7 @@
 
       list(NULL, selectionIndices, NULL)
     } else { # Previous selection
-      selectedFeatures <-
-      list(NULL, selectionIndices, NULL)
+      selectedFeatures <- list(NULL, selectionIndices, NULL)
     }
   }
 
...
@@ -315,7 +316,7 @@
 # within the same function, so test samples are also passed in case they are needed.
 .doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose)
 {
-  tuneChosen <- NULL
+  tuneDetails <- NULL
   if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams))
   {
     performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
...
@@ -346,9 +347,14 @@
           median(performances(result)[[performanceType]])
         }
     })
+    allPerformanceTable <- data.frame(tuneCombos, performances)
+    colnames(allPerformanceTable)[ncol(allPerformanceTable)] <- performanceType
+
     betterValues <- .ClassifyRenvir[["performanceInfoTable"]][.ClassifyRenvir[["performanceInfoTable"]][, "type"] == performanceType, "better"]
     bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
     tuneChosen <- tuneCombos[bestOne, , drop = FALSE]
+    tuneDetails <- list(tuneCombos, bestOne)
+    names(tuneDetails) <- c("tuneCombinations", "bestIndex")
     modellingParams@trainParams@otherParams <- tuneChosen
   }
 
...
@@ -367,7 +373,7 @@
   if(verbose >= 2)
     message("Training completed.")
 
-  list(model = trained, tune = tuneChosen)
+  list(model = trained, tune = tuneDetails)
 }
 
 # Creates a function call to a prediction function.
...
@@ -566,6 +572,7 @@
     "GLM" = GLMparams(),
     "elasticNetGLM" = elasticNetGLMparams(),
     "SVM" = SVMparams(),
+    "NSC" = NSCparams(),
     "DLDA" = DLDAparams(),
     "naiveBayes" = naiveBayesParams(),
     "mixturesNormals" = mixModelsParams(),
...
@@ -104,7 +104,7 @@ of predictions made during the cross-validation procedure.}}
 #if(require(sparsediscrim))
 #{
 data(asthma)
-classified <- crossValidate(measurements, classes)
+classified <- crossValidate(measurements, classes, nRepeats = 5)
 class(classified)
 #}
 
...
@@ -9,8 +9,8 @@
   results,
   mode = c("merge", "average"),
   interval = 95,
-  comparison = "Classifier Name",
-  lineColours = NULL,
+  comparison = "auto",
+  lineColours = "auto",
   lineWidth = 1,
   fontSizes = c(24, 16, 12, 12, 12),
   labelPositions = seq(0, 1, 0.2),
...
@@ -33,14 +33,14 @@ averaged ROC curve.}
 \item{interval}{Default: 95 (percent). The percent confidence interval to
 draw around the averaged ROC curve, if mode is \code{"each"}.}
 
-\item{comparison}{The aspect of the experimental design to compare. Can be
+\item{comparison}{Default: \code{"auto"}. The aspect of the experimental design to compare. Can be
 any characteristic that all results share. If the data set has two classes,
 then the slot name with factor levels to be used for colouring the lines.
 Otherwise, it specifies the variable used for plot facetting.}
 
-\item{lineColours}{A vector of colours for different levels of the
+\item{lineColours}{Default: \code{"auto"}. A vector of colours for different levels of the
 comparison parameter, or if there are three or more classes, the classes.
-If \code{NULL}, a default colour palette is automatically generated.}
+If \code{"auto"}, a default colour palette is automatically generated.}
 
 \item{lineWidth}{A single number controlling the thickness of lines drawn.}
 
...
@@ -71,7 +71,7 @@ device, if \code{plot} is \code{TRUE}.
 }
 \description{
 Creates one ROC plot or multiple ROC plots for a list of ClassifyResult
-objects. One plot is created if the data set has two classes and multiple
+objects. One plot is created if the data set has two classes and multiple
 plots are created if the data set has three or more classes.
 }
 \details{
...
@@ -103,7 +103,7 @@ considered simultaneously, to calculate one curve per classification.
 predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60)
 predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40)
 result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
-                          value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")),
+                          value = c("Melanoma", "Bartlett Test", "Differential Variability", "2-fold")),
 LETTERS[1:20], paste("Gene", LETTERS[1:10]), list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
 list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
 list(function(oracle){}), NULL, predicted, actual)
...
@@ -15,13 +15,15 @@ The empty constructor is provided for convenience.
 \section{Constructor}{
 
 \describe{
-\item{}{\preformatted{TrainParams(classifier, characteristics = DataFrame(),
-intermediate = character(0), getFeatures = NULL, ...)}
+\item{}{\preformatted{TrainParams(classifier, balancing = c("downsample", "upsample", "none"), characteristics = DataFrame(),
+intermediate = character(0), tuneParams = NULL, getFeatures = NULL, ...)}
 Creates a \code{TrainParams} object which stores the function which will do the
 classifier building and parameters that the function will use.
 \describe{
 \item{\code{classifier}}{A character keyword referring to a registered classifier. See \code{\link{available}}
 for valid keywords.}
+\item{\code{balancing}}{Default: \code{"downsample"}. A keyword specifying how to handle class imbalance for data sets with categorical outcome.
+Valid values are \code{"downsample"}, \code{"upsample"} and \code{"none"}.}
 \item{\code{characteristics}}{A \code{\link{DataFrame}} describing the
 characteristics of the classifier used. First column must be named \code{"charateristic"}
 and second column must be named \code{"value"}. If using wrapper functions for classifiers
...
@@ -30,6 +32,9 @@ therefore it is not necessary to specify it.}
 \item{\code{intermediate}}{Character vector. Names of any variables created
 in prior stages by \code{\link{runTest}} that need to be passed to
 \code{classifier}.}
+\item{\code{tuneParams}}{A list specifying tuning parameters required during feature selection. The names of
+the list are the names of the parameters and the vectors are the values of the parameters to try. All possible
+combinations are generated.}
 \item{\code{getFeatures}}{A function may be specified that extracts the selected
 features from the trained model. This is relevant if using a classifier that does
 feature selection within training (e.g. random forest). The function must return a
...
@@ -29,7 +29,7 @@
 
 \item{...}{Not used by end user.}
 
-\item{performanceName}{Default: "auto". The name of the
+\item{performanceName}{Default: \code{"auto"}. The name of the
 performance measure or "auto". If the results are classification then
 balanced accuracy will be displayed. Otherwise, the results would be survival risk
 predictions and then C-index will be displayed. This is one of the names printed
...
@@ -114,12 +114,12 @@ plot. Ignored if the feature's measurements are categorical.}
 \item{showYtickLabels}{Logical. Default: \code{TRUE}. If set to
 \code{FALSE}, the y-axis labels are hidden.}
 
-\item{xLabelPositions}{Either "auto" or a vector of values. The positions of
-labels on the x-axis. If "auto", the placement of labels is automatically
+\item{xLabelPositions}{Either \code{"auto"} or a vector of values. The positions of
+labels on the x-axis. If \code{"auto"}, the placement of labels is automatically
 calculated.}
 
-\item{yLabelPositions}{Either "auto" or a vector of values. The positions of
-labels on the y-axis. If "auto", the placement of labels is automatically
+\item{yLabelPositions}{Either \code{"auto"} or a vector of values. The positions of
+labels on the y-axis. If \code{"auto"}, the placement of labels is automatically
 calculated.}
 
 \item{fontSizes}{A vector of length 5. The first number is the size of the
...
@@ -15,8 +15,8 @@
   referenceLevel = NULL,
   characteristicsList = list(),
   orderingList = list(),
-  sizesList = list(lineWidth = 1, pointSize = 2, legendLinesPointsSize = 1, fonts = c(24,
-    16, 12, 12, 12, 16)),
+  sizesList = list(lineWidth = 1, pointSize = 2, legendLinesPointsSize = 1, fonts =
+    c(24, 16, 12, 12, 12, 16)),
   lineColours = NULL,
   xLabelPositions = seq(10, 100, 10),
   yMax = 100,