Browse code

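- Corrected the ranger random forest interface to do two model fits in each iteration: one for prediction and one for variable importance. Previously, a single model was fitted with importance = "impurity_corrected", a mode of model fitting which uses permuted features, and that same model was also used for prediction.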

Dario Strbenac authored on 17/11/2022 05:55:04
Showing 5 changed files

... ...
@@ -3,9 +3,9 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.3.2
7
-Date: 2022-11-09
8
-Authors@R: 
6
+Version: 3.3.3
7
+Date: 2022-11-17
8
+Authors@R:
9 9
     c(
10 10
     person(given = "Dario", family = "Strbenac", email = "dario.strbenac@sydney.edu.au", role = c("aut", "cre")),
11 11
     person(given = "Ellis", family = "Patrick", role = "aut"),
... ...
@@ -6,9 +6,11 @@ randomForestTrainInterface <- function(measurementsTrain, outcomeTrain, mTryProp
6 6
   if(verbose == 3)
7 7
     message("Fitting random forest classifier to training data.")
8 8
   mtry <- round(mTryProportion * ncol(measurementsTrain)) # Number of features to try.
9
-      
10 9
   # Convert to base data.frame as ranger doesn't understand DataFrame.
11
-  ranger::ranger(x = as(measurementsTrain, "data.frame"), y = outcomeTrain, mtry = mtry, importance = "impurity_corrected", ...)
10
+  fittedModel <- ranger::ranger(x = as(measurementsTrain, "data.frame"), y = outcomeTrain, mtry = mtry, ...)
11
+  forImportance <- ranger::ranger(x = as(measurementsTrain, "data.frame"), y = outcomeTrain, mtry = mtry, importance = "impurity_corrected", ...)
12
+  attr(fittedModel, "forImportance") <- forImportance
13
+  fittedModel
12 14
 }
13 15
 attr(randomForestTrainInterface, "name") <- "randomForestTrainInterface"
14 16
     
... ...
@@ -37,7 +39,8 @@ randomForestPredictInterface <- function(forest, measurementsTest, ..., returnTy
37 39
 
38 40
 forestFeatures <- function(forest)
39 41
                   {
40
-                    rankedFeaturesIndices <- order(ranger::importance(forest), decreasing = TRUE)
41
-                    selectedFeaturesIndices <- which(ranger::importance(forest) > 0)
42
+                    forImportance <- attr(forest, "forImportance")
43
+                    rankedFeaturesIndices <- order(ranger::importance(forImportance), decreasing = TRUE)
44
+                    selectedFeaturesIndices <- which(ranger::importance(forImportance) > 0)
42 45
                     list(rankedFeaturesIndices, selectedFeaturesIndices)
43 46
                   }
44 47
\ No newline at end of file
... ...
@@ -44,6 +44,8 @@
44 44
 #' @param legendSize The size of the boxes in the legends.
45 45
 #' @param plot Logical. If \code{TRUE}, a plot is produced on the current
46 46
 #' graphics device.
47
+#' @param ... Parameters not used by the \code{ClassifyResult} method that does
48
+#' list-packaging but used by the main \code{list} method.
47 49
 #' @return A plot is produced and a grob is returned that can be saved to a
48 50
 #' graphics device.
49 51
 #' @author Dario Strbenac
... ...
@@ -7,6 +7,7 @@
7 7
 \alias{calcExternalPerformance,factor,factor-method}
8 8
 \alias{calcExternalPerformance,Surv,numeric-method}
9 9
 \alias{calcCVperformance,ClassifyResult-method}
10
+\alias{calcExternalPerformance,factor,tabular-method}
10 11
 \title{Add Performance Calculations to a ClassifyResult Object or Calculate for a
11 12
 Pair of Factor Vectors}
12 13
 \usage{
... ...
@@ -24,6 +25,12 @@ Pair of Factor Vectors}
24 25
   performanceType = "C-index"
25 26
 )
26 27
 
28
+\S4method{calcExternalPerformance}{factor,tabular}(
29
+  actualOutcome,
30
+  predictedOutcome,
31
+  performanceType = "AUC"
32
+)
33
+
27 34
 \S4method{calcCVperformance}{ClassifyResult}(
28 35
   result,
29 36
   performanceType = c("Balanced Accuracy", "Balanced Error", "Error", "Accuracy",
... ...
@@ -58,6 +58,9 @@
58 58
 \item{results}{A list of \code{\link{ClassifyResult}} objects. Could also be
59 59
 a matrix of pre-calculated metrics, for backwards compatibility.}
60 60
 
61
+\item{...}{Parameters not used by the \code{ClassifyResult} method that does
62
+list-packaging but used by the main \code{list} method.}
63
+
61 64
 \item{comparison}{Default: "auto". The aspect of the experimental
62 65
 design to compare. Can be any characteristic that all results share.}
63 66