Browse code

- For each predict wrapper, the row names of the scores table or the names of the class vector now have sample IDs to make the output compatible with prevalidation. - XGBoost prediction function ensures that order of variables of test data table is the same as the variables used in model fitting.

Dario Strbenac authored on 19/10/2022 11:30:03
Showing 8 changed files

... ...
@@ -3,8 +3,8 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.1.25
7
-Date: 2022-10-18
6
+Version: 3.1.26
7
+Date: 2022-10-19
8 8
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
9 9
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
... ...
@@ -193,7 +193,7 @@ setMethod("crossValidate", "DataFrame",
193 193
                   # The below loops over different combinations of assays and merges them together.
194 194
                   # This allows someone to answer which combinations of the assays might be most useful.
195 195
 
196
-                  if(!is.list(assayCombinations) && assayCombinations == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
196
+                  if(!is.list(assayCombinations) && assayCombinations[1] == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
197 197
 
198 198
                   result <- sapply(assayCombinations, function(assayIndex){
199 199
                       CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex],
... ...
@@ -221,14 +221,14 @@ setMethod("crossValidate", "DataFrame",
221 221
                   # This allows someone to answer which combinations of the assays might be most useful.
222 222
 
223 223
 
224
-                  if(!is.list(assayCombinations) && assayCombinations == "all")
224
+                  if(!is.list(assayCombinations) && assayCombinations[1] == "all")
225 225
                   {
226 226
                       assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
227 227
                       assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)]
228 228
                       if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data")
229 229
                   }
230 230
 
231
-                  result <- sapply(assayCombinations[2], function(assayIndex){
231
+                  result <- sapply(assayCombinations, function(assayIndex){
232 232
                       CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex],
233 233
                          outcome = outcome, assayIDs = assayIndex,
234 234
                          nFeatures = nFeatures[assayIndex],
... ...
@@ -255,7 +255,7 @@ setMethod("crossValidate", "DataFrame",
255 255
                   # This allows someone to answer which combinations of the assays might be most useful.
256 256
 
257 257
 
258
-                  if(!is.list(assayCombinations) && assayCombinations == "all"){
258
+                  if(!is.list(assayCombinations) && assayCombinations[1] == "all"){
259 259
                       assayCombinations <- do.call("c", sapply(seq_along(assayIDs),function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
260 260
                       assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)]
261 261
                       if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data")
... ...
@@ -14,6 +14,7 @@ classifyInterface <- function(countsTrain, classesTrain, countsTest, ...,
14 14
   classPredictions <- predicted[["ytehat"]]
15 15
   classScores <- predicted[["discriminant"]]
16 16
   colnames(classScores) <- levels(classesTrain)
17
+  rownames(classScores) <- names(classPredictions) <- rownames(countsTest)
17 18
   switch(returnType, class = classPredictions, # Factor vector.
18 19
          score = classScores, # Numeric matrix.
19 20
          both = data.frame(class = classPredictions, classScores, check.names = FALSE))
... ...
@@ -39,6 +39,6 @@ coxnetPredictInterface <- function(model, measurementsTest, survivalTest = NULL,
39 39
   model$offset <- TRUE
40 40
   
41 41
   survScores <- predict(model, testMatrix, s = lambda, type = "response", newoffset = offset)
42
-  
42
+  rownames(survScores) <- rownames(measurementsTest)
43 43
   survScores[, 1]
44 44
 }
45 45
\ No newline at end of file
... ...
@@ -31,15 +31,6 @@ extractPrevalidation = function(assayPreval){ #}, startingCol) {
31 31
     vec
32 32
 }
33 33
 
34
-# Use to pull out the names of features in a ClassifyR model - XYZ: Could Ditch if we really wanted to
35
-featurePuller = function(classifyObject) {
36
-    if ("selectResult" %in% slotNames(classifyObject)) {
37
-        features = classifyObject@selectResult@chosenFeatures
38
-    } else{
39
-        features = classifyObject@originalFeatures
40
-    }
41
-}
42
-
43 34
 setClass("prevalModel", slots = list(fullModel = "list"))
44 35
 
45 36
 prevalTrainInterface <- function(measurements, classes, params, ...)
... ...
@@ -23,6 +23,7 @@ randomForestPredictInterface <- function(forest, measurementsTest, ..., returnTy
23 23
   classPredictions <- predict(forest, measurementsTest)$predictions
24 24
   classScores <- predict(forest, measurementsTest, predict.all = TRUE)[[1]]
25 25
   classScores <- t(apply(classScores, 1, function(sampleRow) table(factor(classes[sampleRow], levels = classes)) / forest$forest$num.trees))
26
+  rownames(classScores) <- names(classPredictions) <- rownames(measurementsTest)
26 27
   switch(returnType, class = classPredictions,
27 28
          score = classScores,
28 29
          both = data.frame(class = classPredictions, classScores, check.names = FALSE))
... ...
@@ -38,7 +38,7 @@ SVMpredictInterface <- function(model, measurementsTest, returnType = c("both",
38 38
   # e1071 uses attributes to pass back probabilities. Make them a standalone variable.
39 39
   classScores <- attr(classPredictions, "probabilities")[, model[["levels"]], drop = FALSE]
40 40
   attr(classPredictions, "probabilities") <- NULL
41
-  
41
+  rownames(classScores) <- names(classPredictions) <- rownames(measurementsTest)
42 42
   switch(returnType, class = classPredictions, score = classScores,
43 43
          both = data.frame(class = classPredictions, classScores, check.names = FALSE))
44 44
 }
45 45
\ No newline at end of file
... ...
@@ -46,14 +46,17 @@ extremeGradientBoostingPredictInterface <- function(booster, measurementsTest, .
46 46
   if(verbose == 3)
47 47
     message("Predicting using boosted random forest.")  
48 48
   measurementsTest <- as(measurementsTest, "data.frame")
49
+  measurementsTest <- measurementsTest[, attr(booster, "featureNames")]
49 50
   # Convert to one-hot encoding as xgboost doesn't understand factors. Need to get rid of intercept afterwards.
50 51
   measurementsTest <- MatrixModels::model.Matrix(~ 0 + ., data = measurementsTest, sparse = TRUE)
52
+  
51 53
   scores <- predict(booster, measurementsTest, reshape = TRUE)
52 54
   colnames(scores) <- attr(booster, "classes")
53 55
   if(!is.null(attr(booster, "classes"))) # It is a classification task.
54 56
   {
55 57
     classPredictions <- attr(booster, "classes")[apply(scores, 1, function(sampleRow) which.max(sampleRow)[1])]
56 58
     classPredictions <- factor(classPredictions, levels = attr(booster, "classes"))
59
+    rownames(scores) <- names(classPredictions) <- rownames(measurementsTest)
57 60
     result <- switch(returnType, class = classPredictions,
58 61
                      score = scores,
59 62
                      both = data.frame(class = classPredictions, scores, check.names = FALSE))