More Fixes for Prevalidation
... | ... |
@@ -3,8 +3,8 @@ Type: Package |
3 | 3 |
Title: A framework for cross-validated classification problems, with |
4 | 4 |
applications to differential variability and differential |
5 | 5 |
distribution testing |
6 |
-Version: 3.1.25 |
|
7 |
-Date: 2022-10-18 |
|
6 |
+Version: 3.1.26 |
|
7 |
+Date: 2022-10-19 |
|
8 | 8 |
Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang |
9 | 9 |
Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au> |
10 | 10 |
VignetteBuilder: knitr |
... | ... |
@@ -124,16 +124,8 @@ setMethod("crossValidate", "DataFrame", |
124 | 124 |
measurements = measurements) |
125 | 125 |
selectionMethod <- cleanSelectionMethod(selectionMethod = selectionMethod, |
126 | 126 |
measurements = measurements) |
127 |
- if(any(nFeatures == 1) && classifier == "elasticNetGLM") |
|
128 |
- { |
|
129 |
- options(warn = 1) |
|
130 |
- warning("Elastic Net GLM requires two or more features as input but there is only one. |
|
131 |
-Using an ordinary GLM instead.") |
|
132 |
- classifier <- "GLM" |
|
133 |
- } |
|
134 |
- |
|
135 | 127 |
classifier <- cleanClassifier(classifier = classifier, |
136 |
- measurements = measurements) |
|
128 |
+ measurements = measurements, nFeatures = nFeatures) |
|
137 | 129 |
|
138 | 130 |
##!!!!! Do something with data combinations |
139 | 131 |
|
... | ... |
@@ -201,7 +193,7 @@ Using an ordinary GLM instead.") |
201 | 193 |
# The below loops over different combinations of assays and merges them together. |
202 | 194 |
# This allows someone to answer which combinations of the assays might be most useful. |
203 | 195 |
|
204 |
- if(!is.list(assayCombinations) && assayCombinations == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
|
196 |
+ if(!is.list(assayCombinations) && assayCombinations[1] == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
|
205 | 197 |
|
206 | 198 |
result <- sapply(assayCombinations, function(assayIndex){ |
207 | 199 |
CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex], |
... | ... |
@@ -229,7 +221,7 @@ Using an ordinary GLM instead.") |
229 | 221 |
# This allows someone to answer which combinations of the assays might be most useful. |
230 | 222 |
|
231 | 223 |
|
232 |
- if(!is.list(assayCombinations) && assayCombinations == "all") |
|
224 |
+ if(!is.list(assayCombinations) && assayCombinations[1] == "all") |
|
233 | 225 |
{ |
234 | 226 |
assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
235 | 227 |
assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)] |
... | ... |
@@ -263,7 +255,7 @@ Using an ordinary GLM instead.") |
263 | 255 |
# This allows someone to answer which combinations of the assays might be most useful. |
264 | 256 |
|
265 | 257 |
|
266 |
- if(!is.list(assayCombinations) && assayCombinations == "all"){ |
|
258 |
+ if(!is.list(assayCombinations) && assayCombinations[1] == "all"){ |
|
267 | 259 |
assayCombinations <- do.call("c", sapply(seq_along(assayIDs),function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
268 | 260 |
assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)] |
269 | 261 |
if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data") |
... | ... |
@@ -471,9 +463,9 @@ cleanNFeatures <- function(nFeatures, measurements){ |
471 | 463 |
obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"]))) |
472 | 464 |
else obsFeatures <- ncol(measurements) |
473 | 465 |
if(is.null(nFeatures) || length(nFeatures) == 1 && nFeatures == "all") nFeatures <- as.list(obsFeatures) |
474 |
- if(is.null(names(nFeatures)) & length(nFeatures) == 1) nFeatures <- as.list(pmin(obsFeatures, nFeatures)) |
|
475 |
- if(is.null(names(nFeatures)) & length(nFeatures) > 1) nFeatures <- sapply(obsFeatures, function(x)pmin(x, nFeatures), simplify = FALSE) |
|
476 |
- #if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.") |
|
466 |
+ if(is.null(names(nFeatures)) && length(nFeatures) == 1) nFeatures <- as.list(pmin(obsFeatures, nFeatures)) |
|
467 |
+ if(is.null(names(nFeatures)) && length(nFeatures) > 1) nFeatures <- sapply(obsFeatures, function(x)pmin(x, nFeatures), simplify = FALSE) |
|
468 |
+ #if(is.null(names(nFeatures)) && length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.") |
|
477 | 469 |
if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.") |
478 | 470 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)])) |
479 | 471 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE) |
... | ... |
@@ -498,7 +490,7 @@ cleanSelectionMethod <- function(selectionMethod, measurements){ |
498 | 490 |
|
499 | 491 |
###################################### |
500 | 492 |
###################################### |
501 |
-cleanClassifier <- function(classifier, measurements){ |
|
493 |
+cleanClassifier <- function(classifier, measurements, nFeatures){ |
|
502 | 494 |
#### Clean up |
503 | 495 |
if(!is.null(mcols(measurements)$assay)) |
504 | 496 |
obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"]))) |
... | ... |
@@ -509,10 +501,20 @@ cleanClassifier <- function(classifier, measurements){ |
509 | 501 |
#if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the assays.") |
510 | 502 |
if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the assays.") |
511 | 503 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)]) |
504 |
+ |
|
505 |
+ nFeatures <- nFeatures[names(classifier)] |
|
506 |
+ checkENs <- which(classifier == "elasticNetGLM") |
|
507 |
+ if(length(checkENs) > 0) |
|
508 |
+ { |
|
509 |
+ replacements <- sapply(checkENs, function(checkEN) ifelse(any(nFeatures[[checkEN]] == 1), "GLM", "elasticNetGLM")) |
|
510 |
+ classifier[checkENs] <- replacements |
|
511 |
+ if(any(replacements) == "GLM") |
|
512 |
+ warning("Elastic Net GLM requires two or more features as input but there is only one. |
|
513 |
+Using an ordinary GLM instead.") |
|
514 |
+ } |
|
512 | 515 |
classifier |
513 | 516 |
} |
514 | 517 |
|
515 |
- |
|
516 | 518 |
###################################### |
517 | 519 |
###################################### |
518 | 520 |
#' A function to generate a CrossValParams object |
... | ... |
@@ -808,14 +810,6 @@ CV <- function(measurements = NULL, |
808 | 810 |
characteristicsLabel = NULL) |
809 | 811 |
|
810 | 812 |
{ |
811 |
- # Check that other variables are in the right format and fix |
|
812 |
- nFeatures <- cleanNFeatures(nFeatures = nFeatures, |
|
813 |
- measurements = measurements) |
|
814 |
- selectionMethod <- cleanSelectionMethod(selectionMethod = selectionMethod, |
|
815 |
- measurements = measurements) |
|
816 |
- classifier <- cleanClassifier(classifier = classifier, |
|
817 |
- measurements = measurements) |
|
818 |
- |
|
819 | 813 |
# Which data-types or data-views are present? |
820 | 814 |
if(is.null(characteristicsLabel)) characteristicsLabel <- "none" |
821 | 815 |
|
... | ... |
@@ -841,7 +835,7 @@ CV <- function(measurements = NULL, |
841 | 835 |
characteristics <- S4Vectors::DataFrame(characteristic = c(if(!is.null(assayText)) "Assay Name" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(!is.null(assayText)) paste(assayText, collapse = ", ") else NULL, paste(classifier, collapse = ", "), paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel)) |
842 | 836 |
|
843 | 837 |
if(!is.null(measurements)) |
844 |
- { # Cross-validation. |
|
838 |
+ { # Cross-validation. |
|
845 | 839 |
classifyResults <- runTests(measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics) |
846 | 840 |
fullResult <- runTest(measurements, outcome, measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1) |
847 | 841 |
} else { # Independent training and testing. |
... | ... |
@@ -14,6 +14,7 @@ classifyInterface <- function(countsTrain, classesTrain, countsTest, ..., |
14 | 14 |
classPredictions <- predicted[["ytehat"]] |
15 | 15 |
classScores <- predicted[["discriminant"]] |
16 | 16 |
colnames(classScores) <- levels(classesTrain) |
17 |
+ rownames(classScores) <- names(classPredictions) <- rownames(countsTest) |
|
17 | 18 |
switch(returnType, class = classPredictions, # Factor vector. |
18 | 19 |
score = classScores, # Numeric matrix. |
19 | 20 |
both = data.frame(class = classPredictions, classScores, check.names = FALSE)) |
... | ... |
@@ -39,6 +39,6 @@ coxnetPredictInterface <- function(model, measurementsTest, survivalTest = NULL, |
39 | 39 |
model$offset <- TRUE |
40 | 40 |
|
41 | 41 |
survScores <- predict(model, testMatrix, s = lambda, type = "response", newoffset = offset) |
42 |
- |
|
42 |
+ rownames(survScores) <- rownames(measurementsTest) |
|
43 | 43 |
survScores[, 1] |
44 | 44 |
} |
45 | 45 |
\ No newline at end of file |
... | ... |
@@ -31,15 +31,6 @@ extractPrevalidation = function(assayPreval){ #}, startingCol) { |
31 | 31 |
vec |
32 | 32 |
} |
33 | 33 |
|
34 |
-# Use to pull out the names of features in a ClassifyR model - XYZ: Could Ditch if we really wanted to |
|
35 |
-featurePuller = function(classifyObject) { |
|
36 |
- if ("selectResult" %in% slotNames(classifyObject)) { |
|
37 |
- features = classifyObject@selectResult@chosenFeatures |
|
38 |
- } else{ |
|
39 |
- features = classifyObject@originalFeatures |
|
40 |
- } |
|
41 |
-} |
|
42 |
- |
|
43 | 34 |
setClass("prevalModel", slots = list(fullModel = "list")) |
44 | 35 |
|
45 | 36 |
prevalTrainInterface <- function(measurements, classes, params, ...) |
... | ... |
@@ -23,6 +23,7 @@ randomForestPredictInterface <- function(forest, measurementsTest, ..., returnTy |
23 | 23 |
classPredictions <- predict(forest, measurementsTest)$predictions |
24 | 24 |
classScores <- predict(forest, measurementsTest, predict.all = TRUE)[[1]] |
25 | 25 |
classScores <- t(apply(classScores, 1, function(sampleRow) table(factor(classes[sampleRow], levels = classes)) / forest$forest$num.trees)) |
26 |
+ rownames(classScores) <- names(classPredictions) <- rownames(measurementsTest) |
|
26 | 27 |
switch(returnType, class = classPredictions, |
27 | 28 |
score = classScores, |
28 | 29 |
both = data.frame(class = classPredictions, classScores, check.names = FALSE)) |
... | ... |
@@ -38,7 +38,7 @@ SVMpredictInterface <- function(model, measurementsTest, returnType = c("both", |
38 | 38 |
# e1071 uses attributes to pass back probabilities. Make them a standalone variable. |
39 | 39 |
classScores <- attr(classPredictions, "probabilities")[, model[["levels"]], drop = FALSE] |
40 | 40 |
attr(classPredictions, "probabilities") <- NULL |
41 |
- |
|
41 |
+ rownames(classScores) <- names(classPredictions) <- rownames(measurementsTest) |
|
42 | 42 |
switch(returnType, class = classPredictions, score = classScores, |
43 | 43 |
both = data.frame(class = classPredictions, classScores, check.names = FALSE)) |
44 | 44 |
} |
45 | 45 |
\ No newline at end of file |
... | ... |
@@ -46,13 +46,17 @@ extremeGradientBoostingPredictInterface <- function(booster, measurementsTest, . |
46 | 46 |
if(verbose == 3) |
47 | 47 |
message("Predicting using boosted random forest.") |
48 | 48 |
measurementsTest <- as(measurementsTest, "data.frame") |
49 |
+ measurementsTest <- measurementsTest[, attr(booster, "featureNames")] |
|
49 | 50 |
# Convert to one-hot encoding as xgboost doesn't understand factors. Need to get rid of intercept afterwards. |
50 | 51 |
measurementsTest <- MatrixModels::model.Matrix(~ 0 + ., data = measurementsTest, sparse = TRUE) |
52 |
+ |
|
51 | 53 |
scores <- predict(booster, measurementsTest, reshape = TRUE) |
54 |
+ colnames(scores) <- attr(booster, "classes") |
|
52 | 55 |
if(!is.null(attr(booster, "classes"))) # It is a classification task. |
53 | 56 |
{ |
54 | 57 |
classPredictions <- attr(booster, "classes")[apply(scores, 1, function(sampleRow) which.max(sampleRow)[1])] |
55 | 58 |
classPredictions <- factor(classPredictions, levels = attr(booster, "classes")) |
59 |
+ rownames(scores) <- names(classPredictions) <- rownames(measurementsTest) |
|
56 | 60 |
result <- switch(returnType, class = classPredictions, |
57 | 61 |
score = scores, |
58 | 62 |
both = data.frame(class = classPredictions, scores, check.names = FALSE)) |
... | ... |
@@ -188,7 +188,7 @@ Function(s) | Description | DM | DV | DD |
188 | 188 |
<span style="font-family: 'Courier New', monospace;">fisherDiscriminant</span> | Implementation of Fisher's LDA for departures from normality | ✔ | ✔* | |
189 | 189 |
<span style="font-family: 'Courier New', monospace;">mixModelsTrain</span>, <span style="font-family: 'Courier New', monospace;">mixModelsPredict</span> | Feature-wise mixtures of normals and voting | ✔ | ✔ | ✔ |
190 | 190 |
<span style="font-family: 'Courier New', monospace;">naiveBayesKernel</span> | Feature-wise kernel density estimation and voting | ✔ | ✔ | ✔ |
191 |
-<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span> | Wrapper for randomForest's functions <span style="font-family: 'Courier New', monospace;">randomForest</span> and <span style="font-family: 'Courier New', monospace;">predict</span> | ✔ | ✔ | ✔ |
|
191 |
+<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span> | Wrapper for ranger's functions <span style="font-family: 'Courier New', monospace;">ranger</span> and <span style="font-family: 'Courier New', monospace;">predict</span> | ✔ | ✔ | ✔ |
|
192 | 192 |
<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">extremeGradientBoostingPredictInterface</span> | Wrapper for xgboost's functions <span style="font-family: 'Courier New', monospace;">xgboost</span> and <span style="font-family: 'Courier New', monospace;">predict</span> | ✔ | ✔ | ✔ |
193 | 193 |
<span style="font-family: 'Courier New', monospace;">kNNinterface</span> | Wrapper for class's function <span style="font-family: 'Courier New', monospace;">knn</span> | ✔ | ✔ | ✔ |
194 | 194 |
<span style="font-family: 'Courier New', monospace;">SVMtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">SVMpredictInterface</span> | Wrapper for e1071's functions <span style="font-family: 'Courier New', monospace;">svm</span> and <span style="font-family: 'Courier New', monospace;">predict.svm</span> | ✔ | ✔ †| ✔ † |