@@ -3,8 +3,8 @@ Type: Package
 Title: A framework for cross-validated classification problems, with
        applications to differential variability and differential
        distribution testing
-Version: 3.1.23
-Date: 2022-10-14
+Version: 3.1.24
+Date: 2022-10-17
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
 VignetteBuilder: knitr
@@ -118,8 +118,6 @@ setMethod("crossValidate", "DataFrame",
     # Which data-types or data-views are present?
     assayIDs <- unique(mcols(measurements)$assay)
     if(is.null(assayIDs)) assayIDs <- 1
-
-    checkData(measurements, outcome)
 
     # Check that other variables are in the right format and fix
     nFeatures <- cleanNFeatures(nFeatures = nFeatures,
@@ -184,17 +182,12 @@ Using an ordinary GLM instead.")
                    characteristicsLabel = characteristicsLabel
                    )
               },
-
               simplify = FALSE)
          },
-
          simplify = FALSE)
      },
-
      simplify = FALSE)
-
     result <- unlist(unlist(resClassifier))
-
 }
 
 ################################
@@ -558,24 +551,6 @@ generateCrossValParams <- function(nRepeats, nFolds, nCores, selectionOptimisati
 }
 ######################################
 
-
-
-######################################
-######################################
-checkData <- function(measurements, outcome){
-  if(is.null(rownames(measurements)))
-    stop("'measurements' DataFrame must have sample identifiers as its row names.")
-  if(any(is.na(measurements)))
-    stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.")
-
-  # !!! Need to check mcols has assay NUm
-
-}
-######################################
-
-
-
-######################################
 ######################################
 #' A function to generate a ModellingParams object
 #'
@@ -643,9 +618,10 @@ generateModellingParams <- function(assayIDs,
     knownClassifiers <- .ClassifyRenvir[["classifyKeywords"]][, "classifier Keyword"]
     if(!classifier %in% knownClassifiers)
       stop(paste("Classifier must exactly match of these (be careful of case):", paste(knownClassifiers, collapse = ", ")))
-
+
     classifierParams <- .classifierKeywordToParams(classifier)
-    classifierParams$trainParams@tuneParams <- c(classifierParams$trainParams@tuneParams, performanceType = performanceType)
+    if(!is.null(classifierParams$trainParams@tuneParams))
+      classifierParams$trainParams@tuneParams <- c(classifierParams$trainParams@tuneParams, performanceType = performanceType)
 
     selectionMethod <- unlist(selectionMethod)
 
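The guard added at new lines 623-624 matters because c() silently drops NULL: appending performanceType to an empty tuning grid produces a one-element named vector, so is.null() checks downstream would wrongly conclude the classifier has parameters to tune. A minimal sketch, with an illustrative performanceType value:

    c(NULL, performanceType = "Balanced Accuracy")
    #      performanceType
    # "Balanced Accuracy"    # no longer NULL, so tuning code paths would run

    c(list(cost = c(1, 10)), performanceType = "Balanced Accuracy")
    # a genuine grid keeps its entries and simply gains the extra element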
@@ -833,11 +809,6 @@ CV <- function(measurements = NULL,
                characteristicsLabel = NULL)
 
 {
-    # Check that data is in the right format
-    if(!is.null(measurements))
-      checkData(measurements, outcome)
-    else
-      checkData(x, x)
     # Check that other variables are in the right format and fix
     nFeatures <- cleanNFeatures(nFeatures = nFeatures,
                                 measurements = measurements)
@@ -11,6 +11,7 @@ GLMtrainInterface <- function(measurementsTrain, classesTrain, ..., verbose = 3)
   } else {fitData <- measurementsTrain}
   glm(class ~ . + 0, family = binomial, data = fitData, ...)
 }
+attr(GLMtrainInterface, "name") <- "GLMtrainInterface"
 
 # model is of class glm.
 GLMpredictInterface <- function(model, measurementsTest, returnType = c("both", "class", "score"),
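Attaching a "name" attribute presumably lets the framework recover a readable identifier for the training function after it has been passed around as a plain closure, where the original symbol is no longer available. A minimal sketch of the idea, not the package's exact lookup code:

    trainer <- GLMtrainInterface   # the symbol "GLMtrainInterface" is lost here
    attr(trainer, "name")          # but the attribute travels with the function object
    # [1] "GLMtrainInterface"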
@@ -51,11 +51,17 @@ setMethod("prepareData", "matrix",
 setMethod("prepareData", "DataFrame",
           function(measurements, outcome, useFeatures = "all", maxMissingProp = 0.0, topNvariance = NULL)
 {
+  if(is.null(rownames(measurements)))
+  {
+    warning("'measurements' DataFrame must have sample identifiers as its row names. Generating generic ones.")
+    rownames(measurements) <- paste("Sample", seq_len(nrow(measurements)))
+  }
+
   if(useFeatures != "all") # Subset to only the desired ones.
     measurements <- measurements[, useFeatures]
 
   # Won't ever be true if input data was MultiAssayExperiment because wideFormat already produces valid names.
-  if(all.equal(colnames(measurements), make.names(colnames(measurements))) != TRUE)
+  if(!all(colnames(measurements) == make.names(colnames(measurements))))
   {
     warning("Unsafe feature names in input data. Converted into safe names.")
     mcols(measurements)$feature <- colnames(measurements) # Save the originals.
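The replaced condition was fragile: all.equal() returns TRUE on a match but a character vector describing the differences otherwise, so `!= TRUE` ends up comparing strings, and a multi-element result (for example, when lengths differ) would hand if() a condition of length greater than one, an error since R 4.2. A short illustration with made-up feature names:

    columnNames <- c("gene 1", "gene-2")
    all.equal(columnNames, make.names(columnNames))
    # [1] "2 string mismatches"         # a character vector, not FALSE
    !all(columnNames == make.names(columnNames))
    # [1] TRUE                          # always a single logical, safe inside if()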
@@ -114,7 +120,7 @@ setMethod("prepareData", "DataFrame",
     else # Three columns. Therefore, counting process data.
       outcome <- survival::Surv(outcome[, 1], outcome[, 2], outcome[, 3])
   }
-
+
   # Remove samples with indeterminate outcome.
   dropSamples <- which(is.na(outcome) | is.null(outcome))
   if(length(dropSamples) > 0)
@@ -125,8 +131,9 @@ setMethod("prepareData", "DataFrame",
 
   # Remove features with more missingness than allowed.
   nSamples <- nrow(measurements)
-  dropFeatures <- which(apply(measurements, 2, function(featureMeasurements) sum(is.na(featureMeasurements)))
-                        / nrow(measurements) > maxMissingProp)
+  measurementsMatrix <- as.matrix(measurements) # For speed of calculation.
+  dropFeatures <- which(apply(measurementsMatrix, 2, function(featureMeasurements) sum(is.na(featureMeasurements)))
+                        / nrow(measurementsMatrix) > maxMissingProp)
   if(length(dropFeatures) > 0)
     measurements <- measurements[, -dropFeatures]
 
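Coercing once with as.matrix() makes the conversion of the S4 DataFrame explicit rather than leaving it to apply(). The same per-column missingness proportion could also be computed without apply() at all; a vectorised sketch, under the same assumption that all columns coerce to a common matrix type:

    measurementsMatrix <- as.matrix(measurements)
    missingProp <- colSums(is.na(measurementsMatrix)) / nrow(measurementsMatrix)
    dropFeatures <- which(missingProp > maxMissingProp)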
@@ -139,6 +139,12 @@
   return(list(NULL, rankings[[1]], NULL))
 
   tuneParamsTrain <- list(topN = topNfeatures)
+  performanceIndex <- match("performanceType", names(modellingParams@trainParams@tuneParams))
+  if(!is.na(performanceIndex))
+  {
+    performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
+    modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-performanceIndex]
+  }
   tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
   tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)
   modellingParams@trainParams@tuneParams <- NULL
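Pulling performanceType out of tuneParams before building the grid is what keeps it from being treated as a hyperparameter: expand.grid() crosses every list element, so a leftover performanceType entry would appear as an extra column in every tuning combination passed to the trainer. A small sketch with illustrative values:

    tuneParamsTrain <- list(topN = c(10, 50), cost = c(1, 10),
                            performanceType = "Balanced Accuracy")
    expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)
    #   topN cost   performanceType
    # 1   10    1 Balanced Accuracy
    # 2   50    1 Balanced Accuracy
    # 3   10   10 Balanced Accuracy
    # 4   50   10 Balanced Accuracy   # leaks in as a fake tuning column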