...
@@ -10,7 +10,7 @@ Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
 VignetteBuilder: knitr
 Encoding: UTF-8
 biocViews: Classification, Survival
-Depends: R (>= 4.1.0), methods, S4Vectors (>= 0.18.0), MultiAssayExperiment (>= 1.6.0), BiocParallel, survival
+Depends: R (>= 4.1.0), generics, methods, S4Vectors (>= 0.18.0), MultiAssayExperiment (>= 1.6.0), BiocParallel, survival
 Imports: grid, genefilter, utils, dplyr, tidyr, rlang, randomForest
 Suggests: limma, edgeR, car, Rmixmod, ggplot2 (>= 3.0.0), gridExtra (>= 2.0.0), cowplot,
           BiocStyle, pamr, PoiClaClu, parathyroidSE, knitr, htmltools, gtable,
...
@@ -27,7 +27,7 @@ Description: The software formalises a framework for classification in R.
 License: GPL-3
 Packaged: 2014-10-18 11:16:55 UTC; dario
 RoxygenNote: 7.2.1
-SystemRequirements: C++14
+NeedsCompilation: yes
 Collate:
     'ROCplot.R'
     'available.R'
...
@@ -1,6 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(predict,trainedByClassifyR)
+S3method(train,DataFrame)
+S3method(train,MultiAssayExperiment)
+S3method(train,data.frame)
+S3method(train,list)
+S3method(train,matrix)
 export(ClassifyResult)
 export(CrossValParams)
 export(FeatureSetCollection)
...
@@ -36,11 +41,6 @@ export(sampleNames)
 export(samplesMetricMap)
 export(selectionPlot)
 export(totalPredictions)
-export(train.DataFrame)
-export(train.MultiAssayExperiment)
-export(train.data.frame)
-export(train.list)
-export(train.matrix)
 export(tunedParameters)
 exportClasses(ClassifyResult)
 exportClasses(CrossValParams)
...
@@ -94,6 +94,7 @@ importFrom(S4Vectors,do.call)
 importFrom(S4Vectors,mcols)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n)
+importFrom(generics,train)
 importFrom(rlang,sym)
 importFrom(survival,Surv)
 importFrom(survival,concordance)
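
The DESCRIPTION and NAMESPACE changes above switch the package from exporting train.* functions to registering them as S3 methods for the train generic from the generics package. A minimal roxygen sketch of that registration pattern (illustrative only, not ClassifyR's actual source file):

# Roxygen tags like these generate the importFrom(generics,train) and
# S3method(train,matrix) entries shown in the NAMESPACE hunks above.

#' @importFrom generics train
#' @method train matrix
#' @export
train.matrix <- function(x, outcomeTrain, ...) {
    # Real method body goes here; dispatch happens through generics::train().
    invisible(x)
}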
...
@@ -854,14 +854,17 @@ simplifyResults <- function(results, values = c("assay", "classifier", "selectio
 }
 
 #' @rdname crossValidate
+#' @importFrom generics train
+#' @method train matrix
 #' @export
-train.matrix <-function(x, outcomeTrain, ...)
+train.matrix <- function(x, outcomeTrain, ...)
 {
     x <- DataFrame(x, check.names = FALSE)
     train(x, outcomeTrain, ...)
 }
 
 #' @rdname crossValidate
+#' @method train data.frame
 #' @export
 train.data.frame <- function(x, outcomeTrain, ...)
 {
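
With these methods registered, the generic can be called directly on a matrix; the method coerces the input to a DataFrame and re-dispatches. A hedged usage sketch with toy data (object names invented; assumes ClassifyR is installed and the default randomForest classifier is available):

library(ClassifyR)
measurements <- matrix(rnorm(50 * 20), ncol = 20,
                       dimnames = list(paste0("sample", 1:50), paste0("gene", 1:20)))
classes <- factor(rep(c("Healthy", "Disease"), each = 25))
# Dispatches to train.matrix, which wraps the data and calls train.DataFrame.
# generics::train() is used explicitly in case the generic is not re-exported.
model <- generics::train(measurements, classes)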
...
@@ -872,20 +875,26 @@ train.data.frame <- function(x, outcomeTrain, ...)
 #' @rdname crossValidate
 #' @param assayIDs A character vector for assays to train with. Special value \code{"all"}
 #' uses all assays in the input object.
+#' @method train DataFrame
 #' @export
 train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiViewMethod = "none", assayIDs = "all", ...) # ... for prepareData.
 {
     prepArgs <- list(x, outcomeTrain)
     extraInputs <- list(...)
+    prepExtras <- numeric()
     if(length(extraInputs) > 0)
       prepExtras <- which(names(extraInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
     if(length(prepExtras) > 0)
       prepArgs <- append(prepArgs, extraInputs[prepExtras])
     measurementsAndOutcome <- do.call(prepareData, prepArgs)
+    measurements <- measurementsAndOutcome[["measurements"]]
+    outcomeTrain <- measurementsAndOutcome[["outcome"]]
 
     classifier <- cleanClassifier(classifier = classifier, measurements = measurements)
-    if(assayIDs == "all") assayIDs <- unique(mcols(x)[, "assay"])
+    if(assayIDs == "all") assayIDs <- unique(mcols(measurements)[, "assay"])
     if(is.null(assayIDs)) assayIDs <- 1
+    names(assayIDs) <- assayIDs
+    names(classifier) <- classifier
 
     if(multiViewMethod == "none"){
       resClassifier <-
...
@@ -897,10 +906,10 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
           if(assayIndex != 1) measurementsUse <- measurements[, mcols(measurements)[, "assay"] == assayIndex, drop = FALSE]
 
           classifierParams <- .classifierKeywordToParams(classifierForAssay)
-          modellingParams <- ModellingParams(balancing = "none", selectParams = "none",
+          modellingParams <- ModellingParams(balancing = "none", selectParams = NULL,
                                              trainParams = classifierParams$trainParams, predictParams = classifierParams$predictParams)
 
-          .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
+          .doTrain(measurementsUse, outcomeTrain, NULL, NULL, CrossValParams(), modellingParams, verbose = 0)[["model"]]
           ## train model
         },
         simplify = FALSE)
...
@@ -908,8 +917,13 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
         simplify = FALSE)
 
       models <- unlist(resClassifier, recursive = FALSE)
-      names(models) <- assayIDs
-      class(models) <- c(class(models), "listOfModels")
+      if(length(models) == 1) {
+        model <- models[[1]]
+        class(model) <- c(class(model), "trainedByClassifyR")
+        models <- NULL
+      } else {
+        class(models) <- c(class(models), "listOfModels", "trainedByClassifyR")
+      }
     }
 
 ################################
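
The branch above now tags what train() returns so that predict() can dispatch on it: a single fitted model gains the class "trainedByClassifyR", while several per-assay models are kept as a "listOfModels". A minimal, self-contained sketch of the same tagging idea in plain R (not ClassifyR internals):

# Two toy fitted models standing in for per-assay classifiers.
models <- list(assay1 = lm(dist ~ speed, data = cars),
               assay2 = lm(speed ~ dist, data = cars))
if(length(models) == 1) {
  model <- models[[1]]
  class(model) <- c(class(model), "trainedByClassifyR")   # single model case
} else {
  class(models) <- c(class(models), "listOfModels", "trainedByClassifyR")
}
class(models)  # "list" "listOfModels" "trainedByClassifyR"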
...
@@ -919,7 +933,8 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
     ### Merging or binding to combine data
     if(multiViewMethod == "merge"){
       measurementsUse <- measurements[, mcols(measurements)[["assay"]] %in% assayIDs]
-      .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
+      model <- .doTrain(measurementsUse, outcomeTrain, NULL, NULL, crossValParams, modellingParams, verbose = 0)[["model"]]
+      class(model) <- c("trainedByClassifyR", class(model))
     }
 
 
...
@@ -941,7 +956,8 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
                                          selectParams = NULL,
                                          trainParams = TrainParams(prevalTrainInterface, params = paramsAssays, characteristics = paramsAssays$clinical@trainParams@characteristics),
                                          predictParams = PredictParams(prevalPredictInterface, characteristics = paramsAssays$clinical@predictParams@characteristics))
-      .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
+      model <- .doTrain(measurementsUse, outcomeTrain, NULL, NULL, crossValParams, modellingParams, verbose = 0)[["model"]]
+      class(model) <- c("trainedByClassifyR", class(model))
     }
 
     ### Principal Components Analysis to combine data
...
@@ -956,11 +972,14 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
       modellingParams <- ModellingParams(balancing = "none", selectParams = NULL,
                                          trainParams = TrainParams(pcaTrainInterface, params = paramsClinical, nFeatures = nFeatures, characteristics = paramsClinical$clinical@trainParams@characteristics),
                                          predictParams = PredictParams(pcaPredictInterface, characteristics = paramsClinical$clinical@predictParams@characteristics))
-      .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
+      model <- .doTrain(measurementsUse, outcomeTrain, NULL, NULL, crossValParams, modellingParams, verbose = 0)[["model"]]
+      class(model) <- c("trainedByClassifyR", class(model))
     }
+    if(!exists("models", inherits = FALSE) || is.null(models)) return(model) else return(models)
 }
 
 #' @rdname crossValidate
+#' @method train list
 #' @export
 # Each of the first four variables are named lists with names of assays.
 train.list <- function(x, outcomeTrain, ...)
...
@@ -997,11 +1016,13 @@ train.list <- function(x, outcomeTrain, ...)
 }
 
 #' @rdname crossValidate
+#' @method train MultiAssayExperiment
 #' @export
 train.MultiAssayExperiment <- function(x, outcomeColumns, ...)
 {
     prepArgs <- list(x, outcomeColumns)
     extraInputs <- list(...)
+    prepExtras <- trainExtras <- numeric()
     if(length(extraInputs) > 0)
       prepExtras <- which(names(extraInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
     if(length(prepExtras) > 0)
...
@@ -1021,7 +1042,7 @@ train.MultiAssayExperiment <- function(x, outcomeColumns, ...)
 #' \code{DataFrame}, \code{list} (of matrices or data frames) or \code{MultiAssayExperiment} containing
 #' the data to make predictions with either a fitted model created by \code{train} or the final model
 #' stored in a \code{\link{ClassifyResult}} object.
-
+#' @method predict trainedByClassifyR
 #' @export
 predict.trainedByClassifyR <- function(object, newData, ...)
 {
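
Because train() now returns an object classed "trainedByClassifyR", the standard predict() generic dispatches to the method above. A hedged usage sketch, reusing the toy model and measurements from the earlier train() example (handling of other newData types is assumed, not shown in this hunk):

predictions <- predict(model, measurements)  # calls predict.trainedByClassifyR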
...
@@ -191,7 +191,7 @@ input data. Autmomatically reducing to smaller number.")
   }
 
   # Some classifiers have one function for training and testing, so that's why test data is also passed in.
-  trained <- tryCatch(.doTrain(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose),
+  trained <- tryCatch(.doTrain(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, crossValParams, modellingParams, verbose),
                       error = function(error) error[["message"]])
   if(is.character(trained)) return(trained) # An error occurred.
 
...
@@ -238,7 +238,7 @@ input data. Autmomatically reducing to smaller number.")
       {
         measurementsTrainLess1 <- measurementsTrain[, -selectedIndex, drop = FALSE]
         measurementsTestLess1 <- measurementsTest[, -selectedIndex, drop = FALSE]
-        modelWithoutOne <- tryCatch(.doTrain(measurementsTrainLess1, outcomeTrain, measurementsTestLess1, outcomeTest, modellingParams, verbose),
+        modelWithoutOne <- tryCatch(.doTrain(measurementsTrainLess1, outcomeTrain, measurementsTestLess1, outcomeTest, crossValParams, modellingParams, verbose),
                                     error = function(error) error[["message"]])
         if(!is.null(modellingParams@predictParams))
           predictedOutcomeWithoutOne <- tryCatch(.doTest(modelWithoutOne[["model"]], measurementsTestLess1, modellingParams@predictParams, verbose),
...
@@ -1,6 +1,6 @@
 # Random Forest
 RFparams <- function() {
-    trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)),
+    trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100), performanceType = "Balanced Error"),
                                getFeatures = forestFeatures)
     predictParams <- PredictParams(randomForestPredictInterface)
 
...
@@ -9,7 +9,7 @@ RFparams <- function() {
 
 # Random Survival Forest
 RSFparams <- function() {
-    trainParams <- TrainParams(rfsrcTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)))
+    trainParams <- TrainParams(rfsrcTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100), performanceType = "Balanced Error"))
     predictParams <- PredictParams(rfsrcPredictInterface)
 
     return(list(trainParams = trainParams, predictParams = predictParams))
...
@@ -17,7 +17,7 @@ RSFparams <- function() {
 
 # k Nearest Neighbours
 kNNparams <- function() {
-    trainParams <- TrainParams(kNNinterface, tuneParams = list(k = 1:5))
+    trainParams <- TrainParams(kNNinterface, tuneParams = list(k = 1:5, performanceType = "Balanced Error"))
     predictParams <- NULL
     return(list(trainParams = trainParams, predictParams = predictParams))
 }
...
@@ -40,7 +40,7 @@ elasticNetGLMparams <- function() {
 
 # Support Vector Machine
 SVMparams = function() {
-    trainParams <- TrainParams(SVMtrainInterface, tuneParams = list(kernel = c("linear", "polynomial", "radial", "sigmoid"), cost = 10^(-3:3)))
+    trainParams <- TrainParams(SVMtrainInterface, tuneParams = list(kernel = c("linear", "polynomial", "radial", "sigmoid"), cost = 10^(-3:3), performanceType = "Balanced Error"))
     predictParams <- PredictParams(SVMpredictInterface)
 
     return(list(trainParams = trainParams, predictParams = predictParams))
...
@@ -64,7 +64,7 @@ DLDAparams = function() {
 
 # naive Bayes Kernel
 naiveBayesParams <- function() {
-    trainParams <- TrainParams(naiveBayesKernel, tuneParams = list(difference = c("unweighted", "weighted")))
+    trainParams <- TrainParams(naiveBayesKernel, tuneParams = list(difference = c("unweighted", "weighted"), performanceType = "Balanced Error"))
     predictParams <- NULL
     return(list(trainParams = trainParams, predictParams = predictParams))
 }
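
Across these parameter sets, the tuneParams list now names the performance metric used to rank tuning combinations alongside the candidate values themselves. A short sketch mirroring the k Nearest Neighbours hunk above (assumes ClassifyR exports TrainParams and kNNinterface as used in these hunks):

library(ClassifyR)
# The best k is the one giving the lowest balanced error during tuning.
knnTrain <- TrainParams(kNNinterface, tuneParams = list(k = 1:5, performanceType = "Balanced Error"))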
...
@@ -106,7 +106,6 @@
   tuneParams <- modellingParams@selectParams@tuneParams
   performanceType <- tuneParams[["performanceType"]]
   topNfeatures <- tuneParams[["nFeatures"]]
-  tuneMode <- ifelse("tuneMode" %in% names(tuneParams), tuneParams[["tuneMode"]], crossValParams@tuneMode)
   tuneParams <- tuneParams[-match(c("performanceType", "nFeatures"), names(tuneParams))] # Only used as evaluation metric.
 
   # Make selectParams NULL, since we are currently doing selection and it shouldn't call
...
@@ -136,7 +135,7 @@
   if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
     return(list(NULL, rankings[[1]], NULL))
 
-  if(tuneMode == "none") # No parameters to choose between.
+  if(crossValParams@tuneMode == "none") # No parameters to choose between.
     return(list(NULL, rankings[[1]], NULL))
 
   tuneParamsTrain <- list(topN = topNfeatures)
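
These two hunks remove the per-selection tuneMode override, so the tuning mode is now read only from the CrossValParams object. A hedged sketch of setting it at the cross-validation level (argument names assumed from the class's constructor; consult the package manual for the exact signature):

library(ClassifyR)
cvParams <- CrossValParams(permutations = 5, folds = 5, tuneMode = "Resubstitution")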
...
@@ -257,7 +256,7 @@
 
 # Code to create a function call to a training function. Might also do training and testing
 # within the same function, so test samples are also passed in case they are needed.
-.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose)
+.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, crossValParams, modellingParams, verbose)
 {
   tuneDetails <- NULL
   if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams))
...
@@ -278,10 +277,10 @@
 
       predictions <- result[["predictions"]]
       if(class(predictions) == "data.frame")
-        predictedOutcome <- predictions[, "outcome"]
+        predictedOutcome <- predictions[, colnames(predictions) %in% c("class", "risk")]
       else
         predictedOutcome <- predictions
-      calcExternalPerformance(outcomeTest, predictedOutcome, performanceType)
+      calcExternalPerformance(outcomeTrain, predictedOutcome, performanceType)
     } else {
       result <- runTests(measurementsTrain, outcomeTrain,
                          crossValParams, modellingParams,
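
The switch from outcomeTest to outcomeTrain reflects that, under resubstitution tuning, predictions are made on the training samples, so the tuning score must be computed against the training outcomes. A small hedged sketch of the scoring call itself (toy factors; metric name taken from the hunks above):

library(ClassifyR)
actual    <- factor(c("Healthy", "Healthy", "Disease", "Disease"))
predicted <- factor(c("Healthy", "Disease", "Disease", "Disease"), levels = levels(actual))
calcExternalPerformance(actual, predicted, "Balanced Error")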
...
@@ -98,11 +98,11 @@ crossValidate(measurements, outcome, ...)
   ...
 )
 
-train.matrix(x, outcomeTrain, ...)
+\method{train}{matrix}(x, outcomeTrain, ...)
 
-train.data.frame(x, outcomeTrain, ...)
+\method{train}{data.frame}(x, outcomeTrain, ...)
 
-train.DataFrame(
+\method{train}{DataFrame}(
   x,
   outcomeTrain,
   classifier = "randomForest",
...
@@ -111,9 +111,9 @@ train.DataFrame(
   ...
 )
 
-train.list(x, outcomeTrain, ...)
+\method{train}{list}(x, outcomeTrain, ...)
 
-train.MultiAssayExperiment(x, outcomeColumns, ...)
+\method{train}{MultiAssayExperiment}(x, outcomeColumns, ...)
 
 \method{predict}{trainedByClassifyR}(object, newData, ...)
 }