Browse code

- train methods now exported and can be used. train generic imported from generics package. - simple params now have performanceType specified. - Fixes to training with multiple views but no aggregation. - Fix to parameter tuning in .doTrain.

Dario Strbenac authored on 02/09/2022 06:10:10
Showing 8 changed files

... ...
@@ -10,7 +10,7 @@ Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
11 11
 Encoding: UTF-8
12 12
 biocViews: Classification, Survival
13
-Depends: R (>= 4.1.0), methods, S4Vectors (>= 0.18.0), MultiAssayExperiment (>= 1.6.0), BiocParallel, survival
13
+Depends: R (>= 4.1.0), generics, methods, S4Vectors (>= 0.18.0), MultiAssayExperiment (>= 1.6.0), BiocParallel, survival
14 14
 Imports: grid, genefilter, utils, dplyr, tidyr, rlang, randomForest
15 15
 Suggests: limma, edgeR, car, Rmixmod, ggplot2 (>= 3.0.0), gridExtra (>= 2.0.0), cowplot,
16 16
         BiocStyle, pamr, PoiClaClu, parathyroidSE, knitr, htmltools, gtable,
... ...
@@ -27,7 +27,7 @@ Description: The software formalises a framework for classification in R.
27 27
 License: GPL-3
28 28
 Packaged: 2014-10-18 11:16:55 UTC; dario
29 29
 RoxygenNote: 7.2.1
30
-SystemRequirements: C++14
30
+NeedsCompilation: yes
31 31
 Collate:
32 32
     'ROCplot.R'
33 33
     'available.R'
... ...
@@ -1,6 +1,11 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 S3method(predict,trainedByClassifyR)
4
+S3method(train,DataFrame)
5
+S3method(train,MultiAssayExperiment)
6
+S3method(train,data.frame)
7
+S3method(train,list)
8
+S3method(train,matrix)
4 9
 export(ClassifyResult)
5 10
 export(CrossValParams)
6 11
 export(FeatureSetCollection)
... ...
@@ -36,11 +41,6 @@ export(sampleNames)
36 41
 export(samplesMetricMap)
37 42
 export(selectionPlot)
38 43
 export(totalPredictions)
39
-export(train.DataFrame)
40
-export(train.MultiAssayExperiment)
41
-export(train.data.frame)
42
-export(train.list)
43
-export(train.matrix)
44 44
 export(tunedParameters)
45 45
 exportClasses(ClassifyResult)
46 46
 exportClasses(CrossValParams)
... ...
@@ -94,6 +94,7 @@ importFrom(S4Vectors,do.call)
94 94
 importFrom(S4Vectors,mcols)
95 95
 importFrom(dplyr,mutate)
96 96
 importFrom(dplyr,n)
97
+importFrom(generics,train)
97 98
 importFrom(rlang,sym)
98 99
 importFrom(survival,Surv)
99 100
 importFrom(survival,concordance)
... ...
@@ -854,14 +854,17 @@ simplifyResults <- function(results, values = c("assay", "classifier", "selectio
854 854
 }
855 855
 
856 856
 #' @rdname crossValidate
857
+#' @importFrom generics train
858
+#' @method train matrix
857 859
 #' @export
858
-train.matrix <-function(x, outcomeTrain, ...)
860
+train.matrix <- function(x, outcomeTrain, ...)
859 861
                {
860 862
                  x <- DataFrame(x, check.names = FALSE)
861 863
                  train(x, outcomeTrain, ...)
862 864
                }
863 865
 
864 866
 #' @rdname crossValidate
867
+#' @method train data.frame
865 868
 #' @export
866 869
 train.data.frame <- function(x, outcomeTrain, ...)
867 870
                     {
... ...
@@ -872,20 +875,26 @@ train.data.frame <- function(x, outcomeTrain, ...)
872 875
 #' @rdname crossValidate
873 876
 #' @param assayIDs A character vector for assays to train with. Special value \code{"all"}
874 877
 #' uses all assays in the input object.
878
+#' @method train DataFrame
875 879
 #' @export
876 880
 train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiViewMethod = "none", assayIDs = "all", ...) # ... for prepareData.
877 881
                    {
878 882
               prepArgs <- list(x, outcomeTrain)
879 883
               extraInputs <- list(...)
884
+              prepExtras <- numeric()
880 885
               if(length(extraInputs) > 0)
881 886
                 prepExtras <- which(names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
882 887
               if(length(prepExtras) > 0)
883 888
                 prepArgs <- append(prepArgs, extraInputs[prepExtras])
884 889
               measurementsAndOutcome <- do.call(prepareData, prepArgs)
890
+              measurements <- measurementsAndOutcome[["measurements"]]
891
+              outcomeTrain <- measurementsAndOutcome[["outcome"]]
885 892
               
886 893
               classifier <- cleanClassifier(classifier = classifier, measurements = measurements)
887
-              if(assayIDs == "all") assayIDs <- unique(mcols(x)[, "assay"])
894
+              if(assayIDs == "all") assayIDs <- unique(mcols(measurements)[, "assay"])
888 895
               if(is.null(assayIDs)) assayIDs <- 1
896
+              names(assayIDs) <- assayIDs
897
+              names(classifier) <- classifier
889 898
 
890 899
               if(multiViewMethod == "none"){
891 900
                   resClassifier <-
... ...
@@ -897,10 +906,10 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
897 906
                                   if(assayIndex != 1) measurementsUse <- measurements[, mcols(measurements)[, "assay"] == assayIndex, drop = FALSE]
898 907
                                   
899 908
                                   classifierParams <- .classifierKeywordToParams(classifierForAssay)
900
-                                  modellingParams <- ModellingParams(balancing = "none", selectParams = "none",
909
+                                  modellingParams <- ModellingParams(balancing = "none", selectParams = NULL,
901 910
                                                                trainParams = classifierParams$trainParams, predictParams = classifierParams$predictParams)
902 911
                                   
903
-                                  .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
912
+                                  .doTrain(measurementsUse, outcomeTrain, NULL, NULL, CrossValParams(), modellingParams, verbose = 0)[["model"]]
904 913
                                   ## train model
905 914
                           },
906 915
                           simplify = FALSE)
... ...
@@ -908,8 +917,13 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
908 917
                       simplify = FALSE)
909 918
 
910 919
                   models <- unlist(resClassifier, recursive = FALSE)
911
-                  names(models) <- assayIDs
912
-                  class(models) <- c(class(models), "listOfModels")
920
+                  if(length(models) == 1) {
921
+                      model <- models[[1]]
922
+                      class(model) <- c(class(model), "trainedByClassifyR")
923
+                      models <- NULL
924
+                  } else {
925
+                      class(models) <- c(class(models), "listOfModels", "trainedByClassifyR")
926
+                  }
913 927
               }
914 928
 
915 929
               ################################
... ...
@@ -919,7 +933,8 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
919 933
               ### Merging or binding to combine data
920 934
               if(multiViewMethod == "merge"){
921 935
                   measurementsUse <- measurements[, mcols(measurements)[["assay"]] %in% assayIDs]
922
-                  .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
936
+                  model <- .doTrain(measurementsUse, outcomeTrain, NULL, NULL, crossValParams, modellingParams, verbose = 0)[["model"]]
937
+                  class(model) <- c("trainedByClassifyR", class(model))
923 938
               }
924 939
 
925 940
 
... ...
@@ -941,7 +956,8 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
941 956
                                     selectParams = NULL,
942 957
                                     trainParams = TrainParams(prevalTrainInterface, params = paramsAssays, characteristics = paramsAssays$clinical@trainParams@characteristics),
943 958
                                     predictParams = PredictParams(prevalPredictInterface, characteristics = paramsAssays$clinical@predictParams@characteristics))
944
-                 .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
959
+                 model <- .doTrain(measurementsUse, outcomeTrain, NULL, NULL, crossValParams, modellingParams, verbose = 0)[["model"]]
960
+                 class(model) <- c("trainedByClassifyR", class(model))
945 961
               }
946 962
               
947 963
               ### Principal Components Analysis to combine data
... ...
@@ -956,11 +972,14 @@ train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiV
956 972
                 modellingParams <- ModellingParams(balancing = "none", selectParams = NULL,
957 973
                                    trainParams = TrainParams(pcaTrainInterface, params = paramsClinical, nFeatures = nFeatures, characteristics = paramsClinical$clinical@trainParams@characteristics),
958 974
                                    predictParams = PredictParams(pcaPredictInterface, characteristics = paramsClinical$clinical@predictParams@characteristics))
959
-                .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
975
+                model <- .doTrain(measurementsUse, outcomeTrain, NULL, NULL, crossValParams, modellingParams, verbose = 0)[["model"]]
976
+                class(model) <- c("trainedByClassifyR", class(model))
960 977
               }
978
+              if(missing(models) || is.null(models)) return(model) else return(models)
961 979
           }
962 980
 
963 981
 #' @rdname crossValidate
982
+#' @method train list
964 983
 #' @export
965 984
 # Each of the first four variables are named lists with names of assays.
966 985
 train.list <- function(x, outcomeTrain, ...)
... ...
@@ -997,11 +1016,13 @@ train.list <- function(x, outcomeTrain, ...)
997 1016
 }
998 1017
 
999 1018
 #' @rdname crossValidate
1019
+#' @method train MultiAssayExperiment
1000 1020
 #' @export
1001 1021
 train.MultiAssayExperiment <- function(x, outcomeColumns, ...)
1002 1022
           {
1003 1023
               prepArgs <- list(x, outcomeColumns)
1004 1024
               extraInputs <- list(...)
1025
+              prepExtras <- trainExtras <- numeric()
1005 1026
               if(length(extraInputs) > 0)
1006 1027
                 prepExtras <- which(names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
1007 1028
               if(length(prepExtras) > 0)
... ...
@@ -1021,7 +1042,7 @@ train.MultiAssayExperiment <- function(x, outcomeColumns, ...)
1021 1042
 #' \code{DataFrame}, \code{list} (of matrices or data frames) or \code{MultiAssayExperiment} containing
1022 1043
 #' the data to make predictions with with either a fitted model created by \code{train} or the final model
1023 1044
 #' stored in a \code{\link{ClassifyResult}} object.
1024
-
1045
+#' @method predict trainedByClassifyR
1025 1046
 #' @export
1026 1047
 predict.trainedByClassifyR <- function(object, newData, ...)
1027 1048
 {
... ...
@@ -191,7 +191,7 @@ input data. Autmomatically reducing to smaller number.")
191 191
   }
192 192
   
193 193
   # Some classifiers have one function for training and testing, so that's why test data is also passed in.
194
-  trained <- tryCatch(.doTrain(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose),
194
+  trained <- tryCatch(.doTrain(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, crossValParams, modellingParams, verbose),
195 195
                       error = function(error) error[["message"]])
196 196
   if(is.character(trained)) return(trained) # An error occurred.
197 197
   
... ...
@@ -238,7 +238,7 @@ input data. Autmomatically reducing to smaller number.")
238 238
     {
239 239
       measurementsTrainLess1 <- measurementsTrain[, -selectedIndex, drop = FALSE]
240 240
       measurementsTestLess1 <- measurementsTest[, -selectedIndex, drop = FALSE]
241
-      modelWithoutOne <- tryCatch(.doTrain(measurementsTrainLess1, outcomeTrain, measurementsTestLess1, outcomeTest, modellingParams, verbose),
241
+      modelWithoutOne <- tryCatch(.doTrain(measurementsTrainLess1, outcomeTrain, measurementsTestLess1, outcomeTest, crossValParams, modellingParams, verbose),
242 242
                                   error = function(error) error[["message"]])
243 243
       if(!is.null(modellingParams@predictParams))
244 244
       predictedOutcomeWithoutOne <- tryCatch(.doTest(modelWithoutOne[["model"]], measurementsTestLess1, modellingParams@predictParams, verbose),
... ...
@@ -1,6 +1,6 @@
1 1
 # Random Forest
2 2
 RFparams <- function() {
3
-    trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)),
3
+    trainParams <- TrainParams(randomForestTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100), performanceType = "Balanced Error"),
4 4
                                getFeatures = forestFeatures)
5 5
     predictParams <- PredictParams(randomForestPredictInterface)
6 6
     
... ...
@@ -9,7 +9,7 @@ RFparams <- function() {
9 9
 
10 10
 # Random Survival Forest
11 11
 RSFparams <- function() {
12
-    trainParams <- TrainParams(rfsrcTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100)))
12
+    trainParams <- TrainParams(rfsrcTrainInterface, tuneParams = list(mTryProportion = c(0.25, 0.33, 0.50, 0.66, 0.75, 1.00), ntree = seq(100, 500, 100), performanceType = "Balanced Error"))
13 13
     predictParams <- PredictParams(rfsrcPredictInterface)
14 14
     
15 15
     return(list(trainParams = trainParams, predictParams = predictParams))
... ...
@@ -17,7 +17,7 @@ RSFparams <- function() {
17 17
 
18 18
 # k Nearest Neighbours
19 19
 kNNparams <- function() {
20
-    trainParams <- TrainParams(kNNinterface, tuneParams = list(k = 1:5))
20
+    trainParams <- TrainParams(kNNinterface, tuneParams = list(k = 1:5, performanceType = "Balanced Error"))
21 21
     predictParams <- NULL
22 22
     return(list(trainParams = trainParams, predictParams = predictParams))
23 23
 }
... ...
@@ -40,7 +40,7 @@ elasticNetGLMparams <- function() {
40 40
 
41 41
 # Support Vector Machine
42 42
 SVMparams = function() {
43
-    trainParams <- TrainParams(SVMtrainInterface, tuneParams = list(kernel = c("linear", "polynomial", "radial", "sigmoid"), cost = 10^(-3:3)))
43
+    trainParams <- TrainParams(SVMtrainInterface, tuneParams = list(kernel = c("linear", "polynomial", "radial", "sigmoid"), cost = 10^(-3:3), performanceType = "Balanced Error"))
44 44
     predictParams <- PredictParams(SVMpredictInterface)
45 45
     
46 46
     return(list(trainParams = trainParams, predictParams = predictParams))
... ...
@@ -64,7 +64,7 @@ DLDAparams = function() {
64 64
 
65 65
 # naive Bayes Kernel
66 66
 naiveBayesParams <- function() {
67
-    trainParams <- TrainParams(naiveBayesKernel, tuneParams = list(difference = c("unweighted", "weighted")))
67
+    trainParams <- TrainParams(naiveBayesKernel, tuneParams = list(difference = c("unweighted", "weighted"), performanceType = "Balanced Error"))
68 68
     predictParams <- NULL
69 69
     return(list(trainParams = trainParams, predictParams = predictParams))
70 70
 }
... ...
@@ -106,7 +106,6 @@
106 106
   tuneParams <- modellingParams@selectParams@tuneParams
107 107
   performanceType <- tuneParams[["performanceType"]]
108 108
   topNfeatures <- tuneParams[["nFeatures"]]
109
-  tuneMode <- ifelse("tuneMode" %in% names(tuneParams), tuneParams[["tuneMode"]], crossValParams@tuneMode)
110 109
   tuneParams <- tuneParams[-match(c("performanceType", "nFeatures"), names(tuneParams))] # Only used as evaluation metric.
111 110
   
112 111
   # Make selectParams NULL, since we are currently doing selection and it shouldn't call
... ...
@@ -136,7 +135,7 @@
136 135
     if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
137 136
       return(list(NULL, rankings[[1]], NULL))
138 137
     
139
-    if(tuneMode == "none") # No parameters to choose between.
138
+    if(crossValParams@tuneMode == "none") # No parameters to choose between.
140 139
         return(list(NULL, rankings[[1]], NULL))
141 140
     
142 141
     tuneParamsTrain <- list(topN = topNfeatures)
... ...
@@ -257,7 +256,7 @@
257 256
 
258 257
 # Code to create a function call to a training function. Might also do training and testing
259 258
 # within the same function, so test samples are also passed in case they are needed.
260
-.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose)
259
+.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, crossValParams, modellingParams, verbose)
261 260
 {
262 261
   tuneDetails <- NULL
263 262
   if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams))
... ...
@@ -278,10 +277,10 @@
278 277
         
279 278
         predictions <- result[["predictions"]]
280 279
         if(class(predictions) == "data.frame")
281
-          predictedOutcome <- predictions[, "outcome"]
280
+          predictedOutcome <- predictions[, colnames(predictions) %in% c("class", "risk")]
282 281
         else
283 282
           predictedOutcome <- predictions
284
-        calcExternalPerformance(outcomeTest, predictedOutcome, performanceType)
283
+        calcExternalPerformance(outcomeTrain, predictedOutcome, performanceType)
285 284
       } else {
286 285
         result <- runTests(measurementsTrain, outcomeTrain,
287 286
                            crossValParams, modellingParams,
... ...
@@ -98,11 +98,11 @@ crossValidate(measurements, outcome, ...)
98 98
   ...
99 99
 )
100 100
 
101
-train.matrix(x, outcomeTrain, ...)
101
+\method{train}{matrix}(x, outcomeTrain, ...)
102 102
 
103
-train.data.frame(x, outcomeTrain, ...)
103
+\method{train}{data.frame}(x, outcomeTrain, ...)
104 104
 
105
-train.DataFrame(
105
+\method{train}{DataFrame}(
106 106
   x,
107 107
   outcomeTrain,
108 108
   classifier = "randomForest",
... ...
@@ -111,9 +111,9 @@ train.DataFrame(
111 111
   ...
112 112
 )
113 113
 
114
-train.list(x, outcomeTrain, ...)
114
+\method{train}{list}(x, outcomeTrain, ...)
115 115
 
116
-train.MultiAssayExperiment(x, outcomeColumns, ...)
116
+\method{train}{MultiAssayExperiment}(x, outcomeColumns, ...)
117 117
 
118 118
 \method{predict}{trainedByClassifyR}(object, newData, ...)
119 119
 }
120 120
deleted file mode 100644
121 121
Binary files a/src/coxformatrices.o and /dev/null differ