Browse code

- extraParams parameter of crossValidate allows any parameter in a predefined parameter set (in simpleParams.R) to be overwritten or unused ones to be set by the user. - .permutations private utility function which allows any position(s) to be fixed to a certain value.

Dario Strbenac authored on 10/02/2023 02:10:18
Showing 1 changed files
... ...
@@ -656,3 +656,26 @@ predict.dlda <- function(object, newdata, ...) { # Remove once sparsediscrim is
656 656
 .dmvnorm_diag <- function(x, mean, sigma) { # Remove once sparsediscrim is reinstated to CRAN.
657 657
   exp(sum(dnorm(x, mean=mean, sd=sqrt(sigma), log=TRUE)))
658 658
 }
659
+
660
+# Function to create permutations of a vector, with the possibility to restrict values at certain positions.
661
+# fixed parameter is a data frame with first column position and second column value.
662
+.permutations <- function(data, fixed = NULL)
663
+{
664
+  items <- length(data)
665
+  multipliedTo1 <- factorial(items)
666
+  if(items > 1) 
667
+    permutations <- structure(vapply(seq_along(data), function(index)
668
+                     rbind(data[index], .permutations(data[-index])), 
669
+                     data[rep(1L, multipliedTo1)]), dim = c(items, multipliedTo1))
670
+  else permutations <- data
671
+  
672
+  if(!is.null(fixed))
673
+  {
674
+    for(rowIndex in seq_len(nrow(fixed)))
675
+    {
676
+      keepColumns <- permutations[fixed[rowIndex, 1], ] == fixed[rowIndex, 2]
677
+      permutations <- permutations[, keepColumns]
678
+    }
679
+  }
680
+  permutations
681
+}
Browse code

- Minor modifications to package imports to avoid notes and warnings.

Dario Strbenac authored on 12/12/2022 23:55:11
Showing 1 changed files
... ...
@@ -130,7 +130,7 @@
130 130
       if(tuneCombo != "none") # Add real parameters before function call.
131 131
         paramList <- append(paramList, tuneCombo)
132 132
       if(attr(featureRanking, "name") == "randomSelection")
133
-        paramList <- append(paramList, nFeatures = topNfeatures)
133
+        paramList <- append(paramList, list(nFeatures = topNfeatures))
134 134
       do.call(featureRanking, paramList)
135 135
     })
136 136
 
Browse code

- randomSelection function added. - crissCrossValidate and crissCrossPlot contributions by Harry Robertson added and harmonised to ClassifyR code style. - selectionMethod and classifier defaults become "auto" for crossValidate. Previously only documented but not implemented in code. - predict method for standalone use now finds the correct prediction method for each trained model.

Dario Strbenac authored on 08/12/2022 06:15:07
Showing 1 changed files
... ...
@@ -129,10 +129,12 @@
129 129
       tuneCombo <- tuneCombosSelect[rowIndex, , drop = FALSE]
130 130
       if(tuneCombo != "none") # Add real parameters before function call.
131 131
         paramList <- append(paramList, tuneCombo)
132
+      if(attr(featureRanking, "name") == "randomSelection")
133
+        paramList <- append(paramList, nFeatures = topNfeatures)
132 134
       do.call(featureRanking, paramList)
133 135
     })
134 136
 
135
-    if(attr(featureRanking, "name") %in% c("previousSelection", "Union Selection")) # Actually selection not ranking.
137
+    if(attr(featureRanking, "name") %in% c("randomSelection", "previousSelection", "Union Selection")) # Actually selection not ranking.
136 138
       return(list(NULL, rankings[[1]], NULL))
137 139
     
138 140
     if(crossValParams@tuneMode == "none") # No parameters to choose between.
... ...
@@ -509,6 +511,8 @@
509 511
         "KS" = KolmogorovSmirnovRanking,
510 512
         "KL" = KullbackLeiblerRanking,
511 513
         "CoxPH" = coxphRanking,
514
+        "previousSelection" = previousSelection,
515
+        "randomSelection" = randomSelection,
512 516
         "selectMulti" = selectMulti
513 517
     )
514 518
 }
Browse code

- .doSelection modified so that feature selection doesn't happen twice for selectMulti function. - selectMulti fixed so correct indices are returned relative to the whole training data table, not to an individual assay table.

Dario Strbenac authored on 22/11/2022 04:52:30
Showing 1 changed files
... ...
@@ -131,8 +131,8 @@
131 131
         paramList <- append(paramList, tuneCombo)
132 132
       do.call(featureRanking, paramList)
133 133
     })
134
-    
135
-    if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
134
+
135
+    if(attr(featureRanking, "name") %in% c("previousSelection", "Union Selection")) # Actually selection not ranking.
136 136
       return(list(NULL, rankings[[1]], NULL))
137 137
     
138 138
     if(crossValParams@tuneMode == "none") # No parameters to choose between.
Browse code

- Minor fixes.

Dario Strbenac authored on 17/10/2022 03:30:05
Showing 1 changed files
... ...
@@ -139,6 +139,12 @@
139 139
         return(list(NULL, rankings[[1]], NULL))
140 140
     
141 141
     tuneParamsTrain <- list(topN = topNfeatures)
142
+    performanceIndex <- match("performanceType", names(modellingParams@trainParams@tuneParams))
143
+    if(!is.na(performanceIndex))
144
+    {
145
+      performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
146
+      modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-performanceIndex]
147
+    }
142 148
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
143 149
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
144 150
     modellingParams@trainParams@tuneParams <- NULL
Browse code

- Minor fixes to standalone train and predict functions for list of inputs.

Dario Strbenac authored on 14/10/2022 06:45:03
Showing 1 changed files
... ...
@@ -140,9 +140,9 @@
140 140
     
141 141
     tuneParamsTrain <- list(topN = topNfeatures)
142 142
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
143
-    tuneParamsTrain <- tuneParamsTrain[-match("performanceType", names(tuneParamsTrain))]
144 143
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
145 144
     modellingParams@trainParams@tuneParams <- NULL
145
+    
146 146
     allPerformanceTables <- lapply(rankings, function(rankingsVariety)
147 147
     {
148 148
       # Creates a matrix. Columns are top n features, rows are varieties (one row if None).
Browse code

- Typo in train.DataFrame variable fixed. - .predict for DLDA made renamed to DLDA to enable easy dispatch by predict method. - randomForest wrapper now uses ranger as the underlying package instead of randomForest.

Dario Strbenac authored on 14/10/2022 00:30:12
Showing 1 changed files
... ...
@@ -167,7 +167,7 @@
167 167
           result <- runTest(measurementsTrain, outcomeTrain, measurementsTrain, outcomeTrain,
168 168
                             crossValParams = NULL, modellingParams = modellingParams,
169 169
                             verbose = verbose, .iteration = "internal")
170
-          
170
+
171 171
           predictions <- result[["predictions"]]
172 172
           # Classifiers will use a column "class" and survival models will use a column "risk".
173 173
           if(class(predictions) == "data.frame")
... ...
@@ -222,6 +222,7 @@
222 222
                             measurementsTrain, outcomeTrain,
223 223
                             crossValParams = NULL, modellingParams,
224 224
                             verbose = verbose, .iteration = "internal")
225
+
225 226
           predictions <- result[["predictions"]]
226 227
           if(class(predictions) == "data.frame")
227 228
             predictedOutcome <- predictions[, "class"]
... ...
@@ -275,7 +276,7 @@
275 276
         result <- runTest(measurementsTrain, outcomeTrain, measurementsTrain, outcomeTrain,
276 277
                           crossValParams = NULL, modellingParams,
277 278
                           verbose = verbose, .iteration = "internal")
278
-        
279
+
279 280
         predictions <- result[["predictions"]]
280 281
         if(class(predictions) == "data.frame")
281 282
           predictedOutcome <- predictions[, colnames(predictions) %in% c("class", "risk")]
... ...
@@ -580,7 +581,8 @@
580 581
   obj
581 582
 }
582 583
 
583
-.predict <- function(object, newdata, ...) { # Remove once sparsediscrim is reinstated to CRAN.
584
+#' @method predict dlda
585
+predict.dlda <- function(object, newdata, ...) { # Remove once sparsediscrim is reinstated to CRAN.
584 586
   if (!inherits(object, "dlda"))  {
585 587
     stop("object not of class 'dlda'")
586 588
   }
Browse code

- Extreme gradient boosting wrapper added in interfaceXGB.R. - crossValidate gains performanceType parameter that defaults to "auto" which chooses depending on classification or survival task or can be set to any user-specified performance metric. - SelectParams default performanceType is now balanced accuracy, for consistency with other functions. - Vignette text updated to refer to balanced accuracy and to add k-NN and XGB classifiers to classifier table. - NEWS file updated for upcoming Bioconductor release.

Dario Strbenac authored on 12/10/2022 13:05:23
Showing 1 changed files
... ...
@@ -140,6 +140,7 @@
140 140
     
141 141
     tuneParamsTrain <- list(topN = topNfeatures)
142 142
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
143
+    tuneParamsTrain <- tuneParamsTrain[-match("performanceType", names(tuneParamsTrain))]
143 144
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
144 145
     modellingParams@trainParams@tuneParams <- NULL
145 146
     allPerformanceTables <- lapply(rankings, function(rankingsVariety)
... ...
@@ -511,6 +512,7 @@
511 512
         keyword,
512 513
         "randomForest" = RFparams(),
513 514
         "randomSurvivalForest" = RSFparams(),
515
+        "XGB" = XGBparams(),
514 516
         "GLM" = GLMparams(),
515 517
         "elasticNetGLM" = elasticNetGLMparams(),
516 518
         "SVM" = SVMparams(),
Browse code

- Restored utilities.R.

Dario Strbenac authored on 07/09/2022 00:48:49
Showing 1 changed files
... ...
@@ -1,152 +1,3 @@
1
-# Operates on an input data frame, to extract the outcome column(s) and return
2
-# a list with the table of covariates in one element and the outcome in another.
3
-# The outcome need to be removed from the data table before predictor training!
4
-.splitDataAndOutcome <- function(measurements, outcome, restrict = NULL)
5
-{ # DataFrame's outcome variable can be character or factor, so it's a bit involved.
6
-  if(is.character(outcome) && length(outcome) > 3 && length(outcome) != nrow(measurements))
7
-    stop("'outcome' is a character variable but has more than one element. Either provide a\n",
8
-         "       one to three column names or a factor of the same length as the number of samples.")
9
-
10
-  ## String specifies the name of a single outcome column, typically a class.
11
-  if(is.character(outcome) && length(outcome) == 1)
12
-  {
13
-    outcomeColumn <- match(outcome, colnames(measurements))
14
-    if(is.na(outcomeColumn))
15
-      stop("Specified column name of outcome is not present in the data table.")
16
-    outcome <- measurements[, outcomeColumn]
17
-    measurements <- measurements[, -outcomeColumn, drop = FALSE]
18
-    # R version 4 and greater no longer automatically casts character columns to factors because stringsAsFactors
19
-    # is FALSE by default, so it is more likely to be character format these days. Handle it.
20
-    if(class(outcome) != "factor") # Assume there will be no ordinary regression prediction tasks ... for now.
21
-      outcome <- factor(outcome)
22
-  }
23
-  
24
-  # survival's Surv constructor has two inputs for the popular right-censored data and
25
-  # three inputs for less-common interval data.
26
-  if(is.character(outcome) && length(outcome) %in% 2:3)
27
-  {
28
-    outcomeColumns <- match(outcome, colnames(measurements))
29
-    if(any(is.na(outcomeColumns)))
30
-      stop("Specified column names of outcome is not present in the data table.")
31
-    outcome <- measurements[, outcomeColumns]
32
-    measurements <- measurements[, -outcomeColumns, drop = FALSE]
33
-  }
34
-  
35
-  if(is(outcome, "factor") && length(outcome) > 3 & length(outcome) < nrow(measurements))
36
-    stop("The length of outcome is not equal to the number of samples.")
37
-  
38
-  ## A vector of characters was input by the user. Ensure that it is a factor.
39
-  if(is.character(outcome) & length(outcome) == nrow(measurements))
40
-    outcome <- factor(outcome)
41
-  
42
-  # Outcome has columns, so it is tabular. It is inferred to represent survival data.
43
-  if(!is.null(ncol(outcome)) && ncol(outcome) %in% 2:3)
44
-  {
45
-    # Assume that event status is in the last column (second for two columns, third for three columns)
46
-    numberEventTypes <- length(unique(outcome[, ncol(outcome)]))
47
-    # Could be one or two kinds of events. All events might be uncensored or censored
48
-    # in a rare but not impossible scenario.
49
-    if(numberEventTypes > 2)
50
-      stop("Number of distinct event types in the last column exceeds 2 but must be 1 or 2.")
51
-      
52
-    if(ncol(outcome) == 2) # Typical, right-censored survival data.
53
-      outcome <- survival::Surv(outcome[, 1], outcome[, 2])
54
-    else # Three columns. Therefore, counting process data.
55
-      outcome <- survival::Surv(outcome[, 1], outcome[, 2], outcome[, 3])
56
-  }
57
-  
58
-  if(!is.null(restrict))
59
-  {
60
-    isDesiredClass <- sapply(measurements, function(column) is(column, restrict))
61
-    measurements <- measurements[, isDesiredClass, drop = FALSE]
62
-    if(ncol(measurements) == 0)
63
-      stop(paste("No features are left after restricting to", restrict, "but at least one must be."))
64
-  }
65
-
66
-  list(measurements = measurements, outcome = outcome)
67
-}
68
-
69
-# Function to convert a MultiAssayExperiment object into a flat DataFrame table, to enable it
70
-# to be used in typical model building functions.
71
-# Returns a list with a covariate table and and outcome vector/table, or just a covariate table
72
-# in the case the input is a test data set.
73
-.MAEtoWideTable <- function(measurements, targets = NULL, outcomeColumns = NULL, restrict = "numeric")
74
-{
75
-  if(is.null(targets))
76
-    stop("'targets' is not specified but must be.")
77
-  if(is.null(outcomeColumns))
78
-    stop("'outcomeColumns' is not specified but must be.")    
79
-  if(!all(targets %in% c(names(measurements), "sampleInfo")))
80
-    stop("Some table names in 'targets' are not assay names in 'measurements' or \"sampleInfo\".")
81
-  sampleInfoColumns <- colnames(MultiAssayExperiment::colData(measurements))
82
-  if(!missing(outcomeColumns) & !all(outcomeColumns %in% sampleInfoColumns))
83
-    stop("Not all column names specified by 'outcomeColumns' found in sample information table.")  
84
-
85
-  if("sampleInfo" %in% targets)
86
-  {
87
-    targets <- targets[targets != "sampleInfo"]
88
-    sampleInfoColumnsTrain <- sampleInfoColumns
89
-  } else {
90
-    sampleInfoColumnsTrain <- NULL
91
-  }
92
-  
93
-  if(length(targets) > 0)
94
-  {
95
-    measurements <- measurements[, , targets]
96
-  
97
-    # Get all desired measurements tables and sample information columns (other than the columns representing outcome).
98
-    # These form the independent variables to be used for making predictions with.
99
-    # Variable names will have names like RNA:BRAF for traceability.
100
-    dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomeColumns), check.names = FALSE, collapse = ':')
101
-    rownames(dataTable) <- dataTable[, "primary"]
102
-    S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"])
103
-    colnames(S4Vectors::mcols(dataTable))[1] <- "assay"
104
-  
105
-    # Sample information variable names not included in column metadata of wide table but only as row names of it.
106
-    # Create a combined column named "feature" which has feature names of the assays as well as the sample information.
107
-    S4Vectors::mcols(dataTable)[, "feature"] <- as.character(S4Vectors::mcols(dataTable)[, "rowname"])
108
-    missingIndices <- is.na(S4Vectors::mcols(dataTable)[, "feature"])
109
-    S4Vectors::mcols(dataTable)[missingIndices, "feature"] <- colnames(dataTable)[missingIndices]
110
-    
111
-    # Finally, a column annotation recording variable name and which table it originated from for all of the source tables.
112
-    S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("assay", "feature")]
113
-  } else { # Must have only been sample information data.
114
-    dataTable <- MultiAssayExperiment::colData(measurements)
115
-  }
116
-  if(!is.null(outcomeColumns)) outcome <- dataTable[, outcomeColumns]
117
-  
118
-  if(!is.null(restrict))
119
-  {
120
-    isDesiredClass <- sapply(dataTable, function(column) is(column, restrict))
121
-    dataTable <- dataTable[, isDesiredClass, drop = FALSE]
122
-    if(ncol(dataTable) == 0)
123
-      stop(paste("No features are left after restricting to", restrict, "but at least one must be."))
124
-  }
125
-
126
-  # Only return independent variables in dataTable for making classifications with.
127
-  # "primary" column is auto-generated by sample information table row names and a duplicate.
128
-  dropColumns <- na.omit(match(c("primary", outcomeColumns), colnames(dataTable)))
129
-  if(length(dropColumns) > 0) dataTable <- dataTable[, -dropColumns]
130
-  
131
-  # Training data table and outcome for training data.
132
-  if(!is.null(outcomeColumns))
133
-      list(dataTable = dataTable, outcome = outcome)
134
-  else # Only test data table for test data input.
135
-    dataTable
136
-}
137
-
138
-# For classifiers which use one single function for inputting a training and a testing table,
139
-# and work only for the numeric data type, this checks whether the training and testing tables 
140
-# both have the same set of features and there are at least some numeric features to use,
141
-# after they have been filtered by another function which splits the covariates and the outcome from the input.
142
-.checkVariablesAndSame <- function(trainingMatrix, testingMatrix)
143
-{
144
-  if(ncol(trainingMatrix) == 0) # Filtering of table removed all columns, leaving nothing to classify with.
145
-    stop("No variables in data tables specified by \'targets\' are numeric.")
146
-  else if(ncol(trainingMatrix) != ncol(testingMatrix))
147
-    stop("Training data set and testing data set contain differing numbers of features.")  
148
-}
149
-
150 1
 # Creates two lists of lists. First has training samples, second has test samples for a range
151 2
 # of different cross-validation schemes.
152 3
 #' @import utils
... ...
@@ -255,7 +106,6 @@
255 106
   tuneParams <- modellingParams@selectParams@tuneParams
256 107
   performanceType <- tuneParams[["performanceType"]]
257 108
   topNfeatures <- tuneParams[["nFeatures"]]
258
-  tuneMode <- ifelse("tuneMode" %in% names(tuneParams), tuneParams[["tuneMode"]], crossValParams@tuneMode)
259 109
   tuneParams <- tuneParams[-match(c("performanceType", "nFeatures"), names(tuneParams))] # Only used as evaluation metric.
260 110
   
261 111
   # Make selectParams NULL, since we are currently doing selection and it shouldn't call
... ...
@@ -282,17 +132,17 @@
282 132
       do.call(featureRanking, paramList)
283 133
     })
284 134
     
285
-    if(featureRanking@generic == "previousSelection") # Actually selection not ranking.
135
+    if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
286 136
       return(list(NULL, rankings[[1]], NULL))
287 137
     
288
-    if(tuneMode == "none") # Actually selection not ranking.
138
+    if(crossValParams@tuneMode == "none") # No parameters to choose between.
289 139
         return(list(NULL, rankings[[1]], NULL))
290 140
     
291 141
     tuneParamsTrain <- list(topN = topNfeatures)
292 142
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
293 143
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
294 144
     modellingParams@trainParams@tuneParams <- NULL
295
-    bestPerformers <- sapply(rankings, function(rankingsVariety)
145
+    allPerformanceTables <- lapply(rankings, function(rankingsVariety)
296 146
     {
297 147
       # Creates a matrix. Columns are top n features, rows are varieties (one row if None).
298 148
       performances <- sapply(1:nrow(tuneCombosTrain), function(rowIndex)
... ...
@@ -332,20 +182,22 @@
332 182
        })
333 183
 
334 184
         bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
335
-        c(bestOne, performances[bestOne])
185
+        list(data.frame(tuneCombosTrain, performance = performances), bestOne)
336 186
       })
337 187
 
338
-      tunePick <- ifelse(betterValues == "lower", which.min(bestPerformers[2, ])[1], which.max(bestPerformers[2, ])[1])
188
+      tablesBestMetrics <- sapply(allPerformanceTables, function(tableIndexPair) tableIndexPair[[1]][tableIndexPair[[2]], "performance"])
189
+      tunePick <- ifelse(betterValues == "lower", which.min(tablesBestMetrics)[1], which.max(tablesBestMetrics)[1])
339 190
       
340 191
       if(verbose == 3)
341 192
          message("Features selected.")
342 193
       
343
-      tuneRow <- tuneCombosTrain[bestPerformers[1, tunePick], , drop  = FALSE]
344
-      if(ncol(tuneRow) > 1) tuneDetails <- tuneRow[, -1, drop = FALSE] else tuneDetails <- NULL
194
+      tuneDetails <- allPerformanceTables[[tunePick]] # List of length 2.
345 195
       
346 196
       rankingUse <- rankings[[tunePick]]
347
-      selectionIndices <- rankingUse[1:tuneRow[, "topN"]]
197
+      selectionIndices <- rankingUse[1:(tuneDetails[[1]][tuneDetails[[2]], "topN"])]
348 198
       
199
+      names(tuneDetails) <- c("tuneCombinations", "bestIndex")
200
+      colnames(tuneDetails[[1]])[ncol(tuneDetails[[1]])] <- performanceType
349 201
       list(ranked = rankingUse, selected = selectionIndices, tune = tuneDetails)
350 202
     } else if(is.list(featureRanking)) { # It is a list of functions for ensemble selection.
351 203
       featuresIndiciesLists <- mapply(function(selector, selParams)
... ...
@@ -388,8 +240,7 @@
388 240
       
389 241
       list(NULL, selectionIndices, NULL)
390 242
     } else { # Previous selection
391
-      selectedFeatures <- 
392
-      list(NULL, selectionIndices, NULL)
243
+      selectedFeatures <- list(NULL, selectionIndices, NULL)
393 244
     }
394 245
 }
395 246
 
... ...
@@ -405,10 +256,10 @@
405 256
 
406 257
 # Code to create a function call to a training function. Might also do training and testing
407 258
 # within the same function, so test samples are also passed in case they are needed.
408
-.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose)
259
+.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, crossValParams, modellingParams, verbose)
409 260
 {
410
-  tuneChosen <- NULL
411
-  if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams@tuneParams))
261
+  tuneDetails <- NULL
262
+  if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams))
412 263
   {
413 264
     performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
414 265
     modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-match("performanceType", names(modellingParams@trainParams@tuneParams))]
... ...
@@ -420,16 +271,16 @@
420 271
       modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, as.list(tuneCombos[rowIndex, ]))
421 272
       if(crossValParams@tuneMode == "Resubstitution")
422 273
       {
423
-        result <- runTest(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest,
274
+        result <- runTest(measurementsTrain, outcomeTrain, measurementsTrain, outcomeTrain,
424 275
                           crossValParams = NULL, modellingParams,
425 276
                           verbose = verbose, .iteration = "internal")
426 277
         
427 278
         predictions <- result[["predictions"]]
428 279
         if(class(predictions) == "data.frame")
429
-          predictedOutcome <- predictions[, "outcome"]
280
+          predictedOutcome <- predictions[, colnames(predictions) %in% c("class", "risk")]
430 281
         else
431 282
           predictedOutcome <- predictions
432
-        calcExternalPerformance(outcomeTest, predictedOutcome, performanceType)
283
+        calcExternalPerformance(outcomeTrain, predictedOutcome, performanceType)
433 284
       } else {
434 285
         result <- runTests(measurementsTrain, outcomeTrain,
435 286
                            crossValParams, modellingParams,
... ...
@@ -438,13 +289,18 @@
438 289
         median(performances(result)[[performanceType]])
439 290
       }
440 291
     })
292
+    allPerformanceTable <- data.frame(tuneCombos, performances)
293
+    colnames(allPerformanceTable)[ncol(allPerformanceTable)] <- performanceType
294
+    
441 295
     betterValues <- .ClassifyRenvir[["performanceInfoTable"]][.ClassifyRenvir[["performanceInfoTable"]][, "type"] == performanceType, "better"]
442 296
     bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
443 297
     tuneChosen <- tuneCombos[bestOne, , drop = FALSE]
298
+    tuneDetails <- list(tuneCombos, bestOne)
299
+    names(tuneDetails) <- c("tuneCombinations", "bestIndex")
444 300
     modellingParams@trainParams@otherParams <- tuneChosen
445 301
   }
446 302
 
447
-  if(modellingParams@trainParams@classifier@generic != "previousTrained")
303
+    if (!"previousTrained" %in% attr(modellingParams@trainParams@classifier, "name")) 
448 304
     # Don't name these first two variables. Some classifier functions might use classesTrain and others use outcomeTrain.
449 305
     paramList <- list(measurementsTrain, outcomeTrain)
450 306
   else # Don't pass the measurements and classes, because a pre-existing classifier is used.
... ...
@@ -459,7 +315,7 @@
459 315
   if(verbose >= 2)
460 316
     message("Training completed.")  
461 317
   
462
-  list(model = trained, tune = tuneChosen)
318
+  list(model = trained, tune = tuneDetails)
463 319
 }
464 320
 
465 321
 # Creates a function call to a prediction function.
... ...
@@ -528,15 +384,6 @@
528 384
 # by user-specified values.
529 385
 .filterCharacteristics <- function(characteristics, autoCharacteristics)
530 386
 {
531
-  # Remove duplication of values for classifiers that have one function for training and 
532
-  # one function for prediction.
533
-  if("Classifier Name" %in% autoCharacteristics[, "characteristic"] && "Predictor Name" %in% autoCharacteristics[, "characteristic"])
534
-  {
535
-    classRow <- which(autoCharacteristics[, "characteristic"] == "Classifier Name")
536
-    predRow <- which(autoCharacteristics[, "characteristic"] == "Predictor Name")
537
-    if(autoCharacteristics[classRow, "value"] == autoCharacteristics[predRow, "value"])
538
-      autoCharacteristics <- autoCharacteristics[-predRow, ]
539
-  }
540 387
   # Overwrite automatically-chosen names with user's names.
541 388
   if(nrow(autoCharacteristics) > 0 && nrow(characteristics) > 0)
542 389
   {
... ...
@@ -560,36 +407,6 @@
560 407
   plotData
561 408
 }
562 409
 
563
-# Summary of the features used and the total number of them, no matter if they are a simple type
564
-# or something more complex like Pairs or feature sets.
565
-.summaryFeatures <- function(measurements)
566
-{
567
-  # MultiAssayExperiment has feature details in mcols.
568
-  if(!is.null(S4Vectors::mcols(measurements)))
569
-  {
570
-    originalInfo <- S4Vectors::mcols(measurements)
571
-    featureNames <- S4Vectors::mcols(measurements)[, "feature"]
572
-    assays <- unique(S4Vectors::mcols(measurements)[, "assay"])
573
-    renamedInfo <- S4Vectors::mcols(measurements)
574
-    renamedAssays <- paste("Assay", seq_along(assays), sep = '')
575
-    for(assay in assays)
576
-    {
577
-      rowsAssay <- which(renamedInfo[, "assay"] == assay)
578
-      renamedInfo[rowsAssay, "feature"] <- paste("Feature", seq_along(rowsAssay), sep = '')
579
-      renamedInfo[rowsAssay, "assay"] <- renamedAssays[match(assay, assays)]
580
-    }
581
-    featuresInfo <- S4Vectors::DataFrame(originalInfo, renamedInfo)
582
-    colnames(featuresInfo) <- c("Original Assay", "Original Feature", "Renamed Assay", "Renamed Feature")
583
-    featuresInfo <- cbind(originalInfo, featuresInfo)
584
-  } else {
585
-    originalFeatures <- colnames(measurements)
586
-    renamedInfo <- paste("Feature", seq_along(measurements), sep = '')
587
-    featuresInfo <- S4Vectors::DataFrame(originalFeatures, renamedInfo)
588
-    colnames(featuresInfo) <- c("Original Feature", "Renamed Feature")
589
-  }
590
-  featuresInfo
591
-}
592
-
593 410
 # Function to identify the parameters of an S4 method.
594 411
 .methodFormals <- function(f, signature) {
595 412
   tryCatch({
... ...
@@ -660,6 +477,53 @@
660 477
   list(measurementsTrain = measurementsTrain, classesTrain = classesTrain)
661 478
 }
662 479
 
480
+.transformKeywordToFunction <- function(keyword)
481
+{
482
+  switch(
483
+        keyword,
484
+        "none" = NULL,
485
+        "diffLoc" = subtractFromLocation
486
+    )
487
+}
488
+
489
+.selectionKeywordToFunction <- function(keyword)
490
+{
491
+  switch(
492
+        keyword,
493
+        "none" = NULL,
494
+        "t-test" = differentMeansRanking,
495
+        "limma" = limmaRanking,
496
+        "edgeR" = edgeRranking,
497
+        "Bartlett" = bartlettRanking,
498
+        "Levene" = leveneRanking,
499
+        "DMD" = DMDranking,
500
+        "likelihoodRatio" = likelihoodRatioRanking,
501
+        "KS" = KolmogorovSmirnovRanking,
502
+        "KL" = KullbackLeiblerRanking,
503
+        "CoxPH" = coxphRanking,
504
+        "selectMulti" = selectMulti
505
+    )
506
+}
507
+
508
+.classifierKeywordToParams <- function(keyword)
509
+{
510
+    switch(
511
+        keyword,
512
+        "randomForest" = RFparams(),
513
+        "randomSurvivalForest" = RSFparams(),
514
+        "GLM" = GLMparams(),
515
+        "elasticNetGLM" = elasticNetGLMparams(),
516
+        "SVM" = SVMparams(),
517
+        "NSC" = NSCparams(),
518
+        "DLDA" = DLDAparams(),
519
+        "naiveBayes" = naiveBayesParams(),
520
+        "mixturesNormals" = mixModelsParams(),
521
+        "kNN" = kNNparams(),
522
+        "CoxPH" = coxphParams(),
523
+        "CoxNet" = coxnetParams()
524
+    )    
525
+}
526
+
663 527
 .dlda <- function(x, y, prior = NULL){ # Remove this once sparsediscrim is reinstated to CRAN.
664 528
   obj <- list()
665 529
   obj$labels <- y
Browse code

export colCoxTests

Ellis Patrick authored on 06/09/2022 22:57:36
Showing 1 changed files
... ...
@@ -1,3 +1,152 @@
1
+# Operates on an input data frame, to extract the outcome column(s) and return
2
+# a list with the table of covariates in one element and the outcome in another.
3
+# The outcome need to be removed from the data table before predictor training!
4
+.splitDataAndOutcome <- function(measurements, outcome, restrict = NULL)
5
+{ # DataFrame's outcome variable can be character or factor, so it's a bit involved.
6
+  if(is.character(outcome) && length(outcome) > 3 && length(outcome) != nrow(measurements))
7
+    stop("'outcome' is a character variable but has more than one element. Either provide a\n",
8
+         "       one to three column names or a factor of the same length as the number of samples.")
9
+
10
+  ## String specifies the name of a single outcome column, typically a class.
11
+  if(is.character(outcome) && length(outcome) == 1)
12
+  {
13
+    outcomeColumn <- match(outcome, colnames(measurements))
14
+    if(is.na(outcomeColumn))
15
+      stop("Specified column name of outcome is not present in the data table.")
16
+    outcome <- measurements[, outcomeColumn]
17
+    measurements <- measurements[, -outcomeColumn, drop = FALSE]
18
+    # R version 4 and greater no longer automatically casts character columns to factors because stringsAsFactors
19
+    # is FALSE by default, so it is more likely to be character format these days. Handle it.
20
+    if(class(outcome) != "factor") # Assume there will be no ordinary regression prediction tasks ... for now.
21
+      outcome <- factor(outcome)
22
+  }
23
+  
24
+  # survival's Surv constructor has two inputs for the popular right-censored data and
25
+  # three inputs for less-common interval data.
26
+  if(is.character(outcome) && length(outcome) %in% 2:3)
27
+  {
28
+    outcomeColumns <- match(outcome, colnames(measurements))
29
+    if(any(is.na(outcomeColumns)))
30
+      stop("Specified column names of outcome is not present in the data table.")
31
+    outcome <- measurements[, outcomeColumns]
32
+    measurements <- measurements[, -outcomeColumns, drop = FALSE]
33
+  }
34
+  
35
+  if(is(outcome, "factor") && length(outcome) > 3 & length(outcome) < nrow(measurements))
36
+    stop("The length of outcome is not equal to the number of samples.")
37
+  
38
+  ## A vector of characters was input by the user. Ensure that it is a factor.
39
+  if(is.character(outcome) & length(outcome) == nrow(measurements))
40
+    outcome <- factor(outcome)
41
+  
42
+  # Outcome has columns, so it is tabular. It is inferred to represent survival data.
43
+  if(!is.null(ncol(outcome)) && ncol(outcome) %in% 2:3)
44
+  {
45
+    # Assume that event status is in the last column (second for two columns, third for three columns)
46
+    numberEventTypes <- length(unique(outcome[, ncol(outcome)]))
47
+    # Could be one or two kinds of events. All events might be uncensored or censored
48
+    # in a rare but not impossible scenario.
49
+    if(numberEventTypes > 2)
50
+      stop("Number of distinct event types in the last column exceeds 2 but must be 1 or 2.")
51
+      
52
+    if(ncol(outcome) == 2) # Typical, right-censored survival data.
53
+      outcome <- survival::Surv(outcome[, 1], outcome[, 2])
54
+    else # Three columns. Therefore, counting process data.
55
+      outcome <- survival::Surv(outcome[, 1], outcome[, 2], outcome[, 3])
56
+  }
57
+  
58
+  if(!is.null(restrict))
59
+  {
60
+    isDesiredClass <- sapply(measurements, function(column) is(column, restrict))
61
+    measurements <- measurements[, isDesiredClass, drop = FALSE]
62
+    if(ncol(measurements) == 0)
63
+      stop(paste("No features are left after restricting to", restrict, "but at least one must be."))
64
+  }
65
+
66
+  list(measurements = measurements, outcome = outcome)
67
+}
68
+
69
+# Function to convert a MultiAssayExperiment object into a flat DataFrame table, to enable it
70
+# to be used in typical model building functions.
71
+# Returns a list with a covariate table and and outcome vector/table, or just a covariate table
72
+# in the case the input is a test data set.
73
+.MAEtoWideTable <- function(measurements, targets = NULL, outcomeColumns = NULL, restrict = "numeric")
74
+{
75
+  if(is.null(targets))
76
+    stop("'targets' is not specified but must be.")
77
+  if(is.null(outcomeColumns))
78
+    stop("'outcomeColumns' is not specified but must be.")    
79
+  if(!all(targets %in% c(names(measurements), "sampleInfo")))
80
+    stop("Some table names in 'targets' are not assay names in 'measurements' or \"sampleInfo\".")
81
+  sampleInfoColumns <- colnames(MultiAssayExperiment::colData(measurements))
82
+  if(!missing(outcomeColumns) & !all(outcomeColumns %in% sampleInfoColumns))
83
+    stop("Not all column names specified by 'outcomeColumns' found in sample information table.")  
84
+
85
+  if("sampleInfo" %in% targets)
86
+  {
87
+    targets <- targets[targets != "sampleInfo"]
88
+    sampleInfoColumnsTrain <- sampleInfoColumns
89
+  } else {
90
+    sampleInfoColumnsTrain <- NULL
91
+  }
92
+  
93
+  if(length(targets) > 0)
94
+  {
95
+    measurements <- measurements[, , targets]
96
+  
97
+    # Get all desired measurements tables and sample information columns (other than the columns representing outcome).
98
+    # These form the independent variables to be used for making predictions with.
99
+    # Variable names will have names like RNA:BRAF for traceability.
100
+    dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomeColumns), check.names = FALSE, collapse = ':')
101
+    rownames(dataTable) <- dataTable[, "primary"]
102
+    S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"])
103
+    colnames(S4Vectors::mcols(dataTable))[1] <- "assay"
104
+  
105
+    # Sample information variable names not included in column metadata of wide table but only as row names of it.
106
+    # Create a combined column named "feature" which has feature names of the assays as well as the sample information.
107
+    S4Vectors::mcols(dataTable)[, "feature"] <- as.character(S4Vectors::mcols(dataTable)[, "rowname"])
108
+    missingIndices <- is.na(S4Vectors::mcols(dataTable)[, "feature"])
109
+    S4Vectors::mcols(dataTable)[missingIndices, "feature"] <- colnames(dataTable)[missingIndices]
110
+    
111
+    # Finally, a column annotation recording variable name and which table it originated from for all of the source tables.
112
+    S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("assay", "feature")]
113
+  } else { # Must have only been sample information data.
114
+    dataTable <- MultiAssayExperiment::colData(measurements)
115
+  }
116
+  if(!is.null(outcomeColumns)) outcome <- dataTable[, outcomeColumns]
117
+  
118
+  if(!is.null(restrict))
119
+  {
120
+    isDesiredClass <- sapply(dataTable, function(column) is(column, restrict))
121
+    dataTable <- dataTable[, isDesiredClass, drop = FALSE]
122
+    if(ncol(dataTable) == 0)
123
+      stop(paste("No features are left after restricting to", restrict, "but at least one must be."))
124
+  }
125
+
126
+  # Only return independent variables in dataTable for making classifications with.
127
+  # "primary" column is auto-generated by sample information table row names and a duplicate.
128
+  dropColumns <- na.omit(match(c("primary", outcomeColumns), colnames(dataTable)))
129
+  if(length(dropColumns) > 0) dataTable <- dataTable[, -dropColumns]
130
+  
131
+  # Training data table and outcome for training data.
132
+  if(!is.null(outcomeColumns))
133
+      list(dataTable = dataTable, outcome = outcome)
134
+  else # Only test data table for test data input.
135
+    dataTable
136
+}
137
+
138
+# For classifiers which use one single function for inputting a training and a testing table,
139
+# and work only for the numeric data type, this checks whether the training and testing tables 
140
+# both have the same set of features and there are at least some numeric features to use,
141
+# after they have been filtered by another function which splits the covariates and the outcome from the input.
142
+.checkVariablesAndSame <- function(trainingMatrix, testingMatrix)
143
+{
144
+  if(ncol(trainingMatrix) == 0) # Filtering of table removed all columns, leaving nothing to classify with.
145
+    stop("No variables in data tables specified by \'targets\' are numeric.")
146
+  else if(ncol(trainingMatrix) != ncol(testingMatrix))
147
+    stop("Training data set and testing data set contain differing numbers of features.")  
148
+}
149
+
1 150
 # Creates two lists of lists. First has training samples, second has test samples for a range
2 151
 # of different cross-validation schemes.
3 152
 #' @import utils
... ...
@@ -106,6 +255,7 @@
106 255
   tuneParams <- modellingParams@selectParams@tuneParams
107 256
   performanceType <- tuneParams[["performanceType"]]
108 257
   topNfeatures <- tuneParams[["nFeatures"]]
258
+  tuneMode <- ifelse("tuneMode" %in% names(tuneParams), tuneParams[["tuneMode"]], crossValParams@tuneMode)
109 259
   tuneParams <- tuneParams[-match(c("performanceType", "nFeatures"), names(tuneParams))] # Only used as evaluation metric.
110 260
   
111 261
   # Make selectParams NULL, since we are currently doing selection and it shouldn't call
... ...
@@ -132,17 +282,17 @@
132 282
       do.call(featureRanking, paramList)
133 283
     })
134 284
     
135
-    if(attr(featureRanking, "name") == "previousSelection") # Actually selection not ranking.
285
+    if(featureRanking@generic == "previousSelection") # Actually selection not ranking.
136 286
       return(list(NULL, rankings[[1]], NULL))
137 287
     
138
-    if(crossValParams@tuneMode == "none") # No parameters to choose between.
288
+    if(tuneMode == "none") # Actually selection not ranking.
139 289
         return(list(NULL, rankings[[1]], NULL))
140 290
     
141 291
     tuneParamsTrain <- list(topN = topNfeatures)
142 292
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
143 293
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
144 294
     modellingParams@trainParams@tuneParams <- NULL
145
-    allPerformanceTables <- lapply(rankings, function(rankingsVariety)
295
+    bestPerformers <- sapply(rankings, function(rankingsVariety)
146 296
     {
147 297
       # Creates a matrix. Columns are top n features, rows are varieties (one row if None).
148 298
       performances <- sapply(1:nrow(tuneCombosTrain), function(rowIndex)
... ...
@@ -182,22 +332,20 @@
182 332
        })
183 333
 
184 334
         bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
185
-        list(data.frame(tuneCombosTrain, performance = performances), bestOne)
335
+        c(bestOne, performances[bestOne])
186 336
       })
187 337
 
188
-      tablesBestMetrics <- sapply(allPerformanceTables, function(tableIndexPair) tableIndexPair[[1]][tableIndexPair[[2]], "performance"])
189
-      tunePick <- ifelse(betterValues == "lower", which.min(tablesBestMetrics)[1], which.max(tablesBestMetrics)[1])
338
+      tunePick <- ifelse(betterValues == "lower", which.min(bestPerformers[2, ])[1], which.max(bestPerformers[2, ])[1])
190 339
       
191 340
       if(verbose == 3)
192 341
          message("Features selected.")
193 342
       
194
-      tuneDetails <- allPerformanceTables[[tunePick]] # List of length 2.
343
+      tuneRow <- tuneCombosTrain[bestPerformers[1, tunePick], , drop  = FALSE]
344
+      if(ncol(tuneRow) > 1) tuneDetails <- tuneRow[, -1, drop = FALSE] else tuneDetails <- NULL
195 345
       
196 346
       rankingUse <- rankings[[tunePick]]
197
-      selectionIndices <- rankingUse[1:(tuneDetails[[1]][tuneDetails[[2]], "topN"])]
347
+      selectionIndices <- rankingUse[1:tuneRow[, "topN"]]
198 348
       
199
-      names(tuneDetails) <- c("tuneCombinations", "bestIndex")
200
-      colnames(tuneDetails[[1]])[ncol(tuneDetails[[1]])] <- performanceType
201 349
       list(ranked = rankingUse, selected = selectionIndices, tune = tuneDetails)
202 350
     } else if(is.list(featureRanking)) { # It is a list of functions for ensemble selection.
203 351
       featuresIndiciesLists <- mapply(function(selector, selParams)
... ...
@@ -240,7 +388,8 @@
240 388
       
241 389
       list(NULL, selectionIndices, NULL)
242 390
     } else { # Previous selection
243
-      selectedFeatures <- list(NULL, selectionIndices, NULL)
391
+      selectedFeatures <- 
392
+      list(NULL, selectionIndices, NULL)
244 393
     }
245 394
 }
246 395
 
... ...
@@ -256,10 +405,10 @@
256 405
 
257 406
 # Code to create a function call to a training function. Might also do training and testing
258 407
 # within the same function, so test samples are also passed in case they are needed.
259
-.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, crossValParams, modellingParams, verbose)
408
+.doTrain <- function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest, modellingParams, verbose)
260 409
 {
261
-  tuneDetails <- NULL
262
-  if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams))
410
+  tuneChosen <- NULL
411
+  if(!is.null(modellingParams@trainParams@tuneParams) && is.null(modellingParams@selectParams@tuneParams))
263 412
   {
264 413
     performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
265 414
     modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-match("performanceType", names(modellingParams@trainParams@tuneParams))]
... ...
@@ -271,16 +420,16 @@
271 420
       modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, as.list(tuneCombos[rowIndex, ]))
272 421
       if(crossValParams@tuneMode == "Resubstitution")
273 422
       {
274
-        result <- runTest(measurementsTrain, outcomeTrain, measurementsTrain, outcomeTrain,
423
+        result <- runTest(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest,
275 424
                           crossValParams = NULL, modellingParams,
276 425
                           verbose = verbose, .iteration = "internal")
277 426
         
278 427
         predictions <- result[["predictions"]]
279 428
         if(class(predictions) == "data.frame")
280
-          predictedOutcome <- predictions[, colnames(predictions) %in% c("class", "risk")]
429
+          predictedOutcome <- predictions[, "outcome"]
281 430
         else
282 431
           predictedOutcome <- predictions
283
-        calcExternalPerformance(outcomeTrain, predictedOutcome, performanceType)
432
+        calcExternalPerformance(outcomeTest, predictedOutcome, performanceType)
284 433
       } else {
285 434
         result <- runTests(measurementsTrain, outcomeTrain,
286 435
                            crossValParams, modellingParams,
... ...
@@ -289,18 +438,13 @@
289 438
         median(performances(result)[[performanceType]])
290 439
       }
291 440
     })
292
-    allPerformanceTable <- data.frame(tuneCombos, performances)
293
-    colnames(allPerformanceTable)[ncol(allPerformanceTable)] <- performanceType
294
-    
295 441
     betterValues <- .ClassifyRenvir[["performanceInfoTable"]][.ClassifyRenvir[["performanceInfoTable"]][, "type"] == performanceType, "better"]
296 442
     bestOne <- ifelse(betterValues == "lower", which.min(performances)[1], which.max(performances)[1])
297 443
     tuneChosen <- tuneCombos[bestOne, , drop = FALSE]
298
-    tuneDetails <- list(tuneCombos, bestOne)
299
-    names(tuneDetails) <- c("tuneCombinations", "bestIndex")
300 444
     modellingParams@trainParams@otherParams <- tuneChosen
301 445
   }
302 446
 
303
-    if (!"previousTrained" %in% attr(modellingParams@trainParams@classifier, "name")) 
447
+  if(modellingParams@trainParams@classifier@generic != "previousTrained")
304 448
     # Don't name these first two variables. Some classifier functions might use classesTrain and others use outcomeTrain.
305 449
     paramList <- list(measurementsTrain, outcomeTrain)
306 450
   else # Don't pass the measurements and classes, because a pre-existing classifier is used.
... ...
@@ -315,7 +459,7 @@
315 459
   if(verbose >= 2)
316 460
     message("Training completed.")  
317 461
   
318
-  list(model = trained, tune = tuneDetails)
462
+  list(model = trained, tune = tuneChosen)
319 463
 }
320 464
 
321 465
 # Creates a function call to a prediction function.
... ...
@@ -384,6 +528,15 @@
384 528
 # by user-specified values.
385 529
 .filterCharacteristics <- function(characteristics, autoCharacteristics)
386 530
 {
531
+  # Remove duplication of values for classifiers that have one function for training and 
532
+  # one function for prediction.
533
+  if("Classifier Name" %in% autoCharacteristics[, "characteristic"] && "Predictor Name" %in% autoCharacteristics[, "characteristic"])
534
+  {
535
+    classRow <- which(autoCharacteristics[, "characteristic"] == "Classifier Name")
536
+    predRow <- which(autoCharacteristics[, "characteristic"] == "Predictor Name")
537
+    if(autoCharacteristics[classRow, "value"] == autoCharacteristics[predRow, "value"])
538
+      autoCharacteristics <- autoCharacteristics[-predRow, ]
539
+  }
387 540
   # Overwrite automatically-chosen names with user's names.
388 541
   if(nrow(autoCharacteristics) > 0 && nrow(characteristics) > 0)
389 542
   {
... ...
@@ -407,6 +560,36 @@
407 560
   plotData
408 561
 }
409 562
 
563
+# Summary of the features used and the total number of them, no matter if they are a simple type
564
+# or something more complex like Pairs or feature sets.
565
+.summaryFeatures <- function(measurements)
566
+{
567
+  # MultiAssayExperiment has feature details in mcols.
568
+  if(!is.null(S4Vectors::mcols(measurements)))
569
+  {
570
+    originalInfo <- S4Vectors::mcols(measurements)
571
+    featureNames <- S4Vectors::mcols(measurements)[, "feature"]
572
+    assays <- unique(S4Vectors::mcols(measurements)[, "assay"])
573
+    renamedInfo <- S4Vectors::mcols(measurements)
574
+    renamedAssays <- paste("Assay", seq_along(assays), sep = '')
575
+    for(assay in assays)
576
+    {
577
+      rowsAssay <- which(renamedInfo[, "assay"] == assay)
578
+      renamedInfo[rowsAssay, "feature"] <- paste("Feature", seq_along(rowsAssay), sep = '')
579
+      renamedInfo[rowsAssay, "assay"] <- renamedAssays[match(assay, assays)]
580
+    }
581
+    featuresInfo <- S4Vectors::DataFrame(originalInfo, renamedInfo)
582
+    colnames(featuresInfo) <- c("Original Assay", "Original Feature", "Renamed Assay", "Renamed Feature")
583
+    featuresInfo <- cbind(originalInfo, featuresInfo)
584
+  } else {
585
+    originalFeatures <- colnames(measurements)
586
+    renamedInfo <- paste("Feature", seq_along(measurements), sep = '')
587
+    featuresInfo <- S4Vectors::DataFrame(originalFeatures, renamedInfo)
588
+    colnames(featuresInfo) <- c("Original Feature", "Renamed Feature")
589
+  }
590
+  featuresInfo
591
+}
592
+
410 593
 # Function to identify the parameters of an S4 method.
411 594
 .methodFormals <- function(f, signature) {
412 595
   tryCatch({
... ...
@@ -477,53 +660,6 @@
477 660
   list(measurementsTrain = measurementsTrain, classesTrain = classesTrain)
478 661
 }
479 662
 
480
-.transformKeywordToFunction <- function(keyword)
481
-{
482
-  switch(
483
-        keyword,
484
-        "none" = NULL,
485
-        "diffLoc" = subtractFromLocation
486
-    )
487
-}
488
-
489
-.selectionKeywordToFunction <- function(keyword)
490