Browse code

Merge pull request #53 from SydneyBioX/master

Recent fixes and improvements

Dario Strbenac authored on 17/11/2022 09:43:39 • GitHub committed on 17/11/2022 09:43:39
Showing 135 changed files

... ...
@@ -1,2 +1,10 @@
1
-^.*\.Rproj$
2
-^\.Rproj\.user$
1
+^.*\.Rproj$
2
+^\.Rproj\.user$
3
+^_pkgdown\.yml$
4
+^docs$
5
+^pkgdown$
6
+vignettes/introduction.Rmd
7
+vignettes/performanceEvaluation.Rmd
8
+vignettes/multiViewMethods.Rmd
9
+vignettes/incorporateNew.Rmd
10
+^\.github$
... ...
@@ -1,4 +1,8 @@
1
-.Rproj.user
2
-.Rhistory
3
-.RData
4
-.Ruserdata
1
+.Rproj.user
2
+.Rhistory
3
+.RData
4
+.Ruserdata
5
+
6
+
7
+# vscode stuff
8
+.vscode
5 9
\ No newline at end of file
... ...
@@ -3,10 +3,19 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.3.0
7
-Date: 2022-10-25
8
-Author: Dario Strbenac, Ellis Patrick, Sourish Iyengar, Harry Robertson, Andy Tran, John Ormerod, Graham Mann, Jean Yang
9
-Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
6
+Version: 3.3.3
7
+Date: 2022-11-17
8
+Authors@R:
9
+    c(
10
+    person(given = "Dario", family = "Strbenac", email = "dario.strbenac@sydney.edu.au", role = c("aut", "cre")),
11
+    person(given = "Ellis", family = "Patrick", role = "aut"),
12
+    person(given = "Sourish", family = "Iyengar", role = "aut"),
13
+    person(given = "Harry", family = "Robertson", role = "aut"),
14
+    person(given = "Andy", family = "Tran", role = "aut"),
15
+    person(given = "John", family = "Ormerod", role = "aut"),
16
+    person(given = "Graham", family = "Mann", role = "aut"),
17
+    person(given = "Jean", family = "Yang", email = "jean.yang@sydney.edu.au", role = "aut")
18
+    )
10 19
 VignetteBuilder: knitr
11 20
 Encoding: UTF-8
12 21
 biocViews: Classification, Survival
... ...
@@ -86,3 +95,4 @@ Collate:
86 95
     'simpleParams.R'
87 96
     'subtractFromLocation.R'
88 97
     'utilities.R'
98
+URL: https://sydneybiox.github.io/ClassifyR/
... ...
@@ -27,8 +27,6 @@ export(distribution)
27 27
 export(edgesToHubNetworks)
28 28
 export(featureSetSummary)
29 29
 export(finalModel)
30
-export(generateCrossValParams)
31
-export(generateModellingParams)
32 30
 export(interactorDifferences)
33 31
 export(models)
34 32
 export(performance)
... ...
@@ -87,7 +85,6 @@ exportMethods(selectionPlot)
87 85
 exportMethods(show)
88 86
 exportMethods(totalPredictions)
89 87
 exportMethods(tunedParameters)
90
-import(BiocParallel)
91 88
 import(MultiAssayExperiment)
92 89
 import(grid)
93 90
 import(methods)
... ...
@@ -86,7 +86,7 @@ setMethod("ROCplot", "ClassifyResult", function(results, ...) {
86 86
 
87 87
 #' @rdname ROCplot
88 88
 #' @export
89
-setMethod("ROCplot", "list", 
89
+setMethod("ROCplot", "list",
90 90
           function(results, mode = c("merge", "average"), interval = 95,
91 91
                    comparison = "auto", lineColours = "auto",
92 92
                    lineWidth = 1, fontSizes = c(24, 16, 12, 12, 12), labelPositions = seq(0.0, 1.0, 0.2),
... ...
@@ -101,10 +101,18 @@ setMethod("ROCplot", "list",
101 101
   if(comparison == "auto")
102 102
   {
103 103
     if(max(characteristicsCounts) == length(results))
104
-      comparison <- names(characteristicsCounts)[characteristicsCounts == max(characteristicsCounts)][1]
105
-    else
104
+    { # Choose a characteristic which varies the most across the results.
105
+      candidates <- names(characteristicsCounts)[characteristicsCounts == length(results)]
106
+      allCharacteristics <- do.call(rbind, lapply(results, function(result) result@characteristics))
107
+      distinctValues <- by(allCharacteristics[, "value"], allCharacteristics[, "characteristic"], function(values) length(unique(values)))
108
+      comparison <- names(distinctValues)[which.max(distinctValues)][1]
109
+    } else {
106 110
       stop("No characteristic is present for all results but must be.")
111
+    }
107 112
   }
113
+  resultsWithComparison <- sum(sapply(results, function(result) any(result@characteristics[, "characteristic"] == comparison)))
114
+  if(resultsWithComparison < length(results))
115
+    stop("Not all results have comparison characteristic ", comparison, ' but need to.')
108 116
                
109 117
   ggplot2::theme_set(ggplot2::theme_classic() + ggplot2::theme(panel.border = ggplot2::element_rect(fill = NA)))
110 118
   distinctClasses <- levels(actualOutcome(results[[1]]))
... ...
@@ -113,6 +113,15 @@ setMethod("calcExternalPerformance", c("Surv", "numeric"),
113 113
             .calcPerformance(actualOutcome, predictedOutcome, performanceType = performanceType)[["values"]]
114 114
           })
115 115
 
116
+#' @rdname calcPerformance
117
+#' @exportMethod calcExternalPerformance
118
+setMethod("calcExternalPerformance", c("factor", "tabular"), # table has class probabilities per sample.
119
+          function(actualOutcome, predictedOutcome, performanceType = "AUC")
120
+          {
121
+            performanceType <- match.arg(performanceType)
122
+            .calcPerformance(actualOutcome, predictedOutcome, performanceType = performanceType)[["values"]]
123
+          })
124
+
116 125
 #' @rdname calcPerformance
117 126
 #' @usage NULL
118 127
 #' @export
... ...
@@ -11,22 +11,26 @@
11 11
 #' same length as the number of samples in \code{measurements} or a character vector of length 1 containing the
12 12
 #' column name in \code{measurements} if it is a \code{\link{DataFrame}}. Or a \code{\link{Surv}} object or a character vector of
13 13
 #' length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome. If \code{measurements} is a
14
-#' \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.
14
+#' \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.  If column names
15
+#' of survival information, time must be in first column and event status in the second.
15 16
 #' @param outcomeTrain For the \code{train} function, either a factor vector of classes, a \code{\link{Surv}} object, or
16 17
 #' a character string, or vector of such strings, containing column name(s) of column(s)
17
-#' containing either classes or time and event information about survival.
18
+#' containing either classes or time and event information about survival. If column names
19
+#' of survival information, time must be in first column and event status in the second.
18 20
 #' @param ... Parameters passed into \code{\link{prepareData}} which control subsetting and filtering of input data.
19 21
 #' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons
20 22
 #' or assays. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple assays, 
21 23
 #' a different number of features will be used for each assay. If a named list of vectors, the respective number of features will be optimised over. 
22 24
 #' Set to NULL or "all" if all features should be used.
23
-#' @param selectionMethod A character vector of feature selection methods to compare. If a named character vector with names corresponding to different assays, 
24
-#' and performing multiview classification, the respective classification methods will be used on each assay.
25
+#' @param selectionMethod Default: "auto". A character vector of feature selection methods to compare. If a named character vector with names corresponding to different assays, 
26
+#' and performing multiview classification, the respective selection methods will be used on each assay. If \code{"auto"}, t-test (two categories) / F-test (three or more categories) ranking
27
+#' and top \code{nFeatures} optimisation is done for a classification task. For a survival task, the ranking method is per-feature Cox proportional hazards p-value.
25 28
 #' @param selectionOptimisation A character of "Resubstitution", "Nested CV" or "none" specifying the approach used to optimise \code{nFeatures}.
26
-#' @param performanceType Default: \code{"auto"}. If \code{"auto"}, then balanced accuracy for classification or C-index for survival. Any one of the
29
+#' @param performanceType Default: \code{"auto"}. If \code{"auto"}, then balanced accuracy for classification or C-index for survival. Otherwise, any one of the
27 30
 #' options described in \code{\link{calcPerformance}} may be specified.
28
-#' @param classifier A character vector of classification methods to compare. If a named character vector with names corresponding to different assays, 
29
-#' and performing multiview classification, the respective classification methods will be used on each assay.
31
+#' @param classifier Default: \code{"auto"}. A character vector of classification methods to compare. If a named character vector with names corresponding to different assays, 
32
+#' and performing multiview classification, the respective classification methods will be used on each assay. If \code{"auto"}, then a random forest is used for a classification
33
+#' task or Cox proportional hazards model for a survival task.
30 34
 #' @param multiViewMethod A character vector specifying the multiview method or data integration approach to use.
31 35
 #' @param assayCombinations A character vector or list of character vectors proposing the assays or, in the case of a list, combination of assays to use
32 36
 #' with each element being a vector of assays to combine. Special value \code{"all"} means all possible subsets of assays.
... ...
@@ -108,12 +112,14 @@ setMethod("crossValidate", "DataFrame",
108 112
               if(!performanceType %in% c("auto", .ClassifyRenvir[["performanceTypes"]]))
109 113
                 stop(paste("performanceType must be one of", paste(c("auto", .ClassifyRenvir[["performanceTypes"]]), collapse = ", "), "but is", performanceType))
110 114
               
115
+              isCategorical <- is.character(outcome) && (length(outcome) == 1 || length(outcome) == nrow(measurements)) || is.factor(outcome)
111 116
               if(performanceType == "auto")
112
-              {
113
-                if(is.character(outcome) && (length(outcome) == 1 || length(outcome) == nrow(measurements)) || is.factor(outcome))
114
-                  performanceType <- "Balanced Accuracy"
115
-                else performanceType <- "C-index"
116
-              }
117
+                if(isCategorical) performanceType <- "Balanced Accuracy" else performanceType <- "C-index"
118
+              if(length(selectionMethod) == 1 && selectionMethod == "auto")
119
+                if(isCategorical) selectionMethod <- "t-test" else selectionMethod <- "CoxPH"
120
+              if(length(classifier) == 1 && classifier == "auto")
121
+                if(isCategorical) classifier <- "randomForest" else classifier <- "CoxPH"
122
+              
117 123
               
118 124
               # Which data-types or data-views are present?
119 125
               assayIDs <- unique(S4Vectors::mcols(measurements)$assay)
... ...
@@ -515,18 +521,6 @@ Using an ordinary GLM instead.")
515 521
     classifier
516 522
 }
517 523
 
518
-######################################
519
-######################################
520
-#' A function to generate a CrossValParams object
521
-#'
522
-#' @inheritParams crossValidate
523
-#'
524
-#' @return CrossValParams object
525
-#' @export
526
-#'
527
-#' @examples
528
-#' CVparams <- generateCrossValParams(nRepeats = 20, nFolds = 5, nCores = 8, selectionOptimisation = "none")
529
-#' @import BiocParallel
530 524
 generateCrossValParams <- function(nRepeats, nFolds, nCores, selectionOptimisation){
531 525
 
532 526
     seed <- .Random.seed[1]
... ...
@@ -549,32 +543,7 @@ generateCrossValParams <- function(nRepeats, nFolds, nCores, selectionOptimisati
549 543
     if(!any(tuneMode %in% c("Resubstitution", "Nested CV", "none"))) stop("selectionOptimisation must be Nested CV or Resubstitution or none")
550 544
     CrossValParams(permutations = nRepeats, folds = nFolds, parallelParams = BPparam, tuneMode = tuneMode)
551 545
 }
552
-######################################
553 546
 
554
-######################################
555
-#' A function to generate a ModellingParams object
556
-#'
557
-#' @inheritParams crossValidate
558
-#' @param assayIDs A vector of data set identifiers as long at the number of data sets.
559
-#'
560
-#' @return ModellingParams object
561
-#' @export
562
-#'
563
-#' @examples
564
-#' data(asthma)
565
-#' # First make a toy example assay with multiple data types. We'll randomly assign different features to be clinical, gene or protein.
566
-#' set.seed(51773)
567
-#' measurements <- DataFrame(measurements, check.names = FALSE) 
568
-#' mcols(measurements)$assay <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE))
569
-#' mcols(measurements)$feature <- colnames(measurements)
570
-#' modellingParams <- generateModellingParams(assayIDs = c("clinical", "gene", "protein"),
571
-#'                                           measurements = measurements, 
572
-#'                                           nFeatures = list(clinical = 10, gene = 10, protein = 10),
573
-#'                                           selectionMethod = list(clinical = "t-test", gene = "t-test", protein = "t-test"),
574
-#'                                           selectionOptimisation = "none",
575
-#'                                           classifier = "randomForest",
576
-#'                                           multiViewMethod = "merge")
577
-#' @import BiocParallel
578 547
 generateModellingParams <- function(assayIDs,
579 548
                                     measurements,
580 549
                                     nFeatures,
... ...
@@ -6,9 +6,11 @@ randomForestTrainInterface <- function(measurementsTrain, outcomeTrain, mTryProp
6 6
   if(verbose == 3)
7 7
     message("Fitting random forest classifier to training data.")
8 8
   mtry <- round(mTryProportion * ncol(measurementsTrain)) # Number of features to try.
9
-      
10 9
   # Convert to base data.frame as randomForest doesn't understand DataFrame.
11
-  ranger::ranger(x = as(measurementsTrain, "data.frame"), y = outcomeTrain, mtry = mtry, importance = "impurity_corrected", ...)
10
+  fittedModel <- ranger::ranger(x = as(measurementsTrain, "data.frame"), y = outcomeTrain, mtry = mtry, ...)
11
+  forImportance <- ranger::ranger(x = as(measurementsTrain, "data.frame"), y = outcomeTrain, mtry = mtry, importance = "impurity_corrected", ...)
12
+  attr(fittedModel, "forImportance") <- forImportance
13
+  fittedModel
12 14
 }
13 15
 attr(randomForestTrainInterface, "name") <- "randomForestTrainInterface"
14 16
     
... ...
@@ -37,7 +39,8 @@ randomForestPredictInterface <- function(forest, measurementsTest, ..., returnTy
37 39
 
38 40
 forestFeatures <- function(forest)
39 41
                   {
40
-                    rankedFeaturesIndices <- order(ranger::importance(forest), decreasing = TRUE)
41
-                    selectedFeaturesIndices <- which(ranger::importance(forest) > 0)
42
+                    forImportance <- attr(forest, "forImportance")
43
+                    rankedFeaturesIndices <- order(ranger::importance(forImportance), decreasing = TRUE)
44
+                    selectedFeaturesIndices <- which(ranger::importance(forImportance) > 0)
42 45
                     list(rankedFeaturesIndices, selectedFeaturesIndices)
43 46
                   }
44 47
\ No newline at end of file
... ...
@@ -5,8 +5,9 @@ rfsrcTrainInterface <- function(measurementsTrain, survivalTrain, mTryProportion
5 5
     stop("The package 'randomForestSRC' could not be found. Please install it.")
6 6
   if(verbose == 3)
7 7
     message("Fitting rfsrc classifier to training data and making predictions on test data.")
8
-    
9
-  bindedMeasurements <- cbind(measurementsTrain, event = survivalTrain[, 1], time = survivalTrain[, 2])
8
+
9
+  # Surv objects store survival information as a two-column table, time and event, in that order.    
10
+  bindedMeasurements <- cbind(measurementsTrain, time = survivalTrain[, 1], event = survivalTrain[, 2])
10 11
   mtry <- round(mTryProportion * ncol(measurementsTrain)) # Number of features to try.
11 12
   randomForestSRC::rfsrc(Surv(time, event) ~ ., data = as.data.frame(bindedMeasurements), mtry = mtry,
12 13
                           var.used = "all.trees", importance = TRUE, ...)
... ...
@@ -13,7 +13,8 @@
13 13
 #' are features.
14 14
 #' @param outcome Either a factor vector of classes, a \code{\link{Surv}} object, or
15 15
 #' a character string, or vector of such strings, containing column name(s) of column(s)
16
-#' containing either classes or time and event information about survival.
16
+#' containing either classes or time and event information about survival. If column names
17
+#' of survival information, time must be in first column and event status in the second.
17 18
 #' @param outcomeColumns If \code{measurements} is a \code{MultiAssayExperiment}, the
18 19
 #' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
19 20
 #' that contain(s) each individual's outcome to use for prediction.
... ...
@@ -4,7 +4,7 @@ coxphRanking <- function(measurementsTrain, survivalTrain, verbose = 3) # Clinic
4 4
   
5 5
   pValues <- rep(NA, ncol(measurementsTrain))
6 6
   names(pValues) <- colnames(measurementsTrain)
7
-  
7
+
8 8
   isCat <- sapply(measurementsTrain, class) %in% c("character", "factor")
9 9
   if(any(isCat))
10 10
   {
... ...
@@ -16,7 +16,8 @@
16 16
 #' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features.
17 17
 #' @param outcomeTrain Either a factor vector of classes, a \code{\link{Surv}} object, or
18 18
 #' a character string, or vector of such strings, containing column name(s) of column(s)
19
-#' containing either classes or time and event information about survival.
19
+#' containing either classes or time and event information about survival. If column names
20
+#' of survival information, time must be in first column and event status in the second.
20 21
 #' @param measurementsTest Same data type as \code{measurementsTrain}, but only the test
21 22
 #' samples.
22 23
 #' @param outcomeTest Same data type as \code{outcomeTrain}, but for only the test
... ...
@@ -256,12 +257,19 @@ input data. Autmomatically reducing to smaller number.")
256 257
     {
257 258
       if(is.null(modellingParams@trainParams@getFeatures))
258 259
       selectedFeatures <- originalFeatures[selectedFeaturesIndices]
259
-      else selectedFeatures <- colnames(measurementsTrain)[rankedFeaturesIndices] 
260
+      else selectedFeatures <- colnames(measurementsTrain)[selectedFeaturesIndices] 
260 261
     } else {
261 262
       featureColumns <- na.omit(match(c("assay", "feature"), colnames(S4Vectors::mcols(measurementsTrain))))
262
-      if(is.null(modellingParams@trainParams@getFeatures))
263
-      selectedFeatures <- originalFeatures[selectedFeaturesIndices, ]
264
-      else selectedFeatures <- S4Vectors::mcols(measurementsTrain)[selectedFeaturesIndices, featureColumns]
263
+      if(length(featureColumns) == 1)
264
+      {
265
+         if(is.null(modellingParams@trainParams@getFeatures))
266
+            selectedFeatures <- originalFeatures[selectedFeaturesIndices]
267
+         else selectedFeatures <- S4Vectors::mcols(measurementsTrain)[selectedFeaturesIndices, featureColumns]            
268
+      } else {
269
+                if(is.null(modellingParams@trainParams@getFeatures))
270
+                  selectedFeatures <- originalFeatures[selectedFeaturesIndices, ]
271
+                else selectedFeatures <- S4Vectors::mcols(measurementsTrain)[selectedFeaturesIndices, featureColumns]            
272
+      }
265 273
     }
266 274
     importanceTable <- S4Vectors::DataFrame(selectedFeatures, performanceChanges)
267 275
     if(ncol(importanceTable) == 2) colnames(importanceTable)[1] <- "feature"
... ...
@@ -272,6 +280,7 @@ input data. Autmomatically reducing to smaller number.")
272 280
   if(!is.null(tuneDetailsSelect)) tuneDetails <- tuneDetailsSelect else tuneDetails <- tuneDetailsTrain
273 281
 
274 282
   # Convert back into original, potentially unsafe feature identifiers unless it is a nested cross-validation.
283
+  
275 284
   if(is.null(.iteration) || .iteration != "internal")
276 285
   {
277 286
     if(!is.null(rankedFeaturesIndices))
... ...
@@ -283,9 +292,16 @@ input data. Autmomatically reducing to smaller number.")
283 292
         else rankedFeatures <- colnames(measurementsTrain)[rankedFeaturesIndices]            
284 293
       } else {
285 294
         featureColumns <- na.omit(match(c("assay", "feature"), colnames(S4Vectors::mcols(measurementsTrain))))          
286
-        if(is.null(modellingParams@trainParams@getFeatures))
287
-          rankedFeatures <- originalFeatures[rankedFeaturesIndices, ]
288
-        else rankedFeatures <- S4Vectors::mcols(measurementsTrain)[rankedFeaturesIndices, featureColumns]
295
+        if(length(featureColumns) == 1)
296
+        {
297
+          if(is.null(modellingParams@trainParams@getFeatures))
298
+            rankedFeatures <- originalFeatures[rankedFeaturesIndices]
299
+          else rankedFeatures <- S4Vectors::mcols(measurementsTrain)[rankedFeaturesIndices, featureColumns]
300
+        } else {
301
+          if(is.null(modellingParams@trainParams@getFeatures))
302
+            rankedFeatures <- originalFeatures[rankedFeaturesIndices, ]
303
+          else rankedFeatures <- S4Vectors::mcols(measurementsTrain)[rankedFeaturesIndices, featureColumns] 
304
+        }
289 305
       }
290 306
     } else { rankedFeatures <- NULL}
291 307
     if(!is.null(selectedFeaturesIndices))
... ...
@@ -297,9 +313,16 @@ input data. Autmomatically reducing to smaller number.")
297 313
         else selectedFeatures <- colnames(measurementsTrain)[selectedFeaturesIndices]
298 314
       } else {
299 315
         featureColumns <- na.omit(match(c("assay", "feature"), colnames(S4Vectors::mcols(measurementsTrain))))  
300
-        if(is.null(modellingParams@trainParams@getFeatures))
301
-          selectedFeatures <- originalFeatures[selectedFeaturesIndices, ]
302
-        else selectedFeatures <- S4Vectors::mcols(measurementsTrain)[selectedFeaturesIndices, featureColumns]
316
+        if(length(featureColumns) == 1)
317
+        {
318
+          if(is.null(modellingParams@trainParams@getFeatures))
319
+            selectedFeatures <- originalFeatures[selectedFeaturesIndices]
320
+          else selectedFeatures <- S4Vectors::mcols(measurementsTrain)[selectedFeaturesIndices, featureColumns]            
321
+        } else {
322
+                if(is.null(modellingParams@trainParams@getFeatures))
323
+                  selectedFeatures <- originalFeatures[selectedFeaturesIndices, ]
324
+                else selectedFeatures <- S4Vectors::mcols(measurementsTrain)[selectedFeaturesIndices, featureColumns]            
325
+        }
303 326
       }
304 327
     } else { selectedFeatures <- NULL}
305 328
   } else { # Nested use in feature selection. No feature selection in inner execution, so ignore features. 
... ...
@@ -16,7 +16,8 @@
16 16
 #' containing either classes or time and event information about survival. If
17 17
 #' \code{measurements} is a \code{MultiAssayExperiment}, the names of the column (class) or
18 18
 #' columns (survival) in the table extracted by \code{colData(data)} that contain(s) the samples'
19
-#' outcome to use for prediction.
19
+#' outcome to use for prediction. If column names of survival information, time must be in first
20
+#' column and event status in the second.
20 21
 #' @param crossValParams An object of class \code{\link{CrossValParams}},
21 22
 #' specifying the kind of cross-validation to be done.
22 23
 #' @param modellingParams An object of class \code{\link{ModellingParams}},
... ...
@@ -13,7 +13,7 @@
13 13
 #' a matrix of pre-calculated metrics, for backwards compatibility.
14 14
 #' @param classes If \code{results} is a matrix, this is a factor vector of the
15 15
 #' same length as the number of columns that \code{results} has.
16
-#' @param comparison Default: "Classifier Name". The aspect of the experimental
16
+#' @param comparison Default: "auto". The aspect of the experimental
17 17
 #' design to compare. Can be any characteristic that all results share.
18 18
 #' @param metric Default: "Sample Error". The sample-wise metric to plot.
19 19
 #' @param featureValues If not NULL, can be a named factor or named numeric
... ...
@@ -44,6 +44,8 @@
44 44
 #' @param legendSize The size of the boxes in the legends.
45 45
 #' @param plot Logical. If \code{TRUE}, a plot is produced on the current
46 46
 #' graphics device.
47
+#' @param ... Parameters not used by the \code{ClassifyResult} method that does
48
+#' list-packaging but used by the main \code{list} method.
47 49
 #' @return A plot is produced and a grob is returned that can be saved to a
48 50
 #' graphics device.
49 51
 #' @author Dario Strbenac
... ...
@@ -82,11 +84,17 @@
82 84
 setGeneric("samplesMetricMap", function(results, ...)
83 85
 standardGeneric("samplesMetricMap"))
84 86
 
87
+#' @rdname samplesMetricMap
88
+#' @export
89
+setMethod("samplesMetricMap", "ClassifyResult", function(results, ...) {
90
+    samplesMetricMap(list(assay = results), ...)
91
+})
92
+
85 93
 #' @rdname samplesMetricMap
86 94
 #' @export
87 95
 setMethod("samplesMetricMap", "list", 
88 96
           function(results,
89
-                   comparison = "Classifier Name",
97
+                   comparison = "auto",
90 98
                    metric = c("Sample Error", "Sample Accuracy", "Sample C-index"),
91 99
                    featureValues = NULL, featureName = NULL,
92 100
                    metricColours = list(c("#3F48CC", "#6F75D8", "#9FA3E5", "#CFD1F2", "#FFFFFF"),
... ...
@@ -103,6 +111,20 @@ setMethod("samplesMetricMap", "list",
103 111
     stop("The package 'gridExtra' could not be found. Please install it.")       
104 112
   if(!requireNamespace("gtable", quietly = TRUE))
105 113
     stop("The package 'gtable' could not be found. Please install it.")
114
+  
115
+  characteristicsCounts <- table(unlist(lapply(results, function(result) result@characteristics[["characteristic"]])))
116
+  if(comparison == "auto")
117
+  {
118
+    if(max(characteristicsCounts) == length(results))
119
+    { # Choose a characteristic which varies the most across the results.
120
+      candidates <- names(characteristicsCounts)[characteristicsCounts == length(results)]
121
+      allCharacteristics <- do.call(rbind, lapply(results, function(result) result@characteristics))
122
+      distinctValues <- by(allCharacteristics[, "value"], allCharacteristics[, "characteristic"], function(values) length(unique(values)))
123
+      comparison <- names(distinctValues)[which.max(distinctValues)][1]
124
+    } else {
125
+      stop("No characteristic is present for all results but must be.")
126
+    }
127
+  }
106 128
   resultsWithComparison <- sum(sapply(results, function(result) any(result@characteristics[, "characteristic"] == comparison)))
107 129
   if(resultsWithComparison < length(results))
108 130
     stop("Not all results have comparison characteristic ", comparison, ' but need to.')
109 131
new file mode 100644
... ...
@@ -0,0 +1,25 @@
1
+# ClassifyR: Performance evaluation for multi-view data sets and seamless integration with MultiAssayExperiment and Bioconductor
2
+
3
+<img src="man/figures/ClassifyRsticker.png" align="right" width=250 style="margin-left: 10px;">
4
+
5
+ClassifyR's performance evaluation focuses on model stability and interpretability. Based on repeated cross-validation, it is possible to evaluate feature selection stability and also per-sample prediction accuracy. Also, multiple omics data assays on the same samples are becoming more popular and ClassifyR supports a range of multi-view methods to evaluate which data view is the most predictive and combine data views to evaluate if multiple views provide superior predictive performance to a single data view.
6
+
7
+##  Installation 
8
+
9
+The recommended method of installing ClassifyR is by using Bioconductor's BiocManager installer:
10
+
11
+```
12
+library(BiocManager)
13
+install("ClassifyR", dependencies = TRUE)
14
+```
15
+
16
+The above code will install all packages that provide feature selection or model-building functionality. If only one or two methods are desired then the dependencies option could be omitted and those packages providing functionality installed manually. 
17
+ 
18
+##  Website
19
+
20
+Please visit [the ClassifyR website](https://sydneybiox.github.io/ClassifyR/) to view the main vignette as well as articles that provide more in-depth explanations for various aspects of the package. Details of performance evaluation, multi-view methods and contributing a wrapper for a new algorithm to the package are provided.
21
+
22
+
23
+## Reference
24
+
25
+Strbenac, D., Mann, G. J., Ormerod, J. T., and Yang, J. Y. H. (2015) ClassifyR: An R package for performance assessment of classification with applications to transcriptomics, *Bioinformatics*.
0 26
new file mode 100644
... ...
@@ -0,0 +1,15 @@
1
+url: https://sydneybiox.github.io/ClassifyR/
2
+template:
3
+  bootstrap: 5
4
+articles:
5
+- title: Menu
6
+  contents:
7
+    - introduction
8
+    - performanceEvaluation
9
+    - multiViewMethods
10
+    - incorporateNew
11
+    - ClassifyR
12
+    - DevelopersGuide
13
+navbar:
14
+  title: ~
15
+  bg: dark
0 16
new file mode 100644
... ...
@@ -0,0 +1,86 @@
1
+<!DOCTYPE html>
2
+<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
3
+<head>
4
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+<meta charset="utf-8">
6
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
7
+<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8
+<title>Page not found (404) • ClassifyR</title>
9
+<script src="https://sydneybiox.github.io/ClassifyR/deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
10
+<link href="https://sydneybiox.github.io/ClassifyR/deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet">
11
+<script src="https://sydneybiox.github.io/ClassifyR/deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
12
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
13
+<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="https://sydneybiox.github.io/ClassifyR/pkgdown.js"></script><meta property="og:title" content="Page not found (404)">
14
+<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
15
+<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
16
+<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
17
+<![endif]-->
18
+</head>
19
+<body>
20
+    <a href="https://sydneybiox.github.io/ClassifyR/#main" class="visually-hidden-focusable">Skip to contents</a>
21
+    
22
+
23
+    <nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-dark"><div class="container">
24
+    
25
+    <a class="navbar-brand me-2" href="https://sydneybiox.github.io/ClassifyR/index.html">ClassifyR</a>
26
+
27
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">3.3.2</small>
28
+
29
+    
30
+    <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
31
+      <span class="navbar-toggler-icon"></span>
32
+    </button>
33
+
34
+    <div id="navbar" class="collapse navbar-collapse ms-3">
35
+      <ul class="navbar-nav me-auto">
36
+<li class="nav-item">
37
+  <a class="nav-link" href="https://sydneybiox.github.io/ClassifyR/articles/ClassifyR.html">Get started</a>
38
+</li>
39
+<li class="nav-item">
40
+  <a class="nav-link" href="https://sydneybiox.github.io/ClassifyR/reference/index.html">Reference</a>
41
+</li>
42
+<li class="nav-item">
43
+  <a class="nav-link" href="https://sydneybiox.github.io/ClassifyR/articles/index.html">Articles</a>
44
+</li>
45
+      </ul>
46
+<form class="form-inline my-2 my-lg-0" role="search">
47
+        <input type="search" class="form-control me-sm-2" aria-label="Toggle navigation" name="search-input" data-search-index="search.json" id="search-input" placeholder="Search for" autocomplete="off">
48
+</form>
49
+
50
+      <ul class="navbar-nav"></ul>
51
+</div>
52
+
53
+    
54
+  </div>
55
+</nav><div class="container template-title-body">
56
+<div class="row">
57
+  <main id="main" class="col-md-9"><div class="page-header">
58
+      <img src="https://sydneybiox.github.io/ClassifyR/" class="logo" alt=""><h1>Page not found (404)</h1>
59
+      
60
+    </div>
61
+
62
+Content not found. Please use links in the navbar.
63
+
64
+  </main>
65
+</div>
66
+
67
+
68
+    <footer><div class="pkgdown-footer-left">
69
+  <p></p>
70
+<p>Developed by Dario Strbenac, Ellis Patrick, Sourish Iyengar, Harry Robertson, Andy Tran, John Ormerod, Graham Mann, Jean Yang.</p>
71
+</div>
72
+
73
+<div class="pkgdown-footer-right">
74
+  <p></p>
75
+<p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.6.</p>
76
+</div>
77
+
78
+    </footer>
79
+</div>
80
+
81
+  
82
+
83
+  
84
+
85
+  </body>
86
+</html>
</
0 87
new file mode 100644
... ...
@@ -0,0 +1,1289 @@
1
+<!DOCTYPE html>
2
+<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
3
+<head>
4
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+<meta charset="utf-8">
6
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
7
+<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8
+<meta name="description" content="ClassifyR">
9
+<title>An Introduction to ClassifyR • ClassifyR</title>
10
+<script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
11
+<link href="../deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet">
12
+<script src="../deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
13
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
14
+<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="An Introduction to **ClassifyR**">
15
+<meta property="og:description" content="ClassifyR">
16
+<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
17
+<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
18
+<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
19
+<![endif]-->
20
+</head>
21
+<body>
22
+    <a href="#main" class="visually-hidden-focusable">Skip to contents</a>
23
+    
24
+
25
+    <nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-dark"><div class="container">
26
+    
27
+    <a class="navbar-brand me-2" href="../index.html">ClassifyR</a>
28
+
29
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">3.3.2</small>
30
+
31
+    
32
+    <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
33
+      <span class="navbar-toggler-icon"></span>
34
+    </button>
35
+
36
+    <div id="navbar" class="collapse navbar-collapse ms-3">
37
+      <ul class="navbar-nav me-auto">
38
+<li class="active nav-item">
39
+  <a class="nav-link" href="../articles/ClassifyR.html">Get started</a>
40
+</li>
41
+<li class="nav-item">
42
+  <a class="nav-link" href="../reference/index.html">Reference</a>
43
+</li>
44
+<li class="nav-item">
45
+  <a class="nav-link" href="../articles/index.html">Articles</a>
46
+</li>
47
+      </ul>
48
+<form class="form-inline my-2 my-lg-0" role="search">
49
+        <input type="search" class="form-control me-sm-2" aria-label="Toggle navigation" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off">
50
+</form>
51
+
52
+      <ul class="navbar-nav"></ul>
53
+</div>
54
+
55
+    
56
+  </div>
57
+</nav><div class="container template-article">
58
+
59
+
60
+
61
+
62
+<div class="row">
63
+  <main id="main" class="col-md-9"><div class="page-header">
64
+      <img src="" class="logo" alt=""><h1>An Introduction to ClassifyR</h1>
65
+                        <h4 data-toc-skip class="author">Dario Strbenac,
66
+Ellis Patrick, Graham Mann, Jean Yang, John Ormerod <br> The University
67
+of Sydney, Australia.</h4>
68
+            
69
+      
70
+      
71
+      <div class="d-none name"><code>ClassifyR.Rmd</code></div>
72
+    </div>
73
+
74
+    
75
+    
76
+<div class="section level2">
77
+<h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#installation"></a>
78
+</h2>
79
+<p>Typically, each feature selection method or classifier originates
80
+from a different R package, which <strong>ClassifyR</strong> provides a
81
+wrapper around. By default, only high-performance t-test/F-test and
82
+random forest are installed. If you intend to compare between numerous
83
+different modelling methods, you should install all suggested packages
84
+at once by using the command
85
+<code>BiocManager::install("ClassifyR", dependencies = TRUE)</code>.
86
+This will take a few minutes, particularly on Linux, because each
87
+package will be compiled from source code.</p>
88
+</div>
89
+<div class="section level2">
90
+<h2 id="overview">Overview<a class="anchor" aria-label="anchor" href="#overview"></a>
91
+</h2>
92
+<p><strong>ClassifyR</strong> provides a structured pipeline for
93
+cross-validated classification. Classification is viewed in terms of
94
+four stages, data transformation, feature selection, classifier
95
+training, and prediction. The driver functions <em>crossValidate</em>
96
+and <em>runTests</em> implement varieties of cross-validation. They
97
+are:</p>
98
+<ul>
99
+<li>Permutation of the order of samples followed by k-fold
100
+cross-validation (runTests only)</li>
101
+<li>Repeated x% test set cross-validation</li>
102
+<li>leave-k-out cross-validation</li>
103
+</ul>
104
+<p>Driver functions can use parallel processing capabilities in R to
105
+speed up cross-validations when many CPUs are available. The output of
106
+the driver functions is a <em>ClassifyResult</em> object which can be
107
+directly used by the performance evaluation functions. The process of
108
+classification is summarised by a flowchart.</p>
109
+<img src="" style="margin-left: auto;margin-right: auto"><p>Importantly, ClassifyR implements a number of methods for
110
+classification using different kinds of changes in measurements between
111
+classes. Most classifiers work with features where the means are
112
+different. In addition to changes in means (DM),
113
+<strong>ClassifyR</strong> also allows for classification using
114
+differential variability (DV; changes in scale) and differential
115
+distribution (DD; changes in location and/or scale).</p>
116
+<div class="section level3">
117
+<h3 id="case-study-diagnosing-asthma">Case Study: Diagnosing Asthma<a class="anchor" aria-label="anchor" href="#case-study-diagnosing-asthma"></a>
118
+</h3>
119
+<p>To demonstrate some key features of ClassifyR, a data set consisting
120
+of the 2000 most variably expressed genes and 190 people will be used to
121
+quickly obtain results. The journal article corresponding to the data
122
+set was published in <em>Scientific Reports</em> in 2018 and is titled
123
+<a href="http://www.nature.com/articles/s41598-018-27189-4" class="external-link">A Nasal
124
+Brush-based Classifier of Asthma Identified by Machine Learning Analysis
125
+of Nasal RNA Sequence Data</a>.</p>
126
+<p>Load the package.</p>
127
+<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
128
+<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://sydneybiox.github.io/ClassifyR/">ClassifyR</a></span><span class="op">)</span></span></code></pre></div>
129
+<p>A glimpse at the RNA measurements and sample classes.</p>
130
+<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
131
+<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/data.html" class="external-link">data</a></span><span class="op">(</span><span class="va">asthma</span><span class="op">)</span> <span class="co"># Contains measurements and classes variables.</span></span>
132
+<span><span class="va">measurements</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">5</span>, <span class="fl">1</span><span class="op">:</span><span class="fl">5</span><span class="op">]</span></span></code></pre></div>
133
+<pre><code><span><span class="co">##            HBB BPIFA1  XIST FCGR3B HBA2</span></span>
134
+<span><span class="co">## Sample 1  9.72  14.06 12.28  11.42 7.83</span></span>
135
+<span><span class="co">## Sample 2 11.98  13.89  6.35  13.25 9.42</span></span>
136
+<span><span class="co">## Sample 3 12.15  17.44 10.21   7.87 9.68</span></span>
137
+<span><span class="co">## Sample 4 10.60  11.87  6.27  14.75 8.96</span></span>
138
+<span><span class="co">## Sample 5  8.18  15.01 11.21   6.77 6.43</span></span></code></pre>
139
+<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
140
+<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">classes</span><span class="op">)</span></span></code></pre></div>
141
+<pre><code><span><span class="co">## [1] No  No  No  No  Yes No </span></span>
142
+<span><span class="co">## Levels: No Yes</span></span></code></pre>
143
+<p>The numeric matrix variable <em>measurements</em> stores the
144
+normalised values of the RNA gene abundances for each sample and the
145
+factor vector <em>classes</em> identifies which class the samples belong
146
+to. The measurements were normalised using <strong>DESeq2</strong>’s
147
+<em>varianceStabilizingTransformation</em> function, which produces
148
+<span class="math inline">\(log_2\)</span>-like data.</p>
149
+<p>For more complex data sets with multiple kinds of experiments
150
+(e.g. DNA methylation, copy number, gene expression on the same set of
151
+samples) a <a href="https://bioconductor.org/packages/release/bioc/html/MultiAssayExperiment.html" class="external-link"><em>MultiAssayExperiment</em></a>
152
+is recommended for data storage and supported by
153
+<strong>ClassifyR</strong>’s methods.</p>
154
+</div>
155
+</div>
156
+<div class="section level2">
157
+<h2 id="quick-start-crossvalidate-function">Quick Start: <em>crossValidate</em> Function<a class="anchor" aria-label="anchor" href="#quick-start-crossvalidate-function"></a>
158
+</h2>
159
+<p>The <em>crossValidate</em> function offers a quick and simple way to
160
+start analysing a dataset in ClassifyR. It is a wrapper for
161
+<em>runTests</em>, the core model building and testing function of
162
+ClassifyR. <em>crossValidate</em> must be supplied with
163
+<em>measurements</em>, a simple tabular data container or a list-like
164
+structure of such related tabular data on common samples. The classes of
165
+it may be <em>matrix</em>, <em>data.frame</em>, <em>DataFrame</em>,
166
+<em>MultiAssayExperiment</em> or <em>list</em> of <em>data.frames</em>.
167
+For a dataset with <span class="math inline">\(n\)</span> observations
168
+and <span class="math inline">\(p\)</span> variables, the
169
+<em>crossValidate</em> function will accept inputs of the following
170
+shapes:</p>
171
+<table class="table">
172
+<colgroup>
173
+<col width="25%">
174
+<col width="37%">
175
+<col width="37%">
176
+</colgroup>
177
+<thead><tr class="header">
178
+<th>Data Type</th>
179
+<th align="center"><span class="math inline">\(n \times p\)</span></th>
180
+<th align="center"><span class="math inline">\(p \times n\)</span></th>
181
+</tr></thead>
182
+<tbody>
183
+<tr class="odd">
184
+<td><span style="font-family: 'Courier New', monospace;">matrix</span></td>
185
+<td align="center">✔</td>
186
+<td align="center"></td>
187
+</tr>
188
+<tr class="even">
189
+<td><span style="font-family: 'Courier New', monospace;">data.frame</span></td>
190
+<td align="center">✔</td>
191
+<td align="center"></td>
192
+</tr>
193
+<tr class="odd">
194
+<td><span style="font-family: 'Courier New', monospace;">DataFrame</span></td>
195
+<td align="center">✔</td>
196
+<td align="center"></td>
197
+</tr>
198
+<tr class="even">
199
+<td><span style="font-family: 'Courier New', monospace;">MultiAssayExperiment</span></td>
200
+<td align="center"></td>
201
+<td align="center">✔</td>
202
+</tr>
203
+<tr class="odd">
204
+<td>
205
+<span style="font-family: 'Courier New', monospace;">list</span> of
206
+<span style="font-family: 'Courier New', monospace;">data.frame</span>s</td>
207
+<td align="center">✔</td>
208
+<td align="center"></td>
209
+</tr>
210
+</tbody>
211
+</table>
212
+<p><em>crossValidate</em> must also be supplied with <em>outcome</em>,
213
+which represents the prediction to be made in a variety of possible
214
+ways.</p>
215
+<ul>
216
+<li>A <em>factor</em> that contains the class label for each
217
+observation. <em>outcome</em> must be of length <span class="math inline">\(n\)</span>.</li>
218
+<li>A <em>character</em> of length 1 that matches a column name in a
219
+data frame which holds the classes. The classes will automatically be
220
+removed before training is done.</li>
221
+<li>A <em>Surv</em> object of the same length as the number of samples
222
+in the data which contains information about the time and censoring of
223
+the samples.</li>
224
+<li>A <em>character</em> vector of length 2 or 3 whose elements each match a
225
+column name in a data frame which holds information about the time and
226
+censoring of the samples. The time-to-event columns will automatically
227
+be removed before training is done.</li>
228
+</ul>
229
+<p>The type of classifier used can be changed with the
230
+<em>classifier</em> argument. The default is a random forest, which
231
+seamlessly handles categorical and numerical data. A full list of
232
+classifiers can be seen by running <em>?crossValidate</em>. A feature
233
+selection step can be performed before classification using
234
+<em>nFeatures</em> and <em>selectionMethod</em>, which is a t-test by
235
+default. Similarly, the number of folds and number of repeats for cross
236
+validation can be changed with the <em>nFolds</em> and <em>nRepeats</em>
237
+arguments. If wanted, <em>nCores</em> can be specified to run the cross
238
+validation in parallel. To perform 5-fold cross-validation of a Support
239
+Vector Machine with 2 repeats:</p>
240
+<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
241
+<code class="sourceCode R"><span><span class="va">result</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/crossValidate.html">crossValidate</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, classifier <span class="op">=</span> <span class="st">"SVM"</span>,</span>
242
+<span>                        nFeatures <span class="op">=</span> <span class="fl">20</span>, nFolds <span class="op">=</span> <span class="fl">5</span>, nRepeats <span class="op">=</span> <span class="fl">2</span>, nCores <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
243
+<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
244
+<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
245
+<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span><span class="op">)</span></span></code></pre></div>
246
+<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
247
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-5-1.png" width="700"></p>
248
+<div class="section level3">
249
+<h3 id="data-integration-with-crossvalidate">Data Integration with crossValidate<a class="anchor" aria-label="anchor" href="#data-integration-with-crossvalidate"></a>
250
+</h3>
251
+<p><em>crossValidate</em> also allows data from multiple sources to be
252
+integrated into a single model. The integration method can be specified
253
+with the <em>multiViewMethod</em> argument. In this example, suppose the
254
+first 10 variables in the asthma data set are from a certain source and
255
+the remaining 1990 variables are from a second source. To integrate
256
+multiple data sets, each variable must be labeled with the data set it
257
+came from. This is done in a different manner depending on the data type
258
+of <em>measurements</em>.</p>
259
+<p>If using Bioconductor’s <em>DataFrame</em>, this can be specified
260
+using <em>mcols</em>. In the column metadata, each feature must have an
261
+<em>assay</em> and a <em>feature</em> name.</p>
262
+<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
263
+<code class="sourceCode R"><span><span class="va">measurementsDF</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/pkg/S4Vectors/man/DataFrame-class.html" class="external-link">DataFrame</a></span><span class="op">(</span><span class="va">measurements</span><span class="op">)</span></span>
264
+<span><span class="fu"><a href="https://rdrr.io/pkg/S4Vectors/man/Vector-class.html" class="external-link">mcols</a></span><span class="op">(</span><span class="va">measurementsDF</span><span class="op">)</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span></span>
265
+<span>  assay <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay_1"</span>, <span class="st">"assay_2"</span><span class="op">)</span>, times <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">10</span>, <span class="fl">1990</span><span class="op">)</span><span class="op">)</span>,</span>
266
+<span>  feature <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/colnames.html" class="external-link">colnames</a></span><span class="op">(</span><span class="va">measurementsDF</span><span class="op">)</span></span>
267
+<span><span class="op">)</span></span>
268
+<span></span>
269
+<span><span class="va">result</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/crossValidate.html">crossValidate</a></span><span class="op">(</span><span class="va">measurementsDF</span>, <span class="va">classes</span>, classifier <span class="op">=</span> <span class="st">"SVM"</span>, nFolds <span class="op">=</span> <span class="fl">5</span>,</span>
270
+<span>                        nRepeats <span class="op">=</span> <span class="fl">3</span>, multiViewMethod <span class="op">=</span> <span class="st">"merge"</span><span class="op">)</span></span></code></pre></div>
271
+<pre><code><span><span class="co">## Processing sample set 10.</span></span>
272
+<span><span class="co">## Processing sample set 10.</span></span>
273
+<span><span class="co">## Processing sample set 10.</span></span></code></pre>
274
+<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
275
+<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span>, characteristicsList <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>x <span class="op">=</span> <span class="st">"Assay Name"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
276
+<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
277
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-6-1.png" width="700"></p>
278
+<p>If using a list of <em>data.frame</em>s, the name of each element in
279
+the list will be used as the assay name.</p>
280
+<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
281
+<code class="sourceCode R"><span><span class="co"># Assigns first 10 variables to assay_1, and the rest to assay_2</span></span>
282
+<span><span class="va">measurementsList</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span></span>
283
+<span>  <span class="op">(</span><span class="va">measurements</span> <span class="op">|&gt;</span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">10</span><span class="op">]</span>,</span>
284
+<span>  <span class="op">(</span><span class="va">measurements</span> <span class="op">|&gt;</span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span><span class="op">[</span><span class="fl">11</span><span class="op">:</span><span class="fl">2000</span><span class="op">]</span></span>
285
+<span><span class="op">)</span></span>
286
+<span><span class="fu"><a href="https://rdrr.io/r/base/names.html" class="external-link">names</a></span><span class="op">(</span><span class="va">measurementsList</span><span class="op">)</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay_1"</span>, <span class="st">"assay_2"</span><span class="op">)</span></span>
287
+<span></span>
288
+<span><span class="va">result</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/crossValidate.html">crossValidate</a></span><span class="op">(</span><span class="va">measurementsList</span>, <span class="va">classes</span>, classifier <span class="op">=</span> <span class="st">"SVM"</span>, nFolds <span class="op">=</span> <span class="fl">5</span>,</span>
289
+<span>                        nRepeats <span class="op">=</span> <span class="fl">3</span>, multiViewMethod <span class="op">=</span> <span class="st">"merge"</span><span class="op">)</span></span></code></pre></div>
290
+<pre><code><span><span class="co">## Processing sample set 10.</span></span>
291
+<span><span class="co">## Processing sample set 10.</span></span>
292
+<span><span class="co">## Processing sample set 10.</span></span></code></pre>
293
+<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
294
+<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span>, characteristicsList <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>x <span class="op">=</span> <span class="st">"Assay Name"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
295
+<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
296
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-7-1.png" width="700"></p>
297
+</div>
298
+</div>
299
+<div class="section level2">
300
+<h2 id="a-more-detailed-look-at-classifyr">A More Detailed Look at ClassifyR<a class="anchor" aria-label="anchor" href="#a-more-detailed-look-at-classifyr"></a>
301
+</h2>
302
+<p>In the following sections, some of the most useful functions provided
303
+in <strong>ClassifyR</strong> will be demonstrated. However, a user
304
+could wrap any feature selection, training, or prediction function to
305
+the classification framework, as long as it meets some simple rules
306
+about the input and return parameters. See the appendix section of this
307
+guide titled “Rules for New Functions” for a description of these.</p>
308
+<div class="section level3">
309
+<h3 id="comparison-to-existing-classification-frameworks">Comparison to Existing Classification Frameworks<a class="anchor" aria-label="anchor" href="#comparison-to-existing-classification-frameworks"></a>
310
+</h3>
311
+<p>There are a few other frameworks for classification in R. The table
312
+below provides a comparison of which features they offer.</p>
313
+<table class="table">
314
+<colgroup>
315
+<col width="8%">
316
+<col width="10%">
317
+<col width="8%">
318
+<col width="10%">
319
+<col width="10%">
320
+<col width="11%">
321
+<col width="14%">
322
+<col width="12%">
323
+<col width="12%">
324
+</colgroup>
325
+<thead><tr class="header">
326
+<th>Package</th>
327
+<th>Run User-defined Classifiers</th>
328
+<th>Parallel Execution on any OS</th>
329
+<th>Parameter Tuning</th>
330
+<th>Intel DAAL Performance Metrics</th>
331
+<th>Ranking and Selection Plots</th>
332
+<th>Class Distribution Plot</th>
333
+<th>Sample-wise Error Heatmap</th>
334
+<th>Direct Support for MultiAssayExperiment Input</th>
335
+</tr></thead>
336
+<tbody>
337
+<tr class="odd">
338
+<td><strong>ClassifyR</strong></td>
339
+<td>Yes</td>
340
+<td>Yes</td>
341
+<td>Yes</td>
342
+<td>Yes</td>
343
+<td>Yes</td>
344
+<td>Yes</td>
345
+<td>Yes</td>
346
+<td>Yes</td>
347
+</tr>
348
+<tr class="even">
349
+<td>caret</td>
350
+<td>Yes</td>
351
+<td>Yes</td>
352
+<td>Yes</td>
353
+<td>No</td>
354
+<td>No</td>
355
+<td>No</td>
356
+<td>No</td>
357
+<td>No</td>
358
+</tr>
359
+<tr class="odd">
360
+<td>MLInterfaces</td>
361
+<td>Yes</td>
362
+<td>No</td>
363
+<td>No</td>
364
+<td>No</td>
365
+<td>No</td>
366
+<td>No</td>
367
+<td>No</td>
368
+<td>No</td>
369
+</tr>
370
+<tr class="even">
371
+<td>MCRestimate</td>
372
+<td>Yes</td>
373
+<td>No</td>
374
+<td>Yes</td>
375
+<td>No</td>
376
+<td>No</td>
377
+<td>No</td>
378
+<td>No</td>
379
+<td>No</td>
380
+</tr>
381
+<tr class="odd">
382
+<td>CMA</td>
383
+<td>No</td>
384
+<td>No</td>
385
+<td>Yes</td>
386
+<td>No</td>
387
+<td>No</td>
388
+<td>No</td>
389
+<td>No</td>
390
+<td>No</td>
391
+</tr>
392
+</tbody>
393
+</table>
394
+</div>
395
+<div class="section level3">
396
+<h3 id="provided-functionality">Provided Functionality<a class="anchor" aria-label="anchor" href="#provided-functionality"></a>
397
+</h3>
398
+<p>Although <strong>ClassifyR</strong> is a cross-validation framework, a number of popular
399
+feature selection and classification functions are provided by the
400
+package which meet the requirements of functions to be used by it (see
401
+the last section).</p>
402
+<div class="section level4">
403
+<h4 id="provided-methods-for-feature-selection-and-classification">Provided Methods for Feature Selection and Classification<a class="anchor" aria-label="anchor" href="#provided-methods-for-feature-selection-and-classification"></a>
404
+</h4>
405
+<p>In the following tables, a function that is used when no function is
406
+explicitly specified by the user is shown as <span style="padding:4px; border:2px dashed #e64626;">functionName</span>.</p>
407
+<p>The functions below produce a ranking, of which different size
408
+subsets are tried and the classifier performance evaluated, to select a
409
+best subset of features, based on a criterion such as balanced accuracy
410
+rate.</p>
411
+<table style="width:100%;" class="table">
412
+<colgroup>
413
+<col width="9%">
414
+<col width="62%">
415
+<col width="9%">
416
+<col width="9%">
417
+<col width="9%">
418
+</colgroup>
419
+<thead><tr class="header">
420
+<th>Function</th>
421
+<th>Description</th>
422
+<th>DM</th>
423
+<th>DV</th>
424
+<th>DD</th>
425
+</tr></thead>
426
+<tbody>
427
+<tr class="odd">
428
+<td><span style="padding:4px; border:2px dashed #e64626; font-family: 'Courier New', monospace;">differentMeansRanking</span></td>
429
+<td>t-test ranking if two classes, F-test ranking if three or more</td>
430
+<td>✔</td>
431
+<td></td>
432
+<td></td>
433
+</tr>
434
+<tr class="even">
435
+<td><span style="font-family: 'Courier New', monospace;">limmaRanking</span></td>
436
+<td>Moderated t-test ranking using variance shrinkage</td>
437
+<td>✔</td>
438
+<td></td>
439
+<td></td>
440
+</tr>
441
+<tr class="odd">
442
+<td><span style="font-family: 'Courier New', monospace;">edgeRranking</span></td>
443
+<td>Likelihood ratio test for count data ranking</td>
444
+<td>✔</td>
445
+<td></td>
446
+<td></td>
447
+</tr>
448
+<tr class="even">
449
+<td><span style="font-family: 'Courier New', monospace;">bartlettRanking</span></td>
450
+<td>Bartlett’s test non-robust ranking</td>
451
+<td></td>
452
+<td>✔</td>
453
+<td></td>
454
+</tr>
455
+<tr class="odd">
456
+<td><span style="font-family: 'Courier New', monospace;">leveneRanking</span></td>
457
+<td>Levene’s test robust ranking</td>
458
+<td></td>
459
+<td>✔</td>
460
+<td></td>
461
+</tr>
462
+<tr class="even">
463
+<td><span style="font-family: 'Courier New', monospace;">DMDranking</span></td>
464
+<td><span style="white-space: nowrap">Difference in location
465
+(mean/median) and/or scale (SD, MAD, <span class="math inline">\(Q_n\)</span>)</span></td>
466
+<td>✔</td>
467
+<td>✔</td>
468
+<td>✔</td>
469
+</tr>
470
+<tr class="odd">
471
+<td><span style="font-family: 'Courier New', monospace;">likelihoodRatioRanking</span></td>
472
+<td>Likelihood ratio (normal distribution) ranking</td>
473
+<td>✔</td>
474
+<td>✔</td>
475
+<td>✔</td>
476
+</tr>
477
+<tr class="even">
478
+<td><span style="font-family: 'Courier New', monospace;">KolmogorovSmirnovRanking</span></td>
479
+<td>Kolmogorov-Smirnov distance between distributions ranking</td>
480
+<td>✔</td>
481
+<td>✔</td>
482
+<td>✔</td>
483
+</tr>
484
+<tr class="odd">
485
+<td><span style="font-family: 'Courier New', monospace;">KullbackLeiblerRanking</span></td>
486
+<td>Kullback-Leibler distance between distributions ranking</td>
487
+<td>✔</td>
488
+<td>✔</td>
489
+<td>✔</td>
490
+</tr>
491
+</tbody>
492
+</table>
493
+<p>Likewise, a variety of classifiers is also provided.</p>
494
+<table class="table">
495
+<colgroup>
496
+<col width="9%">
497
+<col width="61%">
498
+<col width="9%">
499
+<col width="9%">
500
+<col width="9%">
501
+</colgroup>
502
+<thead><tr class="header">
503
+<th>Function(s)</th>
504
+<th>Description</th>
505
+<th>DM</th>
506
+<th>DV</th>
507
+<th>DD</th>
508
+</tr></thead>
509
+<tbody>
510
+<tr class="odd">
511
+<td>
512
+<span style="padding:1px; border:2px dashed #e64626; display:inline-block; margin-bottom: 3px; font-family: 'Courier New', monospace;">DLDAtrainInterface</span>,<br><span style="padding:1px; border:2px dashed #e64626; display:inline-block; font-family: 'Courier New', monospace;">DLDApredictInterface</span>
513
+</td>
514
+<td>Wrappers for sparsediscrim’s functions <span style="font-family: 'Courier New', monospace;">dlda</span> and
515
+<span style="font-family: 'Courier New', monospace;">predict.dlda</span>
516
+</td>
517
+<td>✔</td>
518
+<td></td>
519
+<td></td>
520
+</tr>
521
+<tr class="even">
522
+<td><span style="font-family: 'Courier New', monospace;">classifyInterface</span></td>
523
+<td>Wrapper for PoiClaClu’s Poisson LDA function <span style="font-family: 'Courier New', monospace;">classify</span>
524
+</td>
525
+<td>✔</td>
526
+<td></td>
527
+<td></td>
528
+</tr>
529
+<tr class="odd">
530
+<td>
531
+<span style="font-family: 'Courier New', monospace;">elasticNetGLMtrainInterface</span>,
532
+<span style="font-family: 'Courier New', monospace;">elasticNetGLMpredictInterface</span>
533
+</td>
534
+<td>Wrappers for glmnet’s elastic net GLM functions <span style="font-family: 'Courier New', monospace;">glmnet</span> and
535
+<span style="font-family: 'Courier New', monospace;">predict.glmnet</span>
536
+</td>
537
+<td>✔</td>
538
+<td></td>
539
+<td></td>
540
+</tr>
541
+<tr class="even">
542
+<td>
543
+<span style="font-family: 'Courier New', monospace;">NSCtrainInterface</span>,
544
+<span style="font-family: 'Courier New', monospace;">NSCpredictInterface</span>
545
+</td>
546
+<td>Wrappers for pamr’s Nearest Shrunken Centroid functions <span style="font-family: 'Courier New', monospace;">pamr.train</span>
547
+and <span style="font-family: 'Courier New', monospace;">pamr.predict</span>
548
+</td>
549
+<td>✔</td>
550
+<td></td>
551
+<td></td>
552
+</tr>
553
+<tr class="odd">
554
+<td><span style="font-family: 'Courier New', monospace;">fisherDiscriminant</span></td>
555
+<td>Implementation of Fisher’s LDA for departures from normality</td>
556
+<td>✔</td>
557
+<td>✔*</td>
558
+<td></td>
559
+</tr>
560
+<tr class="even">
561
+<td>
562
+<span style="font-family: 'Courier New', monospace;">mixModelsTrain</span>,
563
+<span style="font-family: 'Courier New', monospace;">mixModelsPredict</span>
564
+</td>
565
+<td>Feature-wise mixtures of normals and voting</td>
566
+<td>✔</td>
567
+<td>✔</td>
568
+<td>✔</td>
569
+</tr>
570
+<tr class="odd">
571
+<td><span style="font-family: 'Courier New', monospace;">naiveBayesKernel</span></td>
572
+<td>Feature-wise kernel density estimation and voting</td>
573
+<td>✔</td>
574
+<td>✔</td>
575
+<td>✔</td>
576
+</tr>
577
+<tr class="even">
578
+<td>
579
+<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>,
580
+<span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span>
581
+</td>
582
+<td>Wrappers for ranger’s functions <span style="font-family: 'Courier New', monospace;">ranger</span> and
583
+<span style="font-family: 'Courier New', monospace;">predict</span>
584
+</td>
585
+<td>✔</td>
586
+<td>✔</td>
587
+<td>✔</td>
588
+</tr>
589
+<tr class="odd">
590
+<td>
591
+<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingTrainInterface</span>,
592
+<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingPredictInterface</span>
593
+</td>
594
+<td>Wrappers for xgboost’s functions <span style="font-family: 'Courier New', monospace;">xgboost</span>
595
+and <span style="font-family: 'Courier New', monospace;">predict</span>
596
+</td>
597
+<td>✔</td>
598
+<td>✔</td>
599
+<td>✔</td>
600
+</tr>
601
+<tr class="even">
602
+<td><span style="font-family: 'Courier New', monospace;">kNNinterface</span></td>
603
+<td>Wrapper for class’s function <span style="font-family: 'Courier New', monospace;">knn</span>
604
+</td>
605
+<td>✔</td>
606
+<td>✔</td>
607
+<td>✔</td>
608
+</tr>
609
+<tr class="odd">
610
+<td>
611
+<span style="font-family: 'Courier New', monospace;">SVMtrainInterface</span>,
612
+<span style="font-family: 'Courier New', monospace;">SVMpredictInterface</span>
613
+</td>
614
+<td>Wrappers for e1071’s functions <span style="font-family: 'Courier New', monospace;">svm</span> and
615
+<span style="font-family: 'Courier New', monospace;">predict.svm</span>
616
+</td>
617
+<td>✔</td>
618
+<td>✔ †</td>
619
+<td>✔ †</td>
620
+</tr>
621
+</tbody>
622
+</table>
623
+<p>* If ordinary numeric measurements have been transformed to absolute
624
+deviations using <span style="font-family: 'Courier New', monospace;">subtractFromLocation</span>.<br>
625
+† If the value of <span style="font-family: 'Courier New', monospace;">kernel</span> is
626
+not <span style="font-family: 'Courier New', monospace;">"linear"</span>.</p>
627
+<p>If a desired selection or classification method is not already
628
+implemented, rules for writing functions to work with
629
+<strong>ClassifyR</strong> are outlined in the wrapper vignette. Please
630
+visit it for more information.</p>
631
+</div>
632
+<div class="section level4">
633
+<h4 id="provided-meta-feature-methods">Provided Meta-feature Methods<a class="anchor" aria-label="anchor" href="#provided-meta-feature-methods"></a>
634
+</h4>
635
+<p>A number of methods are provided for users to enable classification
636
+in a feature-set-centric or interactor-centric way. The meta-feature
637
+creation functions should be used before cross-validation is done.</p>
638
+<table class="table">
639
+<colgroup>
640
+<col width="9%">
641
+<col width="61%">
642
+<col width="14%">
643
+<col width="14%">
644
+</colgroup>
645
+<thead><tr class="header">
646
+<th>Function</th>
647
+<th>Description</th>
648
+<th align="center">Before CV</th>
649
+<th align="center">During CV</th>
650
+</tr></thead>
651
+<tbody>
652
+<tr class="odd">
653
+<td><span style="font-family: 'Courier New', monospace;">edgesToHubNetworks</span></td>
654
+<td>Takes a two-column <span style="font-family: 'Courier New', monospace;">matrix</span> or
655
+<span style="font-family: 'Courier New', monospace;">DataFrame</span>
656
+and finds all nodes with at least a minimum number of interactions</td>
657
+<td align="center">✔</td>
658
+<td align="center"></td>
659
+</tr>
660
+<tr class="even">
661
+<td><span style="font-family: 'Courier New', monospace;">featureSetSummary</span></td>
662
+<td><span style="white-space: nowrap">Considers sets of features and
663
+calculates their mean or median</span></td>
664
+<td align="center">✔</td>
665
+<td align="center"></td>
666
+</tr>
667
+<tr class="odd">
668
+<td><span style="font-family: 'Courier New', monospace;">pairsDifferencesSelection</span></td>
669
+<td>Finds a set of pairs of features whose measurement inequalities can
670
+be used for prediction</td>
671
+<td align="center"></td>
672
+<td align="center">✔</td>
673
+</tr>
674
+<tr class="even">
675
+<td><span style="font-family: 'Courier New', monospace;">kTSPclassifier</span></td>
676
+<td>Voting classifier that uses inequalities between pairs of features
677
+to vote for one of two classes</td>
678
+<td align="center"></td>
679
+<td align="center">✔</td>
680
+</tr>
681
+</tbody>
682
+</table>
683
+</div>
684
+</div>
685
+<div class="section level3">
686
+<h3 id="fine-grained-cross-validation-and-modelling-using-runtests">Fine-grained Cross-validation and Modelling Using
687
+<em>runTests</em><a class="anchor" aria-label="anchor" href="#fine-grained-cross-validation-and-modelling-using-runtests"></a>
688
+</h3>