Browse code

- Elastic net GLM and ordinary GLM wrappers now automatically determine sample weights to give better performance on unbalanced classification tasks. - precisionPathwaysTrain function implemented. Prediction companion function to do in the near future.

Dario Strbenac authored on 16/02/2023 03:20:54
Showing 9 changed files

... ...
@@ -3,8 +3,8 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.3.13
7
-Date: 2023-02-13
6
+Version: 3.3.14
7
+Date: 2023-02-16
8 8
 Authors@R:
9 9
     c(
10 10
     person(given = "Dario", family = "Strbenac", email = "dario.strbenac@sydney.edu.au", role = c("aut", "cre")),
... ...
@@ -72,7 +72,7 @@ Collate:
72 72
     'interfaceXGB.R'
73 73
     'performancePlot.R'
74 74
     'plotFeatureClasses.R'
75
-    'precisionPathway.R'
75
+    'precisionPathways.R'
76 76
     'prepareData.R'
77 77
     'previousSelection.R'
78 78
     'previousTrained.R'
... ...
@@ -76,7 +76,8 @@ exportMethods(models)
76 76
 exportMethods(performance)
77 77
 exportMethods(performancePlot)
78 78
 exportMethods(plotFeatureClasses)
79
-exportMethods(precisionPathwayTrain)
79
+exportMethods(precisionPathwaysPredict)
80
+exportMethods(precisionPathwaysTrain)
80 81
 exportMethods(predictions)
81 82
 exportMethods(prepareData)
82 83
 exportMethods(rankingPlot)
... ...
@@ -23,7 +23,7 @@
23 23
 #' parameters which will be passed into the data cleaning function. The names of the list must be one of \code{"prepare"},
24 24
 #' \code{"select"}, \code{"train"}, \code{"predict"}. To remove one of the defaults (see the article titled Parameter Tuning Presets for crossValidate and Their Customisation on
25 25
 #' the website), specify the list element to be \code{NULL}. For the valid element names in the \code{"prepare"} list, see \code{?prepareData}.
26
-#' @param clinicalPredictors If \code{measurements} is a \code{MultiAssayExperiment},
26
+#' @param clinicalPredictors Default: \code{NULL}. If \code{measurements} is a \code{MultiAssayExperiment},
27 27
 #' a character vector of features to use in modelling. This allows avoidance of things like sample IDs,
28 28
 #' sample acquisition dates, etc. which are not relevant for outcome prediction.
29 29
 #' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons
... ...
@@ -92,6 +92,7 @@
92 92
 #' # performancePlot(c(result, resultMerge))
93 93
 #' 
94 94
 #' @importFrom survival Surv
95
+#' @usage NULL
95 96
 setGeneric("crossValidate", function(measurements, outcome, ...)
96 97
     standardGeneric("crossValidate"))
97 98
 
... ...
@@ -116,7 +117,7 @@ setMethod("crossValidate", "DataFrame",
116 117
               # Check that data is in the right format, if not already done for MultiAssayExperiment input.
117 118
               if(!"assay" %in% colnames(S4Vectors::mcols(measurements))) # Assay is put there by prepareData for MultiAssayExperiment, skip if present. 
118 119
               {
119
-                prepParams <- list(measurements, outcome, clinicalPredictors)
120
+                prepParams <- list(measurements, outcome)
120 121
                 if("prepare" %in% names(extraParams))
121 122
                   prepParams <- c(prepParams, extraParams[["prepare"]])
122 123
                 measurementsAndOutcome <- do.call(prepareData, prepParams)
... ...
@@ -9,12 +9,12 @@ elasticNetGLMtrainInterface <- function(measurementsTrain, classesTrain, lambda
9 9
 
10 10
   # One-hot encoding needed.    
11 11
   measurementsTrain <- MatrixModels::model.Matrix(~ 0 + ., data = measurementsTrain)
12
-  fitted <- glmnet::glmnet(measurementsTrain, classesTrain, family = "multinomial", ...)
12
+  fitted <- glmnet::glmnet(measurementsTrain, classesTrain, family = "multinomial", weights = as.numeric(1 / (table(classesTrain)[classesTrain] / length(classesTrain))), ...)
13
+  # Inverse class size weighting needed to give decent predictions when class imbalance.
13 14
   
14
-
15 15
   if(is.null(lambda)) # fitted has numerous models for automatically chosen lambda values.
16 16
   { # Pick one lambda based on resubstitution performance. But not the one that makes all variables excluded from model.
17
-      lambdaConsider <- colSums(as.matrix(fitted[["beta"]][[1]])) != 0
17
+    lambdaConsider <- colSums(as.matrix(fitted[["beta"]][[1]])) != 0
18 18
     bestLambda <- fitted[["lambda"]][lambdaConsider][which.min(sapply(fitted[["lambda"]][lambdaConsider], function(lambda) # Largest Lambda with minimum balanced error rate.
19 19
     {
20 20
       classPredictions <- factor(as.character(predict(fitted, measurementsTrain, s = lambda, type = "class")), levels = fitted[["classnames"]])
... ...
@@ -9,7 +9,7 @@ GLMtrainInterface <- function(measurementsTrain, classesTrain, ..., verbose = 3)
9 9
         fitData <- cbind(measurementsTrain, class = classesTrain)
10 10
         classesTrain <- "class" # Column name for glm fit.
11 11
     } else {fitData <- measurementsTrain}
12
-  glm(class ~ . + 0, family = binomial, data = fitData, ...)
12
+  glm(class ~ . + 0, family = binomial, data = fitData, weights = as.numeric(1 / (table(classesTrain)[classesTrain] / length(classesTrain))), ...)
13 13
 }
14 14
 attr(GLMtrainInterface, "name") <- "GLMtrainInterface"
15 15
 
16 16
deleted file mode 100644
... ...
@@ -1,50 +0,0 @@
1
-# Basically, an ordered list of cross-validations.
2
-
3
-setGeneric("precisionPathwayTrain", function(measurements, class, ...)
4
-    standardGeneric("precisionPathwayTrain"))
5
-
6
-#' @rdname precisionPathwayTrain
7
-#' @export
8
-setMethod("precisionPathwayTrain", "MultiAssayExperimentOrList", 
9
-          function(measurements, class, clinicalPredictors = NULL, maxMissingProp = 0.0, topNvariance = NULL,
10
-                   fixedAssays = "clinical", confidenceCutoff = 0.8, minAssaySamples = 10,
11
-                   nFeatures = 20, selectionMethod = setNames(c("none", rep("t-test", length(measurements))), c("clinical", names(measurements))),
12
-                   classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))),
13
-                   nFolds = 5, nRepeats = 20, nCores = 1)
14
-          {
15
-            if(is.list(measurements)) # Ensure plain list has clinical data.
16
-            {
17
-              # One of the tables must be named "clinical".
18
-              if (!any(names(measurements) == "clinical"))
19
-                stop("One of the tables must be named \"clinical\".")
20
-            }
21
-            prepArgs <- list(measurements, outcomeColumns = class, clinicalPredictors = clinicalPredictors,
22
-                             maxMissingProp = maxMissingProp, topNvariance = topNvariance)
23
-            measurementsAndClass <- do.call(prepareData, prepArgs)
24
-              
25
-            .precisionPathwayTrain(measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]],
26
-                                   fixedAssays = fixedAssays, confidenceCutoff = confidenceCutoff,
27
-                                   minAssaySamples = minAssaySamples, nFeatures = nFeatures,
28
-                                   selectionMethod = selectionMethod, classifier = classifier,
29
-                                   nFolds = nFolds, nRepeats = nRepeats, nCores = nCores)
30
-          })
31
-
32
-# Internal method which carries out all of the processing, obtaining reformatted data from the
33
-# MultiAssayExperiment and list (of basic rectangular tables) S4 methods.
34
-.precisionPathwayTrain <- function(measurements, class, fixedAssays = "clinical",
35
-                   confidenceCutoff = 0.8, minAssaySamples = 10,
36
-                   nFeatures = 20, selectionMethod = setNames(c(NULL, rep("t-test", length(measurements))), c("clinical", names(measurements))),
37
-                   classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))),
38
-                   nFolds = 5, nRepeats = 20, nCores = 1, ...)
39
-          {
40
-            
41
-            # Step 1: Determine all valid permutations of assays, taking into account the
42
-            # assays to be used and which assays, if any, must be included.
43
-            assayIDs <- unique(S4Vectors::mcols(measurements)[["assay"]])
44
-            assaysPermutations <- .permutations(assayIDs, fixed = data.frame(seq_along(fixedAssays), fixedAssays))
45
-            
46
-            # Step 2: Build a classifier for each assay using all of the samples.
47
-            modelsList <- crossValidate(measurements, class, nFeatures, selectionMethod,
48
-                                        classifier = classifier, nFolds = nFolds,
49
-                                        nRepeats = nRepeats, nCores = nCores)
50
-        }
51 0
\ No newline at end of file
52 1
new file mode 100644
... ...
@@ -0,0 +1,180 @@
1
+#' Precision Pathways for Sample Prediction Based on Prediction Confidence.
2
+#' 
3
+#' Precision pathways allows the evaluation of various permutations of multiomics or multiview data.
4
+#' Samples are predicted by a particular assay if they were consistently predicted as a particular class
5
+#' during cross-validation. Otherwise, they are passed onto subsequent assays/tiers for prediction. Balanced accuracy
6
+#' is used to evaluate overall prediction performance and sample-specific accuracy for individual-level evaluation.
7
+#'
8
+#' @param measurements Either a \code{\link{MultiAssayExperiment}} or a list of the basic tabular objects containing the data.
9
+#' @param class Same as \code{measurements} but only training samples. IF \code{measurements} is a \code{list}, may also be
10
+#' a vector of classes.
11
+#' @param clinicalPredictors Default: \code{NULL}. Must be a character vector of clinical features to use in modelling. This allows avoidance of things like sample IDs,
12
+#' sample acquisition dates, etc. which are not relevant for outcome prediction.
13
+#' @param maxMissingProp Default: 0.0. A proportion less than 1 which is the maximum
14
+#' tolerated proportion of missingness for a feature to be retained for modelling.
15
+#' @param topNvariance Default: NULL. An integer number of most variable features per assay to subset to.
16
+#' Assays with less features won't be reduced in size.
17
+#' @param fixedAssays A character vector of assay names specifying any assays which must be at the
18
+#' beginning of the pathway.
19
+#' @param confidenceCutoff The minimum confidence of predictions for a sample to be predicted by a particular issue
20
+#' . If a sample was predicted to belong to a particular class a proportion \eqn{p} times, then the confidence is \eqn{2 \times |p - 0.5|}.
21
+#' @param minAssaySamples An integer specifying the minimum number of samples a tier may have. If a subsequent tier
22
+#' would have less than this number of samples, the samples are incorporated into the current tier.
23
+#' @param nFeatures Default: 20. The number of features to consider during feature selection, if feature selection is done.
24
+#' @param selectionMethod A named character vector of feature selection methods to use for the assays, one for each. The names must correspond to names of \code{measurements}.
25
+#' @param classifier A named character vector of modelling methods to use for the assays, one for each. The names must correspond to names of \code{measurements}.
26
+#' @param nFolds A numeric specifying the number of folds to use for cross-validation.
27
+#' @param nRepeats A numeric specifying the the number of repeats or permutations to use for cross-validation.
28
+#' @param nCores A numeric specifying the number of cores used if the user wants to use parallelisation.
29
+#' @param pathways A set of pathways created by \code{precisionPathwaysTrain} to be used for predicting on a new data set.
30
+#' @rdname precisionPathways
31
+#' @return An object of class \code{PrecisionPathways} which is basically a named list that other plotting and
32
+#' tabulating functions can use.
33
+#' @examples
34
+#' # To be determined.
35
+
36
+#' @usage NULL
37
+setGeneric("precisionPathwaysTrain", function(measurements, class, ...)
38
+    standardGeneric("precisionPathwaysTrain"))
39
+
40
+#' @rdname precisionPathways
41
+#' @export
42
+setMethod("precisionPathwaysTrain", "MultiAssayExperimentOrList", 
43
+          function(measurements, class, clinicalPredictors = NULL, maxMissingProp = 0.0, topNvariance = NULL,
44
+                   fixedAssays = "clinical", confidenceCutoff = 0.8, minAssaySamples = 10,
45
+                   nFeatures = 20, selectionMethod = setNames(c("none", rep("t-test", length(measurements))), c("clinical", names(measurements))),
46
+                   classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))),
47
+                   nFolds = 5, nRepeats = 20, nCores = 1)
48
+          {
49
+            if(is.list(measurements)) # Ensure plain list has clinical data.
50
+            {
51
+              # One of the tables must be named "clinical".
52
+              if (!any(names(measurements) == "clinical"))
53
+                stop("One of the tables must be named \"clinical\".")
54
+            }
55
+            prepArgs <- list(measurements, outcomeColumns = class, clinicalPredictors = clinicalPredictors,
56
+                             maxMissingProp = maxMissingProp, topNvariance = topNvariance)
57
+            measurementsAndClass <- do.call(prepareData, prepArgs)
58
+              
59
+            .precisionPathwaysTrain(measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]],
60
+                                   fixedAssays = fixedAssays, confidenceCutoff = confidenceCutoff,
61
+                                   minAssaySamples = minAssaySamples, nFeatures = nFeatures,
62
+                                   selectionMethod = selectionMethod, classifier = classifier,
63
+                                   nFolds = nFolds, nRepeats = nRepeats, nCores = nCores)
64
+          })
65
+
66
+# Internal method which carries out all of the processing, obtaining reformatted data from the
67
+# MultiAssayExperiment and list (of basic rectangular tables) S4 methods.
68
+.precisionPathwaysTrain <- function(measurements, class, fixedAssays = "clinical",
69
+                   confidenceCutoff = 0.8, minAssaySamples = 10,
70
+                   nFeatures = 20, selectionMethod = setNames(c(NULL, rep("t-test", length(measurements))), c("clinical", names(measurements))),
71
+                   classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))),
72
+                   nFolds = 5, nRepeats = 20, nCores = 1)
73
+          {
74
+            # Step 1: Determine all valid permutations of assays, taking into account the
75
+            # assays to be used and which assays, if any, must be included.
76
+            assayIDs <- unique(S4Vectors::mcols(measurements)[["assay"]])
77
+            assaysPermutations <- .permutations(assayIDs, fixed = data.frame(seq_along(fixedAssays), fixedAssays))
78
+            permutationIDs <- apply(assaysPermutations, 2, function(permutation) paste(permutation, collapse = '-'))
79
+            
80
+            # Step 2: Build a classifier for each assay using all of the samples.
81
+            modelsList <- crossValidate(measurements, class, nFeatures, selectionMethod,
82
+                                        classifier = classifier, nFolds = nFolds,
83
+                                        nRepeats = nRepeats, nCores = nCores)
84
+            modelsList <- lapply(modelsList, calcCVperformance, "Sample Accuracy") # Add sample accuracy, which can be subset later.
85
+
86
+            # Step 3: Loop over each pathway and each assay in order to determine which samples are used at that level
87
+            # and which are passed onwards.
88
+            precisionPathways <- lapply(as.data.frame(assaysPermutations), function(permutation)
89
+            {
90
+              assaysProcessed <- character()
91
+              samplesUsed <- character()
92
+              individualsTableAll <- S4Vectors::DataFrame()
93
+              tierTableAll <- S4Vectors::DataFrame()
94
+              breakEarly = FALSE
95
+              for(assay in permutation)
96
+              {
97
+                # Step 3a: Identify all samples which are consistently predicted.
98
+                modelIndex <- match(assay, assayIDs)
99
+                allPredictions <- predictions(modelsList[[modelIndex]])
100
+                allSampleIDs <- sampleNames(modelsList[[modelIndex]])
101
+                predictionsSamplesCounts <- table(allPredictions[, "sample"], allPredictions[, "class"])
102
+                confidences <- 2 * abs(predictionsSamplesCounts[, 1] / rowSums(predictionsSamplesCounts) - 0.5)
103
+                sampleIDsUse <- names(confidences)[confidences > confidenceCutoff]
104
+                
105
+                # Check if too few samples left for next round. Include them in this round, if so.
106
+                remainingIDs <- setdiff(allSampleIDs, c(samplesUsed, sampleIDsUse))
107
+                if(length(remainingIDs) < minAssaySamples)
108
+                {
109
+                  sampleIDsUse <- c(sampleIDsUse, remainingIDs)
110
+                  breakEarly = TRUE
111
+                }
112
+                
113
+                predictionsSamplesCounts <- predictionsSamplesCounts[sampleIDsUse, ]
114
+                
115
+                # Step 3b: Individuals predictions and sample-wise accuracy, tier-wise error.
116
+                maxVotes <- apply(predictionsSamplesCounts, 1, function(sample) which.max(sample))
117
+                predictedClasses <- factor(colnames(predictionsSamplesCounts)[maxVotes],
118
+                                           levels = colnames(predictionsSamplesCounts))    
119
+                individualsTable <- S4Vectors::DataFrame(Tier = assay,
120
+                                                         `Sample ID` = sampleIDsUse,
121
+                                                         `Predicted` = predictedClasses,
122
+                                                         `Accuracy` = performance(modelsList[[modelIndex]])[["Sample Accuracy"]][sampleIDsUse],
123
+                                                         check.names = FALSE)
124
+                knownClasses <- actualOutcome(modelsList[[modelIndex]])[match(sampleIDsUse, allSampleIDs)]
125
+                balancedAccuracy <- calcExternalPerformance(knownClasses, predictedClasses)
126
+                tierTable <- S4Vectors::DataFrame(Tier = assay,
127
+                                                  `Balanced Accuracy` = balancedAccuracy, check.names = FALSE)
128
+                
129
+                assaysProcessed <- c(assaysProcessed, assay)
130
+                individualsTableAll <- rbind(individualsTableAll, individualsTable)
131
+                tierTableAll <- rbind(tierTableAll, tierTable)
132
+                samplesUsed <- c(samplesUsed, sampleIDsUse)
133
+                
134
+                if(breakEarly == TRUE) break
135
+              }
136
+              pathwayString <- paste(assaysProcessed, collapse = '-')
137
+              parameters = list(confidenceCutoff = confidenceCutoff, minAssaySamples = minAssaySamples)
138
+              list(models = modelsList, parameters = parameters, pathway = pathwayString,
139
+                  individuals = individualsTableAll, tiers = tierTableAll)
140
+            })
141
+            
142
+            class(precisionPathways) <- "PrecisionPathways"
143
+            names(precisionPathways) <- sapply(precisionPathways, "[[", "pathway")
144
+            precisionPathways
145
+}
146
+
147
+# A nice print method to avoid flooding the screen with lots of tables
148
+# when result is shown in console.
149
+print.PrecisionPathways <- function(x)
150
+{
151
+  cat("An object of class 'PrecisionPathways'.\n")
152
+  cat("Pathways:\n")
153
+  cat(paste(names(x), collapse = '\n'))
154
+}
155
+
156
+#' @usage NULL
157
+setGeneric("precisionPathwaysPredict", function(pathways, measurements, class, ...)
158
+    standardGeneric("precisionPathwaysPredict"))
159
+
160
+#' @rdname precisionPathways
161
+#' @export
162
+setMethod("precisionPathwaysPredict", "MultiAssayExperimentOrList", 
163
+          function(pathways, measurements, class)
164
+          {
165
+            if(is.list(measurements)) # Ensure plain list has clinical data.
166
+            {
167
+              # One of the tables must be named "clinical".
168
+              if (!any(names(measurements) == "clinical"))
169
+                stop("One of the tables must be named \"clinical\".")
170
+            }
171
+            prepArgs <- list(measurements, outcomeColumns = class)
172
+            measurementsAndClass <- do.call(prepareData, prepArgs)
173
+              
174
+            .precisionPathwaysPredict(pathways, measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]])
175
+          })
176
+
177
+.precisionPathwaysPredict <- function(pathways, measurements, class)
178
+{
179
+  # To do.
180
+}
0 181
\ No newline at end of file
... ...
@@ -15,8 +15,6 @@
15 15
 \alias{predict.trainedByClassifyR}
16 16
 \title{Cross-validation to evaluate classification performance.}
17 17
 \usage{
18
-crossValidate(measurements, outcome, ...)
19
-
20 18
 \S4method{crossValidate}{DataFrame}(
21 19
   measurements,
22 20
   outcome,
... ...
@@ -158,7 +156,7 @@ parameters which will be passed into the data cleaning function. The names of th
158 156
 \code{"select"}, \code{"train"}, \code{"predict"}. To remove one of the defaults (see the article titled Parameter Tuning Presets for crossValidate and Their Customisation on
159 157
 the website), specify the list element to be \code{NULL}. For the valid element names in the \code{"prepare"} list, see \code{?prepareData}.}
160 158
 
161
-\item{clinicalPredictors}{If \code{measurements} is a \code{MultiAssayExperiment},
159
+\item{clinicalPredictors}{Default: \code{NULL}. If \code{measurements} is a \code{MultiAssayExperiment},
162 160
 a character vector of features to use in modelling. This allows avoidance of things like sample IDs,
163 161
 sample acquisition dates, etc. which are not relevant for outcome prediction.}
164 162
 
165 163
new file mode 100644
... ...
@@ -0,0 +1,80 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/precisionPathways.R
3
+\name{precisionPathwaysTrain}
4
+\alias{precisionPathwaysTrain}
5
+\alias{precisionPathwaysTrain,MultiAssayExperimentOrList-method}
6
+\alias{precisionPathwaysPredict,MultiAssayExperimentOrList-method}
7
+\title{Precision Pathways for Sample Prediction Based on Prediction Confidence.}
8
+\usage{
9
+\S4method{precisionPathwaysTrain}{MultiAssayExperimentOrList}(
10
+  measurements,
11
+  class,
12
+  clinicalPredictors = NULL,
13
+  maxMissingProp = 0,
14
+  topNvariance = NULL,
15
+  fixedAssays = "clinical",
16
+  confidenceCutoff = 0.8,
17
+  minAssaySamples = 10,
18
+  nFeatures = 20,
19
+  selectionMethod = setNames(c("none", rep("t-test", length(measurements))),
20
+    c("clinical", names(measurements))),
21
+  classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))),
22
+    c("clinical", names(measurements))),
23
+  nFolds = 5,
24
+  nRepeats = 20,
25
+  nCores = 1
26
+)
27
+
28
+\S4method{precisionPathwaysPredict}{MultiAssayExperimentOrList}(pathways, measurements, class)
29
+}
30
+\arguments{
31
+\item{measurements}{Either a \code{\link{MultiAssayExperiment}} or a list of the basic tabular objects containing the data.}
32
+
33
+\item{class}{Same as \code{measurements} but only training samples. IF \code{measurements} is a \code{list}, may also be
34
+a vector of classes.}
35
+
36
+\item{clinicalPredictors}{Default: \code{NULL}. Must be a character vector of clinical features to use in modelling. This allows avoidance of things like sample IDs,
37
+sample acquisition dates, etc. which are not relevant for outcome prediction.}
38
+
39
+\item{maxMissingProp}{Default: 0.0. A proportion less than 1 which is the maximum
40
+tolerated proportion of missingness for a feature to be retained for modelling.}
41
+
42
+\item{topNvariance}{Default: NULL. An integer number of most variable features per assay to subset to.
43
+Assays with less features won't be reduced in size.}
44
+
45
+\item{fixedAssays}{A character vector of assay names specifying any assays which must be at the
46
+beginning of the pathway.}
47
+
48
+\item{confidenceCutoff}{The minimum confidence of predictions for a sample to be predicted by a particular issue
49
+. If a sample was predicted to belong to a particular class a proportion \eqn{p} times, then the confidence is \eqn{2 \times |p - 0.5|}.}
50
+
51
+\item{minAssaySamples}{An integer specifying the minimum number of samples a tier may have. If a subsequent tier
52
+would have less than this number of samples, the samples are incorporated into the current tier.}
53
+
54
+\item{nFeatures}{Default: 20. The number of features to consider during feature selection, if feature selection is done.}
55
+
56
+\item{selectionMethod}{A named character vector of feature selection methods to use for the assays, one for each. The names must correspond to names of \code{measurements}.}
57
+
58
+\item{classifier}{A named character vector of modelling methods to use for the assays, one for each. The names must correspond to names of \code{measurements}.}
59
+
60
+\item{nFolds}{A numeric specifying the number of folds to use for cross-validation.}
61
+
62
+\item{nRepeats}{A numeric specifying the the number of repeats or permutations to use for cross-validation.}
63
+
64
+\item{nCores}{A numeric specifying the number of cores used if the user wants to use parallelisation.}
65
+
66
+\item{pathways}{A set of pathways created by \code{precisionPathwaysTrain} to be used for predicting on a new data set.}
67
+}
68
+\value{
69
+An object of class \code{PrecisionPathways} which is basically a named list that other plotting and
70
+tabulating functions can use.
71
+}
72
+\description{
73
+Precision pathways allows the evaluation of various permutations of multiomics or multiview data.
74
+Samples are predicted by a particular assay if they were consistently predicted as a particular class
75
+during cross-validation. Otherwise, they are passed onto subsequent assays/tiers for prediction. Balanced accuracy
76
+is used to evaluate overall prediction performance and sample-specific accuracy for individual-level evaluation.
77
+}
78
+\examples{
79
+# To be determined.
80
+}