... | ... |
@@ -3,8 +3,8 @@ Type: Package |
3 | 3 |
Title: A framework for cross-validated classification problems, with |
4 | 4 |
applications to differential variability and differential |
5 | 5 |
distribution testing |
6 |
-Version: 3.3.13 |
|
7 |
-Date: 2023-02-13 |
|
6 |
+Version: 3.3.14 |
|
7 |
+Date: 2023-02-16 |
|
8 | 8 |
Authors@R: |
9 | 9 |
c( |
10 | 10 |
person(given = "Dario", family = "Strbenac", email = "dario.strbenac@sydney.edu.au", role = c("aut", "cre")), |
... | ... |
@@ -72,7 +72,7 @@ Collate: |
72 | 72 |
'interfaceXGB.R' |
73 | 73 |
'performancePlot.R' |
74 | 74 |
'plotFeatureClasses.R' |
75 |
- 'precisionPathway.R' |
|
75 |
+ 'precisionPathways.R' |
|
76 | 76 |
'prepareData.R' |
77 | 77 |
'previousSelection.R' |
78 | 78 |
'previousTrained.R' |
... | ... |
@@ -76,7 +76,8 @@ exportMethods(models) |
76 | 76 |
exportMethods(performance) |
77 | 77 |
exportMethods(performancePlot) |
78 | 78 |
exportMethods(plotFeatureClasses) |
79 |
-exportMethods(precisionPathwayTrain) |
|
79 |
+exportMethods(precisionPathwaysPredict) |
|
80 |
+exportMethods(precisionPathwaysTrain) |
|
80 | 81 |
exportMethods(predictions) |
81 | 82 |
exportMethods(prepareData) |
82 | 83 |
exportMethods(rankingPlot) |
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
#' parameters which will be passed into the data cleaning function. The names of the list must be one of \code{"prepare"}, |
24 | 24 |
#' \code{"select"}, \code{"train"}, \code{"predict"}. To remove one of the defaults (see the article titled Parameter Tuning Presets for crossValidate and Their Customisation on |
25 | 25 |
#' the website), specify the list element to be \code{NULL}. For the valid element names in the \code{"prepare"} list, see \code{?prepareData}. |
26 |
-#' @param clinicalPredictors If \code{measurements} is a \code{MultiAssayExperiment}, |
|
26 |
+#' @param clinicalPredictors Default: \code{NULL}. If \code{measurements} is a \code{MultiAssayExperiment}, |
|
27 | 27 |
#' a character vector of features to use in modelling. This allows avoidance of things like sample IDs, |
28 | 28 |
#' sample acquisition dates, etc. which are not relevant for outcome prediction. |
29 | 29 |
#' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons |
... | ... |
@@ -92,6 +92,7 @@ |
92 | 92 |
#' # performancePlot(c(result, resultMerge)) |
93 | 93 |
#' |
94 | 94 |
#' @importFrom survival Surv |
95 |
+#' @usage NULL |
|
95 | 96 |
setGeneric("crossValidate", function(measurements, outcome, ...) |
96 | 97 |
standardGeneric("crossValidate")) |
97 | 98 |
|
... | ... |
@@ -116,7 +117,7 @@ setMethod("crossValidate", "DataFrame", |
116 | 117 |
# Check that data is in the right format, if not already done for MultiAssayExperiment input. |
117 | 118 |
if(!"assay" %in% colnames(S4Vectors::mcols(measurements))) # Assay is put there by prepareData for MultiAssayExperiment, skip if present. |
118 | 119 |
{ |
119 |
- prepParams <- list(measurements, outcome, clinicalPredictors) |
|
120 |
+ prepParams <- list(measurements, outcome) |
|
120 | 121 |
if("prepare" %in% names(extraParams)) |
121 | 122 |
prepParams <- c(prepParams, extraParams[["prepare"]]) |
122 | 123 |
measurementsAndOutcome <- do.call(prepareData, prepParams) |
... | ... |
@@ -9,12 +9,12 @@ elasticNetGLMtrainInterface <- function(measurementsTrain, classesTrain, lambda |
9 | 9 |
|
10 | 10 |
# One-hot encoding needed. |
11 | 11 |
measurementsTrain <- MatrixModels::model.Matrix(~ 0 + ., data = measurementsTrain) |
12 |
- fitted <- glmnet::glmnet(measurementsTrain, classesTrain, family = "multinomial", ...) |
|
12 |
+ fitted <- glmnet::glmnet(measurementsTrain, classesTrain, family = "multinomial", weights = as.numeric(1 / (table(classesTrain)[classesTrain] / length(classesTrain))), ...) |
|
13 |
+ # Inverse class size weighting needed to give decent predictions when class imbalance. |
|
13 | 14 |
|
14 |
- |
|
15 | 15 |
if(is.null(lambda)) # fitted has numerous models for automatically chosen lambda values. |
16 | 16 |
{ # Pick one lambda based on resubstitution performance. But not the one that makes all variables excluded from model. |
17 |
- lambdaConsider <- colSums(as.matrix(fitted[["beta"]][[1]])) != 0 |
|
17 |
+ lambdaConsider <- colSums(as.matrix(fitted[["beta"]][[1]])) != 0 |
|
18 | 18 |
bestLambda <- fitted[["lambda"]][lambdaConsider][which.min(sapply(fitted[["lambda"]][lambdaConsider], function(lambda) # Largest Lambda with minimum balanced error rate. |
19 | 19 |
{ |
20 | 20 |
classPredictions <- factor(as.character(predict(fitted, measurementsTrain, s = lambda, type = "class")), levels = fitted[["classnames"]]) |
... | ... |
@@ -9,7 +9,7 @@ GLMtrainInterface <- function(measurementsTrain, classesTrain, ..., verbose = 3) |
9 | 9 |
fitData <- cbind(measurementsTrain, class = classesTrain) |
10 | 10 |
classesTrain <- "class" # Column name for glm fit. |
11 | 11 |
} else {fitData <- measurementsTrain} |
12 |
- glm(class ~ . + 0, family = binomial, data = fitData, ...) |
|
12 |
+ glm(class ~ . + 0, family = binomial, data = fitData, weights = as.numeric(1 / (table(classesTrain)[classesTrain] / length(classesTrain))), ...) |
|
13 | 13 |
} |
14 | 14 |
attr(GLMtrainInterface, "name") <- "GLMtrainInterface" |
15 | 15 |
|
16 | 16 |
deleted file mode 100644 |
... | ... |
@@ -1,50 +0,0 @@ |
1 |
-# Basically, an ordered list of cross-validations. |
|
2 |
- |
|
3 |
-setGeneric("precisionPathwayTrain", function(measurements, class, ...) |
|
4 |
- standardGeneric("precisionPathwayTrain")) |
|
5 |
- |
|
6 |
-#' @rdname precisionPathwayTrain |
|
7 |
-#' @export |
|
8 |
-setMethod("precisionPathwayTrain", "MultiAssayExperimentOrList", |
|
9 |
- function(measurements, class, clinicalPredictors = NULL, maxMissingProp = 0.0, topNvariance = NULL, |
|
10 |
- fixedAssays = "clinical", confidenceCutoff = 0.8, minAssaySamples = 10, |
|
11 |
- nFeatures = 20, selectionMethod = setNames(c("none", rep("t-test", length(measurements))), c("clinical", names(measurements))), |
|
12 |
- classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))), |
|
13 |
- nFolds = 5, nRepeats = 20, nCores = 1) |
|
14 |
- { |
|
15 |
- if(is.list(measurements)) # Ensure plain list has clinical data. |
|
16 |
- { |
|
17 |
- # One of the tables must be named "clinical". |
|
18 |
- if (!any(names(measurements) == "clinical")) |
|
19 |
- stop("One of the tables must be named \"clinical\".") |
|
20 |
- } |
|
21 |
- prepArgs <- list(measurements, outcomeColumns = class, clinicalPredictors = clinicalPredictors, |
|
22 |
- maxMissingProp = maxMissingProp, topNvariance = topNvariance) |
|
23 |
- measurementsAndClass <- do.call(prepareData, prepArgs) |
|
24 |
- |
|
25 |
- .precisionPathwayTrain(measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]], |
|
26 |
- fixedAssays = fixedAssays, confidenceCutoff = confidenceCutoff, |
|
27 |
- minAssaySamples = minAssaySamples, nFeatures = nFeatures, |
|
28 |
- selectionMethod = selectionMethod, classifier = classifier, |
|
29 |
- nFolds = nFolds, nRepeats = nRepeats, nCores = nCores) |
|
30 |
- }) |
|
31 |
- |
|
32 |
-# Internal method which carries out all of the processing, obtaining reformatted data from the |
|
33 |
-# MultiAssayExperiment and list (of basic rectangular tables) S4 methods. |
|
34 |
-.precisionPathwayTrain <- function(measurements, class, fixedAssays = "clinical", |
|
35 |
- confidenceCutoff = 0.8, minAssaySamples = 10, |
|
36 |
- nFeatures = 20, selectionMethod = setNames(c(NULL, rep("t-test", length(measurements))), c("clinical", names(measurements))), |
|
37 |
- classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))), |
|
38 |
- nFolds = 5, nRepeats = 20, nCores = 1, ...) |
|
39 |
- { |
|
40 |
- |
|
41 |
- # Step 1: Determine all valid permutations of assays, taking into account the |
|
42 |
- # assays to be used and which assays, if any, must be included. |
|
43 |
- assayIDs <- unique(S4Vectors::mcols(measurements)[["assay"]]) |
|
44 |
- assaysPermutations <- .permutations(assayIDs, fixed = data.frame(seq_along(fixedAssays), fixedAssays)) |
|
45 |
- |
|
46 |
- # Step 2: Build a classifier for each assay using all of the samples. |
|
47 |
- modelsList <- crossValidate(measurements, class, nFeatures, selectionMethod, |
|
48 |
- classifier = classifier, nFolds = nFolds, |
|
49 |
- nRepeats = nRepeats, nCores = nCores) |
|
50 |
- } |
|
51 | 0 |
\ No newline at end of file |
52 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,180 @@ |
1 |
+#' Precision Pathways for Sample Prediction Based on Prediction Confidence. |
|
2 |
+#' |
|
3 |
+#' Precision pathways allows the evaluation of various permutations of multiomics or multiview data. |
|
4 |
+#' Samples are predicted by a particular assay if they were consistently predicted as a particular class |
|
5 |
+#' during cross-validation. Otherwise, they are passed onto subsequent assays/tiers for prediction. Balanced accuracy |
|
6 |
+#' is used to evaluate overall prediction performance and sample-specific accuracy for individual-level evaluation. |
|
7 |
+#' |
|
8 |
+#' @param measurements Either a \code{\link{MultiAssayExperiment}} or a list of the basic tabular objects containing the data. |
|
9 |
+#' @param class Same as \code{measurements} but only training samples. IF \code{measurements} is a \code{list}, may also be |
|
10 |
+#' a vector of classes. |
|
11 |
+#' @param clinicalPredictors Default: \code{NULL}. Must be a character vector of clinical features to use in modelling. This allows avoidance of things like sample IDs, |
|
12 |
+#' sample acquisition dates, etc. which are not relevant for outcome prediction. |
|
13 |
+#' @param maxMissingProp Default: 0.0. A proportion less than 1 which is the maximum |
|
14 |
+#' tolerated proportion of missingness for a feature to be retained for modelling. |
|
15 |
+#' @param topNvariance Default: NULL. An integer number of most variable features per assay to subset to. |
|
16 |
+#' Assays with less features won't be reduced in size. |
|
17 |
+#' @param fixedAssays A character vector of assay names specifying any assays which must be at the |
|
18 |
+#' beginning of the pathway. |
|
19 |
+#' @param confidenceCutoff The minimum confidence of predictions for a sample to be predicted by a particular issue |
|
20 |
+#' . If a sample was predicted to belong to a particular class a proportion \eqn{p} times, then the confidence is \eqn{2 \times |p - 0.5|}. |
|
21 |
+#' @param minAssaySamples An integer specifying the minimum number of samples a tier may have. If a subsequent tier |
|
22 |
+#' would have less than this number of samples, the samples are incorporated into the current tier. |
|
23 |
+#' @param nFeatures Default: 20. The number of features to consider during feature selection, if feature selection is done. |
|
24 |
+#' @param selectionMethod A named character vector of feature selection methods to use for the assays, one for each. The names must correspond to names of \code{measurements}. |
|
25 |
+#' @param classifier A named character vector of modelling methods to use for the assays, one for each. The names must correspond to names of \code{measurements}. |
|
26 |
+#' @param nFolds A numeric specifying the number of folds to use for cross-validation. |
|
27 |
+#' @param nRepeats A numeric specifying the the number of repeats or permutations to use for cross-validation. |
|
28 |
+#' @param nCores A numeric specifying the number of cores used if the user wants to use parallelisation. |
|
29 |
+#' @param pathways A set of pathways created by \code{precisionPathwaysTrain} to be used for predicting on a new data set. |
|
30 |
+#' @rdname precisionPathways |
|
31 |
+#' @return An object of class \code{PrecisionPathways} which is basically a named list that other plotting and |
|
32 |
+#' tabulating functions can use. |
|
33 |
+#' @examples |
|
34 |
+#' # To be determined. |
|
35 |
+ |
|
36 |
+#' @usage NULL |
|
37 |
+setGeneric("precisionPathwaysTrain", function(measurements, class, ...) |
|
38 |
+ standardGeneric("precisionPathwaysTrain")) |
|
39 |
+ |
|
40 |
+#' @rdname precisionPathways |
|
41 |
+#' @export |
|
42 |
+setMethod("precisionPathwaysTrain", "MultiAssayExperimentOrList", |
|
43 |
+ function(measurements, class, clinicalPredictors = NULL, maxMissingProp = 0.0, topNvariance = NULL, |
|
44 |
+ fixedAssays = "clinical", confidenceCutoff = 0.8, minAssaySamples = 10, |
|
45 |
+ nFeatures = 20, selectionMethod = setNames(c("none", rep("t-test", length(measurements))), c("clinical", names(measurements))), |
|
46 |
+ classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))), |
|
47 |
+ nFolds = 5, nRepeats = 20, nCores = 1) |
|
48 |
+ { |
|
49 |
+ if(is.list(measurements)) # Ensure plain list has clinical data. |
|
50 |
+ { |
|
51 |
+ # One of the tables must be named "clinical". |
|
52 |
+ if (!any(names(measurements) == "clinical")) |
|
53 |
+ stop("One of the tables must be named \"clinical\".") |
|
54 |
+ } |
|
55 |
+ prepArgs <- list(measurements, outcomeColumns = class, clinicalPredictors = clinicalPredictors, |
|
56 |
+ maxMissingProp = maxMissingProp, topNvariance = topNvariance) |
|
57 |
+ measurementsAndClass <- do.call(prepareData, prepArgs) |
|
58 |
+ |
|
59 |
+ .precisionPathwaysTrain(measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]], |
|
60 |
+ fixedAssays = fixedAssays, confidenceCutoff = confidenceCutoff, |
|
61 |
+ minAssaySamples = minAssaySamples, nFeatures = nFeatures, |
|
62 |
+ selectionMethod = selectionMethod, classifier = classifier, |
|
63 |
+ nFolds = nFolds, nRepeats = nRepeats, nCores = nCores) |
|
64 |
+ }) |
|
65 |
+ |
|
66 |
+# Internal method which carries out all of the processing, obtaining reformatted data from the |
|
67 |
+# MultiAssayExperiment and list (of basic rectangular tables) S4 methods. |
|
68 |
+.precisionPathwaysTrain <- function(measurements, class, fixedAssays = "clinical", |
|
69 |
+ confidenceCutoff = 0.8, minAssaySamples = 10, |
|
70 |
+ nFeatures = 20, selectionMethod = setNames(c(NULL, rep("t-test", length(measurements))), c("clinical", names(measurements))), |
|
71 |
+ classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))), |
|
72 |
+ nFolds = 5, nRepeats = 20, nCores = 1) |
|
73 |
+ { |
|
74 |
+ # Step 1: Determine all valid permutations of assays, taking into account the |
|
75 |
+ # assays to be used and which assays, if any, must be included. |
|
76 |
+ assayIDs <- unique(S4Vectors::mcols(measurements)[["assay"]]) |
|
77 |
+ assaysPermutations <- .permutations(assayIDs, fixed = data.frame(seq_along(fixedAssays), fixedAssays)) |
|
78 |
+ permutationIDs <- apply(assaysPermutations, 2, function(permutation) paste(permutation, collapse = '-')) |
|
79 |
+ |
|
80 |
+ # Step 2: Build a classifier for each assay using all of the samples. |
|
81 |
+ modelsList <- crossValidate(measurements, class, nFeatures, selectionMethod, |
|
82 |
+ classifier = classifier, nFolds = nFolds, |
|
83 |
+ nRepeats = nRepeats, nCores = nCores) |
|
84 |
+ modelsList <- lapply(modelsList, calcCVperformance, "Sample Accuracy") # Add sample accuracy, which can be subset later. |
|
85 |
+ |
|
86 |
+ # Step 3: Loop over each pathway and each assay in order to determine which samples are used at that level |
|
87 |
+ # and which are passed onwards. |
|
88 |
+ precisionPathways <- lapply(as.data.frame(assaysPermutations), function(permutation) |
|
89 |
+ { |
|
90 |
+ assaysProcessed <- character() |
|
91 |
+ samplesUsed <- character() |
|
92 |
+ individualsTableAll <- S4Vectors::DataFrame() |
|
93 |
+ tierTableAll <- S4Vectors::DataFrame() |
|
94 |
+ breakEarly = FALSE |
|
95 |
+ for(assay in permutation) |
|
96 |
+ { |
|
97 |
+ # Step 3a: Identify all samples which are consistently predicted. |
|
98 |
+ modelIndex <- match(assay, assayIDs) |
|
99 |
+ allPredictions <- predictions(modelsList[[modelIndex]]) |
|
100 |
+ allSampleIDs <- sampleNames(modelsList[[modelIndex]]) |
|
101 |
+ predictionsSamplesCounts <- table(allPredictions[, "sample"], allPredictions[, "class"]) |
|
102 |
+ confidences <- 2 * abs(predictionsSamplesCounts[, 1] / rowSums(predictionsSamplesCounts) - 0.5) |
|
103 |
+ sampleIDsUse <- names(confidences)[confidences > confidenceCutoff] |
|
104 |
+ |
|
105 |
+ # Check if too few samples left for next round. Include them in this round, if so. |
|
106 |
+ remainingIDs <- setdiff(allSampleIDs, c(samplesUsed, sampleIDsUse)) |
|
107 |
+ if(length(remainingIDs) < minAssaySamples) |
|
108 |
+ { |
|
109 |
+ sampleIDsUse <- c(sampleIDsUse, remainingIDs) |
|
110 |
+ breakEarly = TRUE |
|
111 |
+ } |
|
112 |
+ |
|
113 |
+ predictionsSamplesCounts <- predictionsSamplesCounts[sampleIDsUse, ] |
|
114 |
+ |
|
115 |
+ # Step 3b: Individuals predictions and sample-wise accuracy, tier-wise error. |
|
116 |
+ maxVotes <- apply(predictionsSamplesCounts, 1, function(sample) which.max(sample)) |
|
117 |
+ predictedClasses <- factor(colnames(predictionsSamplesCounts)[maxVotes], |
|
118 |
+ levels = colnames(predictionsSamplesCounts)) |
|
119 |
+ individualsTable <- S4Vectors::DataFrame(Tier = assay, |
|
120 |
+ `Sample ID` = sampleIDsUse, |
|
121 |
+ `Predicted` = predictedClasses, |
|
122 |
+ `Accuracy` = performance(modelsList[[modelIndex]])[["Sample Accuracy"]][sampleIDsUse], |
|
123 |
+ check.names = FALSE) |
|
124 |
+ knownClasses <- actualOutcome(modelsList[[modelIndex]])[match(sampleIDsUse, allSampleIDs)] |
|
125 |
+ balancedAccuracy <- calcExternalPerformance(knownClasses, predictedClasses) |
|
126 |
+ tierTable <- S4Vectors::DataFrame(Tier = assay, |
|
127 |
+ `Balanced Accuracy` = balancedAccuracy, check.names = FALSE) |
|
128 |
+ |
|
129 |
+ assaysProcessed <- c(assaysProcessed, assay) |
|
130 |
+ individualsTableAll <- rbind(individualsTableAll, individualsTable) |
|
131 |
+ tierTableAll <- rbind(tierTableAll, tierTable) |
|
132 |
+ samplesUsed <- c(samplesUsed, sampleIDsUse) |
|
133 |
+ |
|
134 |
+ if(breakEarly == TRUE) break |
|
135 |
+ } |
|
136 |
+ pathwayString <- paste(assaysProcessed, collapse = '-') |
|
137 |
+ parameters = list(confidenceCutoff = confidenceCutoff, minAssaySamples = minAssaySamples) |
|
138 |
+ list(models = modelsList, parameters = parameters, pathway = pathwayString, |
|
139 |
+ individuals = individualsTableAll, tiers = tierTableAll) |
|
140 |
+ }) |
|
141 |
+ |
|
142 |
+ class(precisionPathways) <- "PrecisionPathways" |
|
143 |
+ names(precisionPathways) <- sapply(precisionPathways, "[[", "pathway") |
|
144 |
+ precisionPathways |
|
145 |
+} |
|
146 |
+ |
|
147 |
+# A nice print method to avoid flooding the screen with lots of tables |
|
148 |
+# when result is shown in console. |
|
149 |
+print.PrecisionPathways <- function(x) |
|
150 |
+{ |
|
151 |
+ cat("An object of class 'PrecisionPathways'.\n") |
|
152 |
+ cat("Pathways:\n") |
|
153 |
+ cat(paste(names(x), collapse = '\n')) |
|
154 |
+} |
|
155 |
+ |
|
156 |
+#' @usage NULL |
|
157 |
+setGeneric("precisionPathwaysPredict", function(pathways, measurements, class, ...) |
|
158 |
+ standardGeneric("precisionPathwaysPredict")) |
|
159 |
+ |
|
160 |
+#' @rdname precisionPathways |
|
161 |
+#' @export |
|
162 |
+setMethod("precisionPathwaysPredict", "MultiAssayExperimentOrList", |
|
163 |
+ function(pathways, measurements, class) |
|
164 |
+ { |
|
165 |
+ if(is.list(measurements)) # Ensure plain list has clinical data. |
|
166 |
+ { |
|
167 |
+ # One of the tables must be named "clinical". |
|
168 |
+ if (!any(names(measurements) == "clinical")) |
|
169 |
+ stop("One of the tables must be named \"clinical\".") |
|
170 |
+ } |
|
171 |
+ prepArgs <- list(measurements, outcomeColumns = class) |
|
172 |
+ measurementsAndClass <- do.call(prepareData, prepArgs) |
|
173 |
+ |
|
174 |
+ .precisionPathwaysPredict(pathways, measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]]) |
|
175 |
+ }) |
|
176 |
+ |
|
177 |
+.precisionPathwaysPredict <- function(pathways, measurements, class) |
|
178 |
+{ |
|
179 |
+ # To do. |
|
180 |
+} |
|
0 | 181 |
\ No newline at end of file |
... | ... |
@@ -15,8 +15,6 @@ |
15 | 15 |
\alias{predict.trainedByClassifyR} |
16 | 16 |
\title{Cross-validation to evaluate classification performance.} |
17 | 17 |
\usage{ |
18 |
-crossValidate(measurements, outcome, ...) |
|
19 |
- |
|
20 | 18 |
\S4method{crossValidate}{DataFrame}( |
21 | 19 |
measurements, |
22 | 20 |
outcome, |
... | ... |
@@ -158,7 +156,7 @@ parameters which will be passed into the data cleaning function. The names of th |
158 | 156 |
\code{"select"}, \code{"train"}, \code{"predict"}. To remove one of the defaults (see the article titled Parameter Tuning Presets for crossValidate and Their Customisation on |
159 | 157 |
the website), specify the list element to be \code{NULL}. For the valid element names in the \code{"prepare"} list, see \code{?prepareData}.} |
160 | 158 |
|
161 |
-\item{clinicalPredictors}{If \code{measurements} is a \code{MultiAssayExperiment}, |
|
159 |
+\item{clinicalPredictors}{Default: \code{NULL}. If \code{measurements} is a \code{MultiAssayExperiment}, |
|
162 | 160 |
a character vector of features to use in modelling. This allows avoidance of things like sample IDs, |
163 | 161 |
sample acquisition dates, etc. which are not relevant for outcome prediction.} |
164 | 162 |
|
165 | 163 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,80 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/precisionPathways.R |
|
3 |
+\name{precisionPathwaysTrain} |
|
4 |
+\alias{precisionPathwaysTrain} |
|
5 |
+\alias{precisionPathwaysTrain,MultiAssayExperimentOrList-method} |
|
6 |
+\alias{precisionPathwaysPredict,MultiAssayExperimentOrList-method} |
|
7 |
+\title{Precision Pathways for Sample Prediction Based on Prediction Confidence.} |
|
8 |
+\usage{ |
|
9 |
+\S4method{precisionPathwaysTrain}{MultiAssayExperimentOrList}( |
|
10 |
+ measurements, |
|
11 |
+ class, |
|
12 |
+ clinicalPredictors = NULL, |
|
13 |
+ maxMissingProp = 0, |
|
14 |
+ topNvariance = NULL, |
|
15 |
+ fixedAssays = "clinical", |
|
16 |
+ confidenceCutoff = 0.8, |
|
17 |
+ minAssaySamples = 10, |
|
18 |
+ nFeatures = 20, |
|
19 |
+ selectionMethod = setNames(c("none", rep("t-test", length(measurements))), |
|
20 |
+ c("clinical", names(measurements))), |
|
21 |
+ classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), |
|
22 |
+ c("clinical", names(measurements))), |
|
23 |
+ nFolds = 5, |
|
24 |
+ nRepeats = 20, |
|
25 |
+ nCores = 1 |
|
26 |
+) |
|
27 |
+ |
|
28 |
+\S4method{precisionPathwaysPredict}{MultiAssayExperimentOrList}(pathways, measurements, class) |
|
29 |
+} |
|
30 |
+\arguments{ |
|
31 |
+\item{measurements}{Either a \code{\link{MultiAssayExperiment}} or a list of the basic tabular objects containing the data.} |
|
32 |
+ |
|
33 |
+\item{class}{Same as \code{measurements} but only training samples. IF \code{measurements} is a \code{list}, may also be |
|
34 |
+a vector of classes.} |
|
35 |
+ |
|
36 |
+\item{clinicalPredictors}{Default: \code{NULL}. Must be a character vector of clinical features to use in modelling. This allows avoidance of things like sample IDs, |
|
37 |
+sample acquisition dates, etc. which are not relevant for outcome prediction.} |
|
38 |
+ |
|
39 |
+\item{maxMissingProp}{Default: 0.0. A proportion less than 1 which is the maximum |
|
40 |
+tolerated proportion of missingness for a feature to be retained for modelling.} |
|
41 |
+ |
|
42 |
+\item{topNvariance}{Default: NULL. An integer number of most variable features per assay to subset to. |
|
43 |
+Assays with less features won't be reduced in size.} |
|
44 |
+ |
|
45 |
+\item{fixedAssays}{A character vector of assay names specifying any assays which must be at the |
|
46 |
+beginning of the pathway.} |
|
47 |
+ |
|
48 |
+\item{confidenceCutoff}{The minimum confidence of predictions for a sample to be predicted by a particular issue |
|
49 |
+. If a sample was predicted to belong to a particular class a proportion \eqn{p} times, then the confidence is \eqn{2 \times |p - 0.5|}.} |
|
50 |
+ |
|
51 |
+\item{minAssaySamples}{An integer specifying the minimum number of samples a tier may have. If a subsequent tier |
|
52 |
+would have less than this number of samples, the samples are incorporated into the current tier.} |
|
53 |
+ |
|
54 |
+\item{nFeatures}{Default: 20. The number of features to consider during feature selection, if feature selection is done.} |
|
55 |
+ |
|
56 |
+\item{selectionMethod}{A named character vector of feature selection methods to use for the assays, one for each. The names must correspond to names of \code{measurements}.} |
|
57 |
+ |
|
58 |
+\item{classifier}{A named character vector of modelling methods to use for the assays, one for each. The names must correspond to names of \code{measurements}.} |
|
59 |
+ |
|
60 |
+\item{nFolds}{A numeric specifying the number of folds to use for cross-validation.} |
|
61 |
+ |
|
62 |
+\item{nRepeats}{A numeric specifying the the number of repeats or permutations to use for cross-validation.} |
|
63 |
+ |
|
64 |
+\item{nCores}{A numeric specifying the number of cores used if the user wants to use parallelisation.} |
|
65 |
+ |
|
66 |
+\item{pathways}{A set of pathways created by \code{precisionPathwaysTrain} to be used for predicting on a new data set.} |
|
67 |
+} |
|
68 |
+\value{ |
|
69 |
+An object of class \code{PrecisionPathways} which is basically a named list that other plotting and |
|
70 |
+tabulating functions can use. |
|
71 |
+} |
|
72 |
+\description{ |
|
73 |
+Precision pathways allows the evaluation of various permutations of multiomics or multiview data. |
|
74 |
+Samples are predicted by a particular assay if they were consistently predicted as a particular class |
|
75 |
+during cross-validation. Otherwise, they are passed onto subsequent assays/tiers for prediction. Balanced accuracy |
|
76 |
+is used to evaluate overall prediction performance and sample-specific accuracy for individual-level evaluation. |
|
77 |
+} |
|
78 |
+\examples{ |
|
79 |
+# To be determined. |
|
80 |
+} |