Precision Pathway Finished
... | ... |
@@ -3,8 +3,8 @@ Type: Package |
3 | 3 |
Title: A framework for cross-validated classification problems, with |
4 | 4 |
applications to differential variability and differential |
5 | 5 |
distribution testing |
6 |
-Version: 3.3.14 |
|
7 |
-Date: 2023-02-16 |
|
6 |
+Version: 3.3.15 |
|
7 |
+Date: 2023-02-21 |
|
8 | 8 |
Authors@R: |
9 | 9 |
c( |
10 | 10 |
person(given = "Dario", family = "Strbenac", email = "dario.strbenac@sydney.edu.au", role = c("aut", "cre")), |
... | ... |
@@ -24,7 +24,7 @@ Imports: grid, genefilter, utils, dplyr, tidyr, rlang, ranger, ggplot2 (>= 3.0.0 |
24 | 24 |
Suggests: limma, edgeR, car, Rmixmod, gridExtra (>= 2.0.0), cowplot, |
25 | 25 |
BiocStyle, pamr, PoiClaClu, parathyroidSE, knitr, htmltools, gtable, |
26 | 26 |
scales, e1071, rmarkdown, IRanges, robustbase, glmnet, class, randomForestSRC, |
27 |
- MatrixModels, xgboost |
|
27 |
+ MatrixModels, xgboost, data.tree, ggnewscale |
|
28 | 28 |
Description: The software formalises a framework for classification and survival model evaluation |
29 | 29 |
in R. There are four stages; Data transformation, feature selection, model training, |
30 | 30 |
and prediction. The requirements of variable types and variable order are |
... | ... |
@@ -1,6 +1,10 @@ |
1 | 1 |
# Generated by roxygen2: do not edit by hand |
2 | 2 |
|
3 |
+S3method(bubblePlot,PrecisionPathways) |
|
4 |
+S3method(flowchart,PrecisionPathways) |
|
3 | 5 |
S3method(predict,trainedByClassifyR) |
6 |
+S3method(strataPlot,PrecisionPathways) |
|
7 |
+S3method(summary,PrecisionPathways) |
|
4 | 8 |
S3method(train,DataFrame) |
5 | 9 |
S3method(train,MultiAssayExperiment) |
6 | 10 |
S3method(train,data.frame) |
... | ... |
@@ -19,6 +23,7 @@ export(actualOutcome) |
19 | 23 |
export(allFeatureNames) |
20 | 24 |
export(available) |
21 | 25 |
export(calcCVperformance) |
26 |
+export(calcCostsAndPerformance) |
|
22 | 27 |
export(calcExternalPerformance) |
23 | 28 |
export(chosenFeatureNames) |
24 | 29 |
export(colCoxTests) |
... | ... |
@@ -26,8 +26,9 @@ |
26 | 26 |
#' @param nFolds A numeric specifying the number of folds to use for cross-validation. |
27 | 27 |
#' @param nRepeats A numeric specifying the the number of repeats or permutations to use for cross-validation. |
28 | 28 |
#' @param nCores A numeric specifying the number of cores used if the user wants to use parallelisation. |
29 |
-#' @param pathways A set of pathways created by \code{precisionPathwaysTrain} to be used for predicting on a new data set. |
|
29 |
+#' @param pathways A set of pathways created by \code{precisionPathwaysTrain} which is an object of class \code{PrecisionPathways} to be used for predicting on a new data set. |
|
30 | 30 |
#' @rdname precisionPathways |
31 |
+#' @aliases precisionPathwaysTrain precisionPathwaysPredict |
|
31 | 32 |
#' @return An object of class \code{PrecisionPathways} which is basically a named list that other plotting and |
32 | 33 |
#' tabulating functions can use. |
33 | 34 |
#' @examples |
... | ... |
@@ -57,7 +58,7 @@ setMethod("precisionPathwaysTrain", "MultiAssayExperimentOrList", |
57 | 58 |
measurementsAndClass <- do.call(prepareData, prepArgs) |
58 | 59 |
|
59 | 60 |
.precisionPathwaysTrain(measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]], |
60 |
- fixedAssays = fixedAssays, confidenceCutoff = confidenceCutoff, |
|
61 |
+ clinicalPredictors = clinicalPredictors, fixedAssays = fixedAssays, confidenceCutoff = confidenceCutoff, |
|
61 | 62 |
minAssaySamples = minAssaySamples, nFeatures = nFeatures, |
62 | 63 |
selectionMethod = selectionMethod, classifier = classifier, |
63 | 64 |
nFolds = nFolds, nRepeats = nRepeats, nCores = nCores) |
... | ... |
@@ -66,7 +67,7 @@ setMethod("precisionPathwaysTrain", "MultiAssayExperimentOrList", |
66 | 67 |
# Internal method which carries out all of the processing, obtaining reformatted data from the |
67 | 68 |
# MultiAssayExperiment and list (of basic rectangular tables) S4 methods. |
68 | 69 |
.precisionPathwaysTrain <- function(measurements, class, fixedAssays = "clinical", |
69 |
- confidenceCutoff = 0.8, minAssaySamples = 10, |
|
70 |
+ clinicalPredictors = clinicalPredictors, confidenceCutoff = 0.8, minAssaySamples = 10, |
|
70 | 71 |
nFeatures = 20, selectionMethod = setNames(c(NULL, rep("t-test", length(measurements))), c("clinical", names(measurements))), |
71 | 72 |
classifier = setNames(c("elasticNetGLM", rep("randomForest", length(measurements))), c("clinical", names(measurements))), |
72 | 73 |
nFolds = 5, nRepeats = 20, nCores = 1) |
... | ... |
@@ -101,6 +102,7 @@ setMethod("precisionPathwaysTrain", "MultiAssayExperimentOrList", |
101 | 102 |
predictionsSamplesCounts <- table(allPredictions[, "sample"], allPredictions[, "class"]) |
102 | 103 |
confidences <- 2 * abs(predictionsSamplesCounts[, 1] / rowSums(predictionsSamplesCounts) - 0.5) |
103 | 104 |
sampleIDsUse <- names(confidences)[confidences > confidenceCutoff] |
105 |
+ sampleIDsUse <- setdiff(sampleIDsUse, samplesUsed) |
|
104 | 106 |
|
105 | 107 |
# Check if too few samples left for next round. Include them in this round, if so. |
106 | 108 |
remainingIDs <- setdiff(allSampleIDs, c(samplesUsed, sampleIDsUse)) |
... | ... |
@@ -134,14 +136,17 @@ setMethod("precisionPathwaysTrain", "MultiAssayExperimentOrList", |
134 | 136 |
if(breakEarly == TRUE) break |
135 | 137 |
} |
136 | 138 |
pathwayString <- paste(assaysProcessed, collapse = '-') |
137 |
- parameters = list(confidenceCutoff = confidenceCutoff, minAssaySamples = minAssaySamples) |
|
138 |
- list(models = modelsList, parameters = parameters, pathway = pathwayString, |
|
139 |
+ individualsTableAll[, "Tier"] <- factor(individualsTableAll[, "Tier"], levels = permutation) |
|
140 |
+ list(pathway = pathwayString, |
|
139 | 141 |
individuals = individualsTableAll, tiers = tierTableAll) |
140 | 142 |
}) |
141 |
- |
|
142 |
- class(precisionPathways) <- "PrecisionPathways" |
|
143 | 143 |
names(precisionPathways) <- sapply(precisionPathways, "[[", "pathway") |
144 |
- precisionPathways |
|
144 |
+ result <- list(models = modelsList, assaysPermutations = assaysPermutations, |
|
145 |
+ parameters = list(confidenceCutoff = confidenceCutoff, minAssaySamples = minAssaySamples), |
|
146 |
+ clinicalPredictors = clinicalPredictors, pathways = precisionPathways) |
|
147 |
+ class(result) <- "PrecisionPathways" |
|
148 |
+ |
|
149 |
+ result |
|
145 | 150 |
} |
146 | 151 |
|
147 | 152 |
# A nice print method to avoid flooding the screen with lots of tables |
... | ... |
@@ -150,7 +155,7 @@ print.PrecisionPathways <- function(x) |
150 | 155 |
{ |
151 | 156 |
cat("An object of class 'PrecisionPathways'.\n") |
152 | 157 |
cat("Pathways:\n") |
153 |
- cat(paste(names(x), collapse = '\n')) |
|
158 |
+ cat(paste(names(x[["pathways"]]), collapse = '\n')) |
|
154 | 159 |
} |
155 | 160 |
|
156 | 161 |
#' @usage NULL |
... | ... |
@@ -159,7 +164,7 @@ setGeneric("precisionPathwaysPredict", function(pathways, measurements, class, . |
159 | 164 |
|
160 | 165 |
#' @rdname precisionPathways |
161 | 166 |
#' @export |
162 |
-setMethod("precisionPathwaysPredict", "MultiAssayExperimentOrList", |
|
167 |
+setMethod("precisionPathwaysPredict", c("PrecisionPathways", "MultiAssayExperimentOrList"), |
|
163 | 168 |
function(pathways, measurements, class) |
164 | 169 |
{ |
165 | 170 |
if(is.list(measurements)) # Ensure plain list has clinical data. |
... | ... |
@@ -168,7 +173,8 @@ setMethod("precisionPathwaysPredict", "MultiAssayExperimentOrList", |
168 | 173 |
if (!any(names(measurements) == "clinical")) |
169 | 174 |
stop("One of the tables must be named \"clinical\".") |
170 | 175 |
} |
171 |
- prepArgs <- list(measurements, outcomeColumns = class) |
|
176 |
+ |
|
177 |
+ prepArgs <- list(measurements, outcomeColumns = class, clinicalPredictors = pathways[["clinicalPredictors"]]) |
|
172 | 178 |
measurementsAndClass <- do.call(prepareData, prepArgs) |
173 | 179 |
|
174 | 180 |
.precisionPathwaysPredict(pathways, measurementsAndClass[["measurements"]], measurementsAndClass[["outcome"]]) |
... | ... |
@@ -176,5 +182,270 @@ setMethod("precisionPathwaysPredict", "MultiAssayExperimentOrList", |
176 | 182 |
|
177 | 183 |
.precisionPathwaysPredict <- function(pathways, measurements, class) |
178 | 184 |
{ |
179 |
- # To do. |
|
185 |
+ |
|
186 |
+ # Step 1: Extract all of previously fitted models and permutations. |
|
187 |
+ modelsList <- pathways[["models"]] |
|
188 |
+ assayIDs <- lapply(PPT[["models"]], function(model) model@characteristics[model@characteristics[, 1] == "Assay Name", 2]) |
|
189 |
+ assaysPermutations <- pathways[["assaysPermutations"]] |
|
190 |
+ confidenceCutoff <- pathways[["parameters"]][["confidenceCutoff"]] |
|
191 |
+ minAssaySamples <- pathways[["parameters"]][["minAssaySamples"]] |
|
192 |
+ |
|
193 |
+ # Step 2: Loop over each pathway and each assay in order to determine which samples are used at that level |
|
194 |
+ # and which are passed onwards. |
|
195 |
+ precisionPathways <- lapply(as.data.frame(assaysPermutations), function(permutation) |
|
196 |
+ { |
|
197 |
+ assaysProcessed <- character() |
|
198 |
+ samplesUsed <- character() |
|
199 |
+ individualsTableAll <- S4Vectors::DataFrame() |
|
200 |
+ tierTableAll <- S4Vectors::DataFrame() |
|
201 |
+ breakEarly = FALSE |
|
202 |
+ for(assay in permutation) |
|
203 |
+ { |
|
204 |
+ # Step 2a: Identify all samples which are consistently predicted. |
|
205 |
+ modelIndex <- match(assay, assayIDs) |
|
206 |
+ allPredictions <- predictions(modelsList[[modelIndex]]) |
|
207 |
+ allSampleIDs <- sampleNames(modelsList[[modelIndex]]) |
|
208 |
+ predictionsSamplesCounts <- table(allPredictions[, "sample"], allPredictions[, "class"]) |
|
209 |
+ confidences <- 2 * abs(predictionsSamplesCounts[, 1] / rowSums(predictionsSamplesCounts) - 0.5) |
|
210 |
+ sampleIDsUse <- names(confidences)[confidences > confidenceCutoff] |
|
211 |
+ sampleIDsUse <- setdiff(sampleIDsUse, samplesUsed) |
|
212 |
+ |
|
213 |
+ # Check if too few samples left for next round. Include them in this round, if so. |
|
214 |
+ remainingIDs <- setdiff(allSampleIDs, c(samplesUsed, sampleIDsUse)) |
|
215 |
+ if(length(remainingIDs) < minAssaySamples) |
|
216 |
+ { |
|
217 |
+ sampleIDsUse <- c(sampleIDsUse, remainingIDs) |
|
218 |
+ breakEarly = TRUE |
|
219 |
+ } else { } |
|
220 |
+ |
|
221 |
+ predictionsSamplesCounts <- predictionsSamplesCounts[sampleIDsUse, ] |
|
222 |
+ |
|
223 |
+ # Step 2b: Individuals predictions and sample-wise accuracy, tier-wise error. |
|
224 |
+ maxVotes <- apply(predictionsSamplesCounts, 1, function(sample) which.max(sample)) |
|
225 |
+ predictedClasses <- factor(colnames(predictionsSamplesCounts)[maxVotes], |
|
226 |
+ levels = colnames(predictionsSamplesCounts)) |
|
227 |
+ individualsTable <- S4Vectors::DataFrame(Tier = assay, |
|
228 |
+ `Sample ID` = sampleIDsUse, |
|
229 |
+ `Predicted` = predictedClasses, |
|
230 |
+ `Accuracy` = performance(modelsList[[modelIndex]])[["Sample Accuracy"]][sampleIDsUse], |
|
231 |
+ check.names = FALSE) |
|
232 |
+ knownClasses <- actualOutcome(modelsList[[modelIndex]])[match(sampleIDsUse, allSampleIDs)] |
|
233 |
+ balancedAccuracy <- calcExternalPerformance(knownClasses, predictedClasses) |
|
234 |
+ tierTable <- S4Vectors::DataFrame(Tier = assay, |
|
235 |
+ `Balanced Accuracy` = balancedAccuracy, check.names = FALSE) |
|
236 |
+ |
|
237 |
+ assaysProcessed <- c(assaysProcessed, assay) |
|
238 |
+ individualsTableAll <- rbind(individualsTableAll, individualsTable) |
|
239 |
+ tierTableAll <- rbind(tierTableAll, tierTable) |
|
240 |
+ samplesUsed <- c(samplesUsed, sampleIDsUse) |
|
241 |
+ |
|
242 |
+ if(breakEarly == TRUE) break |
|
243 |
+ } |
|
244 |
+ pathwayString <- paste(assaysProcessed, collapse = '-') |
|
245 |
+ individualsTableAll[, "Tier"] <- factor(individualsTableAll[, "Tier"], levels = permutation) |
|
246 |
+ list(pathway = pathwayString, |
|
247 |
+ individuals = individualsTableAll, tiers = tierTableAll) |
|
248 |
+ }) |
|
249 |
+ names(precisionPathways) <- sapply(precisionPathways, "[[", "pathway") |
|
250 |
+ result <- list(models = modelsList, assaysPermutations = assaysPermutations, |
|
251 |
+ parameters = list(confidenceCutoff = confidenceCutoff, minAssaySamples = minAssaySamples), |
|
252 |
+ clinicalPredictors = clinicalPredictors, pathways = precisionPathways) |
|
253 |
+ class(result) <- "PrecisionPathways" |
|
254 |
+ |
|
255 |
+ result |
|
256 |
+} |
|
257 |
+ |
|
258 |
+# Calculate accuracy and costs of each pathway. |
|
259 |
+ |
|
260 |
+#' Various Functions for Evaluating Precision Pathways |
|
261 |
+#' |
|
262 |
+#' These functions tabulate or plot various aspects of precision pathways, such as accuracies and costs. |
|
263 |
+#' |
|
264 |
+#' @param precisionPathways A pathway of class \code{PrecisionPathways}. |
|
265 |
+#' @param costs A named vector of assays with the cost of each one. |
|
266 |
+#' @rdname precisionPathwaysEvaluations |
|
267 |
+#' @export |
|
268 |
+calcCostsAndPerformance <- function(precisionPathways, costs = NULL) |
|
269 |
+{ |
|
270 |
+ if(is.null(costs)) |
|
271 |
+ stop("'costs' of each assay must be specified.") |
|
272 |
+ pathwayIDs <- names(precisionPathways[["pathways"]]) |
|
273 |
+ accuraciesCosts <- do.call(rbind, lapply(precisionPathways[["pathways"]], function(pathway) |
|
274 |
+ { |
|
275 |
+ predictions <- pathway[["individuals"]][, "Predicted"] |
|
276 |
+ knownClasses <- actualOutcome(precisionPathways$models[[1]]) |
|
277 |
+ allNames <- sampleNames(precisionPathways$models[[1]]) |
|
278 |
+ knownClasses <- knownClasses[match(pathway[["individuals"]][, "Sample ID"], allNames)] |
|
279 |
+ balancedAccuracy <- calcExternalPerformance(knownClasses, predictions) |
|
280 |
+ |
|
281 |
+ costTotal <- sum(costs[match(pathway[["individuals"]][, "Tier"], names(costs))]) |
|
282 |
+ |
|
283 |
+ data.frame(accuracy = round(balancedAccuracy, 2), cost = costTotal) |
|
284 |
+ })) |
|
285 |
+ |
|
286 |
+ precisionPathways$performance <- accuraciesCosts |
|
287 |
+ precisionPathways |
|
288 |
+} |
|
289 |
+ |
|
290 |
+# Print a summary table, including accuracy and costs. |
|
291 |
+ |
|
292 |
+#' @param object A set of pathways of class \code{PrecisionPathways}. |
|
293 |
+#' @param weights A numeric vector of length two specifying how to weight the predictive accuracy |
|
294 |
+#' and the cost during ranking. Must sum to 1. |
|
295 |
+#' @rdname precisionPathwaysEvaluations |
|
296 |
+#' @export |
|
297 |
+summary.PrecisionPathways <- function(object, weights = c(accuracy = 0.5, cost = 0.5)) |
|
298 |
+{ |
|
299 |
+ summaryTable <- data.frame(Pathway = rownames(object[["performance"]]), |
|
300 |
+ `Balanced Accuracy` = object[["performance"]][, "accuracy"], |
|
301 |
+ `Total Cost` = object[["performance"]][, "cost"], |
|
302 |
+ check.names = FALSE) |
|
303 |
+ rankingScores <- list(rank(object[["performance"]][, "accuracy"]), rank(-object[["performance"]][, "cost"])) |
|
304 |
+ finalScores <- rowSums(mapply(function(scores, weight) |
|
305 |
+ { |
|
306 |
+ scores * weight |
|
307 |
+ }, rankingScores, as.list(weights))) |
|
308 |
+ summaryTable <- cbind(summaryTable, Score = finalScores) |
|
309 |
+ summaryTable |
|
310 |
+} |
|
311 |
+ |
|
312 |
+bubblePlot <- function (precisionPathways, ...) { |
|
313 |
+ UseMethod("bubblePlot", precisionPathways) |
|
314 |
+ } |
|
315 |
+ |
|
316 |
+#' @param precisionPathways A pathway of class \code{PrecisionPathways}. |
|
317 |
+#' @param pathwayColours A named vector of colours with names being the names of pathways. If none is specified, |
|
318 |
+#' a default colour scheme will automatically be chosen. |
|
319 |
+#' @rdname precisionPathwaysEvaluations |
|
320 |
+#' @export |
|
321 |
+bubblePlot.PrecisionPathways <- function(precisionPathways, pathwayColours = NULL) |
|
322 |
+{ |
|
323 |
+ ggplot2::theme_set(ggplot2::theme_classic() + ggplot2::theme(panel.border = ggplot2::element_rect(fill = NA))) |
|
324 |
+ if(is.null(pathwayColours)) pathwayColours <- scales::hue_pal()(length(precisionPathways[["pathways"]])) |
|
325 |
+ performance <- precisionPathways[["performance"]] |
|
326 |
+ performance <- cbind(Sequence = rownames(performance), performance) |
|
327 |
+ ggplot2::ggplot(performance, aes(x = accuracy, y = cost, colour = Sequence, size = 4)) + ggplot2::geom_point() + |
|
328 |
+ ggplot2::scale_color_manual(values = pathwayColours) + ggplot2::labs(x = "Balanced Accuracy", y = "Total Cost") + ggplot2::guides(size = FALSE) |
|
329 |
+} |
|
330 |
+ |
|
331 |
+flowchart <- function (precisionPathways, ...) { |
|
332 |
+ UseMethod("flowchart", precisionPathways) |
|
333 |
+ } |
|
334 |
+ |
|
335 |
+#' @param precisionPathways A pathway of class \code{PrecisionPathways}. |
|
336 |
+#' @param pathway A chracter vector of length 1 specifying which pathway to plot, e.g. "clinical-mRNA". |
|
337 |
+#' @param nodeColours A named vector of colours with names being \code{"assay"}, \code{"class1"},\code{"class2"}. |
|
338 |
+#' a default colour scheme will automatically be chosen. |
|
339 |
+#' @rdname precisionPathwaysEvaluations |
|
340 |
+#' @export |
|
341 |
+flowchart.PrecisionPathways <- function(precisionPathways, pathway, nodeColours = c(assay = "#86C57C", class1 = "#ACCEE0", class2 = "#F47F72")) |
|
342 |
+{ |
|
343 |
+ if(!requireNamespace("data.tree", quietly = TRUE)) |
|
344 |
+ stop("The package 'data.tree' could not be found. Please install it.") |
|
345 |
+ |
|
346 |
+ pathwayUse <- precisionPathways[["pathways"]][[pathway]] |
|
347 |
+ assayIDs <- pathwayUse[["tiers"]][, 1] |
|
348 |
+ possibleClasses <- levels(precisionPathways$models[[1]]@actualOutcome) |
|
349 |
+ samplesTiers <- pathwayUse$individuals |
|
350 |
+ |
|
351 |
+ pathwayTree <- Node$new(assayIDs[1]) |
|
352 |
+ currentNode <- pathwayTree |
|
353 |
+ for(assay in assayIDs) |
|
354 |
+ { |
|
355 |
+ toDo <- nrow(samplesTiers) - max(which(samplesTiers[, "Tier"] == assay)) |
|
356 |
+ class1Predictions <- currentNode$AddChild(possibleClasses[1], counter = nrow(subset(samplesTiers, Predicted == possibleClasses[1] & Tier == assay)), nodeType = "Class1") |
|
357 |
+ uncertain <- currentNode$AddChild("Uncertain", counter = toDo, nodeType = "Uncertain") |
|
358 |
+ class2Predictions = currentNode$AddChild(possibleClasses[2], counter = nrow(subset(samplesTiers, Predicted == possibleClasses[2] & Tier == assay)), nodeType = "Class2") |
|
359 |
+ |
|
360 |
+ if(toDo == 0) {break} else { |
|
361 |
+ currentNode <- uncertain |
|
362 |
+ currentNode <- currentNode$AddChild(assayIDs[match(assay, assayIDs) + 1], nodeType = "Platform") |
|
363 |
+ } |
|
364 |
+ } |
|
365 |
+ |
|
366 |
+ SetGraphStyle(pathwayTree, rankdir = "LR") |
|
367 |
+ SetEdgeStyle(pathwayTree, fontname = 'helvetica', label = .getEdgeLabel) |
|
368 |
+ SetNodeStyle(pathwayTree, style = "filled", shape = .getNodeShape, fontcolor = "black", fillcolor = .getFillColour, fontname = 'helvetica') |
|
369 |
+ plot(pathwayTree) |
|
370 |
+} |
|
371 |
+ |
|
372 |
+.getEdgeLabel <- function(node) |
|
373 |
+{ |
|
374 |
+ nSamples <<- nrow(samplesTiers) |
|
375 |
+ if(node$isRoot || node$nodeType == "Platform") |
|
376 |
+ { |
|
377 |
+ label <- NULL |
|
378 |
+ } else { |
|
379 |
+ value <- round((node$counter / nSamples) * 100) |
|
380 |
+ label <- paste(value, "% (", node$counter, ")", sep = '') |
|
381 |
+ } |
|
382 |
+ return(label) |
|
383 |
+} |
|
384 |
+ |
|
385 |
+.getNodeShape <- function(node){ |
|
386 |
+ if(node$isRoot || node$nodeType == "Platform"){ |
|
387 |
+ shape = "oval" |
|
388 |
+ } else { |
|
389 |
+ shape = "box" |
|
390 |
+ } |
|
391 |
+} |
|
392 |
+ |
|
393 |
+.getFillColour <- function(node) { |
|
394 |
+ if(node$isRoot || node$nodeType == "Platform"){ |
|
395 |
+ colour <<- nodeColours[["assay"]] |
|
396 |
+ } else if(node$nodeType == "Class1"){ |
|
397 |
+ colour <<- nodeColours[["class1"]] |
|
398 |
+ } else if(node$nodeType == "Class2"){ |
|
399 |
+ colour <<- nodeColours[["class2"]] |
|
400 |
+ } else { |
|
401 |
+ colour = "snow3" |
|
402 |
+ } |
|
403 |
+ return(colour) |
|
404 |
+} |
|
405 |
+ |
|
406 |
+strataPlot <- function (precisionPathways, ...) { |
|
407 |
+ UseMethod("strataPlot", precisionPathways) |
|
408 |
+ } |
|
409 |
+ |
|
410 |
+#' @param classColours A named vector of colours with names being \code{"class1"},\code{"class2"}, and \code{"accuracy"}. |
|
411 |
+#' a default colour scheme will automatically be chosen. |
|
412 |
+#' @rdname precisionPathwaysEvaluations |
|
413 |
+#' @export |
|
414 |
+strataPlot.PrecisionPathways <- function(precisionPathways, pathway, classColours = c(class1 = "#4DAF4A", class2 = "#984EA3")) |
|
415 |
+{ |
|
416 |
+ pathwayUse <- precisionPathways[["pathways"]][[pathway]] |
|
417 |
+ assayIDs <- pathwayUse[["tiers"]][, 1] |
|
418 |
+ possibleClasses <- levels(precisionPathways$models[[1]]@actualOutcome) |
|
419 |
+ samplesTiers <- pathwayUse$individuals |
|
420 |
+ samplesTiers$trueClass <- actualOutcome(precisionPathways$models[[1]])[match(samplesTiers[, "Sample ID"], sampleNames(precisionPathways$models[[1]]))] |
|
421 |
+ samplesTiers <- dplyr::arrange(as.data.frame(samplesTiers), Tier, trueClass, Accuracy) |
|
422 |
+ samplesTiers$ID = 1:nrow(samplesTiers) |
|
423 |
+ samplesTiers$colour = ifelse(samplesTiers$trueClass == levels(samplesTiers[, "Predicted"])[1], classColours["class1"], classColours["class2"]) |
|
424 |
+ |
|
425 |
+ strataPlot <- ggplot2::ggplot(mapping = ggplot2::aes(x = ID, y = Tier), data = samplesTiers) + |
|
426 |
+ ggplot2::geom_tile(aes(fill = trueClass)) + |
|
427 |
+ ggplot2::scale_fill_manual(values = unname(classColours)) + |
|
428 |
+ ggplot2::labs(title = paste("Pathway:", pathway), fill = "True Class", x = "", y = "") + |
|
429 |
+ ggplot2::guides(fill = guide_legend(title.position = "top")) + |
|
430 |
+ ggnewscale::new_scale_fill() + |
|
431 |
+ geom_tile(aes(fill = Accuracy)) + |
|
432 |
+ ggplot2::scale_fill_gradient(low = "#377EB8", high = "#E41A1C") + |
|
433 |
+ ggplot2::labs(fill = "Accuracy") + |
|
434 |
+ ggplot2::guides(fill = guide_colorbar(title.position = "top")) + |
|
435 |
+ ggplot2::theme(panel.background = ggplot2::element_blank(), |
|
436 |
+ axis.text.x = ggplot2::element_blank(), |
|
437 |
+ aspect.ratio = 1/4, |
|
438 |
+ plot.title = ggplot2::element_text(face = "bold", size = 20), |
|
439 |
+ legend.title = ggplot2::element_text(face = "bold", size = 12), |
|
440 |
+ legend.text = ggplot2::element_text(size = 10), |
|
441 |
+ legend.position = "bottom", |
|
442 |
+ axis.text = ggplot2::element_text(size = 15)) + |
|
443 |
+ annotate("tile", |
|
444 |
+ x = samplesTiers$ID, |
|
445 |
+ y = length(levels(samplesTiers[, "Tier"])) + 0.8, |
|
446 |
+ height = 0.6, |
|
447 |
+ fill = samplesTiers$colour) + |
|
448 |
+ ggplot2::coord_cartesian(expand = FALSE) |
|
449 |
+ |
|
450 |
+ strataPlot |
|
180 | 451 |
} |
181 | 452 |
\ No newline at end of file |
... | ... |
@@ -235,15 +235,15 @@ setMethod("prepareData", "list", |
235 | 235 |
if("clinical" %in% names(measurements)) |
236 | 236 |
measurements[["clinical"]] <- measurements[["clinical"]][, clinicalPredictors] |
237 | 237 |
|
238 |
- allMetadata <- mapply(function(measurementsOne, assayID) { |
|
238 |
+ allMetadata <- do.call(rbind, mapply(function(measurementsOne, assayID) { |
|
239 | 239 |
data.frame(assay = assayID, feature = colnames(measurementsOne)) |
240 |
- }, measurements, names(measurements)) |
|
240 |
+ }, measurements, names(measurements), SIMPLIFY = FALSE)) |
|
241 | 241 |
allMeasurements <- do.call("cbind", measurements) |
242 | 242 |
# Different assays e.g. mRNA, protein could have same feature name e.g. BRAF. |
243 | 243 |
colnames(allMeasurements) <- paste(allMetadata[, "assay"], allMetadata[, "feature"], sep = '_') |
244 |
- allDataFrame <- DataFrame(allMeasurements) |
|
244 |
+ allMeasurements <- DataFrame(allMeasurements) |
|
245 | 245 |
S4Vectors::mcols(allMeasurements) <- allMetadata |
246 | 246 |
|
247 | 247 |
# Do other filtering and preparation in DataFrame function. |
248 |
- prepareData(dataTable, outcome, clinicalPredictors = NULL, ...) |
|
248 |
+ prepareData(allMeasurements, outcome, clinicalPredictors = NULL, ...) |
|
249 | 249 |
}) |
250 | 250 |
\ No newline at end of file |
... | ... |
@@ -2,8 +2,9 @@ |
2 | 2 |
% Please edit documentation in R/precisionPathways.R |
3 | 3 |
\name{precisionPathwaysTrain} |
4 | 4 |
\alias{precisionPathwaysTrain} |
5 |
+\alias{precisionPathwaysPredict} |
|
5 | 6 |
\alias{precisionPathwaysTrain,MultiAssayExperimentOrList-method} |
6 |
-\alias{precisionPathwaysPredict,MultiAssayExperimentOrList-method} |
|
7 |
+\alias{precisionPathwaysPredict,PrecisionPathways,MultiAssayExperimentOrList-method} |
|
7 | 8 |
\title{Precision Pathways for Sample Prediction Based on Prediction Confidence.} |
8 | 9 |
\usage{ |
9 | 10 |
\S4method{precisionPathwaysTrain}{MultiAssayExperimentOrList}( |
... | ... |
@@ -25,7 +26,7 @@ |
25 | 26 |
nCores = 1 |
26 | 27 |
) |
27 | 28 |
|
28 |
-\S4method{precisionPathwaysPredict}{MultiAssayExperimentOrList}(pathways, measurements, class) |
|
29 |
+\S4method{precisionPathwaysPredict}{PrecisionPathways,MultiAssayExperimentOrList}(pathways, measurements, class) |
|
29 | 30 |
} |
30 | 31 |
\arguments{ |
31 | 32 |
\item{measurements}{Either a \code{\link{MultiAssayExperiment}} or a list of the basic tabular objects containing the data.} |
... | ... |
@@ -63,7 +64,7 @@ would have less than this number of samples, the samples are incorporated into t |
63 | 64 |
|
64 | 65 |
\item{nCores}{A numeric specifying the number of cores used if the user wants to use parallelisation.} |
65 | 66 |
|
66 |
-\item{pathways}{A set of pathways created by \code{precisionPathwaysTrain} to be used for predicting on a new data set.} |
|
67 |
+\item{pathways}{A set of pathways created by \code{precisionPathwaysTrain} which is an object of class \code{PrecisionPathways} to be used for predicting on a new data set.} |
|
67 | 68 |
} |
68 | 69 |
\value{ |
69 | 70 |
An object of class \code{PrecisionPathways} which is basically a named list that other plotting and |
70 | 71 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,52 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/precisionPathways.R |
|
3 |
+\name{calcCostsAndPerformance} |
|
4 |
+\alias{calcCostsAndPerformance} |
|
5 |
+\alias{summary.PrecisionPathways} |
|
6 |
+\alias{bubblePlot.PrecisionPathways} |
|
7 |
+\alias{flowchart.PrecisionPathways} |
|
8 |
+\alias{strataPlot.PrecisionPathways} |
|
9 |
+\title{Various Functions for Evaluating Precision Pathways} |
|
10 |
+\usage{ |
|
11 |
+calcCostsAndPerformance(precisionPathways, costs = NULL) |
|
12 |
+ |
|
13 |
+\method{summary}{PrecisionPathways}(object, weights = c(accuracy = 0.5, cost = 0.5)) |
|
14 |
+ |
|
15 |
+\method{bubblePlot}{PrecisionPathways}(precisionPathways, pathwayColours = NULL) |
|
16 |
+ |
|
17 |
+\method{flowchart}{PrecisionPathways}( |
|
18 |
+ precisionPathways, |
|
19 |
+ pathway, |
|
20 |
+ nodeColours = c(assay = "#86C57C", class1 = "#ACCEE0", class2 = "#F47F72") |
|
21 |
+) |
|
22 |
+ |
|
23 |
+\method{strataPlot}{PrecisionPathways}( |
|
24 |
+ precisionPathways, |
|
25 |
+ pathway, |
|
26 |
+ classColours = c(class1 = "#4DAF4A", class2 = "#984EA3") |
|
27 |
+) |
|
28 |
+} |
|
29 |
+\arguments{ |
|
30 |
+\item{precisionPathways}{A pathway of class \code{PrecisionPathways}.} |
|
31 |
+ |
|
32 |
+\item{costs}{A named vector of assays with the cost of each one.} |
|
33 |
+ |
|
34 |
+\item{object}{A set of pathways of class \code{PrecisionPathways}.} |
|
35 |
+ |
|
36 |
+\item{weights}{A numeric vector of length two specifying how to weight the predictive accuracy |
|
37 |
+and the cost during ranking. Must sum to 1.} |
|
38 |
+ |
|
39 |
+\item{pathwayColours}{A named vector of colours with names being the names of pathways. If none is specified, |
|
40 |
+a default colour scheme will automatically be chosen.} |
|
41 |
+ |
|
42 |
+\item{pathway}{A chracter vector of length 1 specifying which pathway to plot, e.g. "clinical-mRNA".} |
|
43 |
+ |
|
44 |
+\item{nodeColours}{A named vector of colours with names being \code{"assay"}, \code{"class1"},\code{"class2"}. |
|
45 |
+a default colour scheme will automatically be chosen.} |
|
46 |
+ |
|
47 |
+\item{classColours}{A named vector of colours with names being \code{"class1"},\code{"class2"}, and \code{"accuracy"}. |
|
48 |
+a default colour scheme will automatically be chosen.} |
|
49 |
+} |
|
50 |
+\description{ |
|
51 |
+These functions tabulate or plot various aspects of precision pathways, such as accuracies and costs. |
|
52 |
+} |