... | ... |
@@ -3,16 +3,16 @@ Type: Package |
3 | 3 |
Title: A framework for cross-validated classification problems, with |
4 | 4 |
applications to differential variability and differential |
5 | 5 |
distribution testing |
6 |
-Version: 3.1.7 |
|
7 |
-Date: 2022-06-30 |
|
6 |
+Version: 3.1.8 |
|
7 |
+Date: 2022-07-12 |
|
8 | 8 |
Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang |
9 | 9 |
Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au> |
10 | 10 |
VignetteBuilder: knitr |
11 | 11 |
Encoding: UTF-8 |
12 | 12 |
biocViews: Classification, Survival |
13 | 13 |
Depends: R (>= 4.1.0), methods, S4Vectors (>= 0.18.0), MultiAssayExperiment (>= 1.6.0), BiocParallel, survival |
14 |
-Imports: grid, utils, dplyr, tidyr, rlang, randomForest |
|
15 |
-Suggests: limma, genefilter, edgeR, car, Rmixmod, ggplot2 (>= 3.0.0), gridExtra (>= 2.0.0), cowplot, |
|
14 |
+Imports: grid, genefilter, utils, dplyr, tidyr, rlang, randomForest |
|
15 |
+Suggests: limma, edgeR, car, Rmixmod, ggplot2 (>= 3.0.0), gridExtra (>= 2.0.0), cowplot, |
|
16 | 16 |
BiocStyle, pamr, PoiClaClu, parathyroidSE, knitr, htmltools, gtable, |
17 | 17 |
scales, e1071, rmarkdown, IRanges, robustbase, glmnet, class |
18 | 18 |
Description: The software formalises a framework for classification in R. |
... | ... |
@@ -22,7 +22,6 @@ export(SelectParams) |
22 | 22 |
export(TrainParams) |
23 | 23 |
export(TransformParams) |
24 | 24 |
export(actualOutcomes) |
25 |
-export(allFeatureNames) |
|
26 | 25 |
export(bartlettRanking) |
27 | 26 |
export(calcCVperformance) |
28 | 27 |
export(calcExternalPerformance) |
... | ... |
@@ -42,6 +41,7 @@ export(elasticNetFeatures) |
42 | 41 |
export(elasticNetGLMpredictInterface) |
43 | 42 |
export(elasticNetGLMtrainInterface) |
44 | 43 |
export(featureSetSummary) |
44 |
+export(featuresInfo) |
|
45 | 45 |
export(fisherDiscriminant) |
46 | 46 |
export(forestFeatures) |
47 | 47 |
export(generateCrossValParams) |
... | ... |
@@ -105,7 +105,6 @@ exportMethods(SelectParams) |
105 | 105 |
exportMethods(TrainParams) |
106 | 106 |
exportMethods(TransformParams) |
107 | 107 |
exportMethods(actualOutcomes) |
108 |
-exportMethods(allFeatureNames) |
|
109 | 108 |
exportMethods(bartlettRanking) |
110 | 109 |
exportMethods(calcCVperformance) |
111 | 110 |
exportMethods(calcExternalPerformance) |
... | ... |
@@ -124,6 +123,7 @@ exportMethods(elasticNetFeatures) |
124 | 123 |
exportMethods(elasticNetGLMpredictInterface) |
125 | 124 |
exportMethods(elasticNetGLMtrainInterface) |
126 | 125 |
exportMethods(featureSetSummary) |
126 |
+exportMethods(featuresInfo) |
|
127 | 127 |
exportMethods(fisherDiscriminant) |
128 | 128 |
exportMethods(forestFeatures) |
129 | 129 |
exportMethods(getLocationsAndScales) |
... | ... |
@@ -50,30 +50,32 @@ |
50 | 50 |
#' @author Dario Strbenac |
51 | 51 |
#' @examples |
52 | 52 |
#' |
53 |
-#' predicted <- do.call(rbind, list(data.frame(data.frame(sample = LETTERS[c(1, 8, 15, 3, 11, 20, 19, 18)], |
|
54 |
-#' Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25), |
|
55 |
-#' Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75), |
|
53 |
+#' predicted <- do.call(rbind, list(DataFrame(data.frame(sample = LETTERS[seq(1, 20, 2)], |
|
54 |
+#' Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25, 0.10, 0.30), |
|
55 |
+#' Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75, 0.90, 0.70), |
|
56 | 56 |
#' fold = 1)), |
57 |
-#' data.frame(sample = LETTERS[c(11, 18, 15, 4, 6, 10, 11, 12)], |
|
58 |
-#' Healthy = c(0.45, 0.56, 0.33, 0.56, 0.33, 0.20, 0.60, 0.40), |
|
59 |
-#' Cancer = c(0.55, 0.44, 0.67, 0.44, 0.67, 0.80, 0.40, 0.60), |
|
57 |
+#' DataFrame(sample = LETTERS[seq(2, 20, 2)], |
|
58 |
+#' Healthy = c(0.45, 0.56, 0.33, 0.56, 0.65, 0.33, 0.20, 0.60, 0.40, 0.80), |
|
59 |
+#' Cancer = c(0.55, 0.44, 0.67, 0.44, 0.35, 0.67, 0.80, 0.40, 0.60, 0.20), |
|
60 | 60 |
#' fold = 2))) |
61 | 61 |
#' actual <- factor(c(rep("Healthy", 10), rep("Cancer", 10)), levels = c("Healthy", "Cancer")) |
62 |
-#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
|
63 |
-#' "Cross-validation"), |
|
64 |
-#' value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")), |
|
65 |
-#' LETTERS[1:20], LETTERS[10:1], |
|
66 |
-#' list(1:100, c(1:9, 11:101)), list(sample(10, 10), sample(10, 10)), |
|
62 |
+#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"), |
|
63 |
+#' value = c("Melanoma", "t-test", "Random Forest", "2-fold")), |
|
64 |
+#' LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
65 |
+#' `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE), |
|
66 |
+#' list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
67 |
+#' list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])), |
|
67 | 68 |
#' list(function(oracle){}), NULL, predicted, actual) |
68 | 69 |
#' |
69 | 70 |
#' predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60) |
70 | 71 |
#' predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40) |
71 |
-#' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
|
72 |
-#' "Cross-validation"), |
|
73 |
-#' value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")), |
|
74 |
-#' LETTERS[1:20], LETTERS[10:1], list(1:100, c(1:5, 11:105)), |
|
75 |
-#' list(sample(10, 10), sample(10, 10)), list(function(oracle){}), |
|
76 |
-#' NULL, predicted, actual) |
|
72 |
+#' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"), |
|
73 |
+#' value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")), |
|
74 |
+#' LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
75 |
+#' `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE), |
|
76 |
+#' list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
77 |
+#' list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])), |
|
78 |
+#' list(function(oracle){}), NULL, predicted, actual) |
|
77 | 79 |
#' ROCplot(list(result1, result2), plotTitle = "Cancer ROC") |
78 | 80 |
#' |
79 | 81 |
#' @usage NULL |
... | ... |
@@ -70,13 +70,14 @@ |
70 | 70 |
#' @author Dario Strbenac |
71 | 71 |
#' @examples |
72 | 72 |
#' |
73 |
-#' predictTable <- data.frame(sample = paste("A", 1:10, sep = ''), |
|
74 |
-#' class = factor(sample(LETTERS[1:2], 50, replace = TRUE))) |
|
73 |
+#' predictTable <- DataFrame(sample = paste("A", 1:10, sep = ''), |
|
74 |
+#' class = factor(sample(LETTERS[1:2], 50, replace = TRUE))) |
|
75 | 75 |
#' actual <- factor(sample(LETTERS[1:2], 10, replace = TRUE)) |
76 |
-#' result <- ClassifyResult(DataFrame(), |
|
77 |
-#' paste("A", 1:10, sep = ''), paste("Gene", 1:50, sep = ''), |
|
78 |
-#' list(1:50, 1:50), list(1:5, 6:15), list(function(oracle){}), NULL, |
|
79 |
-#' predictTable, actual) |
|
76 |
+#' result <- ClassifyResult(DataFrame(characteristic = "Data Set", value = "Example"), |
|
77 |
+#' paste("A", 1:10, sep = ''), DataFrame(`Original Feature` = paste("Gene", 1:50), |
|
78 |
+#' `Renamed Feature` = paste("Feature", 1:50, sep = '')), |
|
79 |
+#' list(paste("Gene", 1:50), paste("Gene", 1:50)), list(paste("Gene", 1:5), paste("Gene", 1:10)), |
|
80 |
+#' list(function(oracle){}), NULL, predictTable, actual) |
|
80 | 81 |
#' result <- calcCVperformance(result) |
81 | 82 |
#' performance(result) |
82 | 83 |
#' |
... | ... |
@@ -1217,7 +1217,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1217 | 1217 |
#' @aliases ClassifyResult ClassifyResult-class |
1218 | 1218 |
#' ClassifyResult,DataFrame,character,characterOrDataFrame-method |
1219 | 1219 |
#' show,ClassifyResult-method sampleNames sampleNames,ClassifyResult-method |
1220 |
-#' allFeatureNames allFeatureNames,ClassifyResult-method |
|
1220 |
+#' featuresInfo featuresInfo,ClassifyResult-method |
|
1221 | 1221 |
#' predictions predictions,ClassifyResult-method actualOutcomes |
1222 | 1222 |
#' actualOutcomes,ClassifyResult-method features features,ClassifyResult-method |
1223 | 1223 |
#' models models,ClassifyResult-method performance |
... | ... |
@@ -1237,9 +1237,8 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1237 | 1237 |
#' package, the function names will automatically be generated and therefore it |
1238 | 1238 |
#' is not necessary to specify them.} |
1239 | 1239 |
#' \item{\code{originalNames}}{All sample names.} |
1240 |
-#' \item{\code{originalFeatures}}{All feature names. Character vector |
|
1241 |
-#' or \code{\link{DataFrame}} with one row for each feature if the data set has multiple kinds |
|
1242 |
-#' of measurements on the same set of samples.} |
|
1240 |
+#' \item{\code{featuresInfo}}{A \code{\link{DataFrame}} containing all feature names in original format |
|
1241 |
+#' and a safe format without any unusual symbols that R would automatically convert into another format and cause trouble.} |
|
1243 | 1242 |
#' \item{\code{rankedFeatures}}{All features, from most to least important. Character vector |
1244 | 1243 |
#' or a data frame if data set has multiple kinds of measurements on the same set of samples.} |
1245 | 1244 |
#' \item{\code{chosenFeatures}}{Features selected at each fold. Character |
... | ... |
@@ -1267,7 +1266,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1267 | 1266 |
#' \describe{ |
1268 | 1267 |
#' \item{\code{sampleNames(result)}}{Returns a vector of sample names present in the data set.}} |
1269 | 1268 |
#' \describe{ |
1270 |
-#' \item{\code{allFeatureNames(result)}}{Returns a vector of features present in the data set.}} |
|
1269 |
+#' \item{\code{featuresInfo(result)}}{Returns a table of features present in the data set. Shows original names and renamed names to ensure no unusual symbols in names.}} |
|
1271 | 1270 |
#' \describe{ |
1272 | 1271 |
#' \item{\code{actualOutcomes(result)}}{Returns the known outcomes of each sample.}} |
1273 | 1272 |
#' \describe{ |
... | ... |
@@ -1308,7 +1307,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1308 | 1307 |
#' @importFrom S4Vectors as.data.frame |
1309 | 1308 |
#' @usage NULL |
1310 | 1309 |
#' @export |
1311 |
-setGeneric("ClassifyResult", function(characteristics, originalNames, originalFeatures, ...) |
|
1310 |
+setGeneric("ClassifyResult", function(characteristics, originalNames, featuresInfo, ...) |
|
1312 | 1311 |
standardGeneric("ClassifyResult")) |
1313 | 1312 |
|
1314 | 1313 |
#' @rdname ClassifyResult-class |
... | ... |
@@ -1316,13 +1315,13 @@ standardGeneric("ClassifyResult")) |
1316 | 1315 |
setClass("ClassifyResult", representation( |
1317 | 1316 |
characteristics = "DataFrame", |
1318 | 1317 |
originalNames = "character", |
1319 |
- originalFeatures = "characterOrDataFrame", |
|
1318 |
+ featuresInfo = "DataFrame", |
|
1320 | 1319 |
rankedFeatures = "listOrNULL", |
1321 | 1320 |
chosenFeatures = "listOrNULL", |
1322 | 1321 |
actualOutcomes = "factorOrSurv", |
1323 | 1322 |
models = "list", |
1324 | 1323 |
tune = "listOrNULL", |
1325 |
- predictions = "data.frame", |
|
1324 |
+ predictions = "DataFrame", |
|
1326 | 1325 |
performance = "listOrNULL", |
1327 | 1326 |
importance = "DataFrameOrNULL", |
1328 | 1327 |
modellingParams = "ModellingParamsOrNULL", |
... | ... |
@@ -1332,11 +1331,11 @@ setClass("ClassifyResult", representation( |
1332 | 1331 |
#' @usage NULL |
1333 | 1332 |
#' @export |
1334 | 1333 |
setMethod("ClassifyResult", c("DataFrame", "character", "characterOrDataFrame"), |
1335 |
- function(characteristics, originalNames, originalFeatures, |
|
1334 |
+ function(characteristics, originalNames, featuresInfo, |
|
1336 | 1335 |
rankedFeatures, chosenFeatures, models, tunedParameters, predictions, actualOutcomes, importance = NULL, modellingParams = NULL, finalModel = NULL) |
1337 | 1336 |
{ |
1338 | 1337 |
new("ClassifyResult", characteristics = characteristics, |
1339 |
- originalNames = originalNames, originalFeatures = originalFeatures, |
|
1338 |
+ originalNames = originalNames, featuresInfo = featuresInfo, |
|
1340 | 1339 |
rankedFeatures = rankedFeatures, chosenFeatures = chosenFeatures, |
1341 | 1340 |
models = models, tune = tunedParameters, |
1342 | 1341 |
predictions = predictions, actualOutcomes = actualOutcomes, importance = importance, modellingParams = modellingParams, finalModel = finalModel) |
... | ... |
@@ -1383,16 +1382,16 @@ setMethod("sampleNames", c("ClassifyResult"), |
1383 | 1382 |
#' @rdname ClassifyResult-class |
1384 | 1383 |
#' @usage NULL |
1385 | 1384 |
#' @export |
1386 |
-setGeneric("allFeatureNames", function(object, ...) |
|
1387 |
-standardGeneric("allFeatureNames")) |
|
1385 |
+setGeneric("featuresInfo", function(object, ...) |
|
1386 |
+standardGeneric("featuresInfo")) |
|
1388 | 1387 |
|
1389 | 1388 |
#' @rdname ClassifyResult-class |
1390 | 1389 |
#' @usage NULL |
1391 | 1390 |
#' @export |
1392 |
-setMethod("allFeatureNames", c("ClassifyResult"), |
|
1391 |
+setMethod("featuresInfo", c("ClassifyResult"), |
|
1393 | 1392 |
function(object) |
1394 | 1393 |
{ |
1395 |
- object@originalFeatures |
|
1394 |
+ object@featuresInfo |
|
1396 | 1395 |
}) |
1397 | 1396 |
|
1398 | 1397 |
#' @rdname ClassifyResult-class |
... | ... |
@@ -52,7 +52,7 @@ |
52 | 52 |
#' |
53 | 53 |
#' # Compare randomForest and SVM classifiers. |
54 | 54 |
#' result <- crossValidate(measurements, classes, classifier = c("randomForest", "SVM")) |
55 |
-#' # performancePlot(result) |
|
55 |
+#' performancePlot(result) |
|
56 | 56 |
#' |
57 | 57 |
#' |
58 | 58 |
#' # Compare performance of different datasets. |
... | ... |
@@ -125,11 +125,13 @@ setMethod("crossValidate", "DataFrame", |
125 | 125 |
Using an ordinary GLM instead.") |
126 | 126 |
classifier <- "GLM" |
127 | 127 |
} |
128 |
+ |
|
128 | 129 |
classifier <- cleanClassifier(classifier = classifier, |
129 | 130 |
measurements = measurements) |
130 | 131 |
|
131 | 132 |
# Which data-types or data-views are present? |
132 | 133 |
datasetIDs <- unique(mcols(measurements)[, "dataset"]) |
134 |
+ if(is.null(datasetIDs)) datasetIDs <- 1 |
|
133 | 135 |
|
134 | 136 |
##!!!!! Do something with data combinations |
135 | 137 |
|
... | ... |
@@ -161,9 +163,10 @@ setMethod("crossValidate", "DataFrame", |
161 | 163 |
sapply(selectionMethod[[dataIndex]], function(selectionIndex) { |
162 | 164 |
# Loop over classifiers |
163 | 165 |
set.seed(seed) |
166 |
+ measurementsUse <- measurements |
|
167 |
+ if(!is.null(mcols(measurements))) measurementsUse <- measurements[, mcols(measurements)[, "dataset"] == dataIndex, drop = FALSE] |
|
164 | 168 |
CV( |
165 |
- measurements = measurements[, mcols(measurements)$dataset == dataIndex, drop = FALSE], |
|
166 |
- classes = classes, |
|
169 |
+ measurements = measurementsUse, classes = classes, |
|
167 | 170 |
nFeatures = nFeatures[dataIndex], |
168 | 171 |
selectionMethod = selectionIndex, |
169 | 172 |
selectionOptimisation = selectionOptimisation, |
... | ... |
@@ -201,7 +204,7 @@ setMethod("crossValidate", "DataFrame", |
201 | 204 |
# This allows someone to answer which combinations of the datasets might be most useful. |
202 | 205 |
|
203 | 206 |
|
204 |
- if(is.null(dataCombinations)) dataCombinations <- do.call("c", sapply(seq_len(length(datasetIDs)),function(n)combn(datasetIDs, n, simplify = FALSE))) |
|
207 |
+ if(is.null(dataCombinations)) dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE))) |
|
205 | 208 |
|
206 | 209 |
result <- sapply(dataCombinations, function(dataIndex){ |
207 | 210 |
CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex], |
... | ... |
@@ -310,7 +313,7 @@ setMethod("crossValidate", "MultiAssayExperiment", |
310 | 313 |
characteristicsLabel = NULL) |
311 | 314 |
{ |
312 | 315 |
targets <- c(names(measurements), "sampleInfo") |
313 |
- omicsTargets <- setdiff("sampleInfo", targets) |
|
316 |
+ omicsTargets <- setdiff(targets, "sampleInfo") |
|
314 | 317 |
if(length(omicsTargets) > 0) |
315 | 318 |
{ |
316 | 319 |
if(any(anyReplicated(measurements[, , omicsTargets]))) |
... | ... |
@@ -352,9 +355,6 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements. |
352 | 355 |
characteristicsLabel = NULL) |
353 | 356 |
{ |
354 | 357 |
measurements <- DataFrame(measurements) |
355 |
- message(paste("You have", ncol(measurements), "features and", nrow(measurements), "samples and only one data-type.")) |
|
356 |
- mcols(measurements)$dataset <- "dataset" |
|
357 |
- mcols(measurements)$feature <- colnames(measurements) |
|
358 | 358 |
crossValidate(measurements = measurements, |
359 | 359 |
classes = classes, |
360 | 360 |
nFeatures = nFeatures, |
... | ... |
@@ -386,8 +386,6 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements. |
386 | 386 |
characteristicsLabel = NULL) |
387 | 387 |
{ |
388 | 388 |
measurements <- S4Vectors::DataFrame(measurements, check.names = FALSE) |
389 |
- mcols(measurements)$dataset <- "dataset" |
|
390 |
- mcols(measurements)$feature <- colnames(measurements) |
|
391 | 389 |
crossValidate(measurements = measurements, |
392 | 390 |
classes = classes, |
393 | 391 |
nFeatures = nFeatures, |
... | ... |
@@ -407,7 +405,7 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements. |
407 | 405 |
###!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
408 | 406 |
#' @rdname crossValidate |
409 | 407 |
#' @export |
410 |
-setMethod("crossValidate", "list", # data.frame of numeric measurements. |
|
408 |
+setMethod("crossValidate", "list", |
|
411 | 409 |
function(measurements, |
412 | 410 |
classes, |
413 | 411 |
nFeatures = 20, |
... | ... |
@@ -481,14 +479,16 @@ setMethod("crossValidate", "list", # data.frame of numeric measurements. |
481 | 479 |
###################################### |
482 | 480 |
cleanNFeatures <- function(nFeatures, measurements){ |
483 | 481 |
#### Clean up |
484 |
- obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
482 |
+ if(!is.null(mcols(measurements))) |
|
483 |
+ obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
484 |
+ else obsFeatures <- ncol(measurements) |
|
485 | 485 |
if(is.null(nFeatures) || length(nFeatures) == 1 && nFeatures == "all") nFeatures <- as.list(obsFeatures) |
486 | 486 |
if(is.null(names(nFeatures)) & length(nFeatures) == 1) nFeatures <- as.list(pmin(obsFeatures, nFeatures)) |
487 | 487 |
if(is.null(names(nFeatures)) & length(nFeatures) > 1) nFeatures <- sapply(obsFeatures, function(x)pmin(x, nFeatures), simplify = FALSE) |
488 | 488 |
#if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.") |
489 |
- if(!all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.") |
|
490 |
- if(all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)])) |
|
491 |
- if(all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE) |
|
489 |
+ if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.") |
|
490 |
+ if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)])) |
|
491 |
+ if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE) |
|
492 | 492 |
nFeatures |
493 | 493 |
} |
494 | 494 |
|
... | ... |
@@ -496,30 +496,32 @@ cleanNFeatures <- function(nFeatures, measurements){ |
496 | 496 |
###################################### |
497 | 497 |
cleanSelectionMethod <- function(selectionMethod, measurements){ |
498 | 498 |
#### Clean up |
499 |
- obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
499 |
+ if(!is.null(mcols(measurements))) |
|
500 |
+ obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
501 |
+ else return(list(selectionMethod)) |
|
500 | 502 |
|
501 |
- if(is.null(names(selectionMethod)) & length(selectionMethod) == 1) selectionMethod <- sapply(names(obsFeatures), function(x)selectionMethod, simplify = FALSE) |
|
502 |
- if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) selectionMethod <- sapply(names(obsFeatures), function(x)selectionMethod, simplify = FALSE) |
|
503 |
+ if(is.null(names(selectionMethod)) & length(selectionMethod) == 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE) |
|
504 |
+ if(is.null(names(selectionMethod)) & length(selectionMethod) > 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE) |
|
503 | 505 |
#if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.") |
504 |
- if(!all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.") |
|
505 |
- if(all(names(obsFeatures) %in% names(selectionMethod)) & is(selectionMethod, "character")) selectionMethod <- as.list(selectionMethod[names(obsFeatures)]) |
|
506 |
+ if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.") |
|
507 |
+ if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(selectionMethod)) & is(selectionMethod, "character")) selectionMethod <- as.list(selectionMethod[names(obsFeatures)]) |
|
506 | 508 |
selectionMethod |
507 |
- |
|
508 | 509 |
} |
509 | 510 |
|
510 | 511 |
###################################### |
511 | 512 |
###################################### |
512 | 513 |
cleanClassifier <- function(classifier, measurements){ |
513 | 514 |
#### Clean up |
514 |
- obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
515 |
+ if(!is.null(mcols(measurements))) |
|
516 |
+ obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
517 |
+ else return(list(classifier)) |
|
515 | 518 |
|
516 |
- if(is.null(names(classifier)) & length(classifier) == 1) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE) |
|
517 |
- if(is.null(names(classifier)) & length(classifier) > 1) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE) |
|
519 |
+ if(is.null(names(classifier)) & length(classifier) == 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE) |
|
520 |
+ if(is.null(names(classifier)) & length(classifier) > 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE) |
|
518 | 521 |
#if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the datasets.") |
519 |
- if(!all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the datasets.") |
|
520 |
- if(all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)]) |
|
522 |
+ if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the datasets.") |
|
523 |
+ if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)]) |
|
521 | 524 |
classifier |
522 |
- |
|
523 | 525 |
} |
524 | 526 |
|
525 | 527 |
|
... | ... |
@@ -644,7 +646,7 @@ generateModellingParams <- function(datasetIDs, |
644 | 646 |
|
645 | 647 |
classifiers <- c("randomForest", "GLM", "elasticNetGLM", "SVM", "DLDA", |
646 | 648 |
"naiveBayes", "mixturesNormals", "kNN", |
647 |
- "elasticNetPreval", "CoxPH", "CoxNet") |
|
649 |
+ "CoxPH", "CoxNet") |
|
648 | 650 |
# Check classifier |
649 | 651 |
if(!classifier %in% classifiers) |
650 | 652 |
stop(paste("Classifier must exactly match of these (be careful of case):", paste(classifiers, collapse = ", "))) |
... | ... |
@@ -659,7 +661,6 @@ generateModellingParams <- function(datasetIDs, |
659 | 661 |
"naiveBayes" = naiveBayesParams(), |
660 | 662 |
"mixturesNormals" = mixModelsParams(), |
661 | 663 |
"kNN" = kNNparams(), |
662 |
- "elasticNetPreval" = elasticNetPreval(), |
|
663 | 664 |
"CoxPH" = coxphParams(), |
664 | 665 |
"CoxNet" = coxnetParams() |
665 | 666 |
) |
... | ... |
@@ -876,6 +877,7 @@ CV <- function(measurements, |
876 | 877 |
|
877 | 878 |
# Which data-types or data-views are present? |
878 | 879 |
datasetIDs <- unique(mcols(measurements)[, "dataset"]) |
880 |
+ if(is.null(datasetIDs)) datasetIDs <- 1 |
|
879 | 881 |
if(is.null(dataCombinations)) dataCombinations <- datasetIDs |
880 | 882 |
if(is.null(characteristicsLabel)) characteristicsLabel <- "none" |
881 | 883 |
|
... | ... |
@@ -896,8 +898,7 @@ CV <- function(measurements, |
896 | 898 |
multiViewMethod = multiViewMethod |
897 | 899 |
) |
898 | 900 |
|
899 |
- |
|
900 |
- characteristics = S4Vectors::DataFrame(characteristic = c("dataset", "classifier", "selectionMethod", "multiViewMethod", "characteristicsLabel"), value = c(paste(datasetIDs, collapse = ", "), paste(classifier, collapse = ", "), paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel)) |
|
901 |
+ characteristics = S4Vectors::DataFrame(characteristic = c(if(length(datasetIDs) > 1) "Data Set" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(length(datasetIDs) > 1) paste(datasetIDs, collapse = ", ") else NULL, paste(classifier, collapse = ", "), paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel)) |
|
901 | 902 |
|
902 | 903 |
classifyResults <- runTests(measurements, classes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics) |
903 | 904 |
|
... | ... |
@@ -238,8 +238,8 @@ setMethod("elasticNetGLMpredictInterface", c("multnet", "MultiAssayExperiment"), |
238 | 238 |
#' @param model A fitted multinomial GLM which was created by |
239 | 239 |
#' \code{\link[glmnet]{glmnet}}. |
240 | 240 |
#' @return An \code{list} object. The first element is a vector or data frame |
241 |
-#' of ranked features, the second is a vector or data frame of selected |
|
242 |
-#' features. |
|
241 |
+#' of ranked feature indicies, the second is a vector or data frame of selected |
|
242 |
+#' feature indices of the \code{DataFrame} processed. |
|
243 | 243 |
#' @author Dario Strbenac |
244 | 244 |
#' @examples |
245 | 245 |
#' |
... | ... |
@@ -282,15 +282,7 @@ setMethod("elasticNetFeatures", "multnet", |
282 | 282 |
whichCoefficientColumn <- which(abs(model[["lambda"]] - attr(model, "tune")[["lambda"]]) < 0.00001)[1] |
283 | 283 |
coefficientsUsed <- sapply(model[["beta"]], function(classCoefficients) classCoefficients[, whichCoefficientColumn]) |
284 | 284 |
featureScores <- rowSums(abs(coefficientsUsed)) |
285 |
- rankedFeatures <- inputFeatures[order(featureScores, decreasing = TRUE)] |
|
286 |
- selectedFeatures <- inputFeatures[featureScores != 0] |
|
287 |
- |
|
288 |
- # Colon is a reserved symbol for separating data name and feature name, which is necessary for identifiability of MultiAssayExperiment features. It is not permitted in feature names. |
|
289 |
- if(grepl(':', selectedFeatures[1]) == TRUE) # Convert to data.frame. |
|
290 |
- { |
|
291 |
- selectedFeatures <- do.call(rbind, strsplit(selectedFeatures, ':')) |
|
292 |
- rankedFeatures <- do.call(rbind, strsplit(rankedFeatures, ':')) |
|
293 |
- colnames(selectedFeatures) <- colnames(rankedFeatures) <- c("dataset", "feature") |
|
294 |
- } |
|
295 |
- list(unique(rankedFeatures), selectedFeatures) |
|
285 |
+ rankedFeaturesIndices <- order(featureScores, decreasing = TRUE) |
|
286 |
+ selectedFeaturesIndices <- which(featureScores != 0) |
|
287 |
+ list(rankedFeaturesIndices, selectedFeaturesIndices) |
|
296 | 288 |
}) |
297 | 289 |
\ No newline at end of file |
... | ... |
@@ -70,7 +70,6 @@ |
70 | 70 |
#' colnames(genesMatrix) <- paste("Gene", 1:ncol(genesMatrix)) |
71 | 71 |
#' |
72 | 72 |
#' CVparams <- CrossValParams("k-Fold") |
73 |
-#' |
|
74 | 73 |
#' trainParams <- TrainParams(GLMtrainInterface) |
75 | 74 |
#' predictParams <- PredictParams(GLMpredictInterface) |
76 | 75 |
#' modParams <- ModellingParams(selectParams = NULL, trainParams = trainParams, |
... | ... |
@@ -240,7 +240,7 @@ setMethod("NSCpredictInterface", c("pamrtrained", "MultiAssayExperiment"), funct |
240 | 240 |
#' @param classesTrain A vector of class labels of class \code{\link{factor}} of the |
241 | 241 |
#' same length as the number of samples in \code{measurementsTrain}. |
242 | 242 |
#' @return A list with the first element being empty (no feature ranking is |
243 |
-#' provided) and second element being the selected features. |
|
243 |
+#' provided) and second element being the selected features' indices. |
|
244 | 244 |
#' @author Dario Strbenac |
245 | 245 |
#' @seealso \code{\link[pamr]{pamr.listgenes}} for the function that is |
246 | 246 |
#' interfaced to. |
... | ... |
@@ -278,12 +278,7 @@ setMethod("NSCfeatures", "pamrtrained", |
278 | 278 |
minError <- min(model[["errors"]]) |
279 | 279 |
threshold <- model[["threshold"]][max(which(model[["errors"]] == minError))] |
280 | 280 |
params <- c(list(model), list(list(x = t(as.matrix(measurementsTrain)), y = measurementsTrain, geneid = 1:ncol(measurementsTrain))), threshold) |
281 |
- chosen <- as.numeric(do.call(pamr::pamr.listgenes, params)[, 1]) |
|
281 |
+ chosenIndices <- as.numeric(do.call(pamr::pamr.listgenes, params)[, 1]) |
|
282 | 282 |
|
283 |
- if(is.matrix(measurementsTrain) || is.null(S4Vectors::mcols(measurementsTrain))) |
|
284 |
- chosen <- colnames(measurementsTrain)[chosen] |
|
285 |
- else |
|
286 |
- chosen <- S4Vectors::mcols(measurementsTrain)[chosen, ] |
|
287 |
- |
|
288 |
- list(NULL, chosen) |
|
283 |
+ list(NULL, chosenIndices) |
|
289 | 284 |
}) |
290 | 285 |
\ No newline at end of file |
... | ... |
@@ -193,8 +193,8 @@ setMethod("randomForestPredictInterface", c("randomForest", "MultiAssayExperimen |
193 | 193 |
#' @aliases forestFeatures forestFeatures,randomForest-method |
194 | 194 |
#' @param forest A trained random forest which was created by \code{\link{randomForest}}. |
195 | 195 |
#' @return An \code{list} object. The first element is a vector or data frame |
196 |
-#' of features, ranked from best to worst using the Gini index. The second |
|
197 |
-#' element is a vector or data frame of features used in at least one tree. |
|
196 |
+#' of feature indices, ranked from best to worst using the Gini index. The second |
|
197 |
+#' element is a vector or data frame of feature indices used in at least one tree. |
|
198 | 198 |
#' @author Dario Strbenac |
199 | 199 |
#' @examples |
200 | 200 |
#' |
... | ... |
@@ -226,17 +226,7 @@ setGeneric("forestFeatures", function(forest, ...) |
226 | 226 |
setMethod("forestFeatures", "randomForest", |
227 | 227 |
function(forest) |
228 | 228 |
{ |
229 |
- inputFeatures <- rownames(randomForest::importance(forest)) |
|
230 |
- rankedFeatures <- inputFeatures[order(randomForest::importance(forest), decreasing = TRUE)] |
|
231 |
- selectedFeatures <- inputFeatures[randomForest::varUsed(forest) > 0] |
|
232 |
- selectedFeatures <- selectedFeatures[na.omit(match(rankedFeatures, selectedFeatures))] |
|
233 |
- |
|
234 |
- # Colon is a reserved symbol for separating data name and feature name, which is necessary for identifiability of MultiAssayExperiment features. It is not permitted in feature names. |
|
235 |
- if(grepl(':', selectedFeatures[1]) == TRUE) # Convert to data.frame. |
|
236 |
- { |
|
237 |
- selectedFeatures <- do.call(rbind, strsplit(selectedFeatures, ':')) |
|
238 |
- rankedFeatures <- do.call(rbind, strsplit(rankedFeatures, ':')) |
|
239 |
- colnames(selectedFeatures) <- colnames(rankedFeatures) <- c("dataset", "feature") |
|
240 |
- } |
|
241 |
- list(rankedFeatures, selectedFeatures) |
|
229 |
+ rankedFeaturesIndices <- order(randomForest::importance(forest), decreasing = TRUE) |
|
230 |
+ selectedFeaturesIndices <- which(randomForest::varUsed(forest) > 0) |
|
231 |
+ list(rankedFeaturesIndices, selectedFeaturesIndices) |
|
242 | 232 |
}) |
243 | 233 |
\ No newline at end of file |
... | ... |
@@ -146,7 +146,6 @@ setMethod("SVMpredictInterface", c("svm", "DataFrame"), function(model, measurem |
146 | 146 |
|
147 | 147 |
# Prediction function depends on test data having same set of columns in same order as |
148 | 148 |
# selected features used for training. |
149 |
- colnames(measurementsTest) <- make.names(colnames(measurementsTest)) # svm silently converts feature names. |
|
150 | 149 |
measurementsTest <- measurementsTest[, colnames(model[["SV"]])] |
151 | 150 |
classPredictions <- predict(model, measurementsTest, probability = TRUE) |
152 | 151 |
|
... | ... |
@@ -54,26 +54,28 @@ |
54 | 54 |
#' @author Dario Strbenac |
55 | 55 |
#' @examples |
56 | 56 |
#' |
57 |
-#' predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
58 |
-#' permutation = rep(1:2, each = 40), |
|
59 |
-#' class = factor(rep(c("Healthy", "Cancer"), 40))) |
|
57 |
+#' predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
58 |
+#' permutation = rep(1:2, each = 40), |
|
59 |
+#' class = factor(rep(c("Healthy", "Cancer"), 40))) |
|
60 | 60 |
#' actual <- factor(rep(c("Healthy", "Cancer"), each = 5)) |
61 | 61 |
#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
62 | 62 |
#' "Cross-validation"), |
63 | 63 |
#' value = c("Example", "t-test", "Differential Expression", "2 Permutations, 2 Folds")), |
64 |
-#' LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:9, 11:101)), |
|
65 |
-#' list(c(1:3), c(2, 5, 6), 1:4, 5:8), |
|
64 |
+#' LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
65 |
+#' `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)), |
|
66 |
+#' list(paste("Gene", 1:3), paste("Gene", c(2, 5, 6)), paste("Gene", 1:4), paste("Gene", 5:8)), |
|
66 | 67 |
#' list(function(oracle){}), NULL, predicted, actual) |
67 | 68 |
#' result1 <- calcCVperformance(result1, "Macro F1") |
68 | 69 |
#' |
69 |
-#' predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
70 |
+#' predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
70 | 71 |
#' permutation = rep(1:2, each = 40), |
71 | 72 |
#' class = factor(rep(c("Healthy", "Cancer"), 40))) |
72 | 73 |
#' |
73 | 74 |
#' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
74 | 75 |
#' "Cross-validation"), |
75 | 76 |
#' value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")), |
76 |
-#' LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:5, 11:105)), |
|
77 |
+#' LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
78 |
+#' `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)), |
|
77 | 79 |
#' list(c(1:3), c(4:6), c(1, 6, 7, 9), c(5:8)), |
78 | 80 |
#' list(function(oracle){}), NULL, predicted, actual) |
79 | 81 |
#' result2 <- calcCVperformance(result2, "Macro F1") |
... | ... |
@@ -131,7 +131,7 @@ |
131 | 131 |
#' colData = cbind(clinicalData, class = classes)) |
132 | 132 |
#' targetFeatures <- DataFrame(dataset = "RNA", feature = "Gene 50") |
133 | 133 |
#' plotFeatureClasses(dataContainer, targets = targetFeatures, classesColumn = "class", |
134 |
-#' groupBy = c("sampleInfo", "Gender"), |
|
134 |
+#' groupBy = c("sampleInfo", "Gender"), # Table name, feature name. |
|
135 | 135 |
#' xAxisLabel = bquote(log[2]*'(expression)'), dotBinWidth = 0.5) |
136 | 136 |
#' |
137 | 137 |
#' @importFrom dplyr mutate n |
... | ... |
@@ -175,7 +175,7 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar |
175 | 175 |
facets = factor(paste(groupingName, "is", groupBy), levels = paste(groupingName, "is", levelsOrder))) |
176 | 176 |
} |
177 | 177 |
|
178 |
- splitDataset <- .splitDataAndOutcomes(measurements, classes) |
|
178 |
+ splitDataset <- .splitDataAndOutcomes(measurements, classes, restrict = NULL) |
|
179 | 179 |
measurements <- splitDataset[["measurements"]] |
180 | 180 |
classes <- splitDataset[["outcomes"]] |
181 | 181 |
|
... | ... |
@@ -96,19 +96,21 @@ setMethod("previousSelection", "DataFrame", |
96 | 96 |
previousIDs <- chosenFeatureNames(classifyResult)[[.iteration]] |
97 | 97 |
if(is.character(previousIDs)) |
98 | 98 |
{ |
99 |
- commonFeatures <- intersect(previousIDs, colnames(measurementsTrain)) |
|
100 |
- overlapPercent <- length(commonFeatures) / length(previousIDs) * 100 |
|
101 |
- } else { # A data.frame describing the data set and variable name of the chosen feature. |
|
99 |
+ featuresIDs <- colnames(measurementsTrain) |
|
100 |
+ IDsRows <- match(previousIDs, featuresInfo(classifyResult)[, "Original Feature"]) |
|
101 |
+ safeIDs <- featuresInfo(classifyResult)[IDsRows, "Renamed Feature"] |
|
102 |
+ } else { # A data frame describing the data set and variable name of the chosen feature. |
|
102 | 103 |
featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")]) |
103 |
- selectedIDs <- do.call(paste, previousIDs) |
|
104 |
- selectedColumns <- match(selectedIDs, featuresIDs) |
|
105 |
- commonFeatures <- sum(!is.na(selectedColumns)) |
|
106 |
- overlapPercent <- commonFeatures / nrow(previousIDs) * 100 |
|
104 |
+ IDsRows <- match(do.call(paste, previousIDs), do.call(paste(featuresInfo(classifyResult)[, c("Original Dataset", "Original Feature")]))) |
|
105 |
+ safeIDs <- do.call(paste, featuresInfo(classifyResult)[IDsRows, c("Renamed Dataset", "Renamed Feature")]) |
|
107 | 106 |
} |
107 |
+ |
|
108 |
+ commonFeatures <- intersect(safeIDs, featuresIDs) |
|
109 |
+ overlapPercent <- length(commonFeatures) / length(safeIDs) * 100 |
|
108 | 110 |
if(overlapPercent < minimumOverlapPercent) |
109 | 111 |
signalCondition(simpleError(paste("Number of features in common between previous and current data set is lower than", minimumOverlapPercent, "percent."))) |
110 |
- |
|
111 |
- S4Vectors::mcols(measurementsTrain)[selectedColumns, ] # Each row is about one column. |
|
112 |
+ |
|
113 |
+ match(safeIDs, featuresIDs) # Return indices, not identifiers. |
|
112 | 114 |
}) |
113 | 115 |
|
114 | 116 |
#' @rdname previousSelection |
... | ... |
@@ -117,7 +119,7 @@ setMethod("previousSelection", "MultiAssayExperiment", |
117 | 119 |
function(measurementsTrain, ...) |
118 | 120 |
{ |
119 | 121 |
sampleInfoColumns <- colnames(MultiAssayExperiment::colData(sampleInfoColumns)) |
120 |
- dataTable <- wideFormat(measurementsTrain, colDataCols = sampleInfoColumns, check.names = FALSE, collapse = ':') |
|
122 |
+ dataTable <- MultiAssayExperiment::wideFormat(measurementsTrain, colDataCols = sampleInfoColumns, check.names = FALSE, collapse = ':') |
|
121 | 123 |
S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"]) |
122 | 124 |
previousSelection(dataTable, ...) |
123 | 125 |
}) |
124 | 126 |
\ No newline at end of file |
... | ... |
@@ -84,10 +84,7 @@ setMethod("bartlettRanking", "DataFrame", # Sample information data or one of th |
84 | 84 |
pValues <- apply(measurementsTrain, 2, function(featureColumn) |
85 | 85 |
stats::bartlett.test(featureColumn, classesTrain)[["p.value"]]) |
86 | 86 |
|
87 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
88 |
- S4Vectors::mcols(measurementsTrain)[order(pValues), ] |
|
89 |
- else |
|
90 |
- colnames(measurementsTrain)[order(pValues)] |
|
87 |
+ order(pValues) # From smallest to largest. |
|
91 | 88 |
}) |
92 | 89 |
|
93 | 90 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -59,10 +59,7 @@ setMethod("coxphRanking", "DataFrame", function(measurementsTrain, survivalTrain |
59 | 59 |
s$waldtest["pvalue"] |
60 | 60 |
}) |
61 | 61 |
|
62 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
63 |
- S4Vectors::mcols(measurementsTrain)[order(pValues), ] |
|
64 |
- else |
|
65 |
- colnames(measurementsTrain)[order(pValues)] |
|
62 |
+ order(pValues) # From smallest to largest. |
|
66 | 63 |
}) |
67 | 64 |
|
68 | 65 |
# One or more omics data sets, possibly with clinical data. |
... | ... |
@@ -102,11 +102,7 @@ setMethod("DMDranking", "DataFrame", # sampleInfo data or one of the other input |
102 | 102 |
if(differences %in% c("both", "scale")) |
103 | 103 |
divergence <- divergence + scalesDifferences |
104 | 104 |
|
105 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
106 |
- S4Vectors::mcols(measurementsTrain)[order(divergence, decreasing = TRUE), ] |
|
107 |
- else |
|
108 |
- colnames(measurementsTrain)[order(divergence, decreasing = TRUE)] |
|
109 |
- |
|
105 |
+ order(divergence, decreasing = TRUE) |
|
110 | 106 |
}) |
111 | 107 |
|
112 | 108 |
#' @rdname DMDranking |
... | ... |
@@ -87,10 +87,7 @@ setMethod("differentMeansRanking", "DataFrame", |
87 | 87 |
pValues <- genefilter::rowFtests(measurementsMatrix, classesTrain)[, "p.value"] |
88 | 88 |
} |
89 | 89 |
|
90 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
91 |
- S4Vectors::mcols(measurementsTrain)[order(pValues), ] |
|
92 |
- else |
|
93 |
- colnames(measurementsTrain)[order(pValues)] |
|
90 |
+ order(pValues) # From smallest to largest. |
|
94 | 91 |
}) |
95 | 92 |
|
96 | 93 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -110,10 +110,7 @@ setMethod("edgeRranking", "DataFrame", function(countsTrain, classesTrain, normF |
110 | 110 |
fit <- do.call(edgeR::glmFit, paramList) |
111 | 111 |
test <- edgeR::glmLRT(fit, coef = 2:length(levels(classesTrain)))[["table"]] |
112 | 112 |
|
113 |
- if(!is.null(S4Vectors::mcols(countsTrain))) |
|
114 |
- S4Vectors::mcols(countsTrain)[order(test[, "PValue"]), ] |
|
115 |
- else |
|
116 |
- colnames(countsTrain)[order(test[, "PValue"])] |
|
113 |
+ order(test[, "PValue"]) # From smallest to largest. |
|
117 | 114 |
}) |
118 | 115 |
|
119 | 116 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -75,10 +75,7 @@ setMethod("KolmogorovSmirnovRanking", "DataFrame", # Sample information data or |
75 | 75 |
KSdistance <- apply(measurementsTrain, 2, function(featureColumn) |
76 | 76 |
stats::ks.test(featureColumn[oneClass], featureColumn[otherClass], ...)[["statistic"]]) |
77 | 77 |
|
78 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
79 |
- S4Vectors::mcols(measurementsTrain)[order(KSdistance, decreasing = TRUE), ] |
|
80 |
- else |
|
81 |
- colnames(measurementsTrain)[order(KSdistance, decreasing = TRUE)] |
|
78 |
+ order(KSdistance, decreasing = TRUE) |
|
82 | 79 |
}) |
83 | 80 |
|
84 | 81 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -88,11 +88,7 @@ setMethod("KullbackLeiblerRanking", "DataFrame", # Sample information data or on |
88 | 88 |
((oneClassDistribution[[2]])^2) / ((otherClassDistribution[[2]])^2) + |
89 | 89 |
((otherClassDistribution[[2]])^2) / ((oneClassDistribution[[2]])^2)) |
90 | 90 |
|
91 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
92 |
- S4Vectors::mcols(measurementsTrain)[order(divergence, decreasing = TRUE), ] |
|
93 |
- else |
|
94 |
- colnames(measurementsTrain)[order(divergence, decreasing = TRUE)] |
|
95 |
- |
|
91 |
+ order(divergence, decreasing = TRUE) |
|
96 | 92 |
}) |
97 | 93 |
|
98 | 94 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -76,10 +76,7 @@ setMethod("leveneRanking", "DataFrame", # Sample information data or one of the |
76 | 76 |
pValues <- apply(measurementsTrain, 2, function(featureColumn) |
77 | 77 |
car::leveneTest(featureColumn, classesTrain)[["Pr(>F)"]][1]) |
78 | 78 |
|
79 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
80 |
- S4Vectors::mcols(measurementsTrain)[order(pValues), ] |
|
81 |
- else |
|
82 |
- colnames(measurementsTrain)[order(pValues)] |
|
79 |
+ order(pValues) # From smallest to largest. |
|
83 | 80 |
}) |
84 | 81 |
|
85 | 82 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -99,10 +99,7 @@ setMethod("likelihoodRatioRanking", "DataFrame", # Sample information data or on |
99 | 99 |
switch(alternative[["scale"]], same = allDistribution[[2]], different = classDistribution[[2]]))) |
100 | 100 |
})) |
101 | 101 |
|
102 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
103 |
- S4Vectors::mcols(measurementsTrain)[order(logLikelihoodRatios), ] |
|
104 |
- else |
|
105 |
- colnames(measurementsTrain)[order(logLikelihoodRatios)] |
|
102 |
+ order(logLikelihoodRatios) # From smallest to largest. |
|
106 | 103 |
}) |
107 | 104 |
|
108 | 105 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -77,10 +77,7 @@ setMethod("limmaRanking", "DataFrame", |
77 | 77 |
linearModel <- limma::eBayes(linearModel) |
78 | 78 |
linearModel <- linearModel[, -1] # Get rid of intercept. |
79 | 79 |
|
80 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
81 |
- S4Vectors::mcols(measurementsTrain)[order(linearModel[["F.p.value"]]), ] |
|
82 |
- else |
|
83 |
- colnames(measurementsTrain)[order(linearModel[["F.p.value"]])] |
|
80 |
+ order(linearModel[["F.p.value"]]) # From smallest to largest. |
|
84 | 81 |
}) |
85 | 82 |
|
86 | 83 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -114,7 +114,7 @@ setMethod("pairsDifferencesRanking", "DataFrame", |
114 | 114 |
|
115 | 115 |
pairsClassDifferences <- otherClassDifferences - oneClassDifferences |
116 | 116 |
|
117 |
- featurePairs[order(abs(pairsClassDifferences), decreasing = TRUE)] |
|
117 |
+ order(abs(pairsClassDifferences), decreasing = TRUE) |
|
118 | 118 |
}) |
119 | 119 |
|
120 | 120 |
# One or more omics data sets, possibly with sample information data. |
... | ... |
@@ -73,13 +73,15 @@ |
73 | 73 |
#' @author Dario Strbenac |
74 | 74 |
#' @examples |
75 | 75 |
#' |
76 |
-#' predicted <- data.frame(sample = sample(10, 100, replace = TRUE), |
|
76 |
+#' predicted <- DataFrame(sample = sample(10, 100, replace = TRUE), |
|
77 | 77 |
#' permutation = rep(1:2, each = 50), |
78 | 78 |
#' class = rep(c("Healthy", "Cancer"), each = 50)) |
79 | 79 |
#' actual <- factor(rep(c("Healthy", "Cancer"), each = 5)) |
80 | 80 |
#' allFeatures <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = '')) |
81 |
-#' rankList <- list(allFeatures[1:100], allFeatures[c(15:6, 1:5, 16:100)], |
|
82 |
-#' allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 61:100, 60:51)]) |
|
81 |
+#' allFeatures <- DataFrame(`Original Feature` = allFeatures, `Renamed Feature` = paste("Feature", 1:100, sep = ''), |
|
82 |
+#' check.names = FALSE) |
|
83 |
+#' rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(15:6, 1:5, 16:100), "Original Feature"], |
|
84 |
+#' allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 61:100, 60:51), "Original Feature"]) |
|
83 | 85 |
#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
84 | 86 |
#' "Cross-validation"), |
85 | 87 |
#' value = c("Melanoma", "t-test", "Diagonal LDA", "2 Permutations, 2 Folds")), |
... | ... |
@@ -90,8 +92,8 @@ |
90 | 92 |
#' predicted, actual) |
91 | 93 |
#' |
92 | 94 |
#' predicted[, "class"] <- sample(predicted[, "class"]) |
93 |
-#' rankList <- list(allFeatures[1:100], allFeatures[c(sample(20), 21:100)], |
|
94 |
-#' allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 60:51, 61:100)]) |
|
95 |
+#' rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(sample(20), 21:100), "Original Feature"], |
|
96 |
+#' allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 60:51, 61:100), "Original Feature"]) |
|
95 | 97 |
#' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
96 | 98 |
#' "Cross-validations"), |
97 | 99 |
#' value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")), |
... | ... |
@@ -127,7 +129,7 @@ setMethod("rankingPlot", "list", |
127 | 129 |
if(comparison == "within" && !is.null(referenceLevel)) |
128 | 130 |
stop("'comparison' should not be \"within\" if 'referenceLevel' is not NULL.") |
129 | 131 |
|
130 |
- nFeatures <- ifelse(is.null(ncol(results[[1]]@originalFeatures)), length(results[[1]]@originalFeatures), nrow(results[[1]]@originalFeatures)) |
|
132 |
+ nFeatures <- nrow(featuresInfo(results[[1]])) |
|
131 | 133 |
error <- character() |
132 | 134 |
if(max(topRanked) > nFeatures) |
133 | 135 |
error <- paste("'topRanked' is as high as", max(topRanked)) |
... | ... |
@@ -99,7 +99,7 @@ function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest, |
99 | 99 |
stop("'measurementsTrain' DataFrame must have sample identifiers as its row names.") |
100 | 100 |
if(any(is.na(measurementsTrain))) |
101 | 101 |
stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.") |
102 |
- |
|
102 |
+ |
|
103 | 103 |
splitDatasetTrain <- .splitDataAndOutcomes(measurementsTrain, outcomesTrain) |
104 | 104 |
# Rebalance the class sizes of the training samples by either downsampling or upsampling |
105 | 105 |
# or leave untouched if balancing is none. |
... | ... |
@@ -111,6 +111,20 @@ function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest, |
111 | 111 |
} |
112 | 112 |
} |
113 | 113 |
|
114 |
+ if(is.null(.iteration) || .iteration != "internal") # Not nested cross-validation. |
|
115 |
+ { |
|
116 |
+ # Avoid strange symbols in feature identifiers. |
|
117 |
+ featuresInfo <- .summaryFeatures(measurementsTrain) |
|
118 |
+ if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
119 |
+ { |
|
120 |
+ S4Vectors::mcols(measurementsTrain) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")] |
|
121 |
+ S4Vectors::mcols(measurementsTest) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")] |
|
122 |
+ } else { |
|
123 |
+ colnames(measurementsTrain) <- featuresInfo[, "Renamed Feature"] |
|
124 |
+ colnames(measurementsTest) <- featuresInfo[, "Renamed Feature"] |
|
125 |
+ } |
|
126 |
+ } |
|
127 |
+ |
|
114 | 128 |
if(!is.null(modellingParams@selectParams) && max(modellingParams@selectParams@tuneParams[["nFeatures"]]) > ncol(measurementsTrain)) |
115 | 129 |
{ |
116 | 130 |
warning("Attempting to evaluate more features for feature selection than in |
... | ... |
@@ -118,12 +132,6 @@ input data. Autmomatically reducing to smaller number.") |
118 | 132 |
modellingParams@selectParams@tuneParams[["nFeatures"]] <- 1:min(10, ncol(measurementsTrain)) |
119 | 133 |
} |
120 | 134 |
|
121 |
- # All input features. |
|
122 |
- if(!is.null(S4Vectors::mcols(measurementsTrain))) |
|
123 |
- allFeatures <- S4Vectors::mcols(measurementsTrain) |
|
124 |
- else |
|
125 |
- allFeatures <- colnames(measurementsTrain) |
|
126 |
- |
|
127 | 135 |
if(!is.null(crossValParams) && !is.null(crossValParams@adaptiveResamplingDelta)) |
128 | 136 |
{ # Iteratively resample training samples until their class probability or risk changes, on average, less than delta. |
129 | 137 |
delta <- crossValParams@adaptiveResamplingDelta |
... | ... |
@@ -159,34 +167,25 @@ input data. Autmomatically reducing to smaller number.") |
159 | 167 |
if(is.character(measurementsTransformedList[[1]])) return(measurementsTransformedList[[1]]) # An error occurred. |
160 | 168 |
} |
161 | 169 |
|
162 |
- rankedFeatures <- NULL |
|
163 |
- selectedFeatures <- NULL |
|
170 |
+ rankedFeaturesIndices <- NULL |
|
171 |
+ selectedFeaturesIndices <- NULL |
|
164 | 172 |
tuneDetailsSelect <- NULL |
165 |
- |
|
173 |
+ |
|
166 | 174 |
if(!is.null(modellingParams@selectParams)) |
167 | 175 |
{ |
168 | 176 |
if(length(modellingParams@selectParams@intermediate) != 0) |
169 | 177 |
modellingParams@selectParams <- .addIntermediates(modellingParams@selectParams) |
170 |
- |
|
178 |
+ |
|
171 | 179 |
topFeatures <- tryCatch(.doSelection(measurementsTrain, outcomesTrain, crossValParams, modellingParams, verbose), |
172 | 180 |
error = function(error) error[["message"]]) |
173 | 181 |
if(is.character(topFeatures)) return(topFeatures) # An error occurred. |
174 |
- rankedFeatures <- topFeatures[[1]] # Extract for result object. |
|
175 |
- selectedFeatures <- topFeatures[[2]] # Extract for subsetting. |
|
182 |
+ |
|
183 |
+ rankedFeaturesIndices <- topFeatures[[1]] # Extract for result object. |
|
184 |
+ selectedFeaturesIndices <- topFeatures[[2]] # Extract for subsetting. |
|
176 | 185 |
tuneDetailsSelect <- topFeatures[[3]] |
177 | 186 |
|
178 | 187 |
if(modellingParams@selectParams@subsetToSelections == TRUE) |
179 |
- { # Subset the the data table to only the selected features. |
|
180 |
- if(is.null(S4Vectors::mcols(measurementsTrain))) |
|
181 |
- { # Input was ordinary matrix or DataFrame. |
|
182 |
- measurementsTrain <- measurementsTrain[, selectedFeatures, drop = FALSE] |
|
183 |
- } else { # Input was MultiAssayExperiment. # Match the selected features to the data frame columns |
|
184 |
- selectedIDs <- do.call(paste, selectedFeatures) |
|
185 |
- featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")]) |
|
186 |
- selectedColumns <- match(selectedIDs, featuresIDs) |
|
187 |
- measurementsTrain <- measurementsTrain[, selectedColumns, drop = FALSE] |
|
188 |
- } |
|
189 |
- } |
|
188 |
+ measurementsTrain <- measurementsTrain[, selectedFeaturesIndices, drop = FALSE] |
|
190 | 189 |
} |
191 | 190 |
|
192 | 191 |
# Training stage. |
... | ... |
@@ -207,9 +206,9 @@ input data. Autmomatically reducing to smaller number.") |
207 | 206 |
if(length(extras) > 0) |
208 | 207 |
extrasList <- mget(setdiff(names(extras), "...")) |
209 | 208 |
|
210 |
- featureInfo <- do.call(modellingParams@trainParams@getFeatures, c(trained[1], extrasList)) |
|
211 |
- rankedFeatures <- featureInfo[[1]] |
|
212 |
- selectedFeatures <- featureInfo[[2]] |
|
209 |
+ rankedChosenList <- do.call(modellingParams@trainParams@getFeatures, c(trained[1], extrasList)) |
|
210 |
+ rankedFeaturesIndices <- rankedChosenList[[1]] |
|
211 |
+ selectedFeaturesIndices <- rankedChosenList[[2]] |
|
213 | 212 |
} |
214 | 213 |
|
215 | 214 |
if(!is.null(modellingParams@predictParams)) |
... | ... |
@@ -263,7 +262,8 @@ input data. Autmomatically reducing to smaller number.") |
263 | 262 |
if(!is.null(ncol(predictedOutcomes))) |
264 | 263 |
predictedOutcomes <- predictedOutcomes[, na.omit(match(c("class", "risk"), colnames(predictedOutcomes)))] |
265 | 264 |
performanceChanges <- round(performancesWithoutEach - calcExternalPerformance(outcomesTest, predictedOutcomes, performanceType), 2) |
266 |
- |
|
265 |
+ |
|
266 |
+ if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")] |
|
267 | 267 |
importanceTable <- DataFrame(selectedFeatures, performanceChanges) |
268 | 268 |
if(ncol(importanceTable) == 2) colnames(importanceTable)[1] <- "feature" |
269 | 269 |
colnames(importanceTable)[ncol(importanceTable)] <- paste("Change in", performanceType) |
... | ... |
@@ -271,6 +271,22 @@ input data. Autmomatically reducing to smaller number.") |
271 | 271 |
|
272 | 272 |
if(is.null(modellingParams@predictParams)) models <- NULL else models <- trained[[1]] # One function for training and testing. Typically, the models aren't returned to the user, such as Poisson LDA implemented by PoiClaClu. |
273 | 273 |
if(!is.null(tuneDetailsSelect)) tuneDetails <- tuneDetailsSelect else tuneDetails <- tuneDetailsTrain |
274 |
+ |
|
275 |
+ # Convert back into original feature identifiers unless it is a nested cross-validation. |
|
276 |
+ if(is.null(.iteration) || .iteration != "internal") |
|
277 |
+ { |
|
278 |
+ if(!is.null(rankedFeaturesIndices)) |
|
279 |
+ { |
|
280 |
+ if(is.null(S4Vectors::mcols(measurementsTrain))) rankedFeatures <- featuresInfo[rankedFeaturesIndices, "Original Feature"] else rankedFeatures <- featuresInfo[rankedFeaturesIndices, c("Original Dataset", "Original Feature")] |
|
281 |
+ } else { rankedFeatures <- NULL} |
|
282 |
+ if(!is.null(selectedFeaturesIndices)) |
|
283 |
+ { |
|
284 |
+ if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")] |
|
285 |
+ } else { selectedFeatures <- NULL} |
|
286 |
+ } else { # Nested use in feature selection. No feature selection in inner execution, so ignore features. |
|
287 |
+ rankedFeatures <- selectedFeatures <- NULL |
|
288 |
+ } |
|
289 |
+ |
|
274 | 290 |
if(!is.null(.iteration)) # This function was not called by the end user. |
275 | 291 |
{ |
276 | 292 |
list(ranked = rankedFeatures, selected = selectedFeatures, models = models, testSet = rownames(measurementsTest), predictions = predictedOutcomes, tune = tuneDetails, importance = importanceTable) |
... | ... |
@@ -298,9 +314,9 @@ input data. Autmomatically reducing to smaller number.") |
298 | 314 |
allOutcomes <- c(outcomesTrain, outcomesTest) |
299 | 315 |
names(allOutcomes) <- allSamples |
300 | 316 |
} |
301 |
- |
|
302 |
- ClassifyResult(characteristics, allSamples, allFeatures, list(rankedFeatures), list(selectedFeatures), |
|
303 |
- list(models), tuneDetails, data.frame(sample = rownames(measurementsTest), predictedOutcomes, check.names = FALSE), allOutcomes, importanceTable) |
|
317 |
+ |
|
318 |
+ ClassifyResult(characteristics, allSamples, featuresInfo, list(rankedFeatures), list(selectedFeatures), |
|
319 |
+ list(models), tuneDetails, DataFrame(sample = rownames(measurementsTest), predictedOutcomes, check.names = FALSE), allOutcomes, importanceTable) |
|
304 | 320 |
} |
305 | 321 |
}) |
306 | 322 |
|
... | ... |
@@ -309,7 +325,7 @@ input data. Autmomatically reducing to smaller number.") |
309 | 325 |
setMethod("runTest", c("MultiAssayExperiment"), |
310 | 326 |
function(measurementsTrain, measurementsTest, targets = names(measurements), outcomesColumns, ...) |
311 | 327 |
{ |
312 |
- omicsTargets <- setdiff("sampleInfo", targets) |
|
328 |
+ omicsTargets <- setdiff(targets, "sampleInfo") |
|
313 | 329 |
if(length(omicsTargets) > 0) |
314 | 330 |
{ |
315 | 331 |
if(any(anyReplicated(measurements[, , omicsTargets]))) |
... | ... |
@@ -96,10 +96,7 @@ input data. Autmomatically reducing to smaller number.") |
96 | 96 |
# Element names of the list returned by runTest, in order. |
97 | 97 |
resultTypes <- c("ranked", "selected", "models", "testSet", "predictions", "tune", "importance") |
98 | 98 |
|
99 |
- featureInfo <- .summaryFeatures(measurements) |
|
100 |
- allFeatures <- featureInfo[[1]] |
|
101 |
- featureNames <- featureInfo[[2]] |
|
102 |
- consideredFeatures <- featureInfo[[3]] |
|
99 |
+ featuresInfo <- .summaryFeatures(measurements) |
|
103 | 100 |
# Create all partitions of training and testing sets. |
104 | 101 |
samplesSplits <- .samplesSplits(crossValParams, outcomes) |
105 | 102 |
splitsTestInfo <- .splitsTestInfo(crossValParams, samplesSplits) |
... | ... |
@@ -116,7 +113,7 @@ input data. Autmomatically reducing to smaller number.") |
116 | 113 |
{ |
117 | 114 |
if(verbose >= 1 && setNumber %% 10 == 0) |
118 | 115 |
message("Processing sample set ", setNumber, '.') |
119 |
- |
|
116 |
+ |
|
120 | 117 |
# crossValParams is needed at least for nested feature tuning. |
121 | 118 |
runTest(measurements[trainingSamples, , drop = FALSE], outcomes[trainingSamples], |
122 | 119 |
measurements[testSamples, , drop = FALSE], outcomes[testSamples], |
... | ... |
@@ -162,10 +159,10 @@ input data. Autmomatically reducing to smaller number.") |
162 | 159 |
predictsColumnName <- "risk" |
163 | 160 |
else # Classification task. A factor. |
164 | 161 |
predictsColumnName <- "class" |
165 |
- predictionsTable <- data.frame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, unlist(lapply(results, "[[", "predictions")), check.names = FALSE) |
|
162 |
+ predictionsTable <- DataFrame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, unlist(lapply(results, "[[", "predictions")), check.names = FALSE) |
|
166 | 163 |
colnames(predictionsTable)[ncol(predictionsTable)] <- predictsColumnName |
167 | 164 |
} else { # data frame |
168 |
- predictionsTable <- data.frame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, do.call(rbind, lapply(results, "[[", "predictions")), check.names = FALSE) |
|
165 |
+ predictionsTable <- DataFrame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, do.call(rbind, lapply(results, "[[", "predictions")), check.names = FALSE) |
|
169 | 166 |
} |
170 | 167 |
rownames(predictionsTable) <- NULL |
171 | 168 |
tuneList <- lapply(results, "[[", "tune") |
... | ... |
@@ -175,7 +172,7 @@ input data. Autmomatically reducing to smaller number.") |
175 | 172 |
if(!is.null(results[[1]][["importance"]])) |
176 | 173 |
importance <- do.call(rbind, lapply(results, "[[", "importance")) |
177 | 174 |
|
178 |
- ClassifyResult(characteristics, rownames(measurements), allFeatures, |
|
175 |
+ ClassifyResult(characteristics, rownames(measurements), featuresInfo, |
|
179 | 176 |
lapply(results, "[[", "ranked"), lapply(results, "[[", "selected"), |
180 | 177 |
lapply(results, "[[", "models"), tuneList, predictionsTable, outcomes, importance, modellingParams) |
181 | 178 |
}) |
... | ... |
@@ -185,7 +182,7 @@ input data. Autmomatically reducing to smaller number.") |
185 | 182 |
setMethod("runTests", c("MultiAssayExperiment"), |
186 | 183 |
function(measurements, targets = names(measurements), outcomesColumns, ...) |
187 | 184 |
{ |
188 |
- omicsTargets <- setdiff("sampleInfo", targets) |
|
185 |
+ omicsTargets <- setdiff(targets, "sampleInfo") |
|
189 | 186 |
if(length(omicsTargets) > 0) |
190 | 187 |
{ |
191 | 188 |
if(any(anyReplicated(measurements[, , omicsTargets]))) |
... | ... |
@@ -48,10 +48,11 @@ |
48 | 48 |
#' @author Dario Strbenac |
49 | 49 |
#' @examples |
50 | 50 |
#' |
51 |
-#' predicted <- data.frame(sample = LETTERS[sample(10, 100, replace = TRUE)], |
|
51 |
+#' predicted <- DataFrame(sample = LETTERS[sample(10, 100, replace = TRUE)], |
|
52 | 52 |
#' class = rep(c("Healthy", "Cancer"), each = 50)) |
53 | 53 |
#' actual <- factor(rep(c("Healthy", "Cancer"), each = 5), levels = c("Healthy", "Cancer")) |
54 | 54 |
#' features <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = '')) |
55 |
+#' features <- DataFrame(`Original Feature` = features, `Renamed Feature` = paste("Feature", 1:100, sep = ''), check.names = FALSE) |
|
55 | 56 |
#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
56 | 57 |
#' "Cross-validation"), |
57 | 58 |
#' value = c("Example", "t-test", "Differential Expression", "2 Permutations, 2 Folds")), |
... | ... |
@@ -90,12 +90,14 @@ |
90 | 90 |
#' @author Dario Strbenac |
91 | 91 |
#' @examples |
92 | 92 |
#' |
93 |
-#' predicted <- data.frame(sample = sample(10, 100, replace = TRUE), |
|
93 |
+#' predicted <- DataFrame(sample = sample(10, 100, replace = TRUE), |
|
94 | 94 |
#' class = rep(c("Healthy", "Cancer"), each = 50)) |
95 | 95 |
#' actual <- factor(rep(c("Healthy", "Cancer"), each = 5)) |
96 | 96 |
#' allFeatures <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = '')) |
97 |
-#' rankList <- list(allFeatures[1:100], allFeatures[c(5:1, 6:100)], |
|
98 |
-#' allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 60:51, 61:100)]) |
|
97 |
+#' allFeatures <- DataFrame(`Original Feature` = allFeatures, `Renamed Feature` = paste("Feature", 1:100, sep = ''), |
|
98 |
+#' check.names = FALSE) |
|
99 |
+#' rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(5:1, 6:100), "Original Feature"], |
|
100 |
+#' allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 60:51, 61:100), "Original Feature"]) |
|
99 | 101 |
#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
100 | 102 |
#' "Cross-validations"), |
101 | 103 |
#' value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")), |
... | ... |
@@ -106,8 +108,8 @@ |
106 | 108 |
#' predicted, actual) |
107 | 109 |
#' |
108 | 110 |
#' predicted[, "class"] <- sample(predicted[, "class"]) |
109 |
-#' rankList <- list(allFeatures[1:100], allFeatures[c(sample(20), 21:100)], |
|
110 |
-#' allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 60:51, 61:100)]) |
|
111 |
+#' rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(sample(20), 21:100), "Original Feature"], |
|
112 |
+#' allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 60:51, 61:100), "Original Feature"]) |
|
111 | 113 |
#' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
112 | 114 |
#' "Cross-validation"), |
113 | 115 |
#' value = c("Melanoma", "t-test", "Diagonal LDA", "2 Permutations, 2 Folds")), |
... | ... |
@@ -61,14 +61,6 @@ mixModelsParams <- function() { |
61 | 61 |
return(list(trainParams = trainParams, predictParams = predictParams)) |
62 | 62 |
} |
63 | 63 |
|
64 |
-# Elastic Net GLM with Prevaliation of Omics Data |
|
65 |
-elasticNetPreval <- function() { |
|
66 |
- trainParams <- TrainParams(elasticNetGLMtrainInterfacePreval) |
|
67 |
- predictParams <- PredictParams(elasticNetGLMpredictInterface) |
|
68 |
- |
|
69 |
- return(list(trainParams = trainParams, predictParams = predictParams)) |
|
70 |
-} |
|
71 |
- |
|
72 | 64 |
# Cox Proportional Hazards Model for Survival |
73 | 65 |
coxphParams <- function() { |
74 | 66 |
trainParams <- TrainParams(coxphTrainInterface) |
... | ... |
@@ -68,18 +68,6 @@ |
68 | 68 |
if(ncol(measurements) == 0) |
69 | 69 |
stop(paste("No features are left after restricting to", restrict, "but at least one must be.")) |
70 | 70 |
} |
71 |
- |
|
72 |
- ### |
|
73 |
- # Lets just check that measurements has mcols |
|
74 |
- ### |
|
75 |
- |
|
76 |
- if(is(measurements, "DataFrame")){ |
|
77 |
- if(is.null(mcols(measurements))){ |
|
78 |
- message(paste("You have", ncol(measurements), "features and", nrow(measurements), "samples and only one data-type.")) |
|
79 |
- mcols(measurements)$dataset <- "dataset" |
|
80 |
- mcols(measurements)$feature <- colnames(measurements) |
|
81 |
- }} |
|
82 |
- |
|
83 | 71 |
|
84 | 72 |
list(measurements = measurements, outcomes = outcomes) |
85 | 73 |
} |
... | ... |
@@ -115,7 +103,7 @@ |
115 | 103 |
# Get all desired measurements tables and sample information columns (other than the columns representing outcomes). |
116 | 104 |
# These form the independent variables to be used for making predictions with. |
117 | 105 |
# Variable names will have names like RNA:BRAF for traceability. |
118 |
- dataTable <- wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomesColumns), check.names = FALSE, collapse = ':') |
|
106 |
+ dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomesColumns), check.names = FALSE, collapse = ':') |
|
119 | 107 |
rownames(dataTable) <- dataTable[, "primary"] |
120 | 108 |
S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"]) |
121 | 109 |
colnames(S4Vectors::mcols(dataTable))[1] <- "dataset" |
... | ... |
@@ -311,24 +299,15 @@ |
311 | 299 |
tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE) |
312 | 300 |
modellingParams@trainParams@tuneParams <- NULL |
313 | 301 |
bestPerformers <- sapply(rankings, function(rankingsVariety) |
314 |
- { |
|
302 |
+ { |
|
315 | 303 |
# Creates a matrix. Columns are top n features, rows are varieties (one row if None). |
316 | 304 |
performances <- sapply(1:nrow(tuneCombosTrain), function(rowIndex) |
317 | 305 |
{ |
318 | 306 |
whichTry <- 1:tuneCombosTrain[rowIndex, "topN"] |
319 | 307 |
if(doSubset) |
320 | 308 |
{ |
321 |
- if(is.null(S4Vectors::mcols(measurementsTrain)) ) # There are no different data sets. |
|
322 |
- { |
|
323 |
- topFeatures <- rankingsVariety[whichTry] |
|
324 |
- measurementsTrain <- measurementsTrain[, topFeatures, drop = FALSE] # Features in columns |
|
325 |
- } else { # Match to relevant variables, considering data set of them. |
|
326 |
- topFeatures <- rankingsVariety[whichTry, ] |
|
327 |
- topIDs <- do.call(paste, topFeatures) |
|
328 |
- featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")]) |
|
329 |
- topColumns <- match(topIDs, featuresIDs) |
|
330 |
- measurementsTrain <- measurementsTrain[, topColumns, drop = FALSE] |
|
331 |
- } |
|
309 |
+ topFeatures <- rankingsVariety[whichTry] |
|
310 |
+ measurementsTrain <- measurementsTrain[, topFeatures, drop = FALSE] # Features in columns |
|
332 | 311 |
} else { # Pass along features to use. |
333 | 312 |
modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, setNames(list(rankingsVariety[whichTry]), names(modellingParams@trainParams@intermediate))) |
334 | 313 |
} |
... | ... |
@@ -371,12 +350,9 @@ |
371 | 350 |
if(ncol(tuneRow) > 1) tuneDetails <- tuneRow[, -1, drop = FALSE] else tuneDetails <- NULL |
372 | 351 |
|
373 | 352 |
rankingUse <- rankings[[tunePick]] |
374 |
- if(is.null(S4Vectors::mcols(measurementsTrain))) |
|
375 |
- selection <- rankingUse[1:tuneRow[, "topN"]] |
|
376 |
- else # A data frame. Subset the rows. |
|
377 |
- selection <- rankingUse[1:tuneRow[, "topN"], ] |
|
353 |
+ selectionIndices <- rankingUse[1:tuneRow[, "topN"]] |
|
378 | 354 |
|
379 |
- list(ranked = rankingUse, selected = selection, tune = tuneDetails) |
|
355 |
+ list(ranked = rankingUse, selected = selectionIndices, tune = tuneDetails) |
|
380 | 356 |
} else if(is.list(featureRanking)) { # It is a list of functions for ensemble selection. |
381 | 357 |
featuresLists <- mapply(function(selector, selParams) |
382 | 358 |
{ |
... | ... |
@@ -597,16 +573,27 @@ |
597 | 573 |
# MultiAssayExperiment has feature details in mcols. |
598 | 574 |
if(!is.null(S4Vectors::mcols(measurements))) |
599 | 575 |
{ |
600 |
- allFeatures <- S4Vectors::mcols(measurements) |
|
576 |
+ originalInfo <- S4Vectors::mcols(measurements) |
|
601 | 577 |
featureNames <- S4Vectors::mcols(measurements)[, "feature"] |
578 |
+ datasets <- unique(S4Vectors::mcols(measurements)[, "dataset"]) |
|
579 |
+ renamedInfo <- S4Vectors::mcols(measurements) |
|
580 |
+ renamedDatasets <- paste("Dataset", seq_along(datasets), sep = '') |
|
581 |
+ for(dataset in datasets) |
|
582 |
+ { |
|
583 |
+ rowsDataset <- which(renamedInfo[, "dataset"] == dataset) |
|
584 |
+ renamedInfo[rowsDataset, "feature"] <- paste("Feature", seq_along(rowsDataset), sep = '') |
|
585 |
+ renamedInfo[rowsDataset, "dataset"] <- renamedDatasets[match(dataset, datasets)] |
|
586 |
+ } |
|
587 |
+ featuresInfo <- DataFrame(originalInfo, renamedInfo) |
|
588 |
+ colnames(featuresInfo) <- c("Original Dataset", "Original Feature", "Renamed Dataset", "Renamed Feature") |
|
602 | 589 |
} else { |
603 |
- allFeatures <- colnames(measurements) |
|
604 |
- featureNames <- colnames(measurements) |
|
590 |
+ originalFeatures <- colnames(measurements) |
|
591 |
+ renamedInfo <- paste("Feature", seq_along(measurements), sep = '') |
|
592 |
+ featuresInfo <- DataFrame(originalFeatures, renamedInfo) |
|
593 |
+ colnames(featuresInfo) <- c("Original Feature", "Renamed Feature") |
|
605 | 594 |
} |
606 |
- # Could refer to features or feature sets, depending on if a selection method utilising feature sets is used. |
|
607 |
- consideredFeatures <- ncol(measurements) |
|
608 | 595 |
|
609 |
- list(allFeatures, featureNames, consideredFeatures) |
|
596 |
+ featuresInfo |
|
610 | 597 |
} |
611 | 598 |
|
612 | 599 |
# Function to identify the parameters of an S4 method. |
... | ... |
@@ -8,8 +8,8 @@ |
8 | 8 |
\alias{show,ClassifyResult-method} |
9 | 9 |
\alias{sampleNames} |
10 | 10 |
\alias{sampleNames,ClassifyResult-method} |
11 |
-\alias{allFeatureNames} |
|
12 |
-\alias{allFeatureNames,ClassifyResult-method} |
|
11 |
+\alias{featuresInfo} |
|
12 |
+\alias{featuresInfo,ClassifyResult-method} |
|
13 | 13 |
\alias{predictions} |
14 | 14 |
\alias{predictions,ClassifyResult-method} |
15 | 15 |
\alias{actualOutcomes} |
... | ... |
@@ -46,9 +46,8 @@ using wrapper functions for feature selection and classifiers in this |
46 | 46 |
package, the function names will automatically be generated and therefore it |
47 | 47 |
is not necessary to specify them.} |
48 | 48 |
\item{\code{originalNames}}{All sample names.} |
49 |
-\item{\code{originalFeatures}}{All feature names. Character vector |
|
50 |
-or \code{\link{DataFrame}} with one row for each feature if the data set has multiple kinds |
|
51 |
-of measurements on the same set of samples.} |
|
49 |
+\item{\code{featuresInfo}}{A \code{\link{DataFrame}} containing all feature names in original format |
|
50 |
+and a safe format without any unusual symbols that R would automatically convert into another format and cause trouble.} |
|
52 | 51 |
\item{\code{rankedFeatures}}{All features, from most to least important. Character vector |
53 | 52 |
or a data frame if data set has multiple kinds of measurements on the same set of samples.} |
54 | 53 |
\item{\code{chosenFeatures}}{Features selected at each fold. Character |
... | ... |
@@ -80,7 +79,7 @@ most popular value of the parameter in cross-validation is used.} |
80 | 79 |
\describe{ |
81 | 80 |
\item{\code{sampleNames(result)}}{Returns a vector of sample names present in the data set.}} |
82 | 81 |
\describe{ |
83 |
-\item{\code{allFeatureNames(result)}}{Returns a vector of features present in the data set.}} |
|
82 |
+\item{\code{featuresInfo(result)}}{Returns a table of features present in the data set. Shows original names and renamed names to ensure no unusual symbols in names.}} |
|
84 | 83 |
\describe{ |
85 | 84 |
\item{\code{actualOutcomes(result)}}{Returns the known outcomes of each sample.}} |
86 | 85 |
\describe{ |
... | ... |
@@ -108,7 +108,6 @@ The value of the \code{family} parameter is fixed to \code{binomal}. |
108 | 108 |
colnames(genesMatrix) <- paste("Gene", 1:ncol(genesMatrix)) |
109 | 109 |
|
110 | 110 |
CVparams <- CrossValParams("k-Fold") |
111 |
- |
|
112 | 111 |
trainParams <- TrainParams(GLMtrainInterface) |
113 | 112 |
predictParams <- PredictParams(GLMpredictInterface) |
114 | 113 |
modParams <- ModellingParams(selectParams = NULL, trainParams = trainParams, |
... | ... |
@@ -86,7 +86,7 @@ each class, or a table of both the class labels and class scores, depending |
86 | 86 |
on the setting of \code{returnType}. |
87 | 87 |
|
88 | 88 |
A list with the first element being empty (no feature ranking is |
89 |
-provided) and second element being the selected features. |
|
89 |
+provided) and second element being the selected features' indices. |
|
90 | 90 |
} |
91 | 91 |
\description{ |
92 | 92 |
Restructures variables from ClassifyR framework to be compatible with |
... | ... |
@@ -89,30 +89,32 @@ considered simultaneously, to calculate one curve per classification. |
89 | 89 |
} |
90 | 90 |
\examples{ |
91 | 91 |
|
92 |
- predicted <- do.call(rbind, list(data.frame(data.frame(sample = LETTERS[c(1, 8, 15, 3, 11, 20, 19, 18)], |
|
93 |
- Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25), |
|
94 |
- Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75), |
|
92 |
+ predicted <- do.call(rbind, list(DataFrame(data.frame(sample = LETTERS[seq(1, 20, 2)], |
|
93 |
+ Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25, 0.10, 0.30), |
|
94 |
+ Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75, 0.90, 0.70), |
|
95 | 95 |
fold = 1)), |
96 |
- data.frame(sample = LETTERS[c(11, 18, 15, 4, 6, 10, 11, 12)], |
|
97 |
- Healthy = c(0.45, 0.56, 0.33, 0.56, 0.33, 0.20, 0.60, 0.40), |
|
98 |
- Cancer = c(0.55, 0.44, 0.67, 0.44, 0.67, 0.80, 0.40, 0.60), |
|
96 |
+ DataFrame(sample = LETTERS[seq(2, 20, 2)], |
|
97 |
+ Healthy = c(0.45, 0.56, 0.33, 0.56, 0.65, 0.33, 0.20, 0.60, 0.40, 0.80), |
|
98 |
+ Cancer = c(0.55, 0.44, 0.67, 0.44, 0.35, 0.67, 0.80, 0.40, 0.60, 0.20), |
|
99 | 99 |
fold = 2))) |
100 | 100 |
actual <- factor(c(rep("Healthy", 10), rep("Cancer", 10)), levels = c("Healthy", "Cancer")) |
101 |
- result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
|
102 |
- "Cross-validation"), |
|
103 |
- value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")), |
|
104 |
- LETTERS[1:20], LETTERS[10:1], |
|
105 |
- list(1:100, c(1:9, 11:101)), list(sample(10, 10), sample(10, 10)), |
|
101 |
+ result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"), |
|
102 |
+ value = c("Melanoma", "t-test", "Random Forest", "2-fold")), |
|
103 |
+ LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
104 |
+ `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE), |
|
105 |
+ list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
106 |
+ list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])), |
|
106 | 107 |
list(function(oracle){}), NULL, predicted, actual) |
107 | 108 |
|
108 | 109 |
predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60) |
109 | 110 |
predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40) |
110 |
- result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
|
111 |
- "Cross-validation"), |
|
112 |
- value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")), |
|
113 |
- LETTERS[1:20], LETTERS[10:1], list(1:100, c(1:5, 11:105)), |
|
114 |
- list(sample(10, 10), sample(10, 10)), list(function(oracle){}), |
|
115 |
- NULL, predicted, actual) |
|
111 |
+ result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"), |
|
112 |
+ value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")), |
|
113 |
+ LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
114 |
+ `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE), |
|
115 |
+ list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
116 |
+ list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])), |
|
117 |
+ list(function(oracle){}), NULL, predicted, actual) |
|
116 | 118 |
ROCplot(list(result1, result2), plotTitle = "Cancer ROC") |
117 | 119 |
|
118 | 120 |
} |
... | ... |
@@ -103,13 +103,14 @@ classifier predicts all samples as belonging to the majority class. |
103 | 103 |
} |
104 | 104 |
\examples{ |
105 | 105 |
|
106 |
- predictTable <- data.frame(sample = paste("A", 1:10, sep = ''), |
|
107 |
- class = factor(sample(LETTERS[1:2], 50, replace = TRUE))) |
|
106 |
+ predictTable <- DataFrame(sample = paste("A", 1:10, sep = ''), |
|
107 |
+ class = factor(sample(LETTERS[1:2], 50, replace = TRUE))) |
|
108 | 108 |
actual <- factor(sample(LETTERS[1:2], 10, replace = TRUE)) |
109 |
- result <- ClassifyResult(DataFrame(), |
|
110 |
- paste("A", 1:10, sep = ''), paste("Gene", 1:50, sep = ''), |
|
111 |
- list(1:50, 1:50), list(1:5, 6:15), list(function(oracle){}), NULL, |
|
112 |
- predictTable, actual) |
|
109 |
+ result <- ClassifyResult(DataFrame(characteristic = "Data Set", value = "Example"), |
|
110 |
+ paste("A", 1:10, sep = ''), DataFrame(`Original Feature` = paste("Gene", 1:50), |
|
111 |
+ `Renamed Feature` = paste("Feature", 1:50, sep = '')), |
|
112 |
+ list(paste("Gene", 1:50), paste("Gene", 1:50)), list(paste("Gene", 1:5), paste("Gene", 1:10)), |
|
113 |
+ list(function(oracle){}), NULL, predictTable, actual) |
|
113 | 114 |
result <- calcCVperformance(result) |
114 | 115 |
performance(result) |
115 | 116 |
|
... | ... |
@@ -167,7 +167,7 @@ data(asthma) |
167 | 167 |
|
168 | 168 |
# Compare randomForest and SVM classifiers. |
169 | 169 |
result <- crossValidate(measurements, classes, classifier = c("randomForest", "SVM")) |
170 |
-# performancePlot(result) |
|
170 |
+performancePlot(result) |
|
171 | 171 |
|
172 | 172 |
|
173 | 173 |
# Compare performance of different datasets. |
... | ... |
@@ -14,8 +14,8 @@ Object} |
14 | 14 |
} |
15 | 15 |
\value{ |
16 | 16 |
An \code{list} object. The first element is a vector or data frame |
17 |
-of ranked features, the second is a vector or data frame of selected |
|
18 |
-features. |
|
17 |
+of ranked feature indicies, the second is a vector or data frame of selected |
|
18 |
+feature indices of the \code{DataFrame} processed. |
|
19 | 19 |
} |
20 | 20 |
\description{ |
21 | 21 |
Provides a ranking of features based on the magnitude of fitted GLM |
... | ... |
@@ -9,8 +9,8 @@ |
9 | 9 |
} |
10 | 10 |
\value{ |
11 | 11 |
An \code{list} object. The first element is a vector or data frame |
12 |
-of features, ranked from best to worst using the Gini index. The second |
|
13 |
-element is a vector or data frame of features used in at least one tree. |
|
12 |
+of feature indices, ranked from best to worst using the Gini index. The second |
|
13 |
+element is a vector or data frame of feature indices used in at least one tree. |
|
14 | 14 |
} |
15 | 15 |
\description{ |
16 | 16 |
Provides a ranking of features based on the total decrease in node |
... | ... |
@@ -93,26 +93,28 @@ calculated, and a barchart is plotted. |
93 | 93 |
} |
94 | 94 |
\examples{ |
95 | 95 |
|
96 |
- predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
97 |
- permutation = rep(1:2, each = 40), |
|
98 |
- class = factor(rep(c("Healthy", "Cancer"), 40))) |
|
96 |
+ predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
97 |
+ permutation = rep(1:2, each = 40), |
|
98 |
+ class = factor(rep(c("Healthy", "Cancer"), 40))) |
|
99 | 99 |
actual <- factor(rep(c("Healthy", "Cancer"), each = 5)) |
100 | 100 |
result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
101 | 101 |
"Cross-validation"), |
102 | 102 |
value = c("Example", "t-test", "Differential Expression", "2 Permutations, 2 Folds")), |
103 |
- LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:9, 11:101)), |
|
104 |
- list(c(1:3), c(2, 5, 6), 1:4, 5:8), |
|
103 |
+ LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
104 |
+ `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)), |
|
105 |
+ list(paste("Gene", 1:3), paste("Gene", c(2, 5, 6)), paste("Gene", 1:4), paste("Gene", 5:8)), |
|
105 | 106 |
list(function(oracle){}), NULL, predicted, actual) |
106 | 107 |
result1 <- calcCVperformance(result1, "Macro F1") |
107 | 108 |
|
108 |
- predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
109 |
+ predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE), |
|
109 | 110 |
permutation = rep(1:2, each = 40), |
110 | 111 |
class = factor(rep(c("Healthy", "Cancer"), 40))) |
111 | 112 |
|
112 | 113 |
result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", |
113 | 114 |
"Cross-validation"), |
114 | 115 |
value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")), |
115 |
- LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:5, 11:105)), |
|
116 |
+ LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
117 |
+ `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)), |
|
116 | 118 |
list(c(1:3), c(4:6), c(1, 6, 7, 9), c(5:8)), |
117 | 119 |
list(function(oracle){}), NULL, predicted, actual) |
118 | 120 |
result2 <- calcCVperformance(result2, "Macro F1") |
... | ... |
@@ -192,7 +192,7 @@ drawn. |
192 | 192 |
colData = cbind(clinicalData, class = classes)) |
193 | 193 |
targetFeatures <- DataFrame(dataset = "RNA", feature = "Gene 50") |
194 | 194 |
plotFeatureClasses(dataContainer, targets = targetFeatures, classesColumn = "class", |
195 |
- groupBy = c("sampleInfo", "Gender"), |
|
195 |
+ groupBy = c("sampleInfo", "Gender"), # Table name, feature name. |
|
196 | 196 |
xAxisLabel = bquote(log[2]*'(expression)'), dotBinWidth = 0.5) |
197 | 197 |
|
198 | 198 |
} |
... | ... |
@@ -119,13 +119,15 @@ the relevant options to \code{parallelParams}. |
119 | 119 |
} |
120 | 120 |
\examples{ |
121 | 121 |
|
122 |
- predicted <- data.frame(sample = sample(10, 100, replace = TRUE), |
|
122 |
+ predicted <- DataFrame(sample = sample(10, 100, replace = TRUE), |
|
123 | 123 |
permutation = rep(1:2, each = 50), |
124 | 124 |
class = rep(c("Healthy", "Cancer"), each = 50)) |
125 | 125 |
actual <- factor(rep(c("Healthy", "Cancer"), each = 5)) |
126 | 126 |
allFeatures <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = '')) |
127 |
- rankList <- list(allFeatures[1:100], allFeatures[c(15:6, 1:5, 16:100)], |