Browse code

- Data sets and feature names are converted into safe names such as Dataset1 and Feature1 for use inside of feature selection and classification functions. ClasifyResult class gains a featuresInfo slot which stores original identifiers as well as sanitised ones. - elasticNetPreval defunct parameter set removed.

Dario Strbenac authored on 12/07/2022 14:35:03
Showing 45 changed files

... ...
@@ -3,16 +3,16 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.1.7
7
-Date: 2022-06-30
6
+Version: 3.1.8
7
+Date: 2022-07-12
8 8
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
9 9
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
11 11
 Encoding: UTF-8
12 12
 biocViews: Classification, Survival
13 13
 Depends: R (>= 4.1.0), methods, S4Vectors (>= 0.18.0), MultiAssayExperiment (>= 1.6.0), BiocParallel, survival
14
-Imports: grid, utils, dplyr, tidyr, rlang, randomForest
15
-Suggests: limma, genefilter, edgeR, car, Rmixmod, ggplot2 (>= 3.0.0), gridExtra (>= 2.0.0), cowplot,
14
+Imports: grid, genefilter, utils, dplyr, tidyr, rlang, randomForest
15
+Suggests: limma, edgeR, car, Rmixmod, ggplot2 (>= 3.0.0), gridExtra (>= 2.0.0), cowplot,
16 16
         BiocStyle, pamr, PoiClaClu, parathyroidSE, knitr, htmltools, gtable,
17 17
         scales, e1071, rmarkdown, IRanges, robustbase, glmnet, class
18 18
 Description: The software formalises a framework for classification in R.
... ...
@@ -22,7 +22,6 @@ export(SelectParams)
22 22
 export(TrainParams)
23 23
 export(TransformParams)
24 24
 export(actualOutcomes)
25
-export(allFeatureNames)
26 25
 export(bartlettRanking)
27 26
 export(calcCVperformance)
28 27
 export(calcExternalPerformance)
... ...
@@ -42,6 +41,7 @@ export(elasticNetFeatures)
42 41
 export(elasticNetGLMpredictInterface)
43 42
 export(elasticNetGLMtrainInterface)
44 43
 export(featureSetSummary)
44
+export(featuresInfo)
45 45
 export(fisherDiscriminant)
46 46
 export(forestFeatures)
47 47
 export(generateCrossValParams)
... ...
@@ -105,7 +105,6 @@ exportMethods(SelectParams)
105 105
 exportMethods(TrainParams)
106 106
 exportMethods(TransformParams)
107 107
 exportMethods(actualOutcomes)
108
-exportMethods(allFeatureNames)
109 108
 exportMethods(bartlettRanking)
110 109
 exportMethods(calcCVperformance)
111 110
 exportMethods(calcExternalPerformance)
... ...
@@ -124,6 +123,7 @@ exportMethods(elasticNetFeatures)
124 123
 exportMethods(elasticNetGLMpredictInterface)
125 124
 exportMethods(elasticNetGLMtrainInterface)
126 125
 exportMethods(featureSetSummary)
126
+exportMethods(featuresInfo)
127 127
 exportMethods(fisherDiscriminant)
128 128
 exportMethods(forestFeatures)
129 129
 exportMethods(getLocationsAndScales)
... ...
@@ -50,30 +50,32 @@
50 50
 #' @author Dario Strbenac
51 51
 #' @examples
52 52
 #' 
53
-#'   predicted <- do.call(rbind, list(data.frame(data.frame(sample = LETTERS[c(1, 8, 15, 3, 11, 20, 19, 18)],
54
-#'                                Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25),
55
-#'                                Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75),
53
+#'   predicted <- do.call(rbind, list(DataFrame(data.frame(sample = LETTERS[seq(1, 20, 2)],
54
+#'                                Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25, 0.10, 0.30),
55
+#'                                Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75, 0.90, 0.70),
56 56
 #'                                fold = 1)),
57
-#'                     data.frame(sample = LETTERS[c(11, 18, 15, 4, 6, 10, 11, 12)],
58
-#'                                Healthy = c(0.45, 0.56, 0.33, 0.56, 0.33, 0.20, 0.60, 0.40),
59
-#'                                Cancer = c(0.55, 0.44, 0.67, 0.44, 0.67, 0.80, 0.40, 0.60),
57
+#'                     DataFrame(sample = LETTERS[seq(2, 20, 2)],
58
+#'                                Healthy = c(0.45, 0.56, 0.33, 0.56, 0.65, 0.33, 0.20, 0.60, 0.40, 0.80),
59
+#'                                Cancer = c(0.55, 0.44, 0.67, 0.44, 0.35, 0.67, 0.80, 0.40, 0.60, 0.20),
60 60
 #'                                fold = 2)))
61 61
 #'   actual <- factor(c(rep("Healthy", 10), rep("Cancer", 10)), levels = c("Healthy", "Cancer"))
62
-#'   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
63
-#'                                                          "Cross-validation"),
64
-#'                             value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")),
65
-#'                             LETTERS[1:20], LETTERS[10:1],
66
-#'                             list(1:100, c(1:9, 11:101)), list(sample(10, 10), sample(10, 10)),
62
+#'   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
63
+#'                             value = c("Melanoma", "t-test", "Random Forest", "2-fold")),
64
+#'                             LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
65
+#'                             `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE),
66
+#'                             list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
67
+#'                             list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
67 68
 #'                             list(function(oracle){}), NULL, predicted, actual)
68 69
 #'   
69 70
 #'   predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60)
70 71
 #'   predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40)
71
-#'   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
72
-#'                                                          "Cross-validation"),
73
-#'                             value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")),
74
-#'                             LETTERS[1:20], LETTERS[10:1], list(1:100, c(1:5, 11:105)),
75
-#'                             list(sample(10, 10), sample(10, 10)), list(function(oracle){}),
76
-#'                             NULL, predicted, actual)
72
+#'   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
73
+#'                                       value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")),
74
+#'                             LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
75
+#'                             `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE),
76
+#'                             list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
77
+#'                             list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
78
+#'                             list(function(oracle){}), NULL, predicted, actual)
77 79
 #'   ROCplot(list(result1, result2), plotTitle = "Cancer ROC")
78 80
 #'
79 81
 #' @usage NULL
... ...
@@ -70,13 +70,14 @@
70 70
 #' @author Dario Strbenac
71 71
 #' @examples
72 72
 #' 
73
-#'   predictTable <- data.frame(sample = paste("A", 1:10, sep = ''),
74
-#'                              class = factor(sample(LETTERS[1:2], 50, replace = TRUE)))
73
+#'   predictTable <- DataFrame(sample = paste("A", 1:10, sep = ''),
74
+#'                             class = factor(sample(LETTERS[1:2], 50, replace = TRUE)))
75 75
 #'   actual <- factor(sample(LETTERS[1:2], 10, replace = TRUE))                             
76
-#'   result <- ClassifyResult(DataFrame(),
77
-#'                            paste("A", 1:10, sep = ''), paste("Gene", 1:50, sep = ''),
78
-#'                            list(1:50, 1:50), list(1:5, 6:15), list(function(oracle){}), NULL,
79
-#'                            predictTable, actual)
76
+#'   result <- ClassifyResult(DataFrame(characteristic = "Data Set", value = "Example"),
77
+#'                            paste("A", 1:10, sep = ''), DataFrame(`Original Feature` = paste("Gene", 1:50),
78
+#'                            `Renamed Feature` = paste("Feature", 1:50, sep = '')),
79
+#'                            list(paste("Gene", 1:50), paste("Gene", 1:50)), list(paste("Gene", 1:5), paste("Gene", 1:10)),
80
+#'                            list(function(oracle){}), NULL, predictTable, actual)
80 81
 #'   result <- calcCVperformance(result) 
81 82
 #'   performance(result)
82 83
 #' 
... ...
@@ -1217,7 +1217,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
1217 1217
 #' @aliases ClassifyResult ClassifyResult-class
1218 1218
 #' ClassifyResult,DataFrame,character,characterOrDataFrame-method
1219 1219
 #' show,ClassifyResult-method sampleNames sampleNames,ClassifyResult-method
1220
-#' allFeatureNames allFeatureNames,ClassifyResult-method
1220
+#' featuresInfo featuresInfo,ClassifyResult-method
1221 1221
 #' predictions predictions,ClassifyResult-method actualOutcomes
1222 1222
 #' actualOutcomes,ClassifyResult-method features features,ClassifyResult-method
1223 1223
 #' models models,ClassifyResult-method performance
... ...
@@ -1237,9 +1237,8 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
1237 1237
 #' package, the function names will automatically be generated and therefore it
1238 1238
 #' is not necessary to specify them.}
1239 1239
 #' \item{\code{originalNames}}{All sample names.}
1240
-#' \item{\code{originalFeatures}}{All feature names. Character vector
1241
-#' or \code{\link{DataFrame}} with one row for each feature if the data set has multiple kinds
1242
-#' of measurements on the same set of samples.}
1240
+#' \item{\code{featuresInfo}}{A \code{\link{DataFrame}} containing all feature names in original format
1241
+#' and a safe format without any unusual symbols that R would automatically convert into another format and cause trouble.}
1243 1242
 #' \item{\code{rankedFeatures}}{All features, from most to least important. Character vector
1244 1243
 #' or a data frame if data set has multiple kinds of measurements on the same set of samples.}
1245 1244
 #' \item{\code{chosenFeatures}}{Features selected at each fold. Character
... ...
@@ -1267,7 +1266,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
1267 1266
 #' \describe{
1268 1267
 #' \item{\code{sampleNames(result)}}{Returns a vector of sample names present in the data set.}}
1269 1268
 #' \describe{
1270
-#' \item{\code{allFeatureNames(result)}}{Returns a vector of features present in the data set.}}
1269
+#' \item{\code{featuresInfo(result)}}{Returns a table of features present in the data set. Shows original names and renamed names to ensure no unusual symbols in names.}}
1271 1270
 #' \describe{
1272 1271
 #' \item{\code{actualOutcomes(result)}}{Returns the known outcomes of each sample.}}
1273 1272
 #' \describe{
... ...
@@ -1308,7 +1307,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
1308 1307
 #' @importFrom S4Vectors as.data.frame
1309 1308
 #' @usage NULL
1310 1309
 #' @export
1311
-setGeneric("ClassifyResult", function(characteristics, originalNames, originalFeatures, ...)
1310
+setGeneric("ClassifyResult", function(characteristics, originalNames, featuresInfo, ...)
1312 1311
 standardGeneric("ClassifyResult"))
1313 1312
 
1314 1313
 #' @rdname ClassifyResult-class
... ...
@@ -1316,13 +1315,13 @@ standardGeneric("ClassifyResult"))
1316 1315
 setClass("ClassifyResult", representation(
1317 1316
   characteristics = "DataFrame",
1318 1317
   originalNames = "character",
1319
-  originalFeatures = "characterOrDataFrame",
1318
+  featuresInfo = "DataFrame",
1320 1319
   rankedFeatures = "listOrNULL",
1321 1320
   chosenFeatures = "listOrNULL",
1322 1321
   actualOutcomes = "factorOrSurv",
1323 1322
   models = "list",
1324 1323
   tune = "listOrNULL",
1325
-  predictions = "data.frame",
1324
+  predictions = "DataFrame",
1326 1325
   performance = "listOrNULL",
1327 1326
   importance = "DataFrameOrNULL",
1328 1327
   modellingParams = "ModellingParamsOrNULL",
... ...
@@ -1332,11 +1331,11 @@ setClass("ClassifyResult", representation(
1332 1331
 #' @usage NULL
1333 1332
 #' @export
1334 1333
 setMethod("ClassifyResult", c("DataFrame", "character", "characterOrDataFrame"),
1335
-          function(characteristics, originalNames, originalFeatures,
1334
+          function(characteristics, originalNames, featuresInfo,
1336 1335
                    rankedFeatures, chosenFeatures, models, tunedParameters, predictions, actualOutcomes, importance = NULL, modellingParams = NULL, finalModel = NULL)
1337 1336
           {
1338 1337
             new("ClassifyResult", characteristics = characteristics,
1339
-                originalNames = originalNames, originalFeatures = originalFeatures,
1338
+                originalNames = originalNames, featuresInfo = featuresInfo,
1340 1339
                 rankedFeatures = rankedFeatures, chosenFeatures = chosenFeatures,
1341 1340
                 models = models, tune = tunedParameters,
1342 1341
                 predictions = predictions, actualOutcomes = actualOutcomes, importance = importance, modellingParams = modellingParams, finalModel = finalModel)
... ...
@@ -1383,16 +1382,16 @@ setMethod("sampleNames", c("ClassifyResult"),
1383 1382
 #' @rdname ClassifyResult-class
1384 1383
 #' @usage NULL
1385 1384
 #' @export
1386
-setGeneric("allFeatureNames", function(object, ...)
1387
-standardGeneric("allFeatureNames"))
1385
+setGeneric("featuresInfo", function(object, ...)
1386
+standardGeneric("featuresInfo"))
1388 1387
 
1389 1388
 #' @rdname ClassifyResult-class
1390 1389
 #' @usage NULL
1391 1390
 #' @export
1392
-setMethod("allFeatureNames", c("ClassifyResult"),
1391
+setMethod("featuresInfo", c("ClassifyResult"),
1393 1392
           function(object)
1394 1393
           {
1395
-            object@originalFeatures
1394
+            object@featuresInfo
1396 1395
           })
1397 1396
 
1398 1397
 #' @rdname ClassifyResult-class
... ...
@@ -52,7 +52,7 @@
52 52
 #' 
53 53
 #' # Compare randomForest and SVM classifiers.
54 54
 #' result <- crossValidate(measurements, classes, classifier = c("randomForest", "SVM"))
55
-#' # performancePlot(result)
55
+#' performancePlot(result)
56 56
 #' 
57 57
 #' 
58 58
 #' # Compare performance of different datasets. 
... ...
@@ -125,11 +125,13 @@ setMethod("crossValidate", "DataFrame",
125 125
   Using an ordinary GLM instead.")
126 126
                   classifier <- "GLM"
127 127
               }
128
+              
128 129
               classifier <- cleanClassifier(classifier = classifier,
129 130
                                             measurements = measurements)
130 131
 
131 132
               # Which data-types or data-views are present?
132 133
               datasetIDs <- unique(mcols(measurements)[, "dataset"])
134
+              if(is.null(datasetIDs)) datasetIDs <- 1
133 135
               
134 136
               ##!!!!! Do something with data combinations
135 137
 
... ...
@@ -161,9 +163,10 @@ setMethod("crossValidate", "DataFrame",
161 163
                               sapply(selectionMethod[[dataIndex]], function(selectionIndex) {
162 164
                                   # Loop over classifiers
163 165
                                   set.seed(seed)
166
+                                  measurementsUse <- measurements
167
+                                  if(!is.null(mcols(measurements))) measurementsUse <- measurements[, mcols(measurements)[, "dataset"] == dataIndex, drop = FALSE]
164 168
                                   CV(
165
-                                      measurements = measurements[, mcols(measurements)$dataset == dataIndex, drop = FALSE],
166
-                                      classes = classes,
169
+                                      measurements = measurementsUse, classes = classes,
167 170
                                       nFeatures = nFeatures[dataIndex],
168 171
                                       selectionMethod = selectionIndex,
169 172
                                       selectionOptimisation = selectionOptimisation,
... ...
@@ -201,7 +204,7 @@ setMethod("crossValidate", "DataFrame",
201 204
                   # This allows someone to answer which combinations of the datasets might be most useful.
202 205
 
203 206
 
204
-                  if(is.null(dataCombinations)) dataCombinations <- do.call("c", sapply(seq_len(length(datasetIDs)),function(n)combn(datasetIDs, n, simplify = FALSE)))
207
+                  if(is.null(dataCombinations)) dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE)))
205 208
 
206 209
                   result <- sapply(dataCombinations, function(dataIndex){
207 210
                       CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex],
... ...
@@ -310,7 +313,7 @@ setMethod("crossValidate", "MultiAssayExperiment",
310 313
                    characteristicsLabel = NULL)
311 314
           {
312 315
               targets <- c(names(measurements), "sampleInfo")
313
-              omicsTargets <- setdiff("sampleInfo", targets)              
316
+              omicsTargets <- setdiff(targets, "sampleInfo")  
314 317
               if(length(omicsTargets) > 0)
315 318
               {
316 319
                   if(any(anyReplicated(measurements[, , omicsTargets])))
... ...
@@ -352,9 +355,6 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements.
352 355
                    characteristicsLabel = NULL)
353 356
           {
354 357
               measurements <- DataFrame(measurements)
355
-              message(paste("You have", ncol(measurements), "features and", nrow(measurements), "samples and only one data-type."))
356
-              mcols(measurements)$dataset <- "dataset"
357
-              mcols(measurements)$feature <- colnames(measurements)
358 358
               crossValidate(measurements = measurements,
359 359
                             classes = classes, 
360 360
                             nFeatures = nFeatures,
... ...
@@ -386,8 +386,6 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
386 386
                    characteristicsLabel = NULL)
387 387
           {
388 388
               measurements <- S4Vectors::DataFrame(measurements, check.names = FALSE)
389
-              mcols(measurements)$dataset <- "dataset"
390
-              mcols(measurements)$feature <- colnames(measurements)
391 389
               crossValidate(measurements = measurements,
392 390
                             classes = classes, 
393 391
                             nFeatures = nFeatures,
... ...
@@ -407,7 +405,7 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
407 405
 ###!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
408 406
 #' @rdname crossValidate
409 407
 #' @export
410
-setMethod("crossValidate", "list", # data.frame of numeric measurements.
408
+setMethod("crossValidate", "list",
411 409
           function(measurements,
412 410
                    classes, 
413 411
                    nFeatures = 20,
... ...
@@ -481,14 +479,16 @@ setMethod("crossValidate", "list", # data.frame of numeric measurements.
481 479
 ######################################
482 480
 cleanNFeatures <- function(nFeatures, measurements){
483 481
     #### Clean up
484
-    obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
482
+    if(!is.null(mcols(measurements)))
483
+      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
484
+    else obsFeatures <- ncol(measurements)
485 485
     if(is.null(nFeatures) || length(nFeatures) == 1 && nFeatures == "all") nFeatures <- as.list(obsFeatures)
486 486
     if(is.null(names(nFeatures)) & length(nFeatures) == 1) nFeatures <- as.list(pmin(obsFeatures, nFeatures))
487 487
     if(is.null(names(nFeatures)) & length(nFeatures) > 1) nFeatures <- sapply(obsFeatures, function(x)pmin(x, nFeatures), simplify = FALSE)
488 488
     #if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.")
489
-    if(!all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.")
490
-    if(all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)]))
491
-    if(all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE)
489
+    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.")
490
+    if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)]))
491
+    if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE)
492 492
     nFeatures
493 493
 }
494 494
 
... ...
@@ -496,30 +496,32 @@ cleanNFeatures <- function(nFeatures, measurements){
496 496
 ######################################
497 497
 cleanSelectionMethod <- function(selectionMethod, measurements){
498 498
     #### Clean up
499
-    obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
499
+    if(!is.null(mcols(measurements)))
500
+      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
501
+    else return(list(selectionMethod))
500 502
 
501
-    if(is.null(names(selectionMethod)) & length(selectionMethod) == 1) selectionMethod <- sapply(names(obsFeatures), function(x)selectionMethod, simplify = FALSE)
502
-    if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) selectionMethod <- sapply(names(obsFeatures), function(x)selectionMethod, simplify = FALSE)
503
+    if(is.null(names(selectionMethod)) & length(selectionMethod) == 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE)
504
+    if(is.null(names(selectionMethod)) & length(selectionMethod) > 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE)
503 505
     #if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.")
504
-    if(!all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.")
505
-    if(all(names(obsFeatures) %in% names(selectionMethod)) & is(selectionMethod, "character")) selectionMethod <- as.list(selectionMethod[names(obsFeatures)])
506
+    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.")
507
+    if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(selectionMethod)) & is(selectionMethod, "character")) selectionMethod <- as.list(selectionMethod[names(obsFeatures)])
506 508
     selectionMethod
507
-
508 509
 }
509 510
 
510 511
 ######################################
511 512
 ######################################
512 513
 cleanClassifier <- function(classifier, measurements){
513 514
     #### Clean up
514
-    obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
515
+    if(!is.null(mcols(measurements)))
516
+      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
517
+    else return(list(classifier))
515 518
 
516
-    if(is.null(names(classifier)) & length(classifier) == 1) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE)
517
-    if(is.null(names(classifier)) & length(classifier) > 1) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE)
519
+    if(is.null(names(classifier)) & length(classifier) == 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE)
520
+    if(is.null(names(classifier)) & length(classifier) > 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE)
518 521
     #if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the datasets.")
519
-    if(!all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the datasets.")
520
-    if(all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)])
522
+    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the datasets.")
523
+    if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)])
521 524
     classifier
522
-
523 525
 }
524 526
 
525 527
 
... ...
@@ -644,7 +646,7 @@ generateModellingParams <- function(datasetIDs,
644 646
     
645 647
     classifiers <- c("randomForest", "GLM", "elasticNetGLM", "SVM", "DLDA",
646 648
                      "naiveBayes", "mixturesNormals", "kNN",
647
-                     "elasticNetPreval", "CoxPH", "CoxNet")
649
+                     "CoxPH", "CoxNet")
648 650
     # Check classifier
649 651
     if(!classifier %in% classifiers)
650 652
         stop(paste("Classifier must exactly match of these (be careful of case):", paste(classifiers, collapse = ", ")))
... ...
@@ -659,7 +661,6 @@ generateModellingParams <- function(datasetIDs,
659 661
         "naiveBayes" = naiveBayesParams(),
660 662
         "mixturesNormals" = mixModelsParams(),
661 663
         "kNN" = kNNparams(),
662
-        "elasticNetPreval" = elasticNetPreval(),
663 664
         "CoxPH" = coxphParams(),
664 665
         "CoxNet" = coxnetParams()
665 666
     )
... ...
@@ -876,6 +877,7 @@ CV <- function(measurements,
876 877
 
877 878
     # Which data-types or data-views are present?
878 879
     datasetIDs <- unique(mcols(measurements)[, "dataset"])
880
+    if(is.null(datasetIDs)) datasetIDs <- 1
879 881
     if(is.null(dataCombinations)) dataCombinations <- datasetIDs
880 882
     if(is.null(characteristicsLabel)) characteristicsLabel <- "none"
881 883
 
... ...
@@ -896,8 +898,7 @@ CV <- function(measurements,
896 898
                                                multiViewMethod = multiViewMethod
897 899
     )
898 900
 
899
-
900
-    characteristics = S4Vectors::DataFrame(characteristic = c("dataset", "classifier", "selectionMethod", "multiViewMethod", "characteristicsLabel"), value = c(paste(datasetIDs, collapse = ", "), paste(classifier, collapse = ", "),  paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel))
901
+    characteristics = S4Vectors::DataFrame(characteristic = c(if(length(datasetIDs) > 1) "Data Set" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(length(datasetIDs) > 1) paste(datasetIDs, collapse = ", ") else NULL, paste(classifier, collapse = ", "),  paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel))
901 902
 
902 903
     classifyResults <- runTests(measurements, classes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics)
903 904
     
... ...
@@ -238,8 +238,8 @@ setMethod("elasticNetGLMpredictInterface", c("multnet", "MultiAssayExperiment"),
238 238
 #' @param model A fitted multinomial GLM which was created by
239 239
 #' \code{\link[glmnet]{glmnet}}.
240 240
 #' @return An \code{list} object. The first element is a vector or data frame
241
-#' of ranked features, the second is a vector or data frame of selected
242
-#' features.
241
+#' of ranked feature indicies, the second is a vector or data frame of selected
242
+#' feature indices of the \code{DataFrame} processed.
243 243
 #' @author Dario Strbenac
244 244
 #' @examples
245 245
 #' 
... ...
@@ -282,15 +282,7 @@ setMethod("elasticNetFeatures", "multnet",
282 282
             whichCoefficientColumn <- which(abs(model[["lambda"]] - attr(model, "tune")[["lambda"]]) < 0.00001)[1]
283 283
             coefficientsUsed <- sapply(model[["beta"]], function(classCoefficients) classCoefficients[, whichCoefficientColumn])
284 284
             featureScores <- rowSums(abs(coefficientsUsed))
285
-            rankedFeatures <- inputFeatures[order(featureScores, decreasing = TRUE)]
286
-            selectedFeatures <- inputFeatures[featureScores != 0]
287
-            
288
-            # Colon is a reserved symbol for separating data name and feature name, which is necessary for identifiability of MultiAssayExperiment features. It is not permitted in feature names.
289
-            if(grepl(':', selectedFeatures[1]) == TRUE) # Convert to data.frame.
290
-            {
291
-              selectedFeatures <- do.call(rbind, strsplit(selectedFeatures, ':'))
292
-              rankedFeatures <- do.call(rbind, strsplit(rankedFeatures, ':'))
293
-              colnames(selectedFeatures) <- colnames(rankedFeatures) <- c("dataset", "feature")
294
-            }
295
-            list(unique(rankedFeatures), selectedFeatures)
285
+            rankedFeaturesIndices <- order(featureScores, decreasing = TRUE)
286
+            selectedFeaturesIndices <- which(featureScores != 0)
287
+            list(rankedFeaturesIndices, selectedFeaturesIndices)
296 288
           })
297 289
\ No newline at end of file
... ...
@@ -70,7 +70,6 @@
70 70
 #'     colnames(genesMatrix) <- paste("Gene", 1:ncol(genesMatrix))
71 71
 #'     
72 72
 #'     CVparams <- CrossValParams("k-Fold")
73
-#'       
74 73
 #'     trainParams <- TrainParams(GLMtrainInterface)
75 74
 #'     predictParams <- PredictParams(GLMpredictInterface)
76 75
 #'     modParams <- ModellingParams(selectParams = NULL, trainParams = trainParams,
... ...
@@ -240,7 +240,7 @@ setMethod("NSCpredictInterface", c("pamrtrained", "MultiAssayExperiment"), funct
240 240
 #' @param classesTrain A vector of class labels of class \code{\link{factor}} of the
241 241
 #' same length as the number of samples in \code{measurementsTrain}.
242 242
 #' @return A list with the first element being empty (no feature ranking is
243
-#' provided) and second element being the selected features.
243
+#' provided) and second element being the selected features' indices.
244 244
 #' @author Dario Strbenac
245 245
 #' @seealso \code{\link[pamr]{pamr.listgenes}} for the function that is
246 246
 #' interfaced to.
... ...
@@ -278,12 +278,7 @@ setMethod("NSCfeatures", "pamrtrained",
278 278
             minError <- min(model[["errors"]])
279 279
             threshold <- model[["threshold"]][max(which(model[["errors"]] == minError))]
280 280
             params <- c(list(model), list(list(x = t(as.matrix(measurementsTrain)), y = measurementsTrain, geneid = 1:ncol(measurementsTrain))), threshold)
281
-            chosen <- as.numeric(do.call(pamr::pamr.listgenes, params)[, 1])
281
+            chosenIndices <- as.numeric(do.call(pamr::pamr.listgenes, params)[, 1])
282 282
             
283
-            if(is.matrix(measurementsTrain) || is.null(S4Vectors::mcols(measurementsTrain)))
284
-              chosen <- colnames(measurementsTrain)[chosen]
285
-            else
286
-              chosen <- S4Vectors::mcols(measurementsTrain)[chosen, ]
287
-            
288
-            list(NULL, chosen)
283
+            list(NULL, chosenIndices)
289 284
           })
290 285
\ No newline at end of file
... ...
@@ -193,8 +193,8 @@ setMethod("randomForestPredictInterface", c("randomForest", "MultiAssayExperimen
193 193
 #' @aliases forestFeatures forestFeatures,randomForest-method
194 194
 #' @param forest A trained random forest which was created by \code{\link{randomForest}}.
195 195
 #' @return An \code{list} object. The first element is a vector or data frame
196
-#' of features, ranked from best to worst using the Gini index. The second
197
-#' element is a vector or data frame of features used in at least one tree.
196
+#' of feature indices, ranked from best to worst using the Gini index. The second
197
+#' element is a vector or data frame of feature indices used in at least one tree.
198 198
 #' @author Dario Strbenac
199 199
 #' @examples
200 200
 #'
... ...
@@ -226,17 +226,7 @@ setGeneric("forestFeatures", function(forest, ...)
226 226
 setMethod("forestFeatures", "randomForest",
227 227
           function(forest)
228 228
           {
229
-            inputFeatures <- rownames(randomForest::importance(forest))
230
-            rankedFeatures <- inputFeatures[order(randomForest::importance(forest), decreasing = TRUE)]
231
-            selectedFeatures <- inputFeatures[randomForest::varUsed(forest) > 0]
232
-            selectedFeatures <- selectedFeatures[na.omit(match(rankedFeatures, selectedFeatures))]
233
-            
234
-            # Colon is a reserved symbol for separating data name and feature name, which is necessary for identifiability of MultiAssayExperiment features. It is not permitted in feature names.
235
-            if(grepl(':', selectedFeatures[1]) == TRUE) # Convert to data.frame.
236
-            {
237
-              selectedFeatures <- do.call(rbind, strsplit(selectedFeatures, ':'))
238
-              rankedFeatures <- do.call(rbind, strsplit(rankedFeatures, ':'))
239
-              colnames(selectedFeatures) <- colnames(rankedFeatures) <- c("dataset", "feature")
240
-            }
241
-            list(rankedFeatures, selectedFeatures)
229
+            rankedFeaturesIndices <- order(randomForest::importance(forest), decreasing = TRUE)
230
+            selectedFeaturesIndices <- which(randomForest::varUsed(forest) > 0)
231
+            list(rankedFeaturesIndices, selectedFeaturesIndices)
242 232
           })
243 233
\ No newline at end of file
... ...
@@ -146,7 +146,6 @@ setMethod("SVMpredictInterface", c("svm", "DataFrame"), function(model, measurem
146 146
   
147 147
   # Prediction function depends on test data having same set of columns in same order as
148 148
   # selected features used for training.
149
-  colnames(measurementsTest) <- make.names(colnames(measurementsTest)) # svm silently converts feature names.
150 149
   measurementsTest <- measurementsTest[, colnames(model[["SV"]])]
151 150
   classPredictions <- predict(model, measurementsTest, probability = TRUE)
152 151
   
... ...
@@ -54,26 +54,28 @@
54 54
 #' @author Dario Strbenac
55 55
 #' @examples
56 56
 #' 
57
-#'   predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
58
-#'                           permutation = rep(1:2, each = 40),
59
-#'                           class = factor(rep(c("Healthy", "Cancer"), 40)))
57
+#'   predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
58
+#'                          permutation = rep(1:2, each = 40),
59
+#'                          class = factor(rep(c("Healthy", "Cancer"), 40)))
60 60
 #'   actual <- factor(rep(c("Healthy", "Cancer"), each = 5))
61 61
 #'   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
62 62
 #'                                                          "Cross-validation"),
63 63
 #'                             value = c("Example", "t-test", "Differential Expression", "2 Permutations, 2 Folds")),
64
-#'                             LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:9, 11:101)),
65
-#'                             list(c(1:3), c(2, 5, 6), 1:4, 5:8),
64
+#'                             LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
65
+#'                             `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)),
66
+#'                             list(paste("Gene", 1:3), paste("Gene", c(2, 5, 6)), paste("Gene", 1:4), paste("Gene", 5:8)),
66 67
 #'                             list(function(oracle){}), NULL, predicted, actual)
67 68
 #'   result1 <- calcCVperformance(result1, "Macro F1")
68 69
 #' 
69
-#'   predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
70
+#'   predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
70 71
 #'                           permutation = rep(1:2, each = 40),
71 72
 #'                           class = factor(rep(c("Healthy", "Cancer"), 40)))
72 73
 #'                                
73 74
 #'   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
74 75
 #'                                                          "Cross-validation"),
75 76
 #'                             value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")),
76
-#'                             LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:5, 11:105)),
77
+#'                             LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
78
+#'                             `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)),
77 79
 #'                             list(c(1:3), c(4:6), c(1, 6, 7, 9), c(5:8)),
78 80
 #'                             list(function(oracle){}), NULL, predicted, actual)
79 81
 #'   result2 <- calcCVperformance(result2, "Macro F1")
... ...
@@ -131,7 +131,7 @@
131 131
 #'                                         colData = cbind(clinicalData, class = classes))
132 132
 #'   targetFeatures <- DataFrame(dataset = "RNA", feature = "Gene 50")                                     
133 133
 #'   plotFeatureClasses(dataContainer, targets = targetFeatures, classesColumn = "class",
134
-#'                      groupBy = c("sampleInfo", "Gender"),
134
+#'                      groupBy = c("sampleInfo", "Gender"), # Table name, feature name.
135 135
 #'                      xAxisLabel = bquote(log[2]*'(expression)'), dotBinWidth = 0.5)
136 136
 #' 
137 137
 #' @importFrom dplyr mutate n
... ...
@@ -175,7 +175,7 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar
175 175
                     facets = factor(paste(groupingName, "is", groupBy), levels = paste(groupingName, "is", levelsOrder)))
176 176
   }
177 177
   
178
-  splitDataset <- .splitDataAndOutcomes(measurements, classes)
178
+  splitDataset <- .splitDataAndOutcomes(measurements, classes, restrict = NULL)
179 179
   measurements <- splitDataset[["measurements"]]
180 180
   classes <- splitDataset[["outcomes"]]
181 181
   
... ...
@@ -96,19 +96,21 @@ setMethod("previousSelection", "DataFrame",
96 96
   previousIDs <- chosenFeatureNames(classifyResult)[[.iteration]]
97 97
   if(is.character(previousIDs))
98 98
   {
99
-    commonFeatures <- intersect(previousIDs, colnames(measurementsTrain))
100
-    overlapPercent <- length(commonFeatures) / length(previousIDs) * 100
101
-  } else { # A data.frame describing the data set and variable name of the chosen feature.
99
+    featuresIDs <- colnames(measurementsTrain)
100
+    IDsRows <- match(previousIDs, featuresInfo(classifyResult)[, "Original Feature"])
101
+    safeIDs <- featuresInfo(classifyResult)[IDsRows, "Renamed Feature"]
102
+  } else { # A data frame describing the data set and variable name of the chosen feature.
102 103
     featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")])
103
-    selectedIDs <-  do.call(paste, previousIDs)
104
-    selectedColumns <- match(selectedIDs, featuresIDs)
105
-    commonFeatures <- sum(!is.na(selectedColumns))
106
-    overlapPercent <- commonFeatures / nrow(previousIDs) * 100
104
+    IDsRows <-  match(do.call(paste, previousIDs), do.call(paste(featuresInfo(classifyResult)[, c("Original Dataset", "Original Feature")])))
105
+    safeIDs <- do.call(paste, featuresInfo(classifyResult)[IDsRows, c("Renamed Dataset", "Renamed Feature")])
107 106
   }
107
+  
108
+  commonFeatures <- intersect(safeIDs, featuresIDs)
109
+  overlapPercent <- length(commonFeatures) / length(safeIDs) * 100
108 110
   if(overlapPercent < minimumOverlapPercent)
109 111
     signalCondition(simpleError(paste("Number of features in common between previous and current data set is lower than", minimumOverlapPercent, "percent.")))
110
-  
111
-  S4Vectors::mcols(measurementsTrain)[selectedColumns, ] # Each row is about one column.
112
+
113
+  match(safeIDs, featuresIDs) # Return indices, not identifiers.
112 114
 })
113 115
 
114 116
 #' @rdname previousSelection
... ...
@@ -117,7 +119,7 @@ setMethod("previousSelection", "MultiAssayExperiment",
117 119
           function(measurementsTrain, ...)
118 120
           {
119 121
             sampleInfoColumns <- colnames(MultiAssayExperiment::colData(sampleInfoColumns))
120
-            dataTable <- wideFormat(measurementsTrain, colDataCols = sampleInfoColumns, check.names = FALSE, collapse = ':')
122
+            dataTable <- MultiAssayExperiment::wideFormat(measurementsTrain, colDataCols = sampleInfoColumns, check.names = FALSE, collapse = ':')
121 123
             S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"])
122 124
             previousSelection(dataTable, ...)
123 125
           })
124 126
\ No newline at end of file
... ...
@@ -84,10 +84,7 @@ setMethod("bartlettRanking", "DataFrame", # Sample information data or one of th
84 84
   pValues <- apply(measurementsTrain, 2, function(featureColumn)
85 85
     stats::bartlett.test(featureColumn, classesTrain)[["p.value"]])
86 86
   
87
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
88
-    S4Vectors::mcols(measurementsTrain)[order(pValues), ]
89
-  else
90
-    colnames(measurementsTrain)[order(pValues)]
87
+  order(pValues) # From smallest to largest.
91 88
 })
92 89
 
93 90
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -59,10 +59,7 @@ setMethod("coxphRanking", "DataFrame", function(measurementsTrain, survivalTrain
59 59
     s$waldtest["pvalue"]
60 60
   })
61 61
   
62
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
63
-    S4Vectors::mcols(measurementsTrain)[order(pValues), ]
64
-  else
65
-    colnames(measurementsTrain)[order(pValues)]
62
+  order(pValues) # From smallest to largest.
66 63
 })
67 64
 
68 65
 # One or more omics data sets, possibly with clinical data.
... ...
@@ -102,11 +102,7 @@ setMethod("DMDranking", "DataFrame", # sampleInfo data or one of the other input
102 102
   if(differences %in% c("both", "scale"))
103 103
     divergence <- divergence + scalesDifferences
104 104
 
105
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
106
-      S4Vectors::mcols(measurementsTrain)[order(divergence, decreasing = TRUE), ]
107
-  else
108
-      colnames(measurementsTrain)[order(divergence, decreasing = TRUE)]
109
-  
105
+  order(divergence, decreasing = TRUE)
110 106
 })
111 107
 
112 108
 #' @rdname DMDranking
... ...
@@ -87,10 +87,7 @@ setMethod("differentMeansRanking", "DataFrame",
87 87
     pValues <- genefilter::rowFtests(measurementsMatrix, classesTrain)[, "p.value"]
88 88
   }
89 89
   
90
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
91
-    S4Vectors::mcols(measurementsTrain)[order(pValues), ]
92
-  else
93
-    colnames(measurementsTrain)[order(pValues)]
90
+  order(pValues) # From smallest to largest.
94 91
 })
95 92
 
96 93
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -110,10 +110,7 @@ setMethod("edgeRranking", "DataFrame", function(countsTrain, classesTrain, normF
110 110
   fit <- do.call(edgeR::glmFit, paramList)
111 111
   test <- edgeR::glmLRT(fit, coef = 2:length(levels(classesTrain)))[["table"]]
112 112
   
113
-  if(!is.null(S4Vectors::mcols(countsTrain)))
114
-    S4Vectors::mcols(countsTrain)[order(test[, "PValue"]), ]
115
-  else
116
-    colnames(countsTrain)[order(test[, "PValue"])]
113
+  order(test[, "PValue"]) # From smallest to largest.
117 114
 })
118 115
 
119 116
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -75,10 +75,7 @@ setMethod("KolmogorovSmirnovRanking", "DataFrame", # Sample information data or
75 75
   KSdistance <- apply(measurementsTrain, 2, function(featureColumn)
76 76
                       stats::ks.test(featureColumn[oneClass], featureColumn[otherClass], ...)[["statistic"]])
77 77
 
78
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
79
-    S4Vectors::mcols(measurementsTrain)[order(KSdistance, decreasing = TRUE), ]
80
-  else
81
-    colnames(measurementsTrain)[order(KSdistance, decreasing = TRUE)]
78
+  order(KSdistance, decreasing = TRUE)
82 79
 })
83 80
 
84 81
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -88,11 +88,7 @@ setMethod("KullbackLeiblerRanking", "DataFrame", # Sample information data or on
88 88
                          ((oneClassDistribution[[2]])^2) / ((otherClassDistribution[[2]])^2) +
89 89
                          ((otherClassDistribution[[2]])^2) / ((oneClassDistribution[[2]])^2))
90 90
 
91
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
92
-    S4Vectors::mcols(measurementsTrain)[order(divergence, decreasing = TRUE), ]
93
-  else
94
-    colnames(measurementsTrain)[order(divergence, decreasing = TRUE)]
95
-  
91
+  order(divergence, decreasing = TRUE)
96 92
 })
97 93
 
98 94
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -76,10 +76,7 @@ setMethod("leveneRanking", "DataFrame", # Sample information data or one of the
76 76
   pValues <- apply(measurementsTrain, 2, function(featureColumn)
77 77
              car::leveneTest(featureColumn, classesTrain)[["Pr(>F)"]][1])
78 78
   
79
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
80
-    S4Vectors::mcols(measurementsTrain)[order(pValues), ]
81
-  else
82
-    colnames(measurementsTrain)[order(pValues)]
79
+  order(pValues) # From smallest to largest.
83 80
 })
84 81
 
85 82
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -99,10 +99,7 @@ setMethod("likelihoodRatioRanking", "DataFrame", # Sample information data or on
99 99
     switch(alternative[["scale"]], same = allDistribution[[2]], different = classDistribution[[2]])))    
100 100
   }))
101 101
   
102
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
103
-    S4Vectors::mcols(measurementsTrain)[order(logLikelihoodRatios), ]
104
-  else
105
-    colnames(measurementsTrain)[order(logLikelihoodRatios)]
102
+  order(logLikelihoodRatios) # From smallest to largest.
106 103
 })
107 104
 
108 105
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -77,10 +77,7 @@ setMethod("limmaRanking", "DataFrame",
77 77
   linearModel <- limma::eBayes(linearModel)
78 78
   linearModel <- linearModel[, -1] # Get rid of intercept.
79 79
   
80
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
81
-    S4Vectors::mcols(measurementsTrain)[order(linearModel[["F.p.value"]]), ]
82
-  else
83
-    colnames(measurementsTrain)[order(linearModel[["F.p.value"]])]
80
+  order(linearModel[["F.p.value"]]) # From smallest to largest.
84 81
 })
85 82
 
86 83
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -114,7 +114,7 @@ setMethod("pairsDifferencesRanking", "DataFrame",
114 114
   
115 115
   pairsClassDifferences <- otherClassDifferences - oneClassDifferences
116 116
   
117
-  featurePairs[order(abs(pairsClassDifferences), decreasing = TRUE)]
117
+  order(abs(pairsClassDifferences), decreasing = TRUE)
118 118
 })
119 119
 
120 120
 # One or more omics data sets, possibly with sample information data.
... ...
@@ -73,13 +73,15 @@
73 73
 #' @author Dario Strbenac
74 74
 #' @examples
75 75
 #' 
76
-#'   predicted <- data.frame(sample = sample(10, 100, replace = TRUE),
76
+#'   predicted <- DataFrame(sample = sample(10, 100, replace = TRUE),
77 77
 #'                           permutation = rep(1:2, each = 50),
78 78
 #'                           class = rep(c("Healthy", "Cancer"), each = 50))
79 79
 #'   actual <- factor(rep(c("Healthy", "Cancer"), each = 5))
80 80
 #'   allFeatures <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = ''))
81
-#'   rankList <- list(allFeatures[1:100], allFeatures[c(15:6, 1:5, 16:100)],
82
-#'                    allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 61:100, 60:51)])
81
+#'   allFeatures <- DataFrame(`Original Feature` = allFeatures, `Renamed Feature` = paste("Feature", 1:100, sep = ''),
82
+#'   check.names = FALSE)
83
+#'   rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(15:6, 1:5, 16:100), "Original Feature"],
84
+#'                    allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 61:100, 60:51), "Original Feature"])
83 85
 #'   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
84 86
 #'                                                          "Cross-validation"),
85 87
 #'                             value = c("Melanoma", "t-test", "Diagonal LDA", "2 Permutations, 2 Folds")),
... ...
@@ -90,8 +92,8 @@
90 92
 #'                             predicted, actual)
91 93
 #'   
92 94
 #'   predicted[, "class"] <- sample(predicted[, "class"])
93
-#'   rankList <- list(allFeatures[1:100], allFeatures[c(sample(20), 21:100)],
94
-#'                    allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 60:51, 61:100)])
95
+#'   rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(sample(20), 21:100), "Original Feature"],
96
+#'   allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 60:51, 61:100), "Original Feature"])
95 97
 #'   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
96 98
 #'                                                          "Cross-validations"),
97 99
 #'                             value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")),
... ...
@@ -127,7 +129,7 @@ setMethod("rankingPlot", "list",
127 129
   if(comparison == "within" && !is.null(referenceLevel))
128 130
     stop("'comparison' should not be \"within\" if 'referenceLevel' is not NULL.")
129 131
 
130
-  nFeatures <- ifelse(is.null(ncol(results[[1]]@originalFeatures)), length(results[[1]]@originalFeatures), nrow(results[[1]]@originalFeatures)) 
132
+  nFeatures <- nrow(featuresInfo(results[[1]]))
131 133
   error <- character()
132 134
   if(max(topRanked) > nFeatures)
133 135
     error <- paste("'topRanked' is as high as", max(topRanked))
... ...
@@ -99,7 +99,7 @@ function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest,
99 99
       stop("'measurementsTrain' DataFrame must have sample identifiers as its row names.")
100 100
     if(any(is.na(measurementsTrain)))
101 101
       stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.")                
102
-    
102
+
103 103
     splitDatasetTrain <- .splitDataAndOutcomes(measurementsTrain, outcomesTrain)
104 104
     # Rebalance the class sizes of the training samples by either downsampling or upsampling
105 105
     # or leave untouched if balancing is none.
... ...
@@ -111,6 +111,20 @@ function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest,
111 111
     }
112 112
   }
113 113
     
114
+  if(is.null(.iteration) || .iteration != "internal") # Not nested cross-validation.
115
+  {
116
+    # Avoid strange symbols in feature identifiers.
117
+    featuresInfo <- .summaryFeatures(measurementsTrain)
118
+    if(!is.null(S4Vectors::mcols(measurementsTrain)))
119
+    {
120
+      S4Vectors::mcols(measurementsTrain) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")]
121
+      S4Vectors::mcols(measurementsTest) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")]
122
+    } else {
123
+      colnames(measurementsTrain) <- featuresInfo[, "Renamed Feature"]
124
+      colnames(measurementsTest) <- featuresInfo[, "Renamed Feature"]
125
+    }
126
+  }
127
+    
114 128
   if(!is.null(modellingParams@selectParams) && max(modellingParams@selectParams@tuneParams[["nFeatures"]]) > ncol(measurementsTrain))
115 129
   {
116 130
     warning("Attempting to evaluate more features for feature selection than in
... ...
@@ -118,12 +132,6 @@ input data. Autmomatically reducing to smaller number.")
118 132
     modellingParams@selectParams@tuneParams[["nFeatures"]] <- 1:min(10, ncol(measurementsTrain))
119 133
   }
120 134
   
121
-  # All input features.
122
-  if(!is.null(S4Vectors::mcols(measurementsTrain)))
123
-    allFeatures <- S4Vectors::mcols(measurementsTrain)
124
-  else
125
-    allFeatures <- colnames(measurementsTrain)
126
-  
127 135
   if(!is.null(crossValParams) && !is.null(crossValParams@adaptiveResamplingDelta))
128 136
   { # Iteratively resample training samples until their class probability or risk changes, on average, less than delta.
129 137
     delta <- crossValParams@adaptiveResamplingDelta
... ...
@@ -159,34 +167,25 @@ input data. Autmomatically reducing to smaller number.")
159 167
     if(is.character(measurementsTransformedList[[1]])) return(measurementsTransformedList[[1]]) # An error occurred.
160 168
   }
161 169
 
162
-  rankedFeatures <- NULL
163
-  selectedFeatures <- NULL
170
+  rankedFeaturesIndices <- NULL
171
+  selectedFeaturesIndices <- NULL
164 172
   tuneDetailsSelect <- NULL
165
-  
173
+
166 174
   if(!is.null(modellingParams@selectParams))
167 175
   {
168 176
     if(length(modellingParams@selectParams@intermediate) != 0)
169 177
       modellingParams@selectParams <- .addIntermediates(modellingParams@selectParams)
170
-    
178
+ 
171 179
     topFeatures <- tryCatch(.doSelection(measurementsTrain, outcomesTrain, crossValParams, modellingParams, verbose),
172 180
                             error = function(error) error[["message"]]) 
173 181
     if(is.character(topFeatures)) return(topFeatures) # An error occurred.
174
-    rankedFeatures <- topFeatures[[1]] # Extract for result object.
175
-    selectedFeatures <- topFeatures[[2]] # Extract for subsetting.
182
+    
183
+    rankedFeaturesIndices <- topFeatures[[1]] # Extract for result object.
184
+    selectedFeaturesIndices <- topFeatures[[2]] # Extract for subsetting.
176 185
     tuneDetailsSelect <- topFeatures[[3]]
177 186
 
178 187
     if(modellingParams@selectParams@subsetToSelections == TRUE)
179
-    { # Subset the the data table to only the selected features.
180
-      if(is.null(S4Vectors::mcols(measurementsTrain)))
181
-      { # Input was ordinary matrix or DataFrame.
182
-        measurementsTrain <- measurementsTrain[, selectedFeatures, drop = FALSE]
183
-      } else { # Input was MultiAssayExperiment. # Match the selected features to the data frame columns
184
-        selectedIDs <-  do.call(paste, selectedFeatures)
185
-        featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")])
186
-        selectedColumns <- match(selectedIDs, featuresIDs)
187
-        measurementsTrain <- measurementsTrain[, selectedColumns, drop = FALSE]
188
-      }
189
-    }
188
+      measurementsTrain <- measurementsTrain[, selectedFeaturesIndices, drop = FALSE]
190 189
   } 
191 190
   
192 191
   # Training stage.
... ...
@@ -207,9 +206,9 @@ input data. Autmomatically reducing to smaller number.")
207 206
     if(length(extras) > 0)
208 207
       extrasList <- mget(setdiff(names(extras), "..."))
209 208
 
210
-    featureInfo <- do.call(modellingParams@trainParams@getFeatures, c(trained[1], extrasList))
211
-    rankedFeatures <- featureInfo[[1]]
212
-    selectedFeatures <- featureInfo[[2]]
209
+    rankedChosenList <- do.call(modellingParams@trainParams@getFeatures, c(trained[1], extrasList))
210
+    rankedFeaturesIndices <- rankedChosenList[[1]]
211
+    selectedFeaturesIndices <- rankedChosenList[[2]]
213 212
   }
214 213
   
215 214
   if(!is.null(modellingParams@predictParams))
... ...
@@ -263,7 +262,8 @@ input data. Autmomatically reducing to smaller number.")
263 262
     if(!is.null(ncol(predictedOutcomes)))
264 263
         predictedOutcomes <- predictedOutcomes[, na.omit(match(c("class", "risk"), colnames(predictedOutcomes)))]
265 264
     performanceChanges <- round(performancesWithoutEach - calcExternalPerformance(outcomesTest, predictedOutcomes, performanceType), 2)
266
-      
265
+     
266
+    if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")]
267 267
     importanceTable <- DataFrame(selectedFeatures, performanceChanges)
268 268
     if(ncol(importanceTable) == 2) colnames(importanceTable)[1] <- "feature"
269 269
     colnames(importanceTable)[ncol(importanceTable)] <- paste("Change in", performanceType)
... ...
@@ -271,6 +271,22 @@ input data. Autmomatically reducing to smaller number.")
271 271
   
272 272
   if(is.null(modellingParams@predictParams)) models <- NULL else models <- trained[[1]] # One function for training and testing. Typically, the models aren't returned to the user, such as Poisson LDA implemented by PoiClaClu.
273 273
   if(!is.null(tuneDetailsSelect)) tuneDetails <- tuneDetailsSelect else tuneDetails <- tuneDetailsTrain
274
+
275
+  # Convert back into original feature identifiers unless it is a nested cross-validation.
276
+  if(is.null(.iteration) || .iteration != "internal")
277
+  {
278
+    if(!is.null(rankedFeaturesIndices))
279
+    {
280
+      if(is.null(S4Vectors::mcols(measurementsTrain))) rankedFeatures <- featuresInfo[rankedFeaturesIndices, "Original Feature"] else rankedFeatures <- featuresInfo[rankedFeaturesIndices, c("Original Dataset", "Original Feature")]
281
+    } else { rankedFeatures <- NULL}
282
+    if(!is.null(selectedFeaturesIndices))
283
+    {
284
+      if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")]
285
+    } else { selectedFeatures <- NULL}
286
+  } else { # Nested use in feature selection. No feature selection in inner execution, so ignore features. 
287
+      rankedFeatures <- selectedFeatures <- NULL
288
+  }
289
+  
274 290
   if(!is.null(.iteration)) # This function was not called by the end user.
275 291
   {
276 292
     list(ranked = rankedFeatures, selected = selectedFeatures, models = models, testSet = rownames(measurementsTest), predictions = predictedOutcomes, tune = tuneDetails, importance = importanceTable)
... ...
@@ -298,9 +314,9 @@ input data. Autmomatically reducing to smaller number.")
298 314
       allOutcomes <- c(outcomesTrain, outcomesTest)
299 315
       names(allOutcomes) <- allSamples
300 316
     }
301
-    
302
-    ClassifyResult(characteristics, allSamples, allFeatures, list(rankedFeatures), list(selectedFeatures),
303
-                   list(models), tuneDetails, data.frame(sample = rownames(measurementsTest), predictedOutcomes, check.names = FALSE), allOutcomes, importanceTable)
317
+
318
+    ClassifyResult(characteristics, allSamples, featuresInfo, list(rankedFeatures), list(selectedFeatures),
319
+                   list(models), tuneDetails, DataFrame(sample = rownames(measurementsTest), predictedOutcomes, check.names = FALSE), allOutcomes, importanceTable)
304 320
   }  
305 321
 })
306 322
 
... ...
@@ -309,7 +325,7 @@ input data. Autmomatically reducing to smaller number.")
309 325
 setMethod("runTest", c("MultiAssayExperiment"),
310 326
           function(measurementsTrain, measurementsTest, targets = names(measurements), outcomesColumns, ...)
311 327
 {
312
-  omicsTargets <- setdiff("sampleInfo", targets)              
328
+  omicsTargets <- setdiff(targets, "sampleInfo")
313 329
   if(length(omicsTargets) > 0)
314 330
   {
315 331
     if(any(anyReplicated(measurements[, , omicsTargets])))
... ...
@@ -96,10 +96,7 @@ input data. Autmomatically reducing to smaller number.")
96 96
   # Element names of the list returned by runTest, in order.
97 97
   resultTypes <- c("ranked", "selected", "models", "testSet", "predictions", "tune", "importance")
98 98
   
99
-  featureInfo <- .summaryFeatures(measurements)
100
-  allFeatures <- featureInfo[[1]]
101
-  featureNames <- featureInfo[[2]]
102
-  consideredFeatures <- featureInfo[[3]]
99
+  featuresInfo <- .summaryFeatures(measurements)
103 100
   # Create all partitions of training and testing sets.
104 101
   samplesSplits <- .samplesSplits(crossValParams, outcomes)
105 102
   splitsTestInfo <- .splitsTestInfo(crossValParams, samplesSplits)
... ...
@@ -116,7 +113,7 @@ input data. Autmomatically reducing to smaller number.")
116 113
   {
117 114
     if(verbose >= 1 && setNumber %% 10 == 0)
118 115
       message("Processing sample set ", setNumber, '.')
119
-    
116
+
120 117
     # crossValParams is needed at least for nested feature tuning.
121 118
     runTest(measurements[trainingSamples, , drop = FALSE], outcomes[trainingSamples],
122 119
             measurements[testSamples, , drop = FALSE], outcomes[testSamples],
... ...
@@ -162,10 +159,10 @@ input data. Autmomatically reducing to smaller number.")
162 159
         predictsColumnName <- "risk"
163 160
     else # Classification task. A factor.
164 161
         predictsColumnName <- "class"
165
-    predictionsTable <- data.frame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, unlist(lapply(results, "[[", "predictions")), check.names = FALSE)
162
+    predictionsTable <- DataFrame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, unlist(lapply(results, "[[", "predictions")), check.names = FALSE)
166 163
     colnames(predictionsTable)[ncol(predictionsTable)] <- predictsColumnName
167 164
   } else { # data frame
168
-    predictionsTable <- data.frame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, do.call(rbind, lapply(results, "[[", "predictions")), check.names = FALSE)
165
+    predictionsTable <- DataFrame(sample = unlist(lapply(results, "[[", "testSet")), splitsTestInfo, do.call(rbind, lapply(results, "[[", "predictions")), check.names = FALSE)
169 166
   }
170 167
   rownames(predictionsTable) <- NULL
171 168
   tuneList <- lapply(results, "[[", "tune")
... ...
@@ -175,7 +172,7 @@ input data. Autmomatically reducing to smaller number.")
175 172
   if(!is.null(results[[1]][["importance"]]))
176 173
     importance <- do.call(rbind, lapply(results, "[[", "importance"))
177 174
   
178
-  ClassifyResult(characteristics, rownames(measurements), allFeatures,
175
+  ClassifyResult(characteristics, rownames(measurements), featuresInfo,
179 176
                  lapply(results, "[[", "ranked"), lapply(results, "[[", "selected"),
180 177
                  lapply(results, "[[", "models"), tuneList, predictionsTable, outcomes, importance, modellingParams)
181 178
 })
... ...
@@ -185,7 +182,7 @@ input data. Autmomatically reducing to smaller number.")
185 182
 setMethod("runTests", c("MultiAssayExperiment"),
186 183
           function(measurements, targets = names(measurements), outcomesColumns, ...)
187 184
 {
188
-  omicsTargets <- setdiff("sampleInfo", targets)              
185
+  omicsTargets <- setdiff(targets, "sampleInfo")
189 186
   if(length(omicsTargets) > 0)
190 187
   {
191 188
     if(any(anyReplicated(measurements[, , omicsTargets])))
... ...
@@ -48,10 +48,11 @@
48 48
 #' @author Dario Strbenac
49 49
 #' @examples
50 50
 #' 
51
-#'   predicted <- data.frame(sample = LETTERS[sample(10, 100, replace = TRUE)],
51
+#'   predicted <- DataFrame(sample = LETTERS[sample(10, 100, replace = TRUE)],
52 52
 #'                           class = rep(c("Healthy", "Cancer"), each = 50))
53 53
 #'   actual <- factor(rep(c("Healthy", "Cancer"), each = 5), levels = c("Healthy", "Cancer"))
54 54
 #'   features <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = ''))
55
+#'   features <- DataFrame(`Original Feature` = features, `Renamed Feature` = paste("Feature", 1:100, sep = ''), check.names = FALSE)
55 56
 #'   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
56 57
 #'                                                          "Cross-validation"),
57 58
 #'                             value = c("Example", "t-test", "Differential Expression", "2 Permutations, 2 Folds")),
... ...
@@ -90,12 +90,14 @@
90 90
 #' @author Dario Strbenac
91 91
 #' @examples
92 92
 #' 
93
-#'   predicted <- data.frame(sample = sample(10, 100, replace = TRUE),
93
+#'   predicted <- DataFrame(sample = sample(10, 100, replace = TRUE),
94 94
 #'                           class = rep(c("Healthy", "Cancer"), each = 50))
95 95
 #'   actual <- factor(rep(c("Healthy", "Cancer"), each = 5))
96 96
 #'   allFeatures <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = ''))
97
-#'   rankList <- list(allFeatures[1:100], allFeatures[c(5:1, 6:100)],
98
-#'                    allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 60:51, 61:100)])
97
+#'   allFeatures <- DataFrame(`Original Feature` = allFeatures, `Renamed Feature` = paste("Feature", 1:100, sep = ''),
98
+#'   check.names = FALSE)
99
+#'   rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(5:1, 6:100), "Original Feature"],
100
+#'                    allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 60:51, 61:100), "Original Feature"])
99 101
 #'   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
100 102
 #'                                                          "Cross-validations"),
101 103
 #'                             value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")),
... ...
@@ -106,8 +108,8 @@
106 108
 #'                             predicted, actual)
107 109
 #'   
108 110
 #'   predicted[, "class"] <- sample(predicted[, "class"])
109
-#'   rankList <- list(allFeatures[1:100], allFeatures[c(sample(20), 21:100)],
110
-#'                    allFeatures[c(1:9, 11, 10, 12:100)], allFeatures[c(1:50, 60:51, 61:100)])
111
+#'   rankList <- list(allFeatures[1:100, "Original Feature"], allFeatures[c(sample(20), 21:100), "Original Feature"],
112
+#'                    allFeatures[c(1:9, 11, 10, 12:100), "Original Feature"], allFeatures[c(1:50, 60:51, 61:100), "Original Feature"])
111 113
 #'   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
112 114
 #'                                                          "Cross-validation"),
113 115
 #'                             value = c("Melanoma", "t-test", "Diagonal LDA", "2 Permutations, 2 Folds")),
... ...
@@ -61,14 +61,6 @@ mixModelsParams <- function() {
61 61
     return(list(trainParams = trainParams, predictParams = predictParams))
62 62
 }
63 63
 
64
-# Elastic Net GLM with Prevaliation of Omics Data
65
-elasticNetPreval <- function() {
66
-    trainParams <- TrainParams(elasticNetGLMtrainInterfacePreval)
67
-    predictParams <- PredictParams(elasticNetGLMpredictInterface)
68
-    
69
-    return(list(trainParams = trainParams, predictParams = predictParams))
70
-}
71
-
72 64
 # Cox Proportional Hazards Model for Survival
73 65
 coxphParams <- function() {
74 66
     trainParams <- TrainParams(coxphTrainInterface)
... ...
@@ -68,18 +68,6 @@
68 68
     if(ncol(measurements) == 0)
69 69
       stop(paste("No features are left after restricting to", restrict, "but at least one must be."))
70 70
   }
71
-  
72
-  ###
73
-  # Lets just check that measurements has mcols
74
-  ###
75
-  
76
-  if(is(measurements, "DataFrame")){
77
-    if(is.null(mcols(measurements))){
78
-      message(paste("You have", ncol(measurements), "features and", nrow(measurements), "samples and only one data-type."))
79
-      mcols(measurements)$dataset <- "dataset"
80
-      mcols(measurements)$feature <- colnames(measurements)
81
-  }}
82
-  
83 71
 
84 72
   list(measurements = measurements, outcomes = outcomes)
85 73
 }
... ...
@@ -115,7 +103,7 @@
115 103
     # Get all desired measurements tables and sample information columns (other than the columns representing outcomes).
116 104
     # These form the independent variables to be used for making predictions with.
117 105
     # Variable names will have names like RNA:BRAF for traceability.
118
-    dataTable <- wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomesColumns), check.names = FALSE, collapse = ':')
106
+    dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomesColumns), check.names = FALSE, collapse = ':')
119 107
     rownames(dataTable) <- dataTable[, "primary"]
120 108
     S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"])
121 109
     colnames(S4Vectors::mcols(dataTable))[1] <- "dataset"
... ...
@@ -311,24 +299,15 @@
311 299
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
312 300
     modellingParams@trainParams@tuneParams <- NULL
313 301
     bestPerformers <- sapply(rankings, function(rankingsVariety)
314
-    {      
302
+    {
315 303
       # Creates a matrix. Columns are top n features, rows are varieties (one row if None).
316 304
       performances <- sapply(1:nrow(tuneCombosTrain), function(rowIndex)
317 305
       {
318 306
         whichTry <- 1:tuneCombosTrain[rowIndex, "topN"]
319 307
         if(doSubset)
320 308
         {
321
-          if(is.null(S4Vectors::mcols(measurementsTrain)) ) # There are no different data sets.
322
-          {
323
-            topFeatures <- rankingsVariety[whichTry]
324
-            measurementsTrain <- measurementsTrain[, topFeatures, drop = FALSE] # Features in columns
325
-          } else { # Match to relevant variables, considering data set of them.
326
-            topFeatures <- rankingsVariety[whichTry, ]
327
-            topIDs <-  do.call(paste, topFeatures)
328
-            featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")])
329
-            topColumns <- match(topIDs, featuresIDs)
330
-            measurementsTrain <- measurementsTrain[, topColumns, drop = FALSE]
331
-          }
309
+          topFeatures <- rankingsVariety[whichTry]
310
+          measurementsTrain <- measurementsTrain[, topFeatures, drop = FALSE] # Features in columns
332 311
         } else { # Pass along features to use.
333 312
           modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, setNames(list(rankingsVariety[whichTry]), names(modellingParams@trainParams@intermediate)))
334 313
         }
... ...
@@ -371,12 +350,9 @@
371 350
       if(ncol(tuneRow) > 1) tuneDetails <- tuneRow[, -1, drop = FALSE] else tuneDetails <- NULL
372 351
       
373 352
       rankingUse <- rankings[[tunePick]]
374
-      if(is.null(S4Vectors::mcols(measurementsTrain)))
375
-        selection <- rankingUse[1:tuneRow[, "topN"]]
376
-      else # A data frame. Subset the rows.
377
-        selection <- rankingUse[1:tuneRow[, "topN"], ]
353
+      selectionIndices <- rankingUse[1:tuneRow[, "topN"]]
378 354
       
379
-      list(ranked = rankingUse, selected = selection, tune = tuneDetails)
355
+      list(ranked = rankingUse, selected = selectionIndices, tune = tuneDetails)
380 356
     } else if(is.list(featureRanking)) { # It is a list of functions for ensemble selection.
381 357
       featuresLists <- mapply(function(selector, selParams)
382 358
       {
... ...
@@ -597,16 +573,27 @@
597 573
   # MultiAssayExperiment has feature details in mcols.
598 574
   if(!is.null(S4Vectors::mcols(measurements)))
599 575
   {
600
-    allFeatures <- S4Vectors::mcols(measurements)
576
+    originalInfo <- S4Vectors::mcols(measurements)
601 577
     featureNames <- S4Vectors::mcols(measurements)[, "feature"]
578
+    datasets <- unique(S4Vectors::mcols(measurements)[, "dataset"])
579
+    renamedInfo <- S4Vectors::mcols(measurements)
580
+    renamedDatasets <- paste("Dataset", seq_along(datasets), sep = '')
581
+    for(dataset in datasets)
582
+    {
583
+      rowsDataset <- which(renamedInfo[, "dataset"] == dataset)
584
+      renamedInfo[rowsDataset, "feature"] <- paste("Feature", seq_along(rowsDataset), sep = '')
585
+      renamedInfo[rowsDataset, "dataset"] <- renamedDatasets[match(dataset, datasets)]
586
+    }
587
+    featuresInfo <- DataFrame(originalInfo, renamedInfo)
588
+    colnames(featuresInfo) <- c("Original Dataset", "Original Feature", "Renamed Dataset", "Renamed Feature")
602 589
   } else {
603
-    allFeatures <- colnames(measurements)
604
-    featureNames <- colnames(measurements)
590
+    originalFeatures <- colnames(measurements)
591
+    renamedInfo <- paste("Feature", seq_along(measurements), sep = '')
592
+    featuresInfo <- DataFrame(originalFeatures, renamedInfo)
593
+    colnames(featuresInfo) <- c("Original Feature", "Renamed Feature")
605 594
   }
606
-  # Could refer to features or feature sets, depending on if a selection method utilising feature sets is used.
607
-  consideredFeatures <- ncol(measurements)
608 595
   
609
-  list(allFeatures, featureNames, consideredFeatures)
596
+  featuresInfo
610 597
 }
611 598
 
612 599
 # Function to identify the parameters of an S4 method.
... ...
@@ -8,8 +8,8 @@
8 8
 \alias{show,ClassifyResult-method}
9 9
 \alias{sampleNames}
10 10
 \alias{sampleNames,ClassifyResult-method}
11
-\alias{allFeatureNames}
12
-\alias{allFeatureNames,ClassifyResult-method}
11
+\alias{featuresInfo}
12
+\alias{featuresInfo,ClassifyResult-method}
13 13
 \alias{predictions}
14 14
 \alias{predictions,ClassifyResult-method}
15 15
 \alias{actualOutcomes}
... ...
@@ -46,9 +46,8 @@ using wrapper functions for feature selection and classifiers in this
46 46
 package, the function names will automatically be generated and therefore it
47 47
 is not necessary to specify them.}
48 48
 \item{\code{originalNames}}{All sample names.}
49
-\item{\code{originalFeatures}}{All feature names. Character vector
50
-or \code{\link{DataFrame}} with one row for each feature if the data set has multiple kinds
51
-of measurements on the same set of samples.}
49
+\item{\code{featuresInfo}}{A \code{\link{DataFrame}} containing all feature names in original format
50
+and a safe format without any unusual symbols that R would automatically convert into another format and cause trouble.}
52 51
 \item{\code{rankedFeatures}}{All features, from most to least important. Character vector
53 52
 or a data frame if data set has multiple kinds of measurements on the same set of samples.}
54 53
 \item{\code{chosenFeatures}}{Features selected at each fold. Character
... ...
@@ -80,7 +79,7 @@ most popular value of the parameter in cross-validation is used.}
80 79
 \describe{
81 80
 \item{\code{sampleNames(result)}}{Returns a vector of sample names present in the data set.}}
82 81
 \describe{
83
-\item{\code{allFeatureNames(result)}}{Returns a vector of features present in the data set.}}
82
+\item{\code{featuresInfo(result)}}{Returns a table of features present in the data set. Shows original names and renamed names to ensure no unusual symbols in names.}}
84 83
 \describe{
85 84
 \item{\code{actualOutcomes(result)}}{Returns the known outcomes of each sample.}}
86 85
 \describe{
... ...
@@ -108,7 +108,6 @@ The value of the \code{family} parameter is fixed to \code{binomal}.
108 108
     colnames(genesMatrix) <- paste("Gene", 1:ncol(genesMatrix))
109 109
     
110 110
     CVparams <- CrossValParams("k-Fold")
111
-      
112 111
     trainParams <- TrainParams(GLMtrainInterface)
113 112
     predictParams <- PredictParams(GLMpredictInterface)
114 113
     modParams <- ModellingParams(selectParams = NULL, trainParams = trainParams,
... ...
@@ -86,7 +86,7 @@ each class, or a table of both the class labels and class scores, depending
86 86
 on the setting of \code{returnType}.
87 87
 
88 88
 A list with the first element being empty (no feature ranking is
89
-provided) and second element being the selected features.
89
+provided) and second element being the selected features' indices.
90 90
 }
91 91
 \description{
92 92
 Restructures variables from ClassifyR framework to be compatible with
... ...
@@ -89,30 +89,32 @@ considered simultaneously, to calculate one curve per classification.
89 89
 }
90 90
 \examples{
91 91
 
92
-  predicted <- do.call(rbind, list(data.frame(data.frame(sample = LETTERS[c(1, 8, 15, 3, 11, 20, 19, 18)],
93
-                               Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25),
94
-                               Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75),
92
+  predicted <- do.call(rbind, list(DataFrame(data.frame(sample = LETTERS[seq(1, 20, 2)],
93
+                               Healthy = c(0.89, 0.68, 0.53, 0.76, 0.13, 0.20, 0.60, 0.25, 0.10, 0.30),
94
+                               Cancer = c(0.11, 0.32, 0.47, 0.24, 0.87, 0.80, 0.40, 0.75, 0.90, 0.70),
95 95
                                fold = 1)),
96
-                    data.frame(sample = LETTERS[c(11, 18, 15, 4, 6, 10, 11, 12)],
97
-                               Healthy = c(0.45, 0.56, 0.33, 0.56, 0.33, 0.20, 0.60, 0.40),
98
-                               Cancer = c(0.55, 0.44, 0.67, 0.44, 0.67, 0.80, 0.40, 0.60),
96
+                    DataFrame(sample = LETTERS[seq(2, 20, 2)],
97
+                               Healthy = c(0.45, 0.56, 0.33, 0.56, 0.65, 0.33, 0.20, 0.60, 0.40, 0.80),
98
+                               Cancer = c(0.55, 0.44, 0.67, 0.44, 0.35, 0.67, 0.80, 0.40, 0.60, 0.20),
99 99
                                fold = 2)))
100 100
   actual <- factor(c(rep("Healthy", 10), rep("Cancer", 10)), levels = c("Healthy", "Cancer"))
101
-  result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
102
-                                                         "Cross-validation"),
103
-                            value = c("Melanoma", "t-test", "Random Forest", "2 Permutations, 2 Folds")),
104
-                            LETTERS[1:20], LETTERS[10:1],
105
-                            list(1:100, c(1:9, 11:101)), list(sample(10, 10), sample(10, 10)),
101
+  result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
102
+                            value = c("Melanoma", "t-test", "Random Forest", "2-fold")),
103
+                            LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
104
+                            `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE),
105
+                            list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
106
+                            list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
106 107
                             list(function(oracle){}), NULL, predicted, actual)
107 108
   
108 109
   predicted[c(2, 6), "Healthy"] <- c(0.40, 0.60)
109 110
   predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40)
110
-  result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
111
-                                                         "Cross-validation"),
112
-                            value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")),
113
-                            LETTERS[1:20], LETTERS[10:1], list(1:100, c(1:5, 11:105)),
114
-                            list(sample(10, 10), sample(10, 10)), list(function(oracle){}),
115
-                            NULL, predicted, actual)
111
+  result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"),
112
+                                      value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")),
113
+                            LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
114
+                            `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE),
115
+                            list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])),
116
+                            list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])),
117
+                            list(function(oracle){}), NULL, predicted, actual)
116 118
   ROCplot(list(result1, result2), plotTitle = "Cancer ROC")
117 119
 
118 120
 }
... ...
@@ -103,13 +103,14 @@ classifier predicts all samples as belonging to the majority class.
103 103
 }
104 104
 \examples{
105 105
 
106
-  predictTable <- data.frame(sample = paste("A", 1:10, sep = ''),
107
-                             class = factor(sample(LETTERS[1:2], 50, replace = TRUE)))
106
+  predictTable <- DataFrame(sample = paste("A", 1:10, sep = ''),
107
+                            class = factor(sample(LETTERS[1:2], 50, replace = TRUE)))
108 108
   actual <- factor(sample(LETTERS[1:2], 10, replace = TRUE))                             
109
-  result <- ClassifyResult(DataFrame(),
110
-                           paste("A", 1:10, sep = ''), paste("Gene", 1:50, sep = ''),
111
-                           list(1:50, 1:50), list(1:5, 6:15), list(function(oracle){}), NULL,
112
-                           predictTable, actual)
109
+  result <- ClassifyResult(DataFrame(characteristic = "Data Set", value = "Example"),
110
+                           paste("A", 1:10, sep = ''), DataFrame(`Original Feature` = paste("Gene", 1:50),
111
+                           `Renamed Feature` = paste("Feature", 1:50, sep = '')),
112
+                           list(paste("Gene", 1:50), paste("Gene", 1:50)), list(paste("Gene", 1:5), paste("Gene", 1:10)),
113
+                           list(function(oracle){}), NULL, predictTable, actual)
113 114
   result <- calcCVperformance(result) 
114 115
   performance(result)
115 116
 
... ...
@@ -167,7 +167,7 @@ data(asthma)
167 167
 
168 168
 # Compare randomForest and SVM classifiers.
169 169
 result <- crossValidate(measurements, classes, classifier = c("randomForest", "SVM"))
170
-# performancePlot(result)
170
+performancePlot(result)
171 171
 
172 172
 
173 173
 # Compare performance of different datasets. 
... ...
@@ -14,8 +14,8 @@ Object}
14 14
 }
15 15
 \value{
16 16
 An \code{list} object. The first element is a vector or data frame
17
-of ranked features, the second is a vector or data frame of selected
18
-features.
17
+of ranked feature indicies, the second is a vector or data frame of selected
18
+feature indices of the \code{DataFrame} processed.
19 19
 }
20 20
 \description{
21 21
 Provides a ranking of features based on the magnitude of fitted GLM
... ...
@@ -9,8 +9,8 @@
9 9
 }
10 10
 \value{
11 11
 An \code{list} object. The first element is a vector or data frame
12
-of features, ranked from best to worst using the Gini index. The second
13
-element is a vector or data frame of features used in at least one tree.
12
+of feature indices, ranked from best to worst using the Gini index. The second
13
+element is a vector or data frame of feature indices used in at least one tree.
14 14
 }
15 15
 \description{
16 16
 Provides a ranking of features based on the total decrease in node
... ...
@@ -93,26 +93,28 @@ calculated, and a barchart is plotted.
93 93
 }
94 94
 \examples{
95 95
 
96
-  predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
97
-                          permutation = rep(1:2, each = 40),
98
-                          class = factor(rep(c("Healthy", "Cancer"), 40)))
96
+  predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
97
+                         permutation = rep(1:2, each = 40),
98
+                         class = factor(rep(c("Healthy", "Cancer"), 40)))
99 99
   actual <- factor(rep(c("Healthy", "Cancer"), each = 5))
100 100
   result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
101 101
                                                          "Cross-validation"),
102 102
                             value = c("Example", "t-test", "Differential Expression", "2 Permutations, 2 Folds")),
103
-                            LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:9, 11:101)),
104
-                            list(c(1:3), c(2, 5, 6), 1:4, 5:8),
103
+                            LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
104
+                            `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)),
105
+                            list(paste("Gene", 1:3), paste("Gene", c(2, 5, 6)), paste("Gene", 1:4), paste("Gene", 5:8)),
105 106
                             list(function(oracle){}), NULL, predicted, actual)
106 107
   result1 <- calcCVperformance(result1, "Macro F1")
107 108
 
108
-  predicted <- data.frame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
109
+  predicted <- DataFrame(sample = sample(LETTERS[1:10], 80, replace = TRUE),
109 110
                           permutation = rep(1:2, each = 40),
110 111
                           class = factor(rep(c("Healthy", "Cancer"), 40)))
111 112
                                
112 113
   result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name",
113 114
                                                          "Cross-validation"),
114 115
                             value = c("Example", "Bartlett Test", "Differential Variability", "2 Permutations, 2 Folds")),
115
-                            LETTERS[1:10], LETTERS[10:1], list(1:100, c(1:5, 11:105)),
116
+                            LETTERS[1:10], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]),
117
+                            `Renamed Feature` = paste("Feature", 1:10, sep = '')), list(paste("Gene", 1:100), paste("Gene", c(10:1, 11:100)), paste("Gene", 1:100), paste("Gene", 1:100)),
116 118
                             list(c(1:3), c(4:6), c(1, 6, 7, 9), c(5:8)),
117 119
                             list(function(oracle){}), NULL, predicted, actual)
118 120
   result2 <- calcCVperformance(result2, "Macro F1")
... ...
@@ -192,7 +192,7 @@ drawn.
192 192
                                         colData = cbind(clinicalData, class = classes))
193 193
   targetFeatures <- DataFrame(dataset = "RNA", feature = "Gene 50")                                     
194 194
   plotFeatureClasses(dataContainer, targets = targetFeatures, classesColumn = "class",
195
-                     groupBy = c("sampleInfo", "Gender"),
195
+                     groupBy = c("sampleInfo", "Gender"), # Table name, feature name.
196 196
                      xAxisLabel = bquote(log[2]*'(expression)'), dotBinWidth = 0.5)
197 197
 
198 198
 }
... ...
@@ -119,13 +119,15 @@ the relevant options to \code{parallelParams}.
119 119
 }
120 120
 \examples{
121 121
 
122
-  predicted <- data.frame(sample = sample(10, 100, replace = TRUE),
122
+  predicted <- DataFrame(sample = sample(10, 100, replace = TRUE),
123 123
                           permutation = rep(1:2, each = 50),
124 124
                           class = rep(c("Healthy", "Cancer"), each = 50))
125 125
   actual <- factor(rep(c("Healthy", "Cancer"), each = 5))
126 126
   allFeatures <- sapply(1:100, function(index) paste(sample(LETTERS, 3), collapse = ''))
127
-  rankList <- list(allFeatures[1:100], allFeatures[c(15:6, 1:5, 16:100)],