Browse code

- train and predict functions created to allow training and prediction to be done independently. But they have the capabilities of crossValidate, such as multi-view methods. Implemented as S3 methods to work well with existing generics defined by R. - runTest, runTests and crossValidate now all utilise prepareData and allow passing in extra parameters for it. - getLocationsAndScales becomes private function. - All reference to targets parameter converted to useFeatures, which allows both the assay and the feature to be specified, rather than only the assay. - .MAEtoWideTable data flattening function is gone. Its functionality is incorporated into prepareData. - Parameter tuning in .doSelection of resubstitution mode now correctly uses training data as the test data instead of accidentally using the testing data.

Dario Strbenac authored on 31/08/2022 11:45:02
Showing 21 changed files

... ...
@@ -3,8 +3,8 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.1.14
7
-Date: 2022-08-25
6
+Version: 3.1.15
7
+Date: 2022-08-31
8 8
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
9 9
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
... ...
@@ -1,5 +1,6 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3
+S3method(predict,trainedByClassifyR)
3 4
 export(ClassifyResult)
4 5
 export(CrossValParams)
5 6
 export(FeatureSetCollection)
... ...
@@ -21,7 +22,6 @@ export(edgesToHubNetworks)
21 22
 export(featureSetSummary)
22 23
 export(generateCrossValParams)
23 24
 export(generateModellingParams)
24
-export(getLocationsAndScales)
25 25
 export(interactorDifferences)
26 26
 export(models)
27 27
 export(performance)
... ...
@@ -36,6 +36,11 @@ export(sampleNames)
36 36
 export(samplesMetricMap)
37 37
 export(selectionPlot)
38 38
 export(totalPredictions)
39
+export(train.DataFrame)
40
+export(train.MultiAssayExperiment)
41
+export(train.data.frame)
42
+export(train.list)
43
+export(train.matrix)
39 44
 export(tunedParameters)
40 45
 exportClasses(ClassifyResult)
41 46
 exportClasses(CrossValParams)
... ...
@@ -62,14 +67,12 @@ exportMethods(chosenFeatureNames)
62 67
 exportMethods(crossValidate)
63 68
 exportMethods(distribution)
64 69
 exportMethods(featureSetSummary)
65
-exportMethods(getLocationsAndScales)
66 70
 exportMethods(interactorDifferences)
67 71
 exportMethods(length)
68 72
 exportMethods(models)
69 73
 exportMethods(performance)
70 74
 exportMethods(performancePlot)
71 75
 exportMethods(plotFeatureClasses)
72
-exportMethods(predict)
73 76
 exportMethods(predictions)
74 77
 exportMethods(prepareData)
75 78
 exportMethods(rankingPlot)
... ...
@@ -82,7 +85,9 @@ exportMethods(show)
82 85
 exportMethods(totalPredictions)
83 86
 exportMethods(tunedParameters)
84 87
 import(BiocParallel)
88
+import(MultiAssayExperiment)
85 89
 import(grid)
90
+import(methods)
86 91
 import(utils)
87 92
 importFrom(S4Vectors,as.data.frame)
88 93
 importFrom(S4Vectors,do.call)
... ...
@@ -1,48 +1,7 @@
1 1
 ##### Table of contents #####
2
-# Set old classes
3 2
 # Create union of classes
4 3
 # Set generic accessors
5 4
 
6
-################################################################################
7
-#
8
-# Set old classes
9
-#
10
-################################################################################
11
-
12
-
13
-
14
-# Delete when sparsediscrim is restored to CRAN.
15
-# Trained dlda Object
16
-dlda <- function(x, ...) {
17
-  UseMethod("dlda")
18
-}
19
-setOldClass("dlda")
20
-
21
-# Trained pamr Object
22
-setOldClass("pamrtrained")
23
-
24
-
25
-# Trained svm Object
26
-setOldClass("svm")
27
-
28
-# Trained multnet Object
29
-setOldClass("multnet")
30
-
31
-# Trained coxnet Object
32
-setOldClass("coxnet")
33
-
34
-# Trained randomForest Object
35
-setOldClass("randomForest")
36
-
37
-# Trained coxph Object
38
-setOldClass("coxph")
39
-
40
-# Survival Data Container
41
-setOldClass("Surv")
42
-
43
-# Survival Forest Data Container
44
-setOldClass("rfsrc")
45
-
46 5
 ################################################################################
47 6
 #
48 7
 # Create union of classes
... ...
@@ -63,6 +22,7 @@ setClassUnion("numericOrNULL", c("numeric", "NULL"))
63 22
 setClassUnion("characterOrDataFrame", c("character", "DataFrame"))
64 23
 
65 24
 # Union of a Surv class and a factor for flexibility with sample outcome
25
+setOldClass("Surv")
66 26
 setClassUnion("factorOrSurv", c("factor", "Surv"))
67 27
 
68 28
 # Union of a List and NULL
... ...
@@ -71,6 +31,9 @@ setClassUnion("listOrNULL", c("list", "NULL"))
71 31
 # Union of NULL and DataFrame Class
72 32
 setClassUnion("DataFrameOrNULL", c("DataFrame", "NULL"))
73 33
 
34
+# Tabular data
35
+setClassUnion("tabular", c("data.frame", "DataFrame", "matrix"))
36
+setClassUnion("tabularOrList", c("tabular", "list"))
74 37
 
75 38
 ################################################################################
76 39
 #
... ...
@@ -93,4 +93,6 @@
93 93
     "PCA", "Reduce each assay into a lower dimensional representation and concatenate the principal components to the clinical data before modelling."
94 94
     ),
95 95
   ncol = 2, byrow = TRUE, dimnames = list(NULL, c("multiViewMethod Keyword", "Description"))
96
-) |> as.data.frame()
97 96
\ No newline at end of file
97
+) |> as.data.frame()
98
+
99
+.ClassifyRenvir[["prepareDataFormals"]] <- c("useFeatures", "maxMissingProp", "topNvariance")
98 100
\ No newline at end of file
... ...
@@ -1,19 +1,21 @@
1 1
 #' Cross-validation to evaluate classification performance.
2 2
 #' 
3 3
 #' This function has been designed to facilitate the comparison of classification
4
-#' methods using cross-validation. A selection of typical  comparisons are implemented.
4
+#' methods using cross-validation. A selection of typical comparisons are implemented. The \code{train} function
5
+#' is a convenience method for training on one data set and predicting on an independent validation data set.
5 6
 #'
6 7
 #' @param measurements Either a \code{\link{DataFrame}}, \code{\link{data.frame}}, \code{\link{matrix}}, \code{\link{MultiAssayExperiment}} 
7
-#' or a list of these objects containing the training data.  For a
8
-#' \code{matrix} and \code{data.frame}, the rows are samples and the columns are features. For a \code{data.frame} or \code{\link{MultiAssayExperiment}} assay
9
-#' the rows are features and the columns are samples, as is typical in Bioconductor.
8
+#' or a list of these objects containing the data.
9
+#' @param x Same as \code{measurements} but only training samples.
10 10
 #' @param outcome A vector of class labels of class \code{\link{factor}} of the
11 11
 #' same length as the number of samples in \code{measurements} or a character vector of length 1 containing the
12
-#' column name in \code{measurements} if it is a \code{\link{DataFrame}} or the
13
-#' column name in \code{colData(measurements)} if \code{measurements} is a \code{\link{MultiAssayExperiment}}. If a column name, that column will be
14
-#' removed before training. Or a \code{\link{Surv}} object or a character vector of length 2 or 3 specifying the time and event columns in
15
-#' \code{measurements} for survival outcome.
16
-#' @param ... Parameters passed into \code{\link{prepareData}}.
12
+#' column name in \code{measurements} if it is a \code{\link{DataFrame}}. Or a \code{\link{Surv}} object or a character vector of
13
+#' length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome.
14
+#' @param outcomeColumns If \code{measurements} is a \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.
15
+#' @param outcomeTrain For the \code{train} function, either a factor vector of classes, a \code{\link{Surv}} object, or
16
+#' a character string, or vector of such strings, containing column name(s) of column(s)
17
+#' containing either classes or time and event information about survival.
18
+#' @param ... Parameters passed into \code{\link{prepareData}} which control subsetting and filtering of input data.
17 19
 #' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons
18 20
 #' or assays. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple assays, 
19 21
 #' a different number of features will be used for each assay. If a named list of vectors, the respective number of features will be optimised over. 
... ...
@@ -25,7 +27,7 @@
25 27
 #' and performing multiview classification, the respective classification methods will be used on each assay.
26 28
 #' @param multiViewMethod A character vector specifying the multiview method or data integration approach to use.
27 29
 #' @param assayCombinations A character vector or list of character vectors proposing the assays or, in the case of a list, combination of assays to use
28
-#' with each element being a vector of assays to combine.
30
+#' with each element being a vector of assays to combine. Special value \code{"all"} means all possible subsets of assays.
29 31
 #' @param nFolds A numeric specifying the number of folds to use for cross-validation.
30 32
 #' @param nRepeats A numeric specifying the the number of repeats or permutations to use for cross-validation.
31 33
 #' @param nCores A numeric specifying the number of cores used if the user wants to use parallelisation. 
... ...
@@ -34,13 +36,9 @@
34 36
 #' @param newData The data to use to make predictions with.
35 37
 #'
36 38
 #' @details
37
-#' \code{classifier} can be any of the following implemented approaches - randomForest, GLM, elasticNetGLM, logistic, SVM, DLDA, kNN, naiveBayes, mixturesNormals.
38
-#' 
39
-#' \code{selectionMethod} can be any of the following implemented approaches -  none, t-test, limma, edgeR, NSC, Bartlett, Levene, DMD, likelihoodRatio, KS or KL.
40
-#' 
41
-#' \code{multiViewMethod} can take a few different values. Using \code{merge} will merge or bind the assays after feature selection. 
42
-#'  Using \code{prevalidation} will build prevalidated vectors on all the assays except the clinical data. There must be a assay called clinical.
43
-#'  Using \code{PCA} will perform Principal Components Analysis on each assay and then merge the top few components with the clinical data. There must be a assay called clinical.
39
+#' \code{classifier} can be any a keyword for any of the implemented approaches as shown by \code{available()}.
40
+#' \code{selectionMethod} can be a keyword for any of the implemented approaches as shown by \code{available("selectionMethod")}.
41
+#' \code{multiViewMethod} can be a keyword for any of the implemented approaches as shown by \code{available("multiViewMethod")}.
44 42
 #'
45 43
 #' @return An object of class \code{\link{ClassifyResult}}
46 44
 #' @export
... ...
@@ -91,7 +89,7 @@ setMethod("crossValidate", "DataFrame",
91 89
                    selectionOptimisation = "Resubstitution",
92 90
                    classifier = "randomForest",
93 91
                    multiViewMethod = "none",
94
-                   assayCombinations = NULL,
92
+                   assayCombinations = "all",
95 93
                    nFolds = 5,
96 94
                    nRepeats = 20,
97 95
                    nCores = 1,
... ...
@@ -99,12 +97,9 @@ setMethod("crossValidate", "DataFrame",
99 97
 
100 98
           {
101 99
               # Check that data is in the right format, if not already done for MultiAssayExperiment input.
102
-              #if(is.null(mcols(measurements)$assay))
103
-              #{
104
-                splitAssay <- prepareData(measurements, outcome, ...)
105
-                measurements <- splitAssay[["measurements"]]
106
-                outcome <- splitAssay[["outcome"]]
107
-              #}
100
+              measurementsAndOutcome <- prepareData(measurements, outcome, ...)
101
+              measurements <- measurementsAndOutcome[["measurements"]]
102
+              outcome <- measurementsAndOutcome[["outcome"]]
108 103
               
109 104
               # Which data-types or data-views are present?
110 105
               assayIDs <- unique(mcols(measurements)[, "assay"])
... ...
@@ -121,7 +116,7 @@ setMethod("crossValidate", "DataFrame",
121 116
               {
122 117
                   options(warn = 1)
123 118
                   warning("Elastic Net GLM requires two or more features as input but there is only one.
124
-  Using an ordinary GLM instead.")
119
+Using an ordinary GLM instead.")
125 120
                   classifier <- "GLM"
126 121
               }
127 122
               
... ...
@@ -153,9 +148,9 @@ setMethod("crossValidate", "DataFrame",
153 148
                   resClassifier <-
154 149
                       sapply(assayIDs, function(assayIndex) {
155 150
                           # Loop over assays
156
-                          sapply(classifier[[assayIndex]], function(classifierIndex) {
151
+                          sapply(classifier[[assayIndex]], function(classifierForAssay) {
157 152
                               # Loop over classifiers
158
-                              sapply(selectionMethod[[assayIndex]], function(selectionIndex) {
153
+                              sapply(selectionMethod[[assayIndex]], function(selectionForAssay) {
159 154
                                   # Loop over classifiers
160 155
                                   set.seed(seed)
161 156
                                   measurementsUse <- measurements
... ...
@@ -164,9 +159,9 @@ setMethod("crossValidate", "DataFrame",
164 159
                                       measurements = measurementsUse, outcome = outcome,
165 160
                                       assayIDs = assayIndex,
166 161
                                       nFeatures = nFeatures[assayIndex],
167
-                                      selectionMethod = selectionIndex,
162
+                                      selectionMethod = selectionForAssay,
168 163
                                       selectionOptimisation = selectionOptimisation,
169
-                                      classifier = classifierIndex,
164
+                                      classifier = classifierForAssay,
170 165
                                       multiViewMethod = multiViewMethod,
171 166
                                       nFolds = nFolds,
172 167
                                       nRepeats = nRepeats,
... ...
@@ -199,7 +194,7 @@ setMethod("crossValidate", "DataFrame",
199 194
                   # This allows someone to answer which combinations of the assays might be most useful.
200 195
 
201 196
 
202
-                  if(is.null(assayCombinations)) assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
197
+                  if(!is.list(assayCombinations) && assayCombinations == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
203 198
 
204 199
                   result <- sapply(assayCombinations, function(assayIndex){
205 200
                       CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex],
... ...
@@ -226,7 +221,7 @@ setMethod("crossValidate", "DataFrame",
226 221
                   # This allows someone to answer which combinations of the assays might be most useful.
227 222
 
228 223
 
229
-                  if(is.null(assayCombinations))
224
+                  if(!is.list(assayCombinations) && assayCombinations == "all")
230 225
                   {
231 226
                       assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
232 227
                       assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)]
... ...
@@ -260,7 +255,7 @@ setMethod("crossValidate", "DataFrame",
260 255
                   # This allows someone to answer which combinations of the assays might be most useful.
261 256
 
262 257
 
263
-                  if(is.null(assayCombinations)){
258
+                  if(!is.list(assayCombinations) && assayCombinations == "all"){
264 259
                       assayCombinations <- do.call("c", sapply(seq_along(assayIDs),function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
265 260
                       assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)]
266 261
                       if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data")
... ...
@@ -293,32 +288,22 @@ setMethod("crossValidate", "DataFrame",
293 288
 # One or more omics data sets, possibly with clinical data.
294 289
 setMethod("crossValidate", "MultiAssayExperiment",
295 290
           function(measurements,
296
-                   outcome, 
291
+                   outcomeColumns, 
297 292
                    nFeatures = 20,
298 293
                    selectionMethod = "t-test",
299 294
                    selectionOptimisation = "Resubstitution",
300 295
                    classifier = "randomForest",
301 296
                    multiViewMethod = "none",
302
-                   assayCombinations = NULL,
297
+                   assayCombinations = "all",
303 298
                    nFolds = 5,
304 299
                    nRepeats = 20,
305 300
                    nCores = 1,
306
-                   characteristicsLabel = NULL)
301
+                   characteristicsLabel = NULL, ...)
307 302
           {
308
-              targets <- c(names(measurements), "clinical")
309
-              omicsTargets <- setdiff(targets, "clinical")  
310
-              if(length(omicsTargets) > 0)
311
-              {
312
-                  if(any(anyReplicated(measurements[, , omicsTargets])))
313
-                      stop("Data set contains replicates. Please provide remove or average replicate observations and try again.")
314
-              }
315
-              
316
-              tablesAndoutcome <- .MAEtoWideTable(measurements, targets, outcome, restrict = NULL)
317
-              measurements <- tablesAndoutcome[["dataTable"]]
318
-              outcome <- tablesAndoutcome[["outcome"]]
303
+              measurementsAndOutcome <- prepareData(measurements, outcomeColumns, ...)
319 304
 
320
-              crossValidate(measurements = measurements,
321
-                            outcome = outcome, 
305
+              crossValidate(measurements = measurementsAndOutcome[["measurements"]],
306
+                            outcome = measurementsAndOutcomeoutcome[["outcome"]], 
322 307
                             nFeatures = nFeatures,
323 308
                             selectionMethod = selectionMethod,
324 309
                             selectionOptimisation = selectionOptimisation,
... ...
@@ -341,13 +326,13 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements.
341 326
                    selectionOptimisation = "Resubstitution",
342 327
                    classifier = "randomForest",
343 328
                    multiViewMethod = "none",
344
-                   assayCombinations = NULL,
329
+                   assayCombinations = "all",
345 330
                    nFolds = 5,
346 331
                    nRepeats = 20,
347 332
                    nCores = 1,
348
-                   characteristicsLabel = NULL)
333
+                   characteristicsLabel = NULL, ...)
349 334
           {
350
-              measurements <- S4Vectors::DataFrame(measurements)
335
+              measurements <- S4Vectors::DataFrame(measurements, check.names = FALSE)
351 336
               crossValidate(measurements = measurements,
352 337
                             outcome = outcome,
353 338
                             nFeatures = nFeatures,
... ...
@@ -359,7 +344,7 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements.
359 344
                             nFolds = nFolds,
360 345
                             nRepeats = nRepeats,
361 346
                             nCores = nCores,
362
-                            characteristicsLabel = characteristicsLabel)
347
+                            characteristicsLabel = characteristicsLabel, ...) # ... for prepareData.
363 348
           })
364 349
 
365 350
 #' @rdname crossValidate
... ...
@@ -372,11 +357,11 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
372 357
                    selectionOptimisation = "Resubstitution",
373 358
                    classifier = "randomForest",
374 359
                    multiViewMethod = "none",
375
-                   assayCombinations = NULL,
360
+                   assayCombinations = "all",
376 361
                    nFolds = 5,
377 362
                    nRepeats = 20,
378 363
                    nCores = 1,
379
-                   characteristicsLabel = NULL)
364
+                   characteristicsLabel = NULL, ...)
380 365
           {
381 366
               measurements <- S4Vectors::DataFrame(measurements, check.names = FALSE)
382 367
               crossValidate(measurements = measurements,
... ...
@@ -390,12 +375,11 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
390 375
                             nFolds = nFolds,
391 376
                             nRepeats = nRepeats,
392 377
                             nCores = nCores,
393
-                            characteristicsLabel = characteristicsLabel)
394
-
378
+                            characteristicsLabel = characteristicsLabel, ...) # ... for prepareData.
395 379
           })
396 380
 
397
-
398
-###!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
381
+# This expects that each table is about the same set of samples and thus
382
+# has the same number of rows as every other table.
399 383
 #' @rdname crossValidate
400 384
 #' @export
401 385
 setMethod("crossValidate", "list",
... ...
@@ -406,39 +390,33 @@ setMethod("crossValidate", "list",
406 390
                    selectionOptimisation = "Resubstitution",
407 391
                    classifier = "randomForest",
408 392
                    multiViewMethod = "none",
409
-                   assayCombinations = NULL,
393
+                   assayCombinations = "all",
410 394
                    nFolds = 5,
411 395
                    nRepeats = 20,
412 396
                    nCores = 1,
413
-                   characteristicsLabel = NULL)
397
+                   characteristicsLabel = NULL, ...)
414 398
           {
415
-              # Check if the list only contains one data type
416
-              if (measurements |> sapply(class) |> unique() |> length() != 1) {
417
-                  stop("All assays must be of the same type (e.g. data.frame, matrix)")
418
-              }
419
-              
420 399
               # Check data type is valid
421
-              if (!(measurements[[1]] |> class() %in% c("data.frame", "DataFrame", "matrix"))) {
400
+              if (!(all(sapply(measurements, class) %in% c("data.frame", "DataFrame", "matrix")))) {
422 401
                   stop("assays must be of type data.frame, DataFrame or matrix")
423 402
               }
424 403
               
425 404
               # Check the list is named
426
-              if (names(measurements) |> is.null()) {
427
-                  stop("Measurements must be a named list")
405
+              if (is.null(names(measurements))) {
406
+                  stop("Measurements must be a named list.")
428 407
               }
429 408
               
430 409
               # Check same number of samples for all datasets
431
-              if ((measurements |> sapply(dim))[1,] |> unique() |> length() != 1) {
432
-                  stop("All datasets must have the same number of samples")
410
+              if (!length(unique(sapply(measurements, nrow))) == 1) {
411
+                  stop("All datasets must have the same samples.")
433 412
               }
434 413
               
435 414
               # Check the number of outcome is the same
436
-              if (((measurements[[1]] |> dim())[1] != length(outcome)) & length(outcome)>2) {
437
-                  stop("outcome must have same number of samples as measurements")
415
+              if (!all(sapply(measurements, nrow) == length(outcome)) && !is.character(outcome)) {
416
+                  stop("outcome must have same number of samples as measurements.")
438 417
               }
439 418
               
440
-              df_list <- sapply(measurements, t, simplify = FALSE)
441
-              df_list <- sapply(measurements , S4Vectors::DataFrame)
419
+              df_list <- sapply(measurements, S4Vectors::DataFrame, check.names = FALSE)
442 420
               
443 421
               df_list <- mapply(function(meas, nam){
444 422
                   mcols(meas)$assay <- nam
... ...
@@ -448,7 +426,6 @@ setMethod("crossValidate", "list",
448 426
               
449 427
               
450 428
               combined_df <- do.call(cbind, df_list)
451
-              colnames(combined_df) <- mcols(combined_df)$feature
452 429
               
453 430
               crossValidate(measurements = combined_df,
454 431
                             outcome = outcome, 
... ...
@@ -461,7 +438,7 @@ setMethod("crossValidate", "list",
461 438
                             nFolds = nFolds,
462 439
                             nRepeats = nRepeats,
463 440
                             nCores = nCores,
464
-                            characteristicsLabel = characteristicsLabel)
441
+                            characteristicsLabel = characteristicsLabel, ...)
465 442
           })
466 443
 
467 444
 
... ...
@@ -633,13 +610,10 @@ generateModellingParams <- function(assayIDs,
633 610
 
634 611
     performanceType <- ifelse(classifier %in% c("CoxPH", "CoxNet", "randomSurvivalForest"), "C-index", "Balanced Accuracy")
635 612
     
636
-    
637
-    classifiers <- c("randomForest", "randomSurvivalForest", "GLM", "elasticNetGLM", "SVM", "DLDA",
638
-                     "naiveBayes", "mixturesNormals", "kNN",
639
-                     "CoxPH", "CoxNet")
640 613
     # Check classifier
641
-    if(!classifier %in% classifiers)
642
-        stop(paste("Classifier must exactly match of these (be careful of case):", paste(classifiers, collapse = ", ")))
614
+    knownClassifiers <- .ClassifyRenvir[["classifyKeywords"]][, "classifier Keyword"]
615
+    if(!classifier %in% knownClassifiers)
616
+        stop(paste("Classifier must exactly match of these (be careful of case):", paste(knownClassifiers, collapse = ", ")))
643 617
     
644 618
     classifierParams <- .classifierKeywordToParams(classifier)
645 619
 
... ...
@@ -692,7 +666,7 @@ generateMultiviewParams <- function(assayIDs,
692 666
         assayTrain <- sapply(assayIDs, function(assayID) if(assayID == 1) measurements else measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
693 667
 
694 668
         # Generate params for each assay. This could be extended to have different selectionMethods for each type
695
-        paramsassays <- mapply(generateModellingParams,
669
+        paramsAssays <- mapply(generateModellingParams,
696 670
                                  nFeatures = nFeatures[assayIDs],
697 671
                                  selectionMethod = selectionMethod[assayIDs],
698 672
                                  assayIDs = assayIDs,
... ...
@@ -714,7 +688,7 @@ generateMultiviewParams <- function(assayIDs,
714 688
 
715 689
         # Update selectParams to use
716 690
         params@selectParams <- SelectParams("selectMulti",
717
-                                            params = paramsassays,
691
+                                            params = paramsAssays,
718 692
                                             characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "merge"),
719 693
                                             tuneParams = list(nFeatures = nFeatures[[1]],
720 694
                                                               performanceType = "Balanced Error",
... ...
@@ -729,7 +703,7 @@ generateMultiviewParams <- function(assayIDs,
729 703
         assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
730 704
 
731 705
         # Generate params for each assay. This could be extended to have different selectionMethods for each type
732
-        paramsassays <- mapply(generateModellingParams,
706
+        paramsAssays <- mapply(generateModellingParams,
733 707
                                  nFeatures = nFeatures[assayIDs],
734 708
                                  selectionMethod = selectionMethod[assayIDs],
735 709
                                  assayIDs = assayIDs,
... ...
@@ -744,8 +718,8 @@ generateMultiviewParams <- function(assayIDs,
744 718
         params <- ModellingParams(
745 719
             balancing = "none",
746 720
             selectParams = NULL,
747
-            trainParams = TrainParams(prevalTrainInterface, params = paramsassays, characteristics = paramsassays$clinical@trainParams@characteristics),
748
-            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsassays$clinical@predictParams@characteristics)
721
+            trainParams = TrainParams(prevalTrainInterface, params = paramsAssays, characteristics = paramsAssays$clinical@trainParams@characteristics),
722
+            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsAssays$clinical@predictParams@characteristics)
749 723
         )
750 724
 
751 725
         return(params)
... ...
@@ -757,7 +731,7 @@ generateMultiviewParams <- function(assayIDs,
757 731
         assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
758 732
 
759 733
         # Generate params for each assay. This could be extended to have different selectionMethods for each type
760
-        paramsassays <- mapply(generateModellingParams,
734
+        paramsAssays <- mapply(generateModellingParams,
761 735
                                  nFeatures = nFeatures[assayIDs],
762 736
                                  selectionMethod = selectionMethod[assayIDs],
763 737
                                  assayIDs = assayIDs,
... ...
@@ -772,8 +746,8 @@ generateMultiviewParams <- function(assayIDs,
772 746
         params <- ModellingParams(
773 747
             balancing = "none",
774 748
             selectParams = NULL,
775
-            trainParams = TrainParams(prevalTrainInterface, params = paramsassays, characteristics = paramsassays$clinical@trainParams@characteristics),
776
-            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsassays$clinical@predictParams@characteristics)
749
+            trainParams = TrainParams(prevalTrainInterface, params = paramsAssays, characteristics = paramsAssays$clinical@trainParams@characteristics),
750
+            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsAssays$clinical@predictParams@characteristics)
777 751
         )
778 752
 
779 753
         return(params)
... ...
@@ -783,7 +757,7 @@ generateMultiviewParams <- function(assayIDs,
783 757
     if(multiViewMethod == "PCA"){
784 758
 
785 759
         # Split measurements up by assay.
786
-        assayTrain <- sapply(assayIDs, function(assayID) measurements[,mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
760
+        assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
787 761
 
788 762
         # Generate params for each assay. This could be extended to have different selectionMethods for each type
789 763
         paramsClinical <-  list(clinical = generateModellingParams(
... ...
@@ -808,16 +782,15 @@ generateMultiviewParams <- function(assayIDs,
808 782
 
809 783
 }
810 784
 
811
-
812
-CV <- function(measurements,
813
-               outcome,
785
+# measurements, outcome are mutually exclusive with x, outcomeTrain, measurementsTest, outcomeTest.
786
+CV <- function(measurements = NULL,
787
+               outcome = NULL, x = NULL, outcomeTrain = NULL, measurementsTest = NULL, outcomeTest = NULL,
814 788
                assayIDs,
815 789
                nFeatures = NULL,
816 790
                selectionMethod = "t-test",
817 791
                selectionOptimisation = "Resubstitution",
818 792
                classifier = "elasticNetGLM",
819 793
                multiViewMethod = "none",
820
-               assayCombinations = NULL,
821 794
                nFolds = 5,
822 795
                nRepeats = 100,
823 796
                nCores = 1,
... ...
@@ -825,8 +798,10 @@ CV <- function(measurements,
825 798
 
826 799
 {
827 800
     # Check that data is in the right format
828
-    checkData(measurements, outcome)
829
-    
801
+    if(!is.null(measurements))
802
+      checkData(measurements, outcome)
803
+    else
804
+      checkData(x, x)
830 805
     # Check that other variables are in the right format and fix
831 806
     nFeatures <- cleanNFeatures(nFeatures = nFeatures,
832 807
                                 measurements = measurements)
... ...
@@ -838,49 +813,239 @@ CV <- function(measurements,
838 813
     # Which data-types or data-views are present?
839 814
     if(is.null(characteristicsLabel)) characteristicsLabel <- "none"
840 815
 
841
-    # Setup cross-validation parameters including
816
+    # Setup cross-validation parameters. Could be needed for independent train/test if parameter tuning
817
+    # is specified to be done by nested cross-validation.
842 818
     crossValParams <- generateCrossValParams(nRepeats = nRepeats,
843 819
                                              nFolds = nFolds,
844 820
                                              nCores = nCores,
845
-                                             selectionOptimisation = selectionOptimisation
846
-    )
821
+                                             selectionOptimisation = selectionOptimisation)
822
+    
847 823
 
848 824
     # Turn text into TrainParams and TestParams objects
849 825
     modellingParams <- generateModellingParams(assayIDs = assayIDs,
850
-                                               measurements = measurements,
826
+                                               measurements = if(!is.null(measurements)) measurements else x,
851 827
                                                nFeatures = nFeatures,
852 828
                                                selectionMethod = selectionMethod,
853 829
                                                selectionOptimisation = selectionOptimisation,
854 830
                                                classifier = classifier,
855
-                                               multiViewMethod = multiViewMethod
856
-    )
831
+                                               multiViewMethod = multiViewMethod)
832
+    
857 833
     if(length(assayIDs) > 1 || length(assayIDs) == 1 && assayIDs != 1) assayText <- assayIDs else assayText <- NULL
858 834
     characteristics <- S4Vectors::DataFrame(characteristic = c(if(!is.null(assayText)) "Assay Name" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(!is.null(assayText)) paste(assayText, collapse = ", ") else NULL, paste(classifier, collapse = ", "),  paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel))
859 835
 
860
-    classifyResults <- runTests(measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics)
861
-    
862
-    fullResult <- runTest(measurements, outcome, measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1)
836
+    if(!is.null(measurements))
837
+    { # Cross-validation.        
838
+      classifyResults <- runTests(measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics)
839
+      fullResult <- runTest(measurements, outcome, measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1)
840
+    } else { # Independent training and testing.
841
+      classifyResults <- runTest(x, outcomeTrain, measurementsTest, outcomeTest, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics)
842
+      
843
+      fullResult <- runTest(measurements, outcome, measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1)
844
+    }
863 845
 
864 846
     classifyResults@finalModel <- list(fullResult$models)
865 847
     classifyResults
848
+}
866 849
 
850
+simplifyResults <- function(results, values = c("assay", "classifier", "selectionMethod", "multiViewMethod")){
851
+    ch <- sapply(results, function(x) x@characteristics[x@characteristics$characteristic %in% values, "value"], simplify = TRUE)
852
+    ch <- data.frame(t(ch))
853
+    results[!duplicated(ch)]
867 854
 }
868 855
 
856
+#' @rdname crossValidate
857
+#' @export
858
+train.matrix <-function(x, outcomeTrain, ...)
859
+               {
860
+                 x <- DataFrame(x, check.names = FALSE)
861
+                 train(x, outcomeTrain, ...)
862
+               }
869 863
 
864
+#' @rdname crossValidate
865
+#' @export
866
+train.data.frame <- function(x, outcomeTrain, ...)
867
+                    {
868
+                      x <- DataFrame(x, check.names = FALSE)
869
+                      train(x, outcomeTrain, ...)
870
+                    }
870 871
 
872
+#' @rdname crossValidate
873
+#' @param assayIDs A character vector for assays to train with. Special value \code{"all"}
874
+#' uses all assays in the input object.
875
+#' @export
876
+train.DataFrame <- function(x, outcomeTrain, classifier = "randomForest", multiViewMethod = "none", assayIDs = "all", ...) # ... for prepareData.
877
+                   {
878
+              prepArgs <- list(x, outcomeTrain)
879
+              extraInputs <- list(...)
880
+              if(length(extraInputs) > 0)
881
+                prepExtras <- which(names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
882
+              if(length(prepExtras) > 0)
883
+                prepArgs <- append(prepArgs, extraInputs[prepExtras])
884
+              measurementsAndOutcome <- do.call(prepareData, prepArgs)
885
+              
886
+              classifier <- cleanClassifier(classifier = classifier, measurements = measurements)
887
+              if(assayIDs == "all") assayIDs <- unique(mcols(x)[, "assay"])
888
+              if(is.null(assayIDs)) assayIDs <- 1
871 889
 
890
+              if(multiViewMethod == "none"){
891
+                  resClassifier <-
892
+                      sapply(assayIDs, function(assayIndex) {
893
+                          # Loop over assays
894
+                          sapply(classifier[[assayIndex]], function(classifierForAssay) {
895
+                              # Loop over classifiers
896
+                                  measurementsUse <- measurements
897
+                                  if(assayIndex != 1) measurementsUse <- measurements[, mcols(measurements)[, "assay"] == assayIndex, drop = FALSE]
898
+                                  
899
+                                  classifierParams <- .classifierKeywordToParams(classifierForAssay)
900
+                                  modellingParams <- ModellingParams(balancing = "none", selectParams = "none",
901
+                                                               trainParams = classifierParams$trainParams, predictParams = classifierParams$predictParams)
902
+                                  
903
+                                  .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
904
+                                  ## train model
905
+                          },
906
+                          simplify = FALSE)
907
+                      },
908
+                      simplify = FALSE)
872 909
 
910
+                  models <- unlist(resClassifier, recursive = FALSE)
911
+                  names(models) <- assayIDs
912
+                  class(models) <- c(class(models), "listOfModels")
913
+              }
873 914
 
874
-simplifyResults <- function(results, values = c("assay", "classifier", "selectionMethod", "multiViewMethod")){
875
-    ch <- sapply(results, function(x) x@characteristics[x@characteristics$characteristic %in% values, "value"], simplify = TRUE)
876
-    ch <- data.frame(t(ch))
877
-    results[!duplicated(ch)]
915
+              ################################
916
+              #### Yes multiview
917
+              ################################
918
+
919
+              ### Merging or binding to combine data
920
+              if(multiViewMethod == "merge"){
921
+                  measurementsUse <- measurements[, mcols(measurements)[["assay"]] %in% assayIDs]
922
+                  .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
923
+              }
924
+
925
+
926
+              ### Prevalidation to combine data
927
+              if(multiViewMethod == "prevalidation"){
928
+                # Split measurements up by assay.
929
+                 assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
930
+
931
+               # Generate params for each assay. This could be extended to have different selectionMethods for each type
932
+                 paramsAssays <- mapply(generateModellingParams,
933
+                                        assayIDs = assayIDs,
934
+                                        measurements = assayTrain[assayIDs],
935
+                                        classifier = classifier[assayIDs],
936
+                                        MoreArgs = list(multiViewMethod = "none"),
937
+                                 SIMPLIFY = FALSE)
938
+
939
+                 modellingParams <- ModellingParams(
940
+                                    balancing = "none",
941
+                                    selectParams = NULL,
942
+                                    trainParams = TrainParams(prevalTrainInterface, params = paramsAssays, characteristics = paramsAssays$clinical@trainParams@characteristics),
943
+                                    predictParams = PredictParams(prevalPredictInterface, characteristics = paramsAssays$clinical@predictParams@characteristics))
944
+                 .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
945
+              }
946
+              
947
+              ### Principal Components Analysis to combine data
948
+              if(multiViewMethod == "PCA"){
949
+                measurementsUse <- measurements[, mcols(measurements)[["assay"]] %in% assayIDs]
950
+                paramsClinical <-  list(clinical = generateModellingParams(
951
+                                        assayIDs = "clinical",
952
+                                        measurements = measurements[, mcols(measurements)[["assay"]] == "clinical"],
953
+                                        classifier = classifier["clinical"],
954
+                                        multiViewMethod = "none"))
955
+                
956
+                modellingParams <- ModellingParams(balancing = "none", selectParams = NULL,
957
+                                   trainParams = TrainParams(pcaTrainInterface, params = paramsClinical, nFeatures = nFeatures, characteristics = paramsClinical$clinical@trainParams@characteristics),
958
+                                   predictParams = PredictParams(pcaPredictInterface, characteristics = paramsClinical$clinical@predictParams@characteristics))
959
+                .doTrain(measurementsUse, outcomeTrain, NULL, NULL, modellingParams, verbose = 0)[["model"]]
960
+              }
961
+          }
962
+
963
+#' @rdname crossValidate
964
+#' @export
965
+# Each of the first four variables are named lists with names of assays.
966
+train.list <- function(x, outcomeTrain, ...)
967
+              {
968
+                # Check data type is valid
969
+                if (!(all(sapply(x, function(element) is(element, "tabular")))))
970
+                  stop("assays must be of type data.frame, DataFrame or matrix")
971
+              
972
+                # Check the list is named
973
+                if (is.null(names(x)))
974
+                  stop("Measurements must be a named list")
975
+              
976
+                # Check same number of samples for all datasets
977
+                if (!length(unique(sapply(x, nrow))) == 1)
978
+                  stop("All datasets must have the same samples")
979
+              
980
+                # Check the number of outcome is the same
981
+                if (!all(sapply(x, nrow) == length(x)) && !is.character(x))
982
+                  stop("outcome must have same number of samples as measurements")
983
+              
984
+              df_list <- sapply(x, S4Vectors::DataFrame)
985
+              
986
+              df_list <- mapply(function(meas, nam){
987
+                  mcols(meas)$assay <- nam
988
+                  mcols(meas)$feature <- colnames(meas)
989
+                  meas
990
+              }, df_list, names(df_list))
991
+              
992
+              combined_df <- do.call(cbind, df_list)
993
+              
994
+              # Each list of tabular data has been collapsed into a DataFrame.
995
+              # Will be subset to relevant assayIDs inside the DataFrame method.
996
+              train(combined_df, outcomeTrain, ...)
878 997
 }
879 998
 
880 999
 #' @rdname crossValidate
881 1000
 #' @export
882
-setMethod("predict", "ClassifyResult", 
883
-          function(object, newData)
1001
+train.MultiAssayExperiment <- function(x, outcomeColumns, ...)
884 1002
           {
885
-              object@modellingParams@predictParams@predictor(object@finalModel[[1]], newData)
886
-          })
1003
+              prepArgs <- list(x, outcomeColumns)
1004
+              extraInputs <- list(...)
1005
+              if(length(extraInputs) > 0)
1006
+                prepExtras <- which(names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
1007
+              if(length(prepExtras) > 0)
1008
+                prepArgs <- append(prepArgs, extraInputs[prepExtras])
1009
+              measurementsAndOutcome <- do.call(prepareData, prepArgs)
1010
+              trainArgs <- list(measurementsAndOutcome[["measurements"]], measurementsAndOutcome[["outcome"]])
1011
+              if(length(extraInputs) > 0)
1012
+                trainExtras <- which(!names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
1013
+              if(length(trainExtras) > 0)
1014
+                trainArgs <- append(trainArgs, extraInputs[trainExtras])
1015
+              do.call(train, trainArgs)
1016
+          }
1017
+
1018
+#' @rdname crossValidate
1019
+#' @param object A fitted model or a list of such models.
1020
+#' @param newData For the \code{predict} function, an object of type \code{matrix}, \code{data.frame}
1021
+#' \code{DataFrame}, \code{list} (of matrices or data frames) or \code{MultiAssayExperiment} containing
1022
+#' the data to make predictions with with either a fitted model created by \code{train} or the final model
1023
+#' stored in a \code{\link{ClassifyResult}} object.
1024
+
1025
+#' @export
1026
+predict.trainedByClassifyR <- function(object, newData, ...)
1027
+{
1028
+  if(is(newData, "tabular")) # Simply tabular data.
1029
+  {
1030
+    colnames(newData) <- make.names(colnames(newData)) # Ensure that feature names are syntactically valid, like during model fitting.
1031
+  } else if(is.list(newData) && !is(object, "listOfModels")) # Don't check all those conditions that train function does.
1032
+  { # Merge the list of data tables and keep track of assay names in columns' metadata.
1033
+    newData <- mapply(function(meas, nam){
1034
+               mcols(meas)$assay <- nam
1035
+               mcols(meas)$feature <- colnames(meas)
1036
+               meas
1037
+               }, newData, names(newData))
1038
+    newData <- do.call(cbind, newData)
1039
+    } else if(is(newData, "MultiAssayExperiment"))
1040
+            {
1041
+              newData <- prepareData(newData, useFeatures = allFeatureNames(object))
1042
+              # Some classifiers dangerously use positional matching rather than column name matching.
1043
+              # newData columns are sorted so that the right column ordering is guaranteed.
1044
+            } else {stop("'newData' is not one of the valid data types. It is of type ", class(newData), '.')}
1045
+  if(is(object, "ClassifyResult"))
1046
+  {
1047
+    object@modellingParams@predictParams@predictor(object@finalModel[[1]], newData)
1048
+  } else if (is(object, "listOfModels")) { # Object is itself a trained model and it is assumed that a predict method is defined for it.
1049
+    mapply(function(model, assay) predict(model, assay), object, newData, SIMPLIFY = FALSE)
1050
+  } else predict(object, newData)
1051
+}
887 1052
\ No newline at end of file
... ...
@@ -1,63 +1,7 @@
1
-#' Calculate Location and Scale
2
-#' 
3
-#' Calculates the location and scale for each feature.
4
-#' 
5
-#' \code{"SD"} is used to represent standard deviation and \code{"MAD"} is used
6
-#' to represent median absolute deviation.
7
-#' 
8
-#' @aliases getLocationsAndScales getLocationsAndScales,matrix-method
9
-#' getLocationsAndScales,DataFrame-method
10
-#' getLocationsAndScales,MultiAssayExperiment-method
11
-#' @param measurements Either a \code{\link{matrix}}, \code{\link{DataFrame}}
12
-#' or \code{\link{MultiAssayExperiment}} containing the training data. For a
13
-#' \code{matrix}, the rows are samples, and the columns are features.
14
-#' If of type \code{\link{DataFrame}} or \code{\link{MultiAssayExperiment}}, the data set is subset
15
-#' to only those features of type \code{numeric}.
16
-#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the
17
-#' names of the data tables to be used. \code{"clinical"} is also a valid value
18
-#' and specifies that numeric variables from the clinical data table will be
19
-#' used.
20
-#' @param ... Variables not used by the \code{matrix} nor the
21
-#' \code{MultiAssayExperiment} method which are passed into and used by the
22
-#' \code{DataFrame} method.
23
-#' @param location The type of location to be calculated.
24
-#' @param scale The type of scale to be calculated.
25
-#' @return A \code{\link{list}} of length 2. The first element contains the
26
-#' location for every feature. The second element contains the scale for every
27
-#' feature.
28
-#' @author Dario Strbenac
29
-#' @references Qn:
30
-#' \url{http://www.tandfonline.com/doi/pdf/10.1080/01621459.1993.10476408}
31
-#' @examples
32
-#' 
33
-#'   genesMatrix <- matrix(rnorm(1000, 8, 4), nrow = 10)
34
-#'   distributionInfo <- getLocationsAndScales(genesMatrix, "median", "MAD")
35
-#'   
36
-#'   mean(distributionInfo[["median"]]) # Typical median.
37
-#'   mean(distributionInfo[["MAD"]]) # Typical MAD.
38
-#' 
39
-#' @usage NULL
40
-#' @export
41
-setGeneric("getLocationsAndScales", function(measurements, ...)
42
-           standardGeneric("getLocationsAndScales"))
1
+# Calculate Location and Scale
43 2
 
44
-#' @rdname getLocationsAndScales
45
-#' @export
46
-setMethod("getLocationsAndScales", "matrix", # Matrix of numeric measurements.
47
-          function(measurements, ...)
3
+getLocationsAndScales <- function(measurements, location = c("mean", "median"), scale = c("SD", "MAD", "Qn"))
48 4
 {
49
-  getLocationsAndScales(S4Vectors::DataFrame(measurements, check.names = FALSE), ...)
50
-})
51
-
52
-#' @rdname getLocationsAndScales
53
-#' @export
54
-setMethod("getLocationsAndScales", "DataFrame", # Sample information data or one of the other inputs, transformed.
55
-          function(measurements, location = c("mean", "median"), scale = c("SD", "MAD", "Qn"))
56
-{
57
-  isNumeric <- sapply(measurements, is.numeric)
58
-  measurements <- measurements[, isNumeric, drop = FALSE]
59
-  if(sum(isNumeric) == 0)
60
-    stop("No features are numeric but at least one must be.")
61 5
   location <- match.arg(location) 
62 6
   scale <- match.arg(scale)
63 7
    
... ...
@@ -72,19 +16,4 @@ setMethod("getLocationsAndScales", "DataFrame", # Sample information data or one
72 16
                        MAD = apply(measurements, 2, mad),
73 17
                        Qn = apply(measurements, 2, robustbase::Qn))),
74 18
                 c(location, scale))
75
-})
76
-
77
-# One or more omics data sets, possibly with clinical data.
78
-#' @rdname getLocationsAndScales
79
-#' @export
80
-setMethod("getLocationsAndScales", "MultiAssayExperiment",
81
-          function(measurements, targets = names(measurements), ...)
82
-{
83
-  if(!all(targets %in% c(names(measurements), "clinical")))
84
-    stop("Some table names in 'targets' are not assay names in 'measurements' or \"clinical\".")  
85
-            
86
-  combinedData <- .MAEtoWideTable(measurements, targets, NULL)
87
-  if(class(combinedData) == "list")
88
-    combinedData <- combinedData[["dataTable"]]
89
-  getLocationsAndScales(combinedData, ...)            
90
-})
19
+}
91 20
\ No newline at end of file
... ...
@@ -17,11 +17,10 @@
17 17
 #' @param featurePairs A object of type \code{\link{Pairs}}.
18 18
 #' @param absolute If TRUE, then the absolute values of the differences are
19 19
 #' returned.
20
-#' @param target If \code{measurements} is a \code{MultiAssayExperiment}, the
21
-#' name of the data table to be used.
22
-#' @param classesColumn If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
23
-#' names of the class column in the table extracted by \code{colData(multiAssayExperiment)}
24
-#' that contains each sample's outcome to use for prediction.
20
+#' @param useFeatures If \code{measurements} is a \code{MultiAssayExperiment},
21
+#' \code{"all"} or a two-column table of features to use. If a table, the first column must have
22
+#' assay names and the second column must have feature names found for that assay.
23
+#' \code{"clinical"} is also a valid assay name and refers to the clinical data table.
25 24
 #' @param ... Variables not used by the \code{matrix} nor the
26 25
 #' \code{MultiAssayExperiment} method which are passed into and used by the
27 26
 #' \code{DataFrame} method.
... ...
@@ -94,8 +93,8 @@ setMethod("interactorDifferences", "DataFrame", # Possibly mixed data types.
94 93
 #' @rdname interactorDifferences
95 94
 #' @export
96 95
 setMethod("interactorDifferences", "MultiAssayExperiment", # Pick one numeric table from the data set.
97
-          function(measurements, target = NULL, classesColumn, ...)
96
+          function(measurements, useFeatures = "all", ...)
98 97
 {
99
-  tablesAndClasses <- .MAEtoWideTable(measurements, target, classesColumn)
100
-  interactorDifferences(tablesAndClasses[["dataTable"]], ...)
98
+  measurementsDF <- prepareData(measurements, useFeatures = useFeatures)[["measurements"]]
99
+  interactorDifferences(measurementsDF, ...)
101 100
 })
102 101
\ No newline at end of file
... ...
@@ -3,7 +3,7 @@
3 3
 #' Input data could be of matrix, MultiAssayExperiment, or DataFrame format and this
4 4
 #' function will prepare a DataFrame of features and a vector of outcomes and help
5 5
 #' to exclude nuisance features such as dates or unique sample identifiers from
6
-#' future modelling.
6
+#' subsequent modelling.
7 7
 #' 
8 8
 #' @aliases prepareData prepareData,matrix-method prepareData,DataFrame-method
9 9
 #' prepareData,MultiAssayExperiment-method
... ...
@@ -136,28 +136,76 @@ setMethod("prepareData", "DataFrame",
136 136
     measurements <- measurements[, mostVariance]
137 137
   }
138 138
   
139
-  list(measurements = measurements, outcome = outcome)  
139
+  list(measurements = measurements, outcome = outcome)
140 140
 })
141 141
 
142 142
 #' @rdname prepareData
143 143
 #' @export
144 144
 setMethod("prepareData", "MultiAssayExperiment",
145
-  function(measurements, outcomeColumns = NULL, useFeatures = data.frame(assay = names(measurements), feature = rep("all", length(measurements))), ...)
145
+  function(measurements, outcomeColumns = NULL, useFeatures = "all", ...)
146 146
 {
147
-  omicsTargets <- setdiff(useFeatures[, 1], "clinical")
147
+  if(is.character(useFeatures)) useFeatures <- data.frame(assay = names(measurements), feature = "all")
148
+  omicsTargets <- setdiff(useFeatures[, "assay"], "clinical")
148 149
   if(length(omicsTargets) > 0)
149 150
   {
150 151
     if(any(anyReplicated(measurements[, , omicsTargets])))
151 152
       stop("Data set contains replicates. Please remove or average replicate observations and try again.")
152 153
   }
153 154
   
154
-  if(is.null(outcomeColumns))
155
-    stop("'outcomeColumns' is NULL. One or more outcome columns must be specified.")
156
-  if(!all(outcomeColumns %in% colnames(MultiAssayExperiment::colData(measurements))))
155
+  if(!is.null(outcomeColumns) && !all(outcomeColumns %in% colnames(MultiAssayExperiment::colData(measurements))))
157 156
     stop("Not all column names specified by 'outcomeColumns' found in clinical table.")  
158
-  if(!all(useFeatures[, 1] %in% c(names(measurements), "clinical")))
157
+  if(!all(useFeatures[, "assay"] %in% c(names(measurements), "clinical")))
159 158
     stop("Some assay names in first column of 'useFeatures' are not assay names in 'measurements' or \"clinical\".")
159
+
160
+  clinicalColumns <- colnames(MultiAssayExperiment::colData(measurements))
161
+  if("clinical" %in% useFeatures[, "assay"])
162
+  {
163
+    clinicalRows <- useFeatures[, "assay"] == "clinical"      
164
+    clinicalColumns <- useFeatures[clinicalRows, "feature"]
165
+    useFeatures <- useFeatures[!clinicalRows, ]
166
+  } else {
167
+    clinicalColumns <- NULL
168
+  }
160 169
   
170
+  if(nrow(useFeatures) > 0)
171
+  {
172
+    measurements <- measurements[, , unique(useFeatures[, "assay"])]
173
+  
174
+    # Get all desired measurements tables and clinical columns (other than the columns representing outcome).
175
+    # These form the independent variables to be used for making predictions with.
176
+    # Variable names will have names like RNA_BRAF for traceability.
177
+    dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(clinicalColumns, outcomeColumns))
178
+    rownames(dataTable) <- dataTable[, "primary"]
179
+    S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "clinical", S4Vectors::mcols(dataTable)[, "sourceName"])
180
+    colnames(S4Vectors::mcols(dataTable))[1] <- "assay"
181
+            
182
+    # Sample information variable names not included in column metadata of wide table but only as row names of it.
183
+    # Create a combined column named "feature" which has feature names of the assays as well as the clinical.
184
+    S4Vectors::mcols(dataTable)[, "feature"] <- as.character(S4Vectors::mcols(dataTable)[, "rowname"])
185
+    missingIndices <- is.na(S4Vectors::mcols(dataTable)[, "feature"])
186
+    S4Vectors::mcols(dataTable)[missingIndices, "feature"] <- colnames(dataTable)[missingIndices]
187
+    
188
+    # Finally, a column annotation recording variable name and which table it originated from for all of the source tables.
189
+    S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("assay", "feature")]
190
+    
191
+    # Subset to only the desired features.
192
+    useFeaturesSubset <- useFeatures[useFeatures[, "feature"] != "all", ]
193
+    if(nrow(useFeaturesSubset) > 0)
194
+    {
195
+      uniqueAssays <- unique(useFeatures[, "assay"])
196
+      for(filterAssay in uniqueAssays)
197
+      {
198
+        dropFeatures <- S4Vectors::mcols(dataTable)[, "assay"] == filterAssay &
199
+                        !S4Vectors::mcols(dataTable)[, "feature"] %in% useFeatures[useFeatures[, 1] == filterAssay, 2]
200
+        dataTable <- dataTable[, !dropFeatures]
201
+      }
202
+    }
203
+    dataTable <- dataTable[, -match("primary", colnames(dataTable))]
204
+  } else { # Must have only been clinical data.
205
+    dataTable <- MultiAssayExperiment::colData(measurements)
206
+    S4Vectors::mcols(dataTable) <- DataFrame(assay = "clinical", feature = colnames(dataTable))
207
+  }
208
+    
161 209
   # Do other filtering and preparation in DataFrame function.
162
-  prepareData(.MAEtoWideTable(measurements, outcomeColumns, useFeatures), make.names(outcomeColumns), "all")
210
+  prepareData(dataTable, outcomeColumns, useFeatures = "all", ...)
163 211
 })
164 212
\ No newline at end of file
... ...
@@ -64,8 +64,6 @@
64 64
 #' @param margin The margin to have around the plot.
65 65
 #' @param showLegend If \code{TRUE}, a legend is plotted next to the plot. If
66 66
 #' FALSE, it is hidden.
67
-#' @param plot Logical. If \code{TRUE}, a plot is produced on the current
68
-#' graphics device.
69 67
 #' @param parallelParams An object of class \code{\link{MulticoreParam}} or
70 68
 #' \code{\link{SnowParam}}.
71 69
 #' @param ... Not used by end user.
... ...
@@ -126,7 +124,7 @@ setMethod("rankingPlot", "list",
126 124
                    title = if(comparison[1] == "within") "Feature Ranking Stability" else "Feature Ranking Commonality",
127 125
                    yLabel = if(is.null(referenceLevel)) "Average Common Features (%)" else paste("Average Common Features with", referenceLevel, "(%)"),
128 126
                    margin = grid::unit(c(1, 1, 1, 1), "lines"),
129
-                   showLegend = TRUE, plot = TRUE, parallelParams = bpparam())
127
+                   showLegend = TRUE, parallelParams = bpparam())
130 128
 {
131 129
   if(!requireNamespace("ggplot2", quietly = TRUE))
132 130
     stop("The package 'ggplot2' could not be found. Please install it.")
... ...
@@ -291,8 +289,5 @@ setMethod("rankingPlot", "list",
291 289
   
292 290
   overlapPlot <- overlapPlot + ggplot2::facet_grid(ggplot2::vars(!!rowVariable), ggplot2::vars(!!columnVariable)) + ggplot2::theme(strip.text = ggplot2::element_text(size = sizesList[["fontSizes"]][6]))
293 291
   
294
-  if(plot == TRUE)
295
-    print(overlapPlot)
296
-  
297 292
   overlapPlot
298 293
 })
... ...
@@ -19,7 +19,7 @@
19 19
 #' containing either classes or time and event information about survival.
20 20
 #' @param measurementsTest Same data type as \code{measurementsTrain}, but only the test
21 21
 #' samples.
22
-#' @param outcomeTest Same data type as \code{outcomeTrain}, but only the test
22
+#' @param outcomeTest Same data type as \code{outcomeTrain}, but for only the test
23 23
 #' samples.
24 24
 #' @param crossValParams An object of class \code{\link{CrossValParams}},
25 25
 #' specifying the kind of cross-validation to be done, if nested
... ...
@@ -27,16 +27,11 @@
27 27
 #' @param modellingParams An object of class \code{\link{ModellingParams}},
28 28
 #' specifying the class rebalancing, transformation (if any), feature selection
29 29
 #' (if any), training and prediction to be done on the data set.
30
-#' @param targets If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
31
-#' names of the data tables to be used. \code{"clinical"} is also a valid value
32
-#' and specifies that numeric variables from the clinical data table will be
33
-#' used.
34 30
 #' @param outcomeColumns If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
35 31
 #' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
36 32
 #' that contain(s) the samples' outcome to use for prediction.
37
-#' @param ... Variables not used by the \code{matrix} nor the
38
-#' \code{MultiAssayExperiment} method which are passed into and used by the
39
-#' \code{DataFrame} method.
33
+#' @param ... Variables not used by the \code{matrix} nor the \code{MultiAssayExperiment} method which
34
+#' are passed into and used by the \code{DataFrame} method or passed onwards to \code{\link{prepareData}}.
40 35
 #' @param characteristics A \code{\link{DataFrame}} describing the
41 36
 #' characteristics of the classification used. First column must be named
42 37
 #' \code{"charateristic"} and second column must be named \code{"value"}.
... ...
@@ -71,8 +66,7 @@
71 66
 #' @importFrom S4Vectors do.call mcols
72 67
 #' @usage NULL
73 68
 #' @export
74
-setGeneric("runTest", function(measurementsTrain, ...)
75
-           standardGeneric("runTest"))
69
+setGeneric("runTest", function(measurementsTrain, ...) standardGeneric("runTest"))
76 70
 
77 71
 #' @rdname runTest
78 72
 #' @export
... ...
@@ -91,7 +85,7 @@ setMethod("runTest", "matrix", # Matrix of numeric measurements.
91 85
 setMethod("runTest", "DataFrame", # Sample information data or one of the other inputs, transformed.
92 86
 function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest,
93 87
          crossValParams = CrossValParams(), # crossValParams might be used for tuning optimisation.
94
-         modellingParams = ModellingParams(), characteristics = S4Vectors::DataFrame(), verbose = 1, .iteration = NULL)
88
+         modellingParams = ModellingParams(), characteristics = S4Vectors::DataFrame(), ..., verbose = 1, .iteration = NULL)
95 89
 {
96 90
   if(is.null(.iteration)) # Not being called by runTests but by user. So, check the user input.
97 91
   {
... ...
@@ -100,7 +94,7 @@ function(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest,
100 94
     if(any(is.na(measurementsTrain)))
101 95
       stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.")                
102 96
     
103
-    splitDatasetTrain <- prepareData(measurementsTrain, outcomeTrain)
97
+    splitDatasetTrain <- prepareData(measurementsTrain, outcomeTrain, ...)
104 98
       
105 99
     # Rebalance the class sizes of the training samples by either downsampling or upsampling
106 100
     # or leave untouched if balancing is none.
... ...
@@ -223,6 +217,8 @@ input data. Autmomatically reducing to smaller number.")
223 217
     predictedOutcome <- tryCatch(.doTest(trained[["model"]], measurementsTest, modellingParams@predictParams, verbose),
224 218
                                 error = function(error) error[["message"]]
225 219
                                 )
220
+    
221
+        predictedOutcome <- .doTest(trained[["model"]], measurementsTest, modellingParams@predictParams, verbose)
226 222
 
227 223
     if(is.character(predictedOutcome)) # An error occurred.
228 224
       return(predictedOutcome) # Return early.
... ...
@@ -328,24 +324,34 @@ input data. Autmomatically reducing to smaller number.")
328 324
     }
329 325
 
330 326
     ClassifyResult(characteristics, allSamples, originalFeatures, list(rankedFeatures), list(selectedFeatures),
331
-                   list(models), tuneDetails, S4Vectors::DataFrame(sample = rownames(measurementsTest), predictedOutcome, check.names = FALSE), allOutcome, importanceTable)
327
+                   list(models), tuneDetails, S4Vectors::DataFrame(sample = rownames(measurementsTest), predictedOutcome, check.names = FALSE), allOutcome, importanceTable, modellingParams, list(models))
332 328
   }  
333 329
 })
334 330
 
335 331
 #' @rdname runTest
336 332
 #' @export
337 333
 setMethod("runTest", c("MultiAssayExperiment"),
338
-          function(measurementsTrain, measurementsTest, targets = names(measurements), outcomeColumns, ...)
334
+          function(measurementsTrain, measurementsTest, outcomeColumns, ...)
339 335
 {
340
-  omicsTargets <- setdiff(targets, "clinical")
341
-  if(length(omicsTargets) > 0)
342
-  {
343
-    if(any(anyReplicated(measurements[, , omicsTargets])))
344
-      stop("Data set contains replicates. Please provide remove or average replicate observations and try again.")
336
+  prepArgsTrain <- list(measurementsTrain, outcomeColumns)
337
+  prepArgsTest <- list(measurementsTest, outcomeColumns)
338
+  extraInputs <- list(...)
339
+  if(length(extraInputs) > 0)
340
+    prepExtras <- which(names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
341
+  if(length(prepExtras) > 0)
342
+  {      
343
+    prepArgsTrain <- append(prepArgsTrain, extraInputs[prepExtras])
344
+    prepArgsTest <- append(prepArgsTest, extraInputs[prepExtras])
345 345
   }
346
+  measurementsAndOutcomeTrain <- do.call(prepareData, prepArgs)
347
+  measurementsAndOutcomeTest <- do.call(prepareData, prepArgs)
346 348
   
347
-  tablesAndClassesTrain <- .MAEtoWideTable(measurementsTrain, targets, outcomeColumns, restrict = NULL)
348
-  tablesAndClassesTest <- .MAEtoWideTable(measurementsTest, targets, outcomeColumns, restrict = NULL)
349
-  runTest(tablesAndClassesTrain[["dataTable"]], tablesAndClassesTrain[["outcome"]],
350
-          tablesAndClassesTest[["dataTable"]], tablesAndClassesTest[["outcome"]], ...)            
349
+  runTestArgs <- list(measurementsAndOutcomeTrain[["measurements"]], measurementsAndOutcomeTrain[["outcome"]],
350
+                      measurementsAndOutcomeTest[["measurements"]], measurementsAndOutcomeTest[["outcome"]])
351
+  if(length(extraInputs) > 0 && (length(prepExtras) == 0 || length(extraInputs[-prepExtras]) > 0))
352
+  {
353
+    if(length(prepExtras) == 0) runTestArgs <- append(runTestArgs, extraInputs) else
354
+    runTestArgs <- append(runTestArgs, extraInputs[-prepExtras])
355
+  }
356
+  do.call(runTest, runTestArgs)
351 357
 })
... ...
@@ -26,15 +26,11 @@
26 26
 #' package.  Transformation, selection and prediction functions provided by
27 27
 #' this package will cause the characteristics to be automatically determined
28 28
 #' and this can be left blank.
29
-#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the
30
-#' names of the data tables to be used. \code{"clinical"} is also a valid value
31
-#' and specifies that the clinical data table will be used.
32 29
 #' @param outcomeColumns If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
33 30
 #' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
34 31
 #' that contain(s)s the samples' outcome to use for prediction.
35
-#' @param ... Variables not used by the \code{matrix} nor the
36
-#' \code{MultiAssayExperiment} method which are passed into and used by the
37
-#' \code{DataFrame} method.
32
+#' @param ... Variables not used by the \code{matrix} nor the \code{MultiAssayExperiment} method which
33
+#' are passed into and used by the \code{DataFrame} method or passed onwards to \code{\link{prepareData}}.
38 34
 #' @param verbose Default: 1. A number between 0 and 3 for the amount of
39 35
 #' progress messages to give.  A higher number will produce more messages as
40 36
 #' more lower-level functions print messages.
... ...
@@ -72,7 +68,7 @@ setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix
72 68
 #' @rdname runTests
73 69
 #' @export
74 70
 setMethod("runTests", "DataFrame", function(measurements, outcome, crossValParams = CrossValParams(), modellingParams = ModellingParams(),
75
-           characteristics = S4Vectors::DataFrame(), verbose = 1)
71
+           characteristics = S4Vectors::DataFrame(), ..., verbose = 1)
76 72
 {
77 73
   # Get out the outcome if inside of data table.           
78 74
   if(is.null(rownames(measurements)))
... ...
@@ -83,7 +79,7 @@ setMethod("runTests", "DataFrame", function(measurements, outcome, crossValParam
83 79
 
84 80
   originalFeatures <- colnames(measurements)
85 81
   if("feature" %in% colnames(S4Vectors::mcols(measurements))) originalFeatures <- S4Vectors::mcols(measurements)[, c("assay", "feature")]                 
86
-  splitDataset <- prepareData(measurements, outcome)
82
+  splitDataset <- prepareData(measurements, outcome, ...)
87 83
   measurements <- splitDataset[["measurements"]]
88 84
   outcome <- splitDataset[["outcome"]]
89 85
   
... ...
@@ -172,23 +168,32 @@ input data. Autmomatically reducing to smaller number.")
172 168
   if(!is.null(results[[1]][["importance"]]))
173 169
     importance <- do.call(rbind, lapply(results, "[[", "importance"))
174 170
   
171
+  fullResult <- runTest(measurements, outcome, measurements, outcome, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1)
172
+  
175 173
   ClassifyResult(characteristics, rownames(measurements), originalFeatures,
176 174
                  lapply(results, "[[", "ranked"), lapply(results, "[[", "selected"),
177
-                 lapply(results, "[[", "models"), tuneList, predictionsTable, outcome, importance, modellingParams)
175
+                 lapply(results, "[[", "models"), tuneList, predictionsTable, outcome, importance, modellingParams, list(fullResult$models))
178 176
 })
179 177
 
180 178
 #' @rdname runTests
179
+#' @import MultiAssayExperiment methods
181 180
 #' @export
182 181
 setMethod("runTests", c("MultiAssayExperiment"),
183
-          function(measurements, targets = names(measurements), outcomeColumns, ...)
182
+          function(measurements, outcomeColumns, ...)
184 183
 {
185
-  omicsTargets <- setdiff(targets, "clinical")
186
-  if(length(omicsTargets) > 0)
184
+  prepArgs <- list(measurements, outcomeColumns)              
185
+  extraInputs <- list(...)
186
+  if(length(extraInputs) > 0)
187
+    prepExtras <- which(names(extrasInputs) %in% .ClassifyRenvir[["prepareDataFormals"]])
188
+  if(length(prepExtras) > 0)
189
+    prepArgs <- append(prepArgs, extraInputs[prepExtras])
190
+  measurementsAndOutcome <- do.call(prepareData, prepArgs)
191
+  
192
+  runTestsArgs <- list(measurementsAndOutcome[["measurements"]], measurementsAndOutcome[["outcome"]])
193
+  if(length(extraInputs) > 0 && (length(prepExtras) == 0 || length(extraInputs[-prepExtras]) > 0))
187 194
   {
188
-    if(any(anyReplicated(measurements[, , omicsTargets])))
189
-      stop("Data set contains replicates. Please provide remove or average replicate observations and try again.")
195
+    if(length(prepExtras) == 0) runTestsArgs <- append(runTestsArgs, extraInputs) else
196
+    runTestsArgs <- append(runTestsArgs, extraInputs[-prepExtras])
190 197
   }
191
-  
192
-  tablesAndOutcome <- .MAEtoWideTable(measurements, targets, outcomeColumns, restrict = NULL)
193
-  runTests(tablesAndOutcome[["dataTable"]], tablesAndOutcome[["outcome"]], ...)            
198
+  do.call(runTests, runTestsArgs)
194 199
 })
... ...
@@ -1,60 +1,3 @@
1
-# Function to convert a MultiAssayExperiment object into a flat DataFrame table, to enable it
2
-# to be used in typical model building functions.
3
-# Returns a list with a covariate table and and outcome vector/table, or just a covariate table
4
-# in the case the input is a test data set.
5
-.MAEtoWideTable <- function(measurements, outcomeColumns, useFeatures)
6
-{
7
-  clinicalColumns <- colnames(MultiAssayExperiment::colData(measurements))    
8
-  if("clinical" %in% useFeatures[, 1])
9
-  {
10
-    clinicalRows <- useFeatures[, 1] == "clinical"      
11
-    clinicalColumns <- useFeatures[clinicalRows, 2]
12
-    useFeatures <- useFeatures[!clinicalRows, ]
13
-  } else {
14
-    clinicalColumns <- NULL
15
-  }
16
-  
17
-  if(nrow(useFeatures) > 0)
18
-  {
19
-    measurements <- measurements[, , unique(useFeatures[, 1])]
20
-  
21
-    # Get all desired measurements tables and clinical columns (other than the columns representing outcome).
22
-    # These form the independent variables to be used for making predictions with.
23
-    # Variable names will have names like RNA_BRAF for traceability.
24
-    dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(clinicalColumns, outcomeColumns))
25
-    rownames(dataTable) <- dataTable[, "primary"]
26
-    S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "clinical", S4Vectors::mcols(dataTable)[, "sourceName"])
27
-    colnames(S4Vectors::mcols(dataTable))[1] <- "assay"
28
-            
29
-    # Sample information variable names not included in column metadata of wide table but only as row names of it.
30
-    # Create a combined column named "feature" which has feature names of the assays as well as the clinical.
31
-    S4Vectors::mcols(dataTable)[, "feature"] <- as.character(S4Vectors::mcols(dataTable)[, "rowname"])
32
-    missingIndices <- is.na(S4Vectors::mcols(dataTable)[, "feature"])
33
-    S4Vectors::mcols(dataTable)[missingIndices, "feature"] <- colnames(dataTable)[missingIndices]
34
-    
35
-    # Finally, a column annotation recording variable name and which table it originated from for all of the source tables.
36
-    S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("assay", "feature")]
37
-    
38
-    # Subset to only the desired features.
39
-    useFeaturesSubset <- useFeatures[useFeatures[, 2] != "all", ]
40
-    if(nrow(useFeaturesSubset) > 0)
41
-    {
42
-      uniqueAssays <- unique(useFeatures[, 1])
43
-      for(filterAssay in uniqueAssays)
44
-      {
45
-        dropFeatures <- S4Vectors::mcols(dataTable)[, "assay"] == filterAssay &
46
-                        !S4Vectors::mcols(dataTable)[, "feature"] %in% useFeatures[useFeatures[, 1] == filterAssay, 2]
47
-        dataTable <- dataTable[, !dropFeatures]
48
-      }
49
-    }
50
-    dataTable <- dataTable[, -match("primary", colnames(dataTable))]
51
-  } else { # Must have only been clinical data.
52
-    dataTable <- MultiAssayExperiment::colData(measurements)
53
-    S4Vectors::mcols(dataTable) <- DataFrame(assay = "clinical", feature = colnames(dataTable))
54
-  }
55
-  dataTable
56
-}
57
-
58 1
 # Creates two lists of lists. First has training samples, second has test samples for a range
59 2
 # of different cross-validation schemes.
60 3
 #' @import utils
... ...
@@ -329,7 +272,7 @@
329 272
       modellingParams@trainParams@otherParams <- c(modellingParams@trainParams@otherParams, as.list(tuneCombos[rowIndex, ]))
330 273
       if(crossValParams@tuneMode == "Resubstitution")
331 274
       {
332
-        result <- runTest(measurementsTrain, outcomeTrain, measurementsTest, outcomeTest,
275
+        result <- runTest(measurementsTrain, outcomeTrain, measurementsTrain, outcomeTrain,
333 276
                           crossValParams = NULL, modellingParams,
334 277
                           verbose = verbose, .iteration = "internal")
335 278
         
... ...
@@ -8,7 +8,12 @@
8 8
 \alias{crossValidate,data.frame-method}
9 9
 \alias{crossValidate,MultiAssayExperiment-method}
10 10
 \alias{crossValidate,list-method}
11
-\alias{predict,ClassifyResult-method}
11
+\alias{train.matrix}
12
+\alias{train.data.frame}
13
+\alias{train.DataFrame}
14
+\alias{train.list}
15
+\alias{train.MultiAssayExperiment}
16
+\alias{predict.trainedByClassifyR}
12 17
 \title{Cross-validation to evaluate classification performance.}
13 18
 \usage{
14 19
 crossValidate(measurements, outcome, ...)
... ...
@@ -21,7 +26,7 @@ crossValidate(measurements, outcome, ...)
21 26
   selectionOptimisation = "Resubstitution",
22 27
   classifier = "randomForest",
23 28
   multiViewMethod = "none",
24
-  assayCombinations = NULL,
29
+  assayCombinations = "all",
25 30
   nFolds = 5,
26 31
   nRepeats = 20,
27 32
   nCores = 1,
... ...
@@ -31,17 +36,18 @@ crossValidate(measurements, outcome, ...)
31 36
 
32 37
 \S4method{crossValidate}{MultiAssayExperiment}(
33 38
   measurements,
34
-  outcome,
39
+  outcomeColumns,
35 40
   nFeatures = 20,
36 41
   selectionMethod = "t-test",
37 42
   selectionOptimisation = "Resubstitution",
38 43
   classifier = "randomForest",
39 44
   multiViewMethod = "none",
40
-  assayCombinations = NULL,
45
+  assayCombinations = "all",
41 46
   nFolds = 5,
42 47
   nRepeats = 20,
43 48
   nCores = 1,
44
-  characteristicsLabel = NULL
49
+  characteristicsLabel = NULL,
50
+  ...
45 51
 )
46 52
 
47 53
 \S4method{crossValidate}{data.frame}(
... ...
@@ -52,11 +58,12 @@ crossValidate(measurements, outcome, ...)
52 58
   selectionOptimisation = "Resubstitution",
53 59
   classifier = "randomForest",
54 60
   multiViewMethod = "none",
55
-  assayCombinations = NULL,
61
+  assayCombinations = "all",
56 62
   nFolds = 5,
57 63
   nRepeats = 20,
58 64
   nCores = 1,
59
-  characteristicsLabel = NULL
65
+  characteristicsLabel = NULL,
66
+  ...
60 67
 )
61 68
 
62 69
 \S4method{crossValidate}{matrix}(
... ...
@@ -67,11 +74,12 @@ crossValidate(measurements, outcome, ...)
67 74
   selectionOptimisation = "Resubstitution",
68