Browse code

- crossVaildate now allows input of assay name for a single data table. - References to data set have been replaced by assay to remove ambiguity.

Dario Strbenac authored on 22/07/2022 10:30:06
Showing 22 changed files

... ...
@@ -1308,7 +1308,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL"))
1308 1308
 #'     modellingParams <- ModellingParams()
1309 1309
 #'     classified <-
1310 1310
 #'     runTests(measurements, classes, LOOCVparams, modellingParams,
1311
-#'              DataFrame(characteristic = c("dataset", "classification"),
1311
+#'              DataFrame(characteristic = c("Data Set", "Classification"),
1312 1312
 #'                       value = c("Asthma", "Different Means"))
1313 1313
 #'              )
1314 1314
 #'     class(classified)
... ...
@@ -7,23 +7,28 @@
7 7
 #' or a list of these objects containing the training data.  For a
8 8
 #' \code{matrix} and \code{data.frame}, the rows are samples and the columns are features. For a \code{data.frame} or \code{\link{MultiAssayExperiment}} assay
9 9
 #' the rows are features and the columns are samples, as is typical in Bioconductor.
10
-#' @param classes A vector of class labels of class \code{\link{factor}} of the
10
+#' @param outcomes A vector of class labels of class \code{\link{factor}} of the
11 11
 #' same length as the number of samples in \code{measurements} or a character vector of length 1 containing the
12 12
 #' column name in \code{measurements} if it is a \code{\link{DataFrame}} or the
13 13
 #' column name in \code{colData(measurements)} if \code{measurements} is a \code{\link{MultiAssayExperiment}}. If a column name, that column will be
14
-#' removed before training.
14
+#' removed before training. Or a \code{\link{Surv}} object or a character vector of length 2 or 3 specifying the time and event columns in
15
+#' \code{measurements} for survival outcome.
16
+#' @param ... Arguments other than measurements and outcomes in the generic.
17
+#' @param assayName An informative name describing the data (e.g. RNA-seq) table if the input is a data frame or matrix. Not used if input
18
+#' is \code{MultiAssayExperiment} or other list-like structure because it will already have assay names in the experiment list. This
19
+#' name will be stored in the characteristics table of the result as Assay Name characteristic.
15 20
 #' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons
16
-#' or datasets. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple datasets, 
17
-#' a different number of features will be used for each dataset. If a named list of vectors, the respective number of features will be optimised over. 
21
+#' or assays. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple assays, 
22
+#' a different number of features will be used for each assay. If a named list of vectors, the respective number of features will be optimised over. 
18 23
 #' Set to NULL or "all" if all features should be used.
19
-#' @param selectionMethod A character vector of feature selection methods to compare. If a named character vector with names corresponding to different datasets, 
20
-#' and performing multiview classification, the respective classification methods will be used on each dataset.
24
+#' @param selectionMethod A character vector of feature selection methods to compare. If a named character vector with names corresponding to different assays, 
25
+#' and performing multiview classification, the respective classification methods will be used on each assay.
21 26
 #' @param selectionOptimisation A character of "Resubstitution", "Nested CV" or "none" specifying the approach used to optimise nFeatures. 
22
-#' @param classifier A character vector of classification methods to compare. If a named character vector with names corresponding to different datasets, 
23
-#' and performing multiview classification, the respective classification methods will be used on each dataset.
27
+#' @param classifier A character vector of classification methods to compare. If a named character vector with names corresponding to different assays, 
28
+#' and performing multiview classification, the respective classification methods will be used on each assay.
24 29
 #' @param multiViewMethod A character vector specifying the multiview method or data integration approach to use.
25
-#' @param dataCombinations A character vector or list of character vectors proposing the datasets or, in the case of a list, combination of datasets to use
26
-#' with each element being a vector of datasets to combine.
30
+#' @param assayCombinations A character vector or list of character vectors proposing the assays or, in the case of a list, combination of assays to use
31
+#' with each element being a vector of assays to combine.
27 32
 #' @param nFolds A numeric specifying the number of folds to use for cross-validation.
28 33
 #' @param nRepeats A numeric specifying the the number of repeats or permutations to use for cross-validation.
29 34
 #' @param nCores A numeric specifying the number of cores used if the user wants to use parallelisation. 
... ...
@@ -36,9 +41,9 @@
36 41
 #' 
37 42
 #' \code{selectionMethod} can be any of the following implemented approaches -  none, t-test, limma, edgeR, NSC, Bartlett, Levene, DMD, likelihoodRatio, KS or KL.
38 43
 #' 
39
-#' \code{multiViewMethod} can take a few different values. Using \code{merge} will merge or bind the datasets after feature selection. 
40
-#'  Using \code{prevlidation} will build prevalidated vectors on all the datasets except the clinical data. There must be a dataset called clinical.
41
-#'  Using \code{pca} will perform pca on each dataset and then merge the top few components with the clinical data. There must be a dataset called clinical.
44
+#' \code{multiViewMethod} can take a few different values. Using \code{merge} will merge or bind the assays after feature selection. 
45
+#'  Using \code{prevalidation} will build prevalidated vectors on all the assays except the clinical data. There must be a assay called clinical.
46
+#'  Using \code{PCA} will perform Principal Components Analysis on each assay and then merge the top few components with the clinical data. There must be a assay called clinical.
42 47
 #'
43 48
 #' @return An object of class \code{\link{ClassifyResult}}
44 49
 #' @export
... ...
@@ -55,52 +60,42 @@
55 60
 #' performancePlot(result)
56 61
 #' 
57 62
 #' 
58
-#' # Compare performance of different datasets. 
59
-#' # First make a toy example dataset with multiple data types. We'll randomly assign different features to be clinical, gene or protein.
63
+#' # Compare performance of different assays. 
64
+#' # First make a toy example assay with multiple data types. We'll randomly assign different features to be clinical, gene or protein.
60 65
 #' # set.seed(51773)
61 66
 #' # measurements <- DataFrame(measurements, check.names = FALSE)
62
-#' # mcols(measurements)$dataset <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE))
67
+#' # mcols(measurements)$assay <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE))
63 68
 #' # mcols(measurements)$feature <- colnames(measurements)
64 69
 #' 
65
-#' # We'll use different nFeatures for each dataset. We'll also use repeated cross-validation with 5 repeats for speed in the example.
70
+#' # We'll use different nFeatures for each assay. We'll also use repeated cross-validation with 5 repeats for speed in the example.
66 71
 #' # set.seed(51773)
67 72
 #' #result <- crossValidate(measurements, classes, nFeatures = c(clinical = 5, gene = 20, protein = 30), classifier = "randomForest", nRepeats = 5)
68 73
 #' # performancePlot(result)
69 74
 #' 
70
-#' # Merge different datasets. But we will only do this for two combinations. If dataCombinations is not specified it would attempt all combinations.
75
+#' # Merge different assays. But we will only do this for two combinations. If assayCombinations is not specified it would attempt all combinations.
71 76
 #' # set.seed(51773)
72
-#' # resultMerge <- crossValidate(measurements, classes, dataCombinations = list(c("clinical", "protein"), c("clinical", "gene")), multiViewMethod = "merge", nRepeats = 5)
77
+#' # resultMerge <- crossValidate(measurements, classes, assayCombinations = list(c("clinical", "protein"), c("clinical", "gene")), multiViewMethod = "merge", nRepeats = 5)
73 78
 #' # performancePlot(resultMerge)
74 79
 #' 
75 80
 #' 
76 81
 #' # performancePlot(c(result, resultMerge))
77 82
 #' 
78 83
 #' @importFrom survival Surv
79
-setGeneric("crossValidate", function(measurements,
80
-                                     classes,
81
-                                     nFeatures = 20,
82
-                                     selectionMethod = "t-test",
83
-                                     selectionOptimisation = "Resubstitution",
84
-                                     classifier = "randomForest",
85
-                                     multiViewMethod = "none",
86
-                                     dataCombinations = NULL,
87
-                                     nFolds = 5,
88
-                                     nRepeats = 20,
89
-                                     nCores = 1,
90
-                                     characteristicsLabel = NULL)
84
+setGeneric("crossValidate", function(measurements, outcomes, ...)
91 85
     standardGeneric("crossValidate"))
92 86
 
93 87
 #' @rdname crossValidate
94 88
 #' @export
95 89
 setMethod("crossValidate", "DataFrame", 
96 90
           function(measurements,
97
-                   classes, 
91
+                   outcomes,
92
+                   assayName = NULL,
98 93
                    nFeatures = 20,
99 94
                    selectionMethod = "t-test",
100 95
                    selectionOptimisation = "Resubstitution",
101 96
                    classifier = "randomForest",
102 97
                    multiViewMethod = "none",
103
-                   dataCombinations = NULL,
98
+                   assayCombinations = NULL,
104 99
                    nFolds = 5,
105 100
                    nRepeats = 20,
106 101
                    nCores = 1,
... ...
@@ -108,10 +103,16 @@ setMethod("crossValidate", "DataFrame",
108 103
 
109 104
           {
110 105
               # Check that data is in the right format
111
-              splitDataset <- .splitDataAndOutcomes(measurements, classes)
112
-              measurements <- splitDataset[["measurements"]]
113
-              classes <- splitDataset[["outcomes"]]
114
-              checkData(measurements, classes)
106
+              splitAssay <- .splitDataAndOutcomes(measurements, outcomes)
107
+              measurements <- splitAssay[["measurements"]]
108
+              outcomes <- splitAssay[["outcomes"]]
109
+              
110
+              # Which data-types or data-views are present?
111
+              assayIDs <- unique(mcols(measurements)[, "assay"])
112
+              if(is.null(assayIDs))
113
+                assayIDs <- 1
114
+              
115
+              checkData(measurements, outcomes)
115 116
 
116 117
               # Check that other variables are in the right format and fix
117 118
               nFeatures <- cleanNFeatures(nFeatures = nFeatures,
... ...
@@ -128,10 +129,6 @@ setMethod("crossValidate", "DataFrame",
128 129
               
129 130
               classifier <- cleanClassifier(classifier = classifier,
130 131
                                             measurements = measurements)
131
-
132
-              # Which data-types or data-views are present?
133
-              datasetIDs <- unique(mcols(measurements)[, "dataset"])
134
-              if(is.null(datasetIDs)) datasetIDs <- 1
135 132
               
136 133
               ##!!!!! Do something with data combinations
137 134
 
... ...
@@ -146,34 +143,34 @@ setMethod("crossValidate", "DataFrame",
146 143
 
147 144
               if(multiViewMethod == "none"){
148 145
 
149
-                  # The below loops over dataset and classifier and allows us to answer
146
+                  # The below loops over assay and classifier and allows us to answer
150 147
                   # the following questions:
151 148
                   #
152
-                  # 1) One dataset using one classifier
153
-                  # 2) One dataset using multi classifiers
154
-                  # 3) Multi datasets individually
149
+                  # 1) One assay using one classifier
150
+                  # 2) One assay using multi classifiers
151
+                  # 3) Multi assays individually
155 152
                   
156 153
                   # We should probably transition this to use grid instead.
157
-
154
+                  
158 155
                   resClassifier <-
159
-                      sapply(datasetIDs, function(dataIndex) {
160
-                          # Loop over datasets
161
-                          sapply(classifier[[dataIndex]], function(classifierIndex) {
156
+                      sapply(assayIDs, function(assayIndex) {
157
+                          # Loop over assays
158
+                          sapply(classifier[[assayIndex]], function(classifierIndex) {
162 159
                               # Loop over classifiers
163
-                              sapply(selectionMethod[[dataIndex]], function(selectionIndex) {
160
+                              sapply(selectionMethod[[assayIndex]], function(selectionIndex) {
164 161
                                   # Loop over classifiers
165 162
                                   set.seed(seed)
166 163
                                   measurementsUse <- measurements
167
-                                  if(!is.null(mcols(measurements))) measurementsUse <- measurements[, mcols(measurements)[, "dataset"] == dataIndex, drop = FALSE]
168
-                                  
164
+                                  if(!is.null(assayName)) attr(measurementsUse, "assayName") <- assayName
165
+                                  if(assayIndex != 1) measurementsUse <- measurements[, mcols(measurements)[, "assay"] == assayIndex, drop = FALSE]
169 166
                                   CV(
170
-                                      measurements = measurementsUse, classes = classes,
171
-                                      nFeatures = nFeatures[dataIndex],
167
+                                      measurements = measurementsUse, outcomes = outcomes,
168
+                                      assayIDs = assayIndex,
169
+                                      nFeatures = nFeatures[assayIndex],
172 170
                                       selectionMethod = selectionIndex,
173 171
                                       selectionOptimisation = selectionOptimisation,
174 172
                                       classifier = classifierIndex,
175 173
                                       multiViewMethod = multiViewMethod,
176
-                                      dataCombinations = dataIndex,
177 174
                                       nFolds = nFolds,
178 175
                                       nRepeats = nRepeats,
179 176
                                       nCores = nCores,
... ...
@@ -201,21 +198,20 @@ setMethod("crossValidate", "DataFrame",
201 198
               if(multiViewMethod == "merge"){
202 199
 
203 200
 
204
-                  # The below loops over different combinations of datasets and merges them together.
205
-                  # This allows someone to answer which combinations of the datasets might be most useful.
201
+                  # The below loops over different combinations of assays and merges them together.
202
+                  # This allows someone to answer which combinations of the assays might be most useful.
206 203
 
207 204
 
208
-                  if(is.null(dataCombinations)) dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE)))
205
+                  if(is.null(assayCombinations)) assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
209 206
 
210
-                  result <- sapply(dataCombinations, function(dataIndex){
211
-                      CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex],
212
-                         classes = classes,
213
-                         nFeatures = nFeatures[dataIndex],
214
-                         selectionMethod = selectionMethod[dataIndex],
207
+                  result <- sapply(assayCombinations, function(assayIndex){
208
+                      CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex],
209
+                         outcomes = outcomes, assayIDs = assayIndex,
210
+                         nFeatures = nFeatures[assayIndex],
211
+                         selectionMethod = selectionMethod[assayIndex],
215 212
                          selectionOptimisation = selectionOptimisation,
216
-                         classifier = classifier[dataIndex],
217
-                         multiViewMethod = ifelse(length(dataIndex)==1, "none", multiViewMethod),
218
-                         dataCombinations = dataIndex,
213
+                         classifier = classifier[assayIndex],
214
+                         multiViewMethod = ifelse(length(assayIndex) == 1, "none", multiViewMethod),
219 215
                          nFolds = nFolds,
220 216
                          nRepeats = nRepeats,
221 217
                          nCores = nCores,
... ...
@@ -229,26 +225,26 @@ setMethod("crossValidate", "DataFrame",
229 225
               if(multiViewMethod == "prevalidation"){
230 226
 
231 227
 
232
-                  # The below loops over different combinations of datasets and combines them together using prevalidation.
233
-                  # This allows someone to answer which combinations of the datasets might be most useful.
228
+                  # The below loops over different combinations of assays and combines them together using prevalidation.
229
+                  # This allows someone to answer which combinations of the assays might be most useful.
234 230
 
235 231
 
236
-                  if(is.null(dataCombinations)){
237
-                      dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE)))
238
-                      dataCombinations <- dataCombinations[sapply(dataCombinations, function(x)"clinical"%in%x, simplify = TRUE)]
239
-                      if(length(dataCombinations)==0) stop("No dataCombinations with `clinical` data")
232
+                  if(is.null(assayCombinations))
233
+                  {
234
+                      assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
235
+                      assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)]
236
+                      if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data")
240 237
                   }
241 238
 
242 239
 
243
-                  result <- sapply(dataCombinations, function(dataIndex){
244
-                      CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex],
245
-                         classes = classes,
246
-                         nFeatures = nFeatures[dataIndex],
247
-                         selectionMethod = selectionMethod[dataIndex],
240
+                  result <- sapply(assayCombinations, function(assayIndex){
241
+                      CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex],
242
+                         outcomes = outcomes, assayIDs = assayIndex,
243
+                         nFeatures = nFeatures[assayIndex],
244
+                         selectionMethod = selectionMethod[assayIndex],
248 245
                          selectionOptimisation = selectionOptimisation,
249
-                         classifier = classifier[dataIndex],
250
-                         multiViewMethod = ifelse(length(dataIndex)==1, "none", multiViewMethod),
251
-                         dataCombinations = dataIndex,
246
+                         classifier = classifier[assayIndex],
247
+                         multiViewMethod = ifelse(length(assayIndex) == 1, "none", multiViewMethod),
252 248
                          nFolds = nFolds,
253 249
                          nRepeats = nRepeats,
254 250
                          nCores = nCores,
... ...
@@ -259,30 +255,29 @@ setMethod("crossValidate", "DataFrame",
259 255
 
260 256
 
261 257
 
262
-              ### Prevalidation to combine data
263
-              if(multiViewMethod == "pca"){
258
+              ### Principal Components Analysis to combine data
259
+              if(multiViewMethod == "PCA"){
264 260
 
265 261
 
266
-                  # The below loops over different combinations of datasets and combines them together using prevalidation.
267
-                  # This allows someone to answer which combinations of the datasets might be most useful.
262
+                  # The below loops over different combinations of assays and combines them together using prevalidation.
263
+                  # This allows someone to answer which combinations of the assays might be most useful.
268 264
 
269 265
 
270
-                  if(is.null(dataCombinations)){
271
-                      dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE)))
272
-                      dataCombinations <- dataCombinations[sapply(dataCombinations, function(x)"clinical"%in%x, simplify = TRUE)]
273
-                      if(length(dataCombinations)==0) stop("No dataCombinations with `clinical` data")
266
+                  if(is.null(assayCombinations)){
267
+                      assayCombinations <- do.call("c", sapply(seq_along(assayIDs),function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
268
+                      assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)]
269
+                      if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data")
274 270
                   }
275 271
 
276 272
 
277
-                  result <- sapply(dataCombinations, function(dataIndex){
278
-                      CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex],
279
-                         classes = classes,
280
-                         nFeatures = nFeatures[dataIndex],
281
-                         selectionMethod = selectionMethod[dataIndex],
273
+                  result <- sapply(assayCombinations, function(assayIndex){
274
+                      CV(measurements = measurements[, mcols(measurements)$assay %in% assayIndex],
275
+                         outcomes = outcomes, assayIDs = assayIndex,
276
+                         nFeatures = nFeatures[assayIndex],
277
+                         selectionMethod = selectionMethod[assayIndex],
282 278
                          selectionOptimisation = selectionOptimisation,
283
-                         classifier = classifier[dataIndex],
284
-                         multiViewMethod = ifelse(length(dataIndex)==1, "none", multiViewMethod),
285
-                         dataCombinations = dataIndex,
279
+                         classifier = classifier[assayIndex],
280
+                         multiViewMethod = ifelse(length(assayIndex) == 1, "none", multiViewMethod),
286 281
                          nFolds = nFolds,
287 282
                          nRepeats = nRepeats,
288 283
                          nCores = nCores,
... ...
@@ -301,13 +296,13 @@ setMethod("crossValidate", "DataFrame",
301 296
 # One or more omics data sets, possibly with clinical data.
302 297
 setMethod("crossValidate", "MultiAssayExperiment",
303 298
           function(measurements,
304
-                   classes, 
299
+                   outcomes, 
305 300
                    nFeatures = 20,
306 301
                    selectionMethod = "t-test",
307 302
                    selectionOptimisation = "Resubstitution",
308 303
                    classifier = "randomForest",
309 304
                    multiViewMethod = "none",
310
-                   dataCombinations = NULL,
305
+                   assayCombinations = NULL,
311 306
                    nFolds = 5,
312 307
                    nRepeats = 20,
313 308
                    nCores = 1,
... ...
@@ -321,18 +316,18 @@ setMethod("crossValidate", "MultiAssayExperiment",
321 316
                       stop("Data set contains replicates. Please provide remove or average replicate observations and try again.")
322 317
               }
323 318
               
324
-              tablesAndClasses <- .MAEtoWideTable(measurements, targets, classes, restrict = NULL)
325
-              measurements <- tablesAndClasses[["dataTable"]]
326
-              classes <- tablesAndClasses[["outcomes"]]
319
+              tablesAndoutcomes <- .MAEtoWideTable(measurements, targets, outcomes, restrict = NULL)
320
+              measurements <- tablesAndoutcomes[["dataTable"]]
321
+              outcomes <- tablesAndoutcomes[["outcomes"]]
327 322
 
328 323
               crossValidate(measurements = measurements,
329
-                            classes = classes, 
324
+                            outcomes = outcomes, 
330 325
                             nFeatures = nFeatures,
331 326
                             selectionMethod = selectionMethod,
332 327
                             selectionOptimisation = selectionOptimisation,
333 328
                             classifier = classifier,
334 329
                             multiViewMethod = multiViewMethod,
335
-                            dataCombinations = dataCombinations,
330
+                            assayCombinations = assayCombinations,
336 331
                             nFolds = nFolds,
337 332
                             nRepeats = nRepeats,
338 333
                             nCores = nCores,
... ...
@@ -343,13 +338,14 @@ setMethod("crossValidate", "MultiAssayExperiment",
343 338
 #' @export
344 339
 setMethod("crossValidate", "data.frame", # data.frame of numeric measurements.
345 340
           function(measurements,
346
-                   classes, 
341
+                   outcomes, 
342
+                   assayName = NULL,
347 343
                    nFeatures = 20,
348 344
                    selectionMethod = "t-test",
349 345
                    selectionOptimisation = "Resubstitution",
350 346
                    classifier = "randomForest",
351 347
                    multiViewMethod = "none",
352
-                   dataCombinations = NULL,
348
+                   assayCombinations = NULL,
353 349
                    nFolds = 5,
354 350
                    nRepeats = 20,
355 351
                    nCores = 1,
... ...
@@ -357,13 +353,14 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements.
357 353
           {
358 354
               measurements <- DataFrame(measurements)
359 355
               crossValidate(measurements = measurements,
360
-                            classes = classes, 
356
+                            outcomes = outcomes,
357
+                            assayName = assayName,
361 358
                             nFeatures = nFeatures,
362 359
                             selectionMethod = selectionMethod,
363 360
                             selectionOptimisation = selectionOptimisation,
364 361
                             classifier = classifier,
365 362
                             multiViewMethod = multiViewMethod,
366
-                            dataCombinations = dataCombinations,
363
+                            assayCombinations = assayCombinations,
367 364
                             nFolds = nFolds,
368 365
                             nRepeats = nRepeats,
369 366
                             nCores = nCores,
... ...
@@ -374,13 +371,14 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements.
374 371
 #' @export
375 372
 setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
376 373
           function(measurements,
377
-                   classes, 
374
+                   outcomes,
375
+                   assayName = NULL,
378 376
                    nFeatures = 20,
379 377
                    selectionMethod = "t-test",
380 378
                    selectionOptimisation = "Resubstitution",
381 379
                    classifier = "randomForest",
382 380
                    multiViewMethod = "none",
383
-                   dataCombinations = NULL,
381
+                   assayCombinations = NULL,
384 382
                    nFolds = 5,
385 383
                    nRepeats = 20,
386 384
                    nCores = 1,
... ...
@@ -388,13 +386,14 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
388 386
           {
389 387
               measurements <- S4Vectors::DataFrame(measurements, check.names = FALSE)
390 388
               crossValidate(measurements = measurements,
391
-                            classes = classes, 
389
+                            outcomes = outcomes,
390
+                            assayName = assayName,
392 391
                             nFeatures = nFeatures,
393 392
                             selectionMethod = selectionMethod,
394 393
                             selectionOptimisation = selectionOptimisation,
395 394
                             classifier = classifier,
396 395
                             multiViewMethod = multiViewMethod,
397
-                            dataCombinations = dataCombinations,
396
+                            assayCombinations = assayCombinations,
398 397
                             nFolds = nFolds,
399 398
                             nRepeats = nRepeats,
400 399
                             nCores = nCores,
... ...
@@ -408,13 +407,13 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements.
408 407
 #' @export
409 408
 setMethod("crossValidate", "list",
410 409
           function(measurements,
411
-                   classes, 
410
+                   outcomes, 
412 411
                    nFeatures = 20,
413 412
                    selectionMethod = "t-test",
414 413
                    selectionOptimisation = "Resubstitution",
415 414
                    classifier = "randomForest",
416 415
                    multiViewMethod = "none",
417
-                   dataCombinations = NULL,
416
+                   assayCombinations = NULL,
418 417
                    nFolds = 5,
419 418
                    nRepeats = 20,
420 419
                    nCores = 1,
... ...
@@ -424,12 +423,12 @@ setMethod("crossValidate", "list",
424 423
               
425 424
               # Check if the list only contains one data type
426 425
               if (measurements |> sapply(class) |> unique() |> length() != 1) {
427
-                  stop("All datasets must be of the same type (e.g. data.frame, matrix)")
426
+                  stop("All assays must be of the same type (e.g. data.frame, matrix)")
428 427
               }
429 428
               
430 429
               # Check data type is valid
431 430
               if (!(measurements[[1]] |> class() %in% c("data.frame", "DataFrame", "matrix"))) {
432
-                  stop("Datasets must be of type data.frame, DataFrame or matrix")
431
+                  stop("assays must be of type data.frame, DataFrame or matrix")
433 432
               }
434 433
               
435 434
               # Check the list is named
... ...
@@ -437,21 +436,21 @@ setMethod("crossValidate", "list",
437 436
                   stop("Measurements must be a named list")
438 437
               }
439 438
               
440
-              # Check same number of samples for all datasets
439
+              # Check same number of samples for all assays
441 440
               if ((df_list |> sapply(dim))[2,] |> unique() |> length() != 1) {
442
-                  stop("All datasets must have the same number of samples")
441
+                  stop("All assays must have the same number of samples")
443 442
               }
444 443
               
445
-              # Check the number of classes is the same
444
+              # Check the number of outcomes is the same
446 445
               if ((df_list[[1]] |> dim())[2] != classes |> length()) {
447
-                  stop("Classes must have same number of samples as measurements")
446
+                  stop("Outcomes must have same number of samples as measurements")
448 447
               }
449 448
               
450 449
               df_list <- sapply(measurements, t, simplify = FALSE)
451 450
               df_list <- sapply(df_list , S4Vectors::DataFrame)
452 451
               
453 452
               df_list <- mapply(function(meas, nam){
454
-                  mcols(meas)$dataset <- nam
453
+                  mcols(meas)$assay <- nam
455 454
                   mcols(meas)$feature <- colnames(meas)
456 455
                   meas
457 456
               }, df_list, names(df_list))
... ...
@@ -461,13 +460,13 @@ setMethod("crossValidate", "list",
461 460
               colnames(combined_df) <- mcols(combined_df)$feature
462 461
               
463 462
               crossValidate(measurements = combined_df,
464
-                            classes = classes, 
463
+                            outcomes = outcomes, 
465 464
                             nFeatures = nFeatures,
466 465
                             selectionMethod = selectionMethod,
467 466
                             selectionOptimisation = selectionOptimisation,
468 467
                             classifier = classifier,
469 468
                             multiViewMethod = multiViewMethod,
470
-                            dataCombinations = dataCombinations,
469
+                            assayCombinations = assayCombinations,
471 470
                             nFolds = nFolds,
472 471
                             nRepeats = nRepeats,
473 472
                             nCores = nCores,
... ...
@@ -481,13 +480,13 @@ setMethod("crossValidate", "list",
481 480
 cleanNFeatures <- function(nFeatures, measurements){
482 481
     #### Clean up
483 482
     if(!is.null(mcols(measurements)))
484
-      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
483
+      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"])))
485 484
     else obsFeatures <- ncol(measurements)
486 485
     if(is.null(nFeatures) || length(nFeatures) == 1 && nFeatures == "all") nFeatures <- as.list(obsFeatures)
487 486
     if(is.null(names(nFeatures)) & length(nFeatures) == 1) nFeatures <- as.list(pmin(obsFeatures, nFeatures))
488 487
     if(is.null(names(nFeatures)) & length(nFeatures) > 1) nFeatures <- sapply(obsFeatures, function(x)pmin(x, nFeatures), simplify = FALSE)
489
-    #if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.")
490
-    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.")
488
+    #if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.")
489
+    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.")
491 490
     if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)]))
492 491
     if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE)
493 492
     nFeatures
... ...
@@ -498,13 +497,13 @@ cleanNFeatures <- function(nFeatures, measurements){
498 497
 cleanSelectionMethod <- function(selectionMethod, measurements){
499 498
     #### Clean up
500 499
     if(!is.null(mcols(measurements)))
501
-      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
500
+      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"])))
502 501
     else return(list(selectionMethod))
503 502
 
504 503
     if(is.null(names(selectionMethod)) & length(selectionMethod) == 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE)
505 504
     if(is.null(names(selectionMethod)) & length(selectionMethod) > 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE)
506
-    #if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.")
507
-    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.")
505
+    #if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) stop("selectionMethod needs to be a named character vector or list with the same names as the assays.")
506
+    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the assays.")
508 507
     if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(selectionMethod)) & is(selectionMethod, "character")) selectionMethod <- as.list(selectionMethod[names(obsFeatures)])
509 508
     selectionMethod
510 509
 }
... ...
@@ -514,13 +513,13 @@ cleanSelectionMethod <- function(selectionMethod, measurements){
514 513
 cleanClassifier <- function(classifier, measurements){
515 514
     #### Clean up
516 515
     if(!is.null(mcols(measurements)))
517
-      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"])))
516
+      obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"])))
518 517
     else return(list(classifier))
519 518
 
520 519
     if(is.null(names(classifier)) & length(classifier) == 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE)
521 520
     if(is.null(names(classifier)) & length(classifier) > 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE)
522
-    #if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the datasets.")
523
-    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the datasets.")
521
+    #if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the assays.")
522
+    if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the assays.")
524 523
     if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)])
525 524
     classifier
526 525
 }
... ...
@@ -566,13 +565,13 @@ generateCrossValParams <- function(nRepeats, nFolds, nCores, selectionOptimisati
566 565
 
567 566
 ######################################
568 567
 ######################################
569
-checkData <- function(measurements, classes){
568
+checkData <- function(measurements, outcomes){
570 569
     if(is.null(rownames(measurements)))
571 570
         stop("'measurements' DataFrame must have sample identifiers as its row names.")
572 571
     if(any(is.na(measurements)))
573 572
         stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.")
574 573
 
575
-    # !!!  Need to check mcols has dataset NUm
574
+    # !!!  Need to check mcols has assay NUm
576 575
 
577 576
 }
578 577
 ######################################
... ...
@@ -584,19 +583,19 @@ checkData <- function(measurements, classes){
584 583
 #' A function to generate a ModellingParams object
585 584
 #'
586 585
 #' @inheritParams crossValidate
587
-#' @param datasetIDs A vector of data set identifiers as long at the number of data sets.
586
+#' @param assayIDs A vector of data set identifiers as long at the number of data sets.
588 587
 #'
589 588
 #' @return ModellingParams object
590 589
 #' @export
591 590
 #'
592 591
 #' @examples
593 592
 #' data(asthma)
594
-#' # First make a toy example dataset with multiple data types. We'll randomly assign different features to be clinical, gene or protein.
593
+#' # First make a toy example assay with multiple data types. We'll randomly assign different features to be clinical, gene or protein.
595 594
 #' set.seed(51773)
596 595
 #' measurements <- DataFrame(measurements, check.names = FALSE) 
597
-#' mcols(measurements)$dataset <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE))
596
+#' mcols(measurements)$assay <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE))
598 597
 #' mcols(measurements)$feature <- colnames(measurements)
599
-#' modellingParams <- generateModellingParams(datasetIDs = c("clinical", "gene", "protein"),
598
+#' modellingParams <- generateModellingParams(assayIDs = c("clinical", "gene", "protein"),
600 599
 #'                                           measurements = measurements, 
601 600
 #'                                           nFeatures = list(clinical = 10, gene = 10, protein = 10),
602 601
 #'                                           selectionMethod = list(clinical = "t-test", gene = "t-test", protein = "t-test"),
... ...
@@ -604,7 +603,7 @@ checkData <- function(measurements, classes){
604 603
 #'                                           classifier = "randomForest",
605 604
 #'                                           multiViewMethod = "merge")
606 605
 #' @import BiocParallel
607
-generateModellingParams <- function(datasetIDs,
606
+generateModellingParams <- function(assayIDs,
608 607
                                     measurements,
609 608
                                     nFeatures,
610 609
                                     selectionMethod,
... ...
@@ -613,7 +612,7 @@ generateModellingParams <- function(datasetIDs,
613 612
                                     multiViewMethod = "none"
614 613
 ){
615 614
     if(multiViewMethod != "none") {
616
-        params <- generateMultiviewParams(datasetIDs,
615
+        params <- generateMultiviewParams(assayIDs,
617 616
                                           measurements,
618 617
                                           nFeatures,
619 618
                                           selectionMethod,
... ...
@@ -626,7 +625,7 @@ generateModellingParams <- function(datasetIDs,
626 625
 
627 626
 
628 627
 
629
-    if(length(datasetIDs) > 1) obsFeatures <- sum(mcols(measurements)[, "dataset"] %in% datasetIDs)
628
+    if(length(assayIDs) > 1) obsFeatures <- sum(mcols(measurements)[, "assay"] %in% assayIDs)
630 629
     else obsFeatures <- ncol(measurements)
631 630
 
632 631
 
... ...
@@ -705,8 +704,8 @@ generateModellingParams <- function(datasetIDs,
705 704
 
706 705
     #
707 706
     # if(multiViewMethod == "prevalidation"){
708
-    #     params$trainParams <- function(measurements, classes) prevalTrainInterface(measurements, classes, params)
709
-    #     params$trainParams <- function(measurements, classes) prevalTrainInterface(measurements, classes, params)
707
+    #     params$trainParams <- function(measurements, outcomes) prevalTrainInterface(measurements, outcomes, params)
708
+    #     params$trainParams <- function(measurements, outcomes) prevalTrainInterface(measurements, outcomes, params)
710 709
     # }
711 710
     #
712 711
 
... ...
@@ -718,7 +717,7 @@ generateModellingParams <- function(datasetIDs,
718 717
 
719 718
 
720 719
 
721
-generateMultiviewParams <- function(datasetIDs,
720
+generateMultiviewParams <- function(assayIDs,
722 721
                                     measurements,
723 722
                                     nFeatures,
724 723
                                     selectionMethod,
... ...
@@ -730,15 +729,15 @@ generateMultiviewParams <- function(datasetIDs,
730 729
 
731 730
         if(length(classifier) > 1) classifier <- classifier[[1]]
732 731
 
733
-        # Split measurements up by dataset.
734
-        assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE)
732
+        # Split measurements up by assay.
733
+        assayTrain <- sapply(assayIDs, function(assayID) if(assayID == 1) measurements else measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
735 734
 
736
-        # Generate params for each dataset. This could be extended to have different selectionMethods for each type
737
-        paramsDatasets <- mapply(generateModellingParams,
738
-                                 nFeatures = nFeatures[datasetIDs],
739
-                                 selectionMethod = selectionMethod[datasetIDs],
740
-                                 datasetIDs = datasetIDs,
741
-                                 measurements = assayTrain[datasetIDs],
735
+        # Generate params for each assay. This could be extended to have different selectionMethods for each type
736
+        paramsassays <- mapply(generateModellingParams,
737
+                                 nFeatures = nFeatures[assayIDs],
738
+                                 selectionMethod = selectionMethod[assayIDs],
739
+                                 assayIDs = assayIDs,
740
+                                 measurements = assayTrain[assayIDs],
742 741
                                  MoreArgs = list(
743 742
                                      selectionOptimisation = selectionOptimisation,
744 743
                                      classifier = classifier,
... ...
@@ -746,7 +745,7 @@ generateMultiviewParams <- function(datasetIDs,
746 745
                                  SIMPLIFY = FALSE)
747 746
 
748 747
         # Generate some params for merged model.
749
-        params <- generateModellingParams(datasetIDs = datasetIDs,
748
+        params <- generateModellingParams(assayIDs = assayIDs,
750 749
                                           measurements = measurements,
751 750
                                           nFeatures = nFeatures,
752 751
                                           selectionMethod = selectionMethod,
... ...
@@ -756,7 +755,7 @@ generateMultiviewParams <- function(datasetIDs,
756 755
 
757 756
         # Update selectParams to use
758 757
         params@selectParams <- SelectParams(selectMulti,
759
-                                            params = paramsDatasets,
758
+                                            params = paramsassays,
760 759
                                             characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "merge"),
761 760
                                             tuneParams = list(nFeatures = nFeatures[[1]],
762 761
                                                               performanceType = "Balanced Error",
... ...
@@ -767,16 +766,16 @@ generateMultiviewParams <- function(datasetIDs,
767 766
 
768 767
     if(multiViewMethod == "prevalidation"){
769 768
 
770
-        # Split measurements up by dataset.
771
-        assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE)
769
+        # Split measurements up by assay.
770
+        assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
772 771
 
773
-        # Generate params for each dataset. This could be extended to have different selectionMethods for each type
774
-        paramsDatasets <- mapply(generateModellingParams,
775
-                                 nFeatures = nFeatures[datasetIDs],
776
-                                 selectionMethod = selectionMethod[datasetIDs],
777
-                                 datasetIDs = datasetIDs,
778
-                                 measurements = assayTrain[datasetIDs],
779
-                                 classifier = classifier[datasetIDs],
772
+        # Generate params for each assay. This could be extended to have different selectionMethods for each type
773
+        paramsassays <- mapply(generateModellingParams,
774
+                                 nFeatures = nFeatures[assayIDs],
775
+                                 selectionMethod = selectionMethod[assayIDs],
776
+                                 assayIDs = assayIDs,
777
+                                 measurements = assayTrain[assayIDs],
778
+                                 classifier = classifier[assayIDs],
780 779
                                  MoreArgs = list(
781 780
                                      selectionOptimisation = selectionOptimisation,
782 781
                                      multiViewMethod = "none"),
... ...
@@ -786,8 +785,8 @@ generateMultiviewParams <- function(datasetIDs,
786 785
         params <- ModellingParams(
787 786
             balancing = "none",
788 787
             selectParams = NULL,
789
-            trainParams = TrainParams(prevalTrainInterface, params = paramsDatasets, characteristics = paramsDatasets$clinical@trainParams@characteristics),
790
-            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsDatasets$clinical@predictParams@characteristics)
788
+            trainParams = TrainParams(prevalTrainInterface, params = paramsassays, characteristics = paramsassays$clinical@trainParams@characteristics),
789
+            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsassays$clinical@predictParams@characteristics)
791 790
         )
792 791
 
793 792
         return(params)
... ...
@@ -795,16 +794,16 @@ generateMultiviewParams <- function(datasetIDs,
795 794
 
796 795
     if(multiViewMethod == "prevalidation"){
797 796
 
798
-        # Split measurements up by dataset.
799
-        assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE)
797
+        # Split measurements up by assay.
798
+        assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
800 799
 
801
-        # Generate params for each dataset. This could be extended to have different selectionMethods for each type
802
-        paramsDatasets <- mapply(generateModellingParams,
803
-                                 nFeatures = nFeatures[datasetIDs],
804
-                                 selectionMethod = selectionMethod[datasetIDs],
805
-                                 datasetIDs = datasetIDs,
806
-                                 measurements = assayTrain[datasetIDs],
807
-                                 classifier = classifier[datasetIDs],
800
+        # Generate params for each assay. This could be extended to have different selectionMethods for each type
801
+        paramsassays <- mapply(generateModellingParams,
802
+                                 nFeatures = nFeatures[assayIDs],
803
+                                 selectionMethod = selectionMethod[assayIDs],
804
+                                 assayIDs = assayIDs,
805
+                                 measurements = assayTrain[assayIDs],
806
+                                 classifier = classifier[assayIDs],
808 807
                                  MoreArgs = list(
809 808
                                      selectionOptimisation = selectionOptimisation,
810 809
                                      multiViewMethod = "none"),
... ...
@@ -814,24 +813,24 @@ generateMultiviewParams <- function(datasetIDs,
814 813
         params <- ModellingParams(
815 814
             balancing = "none",
816 815
             selectParams = NULL,
817
-            trainParams = TrainParams(prevalTrainInterface, params = paramsDatasets, characteristics = paramsDatasets$clinical@trainParams@characteristics),
818
-            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsDatasets$clinical@predictParams@characteristics)
816
+            trainParams = TrainParams(prevalTrainInterface, params = paramsassays, characteristics = paramsassays$clinical@trainParams@characteristics),
817
+            predictParams = PredictParams(prevalPredictInterface, characteristics = paramsassays$clinical@predictParams@characteristics)
819 818
         )
820 819
 
821 820
         return(params)
822 821
     }
823 822
 
824 823
 
825
-    if(multiViewMethod == "pca"){
824
+    if(multiViewMethod == "PCA"){
826 825
 
827
-        # Split measurements up by dataset.
828
-        assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE)
826
+        # Split measurements up by assay.
827
+        assayTrain <- sapply(assayIDs, function(assayID) measurements[,mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE)
829 828
 
830
-        # Generate params for each dataset. This could be extended to have different selectionMethods for each type
829
+        # Generate params for each assay. This could be extended to have different selectionMethods for each type
831 830
         paramsClinical <-  list(clinical = generateModellingParams(
832 831
                                  nFeatures = nFeatures["clinical"],
833 832
                                  selectionMethod = selectionMethod["clinical"],
834
-                                 datasetIDs = "clinical",
833
+                                 assayIDs = "clinical",
835 834
                                  measurements = assayTrain[["clinical"]],
836 835
                                  classifier = classifier["clinical"],
837 836
                                  selectionOptimisation = selectionOptimisation,
... ...
@@ -852,13 +851,14 @@ generateMultiviewParams <- function(datasetIDs,
852 851
 
853 852
 
854 853
 CV <- function(measurements,
855
-               classes,
854
+               outcomes,
855
+               assayIDs,
856 856
                nFeatures = NULL,
857 857
                selectionMethod = "t-test",
858 858
                selectionOptimisation = "Resubstitution",
859 859
                classifier = "elasticNetGLM",
860 860
                multiViewMethod = "none",
861
-               dataCombinations = NULL,
861
+               assayCombinations = NULL,
862 862
                nFolds = 5,
863 863
                nRepeats = 100,
864 864
                nCores = 1,
... ...
@@ -866,7 +866,7 @@ CV <- function(measurements,
866 866
 
867 867
 {
868 868
     # Check that data is in the right format
869
-    checkData(measurements, classes)
869
+    checkData(measurements, outcomes)
870 870
     
871 871
     # Check that other variables are in the right format and fix
872 872
     nFeatures <- cleanNFeatures(nFeatures = nFeatures,
... ...
@@ -877,9 +877,6 @@ CV <- function(measurements,
877 877
                                   measurements = measurements)
878 878
 
879 879
     # Which data-types or data-views are present?
880
-    datasetIDs <- unique(mcols(measurements)[, "dataset"])
881
-    if(is.null(datasetIDs)) datasetIDs <- 1
882
-    if(is.null(dataCombinations)) dataCombinations <- datasetIDs
883 880
     if(is.null(characteristicsLabel)) characteristicsLabel <- "none"
884 881
 
885 882
     # Setup cross-validation parameters including
... ...
@@ -890,7 +887,7 @@ CV <- function(measurements,
890 887
     )
891 888
 
892 889
     # Turn text into TrainParams and TestParams objects
893
-    modellingParams <- generateModellingParams(datasetIDs = datasetIDs,
890
+    modellingParams <- generateModellingParams(assayIDs = assayIDs,
894 891
                                                measurements = measurements,
895 892
                                                nFeatures = nFeatures,
896 893
                                                selectionMethod = selectionMethod,
... ...
@@ -898,12 +895,12 @@ CV <- function(measurements,
898 895
                                                classifier = classifier,
899 896
                                                multiViewMethod = multiViewMethod
900 897
     )
898
+    if(assayIDs != 1) assayText <- assayIDs else if(!is.null(attr(measurements, "assayName"))) assayText <- attr(measurements, "assayName") else assayText <- NULL
899
+    characteristics <- S4Vectors::DataFrame(characteristic = c(if(!is.null(assayText)) "Assay Name" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(!is.null(assayText)) paste(assayText, collapse = ", ") else NULL, paste(classifier, collapse = ", "),  paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel))
901 900
 
902
-    characteristics = S4Vectors::DataFrame(characteristic = c(if(length(datasetIDs) > 1) "Data Set" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(length(datasetIDs) > 1) paste(datasetIDs, collapse = ", ") else NULL, paste(classifier, collapse = ", "),  paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel))
903
-
904
-    classifyResults <- runTests(measurements, classes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics)
901
+    classifyResults <- runTests(measurements, outcomes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics)
905 902
     
906
-    fullResult <- runTest(measurements, classes, measurements, classes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1)
903
+    fullResult <- runTest(measurements, outcomes, measurements, outcomes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1)
907 904
 
908 905
     classifyResults@finalModel <- list(fullResult$models)
909 906
     classifyResults
... ...
@@ -915,7 +912,7 @@ CV <- function(measurements,
915 912
 
916 913
 
917 914
 
918
-simplifyResults <- function(results, values = c("dataset", "classifier", "selectionMethod", "multiViewMethod")){
915
+simplifyResults <- function(results, values = c("assay", "classifier", "selectionMethod", "multiViewMethod")){
919 916
     ch <- sapply(results, function(x) x@characteristics[x@characteristics$characteristic %in% values, "value"], simplify = TRUE)
920 917
     ch <- data.frame(t(ch))
921 918
     results[!duplicated(ch)]
... ...
@@ -932,4 +929,3 @@ setMethod("predict", "ClassifyResult",
932 929
 
933 930
 
934 931
 
935
-
... ...
@@ -103,7 +103,7 @@ setMethod("distribution", "ClassifyResult",
103 103
       allFeaturesText <- allFeatures
104 104
     } else if(is(chosenFeatures[[1]], "DataFrame")) {
105 105
       allFeatures <- do.call(rbind, chosenFeatures)
106
-      allFeaturesText <- paste(allFeatures[, "dataset"], allFeatures[, "feature"], sep = ':')
106
+      allFeaturesText <- paste(allFeatures[, "Original Assay"], allFeatures[, "Original Feature"], sep = ':')
107 107
     } else if("Pairs" %in% class(chosenFeatures[[1]])) {
108 108
       allFeatures <- do.call(c, unname(chosenFeatures))
109 109
       allFeaturesText <- paste(first(allFeatures), second(allFeatures), sep = ', ')
... ...
@@ -153,7 +153,7 @@ setMethod("distribution", "ClassifyResult",
153 153
     if(isPairs) # Make it DataFrame for counting of the occurrences.
154 154
       allFeatures <- as(allFeatures, "DataFrame")
155 155
     
156
-    summaryTable <- aggregate(list(count = rep(1, nrow(allFeatures))), as.data.frame(allFeatures), length)
156
+    summaryTable <- aggregate(list(count = rep(1, nrow(allFeatures))), as.data.frame(allFeatures, optional = TRUE), length)
157 157
     
158 158
     if(summaryType == "percentage")
159 159
     {
... ...
@@ -166,9 +166,7 @@ setMethod("distribution", "ClassifyResult",
166 166
       pairsSummary <- S4Vectors::Pairs(summaryTable[, "first"], summaryTable[, "second"], summaryTable[, 3])
167 167
       colnames(mcols(pairsSummary)) <- colnames(summaryTable[, 3])
168 168
       return(pairsSummary)
169
-    } else { # A table of dataset and feature.
170
-      if(all(summaryTable[, "dataset"] == "dataset")) # Just return a vector and get rid of unnecessary dataset.
171
-        summaryTable <- setNames(summaryTable[, 3], summaryTable[, 2])
169
+    } else { # A table of assay and feature.
172 170
       summaryTable
173 171
     }
174 172
   }
... ...
@@ -165,8 +165,8 @@ setMethod("featureSetSummary", "MultiAssayExperiment", # Pick one numeric table
165 165
   if(class(featureSets) != "FeatureSetCollection")
166 166
     stop("'featureSets' is not of type FeatureSetCollection but must be.")
167 167
 
168
-  datasetUsed <- measurements[[target]]                                   
169
-  assayedFeatures <- rownames(datasetUsed)
168
+  assayUsed <- measurements[[target]]                                   
169
+  assayedFeatures <- rownames(assayUsed)
170 170
   featureSets <- featureSets@sets
171 171
   keepSets <- sapply(featureSets, function(featureSet)
172 172
     length(intersect(featureSet, assayedFeatures)) / length(featureSet) * 100 > minimumOverlapPercent)
... ...
@@ -194,7 +194,7 @@ setMethod("featureSetSummary", "MultiAssayExperiment", # Pick one numeric table
194 194
       message("Summarising features to feature sets.")
195 195
 
196 196
   # Transform measurements into one feature per set.
197
-  transformed <- apply(datasetUsed, 2, function(sampleMeasurements)
197
+  transformed <- apply(assayUsed, 2, function(sampleMeasurements)
198 198
   {
199 199
     sapply(featureSets, function(featureSet) locationFunction(sampleMeasurements[featureSet]))
200 200
   })
... ...
@@ -135,7 +135,7 @@ setMethod("elasticNetGLMtrainInterface", "DataFrame", function(measurementsTrain
135 135
   fitted
136 136
 })
137 137
 
138
-# One or more omics datasets, possibly with sample information data.
138
+# One or more omics assays, possibly with sample information data.
139 139
 #' @rdname elasticNetGLM
140 140
 #' @export
141 141
 setMethod("elasticNetGLMtrainInterface", "MultiAssayExperiment",
... ...
@@ -109,7 +109,7 @@ setMethod("GLMtrainInterface", "DataFrame", function(measurementsTrain, classesT
109 109
   glm(class ~ . + 0, family = binomial, data = fitData, ...)
110 110
 })
111 111
 
112
-# One or more omics datasets, possibly with sample information data.
112
+# One or more omics assays, possibly with sample information data.
113 113
 #' @rdname GLM
114 114
 #' @export
115 115
 setMethod("GLMtrainInterface", "MultiAssayExperiment",
... ...
@@ -19,15 +19,15 @@ setMethod("pcaTrainInterface", "DFrame",
19 19
               ###
20 20
               # Splitting measurements into a list of each of the datasets
21 21
               ###
22
-              assayTrain <- sapply(unique(mcols(measurements)[["dataset"]]), function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE)
22
+              assayTrain <- sapply(unique(mcols(measurements)[["assay"]]), function(assay) measurements[, mcols(measurements)[["assay"]] %in% assay], simplify = FALSE)
23 23
               
24
-              if(! "clinical" %in% names(assayTrain)) stop("Must have a dataset called `clinical`")
24
+              if(!"clinical" %in% names(assayTrain)) stop("Must have an assay called \"clinical\"")
25 25
               
26 26
               # Create generic crossValParams just to get things working, might be used for optimising features in runTest later???
27 27
               CVparams <- CrossValParams(permutations = 1, folds = 10, parallelParams = SerialParam(RNGseed = .Random.seed[1]), tuneMode = "Resubstitution") 
28 28
               
29 29
               ###
30
-              # Run PCA for all datasets except clinical
30
+              # Run PCA for all assays except clinical
31 31
               ###
32 32
               usePCA<- names(assayTrain)[names(assayTrain)!="clinical"]
33 33
               assayPCA <- sapply(assayTrain[usePCA], function(assay){
... ...
@@ -50,7 +50,7 @@ setMethod("pcaTrainInterface", "DFrame",
50 50
               ###
51 51
               
52 52
               pcaVar <- S4Vectors::DataFrame(pcaVar)
53
-              mcols(pcaVar)$dataset = "pca"
53
+              mcols(pcaVar)$assay = "PCA"
54 54
               mcols(pcaVar)$feature = colnames(pcaVar)
55 55
             
56 56
               fullTrain = cbind(assayTrain[["clinical"]], pcaVar)
... ...
@@ -116,8 +116,8 @@ setMethod("pcaPredictInterface", c("pcaModel", "DFrame"),
116 116
               # Pull out my classification model
117 117
               fullModel <- fullModel@fullModel[[1]]
118 118
               
119
-              #Split my test data into a list of the different datasets
120
-              assayTest <- sapply(unique(mcols(test)[["dataset"]]), function(x) test[,mcols(test)[["dataset"]]%in%x], simplify = FALSE)
119
+              #Split my test data into a list of the different assays
120
+              assayTest <- sapply(unique(mcols(test)[["assay"]]), function(assay) test[, mcols(test)[["assay"]] %in% assay], simplify = FALSE)
121 121
               
122 122
               # Pull out my PCA models
123 123
               pcaModels <- fullModel$pcaModels
... ...
@@ -131,7 +131,7 @@ setMethod("pcaPredictInterface", c("pcaModel", "DFrame"),
131 131
               pcaVar <- do.call(cbind, pcaVar)
132 132
               
133 133
               pcaVar <- S4Vectors::DataFrame(pcaVar)
134
-              mcols(pcaVar)$dataset = "pca"
134
+              mcols(pcaVar)$assay = "PCA"
135 135
               mcols(pcaVar)$feature = colnames(pcaVar)
136 136
               
137 137
               # Merge my PCA stuff with my clinical data
... ...
@@ -54,11 +54,11 @@ setMethod("prevalTrainInterface", "DFrame",
54 54
           {
55 55
               
56 56
               ###
57
-              # Splitting measurements into a list of each of the datasets
57
+              # Splitting measurements into a list of each of the assays
58 58
               ###
59
-              assayTrain <- sapply(unique(mcols(measurements)[["dataset"]]), function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE)
59
+              assayTrain <- sapply(unique(mcols(measurements)[["assay"]]), function(assay) measurements[, mcols(measurements)[["dataset"]] %in% assay], simplify = FALSE)
60 60
               
61
-              if(! "clinical" %in% names(assayTrain)) stop("Must have a dataset called `clinical`")
61
+              if(!"clinical" %in% names(assayTrain)) stop("Must have an assay called \"clinical\"")
62 62
               
63 63
               # Create generic crossValParams to use for my prevalidation... should add this as a input parameter
64 64
               CVparams <- CrossValParams(permutations = 1, folds = 10, parallelParams = SerialParam(RNGseed = .Random.seed[1]), tuneMode = "Resubstitution") 
... ...
@@ -66,7 +66,7 @@ setMethod("prevalTrainInterface", "DFrame",
66 66
               ###
67 67
               # Fit a classification model for each non-clinical datasets, pulling models from "params"
68 68
               ###
69
-              usePreval <- names(assayTrain)[names(assayTrain)!="clinical"]
69
+              usePreval <- names(assayTrain)[names(assayTrain) != "clinical"]
70 70
               assayTests <- bpmapply(
71 71
                   runTests,
72 72
                   measurements = assayTrain[usePreval],
... ...
@@ -77,7 +77,7 @@ setMethod("prevalTrainInterface", "DFrame",
77 77
                       verbose = 0
78 78
                   ), 
79 79
                   BPPARAM = SerialParam(RNGseed = .Random.seed[1])) |>
80
-                  sapply(function(x)x@predictions, simplify = FALSE)
80
+                  sapply(function(result) result@predictions, simplify = FALSE)
81 81
               
82 82
               ###
83 83
               # Pull-out prevalidated vectors ie. the predictions on each of the test folds.
... ...
@@ -98,7 +98,7 @@ setMethod("prevalTrainInterface", "DFrame",
98 98
               #fullTrain = cbind(assayTrain[["clinical"]][,selectedFeaturesClinical], prevalidationTrain[rownames(assayTrain[["clinical"]]), , drop = FALSE])
99 99
               
100 100
               prevalidationTrain <- S4Vectors::DataFrame(prevalidationTrain)
101
-              mcols(prevalidationTrain)$dataset = "pca"
101
+              mcols(prevalidationTrain)$assay = "PCA"
102 102
               mcols(prevalidationTrain)$feature = colnames(prevalidationTrain)
103 103
               
104 104
               
... ...
@@ -176,7 +176,7 @@ setMethod("prevalPredictInterface", c("prevalModel", "DFrame"),
176 176
           )
177 177
           {
178 178
               fullModel <- fullModel@fullModel[[1]]
179
-              assayTest <- sapply(unique(mcols(test)[["dataset"]]), function(x) test[,mcols(test)[["dataset"]]%in%x], simplify = FALSE)
179
+              assayTest <- sapply(unique(mcols(test)[["assay"]]), function(assay) test[, mcols(test)[["assay"]] %in% assay], simplify = FALSE)
180 180
               
181 181
               prevalidationModels <- fullModel$prevalidationModels
182 182
               modelPredictionFunctions <- fullModel$modellingParams
... ...
@@ -191,7 +191,7 @@ setMethod("prevalPredictInterface", c("prevalModel", "DFrame"),
191 191
                   extractPrevalidation()
192 192
               
193 193
               prevalidationPredict <- S4Vectors::DataFrame(prevalidationPredict)
194
-              mcols(prevalidationPredict)$dataset = "pca"
194
+              mcols(prevalidationPredict)$assay = "PCA"
195 195
               mcols(prevalidationPredict)$feature = colnames(prevalidationPredict)
196 196
               
197 197
               fullTest = cbind(assayTest[["clinical"]], prevalidationPredict[rownames(assayTest[["clinical"]]), , drop = FALSE])
... ...
@@ -82,7 +82,7 @@
82 82
 #' legends' titles. The fifth number is the font size of the legend labels.
83 83
 #' @param colours The colours to plot data of each class in. The length of this
84 84
 #' vector must be as long as the distinct number of classes in the data set.
85
-#' @param showDatasetName Logical. Default: \code{TRUE}. If \code{TRUE} and the
85
+#' @param showAssayName Logical. Default: \code{TRUE}. If \code{TRUE} and the
86 86
 #' data is in a \code{MultiAssayExperiment} object, the the name of the table
87 87
 #' in which the feature is stored in is added to the plot title.
88 88
 #' @param plot Logical. Default: \code{TRUE}. If \code{TRUE}, a plot is
... ...
@@ -129,7 +129,7 @@
129 129
 #'   genesMatrix <- t(genesMatrix) # MultiAssayExperiment needs features in rows.
130 130
 #'   dataContainer <- MultiAssayExperiment(list(RNA = genesMatrix),
131 131
 #'                                         colData = cbind(clinicalData, class = classes))
132
-#'   targetFeatures <- DataFrame(dataset = "RNA", feature = "Gene 50")                                     
132
+#'   targetFeatures <- DataFrame(assay = "RNA", feature = "Gene 50")                                     
133 133
 #'   plotFeatureClasses(dataContainer, targets = targetFeatures, classesColumn = "class",
134 134
 #'                      groupBy = c("sampleInfo", "Gender"), # Table name, feature name.
135 135
 #'                      xAxisLabel = bquote(log[2]*'(expression)'), dotBinWidth = 0.5)
... ...
@@ -161,7 +161,7 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar
161 161
                                                       xLabelPositions = "auto", yLabelPositions = "auto",
162 162
                                                       fontSizes = c(24, 16, 12, 12, 12),
163 163
                                                       colours = c("#3F48CC", "#880015"),
164
-                                                      showDatasetName = TRUE, plot = TRUE)
164
+                                                      showAssayName = TRUE, plot = TRUE)
165 165
 {
166 166
   if(missing(targets))
167 167
     stop("'targets' must be specified.")
... ...
@@ -197,9 +197,6 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar
197 197
   
198 198
   # Subsetting of measurements to the features of interest.
199 199
   
200
-  # Remove unnecessary dataset column for a single dataset.
201
-  if(!is.null(ncol(targets)) && all(targets[, "dataset"] == "dataset")) targets <- targets[, "feature"]
202
-  
203 200
   if(!is(targets, "DataFrame"))
204 201
   {
205 202
     if(!"Pairs" %in% class(targets)) # A simple vector.
... ...
@@ -224,9 +221,9 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar
224 221
         featureText <- colnames(measurements)[columnIndex]
225 222
       } else {
226 223
         featureText <- S4Vectors::mcols(measurements)[columnIndex, "feature"]
227
-        if(showDatasetName == TRUE && !all(S4Vectors::mcols(measurements)[, "dataset"] == "dataset"))
224
+        if(showAssayName == TRUE && !all(S4Vectors::mcols(measurements)[, "assay"] == "assay"))
228 225
         {
229
-          featureText <- paste(featureText, paste('(', S4Vectors::mcols(measurements)[columnIndex, "dataset"], ')', sep = ''))
226
+          featureText <- paste(featureText, paste('(', S4Vectors::mcols(measurements)[columnIndex, "assay"], ')', sep = ''))
230 227
         }
231 228
       }
232 229
       
... ...
@@ -363,7 +360,7 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar
363 360
 #' @rdname plotFeatureClasses
364 361
 #' @export
365 362
 setMethod("plotFeatureClasses", "MultiAssayExperiment",
366
-          function(measurements, targets, classesColumn, groupBy = NULL, groupingName = NULL, showDatasetName = TRUE, ...)
363
+          function(measurements, targets, classesColumn, groupBy = NULL, groupingName = NULL, showAssayName = TRUE, ...)
367 364
           {
368 365
             if(missing(targets))
369 366
               stop("'targets' must be specified by the user.")
... ...
@@ -385,7 +382,7 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment",
385 382
                 groupBy <- MultiAssayExperiment::colData(measurements)[, groupBy[2]]
386 383
               } else { # One of the omics tables.
387 384
                 groupBy <- measurements[groupBy[2], , groupingTable]
388
-                if(showDatasetName == TRUE)
385
+                if(showAssayName == TRUE)
389 386
                   groupingName <- paste(groupingName, groupingTable)
390 387
               }
391 388
               levelsOrder <- levels(groupBy)
... ...
@@ -399,11 +396,11 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment",
399 396
             measurements <- MultiAssayExperiment::wideFormat(measurements, colDataCols = seq_along(MultiAssayExperiment::colData(measurements)), check.names = FALSE, collapse = ':')
400 397
             measurements <- measurements[, -1, drop = FALSE] # Remove sample IDs.
401 398
             S4Vectors::mcols(measurements)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(measurements)[, "sourceName"])
402
-            colnames(S4Vectors::mcols(measurements))[1] <- "dataset"
399
+            colnames(S4Vectors::mcols(measurements))[1] <- "assay"
403 400
             S4Vectors::mcols(measurements)[, "feature"] <- S4Vectors::mcols(measurements)[, "rowname"]
404 401
             missingIndices <- is.na(S4Vectors::mcols(measurements)[, "feature"])
405 402
             S4Vectors::mcols(measurements)[missingIndices, "feature"] <- colnames(measurements)[missingIndices]
406
-            S4Vectors::mcols(measurements) <- S4Vectors::mcols(measurements)[, c("dataset", "feature")]
403
+            S4Vectors::mcols(measurements) <- S4Vectors::mcols(measurements)[, c("assay", "feature")]
407 404
             
408
-            plotFeatureClasses(measurements, classes, S4Vectors::mcols(measurements), groupBy, groupingName, showDatasetName = showDatasetName, ...)
405
+            plotFeatureClasses(measurements, classes, S4Vectors::mcols(measurements), groupBy, groupingName, showAssayName = showAssayName, ...)
409 406
           })
410 407
\ No newline at end of file
... ...
@@ -100,9 +100,9 @@ setMethod("previousSelection", "DataFrame",
100 100
     IDsRows <- match(previousIDs, featuresInfo(classifyResult)[, "Original Feature"])
101 101
     safeIDs <- featuresInfo(classifyResult)[IDsRows, "Renamed Feature"]
102 102
   } else { # A data frame describing the data set and variable name of the chosen feature.
103
-    featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")])
104
-    IDsRows <-  match(do.call(paste, previousIDs), do.call(paste(featuresInfo(classifyResult)[, c("Original Dataset", "Original Feature")])))
105
-    safeIDs <- do.call(paste, featuresInfo(classifyResult)[IDsRows, c("Renamed Dataset", "Renamed Feature")])
103
+    featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("assay", "feature")])
104
+    IDsRows <-  match(do.call(paste, previousIDs), do.call(paste(featuresInfo(classifyResult)[, c("Original Assay", "Original Feature")])))
105
+    safeIDs <- do.call(paste, featuresInfo(classifyResult)[IDsRows, c("Renamed Assay", "Renamed Feature")])
106 106
   }
107 107
   
108 108
   commonFeatures <- intersect(safeIDs, featuresIDs)
... ...
@@ -92,7 +92,7 @@ setMethod("runTest", "DataFrame", # Sample information data or one of the other
92 92
 function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest,
93 93
          crossValParams = CrossValParams(), # crossValParams might be used for tuning optimisation.
94 94
          modellingParams = ModellingParams(), characteristics = S4Vectors::DataFrame(), verbose = 1, .iteration = NULL)
95
-{
95
+{if(!is.null(.iteration) && .iteration != "internal")
96 96
   if(is.null(.iteration)) # Not being called by runTests but by user. So, check the user input.
97 97
   {
98 98
     if(is.null(rownames(measurementsTrain)))
... ...
@@ -117,9 +117,9 @@ function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest,
117 117
     featuresInfo <- .summaryFeatures(measurementsTrain)
118 118
     if(!is.null(S4Vectors::mcols(measurementsTrain)))
119 119
     {
120
-      S4Vectors::mcols(measurementsTrain) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")]
121
-      S4Vectors::mcols(measurementsTest) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")]
122
-      colnames(measurementsTrain) <- colnames(measurementsTest) <- paste(featuresInfo[["Renamed Dataset"]], featuresInfo[["Renamed Feature"]], sep = '')
120
+      S4Vectors::mcols(measurementsTrain) <- featuresInfo[, c("Renamed Assay", "Renamed Feature")]
121
+      S4Vectors::mcols(measurementsTest) <- featuresInfo[, c("Renamed Assay", "Renamed Feature")]
122
+      colnames(measurementsTrain) <- colnames(measurementsTest) <- paste(featuresInfo[["Renamed Assay"]], featuresInfo[["Renamed Feature"]], sep = '')
123 123
     } else {
124 124
       colnames(measurementsTrain) <- colnames(measurementsTest) <- featuresInfo[, "Renamed Feature"]
125 125
     }
... ...
@@ -257,7 +257,7 @@ input data. Autmomatically reducing to smaller number.")
257 257
         predictedOutcomes <- predictedOutcomes[, na.omit(match(c("class", "risk"), colnames(predictedOutcomes)))]
258 258
     performanceChanges <- round(performancesWithoutEach - calcExternalPerformance(outcomesTest, predictedOutcomes, performanceType), 2)
259 259
      
260
-    if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")]
260
+    if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Assay", "Original Feature")]
261 261
     importanceTable <- DataFrame(selectedFeatures, performanceChanges)
262 262
     if(ncol(importanceTable) == 2) colnames(importanceTable)[1] <- "feature"
263 263
     colnames(importanceTable)[ncol(importanceTable)] <- paste("Change in", performanceType)
... ...
@@ -271,11 +271,11 @@ input data. Autmomatically reducing to smaller number.")
271 271
   {
272 272
     if(!is.null(rankedFeaturesIndices))
273 273
     {
274
-      if(is.null(S4Vectors::mcols(measurementsTrain))) rankedFeatures <- featuresInfo[rankedFeaturesIndices, "Original Feature"] else rankedFeatures <- featuresInfo[rankedFeaturesIndices, c("Original Dataset", "Original Feature")]
274
+      if(is.null(S4Vectors::mcols(measurementsTrain))) rankedFeatures <- featuresInfo[rankedFeaturesIndices, "Original Feature"] else rankedFeatures <- featuresInfo[rankedFeaturesIndices, c("Original Assay", "Original Feature")]
275 275
     } else { rankedFeatures <- NULL}
276 276
     if(!is.null(selectedFeaturesIndices))
277 277
     {
278
-      if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")]
278
+      if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Assay", "Original Feature")]
279 279
     } else { selectedFeatures <- NULL}
280 280
   } else { # Nested use in feature selection. No feature selection in inner execution, so ignore features. 
281 281
       rankedFeatures <- selectedFeatures <- NULL
... ...
@@ -51,7 +51,7 @@
51 51
 #'     selectParams <- SelectParams(differentMeansRanking, tuneParams = tuneList)
52 52
 #'     modellingParams <- ModellingParams(selectParams = selectParams)
53 53
 #'     runTests(measurements, classes, CVparams, modellingParams,
54
-#'              DataFrame(characteristic = c("Dataset Name", "Classifier Name"),
54
+#'              DataFrame(characteristic = c("Assay Name", "Classifier Name"),
55 55
 #'                        value = c("Asthma", "Different Means"))
56 56
 #'              )
57 57
 #'   #}
... ...
@@ -220,7 +220,7 @@ setMethod("samplesMetricMap", "list",
220 220
                                                      legend.position = ifelse(showLegends, "right", "none"),
221 221
                                                      legend.key.size = legendSize)
222 222
     } else # Numeric data about the samples.
223
-    {#browser()
223
+    {
224 224
       featureValuesData <- data.frame(measurements = featureValues, Class = 1)
225 225
       if(metric != "Sample C-index") featureValuesData[, "Class"] <- knownClasses
226 226
       featureValuesPlot <- ggplot2::ggplot(featureValuesData, environment = environment()) +
... ...
@@ -5,8 +5,7 @@ setGeneric("selectMulti", function(measurementsTrain, classesTrain, params, ...)
5 5
 setMethod("selectMulti", "DataFrame",
6 6
           function(measurementsTrain, classesTrain, params, verbose = 0)
7 7
           {
8
-              assayTrain <- sapply(unique(mcols(measurementsTrain)[["Renamed Dataset"]]), function(x) measurementsTrain[, mcols(measurementsTrain)[["Renamed Dataset"]] %in% x], simplify = FALSE)
9
-
8
+              assayTrain <- sapply(unique(mcols(measurementsTrain)[["Renamed Assay"]]), function(assay) measurementsTrain[, mcols(measurementsTrain)[["Renamed Assay"]] %in% assay], simplify = FALSE)
10 9
               featuresIndices <- mapply(.doSelection, 
11 10
                                          measurements = assayTrain,
12 11
                                          modellingParams = params,
... ...
@@ -238,12 +238,10 @@ setMethod("selectionPlot", "list",
238 238
                   } else {
239 239
                       summaryTable <- result@importance
240 240
                   }
241
-                 if("dataset" %in% colnames(summaryTable) && all(summaryTable[, "dataset"] == "dataset"))
242
-                   summaryTable <- summaryTable[, -match("dataset", colnames(summaryTable))]
243
-                 if("dataset" %in% colnames(summaryTable))
241
+                 if("assay" %in% colnames(summaryTable))
244 242
                  {
245
-                    summaryTable[, "feature"] <- paste(summaryTable[, "dataset"], summaryTable[, "feature"])
246
-                    summaryTable <- summaryTable[, -match("dataset", colnames(summaryTable))]
243
+                    summaryTable[, "feature"] <- paste(summaryTable[, "assay"], summaryTable[, "feature"])
244
+                    summaryTable <- summaryTable[, -match("assay", colnames(summaryTable))]
247 245
                   }
248 246
                   summaryTable
249 247
                 }))
... ...
@@ -100,7 +100,7 @@
100 100
     dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomesColumns), check.names = FALSE, collapse = ':')
101 101
     rownames(dataTable) <- dataTable[, "primary"]
102 102
     S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"])
103
-    colnames(S4Vectors::mcols(dataTable))[1] <- "dataset"
103
+    colnames(S4Vectors::mcols(dataTable))[1] <- "assay"
104 104
   
105 105
     # Sample information variable names not included in column metadata of wide table but only as row names of it.
106 106
     # Create a combined column named "feature" which has feature names of the assays as well as the sample information.
... ...
@@ -109,7 +109,7 @@
109 109
     S4Vectors::mcols(dataTable)[missingIndices, "feature"] <- colnames(dataTable)[missingIndices]
110 110
     
111 111
     # Finally, a column annotation recording variable name and which table it originated from for all of the source tables.
112
-    S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("dataset", "feature")]
112
+    S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("assay", "feature")]
113 113
   } else { # Must have only been sample information data.
114 114
     dataTable <- MultiAssayExperiment::colData(measurements)
115 115
   }
... ...
@@ -569,24 +569,23 @@
569 569
   {
570 570
     originalInfo <- S4Vectors::mcols(measurements)
571 571
     featureNames <- S4Vectors::mcols(measurements)[, "feature"]
572
-    datasets <- unique(S4Vectors::mcols(measurements)[, "dataset"])
572
+    assays <- unique(S4Vectors::mcols(measurements)[, "assay"])
573 573
     renamedInfo <- S4Vectors::mcols(measurements)
574
-    renamedDatasets <- paste("Dataset", seq_along(datasets), sep = '')
575
-    for(dataset in datasets)
574
+    renamedAssays <- paste("Assay", seq_along(assays), sep = '')
575
+    for(assay in assays)
576 576
     {
577
-      rowsDataset <- which(renamedInfo[, "dataset"] == dataset)
578
-      renamedInfo[rowsDataset, "feature"] <- paste("Feature", seq_along(rowsDataset), sep = '')
579
-      renamedInfo[rowsDataset, "dataset"] <- renamedDatasets[match(dataset, datasets)]
577
+      rowsAssay <- which(renamedInfo[, "assay"] == assay)
578
+      renamedInfo[rowsAssay, "feature"] <- paste("Feature", seq_along(rowsAssay), sep = '')
579
+      renamedInfo[rowsAssay, "assay"] <- renamedAssays[match(assay, assays)]
580 580
     }
581 581
     featuresInfo <- DataFrame(originalInfo, renamedInfo)
582
-    colnames(featuresInfo) <- c("Original Dataset", "Original Feature", "Renamed Dataset", "Renamed Feature")
582
+    colnames(featuresInfo) <- c("Original Assay", "Original Feature", "Renamed Assay", "Renamed Feature")
583 583
   } else {
584 584
     originalFeatures <- colnames(measurements)
585 585
     renamedInfo <- paste("Feature", seq_along(measurements), sep = '')
586 586
     featuresInfo <- DataFrame(originalFeatures, renamedInfo)
587 587
     colnames(featuresInfo) <- c("Original Feature", "Renamed Feature")
588 588
   }
589
-  
590 589
   featuresInfo
591 590
 }
592 591
 
... ...
@@ -111,7 +111,7 @@ of predictions made during the cross-validation procedure.}}
111 111
     modellingParams <- ModellingParams()
112 112
     classified <-
113 113
     runTests(measurements, classes, LOOCVparams, modellingParams,
114
-             DataFrame(characteristic = c("dataset", "classification"),
114
+             DataFrame(characteristic = c("Data Set", "Classification"),
115 115
                       value = c("Asthma", "Different Means"))
116 116
              )
117 117
     class(classified)
... ...
@@ -11,30 +11,18 @@
11 11
 \alias{predict,ClassifyResult-method}
12 12
 \title{Cross-validation to evaluate classification performance.}
13 13
 \usage{
14
-crossValidate(
15
-  measurements,
16
-  classes,
17
-  nFeatures = 20,
18
-  selectionMethod = "t-test",
19
-  selectionOptimisation = "Resubstitution",
20
-  classifier = "randomForest",
21
-  multiViewMethod = "none",
22
-  dataCombinations = NULL,
23
-  nFolds = 5,
24
-  nRepeats = 20,
25
-  nCores = 1,
26
-  characteristicsLabel = NULL
27
-)
14
+crossValidate(measurements, outcomes, ...)
28 15
 
29 16
 \S4method{crossValidate}{DataFrame}(
30 17
   measurements,
31
-  classes,
18
+  outcomes,
19
+  assayName = NULL,
32 20
   nFeatures = 20,
33 21
   selectionMethod = "t-test",
34 22
   selectionOptimisation = "Resubstitution",
35 23
   classifier = "randomForest",
36 24
   multiViewMethod = "none",
37
-  dataCombinations = NULL,
25
+  assayCombinations = NULL,
38 26
   nFolds = 5,
39 27
   nRepeats = 20,
40 28
   nCores = 1,
... ...
@@ -43,13 +31,13 @@ crossValidate(
43 31
 
44 32
 \S4method{crossValidate}{MultiAssayExperiment}(
45 33
   measurements,
46
-  classes,
34
+  outcomes,
47 35
   nFeatures = 20,
48 36
   selectionMethod = "t-test",
49 37
   selectionOptimisation = "Resubstitution",
50 38
   classifier = "randomForest",
51 39
   multiViewMethod = "none",
52
-  dataCombinations = NULL,
40
+  assayCombinations = NULL,
53 41
   nFolds = 5,
54 42
   nRepeats = 20,
55 43
   nCores = 1,
... ...
@@ -58,13 +46,14 @@ crossValidate(
58 46
 
59 47
 \S4method{crossValidate}{data.frame}(
60 48
   measurements,
61
-  classes,
49
+  outcomes,
50
+  assayName = NULL,
62 51
   nFeatures = 20,
63 52
   selectionMethod = "t-test",
64 53
   selectionOptimisation = "Resubstitution",
65 54
   classifier = "randomForest",
66 55
   multiViewMethod = "none",
67
-  dataCombinations = NULL,
56
+  assayCombinations = NULL,
68 57
   nFolds = 5,
69 58
   nRepeats = 20,
70 59
   nCores = 1,
... ...
@@ -73,13 +62,14 @@ crossValidate(
73 62
 
74 63
 \S4method{crossValidate}{matrix}(
75 64
   measurements,
76
-  classes,
65
+  outcomes,
66
+  assayName = NULL,
77 67
   nFeatures = 20,
78 68
   selectionMethod = "t-test",
79 69
   selectionOptimisation = "Resubstitution",
80 70
   classifier = "randomForest",
81 71
   multiViewMethod = "none",
82
-  dataCombinations = NULL,
72
+  assayCombinations = NULL,
83 73
   nFolds = 5,
84 74
   nRepeats = 20,
85 75
   nCores = 1,
... ...
@@ -88,13 +78,13 @@ crossValidate(
88 78
 
89 79
 \S4method{crossValidate}{list}(
90 80
   measurements,
91