... | ... |
@@ -1308,7 +1308,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1308 | 1308 |
#' modellingParams <- ModellingParams() |
1309 | 1309 |
#' classified <- |
1310 | 1310 |
#' runTests(measurements, classes, LOOCVparams, modellingParams, |
1311 |
-#' DataFrame(characteristic = c("dataset", "classification"), |
|
1311 |
+#' DataFrame(characteristic = c("Data Set", "Classification"), |
|
1312 | 1312 |
#' value = c("Asthma", "Different Means")) |
1313 | 1313 |
#' ) |
1314 | 1314 |
#' class(classified) |
... | ... |
@@ -7,23 +7,28 @@ |
7 | 7 |
#' or a list of these objects containing the training data. For a |
8 | 8 |
#' \code{matrix} and \code{data.frame}, the rows are samples and the columns are features. For a \code{data.frame} or \code{\link{MultiAssayExperiment}} assay |
9 | 9 |
#' the rows are features and the columns are samples, as is typical in Bioconductor. |
10 |
-#' @param classes A vector of class labels of class \code{\link{factor}} of the |
|
10 |
+#' @param outcomes A vector of class labels of class \code{\link{factor}} of the |
|
11 | 11 |
#' same length as the number of samples in \code{measurements} or a character vector of length 1 containing the |
12 | 12 |
#' column name in \code{measurements} if it is a \code{\link{DataFrame}} or the |
13 | 13 |
#' column name in \code{colData(measurements)} if \code{measurements} is a \code{\link{MultiAssayExperiment}}. If a column name, that column will be |
14 |
-#' removed before training. |
|
14 |
+#' removed before training. Or a \code{\link{Surv}} object or a character vector of length 2 or 3 specifying the time and event columns in |
|
15 |
+#' \code{measurements} for survival outcome. |
|
16 |
+#' @param ... Arguments other than measurements and outcomes in the generic. |
|
17 |
+#' @param assayName An informative name describing the data (e.g. RNA-seq) table if the input is a data frame or matrix. Not used if input |
|
18 |
+#' is \code{MultiAssayExperiment} or other list-like structure because it will already have assay names in the experiment list. This |
|
19 |
+#' name will be stored in the characteristics table of the result as Assay Name characteristic. |
|
15 | 20 |
#' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons |
16 |
-#' or datasets. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple datasets, |
|
17 |
-#' a different number of features will be used for each dataset. If a named list of vectors, the respective number of features will be optimised over. |
|
21 |
+#' or assays. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple assays, |
|
22 |
+#' a different number of features will be used for each assay. If a named list of vectors, the respective number of features will be optimised over. |
|
18 | 23 |
#' Set to NULL or "all" if all features should be used. |
19 |
-#' @param selectionMethod A character vector of feature selection methods to compare. If a named character vector with names corresponding to different datasets, |
|
20 |
-#' and performing multiview classification, the respective classification methods will be used on each dataset. |
|
24 |
+#' @param selectionMethod A character vector of feature selection methods to compare. If a named character vector with names corresponding to different assays, |
|
25 |
+#' and performing multiview classification, the respective classification methods will be used on each assay. |
|
21 | 26 |
#' @param selectionOptimisation A character of "Resubstitution", "Nested CV" or "none" specifying the approach used to optimise nFeatures. |
22 |
-#' @param classifier A character vector of classification methods to compare. If a named character vector with names corresponding to different datasets, |
|
23 |
-#' and performing multiview classification, the respective classification methods will be used on each dataset. |
|
27 |
+#' @param classifier A character vector of classification methods to compare. If a named character vector with names corresponding to different assays, |
|
28 |
+#' and performing multiview classification, the respective classification methods will be used on each assay. |
|
24 | 29 |
#' @param multiViewMethod A character vector specifying the multiview method or data integration approach to use. |
25 |
-#' @param dataCombinations A character vector or list of character vectors proposing the datasets or, in the case of a list, combination of datasets to use |
|
26 |
-#' with each element being a vector of datasets to combine. |
|
30 |
+#' @param assayCombinations A character vector or list of character vectors proposing the assays or, in the case of a list, combination of assays to use |
|
31 |
+#' with each element being a vector of assays to combine. |
|
27 | 32 |
#' @param nFolds A numeric specifying the number of folds to use for cross-validation. |
28 | 33 |
#' @param nRepeats A numeric specifying the the number of repeats or permutations to use for cross-validation. |
29 | 34 |
#' @param nCores A numeric specifying the number of cores used if the user wants to use parallelisation. |
... | ... |
@@ -36,9 +41,9 @@ |
36 | 41 |
#' |
37 | 42 |
#' \code{selectionMethod} can be any of the following implemented approaches - none, t-test, limma, edgeR, NSC, Bartlett, Levene, DMD, likelihoodRatio, KS or KL. |
38 | 43 |
#' |
39 |
-#' \code{multiViewMethod} can take a few different values. Using \code{merge} will merge or bind the datasets after feature selection. |
|
40 |
-#' Using \code{prevlidation} will build prevalidated vectors on all the datasets except the clinical data. There must be a dataset called clinical. |
|
41 |
-#' Using \code{pca} will perform pca on each dataset and then merge the top few components with the clinical data. There must be a dataset called clinical. |
|
44 |
+#' \code{multiViewMethod} can take a few different values. Using \code{merge} will merge or bind the assays after feature selection. |
|
45 |
+#' Using \code{prevalidation} will build prevalidated vectors on all the assays except the clinical data. There must be a assay called clinical. |
|
46 |
+#' Using \code{PCA} will perform Principal Components Analysis on each assay and then merge the top few components with the clinical data. There must be a assay called clinical. |
|
42 | 47 |
#' |
43 | 48 |
#' @return An object of class \code{\link{ClassifyResult}} |
44 | 49 |
#' @export |
... | ... |
@@ -55,52 +60,42 @@ |
55 | 60 |
#' performancePlot(result) |
56 | 61 |
#' |
57 | 62 |
#' |
58 |
-#' # Compare performance of different datasets. |
|
59 |
-#' # First make a toy example dataset with multiple data types. We'll randomly assign different features to be clinical, gene or protein. |
|
63 |
+#' # Compare performance of different assays. |
|
64 |
+#' # First make a toy example assay with multiple data types. We'll randomly assign different features to be clinical, gene or protein. |
|
60 | 65 |
#' # set.seed(51773) |
61 | 66 |
#' # measurements <- DataFrame(measurements, check.names = FALSE) |
62 |
-#' # mcols(measurements)$dataset <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE)) |
|
67 |
+#' # mcols(measurements)$assay <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE)) |
|
63 | 68 |
#' # mcols(measurements)$feature <- colnames(measurements) |
64 | 69 |
#' |
65 |
-#' # We'll use different nFeatures for each dataset. We'll also use repeated cross-validation with 5 repeats for speed in the example. |
|
70 |
+#' # We'll use different nFeatures for each assay. We'll also use repeated cross-validation with 5 repeats for speed in the example. |
|
66 | 71 |
#' # set.seed(51773) |
67 | 72 |
#' #result <- crossValidate(measurements, classes, nFeatures = c(clinical = 5, gene = 20, protein = 30), classifier = "randomForest", nRepeats = 5) |
68 | 73 |
#' # performancePlot(result) |
69 | 74 |
#' |
70 |
-#' # Merge different datasets. But we will only do this for two combinations. If dataCombinations is not specified it would attempt all combinations. |
|
75 |
+#' # Merge different assays. But we will only do this for two combinations. If assayCombinations is not specified it would attempt all combinations. |
|
71 | 76 |
#' # set.seed(51773) |
72 |
-#' # resultMerge <- crossValidate(measurements, classes, dataCombinations = list(c("clinical", "protein"), c("clinical", "gene")), multiViewMethod = "merge", nRepeats = 5) |
|
77 |
+#' # resultMerge <- crossValidate(measurements, classes, assayCombinations = list(c("clinical", "protein"), c("clinical", "gene")), multiViewMethod = "merge", nRepeats = 5) |
|
73 | 78 |
#' # performancePlot(resultMerge) |
74 | 79 |
#' |
75 | 80 |
#' |
76 | 81 |
#' # performancePlot(c(result, resultMerge)) |
77 | 82 |
#' |
78 | 83 |
#' @importFrom survival Surv |
79 |
-setGeneric("crossValidate", function(measurements, |
|
80 |
- classes, |
|
81 |
- nFeatures = 20, |
|
82 |
- selectionMethod = "t-test", |
|
83 |
- selectionOptimisation = "Resubstitution", |
|
84 |
- classifier = "randomForest", |
|
85 |
- multiViewMethod = "none", |
|
86 |
- dataCombinations = NULL, |
|
87 |
- nFolds = 5, |
|
88 |
- nRepeats = 20, |
|
89 |
- nCores = 1, |
|
90 |
- characteristicsLabel = NULL) |
|
84 |
+setGeneric("crossValidate", function(measurements, outcomes, ...) |
|
91 | 85 |
standardGeneric("crossValidate")) |
92 | 86 |
|
93 | 87 |
#' @rdname crossValidate |
94 | 88 |
#' @export |
95 | 89 |
setMethod("crossValidate", "DataFrame", |
96 | 90 |
function(measurements, |
97 |
- classes, |
|
91 |
+ outcomes, |
|
92 |
+ assayName = NULL, |
|
98 | 93 |
nFeatures = 20, |
99 | 94 |
selectionMethod = "t-test", |
100 | 95 |
selectionOptimisation = "Resubstitution", |
101 | 96 |
classifier = "randomForest", |
102 | 97 |
multiViewMethod = "none", |
103 |
- dataCombinations = NULL, |
|
98 |
+ assayCombinations = NULL, |
|
104 | 99 |
nFolds = 5, |
105 | 100 |
nRepeats = 20, |
106 | 101 |
nCores = 1, |
... | ... |
@@ -108,10 +103,16 @@ setMethod("crossValidate", "DataFrame", |
108 | 103 |
|
109 | 104 |
{ |
110 | 105 |
# Check that data is in the right format |
111 |
- splitDataset <- .splitDataAndOutcomes(measurements, classes) |
|
112 |
- measurements <- splitDataset[["measurements"]] |
|
113 |
- classes <- splitDataset[["outcomes"]] |
|
114 |
- checkData(measurements, classes) |
|
106 |
+ splitAssay <- .splitDataAndOutcomes(measurements, outcomes) |
|
107 |
+ measurements <- splitAssay[["measurements"]] |
|
108 |
+ outcomes <- splitAssay[["outcomes"]] |
|
109 |
+ |
|
110 |
+ # Which data-types or data-views are present? |
|
111 |
+ assayIDs <- unique(mcols(measurements)[, "assay"]) |
|
112 |
+ if(is.null(assayIDs)) |
|
113 |
+ assayIDs <- 1 |
|
114 |
+ |
|
115 |
+ checkData(measurements, outcomes) |
|
115 | 116 |
|
116 | 117 |
# Check that other variables are in the right format and fix |
117 | 118 |
nFeatures <- cleanNFeatures(nFeatures = nFeatures, |
... | ... |
@@ -128,10 +129,6 @@ setMethod("crossValidate", "DataFrame", |
128 | 129 |
|
129 | 130 |
classifier <- cleanClassifier(classifier = classifier, |
130 | 131 |
measurements = measurements) |
131 |
- |
|
132 |
- # Which data-types or data-views are present? |
|
133 |
- datasetIDs <- unique(mcols(measurements)[, "dataset"]) |
|
134 |
- if(is.null(datasetIDs)) datasetIDs <- 1 |
|
135 | 132 |
|
136 | 133 |
##!!!!! Do something with data combinations |
137 | 134 |
|
... | ... |
@@ -146,34 +143,34 @@ setMethod("crossValidate", "DataFrame", |
146 | 143 |
|
147 | 144 |
if(multiViewMethod == "none"){ |
148 | 145 |
|
149 |
- # The below loops over dataset and classifier and allows us to answer |
|
146 |
+ # The below loops over assay and classifier and allows us to answer |
|
150 | 147 |
# the following questions: |
151 | 148 |
# |
152 |
- # 1) One dataset using one classifier |
|
153 |
- # 2) One dataset using multi classifiers |
|
154 |
- # 3) Multi datasets individually |
|
149 |
+ # 1) One assay using one classifier |
|
150 |
+ # 2) One assay using multi classifiers |
|
151 |
+ # 3) Multi assays individually |
|
155 | 152 |
|
156 | 153 |
# We should probably transition this to use grid instead. |
157 |
- |
|
154 |
+ |
|
158 | 155 |
resClassifier <- |
159 |
- sapply(datasetIDs, function(dataIndex) { |
|
160 |
- # Loop over datasets |
|
161 |
- sapply(classifier[[dataIndex]], function(classifierIndex) { |
|
156 |
+ sapply(assayIDs, function(assayIndex) { |
|
157 |
+ # Loop over assays |
|
158 |
+ sapply(classifier[[assayIndex]], function(classifierIndex) { |
|
162 | 159 |
# Loop over classifiers |
163 |
- sapply(selectionMethod[[dataIndex]], function(selectionIndex) { |
|
160 |
+ sapply(selectionMethod[[assayIndex]], function(selectionIndex) { |
|
164 | 161 |
# Loop over classifiers |
165 | 162 |
set.seed(seed) |
166 | 163 |
measurementsUse <- measurements |
167 |
- if(!is.null(mcols(measurements))) measurementsUse <- measurements[, mcols(measurements)[, "dataset"] == dataIndex, drop = FALSE] |
|
168 |
- |
|
164 |
+ if(!is.null(assayName)) attr(measurementsUse, "assayName") <- assayName |
|
165 |
+ if(assayIndex != 1) measurementsUse <- measurements[, mcols(measurements)[, "assay"] == assayIndex, drop = FALSE] |
|
169 | 166 |
CV( |
170 |
- measurements = measurementsUse, classes = classes, |
|
171 |
- nFeatures = nFeatures[dataIndex], |
|
167 |
+ measurements = measurementsUse, outcomes = outcomes, |
|
168 |
+ assayIDs = assayIndex, |
|
169 |
+ nFeatures = nFeatures[assayIndex], |
|
172 | 170 |
selectionMethod = selectionIndex, |
173 | 171 |
selectionOptimisation = selectionOptimisation, |
174 | 172 |
classifier = classifierIndex, |
175 | 173 |
multiViewMethod = multiViewMethod, |
176 |
- dataCombinations = dataIndex, |
|
177 | 174 |
nFolds = nFolds, |
178 | 175 |
nRepeats = nRepeats, |
179 | 176 |
nCores = nCores, |
... | ... |
@@ -201,21 +198,20 @@ setMethod("crossValidate", "DataFrame", |
201 | 198 |
if(multiViewMethod == "merge"){ |
202 | 199 |
|
203 | 200 |
|
204 |
- # The below loops over different combinations of datasets and merges them together. |
|
205 |
- # This allows someone to answer which combinations of the datasets might be most useful. |
|
201 |
+ # The below loops over different combinations of assays and merges them together. |
|
202 |
+ # This allows someone to answer which combinations of the assays might be most useful. |
|
206 | 203 |
|
207 | 204 |
|
208 |
- if(is.null(dataCombinations)) dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE))) |
|
205 |
+ if(is.null(assayCombinations)) assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
|
209 | 206 |
|
210 |
- result <- sapply(dataCombinations, function(dataIndex){ |
|
211 |
- CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex], |
|
212 |
- classes = classes, |
|
213 |
- nFeatures = nFeatures[dataIndex], |
|
214 |
- selectionMethod = selectionMethod[dataIndex], |
|
207 |
+ result <- sapply(assayCombinations, function(assayIndex){ |
|
208 |
+ CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex], |
|
209 |
+ outcomes = outcomes, assayIDs = assayIndex, |
|
210 |
+ nFeatures = nFeatures[assayIndex], |
|
211 |
+ selectionMethod = selectionMethod[assayIndex], |
|
215 | 212 |
selectionOptimisation = selectionOptimisation, |
216 |
- classifier = classifier[dataIndex], |
|
217 |
- multiViewMethod = ifelse(length(dataIndex)==1, "none", multiViewMethod), |
|
218 |
- dataCombinations = dataIndex, |
|
213 |
+ classifier = classifier[assayIndex], |
|
214 |
+ multiViewMethod = ifelse(length(assayIndex) == 1, "none", multiViewMethod), |
|
219 | 215 |
nFolds = nFolds, |
220 | 216 |
nRepeats = nRepeats, |
221 | 217 |
nCores = nCores, |
... | ... |
@@ -229,26 +225,26 @@ setMethod("crossValidate", "DataFrame", |
229 | 225 |
if(multiViewMethod == "prevalidation"){ |
230 | 226 |
|
231 | 227 |
|
232 |
- # The below loops over different combinations of datasets and combines them together using prevalidation. |
|
233 |
- # This allows someone to answer which combinations of the datasets might be most useful. |
|
228 |
+ # The below loops over different combinations of assays and combines them together using prevalidation. |
|
229 |
+ # This allows someone to answer which combinations of the assays might be most useful. |
|
234 | 230 |
|
235 | 231 |
|
236 |
- if(is.null(dataCombinations)){ |
|
237 |
- dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE))) |
|
238 |
- dataCombinations <- dataCombinations[sapply(dataCombinations, function(x)"clinical"%in%x, simplify = TRUE)] |
|
239 |
- if(length(dataCombinations)==0) stop("No dataCombinations with `clinical` data") |
|
232 |
+ if(is.null(assayCombinations)) |
|
233 |
+ { |
|
234 |
+ assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
|
235 |
+ assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)] |
|
236 |
+ if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data") |
|
240 | 237 |
} |
241 | 238 |
|
242 | 239 |
|
243 |
- result <- sapply(dataCombinations, function(dataIndex){ |
|
244 |
- CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex], |
|
245 |
- classes = classes, |
|
246 |
- nFeatures = nFeatures[dataIndex], |
|
247 |
- selectionMethod = selectionMethod[dataIndex], |
|
240 |
+ result <- sapply(assayCombinations, function(assayIndex){ |
|
241 |
+ CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex], |
|
242 |
+ outcomes = outcomes, assayIDs = assayIndex, |
|
243 |
+ nFeatures = nFeatures[assayIndex], |
|
244 |
+ selectionMethod = selectionMethod[assayIndex], |
|
248 | 245 |
selectionOptimisation = selectionOptimisation, |
249 |
- classifier = classifier[dataIndex], |
|
250 |
- multiViewMethod = ifelse(length(dataIndex)==1, "none", multiViewMethod), |
|
251 |
- dataCombinations = dataIndex, |
|
246 |
+ classifier = classifier[assayIndex], |
|
247 |
+ multiViewMethod = ifelse(length(assayIndex) == 1, "none", multiViewMethod), |
|
252 | 248 |
nFolds = nFolds, |
253 | 249 |
nRepeats = nRepeats, |
254 | 250 |
nCores = nCores, |
... | ... |
@@ -259,30 +255,29 @@ setMethod("crossValidate", "DataFrame", |
259 | 255 |
|
260 | 256 |
|
261 | 257 |
|
262 |
- ### Prevalidation to combine data |
|
263 |
- if(multiViewMethod == "pca"){ |
|
258 |
+ ### Principal Components Analysis to combine data |
|
259 |
+ if(multiViewMethod == "PCA"){ |
|
264 | 260 |
|
265 | 261 |
|
266 |
- # The below loops over different combinations of datasets and combines them together using prevalidation. |
|
267 |
- # This allows someone to answer which combinations of the datasets might be most useful. |
|
262 |
+ # The below loops over different combinations of assays and combines them together using prevalidation. |
|
263 |
+ # This allows someone to answer which combinations of the assays might be most useful. |
|
268 | 264 |
|
269 | 265 |
|
270 |
- if(is.null(dataCombinations)){ |
|
271 |
- dataCombinations <- do.call("c", sapply(seq_along(datasetIDs),function(n)combn(datasetIDs, n, simplify = FALSE))) |
|
272 |
- dataCombinations <- dataCombinations[sapply(dataCombinations, function(x)"clinical"%in%x, simplify = TRUE)] |
|
273 |
- if(length(dataCombinations)==0) stop("No dataCombinations with `clinical` data") |
|
266 |
+ if(is.null(assayCombinations)){ |
|
267 |
+ assayCombinations <- do.call("c", sapply(seq_along(assayIDs),function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
|
268 |
+ assayCombinations <- assayCombinations[sapply(assayCombinations, function(combination) "clinical" %in% combination, simplify = TRUE)] |
|
269 |
+ if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data") |
|
274 | 270 |
} |
275 | 271 |
|
276 | 272 |
|
277 |
- result <- sapply(dataCombinations, function(dataIndex){ |
|
278 |
- CV(measurements = measurements[, mcols(measurements)$dataset %in% dataIndex], |
|
279 |
- classes = classes, |
|
280 |
- nFeatures = nFeatures[dataIndex], |
|
281 |
- selectionMethod = selectionMethod[dataIndex], |
|
273 |
+ result <- sapply(assayCombinations, function(assayIndex){ |
|
274 |
+ CV(measurements = measurements[, mcols(measurements)$assay %in% assayIndex], |
|
275 |
+ outcomes = outcomes, assayIDs = assayIndex, |
|
276 |
+ nFeatures = nFeatures[assayIndex], |
|
277 |
+ selectionMethod = selectionMethod[assayIndex], |
|
282 | 278 |
selectionOptimisation = selectionOptimisation, |
283 |
- classifier = classifier[dataIndex], |
|
284 |
- multiViewMethod = ifelse(length(dataIndex)==1, "none", multiViewMethod), |
|
285 |
- dataCombinations = dataIndex, |
|
279 |
+ classifier = classifier[assayIndex], |
|
280 |
+ multiViewMethod = ifelse(length(assayIndex) == 1, "none", multiViewMethod), |
|
286 | 281 |
nFolds = nFolds, |
287 | 282 |
nRepeats = nRepeats, |
288 | 283 |
nCores = nCores, |
... | ... |
@@ -301,13 +296,13 @@ setMethod("crossValidate", "DataFrame", |
301 | 296 |
# One or more omics data sets, possibly with clinical data. |
302 | 297 |
setMethod("crossValidate", "MultiAssayExperiment", |
303 | 298 |
function(measurements, |
304 |
- classes, |
|
299 |
+ outcomes, |
|
305 | 300 |
nFeatures = 20, |
306 | 301 |
selectionMethod = "t-test", |
307 | 302 |
selectionOptimisation = "Resubstitution", |
308 | 303 |
classifier = "randomForest", |
309 | 304 |
multiViewMethod = "none", |
310 |
- dataCombinations = NULL, |
|
305 |
+ assayCombinations = NULL, |
|
311 | 306 |
nFolds = 5, |
312 | 307 |
nRepeats = 20, |
313 | 308 |
nCores = 1, |
... | ... |
@@ -321,18 +316,18 @@ setMethod("crossValidate", "MultiAssayExperiment", |
321 | 316 |
stop("Data set contains replicates. Please provide remove or average replicate observations and try again.") |
322 | 317 |
} |
323 | 318 |
|
324 |
- tablesAndClasses <- .MAEtoWideTable(measurements, targets, classes, restrict = NULL) |
|
325 |
- measurements <- tablesAndClasses[["dataTable"]] |
|
326 |
- classes <- tablesAndClasses[["outcomes"]] |
|
319 |
+ tablesAndoutcomes <- .MAEtoWideTable(measurements, targets, outcomes, restrict = NULL) |
|
320 |
+ measurements <- tablesAndoutcomes[["dataTable"]] |
|
321 |
+ outcomes <- tablesAndoutcomes[["outcomes"]] |
|
327 | 322 |
|
328 | 323 |
crossValidate(measurements = measurements, |
329 |
- classes = classes, |
|
324 |
+ outcomes = outcomes, |
|
330 | 325 |
nFeatures = nFeatures, |
331 | 326 |
selectionMethod = selectionMethod, |
332 | 327 |
selectionOptimisation = selectionOptimisation, |
333 | 328 |
classifier = classifier, |
334 | 329 |
multiViewMethod = multiViewMethod, |
335 |
- dataCombinations = dataCombinations, |
|
330 |
+ assayCombinations = assayCombinations, |
|
336 | 331 |
nFolds = nFolds, |
337 | 332 |
nRepeats = nRepeats, |
338 | 333 |
nCores = nCores, |
... | ... |
@@ -343,13 +338,14 @@ setMethod("crossValidate", "MultiAssayExperiment", |
343 | 338 |
#' @export |
344 | 339 |
setMethod("crossValidate", "data.frame", # data.frame of numeric measurements. |
345 | 340 |
function(measurements, |
346 |
- classes, |
|
341 |
+ outcomes, |
|
342 |
+ assayName = NULL, |
|
347 | 343 |
nFeatures = 20, |
348 | 344 |
selectionMethod = "t-test", |
349 | 345 |
selectionOptimisation = "Resubstitution", |
350 | 346 |
classifier = "randomForest", |
351 | 347 |
multiViewMethod = "none", |
352 |
- dataCombinations = NULL, |
|
348 |
+ assayCombinations = NULL, |
|
353 | 349 |
nFolds = 5, |
354 | 350 |
nRepeats = 20, |
355 | 351 |
nCores = 1, |
... | ... |
@@ -357,13 +353,14 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements. |
357 | 353 |
{ |
358 | 354 |
measurements <- DataFrame(measurements) |
359 | 355 |
crossValidate(measurements = measurements, |
360 |
- classes = classes, |
|
356 |
+ outcomes = outcomes, |
|
357 |
+ assayName = assayName, |
|
361 | 358 |
nFeatures = nFeatures, |
362 | 359 |
selectionMethod = selectionMethod, |
363 | 360 |
selectionOptimisation = selectionOptimisation, |
364 | 361 |
classifier = classifier, |
365 | 362 |
multiViewMethod = multiViewMethod, |
366 |
- dataCombinations = dataCombinations, |
|
363 |
+ assayCombinations = assayCombinations, |
|
367 | 364 |
nFolds = nFolds, |
368 | 365 |
nRepeats = nRepeats, |
369 | 366 |
nCores = nCores, |
... | ... |
@@ -374,13 +371,14 @@ setMethod("crossValidate", "data.frame", # data.frame of numeric measurements. |
374 | 371 |
#' @export |
375 | 372 |
setMethod("crossValidate", "matrix", # Matrix of numeric measurements. |
376 | 373 |
function(measurements, |
377 |
- classes, |
|
374 |
+ outcomes, |
|
375 |
+ assayName = NULL, |
|
378 | 376 |
nFeatures = 20, |
379 | 377 |
selectionMethod = "t-test", |
380 | 378 |
selectionOptimisation = "Resubstitution", |
381 | 379 |
classifier = "randomForest", |
382 | 380 |
multiViewMethod = "none", |
383 |
- dataCombinations = NULL, |
|
381 |
+ assayCombinations = NULL, |
|
384 | 382 |
nFolds = 5, |
385 | 383 |
nRepeats = 20, |
386 | 384 |
nCores = 1, |
... | ... |
@@ -388,13 +386,14 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements. |
388 | 386 |
{ |
389 | 387 |
measurements <- S4Vectors::DataFrame(measurements, check.names = FALSE) |
390 | 388 |
crossValidate(measurements = measurements, |
391 |
- classes = classes, |
|
389 |
+ outcomes = outcomes, |
|
390 |
+ assayName = assayName, |
|
392 | 391 |
nFeatures = nFeatures, |
393 | 392 |
selectionMethod = selectionMethod, |
394 | 393 |
selectionOptimisation = selectionOptimisation, |
395 | 394 |
classifier = classifier, |
396 | 395 |
multiViewMethod = multiViewMethod, |
397 |
- dataCombinations = dataCombinations, |
|
396 |
+ assayCombinations = assayCombinations, |
|
398 | 397 |
nFolds = nFolds, |
399 | 398 |
nRepeats = nRepeats, |
400 | 399 |
nCores = nCores, |
... | ... |
@@ -408,13 +407,13 @@ setMethod("crossValidate", "matrix", # Matrix of numeric measurements. |
408 | 407 |
#' @export |
409 | 408 |
setMethod("crossValidate", "list", |
410 | 409 |
function(measurements, |
411 |
- classes, |
|
410 |
+ outcomes, |
|
412 | 411 |
nFeatures = 20, |
413 | 412 |
selectionMethod = "t-test", |
414 | 413 |
selectionOptimisation = "Resubstitution", |
415 | 414 |
classifier = "randomForest", |
416 | 415 |
multiViewMethod = "none", |
417 |
- dataCombinations = NULL, |
|
416 |
+ assayCombinations = NULL, |
|
418 | 417 |
nFolds = 5, |
419 | 418 |
nRepeats = 20, |
420 | 419 |
nCores = 1, |
... | ... |
@@ -424,12 +423,12 @@ setMethod("crossValidate", "list", |
424 | 423 |
|
425 | 424 |
# Check if the list only contains one data type |
426 | 425 |
if (measurements |> sapply(class) |> unique() |> length() != 1) { |
427 |
- stop("All datasets must be of the same type (e.g. data.frame, matrix)") |
|
426 |
+ stop("All assays must be of the same type (e.g. data.frame, matrix)") |
|
428 | 427 |
} |
429 | 428 |
|
430 | 429 |
# Check data type is valid |
431 | 430 |
if (!(measurements[[1]] |> class() %in% c("data.frame", "DataFrame", "matrix"))) { |
432 |
- stop("Datasets must be of type data.frame, DataFrame or matrix") |
|
431 |
+ stop("assays must be of type data.frame, DataFrame or matrix") |
|
433 | 432 |
} |
434 | 433 |
|
435 | 434 |
# Check the list is named |
... | ... |
@@ -437,21 +436,21 @@ setMethod("crossValidate", "list", |
437 | 436 |
stop("Measurements must be a named list") |
438 | 437 |
} |
439 | 438 |
|
440 |
- # Check same number of samples for all datasets |
|
439 |
+ # Check same number of samples for all assays |
|
441 | 440 |
if ((df_list |> sapply(dim))[2,] |> unique() |> length() != 1) { |
442 |
- stop("All datasets must have the same number of samples") |
|
441 |
+ stop("All assays must have the same number of samples") |
|
443 | 442 |
} |
444 | 443 |
|
445 |
- # Check the number of classes is the same |
|
444 |
+ # Check the number of outcomes is the same |
|
446 | 445 |
if ((df_list[[1]] |> dim())[2] != classes |> length()) { |
447 |
- stop("Classes must have same number of samples as measurements") |
|
446 |
+ stop("Outcomes must have same number of samples as measurements") |
|
448 | 447 |
} |
449 | 448 |
|
450 | 449 |
df_list <- sapply(measurements, t, simplify = FALSE) |
451 | 450 |
df_list <- sapply(df_list , S4Vectors::DataFrame) |
452 | 451 |
|
453 | 452 |
df_list <- mapply(function(meas, nam){ |
454 |
- mcols(meas)$dataset <- nam |
|
453 |
+ mcols(meas)$assay <- nam |
|
455 | 454 |
mcols(meas)$feature <- colnames(meas) |
456 | 455 |
meas |
457 | 456 |
}, df_list, names(df_list)) |
... | ... |
@@ -461,13 +460,13 @@ setMethod("crossValidate", "list", |
461 | 460 |
colnames(combined_df) <- mcols(combined_df)$feature |
462 | 461 |
|
463 | 462 |
crossValidate(measurements = combined_df, |
464 |
- classes = classes, |
|
463 |
+ outcomes = outcomes, |
|
465 | 464 |
nFeatures = nFeatures, |
466 | 465 |
selectionMethod = selectionMethod, |
467 | 466 |
selectionOptimisation = selectionOptimisation, |
468 | 467 |
classifier = classifier, |
469 | 468 |
multiViewMethod = multiViewMethod, |
470 |
- dataCombinations = dataCombinations, |
|
469 |
+ assayCombinations = assayCombinations, |
|
471 | 470 |
nFolds = nFolds, |
472 | 471 |
nRepeats = nRepeats, |
473 | 472 |
nCores = nCores, |
... | ... |
@@ -481,13 +480,13 @@ setMethod("crossValidate", "list", |
481 | 480 |
cleanNFeatures <- function(nFeatures, measurements){ |
482 | 481 |
#### Clean up |
483 | 482 |
if(!is.null(mcols(measurements))) |
484 |
- obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
483 |
+ obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"]))) |
|
485 | 484 |
else obsFeatures <- ncol(measurements) |
486 | 485 |
if(is.null(nFeatures) || length(nFeatures) == 1 && nFeatures == "all") nFeatures <- as.list(obsFeatures) |
487 | 486 |
if(is.null(names(nFeatures)) & length(nFeatures) == 1) nFeatures <- as.list(pmin(obsFeatures, nFeatures)) |
488 | 487 |
if(is.null(names(nFeatures)) & length(nFeatures) > 1) nFeatures <- sapply(obsFeatures, function(x)pmin(x, nFeatures), simplify = FALSE) |
489 |
- #if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.") |
|
490 |
- if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the datasets.") |
|
488 |
+ #if(is.null(names(nFeatures)) & length(nFeatures) > 1) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.") |
|
489 |
+ if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(nFeatures))) stop("nFeatures needs to be a named numeric vector or list with the same names as the assays.") |
|
491 | 490 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "numeric")) nFeatures <- as.list(pmin(obsFeatures, nFeatures[names(obsFeatures)])) |
492 | 491 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(nFeatures)) & is(nFeatures, "list")) nFeatures <- mapply(pmin, nFeatures[names(obsFeatures)], obsFeatures, SIMPLIFY = FALSE) |
493 | 492 |
nFeatures |
... | ... |
@@ -498,13 +497,13 @@ cleanNFeatures <- function(nFeatures, measurements){ |
498 | 497 |
cleanSelectionMethod <- function(selectionMethod, measurements){ |
499 | 498 |
#### Clean up |
500 | 499 |
if(!is.null(mcols(measurements))) |
501 |
- obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
500 |
+ obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"]))) |
|
502 | 501 |
else return(list(selectionMethod)) |
503 | 502 |
|
504 | 503 |
if(is.null(names(selectionMethod)) & length(selectionMethod) == 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE) |
505 | 504 |
if(is.null(names(selectionMethod)) & length(selectionMethod) > 1 & !is.null(names(obsFeatures))) selectionMethod <- sapply(names(obsFeatures), function(x) selectionMethod, simplify = FALSE) |
506 |
- #if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.") |
|
507 |
- if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the datasets.") |
|
505 |
+ #if(is.null(names(selectionMethod)) & length(selectionMethod) > 1) stop("selectionMethod needs to be a named character vector or list with the same names as the assays.") |
|
506 |
+ if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(selectionMethod))) stop("selectionMethod needs to be a named character vector or list with the same names as the assays.") |
|
508 | 507 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(selectionMethod)) & is(selectionMethod, "character")) selectionMethod <- as.list(selectionMethod[names(obsFeatures)]) |
509 | 508 |
selectionMethod |
510 | 509 |
} |
... | ... |
@@ -514,13 +513,13 @@ cleanSelectionMethod <- function(selectionMethod, measurements){ |
514 | 513 |
cleanClassifier <- function(classifier, measurements){ |
515 | 514 |
#### Clean up |
516 | 515 |
if(!is.null(mcols(measurements))) |
517 |
- obsFeatures <- unlist(as.list(table(mcols(measurements)[, "dataset"]))) |
|
516 |
+ obsFeatures <- unlist(as.list(table(mcols(measurements)[, "assay"]))) |
|
518 | 517 |
else return(list(classifier)) |
519 | 518 |
|
520 | 519 |
if(is.null(names(classifier)) & length(classifier) == 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE) |
521 | 520 |
if(is.null(names(classifier)) & length(classifier) > 1 & !is.null(names(obsFeatures))) classifier <- sapply(names(obsFeatures), function(x)classifier, simplify = FALSE) |
522 |
- #if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the datasets.") |
|
523 |
- if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the datasets.") |
|
521 |
+ #if(is.null(names(classifier)) & length(classifier) > 1) stop("classifier needs to be a named character vector or list with the same names as the assays.") |
|
522 |
+ if(!is.null(names(obsFeatures)) && !all(names(obsFeatures) %in% names(classifier))) stop("classifier needs to be a named character vector or list with the same names as the assays.") |
|
524 | 523 |
if(!is.null(names(obsFeatures)) && all(names(obsFeatures) %in% names(classifier)) & is(classifier, "character")) classifier <- as.list(classifier[names(obsFeatures)]) |
525 | 524 |
classifier |
526 | 525 |
} |
... | ... |
@@ -566,13 +565,13 @@ generateCrossValParams <- function(nRepeats, nFolds, nCores, selectionOptimisati |
566 | 565 |
|
567 | 566 |
###################################### |
568 | 567 |
###################################### |
569 |
-checkData <- function(measurements, classes){ |
|
568 |
+checkData <- function(measurements, outcomes){ |
|
570 | 569 |
if(is.null(rownames(measurements))) |
571 | 570 |
stop("'measurements' DataFrame must have sample identifiers as its row names.") |
572 | 571 |
if(any(is.na(measurements))) |
573 | 572 |
stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.") |
574 | 573 |
|
575 |
- # !!! Need to check mcols has dataset NUm |
|
574 |
+ # !!! Need to check mcols has assay NUm |
|
576 | 575 |
|
577 | 576 |
} |
578 | 577 |
###################################### |
... | ... |
@@ -584,19 +583,19 @@ checkData <- function(measurements, classes){ |
584 | 583 |
#' A function to generate a ModellingParams object |
585 | 584 |
#' |
586 | 585 |
#' @inheritParams crossValidate |
587 |
-#' @param datasetIDs A vector of data set identifiers as long at the number of data sets. |
|
586 |
+#' @param assayIDs A vector of data set identifiers as long at the number of data sets. |
|
588 | 587 |
#' |
589 | 588 |
#' @return ModellingParams object |
590 | 589 |
#' @export |
591 | 590 |
#' |
592 | 591 |
#' @examples |
593 | 592 |
#' data(asthma) |
594 |
-#' # First make a toy example dataset with multiple data types. We'll randomly assign different features to be clinical, gene or protein. |
|
593 |
+#' # First make a toy example assay with multiple data types. We'll randomly assign different features to be clinical, gene or protein. |
|
595 | 594 |
#' set.seed(51773) |
596 | 595 |
#' measurements <- DataFrame(measurements, check.names = FALSE) |
597 |
-#' mcols(measurements)$dataset <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE)) |
|
596 |
+#' mcols(measurements)$assay <- c(rep("clinical",20),sample(c("gene", "protein"), ncol(measurements)-20, replace = TRUE)) |
|
598 | 597 |
#' mcols(measurements)$feature <- colnames(measurements) |
599 |
-#' modellingParams <- generateModellingParams(datasetIDs = c("clinical", "gene", "protein"), |
|
598 |
+#' modellingParams <- generateModellingParams(assayIDs = c("clinical", "gene", "protein"), |
|
600 | 599 |
#' measurements = measurements, |
601 | 600 |
#' nFeatures = list(clinical = 10, gene = 10, protein = 10), |
602 | 601 |
#' selectionMethod = list(clinical = "t-test", gene = "t-test", protein = "t-test"), |
... | ... |
@@ -604,7 +603,7 @@ checkData <- function(measurements, classes){ |
604 | 603 |
#' classifier = "randomForest", |
605 | 604 |
#' multiViewMethod = "merge") |
606 | 605 |
#' @import BiocParallel |
607 |
-generateModellingParams <- function(datasetIDs, |
|
606 |
+generateModellingParams <- function(assayIDs, |
|
608 | 607 |
measurements, |
609 | 608 |
nFeatures, |
610 | 609 |
selectionMethod, |
... | ... |
@@ -613,7 +612,7 @@ generateModellingParams <- function(datasetIDs, |
613 | 612 |
multiViewMethod = "none" |
614 | 613 |
){ |
615 | 614 |
if(multiViewMethod != "none") { |
616 |
- params <- generateMultiviewParams(datasetIDs, |
|
615 |
+ params <- generateMultiviewParams(assayIDs, |
|
617 | 616 |
measurements, |
618 | 617 |
nFeatures, |
619 | 618 |
selectionMethod, |
... | ... |
@@ -626,7 +625,7 @@ generateModellingParams <- function(datasetIDs, |
626 | 625 |
|
627 | 626 |
|
628 | 627 |
|
629 |
- if(length(datasetIDs) > 1) obsFeatures <- sum(mcols(measurements)[, "dataset"] %in% datasetIDs) |
|
628 |
+ if(length(assayIDs) > 1) obsFeatures <- sum(mcols(measurements)[, "assay"] %in% assayIDs) |
|
630 | 629 |
else obsFeatures <- ncol(measurements) |
631 | 630 |
|
632 | 631 |
|
... | ... |
@@ -705,8 +704,8 @@ generateModellingParams <- function(datasetIDs, |
705 | 704 |
|
706 | 705 |
# |
707 | 706 |
# if(multiViewMethod == "prevalidation"){ |
708 |
- # params$trainParams <- function(measurements, classes) prevalTrainInterface(measurements, classes, params) |
|
709 |
- # params$trainParams <- function(measurements, classes) prevalTrainInterface(measurements, classes, params) |
|
707 |
+ # params$trainParams <- function(measurements, outcomes) prevalTrainInterface(measurements, outcomes, params) |
|
708 |
+ # params$trainParams <- function(measurements, outcomes) prevalTrainInterface(measurements, outcomes, params) |
|
710 | 709 |
# } |
711 | 710 |
# |
712 | 711 |
|
... | ... |
@@ -718,7 +717,7 @@ generateModellingParams <- function(datasetIDs, |
718 | 717 |
|
719 | 718 |
|
720 | 719 |
|
721 |
-generateMultiviewParams <- function(datasetIDs, |
|
720 |
+generateMultiviewParams <- function(assayIDs, |
|
722 | 721 |
measurements, |
723 | 722 |
nFeatures, |
724 | 723 |
selectionMethod, |
... | ... |
@@ -730,15 +729,15 @@ generateMultiviewParams <- function(datasetIDs, |
730 | 729 |
|
731 | 730 |
if(length(classifier) > 1) classifier <- classifier[[1]] |
732 | 731 |
|
733 |
- # Split measurements up by dataset. |
|
734 |
- assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE) |
|
732 |
+ # Split measurements up by assay. |
|
733 |
+ assayTrain <- sapply(assayIDs, function(assayID) if(assayID == 1) measurements else measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE) |
|
735 | 734 |
|
736 |
- # Generate params for each dataset. This could be extended to have different selectionMethods for each type |
|
737 |
- paramsDatasets <- mapply(generateModellingParams, |
|
738 |
- nFeatures = nFeatures[datasetIDs], |
|
739 |
- selectionMethod = selectionMethod[datasetIDs], |
|
740 |
- datasetIDs = datasetIDs, |
|
741 |
- measurements = assayTrain[datasetIDs], |
|
735 |
+ # Generate params for each assay. This could be extended to have different selectionMethods for each type |
|
736 |
+ paramsassays <- mapply(generateModellingParams, |
|
737 |
+ nFeatures = nFeatures[assayIDs], |
|
738 |
+ selectionMethod = selectionMethod[assayIDs], |
|
739 |
+ assayIDs = assayIDs, |
|
740 |
+ measurements = assayTrain[assayIDs], |
|
742 | 741 |
MoreArgs = list( |
743 | 742 |
selectionOptimisation = selectionOptimisation, |
744 | 743 |
classifier = classifier, |
... | ... |
@@ -746,7 +745,7 @@ generateMultiviewParams <- function(datasetIDs, |
746 | 745 |
SIMPLIFY = FALSE) |
747 | 746 |
|
748 | 747 |
# Generate some params for merged model. |
749 |
- params <- generateModellingParams(datasetIDs = datasetIDs, |
|
748 |
+ params <- generateModellingParams(assayIDs = assayIDs, |
|
750 | 749 |
measurements = measurements, |
751 | 750 |
nFeatures = nFeatures, |
752 | 751 |
selectionMethod = selectionMethod, |
... | ... |
@@ -756,7 +755,7 @@ generateMultiviewParams <- function(datasetIDs, |
756 | 755 |
|
757 | 756 |
# Update selectParams to use |
758 | 757 |
params@selectParams <- SelectParams(selectMulti, |
759 |
- params = paramsDatasets, |
|
758 |
+ params = paramsassays, |
|
760 | 759 |
characteristics = S4Vectors::DataFrame(characteristic = "Selection Name", value = "merge"), |
761 | 760 |
tuneParams = list(nFeatures = nFeatures[[1]], |
762 | 761 |
performanceType = "Balanced Error", |
... | ... |
@@ -767,16 +766,16 @@ generateMultiviewParams <- function(datasetIDs, |
767 | 766 |
|
768 | 767 |
if(multiViewMethod == "prevalidation"){ |
769 | 768 |
|
770 |
- # Split measurements up by dataset. |
|
771 |
- assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE) |
|
769 |
+ # Split measurements up by assay. |
|
770 |
+ assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE) |
|
772 | 771 |
|
773 |
- # Generate params for each dataset. This could be extended to have different selectionMethods for each type |
|
774 |
- paramsDatasets <- mapply(generateModellingParams, |
|
775 |
- nFeatures = nFeatures[datasetIDs], |
|
776 |
- selectionMethod = selectionMethod[datasetIDs], |
|
777 |
- datasetIDs = datasetIDs, |
|
778 |
- measurements = assayTrain[datasetIDs], |
|
779 |
- classifier = classifier[datasetIDs], |
|
772 |
+ # Generate params for each assay. This could be extended to have different selectionMethods for each type |
|
773 |
+ paramsassays <- mapply(generateModellingParams, |
|
774 |
+ nFeatures = nFeatures[assayIDs], |
|
775 |
+ selectionMethod = selectionMethod[assayIDs], |
|
776 |
+ assayIDs = assayIDs, |
|
777 |
+ measurements = assayTrain[assayIDs], |
|
778 |
+ classifier = classifier[assayIDs], |
|
780 | 779 |
MoreArgs = list( |
781 | 780 |
selectionOptimisation = selectionOptimisation, |
782 | 781 |
multiViewMethod = "none"), |
... | ... |
@@ -786,8 +785,8 @@ generateMultiviewParams <- function(datasetIDs, |
786 | 785 |
params <- ModellingParams( |
787 | 786 |
balancing = "none", |
788 | 787 |
selectParams = NULL, |
789 |
- trainParams = TrainParams(prevalTrainInterface, params = paramsDatasets, characteristics = paramsDatasets$clinical@trainParams@characteristics), |
|
790 |
- predictParams = PredictParams(prevalPredictInterface, characteristics = paramsDatasets$clinical@predictParams@characteristics) |
|
788 |
+ trainParams = TrainParams(prevalTrainInterface, params = paramsassays, characteristics = paramsassays$clinical@trainParams@characteristics), |
|
789 |
+ predictParams = PredictParams(prevalPredictInterface, characteristics = paramsassays$clinical@predictParams@characteristics) |
|
791 | 790 |
) |
792 | 791 |
|
793 | 792 |
return(params) |
... | ... |
@@ -795,16 +794,16 @@ generateMultiviewParams <- function(datasetIDs, |
795 | 794 |
|
796 | 795 |
if(multiViewMethod == "prevalidation"){ |
797 | 796 |
|
798 |
- # Split measurements up by dataset. |
|
799 |
- assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE) |
|
797 |
+ # Split measurements up by assay. |
|
798 |
+ assayTrain <- sapply(assayIDs, function(assayID) measurements[, mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE) |
|
800 | 799 |
|
801 |
- # Generate params for each dataset. This could be extended to have different selectionMethods for each type |
|
802 |
- paramsDatasets <- mapply(generateModellingParams, |
|
803 |
- nFeatures = nFeatures[datasetIDs], |
|
804 |
- selectionMethod = selectionMethod[datasetIDs], |
|
805 |
- datasetIDs = datasetIDs, |
|
806 |
- measurements = assayTrain[datasetIDs], |
|
807 |
- classifier = classifier[datasetIDs], |
|
800 |
+ # Generate params for each assay. This could be extended to have different selectionMethods for each type |
|
801 |
+ paramsassays <- mapply(generateModellingParams, |
|
802 |
+ nFeatures = nFeatures[assayIDs], |
|
803 |
+ selectionMethod = selectionMethod[assayIDs], |
|
804 |
+ assayIDs = assayIDs, |
|
805 |
+ measurements = assayTrain[assayIDs], |
|
806 |
+ classifier = classifier[assayIDs], |
|
808 | 807 |
MoreArgs = list( |
809 | 808 |
selectionOptimisation = selectionOptimisation, |
810 | 809 |
multiViewMethod = "none"), |
... | ... |
@@ -814,24 +813,24 @@ generateMultiviewParams <- function(datasetIDs, |
814 | 813 |
params <- ModellingParams( |
815 | 814 |
balancing = "none", |
816 | 815 |
selectParams = NULL, |
817 |
- trainParams = TrainParams(prevalTrainInterface, params = paramsDatasets, characteristics = paramsDatasets$clinical@trainParams@characteristics), |
|
818 |
- predictParams = PredictParams(prevalPredictInterface, characteristics = paramsDatasets$clinical@predictParams@characteristics) |
|
816 |
+ trainParams = TrainParams(prevalTrainInterface, params = paramsassays, characteristics = paramsassays$clinical@trainParams@characteristics), |
|
817 |
+ predictParams = PredictParams(prevalPredictInterface, characteristics = paramsassays$clinical@predictParams@characteristics) |
|
819 | 818 |
) |
820 | 819 |
|
821 | 820 |
return(params) |
822 | 821 |
} |
823 | 822 |
|
824 | 823 |
|
825 |
- if(multiViewMethod == "pca"){ |
|
824 |
+ if(multiViewMethod == "PCA"){ |
|
826 | 825 |
|
827 |
- # Split measurements up by dataset. |
|
828 |
- assayTrain <- sapply(datasetIDs, function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE) |
|
826 |
+ # Split measurements up by assay. |
|
827 |
+ assayTrain <- sapply(assayIDs, function(assayID) measurements[,mcols(measurements)[["assay"]] %in% assayID], simplify = FALSE) |
|
829 | 828 |
|
830 |
- # Generate params for each dataset. This could be extended to have different selectionMethods for each type |
|
829 |
+ # Generate params for each assay. This could be extended to have different selectionMethods for each type |
|
831 | 830 |
paramsClinical <- list(clinical = generateModellingParams( |
832 | 831 |
nFeatures = nFeatures["clinical"], |
833 | 832 |
selectionMethod = selectionMethod["clinical"], |
834 |
- datasetIDs = "clinical", |
|
833 |
+ assayIDs = "clinical", |
|
835 | 834 |
measurements = assayTrain[["clinical"]], |
836 | 835 |
classifier = classifier["clinical"], |
837 | 836 |
selectionOptimisation = selectionOptimisation, |
... | ... |
@@ -852,13 +851,14 @@ generateMultiviewParams <- function(datasetIDs, |
852 | 851 |
|
853 | 852 |
|
854 | 853 |
CV <- function(measurements, |
855 |
- classes, |
|
854 |
+ outcomes, |
|
855 |
+ assayIDs, |
|
856 | 856 |
nFeatures = NULL, |
857 | 857 |
selectionMethod = "t-test", |
858 | 858 |
selectionOptimisation = "Resubstitution", |
859 | 859 |
classifier = "elasticNetGLM", |
860 | 860 |
multiViewMethod = "none", |
861 |
- dataCombinations = NULL, |
|
861 |
+ assayCombinations = NULL, |
|
862 | 862 |
nFolds = 5, |
863 | 863 |
nRepeats = 100, |
864 | 864 |
nCores = 1, |
... | ... |
@@ -866,7 +866,7 @@ CV <- function(measurements, |
866 | 866 |
|
867 | 867 |
{ |
868 | 868 |
# Check that data is in the right format |
869 |
- checkData(measurements, classes) |
|
869 |
+ checkData(measurements, outcomes) |
|
870 | 870 |
|
871 | 871 |
# Check that other variables are in the right format and fix |
872 | 872 |
nFeatures <- cleanNFeatures(nFeatures = nFeatures, |
... | ... |
@@ -877,9 +877,6 @@ CV <- function(measurements, |
877 | 877 |
measurements = measurements) |
878 | 878 |
|
879 | 879 |
# Which data-types or data-views are present? |
880 |
- datasetIDs <- unique(mcols(measurements)[, "dataset"]) |
|
881 |
- if(is.null(datasetIDs)) datasetIDs <- 1 |
|
882 |
- if(is.null(dataCombinations)) dataCombinations <- datasetIDs |
|
883 | 880 |
if(is.null(characteristicsLabel)) characteristicsLabel <- "none" |
884 | 881 |
|
885 | 882 |
# Setup cross-validation parameters including |
... | ... |
@@ -890,7 +887,7 @@ CV <- function(measurements, |
890 | 887 |
) |
891 | 888 |
|
892 | 889 |
# Turn text into TrainParams and TestParams objects |
893 |
- modellingParams <- generateModellingParams(datasetIDs = datasetIDs, |
|
890 |
+ modellingParams <- generateModellingParams(assayIDs = assayIDs, |
|
894 | 891 |
measurements = measurements, |
895 | 892 |
nFeatures = nFeatures, |
896 | 893 |
selectionMethod = selectionMethod, |
... | ... |
@@ -898,12 +895,12 @@ CV <- function(measurements, |
898 | 895 |
classifier = classifier, |
899 | 896 |
multiViewMethod = multiViewMethod |
900 | 897 |
) |
898 |
+ if(assayIDs != 1) assayText <- assayIDs else if(!is.null(attr(measurements, "assayName"))) assayText <- attr(measurements, "assayName") else assayText <- NULL |
|
899 |
+ characteristics <- S4Vectors::DataFrame(characteristic = c(if(!is.null(assayText)) "Assay Name" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(!is.null(assayText)) paste(assayText, collapse = ", ") else NULL, paste(classifier, collapse = ", "), paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel)) |
|
901 | 900 |
|
902 |
- characteristics = S4Vectors::DataFrame(characteristic = c(if(length(datasetIDs) > 1) "Data Set" else NULL, "Classifier Name", "Selection Name", "multiViewMethod", "characteristicsLabel"), value = c(if(length(datasetIDs) > 1) paste(datasetIDs, collapse = ", ") else NULL, paste(classifier, collapse = ", "), paste(selectionMethod, collapse = ", "), multiViewMethod, characteristicsLabel)) |
|
903 |
- |
|
904 |
- classifyResults <- runTests(measurements, classes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics) |
|
901 |
+ classifyResults <- runTests(measurements, outcomes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics) |
|
905 | 902 |
|
906 |
- fullResult <- runTest(measurements, classes, measurements, classes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1) |
|
903 |
+ fullResult <- runTest(measurements, outcomes, measurements, outcomes, crossValParams = crossValParams, modellingParams = modellingParams, characteristics = characteristics, .iteration = 1) |
|
907 | 904 |
|
908 | 905 |
classifyResults@finalModel <- list(fullResult$models) |
909 | 906 |
classifyResults |
... | ... |
@@ -915,7 +912,7 @@ CV <- function(measurements, |
915 | 912 |
|
916 | 913 |
|
917 | 914 |
|
918 |
-simplifyResults <- function(results, values = c("dataset", "classifier", "selectionMethod", "multiViewMethod")){ |
|
915 |
+simplifyResults <- function(results, values = c("assay", "classifier", "selectionMethod", "multiViewMethod")){ |
|
919 | 916 |
ch <- sapply(results, function(x) x@characteristics[x@characteristics$characteristic %in% values, "value"], simplify = TRUE) |
920 | 917 |
ch <- data.frame(t(ch)) |
921 | 918 |
results[!duplicated(ch)] |
... | ... |
@@ -932,4 +929,3 @@ setMethod("predict", "ClassifyResult", |
932 | 929 |
|
933 | 930 |
|
934 | 931 |
|
935 |
- |
... | ... |
@@ -103,7 +103,7 @@ setMethod("distribution", "ClassifyResult", |
103 | 103 |
allFeaturesText <- allFeatures |
104 | 104 |
} else if(is(chosenFeatures[[1]], "DataFrame")) { |
105 | 105 |
allFeatures <- do.call(rbind, chosenFeatures) |
106 |
- allFeaturesText <- paste(allFeatures[, "dataset"], allFeatures[, "feature"], sep = ':') |
|
106 |
+ allFeaturesText <- paste(allFeatures[, "Original Assay"], allFeatures[, "Original Feature"], sep = ':') |
|
107 | 107 |
} else if("Pairs" %in% class(chosenFeatures[[1]])) { |
108 | 108 |
allFeatures <- do.call(c, unname(chosenFeatures)) |
109 | 109 |
allFeaturesText <- paste(first(allFeatures), second(allFeatures), sep = ', ') |
... | ... |
@@ -153,7 +153,7 @@ setMethod("distribution", "ClassifyResult", |
153 | 153 |
if(isPairs) # Make it DataFrame for counting of the occurrences. |
154 | 154 |
allFeatures <- as(allFeatures, "DataFrame") |
155 | 155 |
|
156 |
- summaryTable <- aggregate(list(count = rep(1, nrow(allFeatures))), as.data.frame(allFeatures), length) |
|
156 |
+ summaryTable <- aggregate(list(count = rep(1, nrow(allFeatures))), as.data.frame(allFeatures, optional = TRUE), length) |
|
157 | 157 |
|
158 | 158 |
if(summaryType == "percentage") |
159 | 159 |
{ |
... | ... |
@@ -166,9 +166,7 @@ setMethod("distribution", "ClassifyResult", |
166 | 166 |
pairsSummary <- S4Vectors::Pairs(summaryTable[, "first"], summaryTable[, "second"], summaryTable[, 3]) |
167 | 167 |
colnames(mcols(pairsSummary)) <- colnames(summaryTable[, 3]) |
168 | 168 |
return(pairsSummary) |
169 |
- } else { # A table of dataset and feature. |
|
170 |
- if(all(summaryTable[, "dataset"] == "dataset")) # Just return a vector and get rid of unnecessary dataset. |
|
171 |
- summaryTable <- setNames(summaryTable[, 3], summaryTable[, 2]) |
|
169 |
+ } else { # A table of assay and feature. |
|
172 | 170 |
summaryTable |
173 | 171 |
} |
174 | 172 |
} |
... | ... |
@@ -165,8 +165,8 @@ setMethod("featureSetSummary", "MultiAssayExperiment", # Pick one numeric table |
165 | 165 |
if(class(featureSets) != "FeatureSetCollection") |
166 | 166 |
stop("'featureSets' is not of type FeatureSetCollection but must be.") |
167 | 167 |
|
168 |
- datasetUsed <- measurements[[target]] |
|
169 |
- assayedFeatures <- rownames(datasetUsed) |
|
168 |
+ assayUsed <- measurements[[target]] |
|
169 |
+ assayedFeatures <- rownames(assayUsed) |
|
170 | 170 |
featureSets <- featureSets@sets |
171 | 171 |
keepSets <- sapply(featureSets, function(featureSet) |
172 | 172 |
length(intersect(featureSet, assayedFeatures)) / length(featureSet) * 100 > minimumOverlapPercent) |
... | ... |
@@ -194,7 +194,7 @@ setMethod("featureSetSummary", "MultiAssayExperiment", # Pick one numeric table |
194 | 194 |
message("Summarising features to feature sets.") |
195 | 195 |
|
196 | 196 |
# Transform measurements into one feature per set. |
197 |
- transformed <- apply(datasetUsed, 2, function(sampleMeasurements) |
|
197 |
+ transformed <- apply(assayUsed, 2, function(sampleMeasurements) |
|
198 | 198 |
{ |
199 | 199 |
sapply(featureSets, function(featureSet) locationFunction(sampleMeasurements[featureSet])) |
200 | 200 |
}) |
... | ... |
@@ -135,7 +135,7 @@ setMethod("elasticNetGLMtrainInterface", "DataFrame", function(measurementsTrain |
135 | 135 |
fitted |
136 | 136 |
}) |
137 | 137 |
|
138 |
-# One or more omics datasets, possibly with sample information data. |
|
138 |
+# One or more omics assays, possibly with sample information data. |
|
139 | 139 |
#' @rdname elasticNetGLM |
140 | 140 |
#' @export |
141 | 141 |
setMethod("elasticNetGLMtrainInterface", "MultiAssayExperiment", |
... | ... |
@@ -109,7 +109,7 @@ setMethod("GLMtrainInterface", "DataFrame", function(measurementsTrain, classesT |
109 | 109 |
glm(class ~ . + 0, family = binomial, data = fitData, ...) |
110 | 110 |
}) |
111 | 111 |
|
112 |
-# One or more omics datasets, possibly with sample information data. |
|
112 |
+# One or more omics assays, possibly with sample information data. |
|
113 | 113 |
#' @rdname GLM |
114 | 114 |
#' @export |
115 | 115 |
setMethod("GLMtrainInterface", "MultiAssayExperiment", |
... | ... |
@@ -19,15 +19,15 @@ setMethod("pcaTrainInterface", "DFrame", |
19 | 19 |
### |
20 | 20 |
# Splitting measurements into a list of each of the datasets |
21 | 21 |
### |
22 |
- assayTrain <- sapply(unique(mcols(measurements)[["dataset"]]), function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE) |
|
22 |
+ assayTrain <- sapply(unique(mcols(measurements)[["assay"]]), function(assay) measurements[, mcols(measurements)[["assay"]] %in% assay], simplify = FALSE) |
|
23 | 23 |
|
24 |
- if(! "clinical" %in% names(assayTrain)) stop("Must have a dataset called `clinical`") |
|
24 |
+ if(!"clinical" %in% names(assayTrain)) stop("Must have an assay called \"clinical\"") |
|
25 | 25 |
|
26 | 26 |
# Create generic crossValParams just to get things working, might be used for optimising features in runTest later??? |
27 | 27 |
CVparams <- CrossValParams(permutations = 1, folds = 10, parallelParams = SerialParam(RNGseed = .Random.seed[1]), tuneMode = "Resubstitution") |
28 | 28 |
|
29 | 29 |
### |
30 |
- # Run PCA for all datasets except clinical |
|
30 |
+ # Run PCA for all assays except clinical |
|
31 | 31 |
### |
32 | 32 |
usePCA<- names(assayTrain)[names(assayTrain)!="clinical"] |
33 | 33 |
assayPCA <- sapply(assayTrain[usePCA], function(assay){ |
... | ... |
@@ -50,7 +50,7 @@ setMethod("pcaTrainInterface", "DFrame", |
50 | 50 |
### |
51 | 51 |
|
52 | 52 |
pcaVar <- S4Vectors::DataFrame(pcaVar) |
53 |
- mcols(pcaVar)$dataset = "pca" |
|
53 |
+ mcols(pcaVar)$assay = "PCA" |
|
54 | 54 |
mcols(pcaVar)$feature = colnames(pcaVar) |
55 | 55 |
|
56 | 56 |
fullTrain = cbind(assayTrain[["clinical"]], pcaVar) |
... | ... |
@@ -116,8 +116,8 @@ setMethod("pcaPredictInterface", c("pcaModel", "DFrame"), |
116 | 116 |
# Pull out my classification model |
117 | 117 |
fullModel <- fullModel@fullModel[[1]] |
118 | 118 |
|
119 |
- #Split my test data into a list of the different datasets |
|
120 |
- assayTest <- sapply(unique(mcols(test)[["dataset"]]), function(x) test[,mcols(test)[["dataset"]]%in%x], simplify = FALSE) |
|
119 |
+ #Split my test data into a list of the different assays |
|
120 |
+ assayTest <- sapply(unique(mcols(test)[["assay"]]), function(assay) test[, mcols(test)[["assay"]] %in% assay], simplify = FALSE) |
|
121 | 121 |
|
122 | 122 |
# Pull out my PCA models |
123 | 123 |
pcaModels <- fullModel$pcaModels |
... | ... |
@@ -131,7 +131,7 @@ setMethod("pcaPredictInterface", c("pcaModel", "DFrame"), |
131 | 131 |
pcaVar <- do.call(cbind, pcaVar) |
132 | 132 |
|
133 | 133 |
pcaVar <- S4Vectors::DataFrame(pcaVar) |
134 |
- mcols(pcaVar)$dataset = "pca" |
|
134 |
+ mcols(pcaVar)$assay = "PCA" |
|
135 | 135 |
mcols(pcaVar)$feature = colnames(pcaVar) |
136 | 136 |
|
137 | 137 |
# Merge my PCA stuff with my clinical data |
... | ... |
@@ -54,11 +54,11 @@ setMethod("prevalTrainInterface", "DFrame", |
54 | 54 |
{ |
55 | 55 |
|
56 | 56 |
### |
57 |
- # Splitting measurements into a list of each of the datasets |
|
57 |
+ # Splitting measurements into a list of each of the assays |
|
58 | 58 |
### |
59 |
- assayTrain <- sapply(unique(mcols(measurements)[["dataset"]]), function(x) measurements[,mcols(measurements)[["dataset"]]%in%x], simplify = FALSE) |
|
59 |
+ assayTrain <- sapply(unique(mcols(measurements)[["assay"]]), function(assay) measurements[, mcols(measurements)[["dataset"]] %in% assay], simplify = FALSE) |
|
60 | 60 |
|
61 |
- if(! "clinical" %in% names(assayTrain)) stop("Must have a dataset called `clinical`") |
|
61 |
+ if(!"clinical" %in% names(assayTrain)) stop("Must have an assay called \"clinical\"") |
|
62 | 62 |
|
63 | 63 |
# Create generic crossValParams to use for my prevalidation... should add this as a input parameter |
64 | 64 |
CVparams <- CrossValParams(permutations = 1, folds = 10, parallelParams = SerialParam(RNGseed = .Random.seed[1]), tuneMode = "Resubstitution") |
... | ... |
@@ -66,7 +66,7 @@ setMethod("prevalTrainInterface", "DFrame", |
66 | 66 |
### |
67 | 67 |
# Fit a classification model for each non-clinical datasets, pulling models from "params" |
68 | 68 |
### |
69 |
- usePreval <- names(assayTrain)[names(assayTrain)!="clinical"] |
|
69 |
+ usePreval <- names(assayTrain)[names(assayTrain) != "clinical"] |
|
70 | 70 |
assayTests <- bpmapply( |
71 | 71 |
runTests, |
72 | 72 |
measurements = assayTrain[usePreval], |
... | ... |
@@ -77,7 +77,7 @@ setMethod("prevalTrainInterface", "DFrame", |
77 | 77 |
verbose = 0 |
78 | 78 |
), |
79 | 79 |
BPPARAM = SerialParam(RNGseed = .Random.seed[1])) |> |
80 |
- sapply(function(x)x@predictions, simplify = FALSE) |
|
80 |
+ sapply(function(result) result@predictions, simplify = FALSE) |
|
81 | 81 |
|
82 | 82 |
### |
83 | 83 |
# Pull-out prevalidated vectors ie. the predictions on each of the test folds. |
... | ... |
@@ -98,7 +98,7 @@ setMethod("prevalTrainInterface", "DFrame", |
98 | 98 |
#fullTrain = cbind(assayTrain[["clinical"]][,selectedFeaturesClinical], prevalidationTrain[rownames(assayTrain[["clinical"]]), , drop = FALSE]) |
99 | 99 |
|
100 | 100 |
prevalidationTrain <- S4Vectors::DataFrame(prevalidationTrain) |
101 |
- mcols(prevalidationTrain)$dataset = "pca" |
|
101 |
+ mcols(prevalidationTrain)$assay = "PCA" |
|
102 | 102 |
mcols(prevalidationTrain)$feature = colnames(prevalidationTrain) |
103 | 103 |
|
104 | 104 |
|
... | ... |
@@ -176,7 +176,7 @@ setMethod("prevalPredictInterface", c("prevalModel", "DFrame"), |
176 | 176 |
) |
177 | 177 |
{ |
178 | 178 |
fullModel <- fullModel@fullModel[[1]] |
179 |
- assayTest <- sapply(unique(mcols(test)[["dataset"]]), function(x) test[,mcols(test)[["dataset"]]%in%x], simplify = FALSE) |
|
179 |
+ assayTest <- sapply(unique(mcols(test)[["assay"]]), function(assay) test[, mcols(test)[["assay"]] %in% assay], simplify = FALSE) |
|
180 | 180 |
|
181 | 181 |
prevalidationModels <- fullModel$prevalidationModels |
182 | 182 |
modelPredictionFunctions <- fullModel$modellingParams |
... | ... |
@@ -191,7 +191,7 @@ setMethod("prevalPredictInterface", c("prevalModel", "DFrame"), |
191 | 191 |
extractPrevalidation() |
192 | 192 |
|
193 | 193 |
prevalidationPredict <- S4Vectors::DataFrame(prevalidationPredict) |
194 |
- mcols(prevalidationPredict)$dataset = "pca" |
|
194 |
+ mcols(prevalidationPredict)$assay = "PCA" |
|
195 | 195 |
mcols(prevalidationPredict)$feature = colnames(prevalidationPredict) |
196 | 196 |
|
197 | 197 |
fullTest = cbind(assayTest[["clinical"]], prevalidationPredict[rownames(assayTest[["clinical"]]), , drop = FALSE]) |
... | ... |
@@ -82,7 +82,7 @@ |
82 | 82 |
#' legends' titles. The fifth number is the font size of the legend labels. |
83 | 83 |
#' @param colours The colours to plot data of each class in. The length of this |
84 | 84 |
#' vector must be as long as the distinct number of classes in the data set. |
85 |
-#' @param showDatasetName Logical. Default: \code{TRUE}. If \code{TRUE} and the |
|
85 |
+#' @param showAssayName Logical. Default: \code{TRUE}. If \code{TRUE} and the |
|
86 | 86 |
#' data is in a \code{MultiAssayExperiment} object, the the name of the table |
87 | 87 |
#' in which the feature is stored in is added to the plot title. |
88 | 88 |
#' @param plot Logical. Default: \code{TRUE}. If \code{TRUE}, a plot is |
... | ... |
@@ -129,7 +129,7 @@ |
129 | 129 |
#' genesMatrix <- t(genesMatrix) # MultiAssayExperiment needs features in rows. |
130 | 130 |
#' dataContainer <- MultiAssayExperiment(list(RNA = genesMatrix), |
131 | 131 |
#' colData = cbind(clinicalData, class = classes)) |
132 |
-#' targetFeatures <- DataFrame(dataset = "RNA", feature = "Gene 50") |
|
132 |
+#' targetFeatures <- DataFrame(assay = "RNA", feature = "Gene 50") |
|
133 | 133 |
#' plotFeatureClasses(dataContainer, targets = targetFeatures, classesColumn = "class", |
134 | 134 |
#' groupBy = c("sampleInfo", "Gender"), # Table name, feature name. |
135 | 135 |
#' xAxisLabel = bquote(log[2]*'(expression)'), dotBinWidth = 0.5) |
... | ... |
@@ -161,7 +161,7 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar |
161 | 161 |
xLabelPositions = "auto", yLabelPositions = "auto", |
162 | 162 |
fontSizes = c(24, 16, 12, 12, 12), |
163 | 163 |
colours = c("#3F48CC", "#880015"), |
164 |
- showDatasetName = TRUE, plot = TRUE) |
|
164 |
+ showAssayName = TRUE, plot = TRUE) |
|
165 | 165 |
{ |
166 | 166 |
if(missing(targets)) |
167 | 167 |
stop("'targets' must be specified.") |
... | ... |
@@ -197,9 +197,6 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar |
197 | 197 |
|
198 | 198 |
# Subsetting of measurements to the features of interest. |
199 | 199 |
|
200 |
- # Remove unnecessary dataset column for a single dataset. |
|
201 |
- if(!is.null(ncol(targets)) && all(targets[, "dataset"] == "dataset")) targets <- targets[, "feature"] |
|
202 |
- |
|
203 | 200 |
if(!is(targets, "DataFrame")) |
204 | 201 |
{ |
205 | 202 |
if(!"Pairs" %in% class(targets)) # A simple vector. |
... | ... |
@@ -224,9 +221,9 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar |
224 | 221 |
featureText <- colnames(measurements)[columnIndex] |
225 | 222 |
} else { |
226 | 223 |
featureText <- S4Vectors::mcols(measurements)[columnIndex, "feature"] |
227 |
- if(showDatasetName == TRUE && !all(S4Vectors::mcols(measurements)[, "dataset"] == "dataset")) |
|
224 |
+ if(showAssayName == TRUE && !all(S4Vectors::mcols(measurements)[, "assay"] == "assay")) |
|
228 | 225 |
{ |
229 |
- featureText <- paste(featureText, paste('(', S4Vectors::mcols(measurements)[columnIndex, "dataset"], ')', sep = '')) |
|
226 |
+ featureText <- paste(featureText, paste('(', S4Vectors::mcols(measurements)[columnIndex, "assay"], ')', sep = '')) |
|
230 | 227 |
} |
231 | 228 |
} |
232 | 229 |
|
... | ... |
@@ -363,7 +360,7 @@ setMethod("plotFeatureClasses", "DataFrame", function(measurements, classes, tar |
363 | 360 |
#' @rdname plotFeatureClasses |
364 | 361 |
#' @export |
365 | 362 |
setMethod("plotFeatureClasses", "MultiAssayExperiment", |
366 |
- function(measurements, targets, classesColumn, groupBy = NULL, groupingName = NULL, showDatasetName = TRUE, ...) |
|
363 |
+ function(measurements, targets, classesColumn, groupBy = NULL, groupingName = NULL, showAssayName = TRUE, ...) |
|
367 | 364 |
{ |
368 | 365 |
if(missing(targets)) |
369 | 366 |
stop("'targets' must be specified by the user.") |
... | ... |
@@ -385,7 +382,7 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment", |
385 | 382 |
groupBy <- MultiAssayExperiment::colData(measurements)[, groupBy[2]] |
386 | 383 |
} else { # One of the omics tables. |
387 | 384 |
groupBy <- measurements[groupBy[2], , groupingTable] |
388 |
- if(showDatasetName == TRUE) |
|
385 |
+ if(showAssayName == TRUE) |
|
389 | 386 |
groupingName <- paste(groupingName, groupingTable) |
390 | 387 |
} |
391 | 388 |
levelsOrder <- levels(groupBy) |
... | ... |
@@ -399,11 +396,11 @@ setMethod("plotFeatureClasses", "MultiAssayExperiment", |
399 | 396 |
measurements <- MultiAssayExperiment::wideFormat(measurements, colDataCols = seq_along(MultiAssayExperiment::colData(measurements)), check.names = FALSE, collapse = ':') |
400 | 397 |
measurements <- measurements[, -1, drop = FALSE] # Remove sample IDs. |
401 | 398 |
S4Vectors::mcols(measurements)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(measurements)[, "sourceName"]) |
402 |
- colnames(S4Vectors::mcols(measurements))[1] <- "dataset" |
|
399 |
+ colnames(S4Vectors::mcols(measurements))[1] <- "assay" |
|
403 | 400 |
S4Vectors::mcols(measurements)[, "feature"] <- S4Vectors::mcols(measurements)[, "rowname"] |
404 | 401 |
missingIndices <- is.na(S4Vectors::mcols(measurements)[, "feature"]) |
405 | 402 |
S4Vectors::mcols(measurements)[missingIndices, "feature"] <- colnames(measurements)[missingIndices] |
406 |
- S4Vectors::mcols(measurements) <- S4Vectors::mcols(measurements)[, c("dataset", "feature")] |
|
403 |
+ S4Vectors::mcols(measurements) <- S4Vectors::mcols(measurements)[, c("assay", "feature")] |
|
407 | 404 |
|
408 |
- plotFeatureClasses(measurements, classes, S4Vectors::mcols(measurements), groupBy, groupingName, showDatasetName = showDatasetName, ...) |
|
405 |
+ plotFeatureClasses(measurements, classes, S4Vectors::mcols(measurements), groupBy, groupingName, showAssayName = showAssayName, ...) |
|
409 | 406 |
}) |
410 | 407 |
\ No newline at end of file |
... | ... |
@@ -100,9 +100,9 @@ setMethod("previousSelection", "DataFrame", |
100 | 100 |
IDsRows <- match(previousIDs, featuresInfo(classifyResult)[, "Original Feature"]) |
101 | 101 |
safeIDs <- featuresInfo(classifyResult)[IDsRows, "Renamed Feature"] |
102 | 102 |
} else { # A data frame describing the data set and variable name of the chosen feature. |
103 |
- featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("dataset", "feature")]) |
|
104 |
- IDsRows <- match(do.call(paste, previousIDs), do.call(paste(featuresInfo(classifyResult)[, c("Original Dataset", "Original Feature")]))) |
|
105 |
- safeIDs <- do.call(paste, featuresInfo(classifyResult)[IDsRows, c("Renamed Dataset", "Renamed Feature")]) |
|
103 |
+ featuresIDs <- do.call(paste, S4Vectors::mcols(measurementsTrain)[, c("assay", "feature")]) |
|
104 |
+ IDsRows <- match(do.call(paste, previousIDs), do.call(paste(featuresInfo(classifyResult)[, c("Original Assay", "Original Feature")]))) |
|
105 |
+ safeIDs <- do.call(paste, featuresInfo(classifyResult)[IDsRows, c("Renamed Assay", "Renamed Feature")]) |
|
106 | 106 |
} |
107 | 107 |
|
108 | 108 |
commonFeatures <- intersect(safeIDs, featuresIDs) |
... | ... |
@@ -92,7 +92,7 @@ setMethod("runTest", "DataFrame", # Sample information data or one of the other |
92 | 92 |
function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest, |
93 | 93 |
crossValParams = CrossValParams(), # crossValParams might be used for tuning optimisation. |
94 | 94 |
modellingParams = ModellingParams(), characteristics = S4Vectors::DataFrame(), verbose = 1, .iteration = NULL) |
95 |
-{ |
|
95 |
+{if(!is.null(.iteration) && .iteration != "internal") |
|
96 | 96 |
if(is.null(.iteration)) # Not being called by runTests but by user. So, check the user input. |
97 | 97 |
{ |
98 | 98 |
if(is.null(rownames(measurementsTrain))) |
... | ... |
@@ -117,9 +117,9 @@ function(measurementsTrain, outcomesTrain, measurementsTest, outcomesTest, |
117 | 117 |
featuresInfo <- .summaryFeatures(measurementsTrain) |
118 | 118 |
if(!is.null(S4Vectors::mcols(measurementsTrain))) |
119 | 119 |
{ |
120 |
- S4Vectors::mcols(measurementsTrain) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")] |
|
121 |
- S4Vectors::mcols(measurementsTest) <- featuresInfo[, c("Renamed Dataset", "Renamed Feature")] |
|
122 |
- colnames(measurementsTrain) <- colnames(measurementsTest) <- paste(featuresInfo[["Renamed Dataset"]], featuresInfo[["Renamed Feature"]], sep = '') |
|
120 |
+ S4Vectors::mcols(measurementsTrain) <- featuresInfo[, c("Renamed Assay", "Renamed Feature")] |
|
121 |
+ S4Vectors::mcols(measurementsTest) <- featuresInfo[, c("Renamed Assay", "Renamed Feature")] |
|
122 |
+ colnames(measurementsTrain) <- colnames(measurementsTest) <- paste(featuresInfo[["Renamed Assay"]], featuresInfo[["Renamed Feature"]], sep = '') |
|
123 | 123 |
} else { |
124 | 124 |
colnames(measurementsTrain) <- colnames(measurementsTest) <- featuresInfo[, "Renamed Feature"] |
125 | 125 |
} |
... | ... |
@@ -257,7 +257,7 @@ input data. Autmomatically reducing to smaller number.") |
257 | 257 |
predictedOutcomes <- predictedOutcomes[, na.omit(match(c("class", "risk"), colnames(predictedOutcomes)))] |
258 | 258 |
performanceChanges <- round(performancesWithoutEach - calcExternalPerformance(outcomesTest, predictedOutcomes, performanceType), 2) |
259 | 259 |
|
260 |
- if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")] |
|
260 |
+ if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Assay", "Original Feature")] |
|
261 | 261 |
importanceTable <- DataFrame(selectedFeatures, performanceChanges) |
262 | 262 |
if(ncol(importanceTable) == 2) colnames(importanceTable)[1] <- "feature" |
263 | 263 |
colnames(importanceTable)[ncol(importanceTable)] <- paste("Change in", performanceType) |
... | ... |
@@ -271,11 +271,11 @@ input data. Autmomatically reducing to smaller number.") |
271 | 271 |
{ |
272 | 272 |
if(!is.null(rankedFeaturesIndices)) |
273 | 273 |
{ |
274 |
- if(is.null(S4Vectors::mcols(measurementsTrain))) rankedFeatures <- featuresInfo[rankedFeaturesIndices, "Original Feature"] else rankedFeatures <- featuresInfo[rankedFeaturesIndices, c("Original Dataset", "Original Feature")] |
|
274 |
+ if(is.null(S4Vectors::mcols(measurementsTrain))) rankedFeatures <- featuresInfo[rankedFeaturesIndices, "Original Feature"] else rankedFeatures <- featuresInfo[rankedFeaturesIndices, c("Original Assay", "Original Feature")] |
|
275 | 275 |
} else { rankedFeatures <- NULL} |
276 | 276 |
if(!is.null(selectedFeaturesIndices)) |
277 | 277 |
{ |
278 |
- if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Dataset", "Original Feature")] |
|
278 |
+ if(is.null(S4Vectors::mcols(measurementsTrain))) selectedFeatures <- featuresInfo[selectedFeaturesIndices, "Original Feature"] else selectedFeatures <- featuresInfo[selectedFeaturesIndices, c("Original Assay", "Original Feature")] |
|
279 | 279 |
} else { selectedFeatures <- NULL} |
280 | 280 |
} else { # Nested use in feature selection. No feature selection in inner execution, so ignore features. |
281 | 281 |
rankedFeatures <- selectedFeatures <- NULL |
... | ... |
@@ -51,7 +51,7 @@ |
51 | 51 |
#' selectParams <- SelectParams(differentMeansRanking, tuneParams = tuneList) |
52 | 52 |
#' modellingParams <- ModellingParams(selectParams = selectParams) |
53 | 53 |
#' runTests(measurements, classes, CVparams, modellingParams, |
54 |
-#' DataFrame(characteristic = c("Dataset Name", "Classifier Name"), |
|
54 |
+#' DataFrame(characteristic = c("Assay Name", "Classifier Name"), |
|
55 | 55 |
#' value = c("Asthma", "Different Means")) |
56 | 56 |
#' ) |
57 | 57 |
#' #} |
... | ... |
@@ -220,7 +220,7 @@ setMethod("samplesMetricMap", "list", |
220 | 220 |
legend.position = ifelse(showLegends, "right", "none"), |
221 | 221 |
legend.key.size = legendSize) |
222 | 222 |
} else # Numeric data about the samples. |
223 |
- {#browser() |
|
223 |
+ { |
|
224 | 224 |
featureValuesData <- data.frame(measurements = featureValues, Class = 1) |
225 | 225 |
if(metric != "Sample C-index") featureValuesData[, "Class"] <- knownClasses |
226 | 226 |
featureValuesPlot <- ggplot2::ggplot(featureValuesData, environment = environment()) + |
... | ... |
@@ -5,8 +5,7 @@ setGeneric("selectMulti", function(measurementsTrain, classesTrain, params, ...) |
5 | 5 |
setMethod("selectMulti", "DataFrame", |
6 | 6 |
function(measurementsTrain, classesTrain, params, verbose = 0) |
7 | 7 |
{ |
8 |
- assayTrain <- sapply(unique(mcols(measurementsTrain)[["Renamed Dataset"]]), function(x) measurementsTrain[, mcols(measurementsTrain)[["Renamed Dataset"]] %in% x], simplify = FALSE) |
|
9 |
- |
|
8 |
+ assayTrain <- sapply(unique(mcols(measurementsTrain)[["Renamed Assay"]]), function(assay) measurementsTrain[, mcols(measurementsTrain)[["Renamed Assay"]] %in% assay], simplify = FALSE) |
|
10 | 9 |
featuresIndices <- mapply(.doSelection, |
11 | 10 |
measurements = assayTrain, |
12 | 11 |
modellingParams = params, |
... | ... |
@@ -238,12 +238,10 @@ setMethod("selectionPlot", "list", |
238 | 238 |
} else { |
239 | 239 |
summaryTable <- result@importance |
240 | 240 |
} |
241 |
- if("dataset" %in% colnames(summaryTable) && all(summaryTable[, "dataset"] == "dataset")) |
|
242 |
- summaryTable <- summaryTable[, -match("dataset", colnames(summaryTable))] |
|
243 |
- if("dataset" %in% colnames(summaryTable)) |
|
241 |
+ if("assay" %in% colnames(summaryTable)) |
|
244 | 242 |
{ |
245 |
- summaryTable[, "feature"] <- paste(summaryTable[, "dataset"], summaryTable[, "feature"]) |
|
246 |
- summaryTable <- summaryTable[, -match("dataset", colnames(summaryTable))] |
|
243 |
+ summaryTable[, "feature"] <- paste(summaryTable[, "assay"], summaryTable[, "feature"]) |
|
244 |
+ summaryTable <- summaryTable[, -match("assay", colnames(summaryTable))] |
|
247 | 245 |
} |
248 | 246 |
summaryTable |
249 | 247 |
})) |
... | ... |
@@ -100,7 +100,7 @@ |
100 | 100 |
dataTable <- MultiAssayExperiment::wideFormat(measurements, colDataCols = union(sampleInfoColumnsTrain, outcomesColumns), check.names = FALSE, collapse = ':') |
101 | 101 |
rownames(dataTable) <- dataTable[, "primary"] |
102 | 102 |
S4Vectors::mcols(dataTable)[, "sourceName"] <- gsub("colDataCols", "sampleInfo", S4Vectors::mcols(dataTable)[, "sourceName"]) |
103 |
- colnames(S4Vectors::mcols(dataTable))[1] <- "dataset" |
|
103 |
+ colnames(S4Vectors::mcols(dataTable))[1] <- "assay" |
|
104 | 104 |
|
105 | 105 |
# Sample information variable names not included in column metadata of wide table but only as row names of it. |
106 | 106 |
# Create a combined column named "feature" which has feature names of the assays as well as the sample information. |
... | ... |
@@ -109,7 +109,7 @@ |
109 | 109 |
S4Vectors::mcols(dataTable)[missingIndices, "feature"] <- colnames(dataTable)[missingIndices] |
110 | 110 |
|
111 | 111 |
# Finally, a column annotation recording variable name and which table it originated from for all of the source tables. |
112 |
- S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("dataset", "feature")] |
|
112 |
+ S4Vectors::mcols(dataTable) <- S4Vectors::mcols(dataTable)[, c("assay", "feature")] |
|
113 | 113 |
} else { # Must have only been sample information data. |
114 | 114 |
dataTable <- MultiAssayExperiment::colData(measurements) |
115 | 115 |
} |
... | ... |
@@ -569,24 +569,23 @@ |
569 | 569 |
{ |
570 | 570 |
originalInfo <- S4Vectors::mcols(measurements) |
571 | 571 |
featureNames <- S4Vectors::mcols(measurements)[, "feature"] |
572 |
- datasets <- unique(S4Vectors::mcols(measurements)[, "dataset"]) |
|
572 |
+ assays <- unique(S4Vectors::mcols(measurements)[, "assay"]) |
|
573 | 573 |
renamedInfo <- S4Vectors::mcols(measurements) |
574 |
- renamedDatasets <- paste("Dataset", seq_along(datasets), sep = '') |
|
575 |
- for(dataset in datasets) |
|
574 |
+ renamedAssays <- paste("Assay", seq_along(assays), sep = '') |
|
575 |
+ for(assay in assays) |
|
576 | 576 |
{ |
577 |
- rowsDataset <- which(renamedInfo[, "dataset"] == dataset) |
|
578 |
- renamedInfo[rowsDataset, "feature"] <- paste("Feature", seq_along(rowsDataset), sep = '') |
|
579 |
- renamedInfo[rowsDataset, "dataset"] <- renamedDatasets[match(dataset, datasets)] |
|
577 |
+ rowsAssay <- which(renamedInfo[, "assay"] == assay) |
|
578 |
+ renamedInfo[rowsAssay, "feature"] <- paste("Feature", seq_along(rowsAssay), sep = '') |
|
579 |
+ renamedInfo[rowsAssay, "assay"] <- renamedAssays[match(assay, assays)] |
|
580 | 580 |
} |
581 | 581 |
featuresInfo <- DataFrame(originalInfo, renamedInfo) |
582 |
- colnames(featuresInfo) <- c("Original Dataset", "Original Feature", "Renamed Dataset", "Renamed Feature") |
|
582 |
+ colnames(featuresInfo) <- c("Original Assay", "Original Feature", "Renamed Assay", "Renamed Feature") |
|
583 | 583 |
} else { |
584 | 584 |
originalFeatures <- colnames(measurements) |
585 | 585 |
renamedInfo <- paste("Feature", seq_along(measurements), sep = '') |
586 | 586 |
featuresInfo <- DataFrame(originalFeatures, renamedInfo) |
587 | 587 |
colnames(featuresInfo) <- c("Original Feature", "Renamed Feature") |
588 | 588 |
} |
589 |
- |
|
590 | 589 |
featuresInfo |
591 | 590 |
} |
592 | 591 |
|
... | ... |
@@ -111,7 +111,7 @@ of predictions made during the cross-validation procedure.}} |
111 | 111 |
modellingParams <- ModellingParams() |
112 | 112 |
classified <- |
113 | 113 |
runTests(measurements, classes, LOOCVparams, modellingParams, |
114 |
- DataFrame(characteristic = c("dataset", "classification"), |
|
114 |
+ DataFrame(characteristic = c("Data Set", "Classification"), |
|
115 | 115 |
value = c("Asthma", "Different Means")) |
116 | 116 |
) |
117 | 117 |
class(classified) |
... | ... |
@@ -11,30 +11,18 @@ |
11 | 11 |
\alias{predict,ClassifyResult-method} |
12 | 12 |
\title{Cross-validation to evaluate classification performance.} |
13 | 13 |
\usage{ |
14 |
-crossValidate( |
|
15 |
- measurements, |
|
16 |
- classes, |
|
17 |
- nFeatures = 20, |
|
18 |
- selectionMethod = "t-test", |
|
19 |
- selectionOptimisation = "Resubstitution", |
|
20 |
- classifier = "randomForest", |
|
21 |
- multiViewMethod = "none", |
|
22 |
- dataCombinations = NULL, |
|
23 |
- nFolds = 5, |
|
24 |
- nRepeats = 20, |
|
25 |
- nCores = 1, |
|
26 |
- characteristicsLabel = NULL |
|
27 |
-) |
|
14 |
+crossValidate(measurements, outcomes, ...) |
|
28 | 15 |
|
29 | 16 |
\S4method{crossValidate}{DataFrame}( |
30 | 17 |
measurements, |
31 |
- classes, |
|
18 |
+ outcomes, |
|
19 |
+ assayName = NULL, |
|
32 | 20 |
nFeatures = 20, |
33 | 21 |
selectionMethod = "t-test", |
34 | 22 |
selectionOptimisation = "Resubstitution", |
35 | 23 |
classifier = "randomForest", |
36 | 24 |
multiViewMethod = "none", |
37 |
- dataCombinations = NULL, |
|
25 |
+ assayCombinations = NULL, |
|
38 | 26 |
nFolds = 5, |
39 | 27 |
nRepeats = 20, |
40 | 28 |
nCores = 1, |
... | ... |
@@ -43,13 +31,13 @@ crossValidate( |
43 | 31 |
|
44 | 32 |
\S4method{crossValidate}{MultiAssayExperiment}( |
45 | 33 |
measurements, |
46 |
- classes, |
|
34 |
+ outcomes, |
|
47 | 35 |
nFeatures = 20, |
48 | 36 |
selectionMethod = "t-test", |
49 | 37 |
selectionOptimisation = "Resubstitution", |
50 | 38 |
classifier = "randomForest", |
51 | 39 |
multiViewMethod = "none", |
52 |
- dataCombinations = NULL, |
|
40 |
+ assayCombinations = NULL, |
|
53 | 41 |
nFolds = 5, |
54 | 42 |
nRepeats = 20, |
55 | 43 |
nCores = 1, |
... | ... |
@@ -58,13 +46,14 @@ crossValidate( |
58 | 46 |
|
59 | 47 |
\S4method{crossValidate}{data.frame}( |
60 | 48 |
measurements, |
61 |
- classes, |
|
49 |
+ outcomes, |
|
50 |
+ assayName = NULL, |
|
62 | 51 |
nFeatures = 20, |
63 | 52 |
selectionMethod = "t-test", |
64 | 53 |
selectionOptimisation = "Resubstitution", |
65 | 54 |
classifier = "randomForest", |
66 | 55 |
multiViewMethod = "none", |
67 |
- dataCombinations = NULL, |
|
56 |
+ assayCombinations = NULL, |
|
68 | 57 |
nFolds = 5, |
69 | 58 |
nRepeats = 20, |
70 | 59 |
nCores = 1, |
... | ... |
@@ -73,13 +62,14 @@ crossValidate( |
73 | 62 |
|
74 | 63 |
\S4method{crossValidate}{matrix}( |
75 | 64 |
measurements, |
76 |
- classes, |
|
65 |
+ outcomes, |
|
66 |
+ assayName = NULL, |
|
77 | 67 |
nFeatures = 20, |
78 | 68 |
selectionMethod = "t-test", |
79 | 69 |
selectionOptimisation = "Resubstitution", |
80 | 70 |
classifier = "randomForest", |
81 | 71 |
multiViewMethod = "none", |
82 |
- dataCombinations = NULL, |
|
72 |
+ assayCombinations = NULL, |
|
83 | 73 |
nFolds = 5, |
84 | 74 |
nRepeats = 20, |
85 | 75 |
nCores = 1, |
... | ... |
@@ -88,13 +78,13 @@ crossValidate( |
88 | 78 |
|
89 | 79 |
\S4method{crossValidate}{list}( |
90 | 80 |
measurements, |
91 |