... | ... |
@@ -3,8 +3,8 @@ Type: Package |
3 | 3 |
Title: A framework for cross-validated classification problems, with |
4 | 4 |
applications to differential variability and differential |
5 | 5 |
distribution testing |
6 |
-Version: 3.1.24 |
|
7 |
-Date: 2022-10-17 |
|
6 |
+Version: 3.1.25 |
|
7 |
+Date: 2022-10-18 |
|
8 | 8 |
Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang |
9 | 9 |
Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au> |
10 | 10 |
VignetteBuilder: knitr |
... | ... |
@@ -10,8 +10,8 @@ |
10 | 10 |
#' @param outcome A vector of class labels of class \code{\link{factor}} of the |
11 | 11 |
#' same length as the number of samples in \code{measurements} or a character vector of length 1 containing the |
12 | 12 |
#' column name in \code{measurements} if it is a \code{\link{DataFrame}}. Or a \code{\link{Surv}} object or a character vector of |
13 |
-#' length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome. |
|
14 |
-#' @param outcomeColumns If \code{measurements} is a \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome. |
|
13 |
+#' length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome. If \code{measurements} is a |
|
14 |
+#' \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome. |
|
15 | 15 |
#' @param outcomeTrain For the \code{train} function, either a factor vector of classes, a \code{\link{Surv}} object, or |
16 | 16 |
#' a character string, or vector of such strings, containing column name(s) of column(s) |
17 | 17 |
#' containing either classes or time and event information about survival. |
... | ... |
@@ -201,7 +201,6 @@ Using an ordinary GLM instead.") |
201 | 201 |
# The below loops over different combinations of assays and merges them together. |
202 | 202 |
# This allows someone to answer which combinations of the assays might be most useful. |
203 | 203 |
|
204 |
- |
|
205 | 204 |
if(!is.list(assayCombinations) && assayCombinations == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE))) |
206 | 205 |
|
207 | 206 |
result <- sapply(assayCombinations, function(assayIndex){ |
... | ... |
@@ -237,7 +236,6 @@ Using an ordinary GLM instead.") |
237 | 236 |
if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data") |
238 | 237 |
} |
239 | 238 |
|
240 |
- |
|
241 | 239 |
result <- sapply(assayCombinations, function(assayIndex){ |
242 | 240 |
CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex], |
243 | 241 |
outcome = outcome, assayIDs = assayIndex, |
... | ... |
@@ -299,7 +297,7 @@ Using an ordinary GLM instead.") |
299 | 297 |
# One or more omics data sets, possibly with clinical data. |
300 | 298 |
setMethod("crossValidate", "MultiAssayExperiment", |
301 | 299 |
function(measurements, |
302 |
- outcomeColumns, |
|
300 |
+ outcome, |
|
303 | 301 |
nFeatures = 20, |
304 | 302 |
selectionMethod = "t-test", |
305 | 303 |
selectionOptimisation = "Resubstitution", |
... | ... |
@@ -312,7 +310,7 @@ setMethod("crossValidate", "MultiAssayExperiment", |
312 | 310 |
nCores = 1, |
313 | 311 |
characteristicsLabel = NULL, ...) |
314 | 312 |
{ |
315 |
- measurementsAndOutcome <- prepareData(measurements, outcomeColumns, ...) |
|
313 |
+ measurementsAndOutcome <- prepareData(measurements, outcome, ...) |
|
316 | 314 |
|
317 | 315 |
crossValidate(measurements = measurementsAndOutcome[["measurements"]], |
318 | 316 |
outcome = measurementsAndOutcome[["outcome"]], |
... | ... |
@@ -691,6 +689,7 @@ generateMultiviewParams <- function(assayIDs, |
691 | 689 |
nFeatures = nFeatures, |
692 | 690 |
selectionMethod = selectionMethod, |
693 | 691 |
selectionOptimisation = "none", |
692 |
+ performanceType = performanceType, |
|
694 | 693 |
classifier = classifier, |
695 | 694 |
multiViewMethod = "none") |
696 | 695 |
|
... | ... |
@@ -1043,9 +1042,9 @@ train.list <- function(x, outcomeTrain, ...) |
1043 | 1042 |
#' @rdname crossValidate |
1044 | 1043 |
#' @method train MultiAssayExperiment |
1045 | 1044 |
#' @export |
1046 |
-train.MultiAssayExperiment <- function(x, outcomeColumns, ...) |
|
1045 |
+train.MultiAssayExperiment <- function(x, outcome, ...) |
|
1047 | 1046 |
{ |
1048 |
- prepArgs <- list(x, outcomeColumns) |
|
1047 |
+ prepArgs <- list(x, outcome) |
|
1049 | 1048 |
extraInputs <- list(...) |
1050 | 1049 |
prepExtras <- trainExtras <- numeric() |
1051 | 1050 |
if(length(extraInputs) > 0) |
... | ... |
@@ -165,11 +165,13 @@ setMethod("prepareData", "MultiAssayExperiment", |
165 | 165 |
if(!all(useFeatures[, "assay"] %in% c(names(measurements), "clinical"))) |
166 | 166 |
stop("Some assay names in first column of 'useFeatures' are not assay names in 'measurements' or \"clinical\".") |
167 | 167 |
|
168 |
- clinicalColumns <- colnames(MultiAssayExperiment::colData(measurements)) |
|
168 |
+ clinicalColumnsDataset <- colnames(MultiAssayExperiment::colData(measurements)) |
|
169 | 169 |
if("clinical" %in% useFeatures[, "assay"]) |
170 | 170 |
{ |
171 | 171 |
clinicalRows <- useFeatures[, "assay"] == "clinical" |
172 | 172 |
clinicalColumns <- useFeatures[clinicalRows, "feature"] |
173 |
+ if(length(clinicalColumns) == 1 && clinicalColumns == "all") |
|
174 |
+ clinicalColumns <- setdiff(clinicalColumnsDataset, outcomeColumns) |
|
173 | 175 |
useFeatures <- useFeatures[!clinicalRows, ] |
174 | 176 |
} else { |
175 | 177 |
clinicalColumns <- NULL |
... | ... |
@@ -178,7 +180,6 @@ setMethod("prepareData", "MultiAssayExperiment", |
178 | 180 |
if(nrow(useFeatures) > 0) |
179 | 181 |
{ |
180 | 182 |
measurements <- measurements[, , unique(useFeatures[, "assay"])] |
181 |
- |
|
182 | 183 |
# Get all desired measurements tables and clinical columns (other than the columns representing outcome). |
183 | 184 |
# These form the independent variables to be used for making predictions with. |
184 | 185 |
# Variable names will have names like RNA_BRAF for traceability. |
... | ... |
@@ -13,7 +13,10 @@ |
13 | 13 |
#' are features. |
14 | 14 |
#' @param outcome Either a factor vector of classes, a \code{\link{Surv}} object, or |
15 | 15 |
#' a character string, or vector of such strings, containing column name(s) of column(s) |
16 |
-#' containing either classes or time and event information about survival. |
|
16 |
+#' containing either classes or time and event information about survival. If |
|
17 |
+#' \code{measurements} is a \code{MultiAssayExperiment}, the names of the column (class) or |
|
18 |
+#' columns (survival) in the table extracted by \code{colData(data)} that contain(s) the samples' |
|
19 |
+#' outcome to use for prediction. |
|
17 | 20 |
#' @param crossValParams An object of class \code{\link{CrossValParams}}, |
18 | 21 |
#' specifying the kind of cross-validation to be done. |
19 | 22 |
#' @param modellingParams An object of class \code{\link{ModellingParams}}, |
... | ... |
@@ -26,9 +29,6 @@ |
26 | 29 |
#' package. Transformation, selection and prediction functions provided by |
27 | 30 |
#' this package will cause the characteristics to be automatically determined |
28 | 31 |
#' and this can be left blank. |
29 |
-#' @param outcomeColumns If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the |
|
30 |
-#' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)} |
|
31 |
-#' that contain(s)s the samples' outcome to use for prediction. |
|
32 | 32 |
#' @param ... Variables not used by the \code{matrix} nor the \code{MultiAssayExperiment} method which |
33 | 33 |
#' are passed into and used by the \code{DataFrame} method or passed onwards to \code{\link{prepareData}}. |
34 | 34 |
#' @param verbose Default: 1. A number between 0 and 3 for the amount of |
... | ... |
@@ -70,9 +70,11 @@ setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix |
70 | 70 |
setMethod("runTests", "DataFrame", function(measurements, outcome, crossValParams = CrossValParams(), modellingParams = ModellingParams(), |
71 | 71 |
characteristics = S4Vectors::DataFrame(), ..., verbose = 1) |
72 | 72 |
{ |
73 |
- # Get out the outcome if inside of data table. |
|
74 | 73 |
if(is.null(rownames(measurements))) |
75 |
- stop("'measurements' DataFrame must have sample identifiers as its row names.") |
|
74 |
+ { |
|
75 |
+ warning("'measurements' DataFrame must have sample identifiers as its row names. Generating generic ones.") |
|
76 |
+ rownames(measurements) <- paste("Sample", seq_len(nrow(measurements))) |
|
77 |
+ } |
|
76 | 78 |
|
77 | 79 |
if(any(is.na(measurements))) |
78 | 80 |
stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.") |
... | ... |
@@ -93,7 +95,7 @@ input data. Autmomatically reducing to smaller number.") |
93 | 95 |
|
94 | 96 |
# Element names of the list returned by runTest, in order. |
95 | 97 |
resultTypes <- c("ranked", "selected", "models", "testSet", "predictions", "tune", "importance") |
96 |
- |
|
98 |
+ |
|
97 | 99 |
# Create all partitions of training and testing sets. |
98 | 100 |
samplesSplits <- .samplesSplits(crossValParams, outcome) |
99 | 101 |
splitsTestInfo <- .splitsTestInfo(crossValParams, samplesSplits) |
... | ... |
@@ -112,6 +114,7 @@ input data. Autmomatically reducing to smaller number.") |
112 | 114 |
message("Processing sample set ", setNumber, '.') |
113 | 115 |
|
114 | 116 |
# crossValParams is needed at least for nested feature tuning. |
117 |
+ |
|
115 | 118 |
runTest(measurements[trainingSamples, , drop = FALSE], outcome[trainingSamples], |
116 | 119 |
measurements[testSamples, , drop = FALSE], outcome[testSamples], |
117 | 120 |
crossValParams, modellingParams, characteristics, verbose, |
... | ... |
@@ -180,9 +183,9 @@ input data. Autmomatically reducing to smaller number.") |
180 | 183 |
#' @import MultiAssayExperiment methods |
181 | 184 |
#' @export |
182 | 185 |
setMethod("runTests", c("MultiAssayExperiment"), |
183 |
- function(measurements, outcomeColumns, ...) |
|
186 |
+ function(measurements, outcome, ...) |
|
184 | 187 |
{ |
185 |
- prepArgs <- list(measurements, outcomeColumns) |
|
188 |
+ prepArgs <- list(measurements, outcome) |
|
186 | 189 |
extraInputs <- list(...) |
187 | 190 |
prepExtras <- numeric() |
188 | 191 |
if(length(extraInputs) > 0) |
... | ... |
@@ -37,7 +37,7 @@ crossValidate(measurements, outcome, ...) |
37 | 37 |
|
38 | 38 |
\S4method{crossValidate}{MultiAssayExperiment}( |
39 | 39 |
measurements, |
40 |
- outcomeColumns, |
|
40 |
+ outcome, |
|
41 | 41 |
nFeatures = 20, |
42 | 42 |
selectionMethod = "t-test", |
43 | 43 |
selectionOptimisation = "Resubstitution", |
... | ... |
@@ -119,7 +119,7 @@ crossValidate(measurements, outcome, ...) |
119 | 119 |
|
120 | 120 |
\method{train}{list}(x, outcomeTrain, ...) |
121 | 121 |
|
122 |
-\method{train}{MultiAssayExperiment}(x, outcomeColumns, ...) |
|
122 |
+\method{train}{MultiAssayExperiment}(x, outcome, ...) |
|
123 | 123 |
|
124 | 124 |
\method{predict}{trainedByClassifyR}(object, newData, ...) |
125 | 125 |
} |
... | ... |
@@ -130,7 +130,8 @@ or a list of these objects containing the data.} |
130 | 130 |
\item{outcome}{A vector of class labels of class \code{\link{factor}} of the |
131 | 131 |
same length as the number of samples in \code{measurements} or a character vector of length 1 containing the |
132 | 132 |
column name in \code{measurements} if it is a \code{\link{DataFrame}}. Or a \code{\link{Surv}} object or a character vector of |
133 |
-length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome.} |
|
133 |
+length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome. If \code{measurements} is a |
|
134 |
+\code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.} |
|
134 | 135 |
|
135 | 136 |
\item{...}{Parameters passed into \code{\link{prepareData}} which control subsetting and filtering of input data.} |
136 | 137 |
|
... | ... |
@@ -162,8 +163,6 @@ with each element being a vector of assays to combine. Special value \code{"all" |
162 | 163 |
|
163 | 164 |
\item{characteristicsLabel}{A character specifying an additional label for the cross-validation run.} |
164 | 165 |
|
165 |
-\item{outcomeColumns}{If \code{measurements} is a \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.} |
|
166 |
- |
|
167 | 166 |
\item{x}{Same as \code{measurements} but only training samples.} |
168 | 167 |
|
169 | 168 |
\item{outcomeTrain}{For the \code{train} function, either a factor vector of classes, a \code{\link{Surv}} object, or |
... | ... |
@@ -19,7 +19,7 @@ |
19 | 19 |
verbose = 1 |
20 | 20 |
) |
21 | 21 |
|
22 |
-\S4method{runTests}{MultiAssayExperiment}(measurements, outcomeColumns, ...) |
|
22 |
+\S4method{runTests}{MultiAssayExperiment}(measurements, outcome, ...) |
|
23 | 23 |
} |
24 | 24 |
\arguments{ |
25 | 25 |
\item{measurements}{Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
... | ... |
@@ -32,7 +32,10 @@ are passed into and used by the \code{DataFrame} method or passed onwards to \co |
32 | 32 |
|
33 | 33 |
\item{outcome}{Either a factor vector of classes, a \code{\link{Surv}} object, or |
34 | 34 |
a character string, or vector of such strings, containing column name(s) of column(s) |
35 |
-containing either classes or time and event information about survival.} |
|
35 |
+containing either classes or time and event information about survival. If |
|
36 |
+\code{measurements} is a \code{MultiAssayExperiment}, the names of the column (class) or |
|
37 |
+columns (survival) in the table extracted by \code{colData(data)} that contain(s) the samples' |
|
38 |
+outcome to use for prediction.} |
|
36 | 39 |
|
37 | 40 |
\item{crossValParams}{An object of class \code{\link{CrossValParams}}, |
38 | 41 |
specifying the kind of cross-validation to be done.} |
... | ... |
@@ -52,10 +55,6 @@ and this can be left blank.} |
52 | 55 |
\item{verbose}{Default: 1. A number between 0 and 3 for the amount of |
53 | 56 |
progress messages to give. A higher number will produce more messages as |
54 | 57 |
more lower-level functions print messages.} |
55 |
- |
|
56 |
-\item{outcomeColumns}{If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the |
|
57 |
-names of the column (class) or columns (survival) in the table extracted by \code{colData(data)} |
|
58 |
-that contain(s)s the samples' outcome to use for prediction.} |
|
59 | 58 |
} |
60 | 59 |
\value{ |
61 | 60 |
An object of class \code{\link{ClassifyResult}}. |