Browse code

- Corrected prepareData when useFeatures is "all". - runTests outcome variable has0 name same for all methods to fix the generic dispatching.

Dario Strbenac authored on 18/10/2022 01:45:10
Showing 6 changed files

... ...
@@ -3,8 +3,8 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.1.24
7
-Date: 2022-10-17
6
+Version: 3.1.25
7
+Date: 2022-10-18
8 8
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
9 9
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
... ...
@@ -10,8 +10,8 @@
10 10
 #' @param outcome A vector of class labels of class \code{\link{factor}} of the
11 11
 #' same length as the number of samples in \code{measurements} or a character vector of length 1 containing the
12 12
 #' column name in \code{measurements} if it is a \code{\link{DataFrame}}. Or a \code{\link{Surv}} object or a character vector of
13
-#' length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome.
14
-#' @param outcomeColumns If \code{measurements} is a \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.
13
+#' length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome. If \code{measurements} is a
14
+#' \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.
15 15
 #' @param outcomeTrain For the \code{train} function, either a factor vector of classes, a \code{\link{Surv}} object, or
16 16
 #' a character string, or vector of such strings, containing column name(s) of column(s)
17 17
 #' containing either classes or time and event information about survival.
... ...
@@ -201,7 +201,6 @@ Using an ordinary GLM instead.")
201 201
                   # The below loops over different combinations of assays and merges them together.
202 202
                   # This allows someone to answer which combinations of the assays might be most useful.
203 203
 
204
-
205 204
                   if(!is.list(assayCombinations) && assayCombinations == "all") assayCombinations <- do.call("c", sapply(seq_along(assayIDs), function(nChoose) combn(assayIDs, nChoose, simplify = FALSE)))
206 205
 
207 206
                   result <- sapply(assayCombinations, function(assayIndex){
... ...
@@ -237,7 +236,6 @@ Using an ordinary GLM instead.")
237 236
                       if(length(assayCombinations) == 0) stop("No assayCombinations with \"clinical\" data")
238 237
                   }
239 238
 
240
-
241 239
                   result <- sapply(assayCombinations, function(assayIndex){
242 240
                       CV(measurements = measurements[, mcols(measurements)[["assay"]] %in% assayIndex],
243 241
                          outcome = outcome, assayIDs = assayIndex,
... ...
@@ -299,7 +297,7 @@ Using an ordinary GLM instead.")
299 297
 # One or more omics data sets, possibly with clinical data.
300 298
 setMethod("crossValidate", "MultiAssayExperiment",
301 299
           function(measurements,
302
-                   outcomeColumns, 
300
+                   outcome, 
303 301
                    nFeatures = 20,
304 302
                    selectionMethod = "t-test",
305 303
                    selectionOptimisation = "Resubstitution",
... ...
@@ -312,7 +310,7 @@ setMethod("crossValidate", "MultiAssayExperiment",
312 310
                    nCores = 1,
313 311
                    characteristicsLabel = NULL, ...)
314 312
           {
315
-              measurementsAndOutcome <- prepareData(measurements, outcomeColumns, ...)
313
+              measurementsAndOutcome <- prepareData(measurements, outcome, ...)
316 314
 
317 315
               crossValidate(measurements = measurementsAndOutcome[["measurements"]],
318 316
                             outcome = measurementsAndOutcome[["outcome"]], 
... ...
@@ -691,6 +689,7 @@ generateMultiviewParams <- function(assayIDs,
691 689
                                           nFeatures = nFeatures,
692 690
                                           selectionMethod = selectionMethod,
693 691
                                           selectionOptimisation = "none",
692
+                                          performanceType = performanceType,
694 693
                                           classifier = classifier,
695 694
                                           multiViewMethod = "none")
696 695
 
... ...
@@ -1043,9 +1042,9 @@ train.list <- function(x, outcomeTrain, ...)
1043 1042
 #' @rdname crossValidate
1044 1043
 #' @method train MultiAssayExperiment
1045 1044
 #' @export
1046
-train.MultiAssayExperiment <- function(x, outcomeColumns, ...)
1045
+train.MultiAssayExperiment <- function(x, outcome, ...)
1047 1046
           {
1048
-              prepArgs <- list(x, outcomeColumns)
1047
+              prepArgs <- list(x, outcome)
1049 1048
               extraInputs <- list(...)
1050 1049
               prepExtras <- trainExtras <- numeric()
1051 1050
               if(length(extraInputs) > 0)
... ...
@@ -165,11 +165,13 @@ setMethod("prepareData", "MultiAssayExperiment",
165 165
   if(!all(useFeatures[, "assay"] %in% c(names(measurements), "clinical")))
166 166
     stop("Some assay names in first column of 'useFeatures' are not assay names in 'measurements' or \"clinical\".")
167 167
 
168
-  clinicalColumns <- colnames(MultiAssayExperiment::colData(measurements))
168
+  clinicalColumnsDataset <- colnames(MultiAssayExperiment::colData(measurements))
169 169
   if("clinical" %in% useFeatures[, "assay"])
170 170
   {
171 171
     clinicalRows <- useFeatures[, "assay"] == "clinical"      
172 172
     clinicalColumns <- useFeatures[clinicalRows, "feature"]
173
+    if(length(clinicalColumns) == 1 && clinicalColumns == "all")
174
+      clinicalColumns <- setdiff(clinicalColumnsDataset, outcomeColumns)
173 175
     useFeatures <- useFeatures[!clinicalRows, ]
174 176
   } else {
175 177
     clinicalColumns <- NULL
... ...
@@ -178,7 +180,6 @@ setMethod("prepareData", "MultiAssayExperiment",
178 180
   if(nrow(useFeatures) > 0)
179 181
   {
180 182
     measurements <- measurements[, , unique(useFeatures[, "assay"])]
181
-  
182 183
     # Get all desired measurements tables and clinical columns (other than the columns representing outcome).
183 184
     # These form the independent variables to be used for making predictions with.
184 185
     # Variable names will have names like RNA_BRAF for traceability.
... ...
@@ -13,7 +13,10 @@
13 13
 #' are features.
14 14
 #' @param outcome Either a factor vector of classes, a \code{\link{Surv}} object, or
15 15
 #' a character string, or vector of such strings, containing column name(s) of column(s)
16
-#' containing either classes or time and event information about survival.
16
+#' containing either classes or time and event information about survival. If
17
+#' \code{measurements} is a \code{MultiAssayExperiment}, the names of the column (class) or
18
+#' columns (survival) in the table extracted by \code{colData(data)} that contain(s) the samples'
19
+#' outcome to use for prediction.
17 20
 #' @param crossValParams An object of class \code{\link{CrossValParams}},
18 21
 #' specifying the kind of cross-validation to be done.
19 22
 #' @param modellingParams An object of class \code{\link{ModellingParams}},
... ...
@@ -26,9 +29,6 @@
26 29
 #' package.  Transformation, selection and prediction functions provided by
27 30
 #' this package will cause the characteristics to be automatically determined
28 31
 #' and this can be left blank.
29
-#' @param outcomeColumns If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
30
-#' names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
31
-#' that contain(s)s the samples' outcome to use for prediction.
32 32
 #' @param ... Variables not used by the \code{matrix} nor the \code{MultiAssayExperiment} method which
33 33
 #' are passed into and used by the \code{DataFrame} method or passed onwards to \code{\link{prepareData}}.
34 34
 #' @param verbose Default: 1. A number between 0 and 3 for the amount of
... ...
@@ -70,9 +70,11 @@ setMethod("runTests", c("matrix"), function(measurements, outcome, ...) # Matrix
70 70
 setMethod("runTests", "DataFrame", function(measurements, outcome, crossValParams = CrossValParams(), modellingParams = ModellingParams(),
71 71
            characteristics = S4Vectors::DataFrame(), ..., verbose = 1)
72 72
 {
73
-  # Get out the outcome if inside of data table.           
74 73
   if(is.null(rownames(measurements)))
75
-    stop("'measurements' DataFrame must have sample identifiers as its row names.")
74
+  {
75
+    warning("'measurements' DataFrame must have sample identifiers as its row names. Generating generic ones.")
76
+    rownames(measurements) <- paste("Sample", seq_len(nrow(measurements)))
77
+  }
76 78
   
77 79
   if(any(is.na(measurements)))
78 80
     stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.")            
... ...
@@ -93,7 +95,7 @@ input data. Autmomatically reducing to smaller number.")
93 95
   
94 96
   # Element names of the list returned by runTest, in order.
95 97
   resultTypes <- c("ranked", "selected", "models", "testSet", "predictions", "tune", "importance")
96
-  
98
+
97 99
   # Create all partitions of training and testing sets.
98 100
   samplesSplits <- .samplesSplits(crossValParams, outcome)
99 101
   splitsTestInfo <- .splitsTestInfo(crossValParams, samplesSplits)
... ...
@@ -112,6 +114,7 @@ input data. Autmomatically reducing to smaller number.")
112 114
       message("Processing sample set ", setNumber, '.')
113 115
     
114 116
     # crossValParams is needed at least for nested feature tuning.
117
+    
115 118
     runTest(measurements[trainingSamples, , drop = FALSE], outcome[trainingSamples],
116 119
             measurements[testSamples, , drop = FALSE], outcome[testSamples],
117 120
             crossValParams, modellingParams, characteristics, verbose,
... ...
@@ -180,9 +183,9 @@ input data. Autmomatically reducing to smaller number.")
180 183
 #' @import MultiAssayExperiment methods
181 184
 #' @export
182 185
 setMethod("runTests", c("MultiAssayExperiment"),
183
-          function(measurements, outcomeColumns, ...)
186
+          function(measurements, outcome, ...)
184 187
 {
185
-  prepArgs <- list(measurements, outcomeColumns)              
188
+  prepArgs <- list(measurements, outcome)              
186 189
   extraInputs <- list(...)
187 190
   prepExtras <- numeric()
188 191
   if(length(extraInputs) > 0)
... ...
@@ -37,7 +37,7 @@ crossValidate(measurements, outcome, ...)
37 37
 
38 38
 \S4method{crossValidate}{MultiAssayExperiment}(
39 39
   measurements,
40
-  outcomeColumns,
40
+  outcome,
41 41
   nFeatures = 20,
42 42
   selectionMethod = "t-test",
43 43
   selectionOptimisation = "Resubstitution",
... ...
@@ -119,7 +119,7 @@ crossValidate(measurements, outcome, ...)
119 119
 
120 120
 \method{train}{list}(x, outcomeTrain, ...)
121 121
 
122
-\method{train}{MultiAssayExperiment}(x, outcomeColumns, ...)
122
+\method{train}{MultiAssayExperiment}(x, outcome, ...)
123 123
 
124 124
 \method{predict}{trainedByClassifyR}(object, newData, ...)
125 125
 }
... ...
@@ -130,7 +130,8 @@ or a list of these objects containing the data.}
130 130
 \item{outcome}{A vector of class labels of class \code{\link{factor}} of the
131 131
 same length as the number of samples in \code{measurements} or a character vector of length 1 containing the
132 132
 column name in \code{measurements} if it is a \code{\link{DataFrame}}. Or a \code{\link{Surv}} object or a character vector of
133
-length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome.}
133
+length 2 or 3 specifying the time and event columns in \code{measurements} for survival outcome. If \code{measurements} is a
134
+\code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.}
134 135
 
135 136
 \item{...}{Parameters passed into \code{\link{prepareData}} which control subsetting and filtering of input data.}
136 137
 
... ...
@@ -162,8 +163,6 @@ with each element being a vector of assays to combine. Special value \code{"all"
162 163
 
163 164
 \item{characteristicsLabel}{A character specifying an additional label for the cross-validation run.}
164 165
 
165
-\item{outcomeColumns}{If \code{measurements} is a \code{\link{MultiAssayExperiment}}, the column name(s) in \code{colData(measurements)} representing the outcome.}
166
-
167 166
 \item{x}{Same as \code{measurements} but only training samples.}
168 167
 
169 168
 \item{outcomeTrain}{For the \code{train} function, either a factor vector of classes, a \code{\link{Surv}} object, or
... ...
@@ -19,7 +19,7 @@
19 19
   verbose = 1
20 20
 )
21 21
 
22
-\S4method{runTests}{MultiAssayExperiment}(measurements, outcomeColumns, ...)
22
+\S4method{runTests}{MultiAssayExperiment}(measurements, outcome, ...)
23 23
 }
24 24
 \arguments{
25 25
 \item{measurements}{Either a \code{\link{matrix}}, \code{\link{DataFrame}}
... ...
@@ -32,7 +32,10 @@ are passed into and used by the \code{DataFrame} method or passed onwards to \co
32 32
 
33 33
 \item{outcome}{Either a factor vector of classes, a \code{\link{Surv}} object, or
34 34
 a character string, or vector of such strings, containing column name(s) of column(s)
35
-containing either classes or time and event information about survival.}
35
+containing either classes or time and event information about survival. If
36
+\code{measurements} is a \code{MultiAssayExperiment}, the names of the column (class) or
37
+columns (survival) in the table extracted by \code{colData(data)} that contain(s) the samples'
38
+outcome to use for prediction.}
36 39
 
37 40
 \item{crossValParams}{An object of class \code{\link{CrossValParams}},
38 41
 specifying the kind of cross-validation to be done.}
... ...
@@ -52,10 +55,6 @@ and this can be left blank.}
52 55
 \item{verbose}{Default: 1. A number between 0 and 3 for the amount of
53 56
 progress messages to give.  A higher number will produce more messages as
54 57
 more lower-level functions print messages.}
55
-
56
-\item{outcomeColumns}{If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the
57
-names of the column (class) or columns (survival) in the table extracted by \code{colData(data)}
58
-that contain(s)s the samples' outcome to use for prediction.}
59 58
 }
60 59
 \value{
61 60
 An object of class \code{\link{ClassifyResult}}.