Browse code

- Minor fixes.

Dario Strbenac authored on 17/10/2022 03:30:05
Showing 5 changed files

... ...
@@ -3,8 +3,8 @@ Type: Package
3 3
 Title: A framework for cross-validated classification problems, with
4 4
        applications to differential variability and differential
5 5
        distribution testing
6
-Version: 3.1.23
7
-Date: 2022-10-14
6
+Version: 3.1.24
7
+Date: 2022-10-17
8 8
 Author: Dario Strbenac, Ellis Patrick, John Ormerod, Graham Mann, Jean Yang
9 9
 Maintainer: Dario Strbenac <dario.strbenac@sydney.edu.au>
10 10
 VignetteBuilder: knitr
... ...
@@ -118,8 +118,6 @@ setMethod("crossValidate", "DataFrame",
118 118
               # Which data-types or data-views are present?
119 119
               assayIDs <- unique(mcols(measurements)$assay)
120 120
               if(is.null(assayIDs)) assayIDs <- 1
121
-              
122
-              checkData(measurements, outcome)
123 121
 
124 122
               # Check that other variables are in the right format and fix
125 123
               nFeatures <- cleanNFeatures(nFeatures = nFeatures,
... ...
@@ -184,17 +182,12 @@ Using an ordinary GLM instead.")
184 182
                                       characteristicsLabel = characteristicsLabel
185 183
                                   )
186 184
                               },
187
-
188 185
                               simplify = FALSE)
189 186
                           },
190
-
191 187
                           simplify = FALSE)
192 188
                       },
193
-
194 189
                       simplify = FALSE)
195
-
196 190
                   result <- unlist(unlist(resClassifier))
197
-
198 191
               }
199 192
 
200 193
               ################################
... ...
@@ -558,24 +551,6 @@ generateCrossValParams <- function(nRepeats, nFolds, nCores, selectionOptimisati
558 551
 }
559 552
 ######################################
560 553
 
561
-
562
-
563
-######################################
564
-######################################
565
-checkData <- function(measurements, outcome){
566
-    if(is.null(rownames(measurements)))
567
-        stop("'measurements' DataFrame must have sample identifiers as its row names.")
568
-    if(any(is.na(measurements)))
569
-        stop("Some data elements are missing and classifiers don't work with missing data. Consider imputation or filtering.")
570
-
571
-    # !!!  Need to check mcols has assay NUm
572
-
573
-}
574
-######################################
575
-
576
-
577
-
578
-######################################
579 554
 ######################################
580 555
 #' A function to generate a ModellingParams object
581 556
 #'
... ...
@@ -643,9 +618,10 @@ generateModellingParams <- function(assayIDs,
643 618
     knownClassifiers <- .ClassifyRenvir[["classifyKeywords"]][, "classifier Keyword"]
644 619
     if(!classifier %in% knownClassifiers)
645 620
         stop(paste("Classifier must exactly match of these (be careful of case):", paste(knownClassifiers, collapse = ", ")))
646
-    
621
+
647 622
     classifierParams <- .classifierKeywordToParams(classifier)
648
-    classifierParams$trainParams@tuneParams <- c(classifierParams$trainParams@tuneParams, performanceType = performanceType)
623
+    if(!is.null(classifierParams$trainParams@tuneParams))
624
+      classifierParams$trainParams@tuneParams <- c(classifierParams$trainParams@tuneParams, performanceType = performanceType)
649 625
 
650 626
     selectionMethod <- unlist(selectionMethod)
651 627
 
... ...
@@ -833,11 +809,6 @@ CV <- function(measurements = NULL,
833 809
                characteristicsLabel = NULL)
834 810
 
835 811
 {
836
-    # Check that data is in the right format
837
-    if(!is.null(measurements))
838
-      checkData(measurements, outcome)
839
-    else
840
-      checkData(x, x)
841 812
     # Check that other variables are in the right format and fix
842 813
     nFeatures <- cleanNFeatures(nFeatures = nFeatures,
843 814
                                 measurements = measurements)
... ...
@@ -11,6 +11,7 @@ GLMtrainInterface <- function(measurementsTrain, classesTrain, ..., verbose = 3)
11 11
     } else {fitData <- measurementsTrain}
12 12
   glm(class ~ . + 0, family = binomial, data = fitData, ...)
13 13
 }
14
+attr(GLMtrainInterface, "name") <- "GLMtrainInterface"
14 15
 
15 16
 # model is of class glm.
16 17
 GLMpredictInterface <- function(model, measurementsTest, returnType = c("both", "class", "score"),
... ...
@@ -51,11 +51,17 @@ setMethod("prepareData", "matrix",
51 51
 setMethod("prepareData", "DataFrame",
52 52
   function(measurements, outcome, useFeatures = "all", maxMissingProp = 0.0, topNvariance = NULL)
53 53
 {
54
+  if(is.null(rownames(measurements)))
55
+  {
56
+    warning("'measurements' DataFrame must have sample identifiers as its row names. Generating generic ones.")
57
+    rownames(measurements) <- paste("Sample", seq_len(nrow(measurements)))
58
+  }      
59
+            
54 60
   if(useFeatures != "all") # Subset to only the desired ones.
55 61
     measurements <- measurements[, useFeatures]
56 62
 
57 63
   # Won't ever be true if input data was MultiAssayExperiment because wideFormat already produces valid names.  
58
-  if(all.equal(colnames(measurements), make.names(colnames(measurements))) != TRUE)
64
+  if(!all(colnames(measurements) == make.names(colnames(measurements))))
59 65
   {
60 66
     warning("Unsafe feature names in input data. Converted into safe names.")
61 67
     mcols(measurements)$feature <- colnames(measurements) # Save the originals.
... ...
@@ -114,7 +120,7 @@ setMethod("prepareData", "DataFrame",
114 120
     else # Three columns. Therefore, counting process data.
115 121
       outcome <- survival::Surv(outcome[, 1], outcome[, 2], outcome[, 3])
116 122
   }
117
-  
123
+
118 124
   # Remove samples with indeterminate outcome.
119 125
   dropSamples <- which(is.na(outcome) | is.null(outcome))
120 126
   if(length(dropSamples) > 0)
... ...
@@ -125,8 +131,9 @@ setMethod("prepareData", "DataFrame",
125 131
   
126 132
   # Remove features with more missingness than allowed.
127 133
   nSamples <- nrow(measurements)
128
-  dropFeatures <- which(apply(measurements, 2, function(featureMeasurements) sum(is.na(featureMeasurements)))
129
-                        / nrow(measurements) > maxMissingProp)
134
+  measurementsMatrix <- as.matrix(measurements) # For speed of calculation.
135
+  dropFeatures <- which(apply(measurementsMatrix, 2, function(featureMeasurements) sum(is.na(featureMeasurements)))
136
+                        / nrow(measurementsMatrix) > maxMissingProp)
130 137
   if(length(dropFeatures) > 0)
131 138
     measurements <- measurements[, -dropFeatures]
132 139
   
... ...
@@ -139,6 +139,12 @@
139 139
         return(list(NULL, rankings[[1]], NULL))
140 140
     
141 141
     tuneParamsTrain <- list(topN = topNfeatures)
142
+    performanceIndex <- match("performanceType", names(modellingParams@trainParams@tuneParams))
143
+    if(!is.na(performanceIndex))
144
+    {
145
+      performanceType <- modellingParams@trainParams@tuneParams[["performanceType"]]
146
+      modellingParams@trainParams@tuneParams <- modellingParams@trainParams@tuneParams[-performanceIndex]
147
+    }
142 148
     tuneParamsTrain <- append(tuneParamsTrain, modellingParams@trainParams@tuneParams)
143 149
     tuneCombosTrain <- expand.grid(tuneParamsTrain, stringsAsFactors = FALSE)  
144 150
     modellingParams@trainParams@tuneParams <- NULL