Browse code

Update subsampling error handling, add subSize arg

If EM alg throws an error, subsampling will
now repeat that iteration up to ten percent of iter
times (after which it throws an error). Re-added
subSize argument as well (with minimum values
set)

Max McGrath authored on 29/09/2021 23:07:31
Showing2 changed files

... ...
@@ -22,6 +22,8 @@
22 22
 #' @param y ExpressionSet of -omics data, induces dual -omics analysis
23 23
 #' @param transform If TRUE v1 and v2 will be Fisher transformed
24 24
 #' @param subsampling If TRUE subsampling will be run
25
+#' @param subSize Indicates how many feature pairs to be used for subsampling. 
26
+#' Default is the feature size in x
25 27
 #' @param iter Number of iterations for subsampling. Default is 100
26 28
 #' @param components Number of components in mixture model.
27 29
 #' 
... ...
@@ -94,7 +96,7 @@
94 96
 #'                        TCGA_GBM_miRNA_microarray)
95 97
 #' @export
96 98
 discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, 
97
-                          subsampling = FALSE, iter = 100, 
99
+                          subsampling = FALSE, subSize = NULL, iter = 100, 
98 100
                           components = 3) {
99 101
   
100 102
     .checkDiscordantInputs(v1, v2, x, y, transform, subsampling, subSize, iter, 
... ...
@@ -113,23 +115,41 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
113 115
                    .assignClass(v2, param2, components))
114 116
     
115 117
     if (subsampling) {
116
-        subSize <- .setSubSize(x, y)
118
+        subSize <- .setSubSize(x, y, subSize)
117 119
         total_mu <- total_sigma <- total_nu <- 
118 120
           total_tau <- total_pi <- rep(0, components)
119 121
         
120
-        for(i in 1:iter) {
122
+        i <- 0
123
+        repeats <- 0
124
+        
125
+        while(i < iter && repeats < floor(iter * .1)) {
121 126
             subSamples <- .createSubsamples(x, y, v1, v2, subSize)
122 127
             sub.pdata <- cbind(subSamples$v1, subSamples$v2)
123 128
             sub.class <- cbind(.assignClass(subSamples$v1, param1, components),
124 129
                                .assignClass(subSamples$v2, param2, components))
125 130
             
126
-            pd <- em.normal.partial.concordant(sub.pdata, sub.class, components)
127
-            total_mu <- total_mu + pd$mu_sigma[1,]
128
-            total_sigma <- total_sigma + pd$mu_sigma[2,]
129
-            total_nu <- total_nu + pd$nu_tau[1,]
130
-            total_tau <- total_tau + pd$nu_tau[2,]
131
-            total_pi <- total_pi + pd$pi
132
-      }
131
+            pd <- tryCatch({em.normal.partial.concordant(sub.pdata, sub.class, 
132
+                                                   components)},
133
+                error = function(unused) return(NULL))
134
+            
135
+            if(is.null(pd)) { 
136
+              repeats <- repeats + 1 
137
+            } else {
138
+              i <- i + 1
139
+              total_mu <- total_mu + pd$mu_sigma[1,]
140
+              total_sigma <- total_sigma + pd$mu_sigma[2,]
141
+              total_nu <- total_nu + pd$nu_tau[1,]
142
+              total_tau <- total_tau + pd$nu_tau[2,]
143
+              total_pi <- total_pi + pd$pi
144
+            }
145
+        }
146
+        
147
+        if (repeats >= floor(iter * .1)) {
148
+          stop("Insufficient data for subsampling. Increase number of
149
+               features, reduce number of components used, or increase 
150
+               subSize if not at default value. Alternatively, set
151
+               subsampling=FALSE.")
152
+        }
133 153
       
134 154
       mu <- total_mu / iter
135 155
       sigma <- total_sigma / iter
... ...
@@ -311,11 +331,23 @@ em.normal.partial.concordant <- function(data, class, components) {
311 331
 }
312 332
 
313 333
 # Internal function to set sub size based on data
314
-.setSubSize <- function(x, y) {
334
+.setSubSize <- function(x, y, subSize) {
315 335
   if (is.null(y)) {
316
-    sampleNames <- rownames(x)
317
-    subSize <- floor(length(sampleNames) / 2)
336
+    if (is.null(subSize)) {
337
+      subSize <- floor(length(rownames(x)) / 2)
338
+    } else if (subSize > floor(length(rownames(x)) / 2)) {
339
+      subSize <- floor(length(rownames(x)) / 2)
340
+      warning(paste0("subSize argument too large. Using subSize ", subSize,
341
+                     " See vignette for more information."))
342
+    }
318 343
   } else {
319
-    subSize <- min(nrow(x), nrow(y))
344
+    if (is.null(subSize)) {
345
+      subSize <- min(nrow(x), nrow(y))
346
+    } else if (subSize > min(nrow(x), nrow(y))) {
347
+      subSize <- min(nrow(x), nrow(y))
348
+      warning(paste0("subSize argument to large. Using subSize ", subSize,
349
+                     " See vignette for more information."))
350
+    }
320 351
   }
352
+  return(subSize)
321 353
 }
322 354
\ No newline at end of file
... ...
@@ -11,6 +11,7 @@ discordantRun(
11 11
   y = NULL,
12 12
   transform = TRUE,
13 13
   subsampling = FALSE,
14
+  subSize = NULL,
14 15
   iter = 100,
15 16
   components = 3
16 17
 )
... ...
@@ -28,6 +29,9 @@ discordantRun(
28 29
 
29 30
 \item{subsampling}{If TRUE subsampling will be run}
30 31
 
32
+\item{subSize}{Indicates how many feature pairs to be used for subsampling. 
33
+Default is the feature size in x}
34
+
31 35
 \item{iter}{Number of iterations for subsampling. Default is 100}
32 36
 
33 37
 \item{components}{Number of components in mixture model.}