If EM alg throws an error, subsampling will
now repeat that iteration up to ten percent of iter
times (after which it throws an error). Re-added
subSize argument as well (with minimum values
set)
... | ... |
@@ -22,6 +22,8 @@ |
22 | 22 |
#' @param y ExpressionSet of -omics data, induces dual -omics analysis |
23 | 23 |
#' @param transform If TRUE v1 and v2 will be Fisher transformed |
24 | 24 |
#' @param subsampling If TRUE subsampling will be run |
25 |
+#' @param subSize Indicates how many feature pairs to be used for subsampling. |
|
26 |
+#' Default is the feature size in x |
|
25 | 27 |
#' @param iter Number of iterations for subsampling. Default is 100 |
26 | 28 |
#' @param components Number of components in mixture model. |
27 | 29 |
#' |
... | ... |
@@ -94,7 +96,7 @@ |
94 | 96 |
#' TCGA_GBM_miRNA_microarray) |
95 | 97 |
#' @export |
96 | 98 |
discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
97 |
- subsampling = FALSE, iter = 100, |
|
99 |
+ subsampling = FALSE, subSize = NULL, iter = 100, |
|
98 | 100 |
components = 3) { |
99 | 101 |
|
100 | 102 |
.checkDiscordantInputs(v1, v2, x, y, transform, subsampling, subSize, iter, |
... | ... |
@@ -113,23 +115,41 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
113 | 115 |
.assignClass(v2, param2, components)) |
114 | 116 |
|
115 | 117 |
if (subsampling) { |
116 |
- subSize <- .setSubSize(x, y) |
|
118 |
+ subSize <- .setSubSize(x, y, subSize) |
|
117 | 119 |
total_mu <- total_sigma <- total_nu <- |
118 | 120 |
total_tau <- total_pi <- rep(0, components) |
119 | 121 |
|
120 |
- for(i in 1:iter) { |
|
122 |
+ i <- 0 |
|
123 |
+ repeats <- 0 |
|
124 |
+ |
|
125 |
+ while(i < iter && repeats < floor(iter * .1)) { |
|
121 | 126 |
subSamples <- .createSubsamples(x, y, v1, v2, subSize) |
122 | 127 |
sub.pdata <- cbind(subSamples$v1, subSamples$v2) |
123 | 128 |
sub.class <- cbind(.assignClass(subSamples$v1, param1, components), |
124 | 129 |
.assignClass(subSamples$v2, param2, components)) |
125 | 130 |
|
126 |
- pd <- em.normal.partial.concordant(sub.pdata, sub.class, components) |
|
127 |
- total_mu <- total_mu + pd$mu_sigma[1,] |
|
128 |
- total_sigma <- total_sigma + pd$mu_sigma[2,] |
|
129 |
- total_nu <- total_nu + pd$nu_tau[1,] |
|
130 |
- total_tau <- total_tau + pd$nu_tau[2,] |
|
131 |
- total_pi <- total_pi + pd$pi |
|
132 |
- } |
|
131 |
+ pd <- tryCatch({em.normal.partial.concordant(sub.pdata, sub.class, |
|
132 |
+ components)}, |
|
133 |
+ error = function(unused) return(NULL)) |
|
134 |
+ |
|
135 |
+ if(is.null(pd)) { |
|
136 |
+ repeats <- repeats + 1 |
|
137 |
+ } else { |
|
138 |
+ i <- i + 1 |
|
139 |
+ total_mu <- total_mu + pd$mu_sigma[1,] |
|
140 |
+ total_sigma <- total_sigma + pd$mu_sigma[2,] |
|
141 |
+ total_nu <- total_nu + pd$nu_tau[1,] |
|
142 |
+ total_tau <- total_tau + pd$nu_tau[2,] |
|
143 |
+ total_pi <- total_pi + pd$pi |
|
144 |
+ } |
|
145 |
+ } |
|
146 |
+ |
|
147 |
+ if (repeats >= floor(iter * .1)) { |
|
148 |
+ stop("Insufficient data for subsampling. Increase number of |
|
149 |
+ features, reduce number of components used, or increase |
|
150 |
+ subSize if not at default value. Alternatively, set |
|
151 |
+ subsampling=FALSE.") |
|
152 |
+ } |
|
133 | 153 |
|
134 | 154 |
mu <- total_mu / iter |
135 | 155 |
sigma <- total_sigma / iter |
... | ... |
@@ -311,11 +331,23 @@ em.normal.partial.concordant <- function(data, class, components) { |
311 | 331 |
} |
312 | 332 |
|
313 | 333 |
# Internal function to set sub size based on data |
314 |
-.setSubSize <- function(x, y) { |
|
334 |
+.setSubSize <- function(x, y, subSize) { |
|
315 | 335 |
if (is.null(y)) { |
316 |
- sampleNames <- rownames(x) |
|
317 |
- subSize <- floor(length(sampleNames) / 2) |
|
336 |
+ if (is.null(subSize)) { |
|
337 |
+ subSize <- floor(length(rownames(x)) / 2) |
|
338 |
+ } else if (subSize > floor(length(rownames(x)) / 2)) { |
|
339 |
+ subSize <- floor(length(rownames(x)) / 2) |
|
340 |
+ warning(paste0("subSize argument too large. Using subSize ", subSize, |
|
341 |
+ " See vignette for more information.")) |
|
342 |
+ } |
|
318 | 343 |
} else { |
319 |
- subSize <- min(nrow(x), nrow(y)) |
|
344 |
+ if (is.null(subSize)) { |
|
345 |
+ subSize <- min(nrow(x), nrow(y)) |
|
346 |
+ } else if (subSize > min(nrow(x), nrow(y))) { |
|
347 |
+ subSize <- min(nrow(x), nrow(y)) |
|
348 |
+ warning(paste0("subSize argument to large. Using subSize ", subSize, |
|
349 |
+ " See vignette for more information.")) |
|
350 |
+ } |
|
320 | 351 |
} |
352 |
+ return(subSize) |
|
321 | 353 |
} |
322 | 354 |
\ No newline at end of file |
... | ... |
@@ -11,6 +11,7 @@ discordantRun( |
11 | 11 |
y = NULL, |
12 | 12 |
transform = TRUE, |
13 | 13 |
subsampling = FALSE, |
14 |
+ subSize = NULL, |
|
14 | 15 |
iter = 100, |
15 | 16 |
components = 3 |
16 | 17 |
) |
... | ... |
@@ -28,6 +29,9 @@ discordantRun( |
28 | 29 |
|
29 | 30 |
\item{subsampling}{If TRUE subsampling will be run} |
30 | 31 |
|
32 |
+\item{subSize}{Indicates how many feature pairs to be used for subsampling. |
|
33 |
+Default is the feature size in x} |
|
34 |
+ |
|
31 | 35 |
\item{iter}{Number of iterations for subsampling. Default is 100} |
32 | 36 |
|
33 | 37 |
\item{components}{Number of components in mixture model.} |