Parts of subsampling algorithm broken out into
seperate functions. subSize argument was removed
as it doesn't actually do anything (may add back
in later)
... | ... |
@@ -96,7 +96,7 @@ |
96 | 96 |
#' TCGA_GBM_miRNA_microarray) |
97 | 97 |
#' @export |
98 | 98 |
discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
99 |
- subsampling = FALSE, subSize = dim(x)[1], iter = 100, |
|
99 |
+ subsampling = FALSE, iter = 100, |
|
100 | 100 |
components = 3) { |
101 | 101 |
|
102 | 102 |
.checkDiscordantInputs(v1, v2, x, y, transform, subsampling, subSize, iter, |
... | ... |
@@ -124,42 +124,15 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
124 | 124 |
} |
125 | 125 |
|
126 | 126 |
if (subsampling) { |
127 |
- subSize <- min(nrow(x), nrow(y)) |
|
127 |
+ subSize = .setSubSize(x, y) |
|
128 | 128 |
total_mu <- total_sigma <- total_nu <- |
129 | 129 |
total_tau <- total_pi <- rep(0, components) |
130 | 130 |
|
131 | 131 |
for(i in 1:iter) { |
132 |
- # make sure pairs are independent |
|
133 |
- rowIndex <- sample(nrow(x), subSize) |
|
134 |
- |
|
135 |
- if(is.null(y)) { |
|
136 |
- # colIndex <- sample(nrow(x), subSize) |
|
137 |
- sampleNames <- rownames(x) |
|
138 |
- |
|
139 |
- if (floor(length(sampleNames) / 2) < subSize) { |
|
140 |
- warning("Provided subSize too high. Using number of features divided by 2.") |
|
141 |
- subSize <- floor(length(sampleNames) / 2) |
|
142 |
- } |
|
143 |
- |
|
144 |
- nameSet1 <- sample(sampleNames, subSize) |
|
145 |
- nameSet2 <- setdiff(sampleNames, nameSet1) |
|
146 |
- nameSetA <- paste0(nameSet1, "_", nameSet2) |
|
147 |
- nameSetB <- paste0(nameSet2, "_", nameSet1) |
|
148 |
- subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA]) |
|
149 |
- subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA]) |
|
150 |
- } else { |
|
151 |
- colIndex <- sample(nrow(y), subSize) |
|
152 |
- mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE) |
|
153 |
- mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE) |
|
154 |
- subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], |
|
155 |
- colIndex[x]]) |
|
156 |
- subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], |
|
157 |
- colIndex[x]]) |
|
158 |
- } |
|
159 |
- |
|
160 |
- sub.pdata <- cbind(subSampV1, subSampV2) |
|
161 |
- sub.class <- cbind(.assignClass(subSampV1, param1, components), |
|
162 |
- .assignClass(subSampV2, param2, components)) |
|
132 |
+ subSamples <- .createSubsamples(x, y, v1, v2, subSize) |
|
133 |
+ sub.pdata <- cbind(subSamples$v1, subSamples$v2) |
|
134 |
+ sub.class <- cbind(.assignClass(subSamples$v1, param1, components), |
|
135 |
+ .assignClass(subSamples$v2, param2, components)) |
|
163 | 136 |
|
164 | 137 |
pd <- em.normal.partial.concordant(sub.pdata, sub.class, components) |
165 | 138 |
total_mu <- total_mu + pd$mu_sigma[1,] |
... | ... |
@@ -282,9 +255,29 @@ em.normal.partial.concordant <- function(data, class, components) { |
282 | 255 |
return(rtn) |
283 | 256 |
} |
284 | 257 |
|
285 |
-# .createSubsamples <- function(x, y = NULL, v1, v2) { |
|
286 |
-# |
|
287 |
-# } |
|
258 |
+.createSubsamples <- function(x, y = NULL, v1, v2, subSize) { |
|
259 |
+ if(is.null(y)) { |
|
260 |
+ sampleNames <- rownames(x) |
|
261 |
+ nameSet1 <- sample(sampleNames, subSize) |
|
262 |
+ nameSet2 <- sample(setdiff(sampleNames, nameSet1)) |
|
263 |
+ nameSetA <- paste0(nameSet1, "_", nameSet2) |
|
264 |
+ nameSetB <- paste0(nameSet2, "_", nameSet1) |
|
265 |
+ subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA]) |
|
266 |
+ subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA]) |
|
267 |
+ } else { |
|
268 |
+ # make sure pairs are independent |
|
269 |
+ rowIndex <- sample(nrow(x), subSize) |
|
270 |
+ colIndex <- sample(nrow(y), subSize) |
|
271 |
+ mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE) |
|
272 |
+ mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE) |
|
273 |
+ subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], |
|
274 |
+ colIndex[x]]) |
|
275 |
+ subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], |
|
276 |
+ colIndex[x]]) |
|
277 |
+ } |
|
278 |
+ return(list(v1 = subSampV1, |
|
279 |
+ v2 = subSampV2)) |
|
280 |
+} |
|
288 | 281 |
|
289 | 282 |
# Internal function to validate user inputs for discordantRun() |
290 | 283 |
#' @importFrom methods is |
... | ... |
@@ -302,12 +295,12 @@ em.normal.partial.concordant <- function(data, class, components) { |
302 | 295 |
# } |
303 | 296 |
|
304 | 297 |
if (!(components %in% c(3, 5))) { |
305 |
- stop ("components must be equal to 3 or 5") |
|
298 |
+ stop ("Components must be equal to 3 or 5") |
|
306 | 299 |
} |
307 | 300 |
|
308 | 301 |
if (transform && (range(v1)[1] < -1 || range(v1)[2] > 1 || |
309 | 302 |
range(v2)[1] < -1 || range(v2)[2] > 1)) { |
310 |
- stop ("correlation vectors have values less than -1 and/or greater than 1.") |
|
303 |
+ stop ("Correlation vectors have values less than -1 and/or greater than 1.") |
|
311 | 304 |
} |
312 | 305 |
} |
313 | 306 |
|
... | ... |
@@ -320,6 +313,16 @@ em.normal.partial.concordant <- function(data, class, components) { |
320 | 313 |
if(any(c(sumZx, sumZy) == 0)) { |
321 | 314 |
stop("Insufficient data for component estimation. Increase number of |
322 | 315 |
features or reduce number of components used. If subsampling=TRUE, |
323 |
- consider increasing subSize as well.") |
|
316 |
+ you may need set subsampling=FALSE.") |
|
317 |
+ } |
|
318 |
+} |
|
319 |
+ |
|
320 |
+# Internal function to set sub size based on data |
|
321 |
+.setSubSize <- function(x, y) { |
|
322 |
+ if (is.null(y)) { |
|
323 |
+ sampleNames <- rownames(x) |
|
324 |
+ subSize <- floor(length(sampleNames) / 2) |
|
325 |
+ } else { |
|
326 |
+ subSize <- min(nrow(x), nrow(y)) |
|
324 | 327 |
} |
325 | 328 |
} |
326 | 329 |
\ No newline at end of file |