Browse code

Enable subsampling for within -omics

Adds code to independently subsample a single
dataset for within -omics analysis

Max McGrath authored on 22/09/2021 21:52:00
Showing1 changed files

... ...
@@ -131,14 +131,31 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
131 131
         for(i in 1:iter) {
132 132
             # make sure pairs are independent
133 133
             rowIndex <- sample(nrow(x), subSize)
134
-            colIndex <- sample(nrow(y), subSize)
135
-            mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE)
136
-            mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE)
137 134
             
138
-            subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], 
139
-                                                            colIndex[x]])
140
-            subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], 
141
-                                                            colIndex[x]])
135
+            if(is.null(y)) {
136
+              # colIndex <- sample(nrow(x), subSize)
137
+              sampleNames <- rownames(x)
138
+              
139
+              if (floor(length(sampleNames) / 2) < subSize) {
140
+                warning("Provided subSize too high. Using number of features divided by 2.")
141
+                subSize <- floor(length(sampleNames) / 2)
142
+              }
143
+              
144
+              nameSet1 <- sample(sampleNames, subSize)
145
+              nameSet2 <- setdiff(sampleNames, nameSet1)
146
+              nameSetA <- paste0(nameSet1, "_", nameSet2)
147
+              nameSetB <- paste0(nameSet2, "_", nameSet1)
148
+              subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA])
149
+              subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA])
150
+            } else {
151
+              colIndex <- sample(nrow(y), subSize)
152
+              mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE)
153
+              mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE)
154
+              subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], 
155
+                                                              colIndex[x]])
156
+              subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], 
157
+                                                              colIndex[x]])
158
+            }
142 159
             
143 160
             sub.pdata <- cbind(subSampV1, subSampV2)
144 161
             sub.class <- cbind(.assignClass(subSampV1, param1, components),
... ...
@@ -265,6 +282,10 @@ em.normal.partial.concordant <- function(data, class, components) {
265 282
   return(rtn)
266 283
 }
267 284
 
285
+# .createSubsamples <- function(x, y = NULL, v1, v2) {
286
+#   
287
+# }
288
+
268 289
 # Internal function to validate user inputs for discordantRun()
269 290
 #' @importFrom methods is
270 291
 .checkDiscordantInputs <- function(v1, v2, x, y, transform, 
... ...
@@ -290,6 +311,9 @@ em.normal.partial.concordant <- function(data, class, components) {
290 311
   }
291 312
 }
292 313
 
314
+# Internal function that checks whether all types of component are present
315
+#   in given vectors. If a certain component is not present, we run into a
316
+#   divide-by-zero error that crashes R
293 317
 .checkForMissingComponents <- function(zx, zy) {
294 318
   sumZx <- colSums(zx)
295 319
   sumZy <- colSums(zy)