Browse code

Refactor discordantRun output chunk

Break output chunks into seperate function, reduce
repeated code in output chunk.

Max McGrath authored on 22/09/2021 23:34:22
Showing1 changed files

... ...
@@ -106,25 +106,16 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
106 106
         v1 <- fishersTrans(v1)
107 107
         v2 <- fishersTrans(v2)
108 108
     }
109
-  
110 109
     x <- exprs(x)
111 110
     if (!is.null(y)) { y <- exprs(y) }
112
-    featureSize = dim(x)[1]
113
-    
114 111
     pdata <- cbind(v1, v2)
115 112
     param1 <- sd(v1)
116 113
     param2 <- sd(v2)
117 114
     class <- cbind(.assignClass(v1, param1, components),
118 115
                    .assignClass(v2, param2, components))
119 116
     
120
-    if (components == 3) {
121
-        discordClass <- c(2,3,4,6,7,8)
122
-    } else {
123
-        discordClass <- setdiff(1:25, c(1, 7, 13, 19, 25))
124
-    }
125
-    
126 117
     if (subsampling) {
127
-        subSize = .setSubSize(x, y)
118
+        subSize <- .setSubSize(x, y)
128 119
         total_mu <- total_sigma <- total_nu <- 
129 120
           total_tau <- total_pi <- rep(0, components)
130 121
         
... ...
@@ -158,41 +149,7 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE,
158 149
         classVector <- pd$class
159 150
     }
160 151
     
161
-    discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass])/sum(x))
162
-    
163
-    if(is.null(y)) {
164
-        discordPPMatrix <- matrix(NA,nrow = featureSize, ncol = featureSize)
165
-        classMatrix <- discordPPMatrix
166
-        diag <- lower.tri(discordPPMatrix, diag = FALSE)
167
-        discordPPMatrix[diag] <- discordPPV
168
-        classMatrix[diag] <- classVector
169
-        rownames(discordPPMatrix) <- rownames(x)
170
-        colnames(discordPPMatrix) <- rownames(x)
171
-        rownames(classMatrix) <- rownames(x)
172
-        colnames(classMatrix) <- rownames(x)
173
-        vector_names <- .getNames(x)
174
-        names(discordPPV) <- vector_names
175
-        names(classVector) <- vector_names
176
-    } else {
177
-        discordPPMatrix <- matrix(discordPPV, nrow = featureSize, 
178
-                                  byrow = FALSE)
179
-        classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE)
180
-        rownames(discordPPMatrix) <- rownames(x)
181
-        colnames(discordPPMatrix) <- rownames(y)
182
-        rownames(classMatrix) <- rownames(x)
183
-        colnames(classMatrix) <- rownames(y)
184
-        
185
-        vector_names <- .getNames(x,y)
186
-        names(discordPPV) <- vector_names
187
-        names(classVector) <- vector_names
188
-    }
189
-    
190
-    zTable <- t(apply(zTable, 1, function(x) x/sum(x)))
191
-    rownames(zTable) <- vector_names
192
-    
193
-    return(list(discordPPMatrix = discordPPMatrix, discordPPVector = discordPPV,
194
-                classMatrix = classMatrix, classVector = classVector, 
195
-                probMatrix = zTable, loglik = pd$loglik))
152
+    rtn <- .prepareOutput(x, y, pd, zTable, classVector, components)
196 153
 }
197 154
 
198 155
 em.normal.partial.concordant <- function(data, class, components) {
... ...
@@ -241,6 +198,47 @@ em.normal.partial.concordant <- function(data, class, components) {
241 198
                 z = array(results[[3]], dim = c(n, g*g))))
242 199
 }
243 200
 
201
+.prepareOutput <- function(x, y, pd, zTable, classVector, components) {
202
+  
203
+    if (components == 3) {
204
+        discordClass <- c(2,3,4,6,7,8)
205
+    } else {
206
+        discordClass <- setdiff(1:25, c(1, 7, 13, 19, 25))
207
+    }
208
+    featureSize = dim(x)[1]
209
+    discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass]) / sum(x))
210
+    
211
+    if(is.null(y)) {
212
+        discordPPMatrix <- matrix(NA, nrow = featureSize, ncol = featureSize)
213
+        classMatrix <- discordPPMatrix
214
+        diag <- lower.tri(discordPPMatrix, diag = FALSE)
215
+        discordPPMatrix[diag] <- discordPPV
216
+        classMatrix[diag] <- classVector
217
+        colnames(discordPPMatrix) <- rownames(x)
218
+        colnames(classMatrix) <- rownames(x)
219
+        vector_names <- .getNames(x)
220
+    } else {
221
+        discordPPMatrix <- matrix(discordPPV, nrow = featureSize, 
222
+                                  byrow = FALSE)
223
+        classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE)
224
+        colnames(discordPPMatrix) <- rownames(y)
225
+        colnames(classMatrix) <- rownames(y)
226
+        vector_names <- .getNames(x,y)
227
+    }
228
+
229
+    rownames(discordPPMatrix) <- rownames(x)
230
+    rownames(classMatrix) <- rownames(x)
231
+    names(discordPPV) <- vector_names
232
+    names(classVector) <- vector_names
233
+    
234
+    zTable <- t(apply(zTable, 1, function(x) x / sum(x)))
235
+    rownames(zTable) <- vector_names
236
+    
237
+    return(list(discordPPMatrix = discordPPMatrix, discordPPVector = discordPPV,
238
+                classMatrix = classMatrix, classVector = classVector, 
239
+                probMatrix = zTable, loglik = pd$loglik))
240
+}
241
+
244 242
 
245 243
 # Internal function to assign class to vector based on number of components and 
246 244
 #   each elements distance from zero
... ...
@@ -255,28 +253,30 @@ em.normal.partial.concordant <- function(data, class, components) {
255 253
   return(rtn)
256 254
 }
257 255
 
256
+# Internal function to independently subsample data according to whether the
257
+#   analysis is within -omics or between -omics. 
258 258
 .createSubsamples <- function(x, y = NULL, v1, v2, subSize) {
259
-  if(is.null(y)) {
260
-    sampleNames <- rownames(x)
261
-    nameSet1 <- sample(sampleNames, subSize)
262
-    nameSet2 <- sample(setdiff(sampleNames, nameSet1))
263
-    nameSetA <- paste0(nameSet1, "_", nameSet2)
264
-    nameSetB <- paste0(nameSet2, "_", nameSet1)
265
-    subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA])
266
-    subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA])
267
-  } else {
268
-    # make sure pairs are independent
269
-    rowIndex <- sample(nrow(x), subSize)
270
-    colIndex <- sample(nrow(y), subSize)
271
-    mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE)
272
-    mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE)
273
-    subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], 
274
-                                                    colIndex[x]])
275
-    subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], 
276
-                                                    colIndex[x]])
277
-  }
278
-  return(list(v1 = subSampV1,
279
-              v2 = subSampV2))
259
+    if(is.null(y)) {
260
+        sampleNames <- rownames(x)
261
+        nameSet1 <- sample(sampleNames, subSize)
262
+        nameSet2 <- sample(setdiff(sampleNames, nameSet1), subSize)
263
+        nameSetA <- paste0(nameSet1, "_", nameSet2)
264
+        nameSetB <- paste0(nameSet2, "_", nameSet1)
265
+        subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA])
266
+        subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA])
267
+    } else {
268
+        # make sure pairs are independent
269
+        rowIndex <- sample(nrow(x), subSize)
270
+        colIndex <- sample(nrow(y), subSize)
271
+        mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE)
272
+        mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE)
273
+        subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], 
274
+                                                        colIndex[x]])
275
+        subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], 
276
+                                                        colIndex[x]])
277
+    }
278
+    return(list(v1 = subSampV1,
279
+                v2 = subSampV2))
280 280
 }
281 281
 
282 282
 # Internal function to validate user inputs for discordantRun()
... ...
@@ -289,11 +289,6 @@ em.normal.partial.concordant <- function(data, class, components) {
289 289
     stop("x and y (if present) must be type ExpressionSet")
290 290
   }
291 291
   
292
-  # Need to double check if this is true w/ Katerina
293
-  # if (is.null(y) && subsampling) {
294
-  #   stop("y cannot be NULL if subsampling is TRUE")
295
-  # }
296
-  
297 292
   if (!(components %in% c(3, 5))) {
298 293
     stop ("Components must be equal to 3 or 5")
299 294
   }