Break output chunks into seperate function, reduce
repeated code in output chunk.
... | ... |
@@ -106,25 +106,16 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
106 | 106 |
v1 <- fishersTrans(v1) |
107 | 107 |
v2 <- fishersTrans(v2) |
108 | 108 |
} |
109 |
- |
|
110 | 109 |
x <- exprs(x) |
111 | 110 |
if (!is.null(y)) { y <- exprs(y) } |
112 |
- featureSize = dim(x)[1] |
|
113 |
- |
|
114 | 111 |
pdata <- cbind(v1, v2) |
115 | 112 |
param1 <- sd(v1) |
116 | 113 |
param2 <- sd(v2) |
117 | 114 |
class <- cbind(.assignClass(v1, param1, components), |
118 | 115 |
.assignClass(v2, param2, components)) |
119 | 116 |
|
120 |
- if (components == 3) { |
|
121 |
- discordClass <- c(2,3,4,6,7,8) |
|
122 |
- } else { |
|
123 |
- discordClass <- setdiff(1:25, c(1, 7, 13, 19, 25)) |
|
124 |
- } |
|
125 |
- |
|
126 | 117 |
if (subsampling) { |
127 |
- subSize = .setSubSize(x, y) |
|
118 |
+ subSize <- .setSubSize(x, y) |
|
128 | 119 |
total_mu <- total_sigma <- total_nu <- |
129 | 120 |
total_tau <- total_pi <- rep(0, components) |
130 | 121 |
|
... | ... |
@@ -158,41 +149,7 @@ discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, |
158 | 149 |
classVector <- pd$class |
159 | 150 |
} |
160 | 151 |
|
161 |
- discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass])/sum(x)) |
|
162 |
- |
|
163 |
- if(is.null(y)) { |
|
164 |
- discordPPMatrix <- matrix(NA,nrow = featureSize, ncol = featureSize) |
|
165 |
- classMatrix <- discordPPMatrix |
|
166 |
- diag <- lower.tri(discordPPMatrix, diag = FALSE) |
|
167 |
- discordPPMatrix[diag] <- discordPPV |
|
168 |
- classMatrix[diag] <- classVector |
|
169 |
- rownames(discordPPMatrix) <- rownames(x) |
|
170 |
- colnames(discordPPMatrix) <- rownames(x) |
|
171 |
- rownames(classMatrix) <- rownames(x) |
|
172 |
- colnames(classMatrix) <- rownames(x) |
|
173 |
- vector_names <- .getNames(x) |
|
174 |
- names(discordPPV) <- vector_names |
|
175 |
- names(classVector) <- vector_names |
|
176 |
- } else { |
|
177 |
- discordPPMatrix <- matrix(discordPPV, nrow = featureSize, |
|
178 |
- byrow = FALSE) |
|
179 |
- classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE) |
|
180 |
- rownames(discordPPMatrix) <- rownames(x) |
|
181 |
- colnames(discordPPMatrix) <- rownames(y) |
|
182 |
- rownames(classMatrix) <- rownames(x) |
|
183 |
- colnames(classMatrix) <- rownames(y) |
|
184 |
- |
|
185 |
- vector_names <- .getNames(x,y) |
|
186 |
- names(discordPPV) <- vector_names |
|
187 |
- names(classVector) <- vector_names |
|
188 |
- } |
|
189 |
- |
|
190 |
- zTable <- t(apply(zTable, 1, function(x) x/sum(x))) |
|
191 |
- rownames(zTable) <- vector_names |
|
192 |
- |
|
193 |
- return(list(discordPPMatrix = discordPPMatrix, discordPPVector = discordPPV, |
|
194 |
- classMatrix = classMatrix, classVector = classVector, |
|
195 |
- probMatrix = zTable, loglik = pd$loglik)) |
|
152 |
+ rtn <- .prepareOutput(x, y, pd, zTable, classVector, components) |
|
196 | 153 |
} |
197 | 154 |
|
198 | 155 |
em.normal.partial.concordant <- function(data, class, components) { |
... | ... |
@@ -241,6 +198,47 @@ em.normal.partial.concordant <- function(data, class, components) { |
241 | 198 |
z = array(results[[3]], dim = c(n, g*g)))) |
242 | 199 |
} |
243 | 200 |
|
201 |
+.prepareOutput <- function(x, y, pd, zTable, classVector, components) { |
|
202 |
+ |
|
203 |
+ if (components == 3) { |
|
204 |
+ discordClass <- c(2,3,4,6,7,8) |
|
205 |
+ } else { |
|
206 |
+ discordClass <- setdiff(1:25, c(1, 7, 13, 19, 25)) |
|
207 |
+ } |
|
208 |
+ featureSize = dim(x)[1] |
|
209 |
+ discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass]) / sum(x)) |
|
210 |
+ |
|
211 |
+ if(is.null(y)) { |
|
212 |
+ discordPPMatrix <- matrix(NA, nrow = featureSize, ncol = featureSize) |
|
213 |
+ classMatrix <- discordPPMatrix |
|
214 |
+ diag <- lower.tri(discordPPMatrix, diag = FALSE) |
|
215 |
+ discordPPMatrix[diag] <- discordPPV |
|
216 |
+ classMatrix[diag] <- classVector |
|
217 |
+ colnames(discordPPMatrix) <- rownames(x) |
|
218 |
+ colnames(classMatrix) <- rownames(x) |
|
219 |
+ vector_names <- .getNames(x) |
|
220 |
+ } else { |
|
221 |
+ discordPPMatrix <- matrix(discordPPV, nrow = featureSize, |
|
222 |
+ byrow = FALSE) |
|
223 |
+ classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE) |
|
224 |
+ colnames(discordPPMatrix) <- rownames(y) |
|
225 |
+ colnames(classMatrix) <- rownames(y) |
|
226 |
+ vector_names <- .getNames(x,y) |
|
227 |
+ } |
|
228 |
+ |
|
229 |
+ rownames(discordPPMatrix) <- rownames(x) |
|
230 |
+ rownames(classMatrix) <- rownames(x) |
|
231 |
+ names(discordPPV) <- vector_names |
|
232 |
+ names(classVector) <- vector_names |
|
233 |
+ |
|
234 |
+ zTable <- t(apply(zTable, 1, function(x) x / sum(x))) |
|
235 |
+ rownames(zTable) <- vector_names |
|
236 |
+ |
|
237 |
+ return(list(discordPPMatrix = discordPPMatrix, discordPPVector = discordPPV, |
|
238 |
+ classMatrix = classMatrix, classVector = classVector, |
|
239 |
+ probMatrix = zTable, loglik = pd$loglik)) |
|
240 |
+} |
|
241 |
+ |
|
244 | 242 |
|
245 | 243 |
# Internal function to assign class to vector based on number of components and |
246 | 244 |
# each elements distance from zero |
... | ... |
@@ -255,28 +253,30 @@ em.normal.partial.concordant <- function(data, class, components) { |
255 | 253 |
return(rtn) |
256 | 254 |
} |
257 | 255 |
|
256 |
+# Internal function to independently subsample data according to whether the |
|
257 |
+# analysis is within -omics or between -omics. |
|
258 | 258 |
.createSubsamples <- function(x, y = NULL, v1, v2, subSize) { |
259 |
- if(is.null(y)) { |
|
260 |
- sampleNames <- rownames(x) |
|
261 |
- nameSet1 <- sample(sampleNames, subSize) |
|
262 |
- nameSet2 <- sample(setdiff(sampleNames, nameSet1)) |
|
263 |
- nameSetA <- paste0(nameSet1, "_", nameSet2) |
|
264 |
- nameSetB <- paste0(nameSet2, "_", nameSet1) |
|
265 |
- subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA]) |
|
266 |
- subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA]) |
|
267 |
- } else { |
|
268 |
- # make sure pairs are independent |
|
269 |
- rowIndex <- sample(nrow(x), subSize) |
|
270 |
- colIndex <- sample(nrow(y), subSize) |
|
271 |
- mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE) |
|
272 |
- mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE) |
|
273 |
- subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], |
|
274 |
- colIndex[x]]) |
|
275 |
- subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], |
|
276 |
- colIndex[x]]) |
|
277 |
- } |
|
278 |
- return(list(v1 = subSampV1, |
|
279 |
- v2 = subSampV2)) |
|
259 |
+ if(is.null(y)) { |
|
260 |
+ sampleNames <- rownames(x) |
|
261 |
+ nameSet1 <- sample(sampleNames, subSize) |
|
262 |
+ nameSet2 <- sample(setdiff(sampleNames, nameSet1), subSize) |
|
263 |
+ nameSetA <- paste0(nameSet1, "_", nameSet2) |
|
264 |
+ nameSetB <- paste0(nameSet2, "_", nameSet1) |
|
265 |
+ subSampV1 <- ifelse(is.na(v1[nameSetA]), v1[nameSetB], v1[nameSetA]) |
|
266 |
+ subSampV2 <- ifelse(is.na(v2[nameSetA]), v2[nameSetB], v2[nameSetA]) |
|
267 |
+ } else { |
|
268 |
+ # make sure pairs are independent |
|
269 |
+ rowIndex <- sample(nrow(x), subSize) |
|
270 |
+ colIndex <- sample(nrow(y), subSize) |
|
271 |
+ mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE) |
|
272 |
+ mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE) |
|
273 |
+ subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], |
|
274 |
+ colIndex[x]]) |
|
275 |
+ subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], |
|
276 |
+ colIndex[x]]) |
|
277 |
+ } |
|
278 |
+ return(list(v1 = subSampV1, |
|
279 |
+ v2 = subSampV2)) |
|
280 | 280 |
} |
281 | 281 |
|
282 | 282 |
# Internal function to validate user inputs for discordantRun() |
... | ... |
@@ -289,11 +289,6 @@ em.normal.partial.concordant <- function(data, class, components) { |
289 | 289 |
stop("x and y (if present) must be type ExpressionSet") |
290 | 290 |
} |
291 | 291 |
|
292 |
- # Need to double check if this is true w/ Katerina |
|
293 |
- # if (is.null(y) && subsampling) { |
|
294 |
- # stop("y cannot be NULL if subsampling is TRUE") |
|
295 |
- # } |
|
296 |
- |
|
297 | 292 |
if (!(components %in% c(3, 5))) { |
298 | 293 |
stop ("Components must be equal to 3 or 5") |
299 | 294 |
} |