... | ... |
@@ -71,18 +71,21 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
71 | 71 |
|
72 | 72 |
|
73 | 73 |
x_from <- y_from <- x_to <- y_to <- Change <- muts <- |
74 |
- label <- fitness <- Type <- NULL |
|
75 |
- |
|
74 |
+ label <- birth <- Type <- NULL |
|
75 |
+ |
|
76 |
+ if ("Fitness" %in% colnames(tfm$afe)) { |
|
77 |
+ colnames(tfm$afe)[which(colnames(tfm$afe) == "Fitness")] <- "Birth" |
|
78 |
+ } |
|
76 | 79 |
|
77 | 80 |
dd <- data.frame(muts = mutated, |
78 |
- fitness = tfm$afe$Fitness, |
|
81 |
+ birth = tfm$afe$Birth, |
|
79 | 82 |
label = tfm$afe$Genotype, |
80 | 83 |
stringsAsFactors = TRUE) |
81 | 84 |
cl <- gaj[vv] |
82 | 85 |
sg <- data.frame(x_from = mutated[vv[, 1]], |
83 |
- y_from = tfm$afe$Fitness[vv[, 1]], |
|
86 |
+ y_from = tfm$afe$Birth[vv[, 1]], |
|
84 | 87 |
x_to = mutated[vv[, 2]], |
85 |
- y_to = tfm$afe$Fitness[vv[, 2]], |
|
88 |
+ y_to = tfm$afe$Birth[vv[, 2]], |
|
86 | 89 |
Change = factor(ifelse(cl == 0, "Neutral", |
87 | 90 |
ifelse(cl > 0, "Gain", "Loss")), |
88 | 91 |
levels = c("Gain", "Loss", "Neutral")), |
... | ... |
@@ -91,7 +94,7 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
91 | 94 |
number_ticks <- function(n) {function(limits) pretty(limits, n)} |
92 | 95 |
|
93 | 96 |
p <- ggplot() + |
94 |
- xlab("") + ylab("Fitness") + |
|
97 |
+ xlab("") + ylab("Birth") + |
|
95 | 98 |
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), |
96 | 99 |
panel.grid.minor.x = element_blank()) + |
97 | 100 |
geom_segment(data = sg, |
... | ... |
@@ -121,9 +124,9 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
121 | 124 |
|
122 | 125 |
if(show_labels && use_ggrepel) { |
123 | 126 |
p <- p + geom_text_repel(data = dd[-c(minF, maxF), ], |
124 |
- aes(x = muts, y = fitness, label = label)) + |
|
127 |
+ aes(x = muts, y = birth, label = label)) + |
|
125 | 128 |
geom_label_repel(data = ddMM, |
126 |
- aes(x = muts, y = fitness, label = label, fill = Type), |
|
129 |
+ aes(x = muts, y = birth, label = label, fill = Type), |
|
127 | 130 |
color = "black", |
128 | 131 |
fontface = "bold") |
129 | 132 |
## geom_label_repel(data = dd[maxF, ], aes(x = muts, y = fitness, label = label), |
... | ... |
@@ -141,10 +144,10 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
141 | 144 |
} |
142 | 145 |
|
143 | 146 |
p <- p + geom_text(data = dd[-c(minF, maxF), ], |
144 |
- aes(x = muts, y = fitness, label = label), |
|
147 |
+ aes(x = muts, y = birth, label = label), |
|
145 | 148 |
vjust = -0.2, hjust = "inward") + |
146 | 149 |
geom_label(data = ddMM, |
147 |
- aes(x = muts, y = fitness, label = label, fill = Type), |
|
150 |
+ aes(x = muts, y = birth, label = label, fill = Type), |
|
148 | 151 |
vjust = -0.2, hjust = "inward", color = "black", |
149 | 152 |
fontface = "bold") |
150 | 153 |
## geom_label(data = dd[maxF, ], aes(x = muts, y = fitness, label = label), |
... | ... |
@@ -230,6 +230,9 @@ fast_peaks <- function(x, th) { |
230 | 230 |
th)])) |
231 | 231 |
} |
232 | 232 |
|
233 |
+## FIXME! Unclear if we need the complete fitness matrix or if it will work with |
|
234 |
+## pieces (say, after removing those with fitness < WT, to try to make it faster |
|
235 |
+## and for general use in other cases) |
|
233 | 236 |
|
234 | 237 |
## wrapper to the C++ code |
235 | 238 |
wrap_accessibleGenotypes <- function(x, th) { |
... | ... |
@@ -319,65 +319,6 @@ faster_accessible_genotypes_R <- function(x, th) { |
319 | 319 |
} |
320 | 320 |
|
321 | 321 |
|
322 |
-## ## This uses slam, but that is actually slower because |
|
323 |
-## ## of the assignment |
|
324 |
-## faster_accessible_genots_slam <- function(x, th = 0) { |
|
325 |
- |
|
326 |
-## ## Given a genotype matrix, return the genotypes that are accessible |
|
327 |
-## ## via creating a directed adjacency matrix between genotypes |
|
328 |
-## ## connected (i.e., those that differ by gaining one mutation). 0 |
|
329 |
-## ## means not connected, 1 means connected. |
|
330 |
- |
|
331 |
-## ## There is a more general function in OncoSimulR that will give the |
|
332 |
-## ## fitness difference. But not doing the difference is faster than |
|
333 |
-## ## just setting a value, say 1, if all we want is to keep track of |
|
334 |
-## ## accessible ones. And by using only 0/1 we can store only an |
|
335 |
-## ## integer. And no na.omits, etc. Is too restricted? Yes. But for |
|
336 |
-## ## simulations and computing just accessible genotypes, probably a |
|
337 |
-## ## hell of a lot faster. |
|
338 |
- |
|
339 |
-## ## Well, this is not incredibly fast either. |
|
340 |
- |
|
341 |
-## ## Make sure sorted, so ancestors always before descendants |
|
342 |
-## rs0 <- rowSums(x[, -ncol(x)]) |
|
343 |
-## x <- x[order(rs0), ] |
|
344 |
-## rm(rs0) |
|
345 |
- |
|
346 |
-## y <- x[, -ncol(x)] |
|
347 |
-## f <- x[, ncol(x)] |
|
348 |
-## rs <- rowSums(y) |
|
349 |
- |
|
350 |
-## ## If 0, not accessible |
|
351 |
-## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs), |
|
352 |
-## mode = "integer") |
|
353 |
-## for(i in 1:length(rs)) { ## i is the current genotype |
|
354 |
-## candidates <- which(rs == (rs[i] + 1)) |
|
355 |
-## for(j in candidates) { |
|
356 |
-## ## sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
357 |
-## ## if(sumdiff == 1) |
|
358 |
-## if( (sum(abs(y[j, ] - y[i, ])) == 1) && |
|
359 |
-## ( (f[j] - f[i]) >= th ) ) |
|
360 |
-## adm[i, j] <- 1L |
|
361 |
-## } |
|
362 |
-## } |
|
363 |
- |
|
364 |
-## colnames(adm) <- rownames(adm) <- 1:ncol(adm) |
|
365 |
-## admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column. |
|
366 |
-## while(TRUE) { |
|
367 |
-## ## We remove inaccessible cols (genotypes) and the corresponding |
|
368 |
-## ## rows repeatedly until nothing left to remove; any column left |
|
369 |
-## ## is therefore accessible throw at least one path. |
|
370 |
- |
|
371 |
-## ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L) |
|
372 |
-## inacc_col <- which(slam::col_sums(admtmp) == 0L) |
|
373 |
-## if(length(inacc_col) == 0) break; |
|
374 |
-## inacc_row <- inacc_col + 1 ## recall root row is left |
|
375 |
-## admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE] |
|
376 |
-## } |
|
377 |
-## return(as.numeric(c(colnames(adm)[1], colnames(admtmp)))) |
|
378 |
-## } |
|
379 |
- |
|
380 |
- |
|
381 | 322 |
generate_matrix_genotypes <- function(g) { |
382 | 323 |
## FIXME future: do this for order too? Only if rfitness for order. |
383 | 324 |
## Given a number of genes, generate all possible genotypes. |
... | ... |
@@ -470,25 +411,6 @@ genot_to_adj_mat <- function(x) { |
470 | 411 |
} |
471 | 412 |
|
472 | 413 |
|
473 |
-## ## to move above to C++ note that loop can be |
|
474 |
-## for(i in 1:length(rs)) { ## i is the current genotype |
|
475 |
-## for(j in (i:length(rs))) { |
|
476 |
-## if(rs[j] > (rs[i] + 1)) break; |
|
477 |
-## else if(rs[j] == (rs[i] + 1)) { |
|
478 |
-## ## and use here my HammingDistance function |
|
479 |
-## ## sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
480 |
-## ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i]) |
|
481 |
-## if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]); |
|
482 |
-## } |
|
483 |
-## } |
|
484 |
-## } |
|
485 |
- |
|
486 |
-## actually, all that is already in accessibleGenotypes except for the |
|
487 |
-## filling up of adm. |
|
488 |
- |
|
489 |
- |
|
490 |
- |
|
491 |
- |
|
492 | 414 |
|
493 | 415 |
peak_valley <- function(x) { |
494 | 416 |
## FIXME: when there are no identical entries, all this |
... | ... |
@@ -16,12 +16,6 @@ |
16 | 16 |
## plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
17 | 17 |
## plot.genotype_fitness_matrix <- plotFitnessLandscape |
18 | 18 |
|
19 |
-## FIXME: show only accessible paths? |
|
20 |
-## FIXME: when show_labels = FALSE we still show the boxes |
|
21 |
-## and some of the labels.!!! |
|
22 |
-## FIXME: if using only_accessible, maybe we |
|
23 |
-## can try to use fast_peaks, and use the slower |
|
24 |
-## approach as fallback (if identical fitness) |
|
25 | 19 |
plotFitnessLandscape <- function(x, show_labels = TRUE, |
26 | 20 |
col = c("green4", "red", "yellow"), |
27 | 21 |
lty = c(1, 2, 3), use_ggrepel = FALSE, |
... | ... |
@@ -31,7 +25,12 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
31 | 25 |
...) { |
32 | 26 |
|
33 | 27 |
## FIXME future: |
34 |
- |
|
28 |
+ ## FIXME: when show_labels = FALSE we still show the boxes |
|
29 |
+ ## and some of the labels.!!! |
|
30 |
+ ## FIXME: if using only_accessible, maybe we |
|
31 |
+ ## can try to use fast_peaks, and use the slower |
|
32 |
+ ## approach as fallback (if identical fitness) |
|
33 |
+ |
|
35 | 34 |
## - Allow passing order effects. Change "allGenotypes_to_matrix" |
36 | 35 |
## below? Probably not, as we cannot put order effects as a |
37 | 36 |
## matrix. Do something else, like allow only order effects if from |
... | ... |
@@ -70,14 +69,7 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
70 | 69 |
} |
71 | 70 |
vv <- which(!is.na(gaj), arr.ind = TRUE) |
72 | 71 |
|
73 |
- ## plot(x = mutated, y = e1$Fitness, ylab = "Fitness", |
|
74 |
- ## xlab = "", type = "n", axes = FALSE) |
|
75 |
- ## box() |
|
76 |
- ## axis(2) |
|
77 |
- ## text(x = mutated, y = x$Fitness, labels = x$Genotype) |
|
78 |
- |
|
79 |
- ## The R CMD CHEKC notes about no visible binding for global variable |
|
80 |
- |
|
72 |
+ |
|
81 | 73 |
x_from <- y_from <- x_to <- y_to <- Change <- muts <- |
82 | 74 |
label <- fitness <- Type <- NULL |
83 | 75 |
|
... | ... |
@@ -184,27 +176,6 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
184 | 176 |
###################################################################### |
185 | 177 |
|
186 | 178 |
|
187 |
-## wrap_filter_inaccessible <- function(x, max_num_genotypes, accessible_th) { |
|
188 |
-## ## wrap it, for my consumption |
|
189 |
-## tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes) |
|
190 |
-## mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)]) |
|
191 |
-## gaj <- genot_to_adj_mat(tfm$gfm) |
|
192 |
-## gaj <- filter_inaccessible(gaj, accessible_th) |
|
193 |
-## remaining <- as.numeric(colnames(gaj)) |
|
194 |
-## mutated <- mutated[remaining] |
|
195 |
-## tfm$afe <- tfm$afe[remaining, , drop = FALSE] |
|
196 |
-## return(list(remaining = remaining, |
|
197 |
-## mutated = mutated, |
|
198 |
-## tfm = tfm)) |
|
199 |
-## } |
|
200 |
- |
|
201 |
-## No longer being used. Used to be in rfitness |
|
202 |
-## count_accessible_g <- function(gfm, accessible_th) { |
|
203 |
-## gaj <- genot_to_adj_mat(gfm) |
|
204 |
-## gaj <- filter_inaccessible(gaj, accessible_th) |
|
205 |
-## return(ncol(gaj) - 1) |
|
206 |
-## } |
|
207 |
- |
|
208 | 179 |
|
209 | 180 |
## There is now C++ code to get just the locations/positions of the |
210 | 181 |
## accessible genotypes |
... | ... |
@@ -17,7 +17,8 @@ |
17 | 17 |
## plot.genotype_fitness_matrix <- plotFitnessLandscape |
18 | 18 |
|
19 | 19 |
## FIXME: show only accessible paths? |
20 |
- |
|
20 |
+## FIXME: when show_labels = FALSE we still show the boxes |
|
21 |
+## and some of the labels.!!! |
|
21 | 22 |
## FIXME: if using only_accessible, maybe we |
22 | 23 |
## can try to use fast_peaks, and use the slower |
23 | 24 |
## approach as fallback (if identical fitness) |
... | ... |
@@ -83,7 +83,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
83 | 83 |
|
84 | 84 |
dd <- data.frame(muts = mutated, |
85 | 85 |
fitness = tfm$afe$Fitness, |
86 |
- label = tfm$afe$Genotype) |
|
86 |
+ label = tfm$afe$Genotype, |
|
87 |
+ stringsAsFactors = TRUE) |
|
87 | 88 |
cl <- gaj[vv] |
88 | 89 |
sg <- data.frame(x_from = mutated[vv[, 1]], |
89 | 90 |
y_from = tfm$afe$Fitness[vv[, 1]], |
... | ... |
@@ -91,7 +92,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
91 | 92 |
y_to = tfm$afe$Fitness[vv[, 2]], |
92 | 93 |
Change = factor(ifelse(cl == 0, "Neutral", |
93 | 94 |
ifelse(cl > 0, "Gain", "Loss")), |
94 |
- levels = c("Gain", "Loss", "Neutral"))) |
|
95 |
+ levels = c("Gain", "Loss", "Neutral")), |
|
96 |
+ stringsAsFactors = TRUE) |
|
95 | 97 |
## From http://stackoverflow.com/a/17257422 |
96 | 98 |
number_ticks <- function(n) {function(limits) pretty(limits, n)} |
97 | 99 |
|
... | ... |
@@ -601,6 +603,7 @@ peak_valley <- function(x) { |
601 | 603 |
## ## mutated <- rowSums(gfm[, -ncol(gfm)]) |
602 | 604 |
## gaj <- OncoSimulR:::genot_to_adj_mat(gfm) |
603 | 605 |
## gaj2 <- OncoSimulR:::filter_inaccessible(gaj, 0) |
606 |
+## Eh! that is assuming no genotype is inaccessible! |
|
604 | 607 |
## stopifnot(all(na.omit(as.vector(gaj == gaj2)))) |
605 | 608 |
## remaining <- as.numeric(colnames(gaj2)) |
606 | 609 |
## ## mutated <- mutated[remaining] |
... | ... |
@@ -18,6 +18,9 @@ |
18 | 18 |
|
19 | 19 |
## FIXME: show only accessible paths? |
20 | 20 |
|
21 |
+## FIXME: if using only_accessible, maybe we |
|
22 |
+## can try to use fast_peaks, and use the slower |
|
23 |
+## approach as fallback (if identical fitness) |
|
21 | 24 |
plotFitnessLandscape <- function(x, show_labels = TRUE, |
22 | 25 |
col = c("green4", "red", "yellow"), |
23 | 26 |
lty = c(1, 2, 3), use_ggrepel = FALSE, |
... | ... |
@@ -224,13 +227,45 @@ filter_inaccessible <- function(x, accessible_th) { |
224 | 227 |
} |
225 | 228 |
|
226 | 229 |
|
230 |
+## wrapper to the C++ code |
|
231 |
+fast_peaks <- function(x, th) { |
|
232 |
+ ## x is the fitness matrix, not adjacency matrix |
|
233 |
+ |
|
234 |
+ ## Only works if no connected genotypes that form maxima. I.e., no |
|
235 |
+ ## identical fitness. Do a sufficient check for it (too inclusive, though) |
|
236 |
+ ## And only under no back mutation |
|
237 |
+ |
|
238 |
+ original_pos <- 1:nrow(x) |
|
239 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
240 |
+ o_numMut <- order(numMut) |
|
241 |
+ x <- x[o_numMut, ] |
|
242 |
+ numMut <- numMut[o_numMut] |
|
243 |
+ f <- x[, ncol(x)] |
|
244 |
+ ## Two assumptions |
|
245 |
+ stopifnot(numMut[1] == 0) |
|
246 |
+ ## make sure no repeated in those that could be maxima |
|
247 |
+ if(any(duplicated(f[f >= f[1]]))) |
|
248 |
+ stop("There could be several connected maxima genotypes.", |
|
249 |
+ " This function is not safe to use.") |
|
250 |
+ |
|
251 |
+ y <- x[, -ncol(x)] |
|
252 |
+ storage.mode(y) <- "integer" |
|
253 |
+ original_pos <- original_pos[o_numMut] |
|
254 |
+ return(sort(original_pos[peaksLandscape(y, f, |
|
255 |
+ as.integer(numMut), |
|
256 |
+ th)])) |
|
257 |
+} |
|
258 |
+ |
|
259 |
+ |
|
227 | 260 |
## wrapper to the C++ code |
228 | 261 |
wrap_accessibleGenotypes <- function(x, th) { |
229 | 262 |
## x is the fitness matrix, not adjacency matrix |
263 |
+ original_pos <- 1:nrow(x) |
|
230 | 264 |
numMut <- rowSums(x[, -ncol(x)]) |
231 | 265 |
o_numMut <- order(numMut) |
232 | 266 |
x <- x[o_numMut, ] |
233 | 267 |
numMut <- numMut[o_numMut] |
268 |
+ original_pos <- original_pos[o_numMut] |
|
234 | 269 |
|
235 | 270 |
y <- x[, -ncol(x)] |
236 | 271 |
storage.mode(y) <- "integer" |
... | ... |
@@ -238,7 +273,29 @@ wrap_accessibleGenotypes <- function(x, th) { |
238 | 273 |
acc <- accessibleGenotypes(y, x[, ncol(x)], |
239 | 274 |
as.integer(numMut), |
240 | 275 |
th) |
241 |
- return(acc[acc > 0]) |
|
276 |
+ ## return(acc[acc > 0]) |
|
277 |
+ return(sort(original_pos[acc[acc > 0]])) |
|
278 |
+} |
|
279 |
+ |
|
280 |
+ |
|
281 |
+## wrapper to the C++ code; the former one, only for testing. Remove |
|
282 |
+## eventually FIXME |
|
283 |
+wrap_accessibleGenotypes_former <- function(x, th) { |
|
284 |
+ ## x is the fitness matrix, not adjacency matrix |
|
285 |
+ original_pos <- 1:nrow(x) |
|
286 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
287 |
+ o_numMut <- order(numMut) |
|
288 |
+ x <- x[o_numMut, ] |
|
289 |
+ numMut <- numMut[o_numMut] |
|
290 |
+ original_pos <- original_pos[o_numMut] |
|
291 |
+ |
|
292 |
+ y <- x[, -ncol(x)] |
|
293 |
+ storage.mode(y) <- "integer" |
|
294 |
+ |
|
295 |
+ acc <- accessibleGenotypes_former(y, x[, ncol(x)], |
|
296 |
+ as.integer(numMut), |
|
297 |
+ th) |
|
298 |
+ return(sort(original_pos[acc[acc > 0]])) |
|
242 | 299 |
} |
243 | 300 |
|
244 | 301 |
## A transitional function |
... | ... |
@@ -378,8 +435,10 @@ generate_matrix_genotypes <- function(g) { |
378 | 435 |
} |
379 | 436 |
|
380 | 437 |
|
381 |
- |
|
382 |
-genot_to_adj_mat <- function(x) { |
|
438 |
+## The R version. See also the C++ one |
|
439 |
+genot_to_adj_mat_R <- function(x) { |
|
440 |
+ ## x is the fitness matrix |
|
441 |
+ |
|
383 | 442 |
## FIXME this can take about 23% of the time of the ggplot call. |
384 | 443 |
## But them, we are quickly constructing a 2000*2000 matrix |
385 | 444 |
## Given a genotype matrix, as given by allGenotypes_to_matrix, produce a |
... | ... |
@@ -390,13 +449,16 @@ genot_to_adj_mat <- function(x) { |
390 | 449 |
## FIXME: code is now in place to do all of this in C++ |
391 | 450 |
|
392 | 451 |
## Make sure sorted, so ancestors always before descendants |
393 |
- rs0 <- rowSums(x[, -ncol(x)]) |
|
394 |
- x <- x[order(rs0), ] |
|
395 |
- rm(rs0) |
|
452 |
+ original_pos <- 1:nrow(x) |
|
453 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
454 |
+ o_numMut <- order(numMut) |
|
455 |
+ x <- x[o_numMut, ] |
|
456 |
+ original_pos <- original_pos[o_numMut] |
|
457 |
+ rm(numMut) |
|
396 | 458 |
|
397 | 459 |
y <- x[, -ncol(x)] |
398 | 460 |
f <- x[, ncol(x)] |
399 |
- rs <- rowSums(y) |
|
461 |
+ rs <- rowSums(y) ## redo for paranoia; could have ordered numMut |
|
400 | 462 |
|
401 | 463 |
## Move this to C++? |
402 | 464 |
adm <- matrix(NA, nrow = length(rs), ncol = length(rs)) |
... | ... |
@@ -408,9 +470,51 @@ genot_to_adj_mat <- function(x) { |
408 | 470 |
if(sumdiff == 1) adm[i, j] <- (f[j] - f[i]) |
409 | 471 |
} |
410 | 472 |
} |
473 |
+ colnames(adm) <- rownames(adm) <- original_pos |
|
411 | 474 |
return(adm) |
412 | 475 |
} |
413 | 476 |
|
477 |
+genot_to_adj_mat <- function(x) { |
|
478 |
+ ## x is the fitness matrix |
|
479 |
+ |
|
480 |
+ ## adding column and row names should rarely be necessary |
|
481 |
+ ## as these are internal functions, etc. But just in case |
|
482 |
+ original_pos <- 1:nrow(x) |
|
483 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
484 |
+ o_numMut <- order(numMut) |
|
485 |
+ x <- x[o_numMut, ] |
|
486 |
+ numMut <- numMut[o_numMut] |
|
487 |
+ original_pos <- original_pos[o_numMut] |
|
488 |
+ |
|
489 |
+ y <- x[, -ncol(x)] |
|
490 |
+ storage.mode(y) <- "integer" |
|
491 |
+ |
|
492 |
+ adm <- genot2AdjMat(y, x[, ncol(x)], |
|
493 |
+ as.integer(numMut)) |
|
494 |
+ colnames(adm) <- rownames(adm) <- original_pos |
|
495 |
+ return(adm) |
|
496 |
+} |
|
497 |
+ |
|
498 |
+ |
|
499 |
+## ## to move above to C++ note that loop can be |
|
500 |
+## for(i in 1:length(rs)) { ## i is the current genotype |
|
501 |
+## for(j in (i:length(rs))) { |
|
502 |
+## if(rs[j] > (rs[i] + 1)) break; |
|
503 |
+## else if(rs[j] == (rs[i] + 1)) { |
|
504 |
+## ## and use here my HammingDistance function |
|
505 |
+## ## sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
506 |
+## ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i]) |
|
507 |
+## if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]); |
|
508 |
+## } |
|
509 |
+## } |
|
510 |
+## } |
|
511 |
+ |
|
512 |
+## actually, all that is already in accessibleGenotypes except for the |
|
513 |
+## filling up of adm. |
|
514 |
+ |
|
515 |
+ |
|
516 |
+ |
|
517 |
+ |
|
414 | 518 |
|
415 | 519 |
peak_valley <- function(x) { |
416 | 520 |
## FIXME: when there are no identical entries, all this |
... | ... |
@@ -466,7 +570,13 @@ peak_valley <- function(x) { |
466 | 570 |
} |
467 | 571 |
bad_fwd <- vector("integer", nrow(x)) |
468 | 572 |
for(i in 1:nrow(x)) { |
573 |
+ ## Eh, why any? All. |
|
574 |
+ ## Nope, any: we want peaks in general, not just |
|
575 |
+ ## under assumption of "no back mutation" |
|
576 |
+ ## We get a different result when we restrict to accessible |
|
577 |
+ ## because all < 0 in adjacency are turned to NAs. |
|
469 | 578 |
if( any(x[, i] < 0, na.rm = TRUE) || bad_fwd[i] ) { |
579 |
+ ## if( all(x[, i] < 0, na.rm = TRUE) ) { |
|
470 | 580 |
## this node is bad. Any descendant with fitness >= is bad |
471 | 581 |
bad_fwd[i] <- 1 |
472 | 582 |
reach_f <- which(x[i, ] <= 0) |
... | ... |
@@ -478,3 +588,36 @@ peak_valley <- function(x) { |
478 | 588 |
peak <- setdiff(candidate, bad) |
479 | 589 |
return(list(peak = peak, valley = valley)) |
480 | 590 |
} |
591 |
+ |
|
592 |
+ |
|
593 |
+ |
|
594 |
+## For the future |
|
595 |
+## ## data.frame (two columns: genotype with "," and Fitness) -> fitness graph (DAG) |
|
596 |
+## ## Return an adj matrix of the fitness graph from a fitness |
|
597 |
+## ## landscape |
|
598 |
+## ## Based on code in plotFitnessLandscape |
|
599 |
+## flandscape_to_fgraph <- function(afe) { |
|
600 |
+## gfm <- OncoSimulR:::allGenotypes_to_matrix(afe) |
|
601 |
+## ## mutated <- rowSums(gfm[, -ncol(gfm)]) |
|
602 |
+## gaj <- OncoSimulR:::genot_to_adj_mat(gfm) |
|
603 |
+## gaj2 <- OncoSimulR:::filter_inaccessible(gaj, 0) |
|
604 |
+## stopifnot(all(na.omit(as.vector(gaj == gaj2)))) |
|
605 |
+## remaining <- as.numeric(colnames(gaj2)) |
|
606 |
+## ## mutated <- mutated[remaining] |
|
607 |
+## afe <- afe[remaining, , drop = FALSE] |
|
608 |
+## ## vv <- which(!is.na(gaj2), arr.ind = TRUE) |
|
609 |
+ |
|
610 |
+## gaj2 <- gaj2 |
|
611 |
+## gaj2[is.na(gaj2)] <- 0 |
|
612 |
+## gaj2[gaj2 > 0] <- 1 |
|
613 |
+## colnames(gaj2) <- rownames(gaj2) <- afe[, "Genotype"] |
|
614 |
+## return(gaj2) |
|
615 |
+## } |
|
616 |
+## ## This could be done easily in C++, taking care of row/colnames at end, |
|
617 |
+## ## without moving around the full adjacency matrix. |
|
618 |
+## ## Skeleton for C++ |
|
619 |
+## ## a call to accessibleGenotypesPeaksLandscape |
|
620 |
+## ## (with another argument or changing the returnpeaks by a three value thing) |
|
621 |
+## ## after done with first loop, |
|
622 |
+## ## return the matrix adm[accessible > 0, accessible >0] |
|
623 |
+## ## only need care with row/colnames |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@125769 bc3139a8-67e5-0310-9ffc-ced21a209358
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@123922 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -192,16 +192,23 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
192 | 192 |
## tfm = tfm)) |
193 | 193 |
## } |
194 | 194 |
|
195 |
-count_accessible_g <- function(gfm, accessible_th) { |
|
196 |
- gaj <- genot_to_adj_mat(gfm) |
|
197 |
- gaj <- filter_inaccessible(gaj, accessible_th) |
|
198 |
- return(ncol(gaj) - 1) |
|
199 |
-} |
|
195 |
+## No longer being used. Used to be in rfitness |
|
196 |
+## count_accessible_g <- function(gfm, accessible_th) { |
|
197 |
+## gaj <- genot_to_adj_mat(gfm) |
|
198 |
+## gaj <- filter_inaccessible(gaj, accessible_th) |
|
199 |
+## return(ncol(gaj) - 1) |
|
200 |
+## } |
|
200 | 201 |
|
202 |
+ |
|
203 |
+## There is now C++ code to get just the locations/positions of the |
|
204 |
+## accessible genotypes |
|
201 | 205 |
filter_inaccessible <- function(x, accessible_th) { |
202 | 206 |
## Return an adjacency matrix with only accessible paths. x is the gaj |
203 | 207 |
## matrix created in the plots. The difference between genotypes |
204 | 208 |
## separated by a hamming distance of 1 |
209 |
+ |
|
210 |
+ ## FIXME: could do the x[, -1] before loop and not add the 1 |
|
211 |
+ ## inside while, and do that at end |
|
205 | 212 |
colnames(x) <- rownames(x) <- 1:ncol(x) |
206 | 213 |
while(TRUE) { |
207 | 214 |
## remove first column |
... | ... |
@@ -216,6 +223,132 @@ filter_inaccessible <- function(x, accessible_th) { |
216 | 223 |
return(x) |
217 | 224 |
} |
218 | 225 |
|
226 |
+f1 <- function() {} |
|
227 |
+ |
|
228 |
+x <- 99 |
|
229 |
+ |
|
230 |
+## wrapper to the C++ code |
|
231 |
+wrap_accessibleGenotypes <- function(x, th) { |
|
232 |
+ ## x is the fitness matrix, not adjacency matrix |
|
233 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
234 |
+ o_numMut <- order(numMut) |
|
235 |
+ x <- x[o_numMut, ] |
|
236 |
+ numMut <- numMut[o_numMut] |
|
237 |
+ |
|
238 |
+ y <- x[, -ncol(x)] |
|
239 |
+ storage.mode(y) <- "integer" |
|
240 |
+ |
|
241 |
+ acc <- accessibleGenotypes(y, x[, ncol(x)], |
|
242 |
+ as.integer(numMut), |
|
243 |
+ th) |
|
244 |
+ return(acc[acc > 0]) |
|
245 |
+} |
|
246 |
+ |
|
247 |
+## A transitional function |
|
248 |
+faster_accessible_genotypes_R <- function(x, th) { |
|
249 |
+ rs0 <- rowSums(x[, -ncol(x)]) |
|
250 |
+ x <- x[order(rs0), ] |
|
251 |
+ rm(rs0) |
|
252 |
+ |
|
253 |
+ y <- x[, -ncol(x)] |
|
254 |
+ f <- x[, ncol(x)] |
|
255 |
+ rs <- rowSums(y) |
|
256 |
+ |
|
257 |
+ ## If 0, not accessible |
|
258 |
+ ## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs), |
|
259 |
+ ## mode = "integer") |
|
260 |
+ |
|
261 |
+ adm <- matrix(0, nrow = length(rs), ncol = length(rs)) |
|
262 |
+ storage.mode(adm) <- "integer" |
|
263 |
+ |
|
264 |
+ ## Most time is gone here |
|
265 |
+ for(i in 1:length(rs)) { ## i is the current genotype |
|
266 |
+ candidates <- which(rs == (rs[i] + 1)) |
|
267 |
+ for(j in candidates) { |
|
268 |
+ if( (sum(abs(y[j, ] - y[i, ])) == 1) && |
|
269 |
+ ( (f[j] - f[i]) >= th ) ) { |
|
270 |
+ ## actually, this is the largest time sink using slam |
|
271 |
+ adm[i, j] <- 1L |
|
272 |
+ } |
|
273 |
+ } |
|
274 |
+ } |
|
275 |
+ |
|
276 |
+ colnames(adm) <- rownames(adm) <- 1:ncol(adm) |
|
277 |
+ admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column. |
|
278 |
+ while(TRUE) { |
|
279 |
+ ## We remove inaccessible cols (genotypes) and the corresponding |
|
280 |
+ ## rows repeatedly until nothing left to remove; any column left |
|
281 |
+ ## is therefore accessible throw at least one path. |
|
282 |
+ |
|
283 |
+ ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L) |
|
284 |
+ inacc_col <- which(colSums(admtmp) == 0L) |
|
285 |
+ if(length(inacc_col) == 0) break; |
|
286 |
+ inacc_row <- inacc_col + 1 ## recall root row is left |
|
287 |
+ admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE] |
|
288 |
+ } |
|
289 |
+ return(as.numeric(c(colnames(adm)[1], colnames(admtmp)))) |
|
290 |
+ |
|
291 |
+} |
|
292 |
+ |
|
293 |
+ |
|
294 |
+## ## This uses slam, but that is actually slower because |
|
295 |
+## ## of the assignment |
|
296 |
+## faster_accessible_genots_slam <- function(x, th = 0) { |
|
297 |
+ |
|
298 |
+## ## Given a genotype matrix, return the genotypes that are accessible |
|
299 |
+## ## via creating a directed adjacency matrix between genotypes |
|
300 |
+## ## connected (i.e., those that differ by gaining one mutation). 0 |
|
301 |
+## ## means not connected, 1 means connected. |
|
302 |
+ |
|
303 |
+## ## There is a more general function in OncoSimulR that will give the |
|
304 |
+## ## fitness difference. But not doing the difference is faster than |
|
305 |
+## ## just setting a value, say 1, if all we want is to keep track of |
|
306 |
+## ## accessible ones. And by using only 0/1 we can store only an |
|
307 |
+## ## integer. And no na.omits, etc. Is too restricted? Yes. But for |
|
308 |
+## ## simulations and computing just accessible genotypes, probably a |
|
309 |
+## ## hell of a lot faster. |
|
310 |
+ |
|
311 |
+## ## Well, this is not incredibly fast either. |
|
312 |
+ |
|
313 |
+## ## Make sure sorted, so ancestors always before descendants |
|
314 |
+## rs0 <- rowSums(x[, -ncol(x)]) |
|
315 |
+## x <- x[order(rs0), ] |
|
316 |
+## rm(rs0) |
|
317 |
+ |
|
318 |
+## y <- x[, -ncol(x)] |
|
319 |
+## f <- x[, ncol(x)] |
|
320 |
+## rs <- rowSums(y) |
|
321 |
+ |
|
322 |
+## ## If 0, not accessible |
|
323 |
+## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs), |
|
324 |
+## mode = "integer") |
|
325 |
+## for(i in 1:length(rs)) { ## i is the current genotype |
|
326 |
+## candidates <- which(rs == (rs[i] + 1)) |
|
327 |
+## for(j in candidates) { |
|
328 |
+## ## sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
329 |
+## ## if(sumdiff == 1) |
|
330 |
+## if( (sum(abs(y[j, ] - y[i, ])) == 1) && |
|
331 |
+## ( (f[j] - f[i]) >= th ) ) |
|
332 |
+## adm[i, j] <- 1L |
|
333 |
+## } |
|
334 |
+## } |
|
335 |
+ |
|
336 |
+## colnames(adm) <- rownames(adm) <- 1:ncol(adm) |
|
337 |
+## admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column. |
|
338 |
+## while(TRUE) { |
|
339 |
+## ## We remove inaccessible cols (genotypes) and the corresponding |
|
340 |
+## ## rows repeatedly until nothing left to remove; any column left |
|
341 |
+## ## is therefore accessible throw at least one path. |
|
342 |
+ |
|
343 |
+## ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L) |
|
344 |
+## inacc_col <- which(slam::col_sums(admtmp) == 0L) |
|
345 |
+## if(length(inacc_col) == 0) break; |
|
346 |
+## inacc_row <- inacc_col + 1 ## recall root row is left |
|
347 |
+## admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE] |
|
348 |
+## } |
|
349 |
+## return(as.numeric(c(colnames(adm)[1], colnames(admtmp)))) |
|
350 |
+## } |
|
351 |
+ |
|
219 | 352 |
|
220 | 353 |
generate_matrix_genotypes <- function(g) { |
221 | 354 |
## FIXME future: do this for order too? Only if rfitness for order. |
... | ... |
@@ -257,6 +390,8 @@ genot_to_adj_mat <- function(x) { |
257 | 390 |
## that differ by gaining one mutation) with value being the |
258 | 391 |
## difference in fitness between destination and origin |
259 | 392 |
|
393 |
+ ## FIXME: code is now in place to do all of this in C++ |
|
394 |
+ |
|
260 | 395 |
## Make sure sorted, so ancestors always before descendants |
261 | 396 |
rs0 <- rowSums(x[, -ncol(x)]) |
262 | 397 |
x <- x[order(rs0), ] |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@119231 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -22,6 +22,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
22 | 22 |
col = c("green4", "red", "yellow"), |
23 | 23 |
lty = c(1, 2, 3), use_ggrepel = FALSE, |
24 | 24 |
log = FALSE, max_num_genotypes = 2000, |
25 |
+ only_accessible = FALSE, |
|
26 |
+ accessible_th = 0, |
|
25 | 27 |
...) { |
26 | 28 |
|
27 | 29 |
## FIXME future: |
... | ... |
@@ -37,7 +39,6 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
37 | 39 |
## adjacency matrix of genotype i go at row i and column i. Follow back |
38 | 40 |
## all entries in row i, and their previous, and forward all column i. |
39 | 41 |
|
40 |
- |
|
41 | 42 |
## gfm: genotype fitness matrix |
42 | 43 |
## afe: all fitness effects |
43 | 44 |
|
... | ... |
@@ -52,11 +53,17 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
52 | 53 |
## get the string representation, etc. And this is for use |
53 | 54 |
## with OncoSimul. |
54 | 55 |
|
56 |
+ |
|
55 | 57 |
tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes) |
56 | 58 |
|
57 |
- |
|
58 | 59 |
mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)]) |
59 | 60 |
gaj <- genot_to_adj_mat(tfm$gfm) |
61 |
+ if(only_accessible) { |
|
62 |
+ gaj <- filter_inaccessible(gaj, accessible_th) |
|
63 |
+ remaining <- as.numeric(colnames(gaj)) |
|
64 |
+ mutated <- mutated[remaining] |
|
65 |
+ tfm$afe <- tfm$afe[remaining, , drop = FALSE] |
|
66 |
+ } |
|
60 | 67 |
vv <- which(!is.na(gaj), arr.ind = TRUE) |
61 | 68 |
|
62 | 69 |
## plot(x = mutated, y = e1$Fitness, ylab = "Fitness", |
... | ... |
@@ -171,6 +178,44 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
171 | 178 |
###################################################################### |
172 | 179 |
|
173 | 180 |
|
181 |
+## wrap_filter_inaccessible <- function(x, max_num_genotypes, accessible_th) { |
|
182 |
+## ## wrap it, for my consumption |
|
183 |
+## tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes) |
|
184 |
+## mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)]) |
|
185 |
+## gaj <- genot_to_adj_mat(tfm$gfm) |
|
186 |
+## gaj <- filter_inaccessible(gaj, accessible_th) |
|
187 |
+## remaining <- as.numeric(colnames(gaj)) |
|
188 |
+## mutated <- mutated[remaining] |
|
189 |
+## tfm$afe <- tfm$afe[remaining, , drop = FALSE] |
|
190 |
+## return(list(remaining = remaining, |
|
191 |
+## mutated = mutated, |
|
192 |
+## tfm = tfm)) |
|
193 |
+## } |
|
194 |
+ |
|
195 |
+count_accessible_g <- function(gfm, accessible_th) { |
|
196 |
+ gaj <- genot_to_adj_mat(gfm) |
|
197 |
+ gaj <- filter_inaccessible(gaj, accessible_th) |
|
198 |
+ return(ncol(gaj) - 1) |
|
199 |
+} |
|
200 |
+ |
|
201 |
+filter_inaccessible <- function(x, accessible_th) { |
|
202 |
+ ## Return an adjacency matrix with only accessible paths. x is the gaj |
|
203 |
+ ## matrix created in the plots. The difference between genotypes |
|
204 |
+ ## separated by a hamming distance of 1 |
|
205 |
+ colnames(x) <- rownames(x) <- 1:ncol(x) |
|
206 |
+ while(TRUE) { |
|
207 |
+ ## remove first column |
|
208 |
+ ## We use fact that all(na.omit(x) < u) is true if all are NA |
|
209 |
+ ## so inaccessible rows are removed and thus destination columns |
|
210 |
+ wrm <- which(apply(x[, -1, drop = FALSE], 2, |
|
211 |
+ function(y) {all(na.omit(y) < accessible_th)})) + 1 |
|
212 |
+ if(length(wrm) == 0) break; |
|
213 |
+ x <- x[-wrm, -wrm, drop = FALSE] |
|
214 |
+ } |
|
215 |
+ x[x < 0] <- NA |
|
216 |
+ return(x) |
|
217 |
+} |
|
218 |
+ |
|
174 | 219 |
|
175 | 220 |
generate_matrix_genotypes <- function(g) { |
176 | 221 |
## FIXME future: do this for order too? Only if rfitness for order. |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@119150 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -25,8 +25,6 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
25 | 25 |
...) { |
26 | 26 |
|
27 | 27 |
## FIXME future: |
28 |
- |
|
29 |
- |
|
30 | 28 |
|
31 | 29 |
## - Allow passing order effects. Change "allGenotypes_to_matrix" |
32 | 30 |
## below? Probably not, as we cannot put order effects as a |
... | ... |
@@ -54,107 +52,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
54 | 52 |
## get the string representation, etc. And this is for use |
55 | 53 |
## with OncoSimul. |
56 | 54 |
|
57 |
- |
|
58 | 55 |
tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes) |
59 | 56 |
|
60 |
- ## if( (inherits(x, "genotype_fitness_matrix")) || |
|
61 |
- ## ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) { |
|
62 |
- ## ## Why this? We go back and forth twice. We need both things. We |
|
63 |
- ## ## could construct the afe below by appropriately pasting the |
|
64 |
- ## ## columns names |
|
65 |
- ## afe <- evalAllGenotypes(allFitnessEffects( |
|
66 |
- ## epistasis = from_genotype_fitness(x)), |
|
67 |
- ## order = FALSE, addwt = TRUE, max = max_num_genotypes) |
|
68 |
- ## ## Might not be needed with the proper gfm object (so gmf <- x) |
|
69 |
- ## ## but is needed if arbitrary matrices. |
|
70 |
- ## gfm <- allGenotypes_to_matrix(afe) |
|
71 |
- ## } else if(inherits(x, "fitnessEffects")) { |
|
72 |
- ## if(!is.null(x$orderEffects) ) |
|
73 |
- ## stop("We cannot yet deal with order effects") |
|
74 |
- ## afe <- evalAllGenotypes(x, |
|
75 |
- ## order = FALSE, |
|
76 |
- ## addwt = TRUE, max = max_num_genotypes) |
|
77 |
- ## gfm <- allGenotypes_to_matrix(afe) |
|
78 |
- ## } else if( (inherits(x, "evalAllGenotypes")) || |
|
79 |
- ## (inherits(x, "evalAllGenotypesMut"))) { |
|
80 |
- ## if(any(grepl(">", x[, 1], fixed = TRUE))) |
|
81 |
- ## stop("We cannot deal with order effects yet.") |
|
82 |
- ## x <- x[, c(1, 2)] |
|
83 |
- ## if(x[1, "Genotype"] != "WT") { |
|
84 |
- ## ## Yes, because we expect this present below |
|
85 |
- ## x <- rbind(data.frame(Genotype = "WT", |
|
86 |
- ## Fitness = 1, |
|
87 |
- ## stringsAsFactors = FALSE), |
|
88 |
- ## x) |
|
89 |
- ## } |
|
90 |
- ## afe <- x |
|
91 |
- ## ## in case we pass an evalAllgenotypesfitandmut |
|
92 |
- ## gfm <- allGenotypes_to_matrix(afe) |
|
93 |
- ## } else if(is.data.frame(x)) { |
|
94 |
- ## ## Assume a two-column data frame of genotypes as character |
|
95 |
- ## ## vectors and fitness |
|
96 |
- ## if(colnames(x)[2] != "Fitness") |
|
97 |
- ## stop("We cannot guess what you are passing here") |
|
98 |
- ## afe <- evalAllGenotypes(allFitnessEffects(genotFitness = x), |
|
99 |
- ## order = FALSE, addwt = TRUE, |
|
100 |
- ## max = max_num_genotypes) |
|
101 |
- ## gfm <- allGenotypes_to_matrix(afe) |
|
102 |
- ## } else { |
|
103 |
- ## stop("We cannot guess what you are passing here") |
|
104 |
- ## } |
|
105 |
- |
|
106 |
- |
|
107 |
- |
|
108 |
- ## if(inherits(x, "genotype_fitness_matrix")) { |
|
109 |
- ## ## Why this? We go back and forth twice. We need both things. We |
|
110 |
- ## ## could construct the afe below by appropriately pasting the |
|
111 |
- ## ## columns names |
|
112 |
- ## afe <- evalAllGenotypes(allFitnessEffects( |
|
113 |
- ## epistasis = from_genotype_fitness(x)), |
|
114 |
- ## order = FALSE, addwt = TRUE, max = 2000) |
|
115 |
- ## gfm <- allGenotypes_to_matrix(afe) ## should not be needed? gfm <- x |
|
116 |
- ## } else if(inherits(x, "fitnessEffects")) { |
|
117 |
- ## if(!is.null(x$orderEffects) ) |
|
118 |
- ## stop("We cannot yet deal with order effects") |
|
119 |
- ## afe <- evalAllGenotypes(x, |
|
120 |
- ## order = FALSE, |
|
121 |
- ## addwt = TRUE, max = 2000) |
|
122 |
- ## gfm <- allGenotypes_to_matrix(x) |
|
123 |
- ## } else { |
|
124 |
- ## lc <- ncol(x) |
|
125 |
- ## ## detect an appropriately formatted matrix |
|
126 |
- ## if( (is.data.frame(x) || is.matrix(x)) ) { |
|
127 |
- ## if(ncol(x) > 2) { |
|
128 |
- ## ## We cannot be sure all genotypes are present |
|
129 |
- ## ## but that is not our business? |
|
130 |
- ## ## colnames(x)[lc] <- "Fitness" |
|
131 |
- ## ## So this must be a matrix |
|
132 |
- ## afe <- evalAllGenotypes(allFitnessEffects( |
|
133 |
- ## epistasis = from_genotype_fitness(x)), |
|
134 |
- ## order = FALSE, addwt = TRUE, max = 2000) |
|
135 |
- ## gfm <- allGenotypes_to_matrix(afe) |
|
136 |
- ## } else { |
|
137 |
- ## ## This must be a data frame |
|
138 |
- ## if(!is.data.frame(x)) |
|
139 |
- ## stop("How are you passing a matrix here?") |
|
140 |
- ## if(colnames(x)[lc] != "Fitness") |
|
141 |
- ## stop("We cannot guess what you are passing here") |
|
142 |
- ## ## We are passed an allFitnessEffects output |
|
143 |
- ## ## Will be simpler later as we will know immediately |
|
144 |
- ## if(x[1, "Genotype"] != "WT") { |
|
145 |
- ## ## Yes, because we expect this present below |
|
146 |
- ## x <- rbind(data.frame(Genotype = "WT", |
|
147 |
- ## Fitness = 1, |
|
148 |
- ## stringsAsFactors = FALSE), |
|
149 |
- ## x) |
|
150 |
- ## } |
|
151 |
- ## x <- afe |
|
152 |
- ## gfm <- allGenotypes_to_matrix(afe) |
|
153 |
- ## } |
|
154 |
- ## } else { |
|
155 |
- ## stop("This is not allowed input") |
|
156 |
- ## } |
|
157 |
- ## } |
|
158 | 57 |
|
159 | 58 |
mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)]) |
160 | 59 |
gaj <- genot_to_adj_mat(tfm$gfm) |
... | ... |
@@ -181,7 +80,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
181 | 80 |
x_to = mutated[vv[, 2]], |
182 | 81 |
y_to = tfm$afe$Fitness[vv[, 2]], |
183 | 82 |
Change = factor(ifelse(cl == 0, "Neutral", |
184 |
- ifelse(cl > 0, "Gain", "Loss")))) |
|
83 |
+ ifelse(cl > 0, "Gain", "Loss")), |
|
84 |
+ levels = c("Gain", "Loss", "Neutral"))) |
|
185 | 85 |
## From http://stackoverflow.com/a/17257422 |
186 | 86 |
number_ticks <- function(n) {function(limits) pretty(limits, n)} |
187 | 87 |
|
... | ... |
@@ -194,7 +94,11 @@ plotFitnessLandscape <- function(x, show_labels = TRUE, |
194 | 94 |
xend = x_to, yend = y_to, |
195 |