... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: OncoSimulR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Forward Genetic Simulation of Cancer Progression with Epistasis |
4 |
-Version: 2.7.0 |
|
5 |
-Date: 2017-04-07 |
|
4 |
+Version: 2.7.2 |
|
5 |
+Date: 2017-09-27 |
|
6 | 6 |
Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"), |
7 | 7 |
email = "rdiaz02@gmail.com"), |
8 | 8 |
person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com")) |
... | ... |
@@ -1590,7 +1590,7 @@ get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) { |
1590 | 1590 |
|
1591 | 1591 |
if (length(candidate.time) == 0) { |
1592 | 1592 |
warning(paste("There is not a single sampled time", |
1593 |
- "at which there are any mutants.", |
|
1593 |
+ "at which there are any mutants with drivers. ", |
|
1594 | 1594 |
"Thus, no uniform sampling possible.", |
1595 | 1595 |
"You will get NAs")) |
1596 | 1596 |
the.time <- -99 |
... | ... |
@@ -21,6 +21,19 @@ accessibleGenotypes <- function(y, f, numMut, th) { |
21 | 21 |
.Call('OncoSimulR_accessibleGenotypes', PACKAGE = 'OncoSimulR', y, f, numMut, th) |
22 | 22 |
} |
23 | 23 |
|
24 |
+ |
|
25 |
+genot2AdjMat <- function(y, f, numMut) { |
|
26 |
+ .Call('OncoSimulR_genot2AdjMat', PACKAGE = 'OncoSimulR', y, f, numMut) |
|
27 |
+} |
|
28 |
+ |
|
29 |
+peaksLandscape <- function(y, f, numMut, th) { |
|
30 |
+ .Call('OncoSimulR_peaksLandscape', PACKAGE = 'OncoSimulR', y, f, numMut, th) |
|
31 |
+} |
|
32 |
+ |
|
33 |
+accessibleGenotypes_former <- function(y, f, numMut, th) { |
|
34 |
+ .Call('OncoSimulR_accessibleGenotypes_former', PACKAGE = 'OncoSimulR', y, f, numMut, th) |
|
35 |
+} |
|
36 |
+ |
|
24 | 37 |
## readFitnessEffects <- function(rFE, echo) { |
25 | 38 |
## invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo)) |
26 | 39 |
## } |
... | ... |
@@ -18,6 +18,9 @@ |
18 | 18 |
|
19 | 19 |
## FIXME: show only accessible paths? |
20 | 20 |
|
21 |
+## FIXME: if using only_accessible, maybe we |
|
22 |
+## can try to use fast_peaks, and use the slower |
|
23 |
+## approach as fallback (if identical fitness) |
|
21 | 24 |
plotFitnessLandscape <- function(x, show_labels = TRUE, |
22 | 25 |
col = c("green4", "red", "yellow"), |
23 | 26 |
lty = c(1, 2, 3), use_ggrepel = FALSE, |
... | ... |
@@ -224,13 +227,45 @@ filter_inaccessible <- function(x, accessible_th) { |
224 | 227 |
} |
225 | 228 |
|
226 | 229 |
|
230 |
+## wrapper to the C++ code |
|
231 |
+fast_peaks <- function(x, th) { |
|
232 |
+ ## x is the fitness matrix, not adjacency matrix |
|
233 |
+ |
|
234 |
+ ## Only works if no connected genotypes that form maxima. I.e., no |
|
235 |
+ ## identical fitness. Do a sufficient check for it (too inclusive, though) |
|
236 |
+ ## And only under no back mutation |
|
237 |
+ |
|
238 |
+ original_pos <- 1:nrow(x) |
|
239 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
240 |
+ o_numMut <- order(numMut) |
|
241 |
+ x <- x[o_numMut, ] |
|
242 |
+ numMut <- numMut[o_numMut] |
|
243 |
+ f <- x[, ncol(x)] |
|
244 |
+ ## Two assumptions |
|
245 |
+ stopifnot(numMut[1] == 0) |
|
246 |
+ ## make sure no repeated in those that could be maxima |
|
247 |
+ if(any(duplicated(f[f >= f[1]]))) |
|
248 |
+ stop("There could be several connected maxima genotypes.", |
|
249 |
+ " This function is not safe to use.") |
|
250 |
+ |
|
251 |
+ y <- x[, -ncol(x)] |
|
252 |
+ storage.mode(y) <- "integer" |
|
253 |
+ original_pos <- original_pos[o_numMut] |
|
254 |
+ return(sort(original_pos[peaksLandscape(y, f, |
|
255 |
+ as.integer(numMut), |
|
256 |
+ th)])) |
|
257 |
+} |
|
258 |
+ |
|
259 |
+ |
|
227 | 260 |
## wrapper to the C++ code |
228 | 261 |
wrap_accessibleGenotypes <- function(x, th) { |
229 | 262 |
## x is the fitness matrix, not adjacency matrix |
263 |
+ original_pos <- 1:nrow(x) |
|
230 | 264 |
numMut <- rowSums(x[, -ncol(x)]) |
231 | 265 |
o_numMut <- order(numMut) |
232 | 266 |
x <- x[o_numMut, ] |
233 | 267 |
numMut <- numMut[o_numMut] |
268 |
+ original_pos <- original_pos[o_numMut] |
|
234 | 269 |
|
235 | 270 |
y <- x[, -ncol(x)] |
236 | 271 |
storage.mode(y) <- "integer" |
... | ... |
@@ -238,7 +273,29 @@ wrap_accessibleGenotypes <- function(x, th) { |
238 | 273 |
acc <- accessibleGenotypes(y, x[, ncol(x)], |
239 | 274 |
as.integer(numMut), |
240 | 275 |
th) |
241 |
- return(acc[acc > 0]) |
|
276 |
+ ## return(acc[acc > 0]) |
|
277 |
+ return(sort(original_pos[acc[acc > 0]])) |
|
278 |
+} |
|
279 |
+ |
|
280 |
+ |
|
281 |
+## wrapper to the C++ code; the former one, only for testing. Remove |
|
282 |
+## eventually FIXME |
|
283 |
+wrap_accessibleGenotypes_former <- function(x, th) { |
|
284 |
+ ## x is the fitness matrix, not adjacency matrix |
|
285 |
+ original_pos <- 1:nrow(x) |
|
286 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
287 |
+ o_numMut <- order(numMut) |
|
288 |
+ x <- x[o_numMut, ] |
|
289 |
+ numMut <- numMut[o_numMut] |
|
290 |
+ original_pos <- original_pos[o_numMut] |
|
291 |
+ |
|
292 |
+ y <- x[, -ncol(x)] |
|
293 |
+ storage.mode(y) <- "integer" |
|
294 |
+ |
|
295 |
+ acc <- accessibleGenotypes_former(y, x[, ncol(x)], |
|
296 |
+ as.integer(numMut), |
|
297 |
+ th) |
|
298 |
+ return(sort(original_pos[acc[acc > 0]])) |
|
242 | 299 |
} |
243 | 300 |
|
244 | 301 |
## A transitional function |
... | ... |
@@ -378,8 +435,10 @@ generate_matrix_genotypes <- function(g) { |
378 | 435 |
} |
379 | 436 |
|
380 | 437 |
|
381 |
- |
|
382 |
-genot_to_adj_mat <- function(x) { |
|
438 |
+## The R version. See also the C++ one |
|
439 |
+genot_to_adj_mat_R <- function(x) { |
|
440 |
+ ## x is the fitness matrix |
|
441 |
+ |
|
383 | 442 |
## FIXME this can take about 23% of the time of the ggplot call. |
384 | 443 |
## But them, we are quickly constructing a 2000*2000 matrix |
385 | 444 |
## Given a genotype matrix, as given by allGenotypes_to_matrix, produce a |
... | ... |
@@ -390,13 +449,16 @@ genot_to_adj_mat <- function(x) { |
390 | 449 |
## FIXME: code is now in place to do all of this in C++ |
391 | 450 |
|
392 | 451 |
## Make sure sorted, so ancestors always before descendants |
393 |
- rs0 <- rowSums(x[, -ncol(x)]) |
|
394 |
- x <- x[order(rs0), ] |
|
395 |
- rm(rs0) |
|
452 |
+ original_pos <- 1:nrow(x) |
|
453 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
454 |
+ o_numMut <- order(numMut) |
|
455 |
+ x <- x[o_numMut, ] |
|
456 |
+ original_pos <- original_pos[o_numMut] |
|
457 |
+ rm(numMut) |
|
396 | 458 |
|
397 | 459 |
y <- x[, -ncol(x)] |
398 | 460 |
f <- x[, ncol(x)] |
399 |
- rs <- rowSums(y) |
|
461 |
+ rs <- rowSums(y) ## redo for paranoia; could have ordered numMut |
|
400 | 462 |
|
401 | 463 |
## Move this to C++? |
402 | 464 |
adm <- matrix(NA, nrow = length(rs), ncol = length(rs)) |
... | ... |
@@ -408,9 +470,51 @@ genot_to_adj_mat <- function(x) { |
408 | 470 |
if(sumdiff == 1) adm[i, j] <- (f[j] - f[i]) |
409 | 471 |
} |
410 | 472 |
} |
473 |
+ colnames(adm) <- rownames(adm) <- original_pos |
|
411 | 474 |
return(adm) |
412 | 475 |
} |
413 | 476 |
|
477 |
+genot_to_adj_mat <- function(x) { |
|
478 |
+ ## x is the fitness matrix |
|
479 |
+ |
|
480 |
+ ## adding column and row names should rarely be necessary |
|
481 |
+ ## as these are internal functions, etc. But just in case |
|
482 |
+ original_pos <- 1:nrow(x) |
|
483 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
484 |
+ o_numMut <- order(numMut) |
|
485 |
+ x <- x[o_numMut, ] |
|
486 |
+ numMut <- numMut[o_numMut] |
|
487 |
+ original_pos <- original_pos[o_numMut] |
|
488 |
+ |
|
489 |
+ y <- x[, -ncol(x)] |
|
490 |
+ storage.mode(y) <- "integer" |
|
491 |
+ |
|
492 |
+ adm <- genot2AdjMat(y, x[, ncol(x)], |
|
493 |
+ as.integer(numMut)) |
|
494 |
+ colnames(adm) <- rownames(adm) <- original_pos |
|
495 |
+ return(adm) |
|
496 |
+} |
|
497 |
+ |
|
498 |
+ |
|
499 |
+## ## to move above to C++ note that loop can be |
|
500 |
+## for(i in 1:length(rs)) { ## i is the current genotype |
|
501 |
+## for(j in (i:length(rs))) { |
|
502 |
+## if(rs[j] > (rs[i] + 1)) break; |
|
503 |
+## else if(rs[j] == (rs[i] + 1)) { |
|
504 |
+## ## and use here my HammingDistance function |
|
505 |
+## ## sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
506 |
+## ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i]) |
|
507 |
+## if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]); |
|
508 |
+## } |
|
509 |
+## } |
|
510 |
+## } |
|
511 |
+ |
|
512 |
+## actually, all that is already in accessibleGenotypes except for the |
|
513 |
+## filling up of adm. |
|
514 |
+ |
|
515 |
+ |
|
516 |
+ |
|
517 |
+ |
|
414 | 518 |
|
415 | 519 |
peak_valley <- function(x) { |
416 | 520 |
## FIXME: when there are no identical entries, all this |
... | ... |
@@ -466,7 +570,13 @@ peak_valley <- function(x) { |
466 | 570 |
} |
467 | 571 |
bad_fwd <- vector("integer", nrow(x)) |
468 | 572 |
for(i in 1:nrow(x)) { |
573 |
+ ## Eh, why any? All. |
|
574 |
+ ## Nope, any: we want peaks in general, not just |
|
575 |
+ ## under assumption of "no back mutation" |
|
576 |
+ ## We get a different result when we restrict to accessible |
|
577 |
+ ## because all < 0 in adjacency are turned to NAs. |
|
469 | 578 |
if( any(x[, i] < 0, na.rm = TRUE) || bad_fwd[i] ) { |
579 |
+ ## if( all(x[, i] < 0, na.rm = TRUE) ) { |
|
470 | 580 |
## this node is bad. Any descendant with fitness >= is bad |
471 | 581 |
bad_fwd[i] <- 1 |
472 | 582 |
reach_f <- which(x[i, ] <= 0) |
... | ... |
@@ -478,3 +588,36 @@ peak_valley <- function(x) { |
478 | 588 |
peak <- setdiff(candidate, bad) |
479 | 589 |
return(list(peak = peak, valley = valley)) |
480 | 590 |
} |
591 |
+ |
|
592 |
+ |
|
593 |
+ |
|
594 |
+## For the future |
|
595 |
+## ## data.frame (two columns: genotype with "," and Fitness) -> fitness graph (DAG) |
|
596 |
+## ## Return an adj matrix of the fitness graph from a fitness |
|
597 |
+## ## landscape |
|
598 |
+## ## Based on code in plotFitnessLandscape |
|
599 |
+## flandscape_to_fgraph <- function(afe) { |
|
600 |
+## gfm <- OncoSimulR:::allGenotypes_to_matrix(afe) |
|
601 |
+## ## mutated <- rowSums(gfm[, -ncol(gfm)]) |
|
602 |
+## gaj <- OncoSimulR:::genot_to_adj_mat(gfm) |
|
603 |
+## gaj2 <- OncoSimulR:::filter_inaccessible(gaj, 0) |
|
604 |
+## stopifnot(all(na.omit(as.vector(gaj == gaj2)))) |
|
605 |
+## remaining <- as.numeric(colnames(gaj2)) |
|
606 |
+## ## mutated <- mutated[remaining] |
|
607 |
+## afe <- afe[remaining, , drop = FALSE] |
|
608 |
+## ## vv <- which(!is.na(gaj2), arr.ind = TRUE) |
|
609 |
+ |
|
610 |
+## gaj2 <- gaj2 |
|
611 |
+## gaj2[is.na(gaj2)] <- 0 |
|
612 |
+## gaj2[gaj2 > 0] <- 1 |
|
613 |
+## colnames(gaj2) <- rownames(gaj2) <- afe[, "Genotype"] |
|
614 |
+## return(gaj2) |
|
615 |
+## } |
|
616 |
+## ## This could be done easily in C++, taking care of row/colnames at end, |
|
617 |
+## ## without moving around the full adjacency matrix. |
|
618 |
+## ## Skeleton for C++ |
|
619 |
+## ## a call to accessibleGenotypesPeaksLandscape |
|
620 |
+## ## (with another argument or changing the returnpeaks by a three value thing) |
|
621 |
+## ## after done with first loop, |
|
622 |
+## ## return the matrix adm[accessible > 0, accessible >0] |
|
623 |
+## ## only need care with row/colnames |
... | ... |
@@ -23,7 +23,7 @@ |
23 | 23 |
|
24 | 24 |
to_Magellan <- function(x, file, |
25 | 25 |
max_num_genotypes = 2000) { |
26 |
- ## This is stupid if we already have, as entry, an object from |
|
26 |
+ ## Go directly if you have, as entry, an object from |
|
27 | 27 |
## rfitness!! to_Fitness_Matrix can be very slow. |
28 | 28 |
if(is.null(file)) { |
29 | 29 |
file <- tempfile() |
... | ... |
@@ -44,7 +44,8 @@ to_Magellan <- function(x, file, |
44 | 44 |
to_Fitness_Matrix <- function(x, max_num_genotypes) { |
45 | 45 |
## A general converter. Ready to be used by plotFitnessLandscape and |
46 | 46 |
## Magellan exporter. |
47 |
- |
|
47 |
+ |
|
48 |
+ ## FIXME: really, some of this is inefficient. Very. Fix it. |
|
48 | 49 |
if( (inherits(x, "genotype_fitness_matrix")) || |
49 | 50 |
( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) { |
50 | 51 |
## Why this? We go back and forth twice. We need both things. We |
... | ... |
@@ -257,7 +258,7 @@ allGenotypes_to_matrix <- function(x) { |
257 | 258 |
splitted_genots <- lapply(x$Genotype, |
258 | 259 |
function(z) nice.vector.eo(z, ",")) |
259 | 260 |
|
260 |
- all_genes <- unique(unlist(splitted_genots)) |
|
261 |
+ all_genes <- sort(unique(unlist(splitted_genots))) |
|
261 | 262 |
|
262 | 263 |
m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes)) |
263 | 264 |
the_match <- lapply(splitted_genots, |
... | ... |
@@ -285,7 +286,8 @@ Magellan_stats <- function(x, max_num_genotypes = 2000, |
285 | 286 |
verbose = FALSE, |
286 | 287 |
use_log = TRUE, |
287 | 288 |
short = TRUE, |
288 |
- fl_statistics = "fl_statistics") { # nocov start |
|
289 |
+ fl_statistics = "fl_statistics", |
|
290 |
+ replace_missing = FALSE) { # nocov start |
|
289 | 291 |
## I always use |
290 | 292 |
## if(!is.null(x) && is.null(file)) |
291 | 293 |
## stop("one of object or file name") |
... | ... |
@@ -306,23 +308,43 @@ Magellan_stats <- function(x, max_num_genotypes = 2000, |
306 | 308 |
shortarg <- NULL |
307 | 309 |
} |
308 | 310 |
|
311 |
+ if(replace_missing) { |
|
312 |
+ zarg <- "-z" |
|
313 |
+ } else { |
|
314 |
+ zarg <- NULL |
|
315 |
+ } |
|
316 |
+ |
|
309 | 317 |
to_Magellan(x, fn, max_num_genotypes = max_num_genotypes) |
310 |
- call_M <- system2(fl_statistics, args = paste(fn, shortarg, logarg, "-o", fnret)) |
|
318 |
+ ## newer versions of Magellan provide some extra values to standard output |
|
319 |
+ call_M <- system2(fl_statistics, |
|
320 |
+ args = paste(fn, shortarg, logarg, zarg, "-o", fnret), |
|
321 |
+ stdout = NULL) |
|
311 | 322 |
if(short) { |
312 |
- tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1]) |
|
323 |
+ ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1]) |
|
324 |
+ |
|
325 |
+ tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)]) |
|
313 | 326 |
## Make names more explicit, but check we have what we think we have |
314 |
- stopifnot(length(tmp) == 23) |
|
315 |
- stopifnot(identical(names(tmp), |
|
327 |
+ ## New versions of Magellan produce different output apparently of variable length |
|
328 |
+ stopifnot(length(tmp) >= 23) ## 23) ## variable length |
|
329 |
+ stopifnot(identical(names(tmp)[1:13], ## only some |
|
316 | 330 |
c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s", |
317 | 331 |
"nchains", "nsteps", "nori", "depth", "magn", "sign", |
318 |
- "rsign", "w.1.", "w.2.", "w.3..", "mode_w", "s.1.", |
|
319 |
- "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v"))) |
|
320 |
- names(tmp) <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star", |
|
332 |
+ "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.", |
|
333 |
+ ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v"))) |
|
334 |
+ if(length(tmp) >= 24) ## the new version |
|
335 |
+ stopifnot(identical(names(tmp)[c(20, 24)], |
|
336 |
+ c("steps_m", "mProbOpt_0"))) |
|
337 |
+ ## steps_m: the mean number of steps over the entire landscape to |
|
338 |
+ ## reach the global optimum |
|
339 |
+ ## mProbOpt_0: The mean probability to |
|
340 |
+ ## reach that optimum (again averaged over the entire landscape). |
|
341 |
+ ## but if there are multiple optima, there are many of these |
|
342 |
+ names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star", |
|
321 | 343 |
"r/s","n_chains", "n_steps", "n_origins", "max_depth", |
322 |
- "epist_magn", "epist_sign", "epist_rsign", |
|
323 |
- "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh", |
|
324 |
- "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg", |
|
325 |
- "std_dev_pairs", "var_outdegree") |
|
344 |
+ "epist_magn", "epist_sign", "epist_rsign")## , |
|
345 |
+ ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh", |
|
346 |
+ ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg", |
|
347 |
+ ## "std_dev_pairs", "var_outdegree") |
|
326 | 348 |
} else { |
327 | 349 |
tmp <- readLines(fnret) |
328 | 350 |
} |
... | ... |
@@ -1866,14 +1866,24 @@ sampledGenotypes <- function(y, genes = NULL) { |
1866 | 1866 |
cols <- which(colnames(y) %in% genes ) |
1867 | 1867 |
y <- y[, cols] |
1868 | 1868 |
} |
1869 |
- nn <- colnames(y) |
|
1869 |
+ ## nn <- colnames(y) |
|
1870 |
+ genotlabel <- function(u, nn = colnames(y)) { |
|
1871 |
+ if(any(is.na(u))) return(NA) |
|
1872 |
+ else { |
|
1873 |
+ return(paste(sort(nn[as.logical(u)]), collapse = ", ")) |
|
1874 |
+ } |
|
1875 |
+ } |
|
1876 |
+ ## df <- data.frame(table( |
|
1877 |
+ ## apply(y, 1, function(z) paste(sort(nn[as.logical(z)]), collapse = ", ") ) |
|
1878 |
+ ## )) |
|
1870 | 1879 |
df <- data.frame(table( |
1871 |
- apply(y, 1, function(z) paste(nn[as.logical(z)], collapse = ", ") ) |
|
1872 |
- )) |
|
1880 |
+ apply(y, 1, genotlabel), |
|
1881 |
+ useNA = "ifany")) |
|
1882 |
+ |
|
1873 | 1883 |
gn <- as.character(df[, 1]) |
1874 | 1884 |
gn[gn == ""] <- "WT" |
1875 | 1885 |
df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE) |
1876 |
- attributes(df)$ShannonI <- shannonI(df$Freq) |
|
1886 |
+ attributes(df)$ShannonI <- shannonI(na.omit(df)$Freq) |
|
1877 | 1887 |
class(df) <- c("sampledGenotypes", class(df)) |
1878 | 1888 |
return(df) |
1879 | 1889 |
} |
... | ... |
@@ -1,3 +1,14 @@ |
1 |
+Changes in version 2.7.2 (2017-09-27): |
|
2 |
+ - genot_to_adj_mat in C++. |
|
3 |
+ - fast_peaks (for no backmutation cases). |
|
4 |
+ - Better explanation and testing of peaks and valleys. |
|
5 |
+ - Clarified simOGraph transitive reduction. |
|
6 |
+ - Better handling of ti corner cases. |
|
7 |
+ - Magellan reading fuctions adapted to output of newer (as of |
|
8 |
+ 2017-07) version of Magellan. |
|
9 |
+ - sorting gene names in allGenotypes_to_matrix. |
|
10 |
+ - sampledGenotypes: genotype names with sorted gene names. |
|
11 |
+ |
|
1 | 12 |
Changes in version 2.6.0 (for BioC 3.5): |
2 | 13 |
- Many additions to the vignette and documentation. |
3 | 14 |
- LOD and POM (lines of descent, path of maximum, sensu Szendro et |
... | ... |
@@ -34,8 +34,9 @@ s = 0.1, sh = -0.1, typeDep = "AND") |
34 | 34 |
\item{removeDirectIndirect}{ |
35 | 35 |
Ensure that no two nodes are connected both directly (i.e., with an |
36 | 36 |
edge between them) and indirectly, through intermediate nodes. If |
37 |
- TRUE, the direct connections are removed from the graph starting from |
|
38 |
- the bottom. |
|
37 |
+ TRUE, we return the transitive reduction of the DAG. |
|
38 |
+ %% the final DAG returned is the transitive reduction of thethe direct |
|
39 |
+ %% connections are removed from the graph starting from the bottom. |
|
39 | 40 |
} |
40 | 41 |
\item{rootName}{ |
41 | 42 |
The name you want to give the "Root" node. |
... | ... |
@@ -755,7 +755,7 @@ void addToPhylog(PhylogName& phylog, |
755 | 755 |
|
756 | 756 |
|
757 | 757 |
|
758 |
-static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
|
758 |
+static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects, |
|
759 | 759 |
const double& initSize, |
760 | 760 |
const double& K, |
761 | 761 |
// const double& alpha, |
... | ... |
@@ -809,7 +809,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
809 | 809 |
const double& PDBaseline, |
810 | 810 |
const double& checkSizePEvery, |
811 | 811 |
const bool& AND_DrvProbExit, |
812 |
- const std::vector< std::vector<int> >& fixation_l) { |
|
812 |
+ const std::vector< std::vector<int> >& fixation_l, |
|
813 |
+ int& ti_dbl_min, |
|
814 |
+ int& ti_e3) { |
|
813 | 815 |
|
814 | 816 |
double nextCheckSizeP = checkSizePEvery; |
815 | 817 |
const int numGenes = fitnessEffects.genomeSize; |
... | ... |
@@ -939,8 +941,10 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
939 | 941 |
std::multimap<double, int> mapTimes; |
940 | 942 |
//std::multimap<double, int>::iterator m1pos; |
941 | 943 |
|
942 |
- int ti_dbl_min = 0; |
|
943 |
- int ti_e3 = 0; |
|
944 |
+ // int ti_dbl_min = 0; |
|
945 |
+ // int ti_e3 = 0; |
|
946 |
+ ti_dbl_min = 0; |
|
947 |
+ ti_e3 = 0; |
|
944 | 948 |
|
945 | 949 |
|
946 | 950 |
// // FIXME debug |
... | ... |
@@ -1239,6 +1243,10 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1239 | 1243 |
mapTimes_updateP(mapTimes, popParams, u_1, tmpdouble1); |
1240 | 1244 |
popParams[u_1].timeLastUpdate = currentTime; |
1241 | 1245 |
|
1246 |
+#ifdef DEBUGV |
|
1247 |
+ detect_ti_duplicates(mapTimes, tmpdouble1, u_1); |
|
1248 |
+#endif |
|
1249 |
+ |
|
1242 | 1250 |
#ifdef DEBUGV |
1243 | 1251 |
Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime, update one ******\n For to_update = \n " |
1244 | 1252 |
<< " tSample = " << tSample |
... | ... |
@@ -1269,6 +1277,13 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1269 | 1277 |
popParams[u_1].timeLastUpdate = currentTime; |
1270 | 1278 |
popParams[u_2].timeLastUpdate = currentTime; |
1271 | 1279 |
|
1280 |
+#ifdef DEBUGV |
|
1281 |
+ detect_ti_duplicates(mapTimes, tmpdouble1, u_1); |
|
1282 |
+ detect_ti_duplicates(mapTimes, tmpdouble2, u_2); |
|
1283 |
+#endif |
|
1284 |
+ |
|
1285 |
+ |
|
1286 |
+ |
|
1272 | 1287 |
#ifdef DEBUGV |
1273 | 1288 |
Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime, update two ******\n " |
1274 | 1289 |
<< " tSample = " << tSample |
... | ... |
@@ -1306,6 +1321,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1306 | 1321 |
tSample, ti_dbl_min, ti_e3); |
1307 | 1322 |
mapTimes_updateP(mapTimes, popParams, i, tmpdouble1); |
1308 | 1323 |
popParams[i].timeLastUpdate = currentTime; |
1324 |
+#ifdef DEBUGV |
|
1325 |
+ detect_ti_duplicates(mapTimes, tmpdouble1, i); |
|
1326 |
+#endif |
|
1309 | 1327 |
|
1310 | 1328 |
#ifdef DEBUGV |
1311 | 1329 |
Rcpp::Rcout << "\n\n ********* 5.2: call to ti_nextTime, update all ******\n " |
... | ... |
@@ -1335,6 +1353,7 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1335 | 1353 |
|
1336 | 1354 |
// ******************** 5.3 and do we sample? *********** |
1337 | 1355 |
// Find minimum to know if we need to sample the whole pop |
1356 |
+ // We also obtain the nextMutant |
|
1338 | 1357 |
getMinNextMutationTime4(nextMutant, minNextMutationTime, |
1339 | 1358 |
mapTimes); |
1340 | 1359 |
|
... | ... |
@@ -1438,7 +1457,31 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1438 | 1457 |
Rcpp::Rcout <<"\n Creating new species " << (numSpecies - 1) |
1439 | 1458 |
<< " from species " << nextMutant; |
1440 | 1459 |
} |
1441 |
- |
|
1460 |
+ |
|
1461 |
+#ifdef DEBUGW |
|
1462 |
+ if( (currentTime - popParams[nextMutant].timeLastUpdate) < 0.0) { |
|
1463 |
+ DP2(currentTime); //this is set to minNextMutationTime above |
|
1464 |
+ DP2(minNextMutationTime); |
|
1465 |
+ DP2(tSample); |
|
1466 |
+ DP2(popParams[nextMutant].timeLastUpdate); |
|
1467 |
+ DP2( (currentTime - popParams[nextMutant].timeLastUpdate) ); |
|
1468 |
+ DP2( (currentTime < popParams[nextMutant].timeLastUpdate) ); |
|
1469 |
+ DP2( (currentTime == popParams[nextMutant].timeLastUpdate) ); |
|
1470 |
+ DP2(nextMutant); |
|
1471 |
+ DP2(u_1); |
|
1472 |
+ DP2(u_2); |
|
1473 |
+ DP2(tmpdouble1); |
|
1474 |
+ DP2(tmpdouble2); |
|
1475 |
+ DP2(popParams[nextMutant].timeLastUpdate); |
|
1476 |
+ DP2(popParams[u_1].timeLastUpdate); |
|
1477 |
+ DP2(popParams[u_2].timeLastUpdate); |
|
1478 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) ); |
|
1479 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) ); |
|
1480 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) ); |
|
1481 |
+ print_spP(popParams[nextMutant]); |
|
1482 |
+ throw std::out_of_range("new species: currentTime - timeLastUpdate[sp] out of range. ***###!!!Serious bug!!!###***"); |
|
1483 |
+ } |
|
1484 |
+#endif |
|
1442 | 1485 |
tmpParam.popSize = 1; |
1443 | 1486 |
|
1444 | 1487 |
nr_fitness(tmpParam, popParams[nextMutant], |
... | ... |
@@ -1523,12 +1566,75 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1523 | 1566 |
to_update = 2; |
1524 | 1567 |
|
1525 | 1568 |
#ifdef DEBUGW |
1526 |
- if( (currentTime - popParams[sp].timeLastUpdate) <= 0.0) { |
|
1527 |
- DP2(currentTime); |
|
1569 |
+ if( (currentTime - popParams[sp].timeLastUpdate) < 0.0) { |
|
1570 |
+ // Yes, the difference could be 0 if two next mutation times are identical. |
|
1571 |
+ // You enable detect_ti_duplicates and use trigger-duplicated-ti.R |
|
1572 |
+ // to see it. |
|
1573 |
+ // Often the involved culprits (nextMutant and the other, say sp) |
|
1574 |
+ // were lastUpdated with tiny difference and they were, when updated |
|
1575 |
+ // given an identical ti, each in its own run. |
|
1576 |
+ // Key is not timeLastUpdate. This is a possible sequence of events: |
|
1577 |
+ // - at time t0, species that will become nextMutant is updated and gets ti = tinm |
|
1578 |
+ // - t1: species u1 gets ti = tinm |
|
1579 |
+ // - t2: species u2 gets some ti > tinm |
|
1580 |
+ // - tinm becomes minimal, so we mutate u1, and it mutates to u2 |
|
1581 |
+ // - (so now the timeLastUpdate of u1 = u2 = tinm) |
|
1582 |
+ // - nextMutant is now mutated, and it mutates to u2, which becomes sp |
|
1583 |
+ // - tinm = timeLastUpdate of u1 and u2. |
|
1584 |
+ // - You will also see that number of mutations, or genotypes are such |
|
1585 |
+ // that, in this case, u2 is the most mutated, etc. |
|
1586 |
+ // - If you enable the detect_ti_duplicates, you would have seen duplicated ti |
|
1587 |
+ // for nextMutant and u1 |
|
1588 |
+ |
|
1589 |
+ // Even simpler is if above, nextMutant will mutate to u1 (not u2) so u1 becomes sp. |
|
1590 |
+ DP2(currentTime); //this is set to minNextMutationTime above |
|
1591 |
+ DP2(minNextMutationTime); |
|
1592 |
+ DP2(tSample); |
|
1593 |
+ DP2(popParams[sp].timeLastUpdate); |
|
1594 |
+ DP2( (currentTime - popParams[sp].timeLastUpdate) ); |
|
1595 |
+ DP2( (currentTime < popParams[sp].timeLastUpdate) ); |
|
1596 |
+ DP2( (currentTime == popParams[sp].timeLastUpdate) ); |
|
1528 | 1597 |
DP2(sp); |
1598 |
+ DP2(nextMutant); |
|
1599 |
+ DP2(u_1); |
|
1600 |
+ DP2(u_2); |
|
1601 |
+ DP2(tmpdouble1); |
|
1602 |
+ DP2(tmpdouble2); |
|
1529 | 1603 |
DP2(popParams[sp].timeLastUpdate); |
1604 |
+ DP2(popParams[nextMutant].timeLastUpdate); |
|
1605 |
+ DP2(popParams[u_1].timeLastUpdate); |
|
1606 |
+ DP2(popParams[u_2].timeLastUpdate); |
|
1607 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) ); |
|
1608 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) ); |
|
1609 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) ); |
|
1530 | 1610 |
print_spP(popParams[sp]); |
1531 |
- throw std::out_of_range("currentTime - timeLastUpdate out of range. Serious bug!"); |
|
1611 |
+ print_spP(popParams[nextMutant]); |
|
1612 |
+ throw std::out_of_range("currentTime - timeLastUpdate[sp] out of range. ***###!!!Serious bug!!!###***"); |
|
1613 |
+ } |
|
1614 |
+ if( (currentTime - popParams[nextMutant].timeLastUpdate) < 0.0) { |
|
1615 |
+ DP2(currentTime); //this is set to minNextMutationTime above |
|
1616 |
+ DP2(minNextMutationTime); |
|
1617 |
+ DP2(tSample); |
|
1618 |
+ DP2(popParams[nextMutant].timeLastUpdate); |
|
1619 |
+ DP2( (currentTime - popParams[nextMutant].timeLastUpdate) ); |
|
1620 |
+ DP2( (currentTime < popParams[nextMutant].timeLastUpdate) ); |
|
1621 |
+ DP2( (currentTime == popParams[nextMutant].timeLastUpdate) ); |
|
1622 |
+ DP2(sp); |
|
1623 |
+ DP2(nextMutant); |
|
1624 |
+ DP2(u_1); |
|
1625 |
+ DP2(u_2); |
|
1626 |
+ DP2(tmpdouble1); |
|
1627 |
+ DP2(tmpdouble2); |
|
1628 |
+ DP2(popParams[sp].timeLastUpdate); |
|
1629 |
+ DP2(popParams[nextMutant].timeLastUpdate); |
|
1630 |
+ DP2(popParams[u_1].timeLastUpdate); |
|
1631 |
+ DP2(popParams[u_2].timeLastUpdate); |
|
1632 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) ); |
|
1633 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) ); |
|
1634 |
+ DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) ); |
|
1635 |
+ print_spP(popParams[sp]); |
|
1636 |
+ print_spP(popParams[nextMutant]); |
|
1637 |
+ throw std::out_of_range("currentTime - timeLastUpdate[nextMutant] out of range. ***###!!!Serious bug!!!###***"); |
|
1532 | 1638 |
} |
1533 | 1639 |
#endif |
1534 | 1640 |
// if(verbosity >= 2) { |
... | ... |
@@ -1888,11 +1994,17 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
1888 | 1994 |
// 5.1 Initialize |
1889 | 1995 |
|
1890 | 1996 |
int numRuns = 0; |
1997 |
+ int numRecoverExcept = 0; |
|
1891 | 1998 |
bool forceRerun = false; |
1892 | 1999 |
|
1893 | 2000 |
double currentTime = 0; |
1894 | 2001 |
int iter = 0; |
1895 | 2002 |
|
2003 |
+ int ti_dbl_min = 0; |
|
2004 |
+ int ti_e3 = 0; |
|
2005 |
+ |
|
2006 |
+ int accum_ti_dbl_min = 0; |
|
2007 |
+ int accum_ti_e3 = 0; |
|
1896 | 2008 |
// bool AND_DrvProbExit = ( (cpDetect >= 0) && |
1897 | 2009 |
// (detectionDrivers < 1e9) && |
1898 | 2010 |
// (detectionSize < std::numeric_limits<double>::infinity())); |
... | ... |
@@ -1975,13 +2087,21 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
1975 | 2087 |
PDBaseline, |
1976 | 2088 |
checkSizePEvery, |
1977 | 2089 |
AND_DrvProbExit, |
1978 |
- fixation_l); |
|
2090 |
+ fixation_l, |
|
2091 |
+ ti_dbl_min, |
|
2092 |
+ ti_e3); |
|
1979 | 2093 |
++numRuns; |
1980 | 2094 |
forceRerun = false; |
2095 |
+ accum_ti_dbl_min += ti_dbl_min; |
|
2096 |
+ accum_ti_e3 += ti_e3; |
|
1981 | 2097 |
} catch (rerunExcept &e) { |
1982 | 2098 |
Rcpp::Rcout << "\n Recoverable exception " << e.what() |
1983 | 2099 |
<< ". Rerunning."; |
1984 | 2100 |
forceRerun = true; |
2101 |
+ ++numRecoverExcept; |
|
2102 |
+ ++numRuns; // exception should count here! |
|
2103 |
+ accum_ti_dbl_min += ti_dbl_min; |
|
2104 |
+ accum_ti_e3 += ti_e3; |
|
1985 | 2105 |
} catch (const std::exception &e) { |
1986 | 2106 |
Rcpp::Rcout << "\n Unrecoverable exception: " << e.what() |
1987 | 2107 |
<< ". Aborting. \n"; |
... | ... |
@@ -2125,9 +2245,11 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
2125 | 2245 |
Named("child") = phylog.child, |
2126 | 2246 |
Named("time") = phylog.time |
2127 | 2247 |
), |
2128 |
- Named("UnrecoverExcept") = false) |
|
2248 |
+ Named("UnrecoverExcept") = false, |
|
2249 |
+ Named("numRecoverExcept") = numRecoverExcept, |
|
2250 |
+ Named("accum_ti_dbl_min") = accum_ti_dbl_min, |
|
2251 |
+ Named("accum_ti_e3") = accum_ti_e3) |
|
2129 | 2252 |
); |
2130 |
- |
|
2131 | 2253 |
} |
2132 | 2254 |
|
2133 | 2255 |
|
... | ... |
@@ -12,6 +12,10 @@ SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SE |
12 | 12 |
// SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP); |
13 | 13 |
SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); |
14 | 14 |
|
15 |
+SEXP OncoSimulR_genot2AdjMat(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP); |
|
16 |
+SEXP OncoSimulR_peaksLandscape(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); |
|
17 |
+SEXP OncoSimulR_accessibleGenotypes_former(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); |
|
18 |
+ |
|
15 | 19 |
// The number is the number of arguments |
16 | 20 |
R_CallMethodDef callMethods[] = { |
17 | 21 |
{"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 37}, |
... | ... |
@@ -19,6 +23,9 @@ R_CallMethodDef callMethods[] = { |
19 | 23 |
{"OncoSimulR_evalRGenotype", (DL_FUNC) &OncoSimulR_evalRGenotype, 5}, |
20 | 24 |
{"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6}, |
21 | 25 |
{"OncoSimulR_accessibleGenotypes", (DL_FUNC) &OncoSimulR_accessibleGenotypes, 4}, |
26 |
+ {"OncoSimulR_genot2AdjMat", (DL_FUNC) &OncoSimulR_genot2AdjMat, 3}, |
|
27 |
+ {"OncoSimulR_peaksLandscape", (DL_FUNC) &OncoSimulR_peaksLandscape, 4}, |
|
28 |
+ {"OncoSimulR_accessibleGenotypes_former", (DL_FUNC) &OncoSimulR_accessibleGenotypes_former, 4}, |
|
22 | 29 |
// {"OncoSimulR_readFitnessEffects", (DL_FUNC) &OncoSimulR_readFitnessEffects, 2}, |
23 | 30 |
{NULL, NULL, 0} |
24 | 31 |
}; |
... | ... |
@@ -141,6 +141,50 @@ BEGIN_RCPP |
141 | 141 |
END_RCPP |
142 | 142 |
} |
143 | 143 |
|
144 |
+// genotype fitness matrix to adjacency matrix of genotypes |
|
145 |
+Rcpp::NumericMatrix genot2AdjMat(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut); |
|
146 |
+RcppExport SEXP OncoSimulR_genot2AdjMat(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP) { |
|
147 |
+BEGIN_RCPP |
|
148 |
+ Rcpp::RObject __result; |
|
149 |
+// Rcpp::RNGScope __rngScope; |
|
150 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); |
|
151 |
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); |
|
152 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); |
|
153 |
+ __result = Rcpp::wrap(genot2AdjMat(y, f, numMut)); |
|
154 |
+ return __result; |
|
155 |
+ END_RCPP |
|
156 |
+} |
|
157 |
+ |
|
158 |
+ |
|
159 |
+// evalRGenotypeAndMut |
|
160 |
+Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th); |
|
161 |
+RcppExport SEXP OncoSimulR_peaksLandscape(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) { |
|
162 |
+BEGIN_RCPP |
|
163 |
+ Rcpp::RObject __result; |
|
164 |
+// Rcpp::RNGScope __rngScope; |
|
165 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); |
|
166 |
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); |
|
167 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); |
|
168 |
+ Rcpp::traits::input_parameter< double >::type th(thSEXP); |
|
169 |
+ __result = Rcpp::wrap(peaksLandscape(y, f, numMut, th)); |
|
170 |
+ return __result; |
|
171 |
+ END_RCPP |
|
172 |
+} |
|
173 |
+ |
|
174 |
+// just for testing. Eventually remove |
|
175 |
+Rcpp::IntegerVector accessibleGenotypes_former(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th); |
|
176 |
+RcppExport SEXP OncoSimulR_accessibleGenotypes_former(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) { |
|
177 |
+BEGIN_RCPP |
|
178 |
+ Rcpp::RObject __result; |
|
179 |
+// Rcpp::RNGScope __rngScope; |
|
180 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); |
|
181 |
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); |
|
182 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); |
|
183 |
+ Rcpp::traits::input_parameter< double >::type th(thSEXP); |
|
184 |
+ __result = Rcpp::wrap(accessibleGenotypes_former(y, f, numMut, th)); |
|
185 |
+ return __result; |
|
186 |
+ END_RCPP |
|
187 |
+} |
|
144 | 188 |
|
145 | 189 |
|
146 | 190 |
// // readFitnessEffects |
... | ... |
@@ -22,12 +22,12 @@ inline int HammingDistance(const Rcpp::IntegerVector& x, const Rcpp::IntegerVect |
22 | 22 |
} |
23 | 23 |
|
24 | 24 |
|
25 |
- |
|
25 |
+// eventually remove this. Left now for testing |
|
26 | 26 |
// [[Rcpp::export]] |
27 |
-Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, |
|
28 |
- Rcpp::NumericVector f, |
|
29 |
- Rcpp::IntegerVector numMut, // |
|
30 |
- double th) { |
|
27 |
+Rcpp::IntegerVector accessibleGenotypes_former(Rcpp::IntegerMatrix y, |
|
28 |
+ Rcpp::NumericVector f, |
|
29 |
+ Rcpp::IntegerVector numMut, // |
|
30 |
+ double th) { |
|
31 | 31 |
// Return just the indices. Could preserve fitness, but would need |
32 | 32 |
// another matrix. |
33 | 33 |
int ng = y.nrow(); //it counts the wt |
... | ... |
@@ -106,6 +106,189 @@ Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, |
106 | 106 |
|
107 | 107 |
|
108 | 108 |
|
109 |
+ |
|
110 |
+ |
|
111 |
+// [[Rcpp::export]] |
|
112 |
+Rcpp::NumericMatrix genot2AdjMat(Rcpp::IntegerMatrix y, |
|
113 |
+ Rcpp::NumericVector f, |
|
114 |
+ Rcpp::IntegerVector numMut) { |
|
115 |
+ // Return just the indices. Could preserve fitness, but would need |
|
116 |
+ // another matrix. |
|
117 |
+ int ng = y.nrow(); //it counts the wt |
|
118 |
+ Rcpp::NumericMatrix adm(ng, ng); |
|
119 |
+ |
|
120 |
+ // fill with NAs: https://stackoverflow.com/a/23753626 |
|
121 |
+ // Filling with NAs and in general having NAs might lead to performance |
|
122 |
+ // penalties. But I use the NAs in a lot of the code for accessible |
|
123 |
+ // genotypes, etc. |
|
124 |
+ std::fill( adm.begin(), adm.end(), Rcpp::NumericVector::get_na() ) ; |
|
125 |
+ int numMutdiff = 0; |
|
126 |
+ // I would have thought this would be faster. It ain't. |
|
127 |
+ // The last genotype never accesses anything. |
|
128 |
+ // for(int i = 0; i < (ng - 1); ++i) { |
|
129 |
+ // // Candidate genotypes to be accessed from i are always of larger |
|
130 |
+ // // mutation by 1. And candidates can thus not have smaller index |
|
131 |
+ // for(int j = (i + 1); j < ng; ++j) { |
|
132 |
+ // if( (numMut(j) == (numMut(i) + 1)) && |
|
133 |
+ // ( (f(j) - f(i)) >= th) && |
|
134 |
+ // (HammingDistance(y(i, _), y(j, _)) == 1) ) { |
|
135 |
+ // adm(i, j) = 1; |
|
136 |
+ // } else if( (numMut(j) > (numMut(i) + 1)) ) { |
|
137 |
+ // break; |
|
138 |
+ // } |
|
139 |
+ // } |
|
140 |
+ // } |
|
141 |
+ |
|
142 |
+ // The last genotype never accesses anything. |
|
143 |
+ for(int i = 0; i < (ng - 1); ++i) { |
|
144 |
+ // Candidate genotypes to be accessed from i are always of larger |
|
145 |
+ // mutation by 1. And candidates can thus not have smaller index |
|
146 |
+ for(int j = (i + 1); j < ng; ++j) { |
|
147 |
+ numMutdiff = numMut(j) - numMut(i); |
|
148 |
+ if( numMutdiff > 1) { // no more to search |
|
149 |
+ break; |
|
150 |
+ } else if(numMutdiff == 1) { |
|
151 |
+ if( HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1) { |
|
152 |
+ adm(i, j) = (f(j) - f(i)); |
|
153 |
+ } |
|
154 |
+ } |
|
155 |
+ } |
|
156 |
+ } |
|
157 |
+ return adm; |
|
158 |
+} |
|
159 |
+ |
|
160 |
+ |
|
161 |
+Rcpp::IntegerMatrix integerAdjMat(Rcpp::IntegerMatrix y, |
|
162 |
+ Rcpp::NumericVector f, |
|
163 |
+ Rcpp::IntegerVector numMut, // |
|
164 |
+ double th) { |
|
165 |
+ // Return a genotype adjacency matrix with a 1 if genotype j is |
|
166 |
+ // accessible (fitness >, within th) from i. |
|
167 |
+ int ng = y.nrow(); //it counts the wt |
|
168 |
+ Rcpp::IntegerMatrix adm(ng, ng); |
|
169 |
+ int numMutdiff = 0; |
|
170 |
+ // I would have thought this would be faster. It ain't. |
|
171 |
+ // The last genotype never accesses anything. |
|
172 |
+ // for(int i = 0; i < (ng - 1); ++i) { |
|
173 |
+ // // Candidate genotypes to be accessed from i are always of larger |
|
174 |
+ // // mutation by 1. And candidates can thus not have smaller index |
|
175 |
+ // for(int j = (i + 1); j < ng; ++j) { |
|
176 |
+ // if( (numMut(j) == (numMut(i) + 1)) && |
|
177 |
+ // ( (f(j) - f(i)) >= th) && |
|
178 |
+ // (HammingDistance(y(i, _), y(j, _)) == 1) ) { |
|
179 |
+ // adm(i, j) = 1; |
|
180 |
+ // } else if( (numMut(j) > (numMut(i) + 1)) ) { |
|
181 |
+ // break; |
|
182 |
+ // } |
|
183 |
+ // } |
|
184 |
+ // } |
|
185 |
+ |
|
186 |
+ // The last genotype never accesses anything. |
|
187 |
+ for(int i = 0; i < (ng - 1); ++i) { |
|
188 |
+ // Candidate genotypes to be accessed from i are always of larger |
|
189 |
+ // mutation by 1. And candidates can thus not have smaller index |
|
190 |
+ for(int j = (i + 1); j < ng; ++j) { |
|
191 |
+ numMutdiff = numMut(j) - numMut(i); |
|
192 |
+ if( numMutdiff > 1) { // no more to search |
|
193 |
+ break; |
|
194 |
+ } else if(numMutdiff == 1) { |
|
195 |
+ // f(j) - f(i) is faster than HammingDistance |
|
196 |
+ // but might lead to more evals? |
|
197 |
+ // or fewer, depending on landscape |
|
198 |
+ if( ( (f(j) - f(i)) >= th) && |
|
199 |
+ (HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1) |
|
200 |
+ ) { |
|
201 |
+ adm(i, j) = 1; |
|
202 |
+ // Rcpp::Rcout << "i = " << i << " j = " << j << " adm " << adm(i,j) << "\n"; |
|
203 |
+ } |
|
204 |
+ } |
|
205 |
+ } |
|
206 |
+ } |
|
207 |
+ return adm; |
|
208 |
+} |
|
209 |
+ |
|
210 |
+ |
|
211 |
+// used in both peaks and accessible genotypes |
|
212 |
+Rcpp::IntegerVector accessibleGenotypesPeaksLandscape(Rcpp::IntegerMatrix y, |
|
213 |
+ Rcpp::NumericVector f, |
|
214 |
+ Rcpp::IntegerVector numMut, // |
|
215 |
+ double th, |
|
216 |
+ bool returnpeaks) { |
|
217 |
+ // Return the indices. This is like accessibleGenotypes, but we do an |
|
218 |
+ // extra loop |
|
219 |
+ int ng = y.nrow(); //it counts the wt |
|
220 |
+ Rcpp::IntegerMatrix adm(ng, ng); |
|
221 |
+ |
|
222 |
+ adm = integerAdjMat(y, f, numMut, th); |
|
223 |
+ |
|
224 |
+ int numMutdiff = 0; |
|
225 |
+ |
|
226 |
+ // Slightly different logic from R: Do not resize object; set the row to |
|
227 |
+ // 0. |
|
228 |
+ int colsum = 0; |
|
229 |
+ // int indicator = 0; // indicator != 0 means we set one row to 0 |
|
230 |
+ // so we need to iterate at least once more. |
|
231 |
+ |
|
232 |
+ // accessible is the genotype number, not the column! WT is 1, |
|
233 |
+ // etc. This makes it easy to keep track of which are accessible. |
|
234 |
+ Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); |
|
235 |
+ // This is doable in one pass |
|
236 |
+ // while (true) { |
|
237 |
+ // indicator = 0; |
|
238 |
+ for(int k = 1; k < ng; ++k) { |
|
239 |
+ if(accessible(k) > 0) { |
|
240 |
+ colsum = std::accumulate(adm(Rcpp::_, k).begin(), |
|
241 |
+ adm(Rcpp::_, k).end(), 0); |
|
242 |
+ if(colsum == 0) { // This genotype ain't reachable |
|
243 |
+ // Nothing can be reached from this genotype; fill with 0. |
|
244 |
+ adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); |
|
245 |
+ accessible(k) = -9; |
|
246 |
+ // indicator = 1; |
|
247 |
+ } |
|
248 |
+ } |
|
249 |
+ } |
|
250 |
+ // if(indicator == 0) break; |
|
251 |
+ // } |
|
252 |
+ if(!returnpeaks) { |
|
253 |
+ return accessible; |
|
254 |
+ } else { |
|
255 |
+ // BEWARE: this will not work if several connected genotypes |
|
256 |
+ // have the same fitness and are maxima |
|
257 |
+ int rowsum = 0; |
|
258 |
+ Rcpp::IntegerVector peaks; |
|
259 |
+ for(int k = 0; k < ng; ++k) { |
|
260 |
+ if(accessible(k) > 0) { |
|
261 |
+ rowsum = std::accumulate(adm(k, Rcpp::_).begin(), |
|
262 |
+ adm(k, Rcpp::_).end(), 0); |
|
263 |
+ if(rowsum == 0) { // This genotype doesn't have children |
|
264 |
+ peaks.push_back(k + 1); // k is index. But in R, WT is in pos 1 |
|
265 |
+ } |
|
266 |
+ } |
|
267 |
+ } |
|
268 |
+ return peaks; |
|
269 |
+ } |
|
270 |
+} |
|
271 |
+ |
|
272 |
+ |
|
273 |
+ |
|
274 |
+// [[Rcpp::export]] |
|
275 |
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, |
|
276 |
+ Rcpp::NumericVector f, |
|
277 |
+ Rcpp::IntegerVector numMut, // |
|
278 |
+ double th) { |
|
279 |
+ return accessibleGenotypesPeaksLandscape(y, f, numMut, th, false); |
|
280 |
+} |
|
281 |
+ |
|
282 |
+// [[Rcpp::export]] |
|
283 |
+Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, |
|
284 |
+ Rcpp::NumericVector f, |
|
285 |
+ Rcpp::IntegerVector numMut, // |
|
286 |
+ double th) { |
|
287 |
+ return accessibleGenotypesPeaksLandscape(y, f, numMut, th, true); |
|
288 |
+} |
|
289 |
+ |
|
290 |
+ |
|
291 |
+ |
|
109 | 292 |
// // This would make it easier returning the actual accessible genotypes easily |
110 | 293 |
// // preserving the fitness if needed |
111 | 294 |
// // Not being used now |
... | ... |
@@ -169,4 +352,107 @@ Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, |
169 | 352 |
|
170 | 353 |
|
171 | 354 |
|
355 |
+// // [[Rcpp::export]] |
|
356 |
+// Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, |
|
357 |
+// Rcpp::NumericVector f, |
|
358 |
+// Rcpp::IntegerVector numMut, // |
|
359 |
+// double th) { |
|
360 |
+ |
|
361 |
+// // Return just the indices. Could preserve fitness, but would need |
|
362 |
+// // another matrix. |
|
363 |
+// int ng = y.nrow(); //it counts the wt |
|
364 |
+// Rcpp::IntegerMatrix adm(ng, ng); |
|
365 |
+ |
|
366 |
+// adm = integerAdjMat(y, f, numMut, th); |
|
367 |
+ |
|
368 |
+// int numMutdiff = 0; |
|
172 | 369 |
|
370 |
+// // Slightly different logic from R: Do not resize object; set the row to |
|
371 |
+// // 0. |
|
372 |
+// int colsum = 0; |
|
373 |
+// // int indicator = 0; // indicator != 0 means we set one row to 0 |
|
374 |
+// // so we need to iterate at least once more. |
|
375 |
+ |
|
376 |
+// // accessible is the genotype number, not the column! WT is 1, |
|
377 |
+// // etc. This makes it easy to keep track of which are accessible. |
|
378 |
+// Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); |
|
379 |
+ |
|
380 |
+// // This is doable in one pass |
|
381 |
+// // while (true) { |
|
382 |
+// // indicator = 0; |
|
383 |
+// for(int k = 1; k < ng; ++k) { |
|
384 |
+// if(accessible(k) > 0) { |
|
385 |
+// colsum = std::accumulate(adm(Rcpp::_, k).begin(), |
|
386 |
+// adm(Rcpp::_, k).end(), 0); |
|
387 |
+// if(colsum == 0) { // This genotype ain't reachable |
|
388 |
+// // Nothing can be reached from this genotype; fill with 0. |
|
389 |
+// adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); |
|
390 |
+// accessible(k) = -9; |
|
391 |
+// // indicator = 1; |
|
392 |
+// } |
|
393 |
+// } |
|
394 |
+// } |
|
395 |
+// // if(indicator == 0) break; |
|
396 |
+// // } |
|
397 |
+// return accessible; |
|
398 |
+// } |
|
399 |
+ |
|
400 |
+ |
|
401 |
+ |
|
402 |
+ |
|
403 |
+// // [[Rcpp::export]] |
|
404 |
+// Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, |
|
405 |
+// Rcpp::NumericVector f, |
|
406 |
+// Rcpp::IntegerVector numMut, // |
|
407 |
+// double th) { |
|
408 |
+// // Return the indices. This is like accessibleGenotypes, but we do an |
|
409 |
+// // extra loop |
|
410 |
+// int ng = y.nrow(); //it counts the wt |
|
411 |
+// Rcpp::IntegerMatrix adm(ng, ng); |
|
412 |
+ |
|
413 |
+// adm = integerAdjMat(y, f, numMut, th); |
|
414 |
+ |
|
415 |
+// int numMutdiff = 0; |
|
416 |
+ |
|
417 |
+// // Slightly different logic from R: Do not resize object; set the row to |
|
418 |
+// // 0. |
|
419 |
+// int colsum = 0; |
|
420 |
+// // int indicator = 0; // indicator != 0 means we set one row to 0 |
|
421 |
+// // so we need to iterate at least once more. |
|
422 |
+ |
|
423 |
+// // accessible is the genotype number, not the column! WT is 1, |
|
424 |
+// // etc. This makes it easy to keep track of which are accessible. |
|
425 |
+// Rcpp::IntegerVector accessible = Rcpp::seq_len(ng); |
|
426 |
+// // This is doable in one pass |
|
427 |
+// // while (true) { |
|
428 |
+// // indicator = 0; |
|
429 |
+// for(int k = 1; k < ng; ++k) { |
|
430 |
+// if(accessible(k) > 0) { |
|
431 |
+// colsum = std::accumulate(adm(Rcpp::_, k).begin(), |
|
432 |
+// adm(Rcpp::_, k).end(), 0); |
|
433 |
+// if(colsum == 0) { // This genotype ain't reachable |
|
434 |
+// // Nothing can be reached from this genotype; fill with 0. |
|
435 |
+// adm(k, Rcpp::_) = Rcpp::IntegerVector(ng); |
|
436 |
+// accessible(k) = -9; |
|
437 |
+// // indicator = 1; |
|
438 |
+// } |
|
439 |
+// } |
|
440 |
+// } |
|
441 |
+// // if(indicator == 0) break; |
|
442 |
+// // } |
|
443 |
+ |
|
444 |
+// int rowsum = 0; |
|
445 |
+// Rcpp::IntegerVector peaks; |
|
446 |
+// for(int k = 1; k < ng; ++k) { |
|
447 |
+// if(accessible(k) > 0) { |
|
448 |
+// rowsum = std::accumulate(adm(k, Rcpp::_).begin(), |
|
449 |
+// adm(k, Rcpp::_).end(), 0); |
|
450 |
+// if(rowsum == 0) { // This genotype doesn't have children |
|
451 |
+// peaks.push_back(k); |
|
452 |
+// } |
|
453 |
+// } |
|
454 |
+// } |
|
455 |
+ |
|
456 |
+ |
|
457 |
+// return peaks; |
|
458 |
+// } |
... | ... |
@@ -238,11 +238,28 @@ double ti_nextTime_tmax_2_st(const spParamsP& spP, |
238 | 238 |
print_spP(spP); |
239 | 239 |
throw std::range_error("ti: ti not finite"); |
240 | 240 |
} |
241 |
- if(ti == 0.0) { |
|
241 |
+ if((ti == 0.0) || (ti <= DBL_MIN)) { |
|
242 |
+// #ifdef DEBUGW |
|
243 |
+// // this is too verbose for routine use |
|
244 |
+// std::string ti_dbl_comp; |
|
245 |
+// if( ti == DBL_MIN) { |
|
246 |
+// ti_dbl_comp = "ti_equal_DBL_MIN"; |
|
247 |
+// DP2(ti); |
|
248 |
+// } else if (ti == 0.0) { |
|
249 |
+// ti_dbl_comp = "ti_equal_0.0"; |
|
250 |
+// } else if ( (ti < DBL_MIN) && (ti > 0.0) ) { |
|
251 |
+// ti_dbl_comp = "ti_gt_0.0_lt_DBL_MIN"; |
|
252 |
+// DP2(ti); |
|
253 |
+// } else { |
|
254 |
+// ti_dbl_comp = "IMPOSSIBLE!"; |
|
255 |
+// } |
|
256 |
+// DP2(ti_dbl_comp); |
|
257 |
+// #endif |
|
242 | 258 |
#ifdef DEBUGV |
243 | 259 |
// FIXME: pass verbosity as argument, and return the warning |
244 | 260 |
// if set to more than 0? |
245 |
- Rcpp::Rcout << "\n\n\n WARNING: ti == 0. Setting it to DBL_MIN \n"; |
|
261 |
+ Rcpp::Rcout << "\n\n\n WARNING: ti == 0. Setting it to DBL_MIN \n"; |
|
262 |
+ |
|
246 | 263 |
double eq12 = pow( (spP.R - spP.W + 2.0 * spP.death) / |
247 | 264 |
(spP.R + spP.W - 2.0 * spP.birth) , spP.popSize); |
248 | 265 |
|
... | ... |
@@ -279,10 +296,18 @@ double ti_nextTime_tmax_2_st(const spParamsP& spP, |
279 | 296 |
// Rcpp::Rcout << "ti set to DBL_MIN\n"; |
280 | 297 |
// Yes, abort because o.w. we can repeat it many, manu times |
281 | 298 |
// throw std::range_error("ti set to DBL_MIN"); |
299 |
+ // Even just touching DBL_MIN is enough to want a rerunExcept; |
|
300 |
+ // no need for it to be 0.0. |
|
282 | 301 |
throw rerunExcept("ti set to DBL_MIN"); |
283 | 302 |
} |
284 |
- if(ti < 0.001) ++ti_e3; |
|
303 |
+ if(ti < (2*DBL_MIN)) ++ti_e3; // Counting how often this happens. |
|
304 |
+ // Can be smaller than the ti_dbl_min count |
|
285 | 305 |
ti += currentTime; |
306 |
+ // But we can still have issues here if the difference is too small |
|
307 |
+ if( (ti <= currentTime) ) { |
|
308 |
+ // Rcpp::Rcout << "\n (ti <= currentTime): expect problems\n"; |
|
309 |
+ throw rerunExcept("ti <= currentTime"); |
|
310 |
+ } |
|
286 | 311 |
} |
287 | 312 |
} |
288 | 313 |
return ti; |
... | ... |
@@ -1106,3 +1131,26 @@ void fill_SStats(Rcpp::NumericMatrix& perSampleStats, |
1106 | 1131 |
perSampleStats(i, 4) = static_cast<double>(sampleNDrLargestPop[i]); |
1107 | 1132 |
} |
1108 | 1133 |
} |
1134 |
+ |
|
1135 |
+// Do not use this routinely. Too expensive and not needed. |
|
1136 |
+void detect_ti_duplicates(const std::multimap<double, int>& m, |
|
1137 |
+ const double ti, |
|
1138 |
+ const int species) { |
|
1139 |
+ |
|
1140 |
+ double maxti = m.rbegin()->first; |
|
1141 |
+ if((ti < maxti) && (m.count(ti) > 1)) { |
|
1142 |
+ Rcpp::Rcout << "\n *** duplicated ti for species " << species << "\n"; |
|
1143 |
+ |
|
1144 |
+ std::multimap<double, int>::const_iterator it = m.lower_bound(ti); |
|
1145 |
+ std::multimap<double, int>::const_iterator it2 = m.upper_bound(ti); |
|
1146 |
+ |
|
1147 |
+ while(it != it2) { |
|
1148 |
+ Rcpp::Rcout << "\tgenotype: " << (it->second) << "; time: " << |
|
1149 |
+ (it->first) << "\n"; |
|
1150 |
+ ++it; |
|
1151 |
+ } |
|
1152 |
+ Rcpp::Rcout << "\n\n\n"; |
|
1153 |
+ } |
|
1154 |
+} |
|
1155 |
+ |
|
1156 |
+ |
... | ... |
@@ -183,6 +183,8 @@ void updateRatesBeeren(std::vector<spParamsP>& popParams, |
183 | 183 |
const int& mutationPropGrowth, |
184 | 184 |
const double& mu); |
185 | 185 |
|
186 |
- |
|
186 |
+void detect_ti_duplicates(const std::multimap<double, int>& m, |
|
187 |
+ const double ti, |
|
188 |
+ const int spcies); |
|
187 | 189 |
#endif |
188 | 190 |
|
... | ... |
@@ -1,18 +1,189 @@ |
1 |
-test_that("We obtain same accessible genotypes with different functions", { |
|
1 |
+test_that("We obtain same accessible genotypes with different functions plus genot_to_adjm_mat", { |
|
2 | 2 |
## More likely to catch bugs if many iters, rather than large matrices |
3 | 3 |
niter <- 100 |
4 | 4 |
for(i in 1:niter) { |
5 | 5 |
## cat("\n i iteration fast accessible comp ", i) |
6 | 6 |
for(ng in 2:6) { |
7 | 7 |
rtmp <- rfitness(ng) |
8 |
- a1 <- OncoSimulR:::faster_accessible_genotypes_R(rtmp, 0) |
|
8 |
+ |
|
9 | 9 |
ajm <- OncoSimulR:::genot_to_adj_mat(rtmp) |
10 |
+ ajmr <- OncoSimulR:::genot_to_adj_mat_R(rtmp) |
|
11 |
+ stopifnot(all.equal(ajm, ajmr)) |
|
12 |
+ |
|
13 |
+ a1 <- OncoSimulR:::faster_accessible_genotypes_R(rtmp, 0) |
|
10 | 14 |
a2 <- colnames(OncoSimulR:::filter_inaccessible(ajm, 0)) |
11 | 15 |
a3 <- OncoSimulR:::wrap_accessibleGenotypes(rtmp, 0) |
16 |
+ a4 <- OncoSimulR:::wrap_accessibleGenotypes_former(rtmp, 0) |
|
17 |
+ |
|
12 | 18 |
stopifnot(identical(as.integer(a1), a3)) |
13 | 19 |
stopifnot(identical(as.integer(a2), a3)) |
14 |
- stopifnot(all(a2 == a3)) |
|
15 |
- stopifnot(identical(a2, as.character(a3))) |
|
20 |
+ stopifnot(all(a3 == a4)) |
|
21 |
+ |
|
16 | 22 |
} |
17 | 23 |
} |
18 | 24 |
}) |
25 |
+ |
|
26 |
+ |
|
27 |
+test_that("The indices of accessible genotypes are correct", { |
|
28 |
+ ## Make sure we do not assume matrix is ordered |
|
29 |
+ |
|
30 |
+ mf1 <- rbind(c(0, 0, 1), |
|
31 |
+ c(1, 0, 4), |
|
32 |
+ c(0, 1, .2), |
|
33 |
+ c(1, 1, 3)) |
|
34 |
+ expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf1, 0), |
|
35 |
+ c(1, 2)) |
|
36 |
+ mf2 <- rbind(c(0, 0, 1), |
|
37 |
+ c(1, 0, 4), |
|
38 |
+ c(0, 1, .2), |
|
39 |
+ c(1, 1, 5)) |
|
40 |
+ expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0), |
|
41 |
+ c(1, 2, 4)) |
|
42 |
+ |
|
43 |
+ |
|
44 |
+ mf1 <- rbind(c(0, 0, 1), |
|
45 |
+ c(0, 1, .2), |
|
46 |
+ c(1, 0, 4), |
|
47 |
+ c(1, 1, 3)) |
|
48 |
+ expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf1, 0), |
|
49 |
+ c(1, 3)) |
|
50 |
+ |
|
51 |
+ mf2 <- rbind( |
|
52 |
+ c(0, 1, .2), |
|
53 |
+ c(0, 0, 1), |
|
54 |
+ c(1, 0, 4), |
|
55 |
+ c(1, 1, 5) |
|
56 |
+ ) |
|
57 |
+ expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0), |
|
58 |
+ c(2, 3, 4)) |
|
59 |
+ |
|
60 |
+ mf2 <- rbind( |
|
61 |
+ c(0, 1, .2), |
|
62 |
+ c(0, 0, 1), |
|
63 |
+ c(1, 1, 5), |
|
64 |
+ c(1, 0, 4) |
|
65 |
+ ) |
|
66 |
+ expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0), |
|
67 |
+ c(2, 3, 4)) |
|
68 |
+ |
|
69 |
+ mf2 <- rbind( |
|
70 |
+ c(0, 1, .2), |
|
71 |
+ c(0, 0, 1), |
|
72 |
+ c(1, 1, 5), |
|
73 |
+ c(1, 0, .4) |
|
74 |
+ ) |
|
75 |
+ expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0), |
|
76 |
+ c(2)) |
|
77 |
+ |
|
78 |
+}) |
|
79 |
+ |
|
80 |
+ |
|
81 |
+test_that("The indices of adjancey matrices are correct", { |
|
82 |
+ ## Make sure we do not assume matrix is ordered |
|
83 |
+ |
|
84 |
+ trueam <- matrix(NA, nrow = 4, ncol = 4) |
|
85 |
+ trueam[1, 2] <- 3 |
|
86 |
+ trueam[1, 3] <- -0.8 |
|
87 |
+ trueam[2, 4] <- -1 |
|
88 |
+ trueam[3, 4] <- 2.8 |
|
89 |
+ colnames(trueam) <- rownames(trueam) <- 1:4 |
|
90 |
+ |
|
91 |
+ mf1 <- rbind(c(0, 0, 1), |
|
92 |
+ c(1, 0, 4), |
|
93 |
+ c(0, 1, .2), |
|
94 |
+ c(1, 1, 3)) |
|
95 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf1), |
|
96 |
+ OncoSimulR:::genot_to_adj_mat_R(mf1))) |
|
97 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf1), |
|
98 |
+ trueam[1:4, 1:4])) |
|
99 |
+ |
|
100 |
+ ## these two make use of the fact that we are forced to reorder some |
|
101 |
+ mf2 <- mf1[c(2, 1, 4, 3), ] |
|
102 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
103 |
+ OncoSimulR:::genot_to_adj_mat_R(mf2))) |
|
104 |
+ trueam2 <- trueam |
|
105 |
+ colnames(trueam2) <- rownames(trueam2) <- colnames(trueam)[c(2, 1, 4, 3)] |
|
106 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
107 |
+ trueam2)) |
|
108 |
+ |
|
109 |
+ ## ## this is potentially confusing. See below for much cleaner |
|
110 |
+ ## ## which compares matrices ordered by their names |
|
111 |
+ ## mf2 <- mf1[c(3, 1, 4, 2), ] |
|
112 |
+ ## expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
113 |
+ ## OncoSimulR:::genot_to_adj_mat_R(mf2))) |
|
114 |
+ ## trueam2 <- trueam[c(1, 3, 2, 4), c(1, 3, 2, 4)] |
|
115 |
+ ## colnames(trueam2) <- rownames(trueam2) <- c(2, 1, 4, 3) |
|
116 |
+ ## expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
117 |
+ ## trueam2)) |
|
118 |
+ |
|
119 |
+ |
|
120 |
+ |
|
121 |
+ |
|
122 |
+ ## the next are cleaner: I compare the matrices ordered by the new names |
|
123 |
+ ii <- c(2, 1, 4, 3) |
|
124 |
+ mf2 <- mf1[ii, ] |
|
125 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
126 |
+ OncoSimulR:::genot_to_adj_mat_R(mf2))) |
|
127 |
+ trueam2 <- trueam[ii, ii] |
|
128 |
+ colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2) |
|
129 |
+ ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2) |
|
130 |
+ ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))] |
|
131 |
+ expect_true(all.equal(ogammf2, trueam2)) |
|
132 |
+ expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4) |
|
133 |
+ expect_false(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam))) |
|
134 |
+ expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped |
|
135 |
+ |
|
136 |
+ |
|
137 |
+ |
|
138 |
+ |
|
139 |
+ ii <- c(3, 1, 2, 4) |
|
140 |
+ mf2 <- mf1[ii, ] |
|
141 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
142 |
+ OncoSimulR:::genot_to_adj_mat_R(mf2))) |
|
143 |
+ trueam2 <- trueam[ii, ii] |
|
144 |
+ colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2) |
|
145 |
+ ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2) |
|
146 |
+ ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))] |
|
147 |
+ expect_true(all.equal(ogammf2, trueam2)) |
|
148 |
+ expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4) |
|
149 |
+ expect_false(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam))) |
|
150 |
+ expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped |
|
151 |
+ |
|
152 |
+ |
|
153 |
+ |
|
154 |
+ |
|
155 |
+ ii <- c(1, 3, 2, 4) |
|
156 |
+ mf2 <- mf1[ii, ] |
|
157 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
158 |
+ OncoSimulR:::genot_to_adj_mat_R(mf2))) |
|
159 |
+ trueam2 <- trueam[ii, ii] |
|
160 |
+ colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2) |
|
161 |
+ ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2) |
|
162 |
+ ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))] |
|
163 |
+ expect_true(all.equal(ogammf2, trueam2)) |
|
164 |
+ expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4) |
|
165 |
+ ## note this |
|
166 |
+ expect_true(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam))) |
|
167 |
+ expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped |
|
168 |
+ |
|
169 |
+ |
|
170 |
+ |
|
171 |
+ ii <- c(4, 3, 1, 2) |
|
172 |
+ mf2 <- mf1[ii, ] |
|
173 |
+ expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2), |
|
174 |
+ OncoSimulR:::genot_to_adj_mat_R(mf2))) |
|
175 |
+ trueam2 <- trueam[ii, ii] |
|
176 |
+ colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2) |
|
177 |
+ ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2) |
|
178 |
+ ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))] |
|
179 |
+ expect_true(all.equal(ogammf2, trueam2)) |
|
180 |
+ expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4) |
|
181 |
+ expect_false(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam))) |
|
182 |
+ expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped |
|
183 |
+ |
|
184 |
+}) |
|
185 |
+ |
|
186 |
+ |
|
187 |
+ |
|
188 |
+ |
|
189 |
+## For peaks, see code in test.plotFitnessLandscape.R |
... | ... |
@@ -272,3 +272,451 @@ test_that("internal peak valley functions", { |
272 | 272 |
|
273 | 273 |
|
274 | 274 |
}) |
275 |
+ |
|
276 |
+ |
|
277 |
+## Beware that using peak_valley on only_accessible makes a difference |
|
278 |
+test_that("internal peak valley functions w/wo inaccessible filter", { |
|
279 |
+ ## A is accessible, a peak |
|
280 |
+ ## AB is a peak if only forward. But there is no |
|
281 |
+ ## reciprocal sign epistasis here! |
|
282 |
+ |
|
283 |
+ ## We want peaks in general, not just |
|
284 |
+ ## under assumption of "no back mutation"? |
|
285 |
+ |
|
286 |
+ ## Well, no, that is not obvious with cancer progression models if we |
|
287 |
+ ## do not allow back mutations. |
|
288 |
+ |
|
289 |
+ ## We get a different result when we restrict to accessible |
|
290 |
+ ## because all < 0 in adjacency are turned to NAs. |
|
291 |
+ |
|
292 |
+ ## Thinking in terms of adjacency matrix, AB is not a peak if it has a |
|
293 |
+ ## positive and a negative entry in its column, because the negative |
|
294 |
+ ## entry means there is an ancestor with larger fitness. |
|
295 |
+ ## But see below for why plainly using the adjacency matrix can give bad results. |
|
296 |
+ |
|
297 |
+ ## The next matrices are all fitness matrix. Last column is fitness. |
|
298 |
+ mf1 <- rbind( |
|
299 |
+ c(0, 0, 1), |
|
300 |
+ c(1, 0, 4), |
|
301 |
+ c(0, 1, 2), |
|
302 |
+ c(1, 1, 3) |
|
303 |
+ ) |
|
304 |
+ |
|
305 |
+ plotFitnessLandscape(mf1) |
|
306 |
+ |
|
307 |
+ expect_equal( |
|
308 |
+ OncoSimulR:::peak_valley( |
|
309 |
+ OncoSimulR:::genot_to_adj_mat(mf1))$peak, 2) |
|
310 |
+ |
|
311 |
+ expect_equal( |
|
312 |
+ OncoSimulR:::peak_valley( |
|
313 |
+ OncoSimulR:::filter_inaccessible( |
|
314 |
+ OncoSimulR:::genot_to_adj_mat(mf1), 0))$peak, |
|
315 |
+ c(2, 4)) |
|
316 |
+ |
|
317 |
+ expect_equal( |
|
318 |
+ OncoSimulR:::fast_peaks(mf1, 0), |
|
319 |
+ c(2, 4)) |
|
320 |
+ |
|
321 |
+ |
|
322 |
+ ## reorder the rows of the matrix. Affects fast_peaks, as it should |
|
323 |
+ mf1 <- rbind( |
|
324 |
+ c(1, 0, 4), |
|
325 |
+ c(0, 0, 1), |
|
326 |
+ c(1, 1, 3), |
|
327 |
+ c(0, 1, 2) |
|
328 |
+ ) |
|
329 |
+ |
|
330 |
+ plotFitnessLandscape(mf1) |
|
331 |
+ ## this is not affected, since it uses, by construction, the ordered matrix |
|
332 |
+ expect_equal( |
|
333 |
+ OncoSimulR:::peak_valley( |
|
334 |
+ OncoSimulR:::genot_to_adj_mat(mf1))$peak, 2) |
|
335 |
+ ## ditto |
|
336 |
+ expect_equal( |
|
337 |
+ OncoSimulR:::peak_valley( |
|
338 |
+ OncoSimulR:::filter_inaccessible( |
|
339 |
+ OncoSimulR:::genot_to_adj_mat(mf1), 0))$peak, |
|
340 |
+ c(2, 4)) |
|
341 |
+ expect_equal( |
|