git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@123922 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: OncoSimulR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Forward Genetic Simulation of Cancer Progression with Epistasis |
4 |
-Version: 2.5.0 |
|
5 |
-Date: 2016-09-22 |
|
4 |
+Version: 2.5.1 |
|
5 |
+Date: 2016-11-12 |
|
6 | 6 |
Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"), |
7 | 7 |
email = "rdiaz02@gmail.com"), |
8 | 8 |
person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com")) |
... | ... |
@@ -9,7 +9,8 @@ export("oncoSimulPop", "oncoSimulIndiv", "samplePop", |
9 | 9 |
"evalAllGenotypesFitAndMut", |
10 | 10 |
"rfitness", |
11 | 11 |
"plotFitnessLandscape", |
12 |
- "to_Magellan" |
|
12 |
+ "to_Magellan", |
|
13 |
+ "sampledGenotypes" |
|
13 | 14 |
) |
14 | 15 |
|
15 | 16 |
S3method(plot, oncosimul) |
... | ... |
@@ -47,7 +48,8 @@ importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate", |
47 | 48 |
"filter") |
48 | 49 |
importFrom("smatr", "ma") ## for major axis regression in some tests |
49 | 50 |
importFrom("car", "linearHypothesis") |
50 |
- |
|
51 |
+## importFrom("slam", "simple_triplet_zero_matrix", ## "colapply_simple_triplet_matrix", |
|
52 |
+## "col_sums") |
|
51 | 53 |
|
52 | 54 |
|
53 | 55 |
|
... | ... |
@@ -63,6 +63,8 @@ oncoSimulSample <- function(Nindiv, |
63 | 63 |
typeSample = "whole", |
64 | 64 |
thresholdWhole = 0.5, |
65 | 65 |
initMutant = NULL, |
66 |
+ AND_DrvProbExit = FALSE, |
|
67 |
+ fixation = NULL, |
|
66 | 68 |
verbosity = 1, |
67 | 69 |
showProgress = FALSE, |
68 | 70 |
seed = "auto"){ |
... | ... |
@@ -190,7 +192,9 @@ oncoSimulSample <- function(Nindiv, |
190 | 192 |
initMutant = initMutant, |
191 | 193 |
keepPhylog = keepPhylog, |
192 | 194 |
mutationPropGrowth = mutationPropGrowth, |
193 |
- detectionProb = detectionProb) |
|
195 |
+ detectionProb = detectionProb, |
|
196 |
+ AND_DrvProbExit = AND_DrvProbExit, |
|
197 |
+ fixation = fixation) |
|
194 | 198 |
if(tmp$other$UnrecoverExcept) { |
195 | 199 |
return(f.out.unrecover.except(tmp)) |
196 | 200 |
} |
... | ... |
@@ -291,7 +295,7 @@ samplePop <- function(x, timeSample = "last", |
291 | 295 |
popSizeSample = popSizeSample) |
292 | 296 |
dim(z) <- c(1, length(z)) |
293 | 297 |
if(is.null(gN) && (!is.null(x$geneNames))) |
294 |
- gN <- geneNames |
|
298 |
+ gN <- x$geneNames |
|
295 | 299 |
} |
296 | 300 |
message("\n Subjects by Genes matrix of ", |
297 | 301 |
nrow(z), " subjects and ", |
... | ... |
@@ -338,6 +342,8 @@ oncoSimulPop <- function(Nindiv, |
338 | 342 |
errorHitWallTime = TRUE, |
339 | 343 |
errorHitMaxTries = TRUE, |
340 | 344 |
initMutant = NULL, |
345 |
+ AND_DrvProbExit = FALSE, |
|
346 |
+ fixation = NULL, |
|
341 | 347 |
verbosity = 0, |
342 | 348 |
mc.cores = detectCores(), |
343 | 349 |
seed = "auto") { |
... | ... |
@@ -379,7 +385,9 @@ oncoSimulPop <- function(Nindiv, |
379 | 385 |
seed = seed, keepPhylog = keepPhylog, |
380 | 386 |
initMutant = initMutant, |
381 | 387 |
mutationPropGrowth = mutationPropGrowth, |
382 |
- detectionProb = detectionProb), |
|
388 |
+ detectionProb = detectionProb, |
|
389 |
+ AND_DrvProbExit = AND_DrvProbExit, |
|
390 |
+ fixation = fixation), |
|
383 | 391 |
mc.cores = mc.cores |
384 | 392 |
) |
385 | 393 |
class(pop) <- "oncosimulpop" |
... | ... |
@@ -422,16 +430,33 @@ oncoSimulIndiv <- function(fp, |
422 | 430 |
errorHitMaxTries = TRUE, |
423 | 431 |
verbosity = 0, |
424 | 432 |
initMutant = NULL, |
433 |
+ AND_DrvProbExit = FALSE, |
|
434 |
+ fixation = NULL, |
|
425 | 435 |
seed = NULL |
426 | 436 |
) { |
427 | 437 |
call <- match.call() |
428 |
- if(all(c(is.na(detectionProb), |
|
429 |
- is.na(detectionSize), |
|
430 |
- is.na(detectionDrivers), |
|
431 |
- is.na(finalTime)))) |
|
438 |
+ if(all(c(is_null_na(detectionProb), |
|
439 |
+ is_null_na(detectionSize), |
|
440 |
+ is_null_na(detectionDrivers), |
|
441 |
+ is_null_na(finalTime), |
|
442 |
+ is_null_na(fixation) |
|
443 |
+ ))) |
|
432 | 444 |
stop("At least one stopping condition should be given.", |
433 | 445 |
" At least one of detectionProb, detectionSize, detectionDrivers,", |
434 |
- " finalTime. Otherwise, we'll run forever.") |
|
446 |
+ " finalTime. Otherwise, we'll run until aborted by max.wall.time,", |
|
447 |
+ " max.num.tries, and the like.") |
|
448 |
+ |
|
449 |
+ if(AND_DrvProbExit && (is_null_na(detectionProb) || is_null_na(detectionDrivers))) |
|
450 |
+ stop("AND_DrvProbExit is TRUE: both of detectionProb and detectionDrivers", |
|
451 |
+ " must be non NA.") |
|
452 |
+ if(AND_DrvProbExit && !is_null_na(detectionSize)) { |
|
453 |
+ warning("With AND_DrvProbExit = TRUE, detectionSize is ignored.") |
|
454 |
+ detectionSize <- NA |
|
455 |
+ } |
|
456 |
+ if(inherits(fp, "fitnessEffects")) { |
|
457 |
+ s <- sh <- NULL ## force it soon! |
|
458 |
+ } |
|
459 |
+ |
|
435 | 460 |
## legacies from poor name choices |
436 | 461 |
typeFitness <- switch(model, |
437 | 462 |
"Bozic" = "bozic1", |
... | ... |
@@ -467,7 +492,8 @@ oncoSimulIndiv <- function(fp, |
467 | 492 |
if(model %in% c("Bozic", "Exp") ) |
468 | 493 |
minDetectDrvCloneSz <- 0 |
469 | 494 |
else if (model %in% c("McFL", "McFarlandLog")) |
470 |
- minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers) |
|
495 |
+ minDetectDrvCloneSz <- initSize |
|
496 |
+ ## minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers) |
|
471 | 497 |
else |
472 | 498 |
stop("Unknown model") |
473 | 499 |
} |
... | ... |
@@ -484,7 +510,7 @@ oncoSimulIndiv <- function(fp, |
484 | 510 |
## No user-visible magic numbers |
485 | 511 |
## if(is.null(keepEvery)) |
486 | 512 |
## keepEvery <- -9 |
487 |
- if(is.na(keepEvery)) keepEvery <- -9 |
|
513 |
+ if(is_null_na(keepEvery)) keepEvery <- -9 |
|
488 | 514 |
|
489 | 515 |
|
490 | 516 |
if( (keepEvery > 0) & (keepEvery < sampleEvery)) { |
... | ... |
@@ -499,10 +525,11 @@ oncoSimulIndiv <- function(fp, |
499 | 525 |
if( (typeFitness == "exp") && (death != 1) ) |
500 | 526 |
warning("Using fitness exp with death != 1") |
501 | 527 |
|
502 |
- |
|
503 |
- if(is.na(detectionDrivers)) detectionDrivers <- (2^31) - 1 |
|
504 |
- if(is.na(detectionSize)) detectionSize <- Inf |
|
505 |
- if(is.na(finalTime)) finalTime <- Inf |
|
528 |
+ if(!is_null_na(detectionDrivers) && (detectionDrivers >= 1e9)) |
|
529 |
+ stop("detectionDrivers > 1e9; this doesn't seem reasonable") |
|
530 |
+ if(is_null_na(detectionDrivers)) detectionDrivers <- (2^31) - 1 |
|
531 |
+ if(is_null_na(detectionSize)) detectionSize <- Inf |
|
532 |
+ if(is_null_na(finalTime)) finalTime <- Inf |
|
506 | 533 |
|
507 | 534 |
|
508 | 535 |
if(!inherits(fp, "fitnessEffects")) { |
... | ... |
@@ -515,12 +542,18 @@ oncoSimulIndiv <- function(fp, |
515 | 542 |
stop(m) |
516 | 543 |
|
517 | 544 |
} |
545 |
+ if(AND_DrvProbExit) { |
|
546 |
+ stop("The AND_DrvProbExit = TRUE setting is invalid", |
|
547 |
+ " with the old poset format.") |
|
548 |
+ } |
|
518 | 549 |
if(!is.null(muEF)) |
519 |
- stop("Mutator effects cannot be especified with the old poset format") |
|
550 |
+ stop("Mutator effects cannot be specified with the old poset format.") |
|
520 | 551 |
if( length(initMutant) > 0) |
521 | 552 |
warning("With the old poset format you can no longer use initMutant.", |
522 | 553 |
" The initMutant you passed will be ignored.") |
523 |
- ## stop("With the old poset, initMutant can only take a single value.") |
|
554 |
+ ## stop("With the old poset, initMutant can only take a single value.") |
|
555 |
+ if(!is_null_na(fixation)) |
|
556 |
+ stop("'fixation' cannot be specified with the old poset format.") |
|
524 | 557 |
## Seeding C++ is now much better in new version |
525 | 558 |
if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed |
526 | 559 |
## name is a legacy. This is really the seed for the C++ generator. |
... | ... |
@@ -530,7 +563,7 @@ oncoSimulIndiv <- function(fp, |
530 | 563 |
if(verbosity >= 2) |
531 | 564 |
cat(paste("\n Using ", seed, " as seed for C++ generator\n")) |
532 | 565 |
|
533 |
- if(!is.na(detectionProb)) stop("detectionProb cannot be used in v.1 objects") |
|
566 |
+ if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects") |
|
534 | 567 |
## if(message.v1) |
535 | 568 |
## message("You are using the old poset format. Consider using the new one.") |
536 | 569 |
|
... | ... |
@@ -576,6 +609,7 @@ oncoSimulIndiv <- function(fp, |
576 | 609 |
silent = !verbosity) |
577 | 610 |
objClass <- "oncosimul" |
578 | 611 |
} else { |
612 |
+ s <- sh <- NULL ## force it. |
|
579 | 613 |
if(numPassengers != 0) |
580 | 614 |
warning(paste("Specifying numPassengers has no effect", |
581 | 615 |
" when using fitnessEffects objects. ", |
... | ... |
@@ -594,6 +628,18 @@ oncoSimulIndiv <- function(fp, |
594 | 628 |
if(verbosity >= 2) |
595 | 629 |
cat("\n A (high quality) random seed will be generated in C++\n") |
596 | 630 |
} |
631 |
+ if(!is_null_na(fixation)) { |
|
632 |
+ if( (!is.list(fixation)) && (!is.vector(fixation)) ) |
|
633 |
+ stop("'fixation' must be a list or a vector.") |
|
634 |
+ if(!(all(unlist(lapply(fixation, is.vector))))) |
|
635 |
+ stop("Each element of 'fixation' must be a single element character vector.") |
|
636 |
+ if(!(all(unlist(lapply(fixation, class)) == "character"))) |
|
637 |
+ stop("Each element of 'fixation' must be a single element character vector.") |
|
638 |
+ if(!(all( unlist(lapply(fixation, length)) == 1))) |
|
639 |
+ stop("Each element of 'fixation' must be a single element character vector.") |
|
640 |
+ if(AND_DrvProbExit) |
|
641 |
+ stop("It makes no sense to pass AND_DrvProbExit and a fixation list.") |
|
642 |
+ } |
|
597 | 643 |
op <- try(nr_oncoSimul.internal(rFE = fp, |
598 | 644 |
birth = birth, |
599 | 645 |
death = death, |
... | ... |
@@ -625,7 +671,9 @@ oncoSimulIndiv <- function(fp, |
625 | 671 |
errorHitMaxTries = errorHitMaxTries, |
626 | 672 |
keepPhylog = keepPhylog, |
627 | 673 |
MMUEF = muEF, |
628 |
- detectionProb = detectionProb), |
|
674 |
+ detectionProb = detectionProb, |
|
675 |
+ AND_DrvProbExit = AND_DrvProbExit, |
|
676 |
+ fixation = fixation), |
|
629 | 677 |
silent = !verbosity) |
630 | 678 |
objClass <- c("oncosimul", "oncosimul2") |
631 | 679 |
} |
... | ... |
@@ -666,7 +714,7 @@ summary.oncosimul <- function(object, ...) { |
666 | 714 |
if( (tmp$minDMratio == -99)) tmp$minDMratio <- NA |
667 | 715 |
if( (tmp$minBMratio == -99)) tmp$minBMratio <- NA |
668 | 716 |
tmp$OccurringDrivers <- object$OccurringDrivers |
669 |
- return(as.data.frame(tmp)) |
|
717 |
+ return(as.data.frame(tmp, stringsAsFactors = FALSE)) |
|
670 | 718 |
} |
671 | 719 |
} |
672 | 720 |
|
... | ... |
@@ -690,10 +738,29 @@ print.oncosimul <- function(x, ...) { |
690 | 738 |
} |
691 | 739 |
|
692 | 740 |
## I want this to return things that are storable |
741 |
+## summary.oncosimulpop <- function(object, ...) { |
|
742 |
+## as.data.frame(rbindlist(lapply(object, summary))) |
|
743 |
+## } |
|
744 |
+ |
|
693 | 745 |
summary.oncosimulpop <- function(object, ...) { |
694 |
- as.data.frame(rbindlist(lapply(object, summary))) |
|
746 |
+ tmp <- lapply(object, summary) |
|
747 |
+ rm <- which(unlist(lapply(tmp, function(x) (length(x) == 1) && (is.na(x))))) |
|
748 |
+ if(length(rm) > 0) |
|
749 |
+ if(length(rm) < length(object)) { |
|
750 |
+ warning("Some simulations seem to have failed and will be removed", |
|
751 |
+ " from the summary. The failed runs are ", |
|
752 |
+ paste(rm, collapse = ", "), |
|
753 |
+ ".") |
|
754 |
+ tmp <- tmp[-rm] |
|
755 |
+ } else { |
|
756 |
+ warning("All simulations failed.") |
|
757 |
+ return(NA) |
|
758 |
+ } |
|
759 |
+ as.data.frame(rbindlist(tmp)) |
|
695 | 760 |
} |
696 | 761 |
|
762 |
+ |
|
763 |
+ |
|
697 | 764 |
print.oncosimulpop <- function(x, ...) { |
698 | 765 |
cat("\nPopulation of OncoSimul trajectories of", |
699 | 766 |
length(x), "individuals. Call :\n") |
... | ... |
@@ -1383,8 +1450,17 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) { |
1383 | 1450 |
stop("Phylogenetic information is only stored with v >= 2") |
1384 | 1451 |
z <- which_N_at_T(x, N, t) |
1385 | 1452 |
tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all |
1386 |
- ## sample size info at each period |
|
1453 |
+ ## sample size info at each period |
|
1454 |
+ |
|
1455 |
+ if( (length(tG) == 1) && (tG == "")) { |
|
1456 |
+ warning("There never was a descendant of WT") |
|
1457 |
+ } |
|
1458 |
+ |
|
1387 | 1459 |
df <- x$other$PhylogDF |
1460 |
+ if(nrow(df) == 0) { |
|
1461 |
+ warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared.") |
|
1462 |
+ return(NA) |
|
1463 |
+ } |
|
1388 | 1464 |
if(!keepEvents) { ## is this just a graphical thing? or not? |
1389 | 1465 |
df <- df[!duplicated(df[, c(1, 2)]), ] |
1390 | 1466 |
} |
... | ... |
@@ -1420,6 +1496,13 @@ plotClonePhylog <- function(x, N = 1, t = "last", |
1420 | 1496 |
"very fast, before any clones beyond the initial were ", |
1421 | 1497 |
"generated.") |
1422 | 1498 |
pc <- phylogClone(x, N, t, keepEvents) |
1499 |
+ ## if(is.na(pc)) { |
|
1500 |
+ ## ## This should not be reachable, as caught before |
|
1501 |
+ ## ## where we check for nrow of PhylogDF |
|
1502 |
+ ## warning("No clone phylogeny available. Exiting without plotting.") |
|
1503 |
+ ## return(NULL) |
|
1504 |
+ ## } |
|
1505 |
+ |
|
1423 | 1506 |
l0 <- igraph::layout.reingold.tilford(pc$g) |
1424 | 1507 |
if(!timeEvents) { |
1425 | 1508 |
plot(pc$g, layout = l0) |
... | ... |
@@ -1502,6 +1585,12 @@ get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) { |
1502 | 1585 |
|
1503 | 1586 |
get.mut.vector <- function(x, timeSample, typeSample, |
1504 | 1587 |
thresholdWhole, popSizeSample) { |
1588 |
+ if(is.null(x$TotalPopSize)) { |
|
1589 |
+ warning(paste("It looks like this simulation never completed.", |
|
1590 |
+ " Maybe it reached maximum allowed limits.", |
|
1591 |
+ " You will get NAs")) |
|
1592 |
+ return(rep(NA, length(x$geneNames))) |
|
1593 |
+ } |
|
1505 | 1594 |
the.time <- get.the.time.for.sample(x, timeSample, popSizeSample) |
1506 | 1595 |
if(the.time < 0) { |
1507 | 1596 |
return(rep(NA, nrow(x$Genotypes))) |
... | ... |
@@ -1686,17 +1775,6 @@ oncoSimul.internal <- function(poset, ## restrict.table, |
1686 | 1775 |
|
1687 | 1776 |
} |
1688 | 1777 |
|
1689 |
-eFinalMf <- function(initSize, s, j) { |
|
1690 |
- ## Expected final sizes for McF, when K is set to the default. |
|
1691 |
- # j is number of drivers |
|
1692 |
- ## as it says, with no passengers |
|
1693 |
- ## Set B(d) = D(N) |
|
1694 |
- K <- initSize/(exp(1) - 1) |
|
1695 |
- return(K * (exp( (1 + s)^j) - 1)) |
|
1696 |
-} |
|
1697 |
- |
|
1698 |
- |
|
1699 |
- |
|
1700 | 1778 |
OncoSimulWide2Long <- function(x) { |
1701 | 1779 |
## Put data in long format, for ggplot et al |
1702 | 1780 |
|
... | ... |
@@ -1820,6 +1898,59 @@ plotShannon <- function(z) { |
1820 | 1898 |
axis(2) |
1821 | 1899 |
} |
1822 | 1900 |
|
1901 |
+ |
|
1902 |
+ |
|
1903 |
+is_null_na <- function(x) { |
|
1904 |
+ ## For arguments, if user passes "undefined" or "not set" |
|
1905 |
+ ## See also http://stackoverflow.com/a/19655909 |
|
1906 |
+ if(is.function(x)) return(FALSE) |
|
1907 |
+ if( is.null(x) || |
|
1908 |
+ ( (length(x) == 1) && (is.na(x)) ) || |
|
1909 |
+ ( (length(x) == 1) && (x == "") ) ## careful here |
|
1910 |
+ ) { |
|
1911 |
+ return(TRUE) |
|
1912 |
+ } else { |
|
1913 |
+ return(FALSE) |
|
1914 |
+ } |
|
1915 |
+} |
|
1916 |
+ |
|
1917 |
+ |
|
1918 |
+## Not used anymore, but left here in case they become useful. |
|
1919 |
+## Expected numbers at equilibrium under McFarland's |
|
1920 |
+## eFinalMf <- function(initSize, s, j) { |
|
1921 |
+## ## Expected final sizes for McF, when K is set to the default. |
|
1922 |
+## # j is number of drivers |
|
1923 |
+## ## as it says, with no passengers |
|
1924 |
+## ## Set B(d) = D(N) |
|
1925 |
+## K <- initSize/(exp(1) - 1) |
|
1926 |
+## return(K * (exp( (1 + s)^j) - 1)) |
|
1927 |
+## } |
|
1928 |
+ |
|
1929 |
+## mcflE <- function(p, s, initSize) { |
|
1930 |
+## K <- initSize/(exp(1) - 1) |
|
1931 |
+## ## Expected number at equilibrium |
|
1932 |
+## return( K * (exp((1 + s)^p) - 1)) |
|
1933 |
+## } |
|
1934 |
+ |
|
1935 |
+## mcflEv <- function(p, s, initSize) { |
|
1936 |
+## ## expects vectors for p and s |
|
1937 |
+## K <- initSize/(exp(1) - 1) |
|
1938 |
+ |
|
1939 |
+## ## Expected number at equilibrium |
|
1940 |
+## return( K * (exp(prod((1 + s)^p)) - 1)) |
|
1941 |
+## } |
|
1942 |
+ |
|
1943 |
+ |
|
1944 |
+ |
|
1945 |
+ |
|
1946 |
+ |
|
1947 |
+ |
|
1948 |
+ |
|
1949 |
+ |
|
1950 |
+ |
|
1951 |
+ |
|
1952 |
+ |
|
1953 |
+ |
|
1823 | 1954 |
## simpsonI <- function(x) { |
1824 | 1955 |
## sx <- sum(x) |
1825 | 1956 |
## p <- x/sx |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
# This file was generated by Rcpp::compileAttributes |
2 | 2 |
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
3 | 3 |
|
4 |
-nr_BNB_Algo5 <- function(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery) { |
|
5 |
- .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery) |
|
4 |
+nr_BNB_Algo5 <- function(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list) { |
|
5 |
+ .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list) |
|
6 | 6 |
} |
7 | 7 |
|
8 | 8 |
BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) { |
... | ... |
@@ -17,6 +17,10 @@ evalRGenotypeAndMut <- function(rG, rFE, muEF, full2mutator_, verbose, prodNeg) |
17 | 17 |
.Call('OncoSimulR_evalRGenotypeAndMut', PACKAGE = 'OncoSimulR', rG, rFE, muEF, full2mutator_, verbose, prodNeg) |
18 | 18 |
} |
19 | 19 |
|
20 |
+accessibleGenotypes <- function(y, f, numMut, th) { |
|
21 |
+ .Call('OncoSimulR_accessibleGenotypes', PACKAGE = 'OncoSimulR', y, f, numMut, th) |
|
22 |
+} |
|
23 |
+ |
|
20 | 24 |
## readFitnessEffects <- function(rFE, echo) { |
21 | 25 |
## invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo)) |
22 | 26 |
## } |
... | ... |
@@ -192,16 +192,23 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
192 | 192 |
## tfm = tfm)) |
193 | 193 |
## } |
194 | 194 |
|
195 |
-count_accessible_g <- function(gfm, accessible_th) { |
|
196 |
- gaj <- genot_to_adj_mat(gfm) |
|
197 |
- gaj <- filter_inaccessible(gaj, accessible_th) |
|
198 |
- return(ncol(gaj) - 1) |
|
199 |
-} |
|
195 |
+## No longer being used. Used to be in rfitness |
|
196 |
+## count_accessible_g <- function(gfm, accessible_th) { |
|
197 |
+## gaj <- genot_to_adj_mat(gfm) |
|
198 |
+## gaj <- filter_inaccessible(gaj, accessible_th) |
|
199 |
+## return(ncol(gaj) - 1) |
|
200 |
+## } |
|
200 | 201 |
|
202 |
+ |
|
203 |
+## There is now C++ code to get just the locations/positions of the |
|
204 |
+## accessible genotypes |
|
201 | 205 |
filter_inaccessible <- function(x, accessible_th) { |
202 | 206 |
## Return an adjacency matrix with only accessible paths. x is the gaj |
203 | 207 |
## matrix created in the plots. The difference between genotypes |
204 | 208 |
## separated by a hamming distance of 1 |
209 |
+ |
|
210 |
+ ## FIXME: could do the x[, -1] before loop and not add the 1 |
|
211 |
+ ## inside while, and do that at end |
|
205 | 212 |
colnames(x) <- rownames(x) <- 1:ncol(x) |
206 | 213 |
while(TRUE) { |
207 | 214 |
## remove first column |
... | ... |
@@ -216,6 +223,132 @@ filter_inaccessible <- function(x, accessible_th) { |
216 | 223 |
return(x) |
217 | 224 |
} |
218 | 225 |
|
226 |
+f1 <- function() {} |
|
227 |
+ |
|
228 |
+x <- 99 |
|
229 |
+ |
|
230 |
+## wrapper to the C++ code |
|
231 |
+wrap_accessibleGenotypes <- function(x, th) { |
|
232 |
+ ## x is the fitness matrix, not adjacency matrix |
|
233 |
+ numMut <- rowSums(x[, -ncol(x)]) |
|
234 |
+ o_numMut <- order(numMut) |
|
235 |
+ x <- x[o_numMut, ] |
|
236 |
+ numMut <- numMut[o_numMut] |
|
237 |
+ |
|
238 |
+ y <- x[, -ncol(x)] |
|
239 |
+ storage.mode(y) <- "integer" |
|
240 |
+ |
|
241 |
+ acc <- accessibleGenotypes(y, x[, ncol(x)], |
|
242 |
+ as.integer(numMut), |
|
243 |
+ th) |
|
244 |
+ return(acc[acc > 0]) |
|
245 |
+} |
|
246 |
+ |
|
247 |
+## A transitional function |
|
248 |
+faster_accessible_genotypes_R <- function(x, th) { |
|
249 |
+ rs0 <- rowSums(x[, -ncol(x)]) |
|
250 |
+ x <- x[order(rs0), ] |
|
251 |
+ rm(rs0) |
|
252 |
+ |
|
253 |
+ y <- x[, -ncol(x)] |
|
254 |
+ f <- x[, ncol(x)] |
|
255 |
+ rs <- rowSums(y) |
|
256 |
+ |
|
257 |
+ ## If 0, not accessible |
|
258 |
+ ## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs), |
|
259 |
+ ## mode = "integer") |
|
260 |
+ |
|
261 |
+ adm <- matrix(0, nrow = length(rs), ncol = length(rs)) |
|
262 |
+ storage.mode(adm) <- "integer" |
|
263 |
+ |
|
264 |
+ ## Most time is gone here |
|
265 |
+ for(i in 1:length(rs)) { ## i is the current genotype |
|
266 |
+ candidates <- which(rs == (rs[i] + 1)) |
|
267 |
+ for(j in candidates) { |
|
268 |
+ if( (sum(abs(y[j, ] - y[i, ])) == 1) && |
|
269 |
+ ( (f[j] - f[i]) >= th ) ) { |
|
270 |
+ ## actually, this is the largest time sink using slam |
|
271 |
+ adm[i, j] <- 1L |
|
272 |
+ } |
|
273 |
+ } |
|
274 |
+ } |
|
275 |
+ |
|
276 |
+ colnames(adm) <- rownames(adm) <- 1:ncol(adm) |
|
277 |
+ admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column. |
|
278 |
+ while(TRUE) { |
|
279 |
+ ## We remove inaccessible cols (genotypes) and the corresponding |
|
280 |
+ ## rows repeatedly until nothing left to remove; any column left |
|
281 |
+ ## is therefore accessible throw at least one path. |
|
282 |
+ |
|
283 |
+ ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L) |
|
284 |
+ inacc_col <- which(colSums(admtmp) == 0L) |
|
285 |
+ if(length(inacc_col) == 0) break; |
|
286 |
+ inacc_row <- inacc_col + 1 ## recall root row is left |
|
287 |
+ admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE] |
|
288 |
+ } |
|
289 |
+ return(as.numeric(c(colnames(adm)[1], colnames(admtmp)))) |
|
290 |
+ |
|
291 |
+} |
|
292 |
+ |
|
293 |
+ |
|
294 |
+## ## This uses slam, but that is actually slower because |
|
295 |
+## ## of the assignment |
|
296 |
+## faster_accessible_genots_slam <- function(x, th = 0) { |
|
297 |
+ |
|
298 |
+## ## Given a genotype matrix, return the genotypes that are accessible |
|
299 |
+## ## via creating a directed adjacency matrix between genotypes |
|
300 |
+## ## connected (i.e., those that differ by gaining one mutation). 0 |
|
301 |
+## ## means not connected, 1 means connected. |
|
302 |
+ |
|
303 |
+## ## There is a more general function in OncoSimulR that will give the |
|
304 |
+## ## fitness difference. But not doing the difference is faster than |
|
305 |
+## ## just setting a value, say 1, if all we want is to keep track of |
|
306 |
+## ## accessible ones. And by using only 0/1 we can store only an |
|
307 |
+## ## integer. And no na.omits, etc. Is too restricted? Yes. But for |
|
308 |
+## ## simulations and computing just accessible genotypes, probably a |
|
309 |
+## ## hell of a lot faster. |
|
310 |
+ |
|
311 |
+## ## Well, this is not incredibly fast either. |
|
312 |
+ |
|
313 |
+## ## Make sure sorted, so ancestors always before descendants |
|
314 |
+## rs0 <- rowSums(x[, -ncol(x)]) |
|
315 |
+## x <- x[order(rs0), ] |
|
316 |
+## rm(rs0) |
|
317 |
+ |
|
318 |
+## y <- x[, -ncol(x)] |
|
319 |
+## f <- x[, ncol(x)] |
|
320 |
+## rs <- rowSums(y) |
|
321 |
+ |
|
322 |
+## ## If 0, not accessible |
|
323 |
+## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs), |
|
324 |
+## mode = "integer") |
|
325 |
+## for(i in 1:length(rs)) { ## i is the current genotype |
|
326 |
+## candidates <- which(rs == (rs[i] + 1)) |
|
327 |
+## for(j in candidates) { |
|
328 |
+## ## sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
329 |
+## ## if(sumdiff == 1) |
|
330 |
+## if( (sum(abs(y[j, ] - y[i, ])) == 1) && |
|
331 |
+## ( (f[j] - f[i]) >= th ) ) |
|
332 |
+## adm[i, j] <- 1L |
|
333 |
+## } |
|
334 |
+## } |
|
335 |
+ |
|
336 |
+## colnames(adm) <- rownames(adm) <- 1:ncol(adm) |
|
337 |
+## admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column. |
|
338 |
+## while(TRUE) { |
|
339 |
+## ## We remove inaccessible cols (genotypes) and the corresponding |
|
340 |
+## ## rows repeatedly until nothing left to remove; any column left |
|
341 |
+## ## is therefore accessible throw at least one path. |
|
342 |
+ |
|
343 |
+## ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L) |
|
344 |
+## inacc_col <- which(slam::col_sums(admtmp) == 0L) |
|
345 |
+## if(length(inacc_col) == 0) break; |
|
346 |
+## inacc_row <- inacc_col + 1 ## recall root row is left |
|
347 |
+## admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE] |
|
348 |
+## } |
|
349 |
+## return(as.numeric(c(colnames(adm)[1], colnames(admtmp)))) |
|
350 |
+## } |
|
351 |
+ |
|
219 | 352 |
|
220 | 353 |
generate_matrix_genotypes <- function(g) { |
221 | 354 |
## FIXME future: do this for order too? Only if rfitness for order. |
... | ... |
@@ -257,6 +390,8 @@ genot_to_adj_mat <- function(x) { |
257 | 390 |
## that differ by gaining one mutation) with value being the |
258 | 391 |
## difference in fitness between destination and origin |
259 | 392 |
|
393 |
+ ## FIXME: code is now in place to do all of this in C++ |
|
394 |
+ |
|
260 | 395 |
## Make sure sorted, so ancestors always before descendants |
261 | 396 |
rs0 <- rowSums(x[, -ncol(x)]) |
262 | 397 |
x <- x[order(rs0), ] |
... | ... |
@@ -23,14 +23,22 @@ |
23 | 23 |
|
24 | 24 |
to_Magellan <- function(x, file, |
25 | 25 |
max_num_genotypes = 2000) { |
26 |
+ ## This is stupid if we already have, as entry, an object from |
|
27 |
+ ## rfitness!! to_Fitness_Matrix can be very slow. |
|
26 | 28 |
if(is.null(file)) { |
27 | 29 |
file <- tempfile() |
28 | 30 |
cat("\n Using file ", file, "\n") |
29 | 31 |
} |
30 |
- gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm |
|
31 |
- write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1) |
|
32 |
- write.table(gfm, file = file, append = TRUE, |
|
33 |
- row.names = FALSE, col.names = FALSE, sep = " ") |
|
32 |
+ if(inherits(x, "genotype_fitness_matrix")) { |
|
33 |
+ write(rep(2, ncol(x) - 1), file = file, ncolumns = ncol(x) - 1) |
|
34 |
+ write.table(x, file = file, append = TRUE, |
|
35 |
+ row.names = FALSE, col.names = FALSE, sep = " ") |
|
36 |
+ } else { |
|
37 |
+ gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm |
|
38 |
+ write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1) |
|
39 |
+ write.table(gfm, file = file, append = TRUE, |
|
40 |
+ row.names = FALSE, col.names = FALSE, sep = " ") |
|
41 |
+ } |
|
34 | 42 |
} |
35 | 43 |
|
36 | 44 |
to_Fitness_Matrix <- function(x, max_num_genotypes) { |
... | ... |
@@ -1,3 +1,4 @@ |
1 |
+ |
|
1 | 2 |
## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte |
2 | 3 |
|
3 | 4 |
## This program is free software: you can redistribute it and/or modify |
... | ... |
@@ -161,7 +162,7 @@ list.of.deps <- function(x) { |
161 | 162 |
stop("child not unique") |
162 | 163 |
} |
163 | 164 |
typeDep <- lookupTypeDep[unique(x$typeDep)] |
164 |
- if(any(is.na(typeDep))) |
|
165 |
+ if(any(is_null_na(typeDep))) |
|
165 | 166 |
stop("typeDep value incorrect. Check spelling.") |
166 | 167 |
return(list( |
167 | 168 |
child = unique(x$child), |
... | ... |
@@ -1436,7 +1437,9 @@ nr_oncoSimul.internal <- function(rFE, |
1436 | 1437 |
extraTime, |
1437 | 1438 |
keepPhylog, |
1438 | 1439 |
detectionProb, |
1439 |
- MMUEF = NULL ## avoid partial matching, and set default |
|
1440 |
+ AND_DrvProbExit, |
|
1441 |
+ MMUEF = NULL, ## avoid partial matching, and set default |
|
1442 |
+ fixation = NULL ## avoid partial matching |
|
1440 | 1443 |
) { |
1441 | 1444 |
if(!inherits(rFE, "fitnessEffects")) |
1442 | 1445 |
stop(paste("rFE must be an object of class fitnessEffects", |
... | ... |
@@ -1578,6 +1581,28 @@ nr_oncoSimul.internal <- function(rFE, |
1578 | 1581 |
## if( is.null(n2)) n2 <- -9 |
1579 | 1582 |
|
1580 | 1583 |
## call <- match.call() |
1584 |
+ |
|
1585 |
+ ## Process the fixed list, if any |
|
1586 |
+ if(!is_null_na(fixation)) { |
|
1587 |
+ ng <- namedGenes |
|
1588 |
+ rownames(ng) <- namedGenes[, "Gene"] |
|
1589 |
+ ## Usual genotype specification and might allow ordered vectors |
|
1590 |
+ ## in the future |
|
1591 |
+ fixation_b <- lapply(fixation, nice.vector.eo, sep = ",") |
|
1592 |
+ ulf <- unlist(fixation_b) |
|
1593 |
+ if(any(ulf == ">")) |
|
1594 |
+ stop("Order effects not allowed (yet?) in fixation.") |
|
1595 |
+ ulfg <- ng[ulf, 1] |
|
1596 |
+ if(any(is.na(ulfg))) |
|
1597 |
+ stop(paste("The 'fixation' list contains genes that are not present", |
|
1598 |
+ " in the fitness effects.")) |
|
1599 |
+ ## Sorting here is crucial!! |
|
1600 |
+ fixation_list <- lapply(fixation_b, function(x) sort(ng[x, 2])) |
|
1601 |
+ } else { |
|
1602 |
+ fixation_list <- list() |
|
1603 |
+ } |
|
1604 |
+ |
|
1605 |
+ |
|
1581 | 1606 |
return(c( |
1582 | 1607 |
nr_BNB_Algo5(rFE = rFE, |
1583 | 1608 |
mu_ = mu, |
... | ... |
@@ -1613,7 +1638,9 @@ nr_oncoSimul.internal <- function(rFE, |
1613 | 1638 |
p2 = dpr["p2"], |
1614 | 1639 |
PDBaseline = dpr["PDBaseline"], |
1615 | 1640 |
cPDetect_i= dpr["cPDetect"], |
1616 |
- checkSizePEvery = dpr["checkSizePEvery"]), |
|
1641 |
+ checkSizePEvery = dpr["checkSizePEvery"], |
|
1642 |
+ AND_DrvProbExit = AND_DrvProbExit, |
|
1643 |
+ fixation_list = fixation_list), |
|
1617 | 1644 |
Drivers = list(rFE$drv), ## but when doing pops, these will be repeated |
1618 | 1645 |
geneNames = list(names(getNamesID(rFE))) |
1619 | 1646 |
)) |
... | ... |
@@ -1753,8 +1780,8 @@ detectionProbCheckParse <- function(x, initSize, verbosity) { |
1753 | 1780 |
default_PDBaseline <- 1.2 * initSize |
1754 | 1781 |
default_checkSizePEvery <- 20 |
1755 | 1782 |
## No default cPDetect. That is done from p2 and n2 in C++. |
1756 |
- |
|
1757 |
- if((length(x) == 1) && (is.na(x))) { |
|
1783 |
+ if(is_null_na(x)) { |
|
1784 |
+ ## if((length(x) == 1) && (is.na(x))) { |
|
1758 | 1785 |
y <- vector() |
1759 | 1786 |
y["cPDetect"] <- -9 |
1760 | 1787 |
y["p2"] <- 9 |
... | ... |
@@ -1779,18 +1806,18 @@ detectionProbCheckParse <- function(x, initSize, verbosity) { |
1779 | 1806 |
} |
1780 | 1807 |
|
1781 | 1808 |
## This ain't conditional. If not given, always check |
1782 |
- if( !is.na(x["cPDetect"]) && (sum(!is.na(x["p2"]), !is.na(x["n2"])) >= 1 )) |
|
1809 |
+ if( !is_null_na(x["cPDetect"]) && (sum(!is_null_na(x["p2"]), !is_null_na(x["n2"])) >= 1 )) |
|
1783 | 1810 |
stop("Specify only cPDetect xor both of p2 and n2") |
1784 |
- if( (is.na(x["p2"]) + is.na(x["n2"])) == 1 ) |
|
1811 |
+ if( (is_null_na(x["p2"]) + is_null_na(x["n2"])) == 1 ) |
|
1785 | 1812 |
stop("If you pass one of n2 or p2, you must also pass the other. ", |
1786 | 1813 |
"Otherwise, we would not know what to do.") |
1787 | 1814 |
|
1788 |
- if(is.na(x["PDBaseline"])) { |
|
1815 |
+ if(is_null_na(x["PDBaseline"])) { |
|
1789 | 1816 |
x["PDBaseline"] <- default_PDBaseline |
1790 | 1817 |
if(verbosity > -1) |
1791 | 1818 |
message("\n PDBaseline set to default value of ", default_PDBaseline, "\n") |
1792 | 1819 |
} |
1793 |
- if(is.na(x["checkSizePEvery"])) { |
|
1820 |
+ if(is_null_na(x["checkSizePEvery"])) { |
|
1794 | 1821 |
x["checkSizePEvery"] <- default_checkSizePEvery |
1795 | 1822 |
if(verbosity > -1) |
1796 | 1823 |
message("\n checkSizePEvery set to default value of ", |
... | ... |
@@ -1800,9 +1827,9 @@ detectionProbCheckParse <- function(x, initSize, verbosity) { |
1800 | 1827 |
## If we get here, we checked consistency of whether cPDetect or p2/n2. |
1801 | 1828 |
## Fill up with defaults the missing values |
1802 | 1829 |
|
1803 |
- if(is.na(x["cPDetect"])) { |
|
1804 |
- if(is.na(x["p2"])) { |
|
1805 |
- if(!is.na(x["n2"])) stop("Eh? no p2 but n2? Bug") |
|
1830 |
+ if(is_null_na(x["cPDetect"])) { |
|
1831 |
+ if(is_null_na(x["p2"])) { |
|
1832 |
+ if(!is_null_na(x["n2"])) stop("Eh? no p2 but n2? Bug") |
|
1806 | 1833 |
x["n2"] <- default_n2 |
1807 | 1834 |
x["p2"] <- default_p2 |
1808 | 1835 |
if(verbosity > -1) |
... | ... |
@@ -1816,14 +1843,14 @@ detectionProbCheckParse <- function(x, initSize, verbosity) { |
1816 | 1843 |
if(x["PDBaseline"] < 0) |
1817 | 1844 |
stop("PDBaseline < 0") |
1818 | 1845 |
|
1819 |
- if(!is.na(x["n2"])) { |
|
1846 |
+ if(!is_null_na(x["n2"])) { |
|
1820 | 1847 |
if(x["n2"] <= x["PDBaseline"]) |
1821 | 1848 |
stop("n2 <= PDBaseline") |
1822 | 1849 |
if(x["p2"] >= 1) stop("p2 >= 1") |
1823 | 1850 |
if(x["p2"] <= 0) stop("p2 <= 0") |
1824 | 1851 |
x["cPDetect"] <- -9 |
1825 | 1852 |
} else { ## only if x["cPDetect"] is not NA |
1826 |
- if(is.na(x["cPDetect"])) stop("eh? you found a bug")## paranoia |
|
1853 |
+ if(is_null_na(x["cPDetect"])) stop("eh? you found a bug")## paranoia |
|
1827 | 1854 |
x["n2"] <- -9 |
1828 | 1855 |
x["p2"] <- -9 |
1829 | 1856 |
if(verbosity > -1) |
... | ... |
@@ -1832,6 +1859,51 @@ detectionProbCheckParse <- function(x, initSize, verbosity) { |
1832 | 1859 |
return(x) |
1833 | 1860 |
} |
1834 | 1861 |
|
1862 |
+sampledGenotypes <- function(y, genes = NULL) { |
|
1863 |
+ ## From a popSample object, or a matrix for that matter, |
|
1864 |
+ ## show the sampled genotypes and their frequencies |
|
1865 |
+ if(!is.null(genes)) { |
|
1866 |
+ cols <- which(colnames(y) %in% genes ) |
|
1867 |
+ y <- y[, cols] |
|
1868 |
+ } |
|
1869 |
+ nn <- colnames(y) |
|
1870 |
+ df <- data.frame(table( |
|
1871 |
+ apply(y, 1, function(z) paste(nn[as.logical(z)], collapse = ", ") ) |
|
1872 |
+ )) |
|
1873 |
+ gn <- as.character(df[, 1]) |
|
1874 |
+ gn[gn == ""] <- "WT" |
|
1875 |
+ df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE) |
|
1876 |
+ return(df) |
|
1877 |
+} |
|
1878 |
+ |
|
1879 |
+ |
|
1880 |
+ |
|
1881 |
+list_g_matches_fixed <- function(x, y) { |
|
1882 |
+ ## Internal function, for testing the fixation output. |
|
1883 |
+ ## x and y are vectors |
|
1884 |
+ |
|
1885 |
+ ## x is the set of output genotypes, y the set of fixed |
|
1886 |
+ ## genotypes/subset of genotypes. |
|
1887 |
+ |
|
1888 |
+ ## Yes, this function has tests too in test.fixation.R |
|
1889 |
+ |
|
1890 |
+ ## All genotypes in x satisfy that they are supersets of at least one |
|
1891 |
+ ## in y? That is true if, for every element in x, at least one y in |
|
1892 |
+ ## that x. |
|
1893 |
+ |
|
1894 |
+ if(is.list(y)) y <- unlist(y) |
|
1895 |
+ |
|
1896 |
+ y.nice <- lapply(y, nice.vector.eo, sep = ",") |
|
1897 |
+ x.nice <- lapply(x, nice.vector.eo, sep = ",") |
|
1898 |
+ |
|
1899 |
+ fu <- function(u, y.nice) |
|
1900 |
+ any(unlist(lapply(y.nice, function(z) all(z %in% u)))) |
|
1901 |
+ |
|
1902 |
+ return(all(unlist(lapply(x.nice, fu, y.nice)))) |
|
1903 |
+ |
|
1904 |
+} |
|
1905 |
+ |
|
1906 |
+ |
|
1835 | 1907 |
|
1836 | 1908 |
## emptyFitnessEffects <- function() { |
1837 | 1909 |
## list(long.rt = list(), |
... | ... |
@@ -61,7 +61,9 @@ rfitness <- function(g, c= 0.5, |
61 | 61 |
} |
62 | 62 |
m <- cbind(m, Fitness = fi) |
63 | 63 |
if(min_accessible_genotypes > 0) { |
64 |
- num_accessible_genotypes <- count_accessible_g(m, accessible_th) |
|
64 |
+ ## num_accessible_genotypes <- count_accessible_g(m, accessible_th) |
|
65 |
+ ## Recall accessibleGenotypes includes the wt, if accessible. |
|
66 |
+ num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1 |
|
65 | 67 |
if(num_accessible_genotypes >= min_accessible_genotypes) { |
66 | 68 |
done <- TRUE |
67 | 69 |
attributes(m) <- c(attributes(m), |
... | ... |
@@ -89,3 +91,7 @@ create_eq_ref <- function(g) { |
89 | 91 |
ref <- c(rep(1, nm), rep(0, g - nm)) |
90 | 92 |
sample(ref) |
91 | 93 |
} |
94 |
+ |
|
95 |
+ |
|
96 |
+ |
|
97 |
+ |
... | ... |
@@ -1,3 +1,16 @@ |
1 |
+Changes in version 2.5.1 (2016-11-12): |
|
2 |
+ - AND of detectedSizeP and lastMaxDr. |
|
3 |
+ - fixation as stopping mechanism. |
|
4 |
+ - sampledGenotypes in user code. |
|
5 |
+ - clonePhylog et al: deal with never any descendant. |
|
6 |
+ - samplePop can handle failed simulations graciously. |
|
7 |
+ - summary.oncosimulpop can handle failed simulations graciously. |
|
8 |
+ - accessible genotypes now done in C++. |
|
9 |
+ - OcurringDrivers should not be a factor. |
|
10 |
+ - samplePop always returns gene names. |
|
11 |
+ - to_Magellan is much faster with rfitness objects. |
|
12 |
+ - Several improvements in vignette (English and additional explanations). |
|
13 |
+ |
|
1 | 14 |
Changes in version 2.4.0 (for BioC 3.4): |
2 | 15 |
- Mutator phenotype and gene-specific mutation rates. |
3 | 16 |
- End simulations stochastically as a function of size. |
... | ... |
@@ -9,7 +22,7 @@ Changes in version 2.4.0 (for BioC 3.4): |
9 | 22 |
- Improved test coverage. |
10 | 23 |
- samplePop: sample at arbitrary sizes. |
11 | 24 |
- evalAllGenotypes: order = FALSE by default. |
12 |
- |
|
25 |
+ |
|
13 | 26 |
Changes in version 2.3.17 (2017-09-22): |
14 | 27 |
- random2 for rfitness. |
15 | 28 |
- Vignette: decrease size and running time. |
... | ... |
@@ -114,7 +114,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL, |
114 | 114 |
specifiy anything if you do not want to, and you can pass an empty |
115 | 115 |
vector (as \code{character(0)}). The default has changed with respect |
116 | 116 |
to v.2.1.3 and previous: it used to be to assume that all |
117 |
- genes that were not in the \code{noIntGenes} were drivers. The fault |
|
117 |
+ genes that were not in the \code{noIntGenes} were drivers. The default |
|
118 | 118 |
now is to assume nothing: if you want \code{drvNames} you have |
119 | 119 |
to specify them. |
120 | 120 |
|
... | ... |
@@ -40,6 +40,8 @@ |
40 | 40 |
errorHitMaxTries = TRUE, |
41 | 41 |
verbosity = 0, |
42 | 42 |
initMutant = NULL, |
43 |
+ AND_DrvProbExit = FALSE, |
|
44 |
+ fixation = NULL, |
|
43 | 45 |
seed = NULL) |
44 | 46 |
|
45 | 47 |
oncoSimulPop(Nindiv, fp, model = "Exp", numPassengers = 0, mu = 1e-6, |
... | ... |
@@ -61,6 +63,8 @@ oncoSimulPop(Nindiv, fp, model = "Exp", numPassengers = 0, mu = 1e-6, |
61 | 63 |
errorHitWallTime = TRUE, |
62 | 64 |
errorHitMaxTries = TRUE, |
63 | 65 |
initMutant = NULL, |
66 |
+ AND_DrvProbExit = FALSE, |
|
67 |
+ fixation = NULL, |
|
64 | 68 |
verbosity = 0, |
65 | 69 |
mc.cores = detectCores(), |
66 | 70 |
seed = "auto") |
... | ... |
@@ -108,6 +112,8 @@ oncoSimulSample(Nindiv, |
108 | 112 |
typeSample = "whole", |
109 | 113 |
thresholdWhole = 0.5, |
110 | 114 |
initMutant = NULL, |
115 |
+ AND_DrvProbExit = FALSE, |
|
116 |
+ fixation = NULL, |
|
111 | 117 |
verbosity = 1, |
112 | 118 |
showProgress = FALSE, |
113 | 119 |
seed = "auto") |
... | ... |
@@ -289,22 +295,7 @@ This option can not be used with v.1 objects. |
289 | 295 |
|
290 | 296 |
} |
291 | 297 |
|
292 |
-\item{s}{ |
|
293 |
- Selection coefficient for drivers. |
|
294 |
- Only relevant if using a poset as this is included in the |
|
295 |
- fitnessEffects object. |
|
296 |
-} |
|
297 |
-\item{sh}{ |
|
298 |
- Selection coefficient for drivers with restrictions not satisfied. A |
|
299 |
- value of 0 means there are no penalties for a driver appearing in a |
|
300 |
- clone when its restrictions are not satisfied. |
|
301 |
- |
|
302 |
- To specify "sh=Inf" (in Diaz-Uriarte, 2015) use sh = -1. |
|
303 |
- |
|
304 |
- Only relevant if using a poset as this is included in the |
|
305 |
- fitnessEffects object. |
|
306 | 298 |
|
307 |
-} |
|
308 | 299 |
\item{K}{ |
309 | 300 |
Initial population equilibrium size in the McFarland models. |
310 | 301 |
|
... | ... |
@@ -497,10 +488,6 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}. |
497 | 488 |
} |
498 | 489 |
|
499 | 490 |
|
500 |
- |
|
501 |
- |
|
502 |
- |
|
503 |
- |
|
504 | 491 |
\item{verbosity}{ If 0, run silently. Iincreasing values of verbosity |
505 | 492 |
provide progressively more information about intermediate steps, |
506 | 493 |
possible numerical notes/warnings from the C++ code, etc. Values less |
... | ... |
@@ -529,6 +516,40 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}. |
529 | 516 |
\item{showProgress}{If TRUE, provide information, during exection, |
530 | 517 |
of the individual done, and the number of attempts and time used.} |
531 | 518 |
|
519 |
+\item{AND_DrvProbExit}{If TRUE, cancer will be considered to be reached |
|
520 |
+ if both the \code{detectionProb} mechanism and \code{detectionDrivers} |
|
521 |
+ are satisfied. This is and AND, not an OR condition. Using this option |
|
522 |
+ with \code{fixation} is not allowed (as it does not make much sense).} |
|
523 |
+ |
|
524 |
+\item{fixation}{If non-NULL, a list or a vector, where each element of |
|
525 |
+ is a string with a gene or a gene combination. Simulations will stop |
|
526 |
+ as soon as any of the genes or gene combinations are fixed (i.e., |
|
527 |
+ reach frequency of one). If you pass gene combinations, separate genes |
|
528 |
+ with commas (not '>'); this means order is not (yet?) supported. This |
|
529 |
+ way of specifying gene combinations is the same as the one used for |
|
530 |
+ \code{initMutant} and \code{\link{evalGenotype}}. |
|
531 |
+ |
|
532 |
+ Using this option with \code{AND_DrvProbExit} is not allowed (as it |
|
533 |
+ does not make much sense). This option is not allowed either with the |
|
534 |
+ old v.1 specification.} |
|
535 |
+ |
|
536 |
+\item{s}{ |
|
537 |
+ Selection coefficient for drivers. |
|
538 |
+ Only relevant if using a poset as this is included in the |
|
539 |
+ fitnessEffects object. This will eventually be deprecated. |
|
540 |
+} |
|
541 |
+\item{sh}{ |
|
542 |
+ Selection coefficient for drivers with restrictions not satisfied. A |
|
543 |
+ value of 0 means there are no penalties for a driver appearing in a |
|
544 |
+ clone when its restrictions are not satisfied. |
|
545 |
+ |
|
546 |
+ To specify "sh=Inf" (in Diaz-Uriarte, 2015) use sh = -1. |
|
547 |
+ |
|
548 |
+ Only relevant if using a poset as this is included in the |
|
549 |
+ fitnessEffects object. This will eventually be deprecated. |
|
550 |
+ |
|
551 |
+} |
|
552 |
+ |
|
532 | 553 |
\item{seed}{The seed for the C++ PRNG. You can pass a value. If you set |
533 | 554 |
it to NULL, then a seed will be generated in R and passed to C++. If |
534 | 555 |
you set it to "auto", then if you are using v.1, the behavior is the |
... | ... |
@@ -612,13 +633,13 @@ of the individual done, and the number of attempts and time used.} |
612 | 633 |
|
613 | 634 |
Detection of cancer can be a deterministic process, where cancer is |
614 | 635 |
always detected (and, thus, simulation ended) when certain conditions |
615 |
- are met (\code{detectionSize}, |
|
616 |
- \code{detectionDrivers}). Alternatively, it can be stochastic process |
|
617 |
- where probability of detection depends on size. Every so often (see |
|
618 |
- below) we assess population size, and detect cancer or not |
|
619 |
- probabilistically (comparing the probability of detection for that |
|
620 |
- size with a random uniform number). Probability of detection changes |
|
621 |
- with population size according to the function |
|
636 |
+ are met (\code{detectionSize}, \code{detectionDrivers}, |
|
637 |
+ \code{fixation}). Alternatively, it can be stochastic process where |
|
638 |
+ probability of detection depends on size. Every so often (see below) |
|
639 |
+ we assess population size, and detect cancer or not probabilistically |
|
640 |
+ (comparing the probability of detection for that size with a random |
|
641 |
+ uniform number). Probability of detection changes with population size |
|
642 |
+ according to the function |
|
622 | 643 |
|
623 | 644 |
\deqn{ 1 - e^{-cPDetect (populationsize - PDBaseline)}}. |
624 | 645 |
|
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
\name{samplePop} |
2 | 2 |
\alias{samplePop} |
3 |
- |
|
3 |
+\alias{sampledGenotypes} |
|
4 | 4 |
\title{ |
5 | 5 |
Obtain a sample from a population of simulations. |
6 | 6 |
|
... | ... |
@@ -13,17 +13,26 @@ |
13 | 13 |
schemes include whole tumor and single cell sampling, and sampling at |
14 | 14 |
the end of the tumor progression or during the progression of the |
15 | 15 |
disease. |
16 |
+ |
|
17 |
+ |
|
18 |
+ \code{sampledGenotypes} shows the genotype frequencies from that |
|
19 |
+ sample. Order effects are ignored. |
|
20 |
+ |
|
16 | 21 |
} |
17 | 22 |
|
18 | 23 |
\usage{ |
19 | 24 |
samplePop(x, timeSample = "last", typeSample = "whole", |
20 | 25 |
thresholdWhole = 0.5, geneNames = NULL, popSizeSample = NULL) |
26 |
+ |
|
27 |
+sampledGenotypes(y, genes = NULL) |
|
28 |
+ |
|
21 | 29 |
} |
22 | 30 |
|
23 | 31 |
\arguments{ |
24 |
- \item{x}{ |
|
25 |
- An object of class \code{oncosimulpop}. |
|
26 |
- } |
|
32 |
+ \item{x}{An object of class \code{oncosimulpop} or class \code{oncosimul2} (a |
|
33 |
+ single simulation).} |
|
34 |
+ |
|
35 |
+ \item{y}{The output from a call to \code{samplePop}.} |
|
27 | 36 |
|
28 | 37 |
\item{timeSample}{ |
29 | 38 |
"last" means to sample each individual in the very last time period |
... | ... |
@@ -65,6 +74,17 @@ samplePop(x, timeSample = "last", typeSample = "whole", |
65 | 74 |
This allows you to specify arbitrary sampling schemes with respect |
66 | 75 |
to total population size. } |
67 | 76 |
|
77 |
+ \item{genes}{If non-NULL, use only the genes in \code{genes} to create |
|
78 |
+ the table of genotypes. This can be useful if you only care about the |
|
79 |
+ genotypes with respect to a subset of genes (say, X), and want to |
|
80 |
+ collapse with respect to another subset of genes (say, Y), for |
|
81 |
+ instance if Y is a large set of passenger genes. For example, suppose |
|
82 |
+ the complete set of genes is 'a', 'b', 'c', 'd', and you specify |
|
83 |
+ \code{genes = c('a', 'b')}; then, genotypes 'a, b, c' and genotypes |
|
84 |
+ 'a, b, d' will not be shown as different rows in the table of |
|
85 |
+ frequencies. Likewise, genotypes 'a, c' and genotypes 'a, d' will not |
|
86 |
+ be shown as different rows.} |
|
87 |
+ |
|
68 | 88 |
} |
69 | 89 |
|
70 | 90 |
\details{ |
... | ... |
@@ -135,13 +155,16 @@ p705 <- examplePosets[["p705"]] |
135 | 155 |
## leave it at its default value). |
136 | 156 |
|
137 | 157 |
p1 <- oncoSimulPop(4, p705, mc.cores = 2) |
138 |
-samplePop(p1) |
|
158 |
+(sp1 <- samplePop(p1)) |
|
159 |
+sampledGenotypes(sp1) |
|
160 |
+ |
|
139 | 161 |
|
140 | 162 |
## Sample at fixed sizes. Notice the requested size |
141 | 163 |
## for the last population is larger than the any population size |
142 | 164 |
## so we get NAs |
143 | 165 |
|
144 |
-samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13)) |
|
166 |
+(sp2 <- samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13))) |
|
167 |
+sampledGenotypes(sp2) |
|
145 | 168 |
|
146 | 169 |
|
147 | 170 |
## Now single cell sampling |
... | ... |
@@ -149,8 +172,7 @@ samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13)) |
149 | 172 |
r1 <- oncoSimulIndiv(p705) |
150 | 173 |
samplePop(r1, typeSample = "single") |
151 | 174 |
|
152 |
- |
|
153 |
- |
|
175 |
+sampledGenotypes(samplePop(r1, typeSample = "single")) |
|
154 | 176 |
|
155 | 177 |
} |
156 | 178 |
|
... | ... |
@@ -228,6 +228,8 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i, |
228 | 228 |
const double& checkSizePEvery, |
229 | 229 |
double& nextCheckSizeP, |
230 | 230 |
std::mt19937& ran_gen, |
231 |
+ const bool& AND_DrvProbExit, |
|
232 |
+ const std::vector<std::vector<int> >& fixation_l, |
|
231 | 233 |
const double& fatalPopSize = 1e15) { |
232 | 234 |
// Fill out, but also compute totPopSize |
233 | 235 |
// and return sample summaries for popsize, drivers. |
... | ... |
@@ -254,7 +256,38 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i, |
254 | 256 |
} |
255 | 257 |
lastMaxDr = max_ndr; |
256 | 258 |
|
259 |
+ // Until fixation done here. Recall we use an OR operation for exiting |
|
260 |
+ // below. Could be added to loop above. |
|
261 |
+ // And we call allGenesinGenotype also above, inside getGenotypeDrivers. |
|
262 |
+ // So room for speed ups? |
|
257 | 263 |
|
264 |
+ // Since we convert each genotype to a sorted allGenesinGenotype, iterate |
|
265 |
+ // over that first. Add that pop size if the combination is present in genotype. |
|
266 |
+ bool fixated = false; |
|
267 |
+ if(totPopSize > 0) { // Avoid silly things |
|
268 |
+ if( fixation_l.size() ) { |
|
269 |
+ std::vector<double> popSize_fixation(fixation_l.size()); |
|
270 |
+ for(size_t i = 0; i < popParams.size(); ++i) { |
|
271 |
+ std::vector<int> thisg = allGenesinGenotype(Genotypes[i]); |
|
272 |
+ for(size_t fc = 0; fc != popSize_fixation.size(); ++fc) { |
|
273 |
+ // Yes, fixation_l is sorted in R. |
|
274 |
+ if(std::includes(thisg.begin(), thisg.end(), |
|
275 |
+ fixation_l[fc].begin(), fixation_l[fc].end()) ) { |
|
276 |
+ popSize_fixation[fc] += popParams[i].popSize; |
|
277 |
+ } |
|
278 |
+ } |
|
279 |
+ } |
|
280 |
+ // Any fixated? But avoid trivial of totPopSize of 0! |
|
281 |
+ // Now check of > 0 is redundant as we check totPopSize > 0 |
|
282 |
+ double max_popSize_fixation = |
|
283 |
+ *std::max_element(popSize_fixation.begin(), popSize_fixation.end()); |
|
284 |
+ if( (max_popSize_fixation > 0 ) && |
|
285 |
+ (max_popSize_fixation == totPopSize)) { |
|
286 |
+ fixated = true; |
|
287 |
+ } |
|
288 |
+ } |
|
289 |
+ } |
|
290 |
+ |
|
258 | 291 |
if (keepEvery < 0) { |
259 | 292 |
storeThis = false; |
260 | 293 |
} else if( currentTime >= (lastStoredSample + keepEvery) ) { |
... | ... |
@@ -265,6 +298,7 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i, |
265 | 298 |
simulsDone = true; |
266 | 299 |
} |
267 | 300 |
|
301 |
+ |
|
268 | 302 |
// FIXME |
269 | 303 |
// this is the usual exit condition |
270 | 304 |
// (totPopSize >= detectionSize) || |
... | ... |
@@ -287,28 +321,64 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i, |
287 | 321 |
} else { |
288 | 322 |
checkSizePNow = false; |
289 | 323 |
} |
290 |
- |
|
291 |
- if(extraTime > 0) { |
|
292 |
- if(done_at < 0) { |
|
293 |
- if( (totPopSize >= detectionSize) || |
|
294 |
- ( (lastMaxDr >= detectionDrivers) && |
|
295 |
- (popSizeOverDDr >= minDetectDrvCloneSz) ) || |
|
296 |
- ( checkSizePNow && |
|
297 |
- detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) { |
|
298 |
- done_at = currentTime + extraTime; |
|
324 |
+ |
|
325 |
+ |
|
326 |
+ if(AND_DrvProbExit) { |
|
327 |
+ // The AND of detectionProb and drivers |
|
328 |
+ // fixated plays no role here, and cannot be passed from R |
|
329 |
+ if(extraTime > 0) { |
|
330 |
+ if(done_at < 0) { |
|
331 |
+ if( (lastMaxDr >= detectionDrivers) && |
|
332 |
+ (popSizeOverDDr >= minDetectDrvCloneSz) && |
|
333 |
+ checkSizePNow && |
|
334 |
+ detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) { |
|
335 |
+ done_at = currentTime + extraTime; |
|
336 |
+ } |
|
337 |
+ } else if (currentTime >= done_at) { |
|
338 |
+ simulsDone = true; |
|
339 |
+ reachDetection = true; |
|
299 | 340 |
} |
300 |
- } else if (currentTime >= done_at) { |
|
301 |
- simulsDone = true; |
|
302 |
- reachDetection = true; |
|
341 |
+ } else if( (lastMaxDr >= detectionDrivers) && |
|
342 |
+ (popSizeOverDDr >= minDetectDrvCloneSz) && |
|
343 |
+ checkSizePNow && |
|
344 |
+ detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) { |
|
345 |
+ simulsDone = true; |
|
346 |
+ reachDetection = true; |
|
347 |
+ } |
|
348 |
+ } else { |
|
349 |
+ // The usual OR mechanism of each option |
|
350 |
+ if(extraTime > 0) { |
|
351 |
+ if(done_at < 0) { |
|
352 |
+ if( (fixated) || |
|
353 |
+ (totPopSize >= detectionSize) || |
|
354 |
+ ( (lastMaxDr >= detectionDrivers) && |
|
355 |
+ (popSizeOverDDr >= minDetectDrvCloneSz) ) || |
|
356 |
+ ( checkSizePNow && |
|
357 |
+ detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) { |
|
358 |
+ done_at = currentTime + extraTime; |
|
359 |
+ } |
|
360 |
+ } else if (currentTime >= done_at) { |
|
361 |
+ simulsDone = true; |
|
362 |
+ reachDetection = true; |
|
303 | 363 |
} |
304 |
- } else if( (totPopSize >= detectionSize) || |
|
305 |
- ( (lastMaxDr >= detectionDrivers) && |
|
306 |
- (popSizeOverDDr >= minDetectDrvCloneSz) ) || |
|
307 |
- ( checkSizePNow && |
|
308 |
- detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) { |
|
309 |
- simulsDone = true; |
|
310 |
- reachDetection = true; |
|
364 |
+ } else if( (fixated) || |
|
365 |
+ (totPopSize >= detectionSize) || |
|
366 |
+ ( (lastMaxDr >= detectionDrivers) && |
|
367 |
+ (popSizeOverDDr >= minDetectDrvCloneSz) ) || |
|
368 |
+ ( checkSizePNow && |
|
369 |
+ detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) { |
|
370 |
+ simulsDone = true; |
|
371 |
+ reachDetection = true; |
|
372 |
+ } |
|
311 | 373 |
} |
374 |
+ |
|
375 |
+ |
|
376 |
+ // if( checkSizePNow && (lastMaxDr >= detectionDrivers) && |
|
377 |
+ // detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) { |
|
378 |
+ // simulsDone = true; |
|
379 |
+ // reachDetection = true; |
|
380 |
+ // } |
|
381 |
+ |
|
312 | 382 |
|
313 | 383 |
if(totPopSize >= fatalPopSize) { |
314 | 384 |
Rcpp::Rcout << "\n\totPopSize > " << fatalPopSize |
... | ... |
@@ -736,7 +806,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
736 | 806 |
const std::vector<int>& full2mutator, |
737 | 807 |
const double& cPDetect, |
738 | 808 |
const double& PDBaseline, |
739 |
- const double& checkSizePEvery) { |
|
809 |
+ const double& checkSizePEvery, |
|
810 |
+ const bool& AND_DrvProbExit, |
|
811 |
+ const std::vector< std::vector<int> >& fixation_l) { |
|
740 | 812 |
|
741 | 813 |
double nextCheckSizeP = checkSizePEvery; |
742 | 814 |
const int numGenes = fitnessEffects.genomeSize; |
... | ... |
@@ -1566,7 +1638,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects, |
1566 | 1638 |
PDBaseline, |
1567 | 1639 |
checkSizePEvery, |
1568 | 1640 |
nextCheckSizeP, |
1569 |
- ran_gen); //keepEvery is for thinning |
|
1641 |
+ ran_gen, |
|
1642 |
+ AND_DrvProbExit, |
|
1643 |
+ fixation_l); //keepEvery is for thinning |
|
1570 | 1644 |
if(verbosity >= 3) { |
1571 | 1645 |
Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() |
1572 | 1646 |
<< "\n totPopSize after sampling " << totPopSize << "\n"; |
... | ... |
@@ -1649,7 +1723,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
1649 | 1723 |
double p2, |
1650 | 1724 |
double PDBaseline, |
1651 | 1725 |
double cPDetect_i, |
1652 |
- double checkSizePEvery) { |
|
1726 |
+ double checkSizePEvery, |
|
1727 |
+ bool AND_DrvProbExit, |
|
1728 |
+ Rcpp::List fixation_i) { |
|
1653 | 1729 |
// double cPDetect){ |
1654 | 1730 |
// double n2, |
1655 | 1731 |
// double p2, |
... | ... |
@@ -1717,6 +1793,13 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
1717 | 1793 |
DP2(muEF.genomeSize); |
1718 | 1794 |
throw std::logic_error("full2mutator 0 with mutatorEffects.genomesize != 0"); |
1719 | 1795 |
} |
1796 |
+ |
|
1797 |
+ // fixation: run until some genotype combinations fixed |
|
1798 |
+ std::vector < std::vector<int> > fixation_l(fixation_i.size()); |
|
1799 |
+ if( fixation_i.size() != 0 ) { |
|
1800 |
+ fixation_l = list_to_vector_of_int_vectors(fixation_i); |
|
1801 |
+ } |
|
1802 |
+ |
|
1720 | 1803 |
|
1721 | 1804 |
bool runAgain = true; |
1722 | 1805 |
bool reachDetection = false; |
... | ... |
@@ -1797,6 +1880,10 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
1797 | 1880 |
|
1798 | 1881 |
double currentTime = 0; |
1799 | 1882 |
int iter = 0; |
1883 |
+ |
|
1884 |
+ // bool AND_DrvProbExit = ( (cpDetect >= 0) && |
|
1885 |
+ // (detectionDrivers < 1e9) && |
|
1886 |
+ // (detectionSize < std::numeric_limits<double>::infinity())); |
|
1800 | 1887 |
while(runAgain) { |
1801 | 1888 |
|
1802 | 1889 |
if(numRuns >= maxNumTries) { |
... | ... |
@@ -1872,7 +1959,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, |
1872 | 1959 |
full2mutator, |
1873 | 1960 |
cPDetect, |
1874 | 1961 |
PDBaseline, |
1875 |
- checkSizePEvery); |
|
1962 |
+ checkSizePEvery, |
|
1963 |
+ AND_DrvProbExit, |
|
1964 |
+ fixation_l); |
|
1876 | 1965 |
++numRuns; |
1877 | 1966 |
forceRerun = false; |
1878 | 1967 |
} catch (rerunExcept &e) { |
... | ... |
@@ -5,18 +5,20 @@ |
5 | 5 |
|
6 | 6 |
#include <R_ext/Rdynload.h> |
7 | 7 |
|
8 |
-SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaseline, SEXP cPDetect_i, SEXP checkSizePEvery); |
|
8 |
+SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP); |
|
9 | 9 |
SEXP OncoSimulR_BNB_Algo5(SEXP restrictTableSEXP, SEXP numDriversSEXP, SEXP numGenesSEXP, SEXP typeCBN_SEXP, SEXP sSEXP, SEXP deathSEXP, SEXP muSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutantSEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP shSEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP); |
10 | 10 |
SEXP OncoSimulR_evalRGenotype(SEXP rGSEXP, SEXP rFESEXP, SEXP verboseSEXP, SEXP prodNegSEXP, SEXP calledBy_SEXP); |
11 | 11 |
SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SEXP full2mutator_SEXP, SEXP verboseSEXP, SEXP prodNegSEXP); |
12 | 12 |
// SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP); |
13 |
+SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP); |
|
13 | 14 |
|
14 | 15 |
// The number is the number of arguments |
15 | 16 |
R_CallMethodDef callMethods[] = { |
16 |
- {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 35}, |
|
17 |
+ {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 37}, |
|
17 | 18 |
{"OncoSimulR_BNB_Algo5", (DL_FUNC) &OncoSimulR_BNB_Algo5, 32}, |
18 | 19 |
{"OncoSimulR_evalRGenotype", (DL_FUNC) &OncoSimulR_evalRGenotype, 5}, |
19 |
- {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6}, |
|
20 |
+ {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6}, |
|
21 |
+ {"OncoSimulR_accessibleGenotypes", (DL_FUNC) &OncoSimulR_accessibleGenotypes, 4}, |
|
20 | 22 |
// {"OncoSimulR_readFitnessEffects", (DL_FUNC) &OncoSimulR_readFitnessEffects, 2}, |
21 | 23 |
{NULL, NULL, 0} |
22 | 24 |
}; |
... | ... |
@@ -6,8 +6,8 @@ |
6 | 6 |
using namespace Rcpp; |
7 | 7 |
|
8 | 8 |
// nr_BNB_Algo5 |
9 |
-Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery); |
|
10 |
-RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP) { |
|
9 |
+Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery, bool AND_DrvProbExit, Rcpp::List fixation_list); |
|
10 |
+RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP) { |
|
11 | 11 |
BEGIN_RCPP |
12 | 12 |
Rcpp::RObject __result; |
13 | 13 |
Rcpp::RNGScope __rngScope; |
... | ... |
@@ -46,7 +46,9 @@ BEGIN_RCPP |
46 | 46 |
Rcpp::traits::input_parameter< double >::type PDBaseline(PDBaselineSEXP); |
47 | 47 |
Rcpp::traits::input_parameter< double >::type cPDetect_i(cPDetect_iSEXP); |
48 | 48 |
Rcpp::traits::input_parameter< double >::type checkSizePEvery(checkSizePEverySEXP); |
49 |
- __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery)); |
|
49 |
+ Rcpp::traits::input_parameter< bool >::type AND_DrvProbExit(AND_DrvProbExitSEXP); |
|
50 |
+ Rcpp::traits::input_parameter< Rcpp::List >::type fixation_list(fixation_listSEXP); |
|
51 |
+ __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list)); |
|
50 | 52 |
return __result; |
51 | 53 |
END_RCPP |
52 | 54 |
} |
... | ... |
@@ -123,6 +125,24 @@ BEGIN_RCPP |
123 | 125 |
return __result; |
124 | 126 |
END_RCPP |
125 | 127 |
} |
128 |
+ |
|
129 |
+// evalRGenotypeAndMut |
|
130 |
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th); |
|
131 |
+RcppExport SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) { |
|
132 |
+BEGIN_RCPP |
|
133 |
+ Rcpp::RObject __result; |
|
134 |
+// Rcpp::RNGScope __rngScope; |
|
135 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP); |
|
136 |
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP); |
|
137 |
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP); |
|
138 |
+ Rcpp::traits::input_parameter< double >::type th(thSEXP); |
|
139 |
+ __result = Rcpp::wrap(accessibleGenotypes(y, f, numMut, th)); |
|
140 |
+ return __result; |
|
141 |
+ END_RCPP |
|
142 |
+} |
|
143 |
+ |
|
144 |
+ |
|
145 |
+ |
|
126 | 146 |
// // readFitnessEffects |
127 | 147 |
// void readFitnessEffects(Rcpp::List rFE, bool echo); |
128 | 148 |
// RcppExport SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP) { |
129 | 149 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,172 @@ |
1 |
+// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte |
|
2 |
+ |
|
3 |
+// This program is free software: you can redistribute it and/or modify |
|
4 |
+// it under the terms of the GNU General Public License as published by |
|
5 |
+// the Free Software Foundation, either version 3 of the License, or |
|
6 |
+// (at your option) any later version. |
|
7 |
+ |
|
8 |
+// This program is distributed in the hope that it will be useful, |
|
9 |
+// but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+// GNU General Public License for more details. |
|
12 |
+ |
|
13 |
+// You should have received a copy of the GNU General Public License |
|
14 |
+// along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+ |
|
16 |
+#include <Rcpp.h> |
|
17 |
+// using namespace Rcpp; |
|
18 |
+ |
|
19 |
+inline int HammingDistance(const Rcpp::IntegerVector& x, const Rcpp::IntegerVector& y) { |
|
20 |
+ Rcpp::NumericVector diff = Rcpp::abs( x - y ); |
|
21 |
+ return std::accumulate(diff.begin(), diff.end(), 0); |
|
22 |
+} |
|
23 |
+ |
|
24 |
+ |
|
25 |
+ |
|
26 |
+// [[Rcpp::export]] |
|
27 |
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, |
|
28 |
+ Rcpp::NumericVector f, |
|
29 |
+ Rcpp::IntegerVector numMut, // |
|
30 |
+ double th) { |
|
31 |
+ // Return just the indices. Could preserve fitness, but would need |
|
32 |
+ // another matrix. |
|
33 |
+ int ng = y.nrow(); //it counts the wt |
|
34 |
+ Rcpp::IntegerMatrix adm(ng, ng); |
|
35 |
+ int numMutdiff = 0; |
|
36 |
+ // I would have thought this would be faster. It ain't. |
|
37 |
+ // The last genotype never accesses anything. |
|
38 |
+ // for(int i = 0; i < (ng - 1); ++i) { |
|
39 |
+ // // Candidate genotypes to be accessed from i are always of larger |
|
40 |
+ // // mutation by 1. And candidates can thus not have smaller index |
|
41 |
+ // for(int j = (i + 1); j < ng; ++j) { |
|
42 |
+ // if( (numMut(j) == (numMut(i) + 1)) && |
|
43 |
+ // ( (f(j) - f(i)) >= th) && |
|
44 |
+ // (HammingDistance(y(i, _), y(j, _)) == 1) ) { |
|
45 |
+ // adm(i, j) = 1; |
|
46 |
+ // } else if( (numMut(j) > (numMut(i) + 1)) ) { |
|
47 |
+ // break; |
|
48 |
+ // } |
|
49 |
+ // } |
|
50 |
+ // } |
|
51 |
+ |
|
52 |
+ // The last genotype never accesses anything. |
|
53 |
+ for(int i = 0; i < (ng - 1); ++i) { |
|
54 |
+ // Candidate genotypes to be accessed from i are always of larger |
|
55 |
+ // mutation by 1. And candidates can thus not have smaller index |
|
56 |
+ for(int j = (i + 1); j < ng; ++j) { |
|
57 |
+ numMutdiff = numMut(j) - numMut(i); |
|
58 |
+ if( numMutdiff > 1) { // no more to search |
|
59 |
+ break; |
|
60 |
+ } else if(numMutdiff == 1) { |
|
61 |
+ // f(j) - f(i) is faster than HammingDistance |
|
62 |
+ // but might lead to more evals? |
|
63 |
+ // or fewer, depending on landscape |
|
64 |
+ if( ( (f(j) - f(i)) >= th) && |
|