Browse code

v.2.5.1. \n - much faster accessible genots \n - AND of drivers and size \n - fixation \n - doc. improvements

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@123922 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 13/11/2016 09:46:27
Showing29 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.5.0
5
-Date: 2016-09-22
4
+Version: 2.5.1
5
+Date: 2016-11-12
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -9,7 +9,8 @@ export("oncoSimulPop", "oncoSimulIndiv", "samplePop",
9 9
        "evalAllGenotypesFitAndMut",
10 10
        "rfitness",
11 11
        "plotFitnessLandscape",
12
-       "to_Magellan"
12
+       "to_Magellan",
13
+       "sampledGenotypes"
13 14
        )
14 15
 
15 16
 S3method(plot, oncosimul)
... ...
@@ -47,7 +48,8 @@ importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate",
47 48
            "filter")
48 49
 importFrom("smatr", "ma") ## for major axis regression in some tests
49 50
 importFrom("car", "linearHypothesis")
50
-
51
+## importFrom("slam", "simple_triplet_zero_matrix", ## "colapply_simple_triplet_matrix",
52
+##            "col_sums")
51 53
 
52 54
 
53 55
 
... ...
@@ -63,6 +63,8 @@ oncoSimulSample <- function(Nindiv,
63 63
                             typeSample = "whole",
64 64
                             thresholdWhole = 0.5,
65 65
                             initMutant = NULL,
66
+                            AND_DrvProbExit = FALSE,
67
+                            fixation = NULL,
66 68
                             verbosity  = 1,
67 69
                             showProgress = FALSE,
68 70
                             seed = "auto"){
... ...
@@ -190,7 +192,9 @@ oncoSimulSample <- function(Nindiv,
190 192
                                initMutant = initMutant,
191 193
                                keepPhylog = keepPhylog,
192 194
                                mutationPropGrowth = mutationPropGrowth,
193
-                               detectionProb = detectionProb)        
195
+                               detectionProb = detectionProb,
196
+                               AND_DrvProbExit = AND_DrvProbExit,
197
+                               fixation = fixation)        
194 198
         if(tmp$other$UnrecoverExcept) {
195 199
             return(f.out.unrecover.except(tmp))
196 200
         }
... ...
@@ -291,7 +295,7 @@ samplePop <- function(x, timeSample = "last",
291 295
                             popSizeSample = popSizeSample)
292 296
         dim(z) <- c(1, length(z))
293 297
         if(is.null(gN) && (!is.null(x$geneNames)))
294
-            gN <- geneNames
298
+            gN <- x$geneNames
295 299
     }
296 300
     message("\n Subjects by Genes matrix of ",
297 301
         nrow(z), " subjects and ",
... ...
@@ -338,6 +342,8 @@ oncoSimulPop <- function(Nindiv,
338 342
                          errorHitWallTime = TRUE,
339 343
                          errorHitMaxTries = TRUE,
340 344
                          initMutant = NULL,
345
+                         AND_DrvProbExit = FALSE,
346
+                         fixation = NULL,
341 347
                          verbosity  = 0,
342 348
                          mc.cores = detectCores(),
343 349
                          seed = "auto") {
... ...
@@ -379,7 +385,9 @@ oncoSimulPop <- function(Nindiv,
379 385
                         seed = seed, keepPhylog = keepPhylog,
380 386
                         initMutant = initMutant,
381 387
                         mutationPropGrowth = mutationPropGrowth,
382
-                        detectionProb = detectionProb),
388
+                        detectionProb = detectionProb,
389
+                        AND_DrvProbExit = AND_DrvProbExit,
390
+                        fixation = fixation),
383 391
                     mc.cores = mc.cores
384 392
                     )
385 393
     class(pop) <- "oncosimulpop"
... ...
@@ -422,16 +430,33 @@ oncoSimulIndiv <- function(fp,
422 430
                            errorHitMaxTries = TRUE,
423 431
                            verbosity = 0,
424 432
                            initMutant = NULL,
433
+                           AND_DrvProbExit = FALSE,
434
+                           fixation = NULL,
425 435
                            seed = NULL
426 436
                            ) {
427 437
     call <- match.call()
428
-    if(all(c(is.na(detectionProb),
429
-             is.na(detectionSize),
430
-             is.na(detectionDrivers),
431
-             is.na(finalTime))))
438
+    if(all(c(is_null_na(detectionProb),
439
+             is_null_na(detectionSize),
440
+             is_null_na(detectionDrivers),
441
+             is_null_na(finalTime),
442
+             is_null_na(fixation)
443
+             )))
432 444
         stop("At least one stopping condition should be given.",
433 445
              " At least one of detectionProb, detectionSize, detectionDrivers,",
434
-             " finalTime. Otherwise, we'll run forever.")
446
+             " finalTime. Otherwise, we'll run until aborted by max.wall.time,",
447
+             " max.num.tries, and the like.")
448
+
449
+    if(AND_DrvProbExit && (is_null_na(detectionProb) || is_null_na(detectionDrivers)))
450
+        stop("AND_DrvProbExit is TRUE: both of detectionProb and detectionDrivers",
451
+             " must be non NA.")
452
+    if(AND_DrvProbExit && !is_null_na(detectionSize)) {
453
+        warning("With AND_DrvProbExit = TRUE, detectionSize is ignored.")
454
+        detectionSize <- NA
455
+    }
456
+    if(inherits(fp, "fitnessEffects")) {
457
+        s <- sh <- NULL ## force it soon!
458
+    }
459
+
435 460
     ## legacies from poor name choices
436 461
     typeFitness <- switch(model,
437 462
                           "Bozic" = "bozic1",
... ...
@@ -467,7 +492,8 @@ oncoSimulIndiv <- function(fp,
467 492
         if(model %in% c("Bozic", "Exp") )
468 493
             minDetectDrvCloneSz <- 0
469 494
         else if (model %in% c("McFL", "McFarlandLog"))
470
-            minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers)
495
+            minDetectDrvCloneSz <- initSize
496
+        ## minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers)
471 497
         else
472 498
             stop("Unknown model")
473 499
     }
... ...
@@ -484,7 +510,7 @@ oncoSimulIndiv <- function(fp,
484 510
     ## No user-visible magic numbers
485 511
     ## if(is.null(keepEvery))
486 512
     ##     keepEvery <- -9
487
-    if(is.na(keepEvery)) keepEvery <- -9
513
+    if(is_null_na(keepEvery)) keepEvery <- -9
488 514
 
489 515
     
490 516
     if( (keepEvery > 0) & (keepEvery < sampleEvery)) {
... ...
@@ -499,10 +525,11 @@ oncoSimulIndiv <- function(fp,
499 525
     if( (typeFitness == "exp") && (death != 1) )
500 526
         warning("Using fitness exp with death != 1")
501 527
 
502
-
503
-    if(is.na(detectionDrivers)) detectionDrivers <- (2^31) - 1
504
-    if(is.na(detectionSize)) detectionSize <- Inf
505
-    if(is.na(finalTime)) finalTime <- Inf
528
+    if(!is_null_na(detectionDrivers) && (detectionDrivers >= 1e9))
529
+        stop("detectionDrivers > 1e9; this doesn't seem reasonable")
530
+    if(is_null_na(detectionDrivers)) detectionDrivers <- (2^31) - 1
531
+    if(is_null_na(detectionSize)) detectionSize <- Inf
532
+    if(is_null_na(finalTime)) finalTime <- Inf
506 533
     
507 534
     
508 535
     if(!inherits(fp, "fitnessEffects")) {
... ...
@@ -515,12 +542,18 @@ oncoSimulIndiv <- function(fp,
515 542
             stop(m)
516 543
            
517 544
         }
545
+        if(AND_DrvProbExit) {
546
+            stop("The AND_DrvProbExit = TRUE setting is invalid",
547
+                 " with the old poset format.")
548
+        }
518 549
         if(!is.null(muEF))
519
-            stop("Mutator effects cannot be especified with the old poset format")
550
+            stop("Mutator effects cannot be specified with the old poset format.")
520 551
         if( length(initMutant) > 0)  
521 552
             warning("With the old poset format you can no longer use initMutant.",
522 553
                     " The initMutant you passed will be ignored.")
523
-            ## stop("With the old poset, initMutant can only take a single value.")
554
+        ## stop("With the old poset, initMutant can only take a single value.")
555
+        if(!is_null_na(fixation))
556
+            stop("'fixation' cannot be specified with the old poset format.")
524 557
         ## Seeding C++ is now much better in new version
525 558
         if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed
526 559
             ## name is a legacy. This is really the seed for the C++ generator.
... ...
@@ -530,7 +563,7 @@ oncoSimulIndiv <- function(fp,
530 563
         if(verbosity >= 2)
531 564
             cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
532 565
 
533
-        if(!is.na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
566
+        if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
534 567
         ## if(message.v1)
535 568
         ##     message("You are using the old poset format. Consider using the new one.")
536 569
    
... ...
@@ -576,6 +609,7 @@ oncoSimulIndiv <- function(fp,
576 609
                   silent = !verbosity)
577 610
         objClass <- "oncosimul"
578 611
     } else {
612
+        s <- sh <- NULL ## force it.
579 613
         if(numPassengers != 0)
580 614
             warning(paste("Specifying numPassengers has no effect",
581 615
                           " when using fitnessEffects objects. ",
... ...
@@ -594,6 +628,18 @@ oncoSimulIndiv <- function(fp,
594 628
             if(verbosity >= 2)
595 629
                 cat("\n A (high quality) random seed will be generated in C++\n")
596 630
         }
631
+        if(!is_null_na(fixation)) {
632
+            if( (!is.list(fixation)) && (!is.vector(fixation))  )
633
+                stop("'fixation' must be a list or a vector.")
634
+            if(!(all(unlist(lapply(fixation, is.vector)))))
635
+                stop("Each element of 'fixation' must be a single element character vector.")
636
+            if(!(all(unlist(lapply(fixation, class)) == "character")))
637
+                stop("Each element of 'fixation' must be a single element character vector.")
638
+            if(!(all( unlist(lapply(fixation, length)) == 1)))
639
+                stop("Each element of 'fixation' must be a single element character vector.")
640
+            if(AND_DrvProbExit)
641
+                stop("It makes no sense to pass AND_DrvProbExit and a fixation list.")
642
+        }
597 643
         op <- try(nr_oncoSimul.internal(rFE = fp, 
598 644
                                         birth = birth,
599 645
                                         death = death,  
... ...
@@ -625,7 +671,9 @@ oncoSimulIndiv <- function(fp,
625 671
                                         errorHitMaxTries = errorHitMaxTries,
626 672
                                         keepPhylog = keepPhylog,
627 673
                                         MMUEF = muEF,
628
-                                        detectionProb = detectionProb),
674
+                                        detectionProb = detectionProb,
675
+                                        AND_DrvProbExit = AND_DrvProbExit,
676
+                                        fixation = fixation),
629 677
                   silent = !verbosity)
630 678
         objClass <- c("oncosimul", "oncosimul2")
631 679
     }
... ...
@@ -666,7 +714,7 @@ summary.oncosimul <- function(object, ...) {
666 714
         if( (tmp$minDMratio == -99)) tmp$minDMratio <- NA
667 715
         if( (tmp$minBMratio == -99)) tmp$minBMratio <- NA
668 716
         tmp$OccurringDrivers <- object$OccurringDrivers
669
-        return(as.data.frame(tmp))
717
+        return(as.data.frame(tmp, stringsAsFactors = FALSE))
670 718
     }
671 719
 }
672 720
 
... ...
@@ -690,10 +738,29 @@ print.oncosimul <- function(x, ...) {
690 738
 }
691 739
 
692 740
 ## I want this to return things that are storable
741
+## summary.oncosimulpop <- function(object, ...) {
742
+##     as.data.frame(rbindlist(lapply(object, summary)))
743
+## }
744
+
693 745
 summary.oncosimulpop <- function(object, ...) {
694
-    as.data.frame(rbindlist(lapply(object, summary)))
746
+    tmp <- lapply(object, summary)
747
+    rm <- which(unlist(lapply(tmp, function(x) (length(x) == 1) && (is.na(x)))))
748
+    if(length(rm) > 0)
749
+        if(length(rm) < length(object)) {
750
+        warning("Some simulations seem to have failed and will be removed",
751
+                " from the summary. The failed runs are ",
752
+                paste(rm, collapse = ", "),
753
+                ".")
754
+        tmp <- tmp[-rm]
755
+        } else {
756
+            warning("All simulations failed.")
757
+            return(NA)
758
+        }
759
+    as.data.frame(rbindlist(tmp))
695 760
 }
696 761
 
762
+
763
+
697 764
 print.oncosimulpop <- function(x, ...) {
698 765
     cat("\nPopulation of OncoSimul trajectories of",
699 766
         length(x), "individuals. Call :\n")
... ...
@@ -1383,8 +1450,17 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
1383 1450
         stop("Phylogenetic information is only stored with v >= 2")
1384 1451
     z <- which_N_at_T(x, N, t)
1385 1452
     tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all
1386
-                               ## sample size info at each period
1453
+    ## sample size info at each period
1454
+
1455
+    if( (length(tG) == 1) && (tG == "")) {
1456
+        warning("There never was a descendant of WT")
1457
+    }
1458
+    
1387 1459
     df <- x$other$PhylogDF
1460
+    if(nrow(df) == 0) {
1461
+        warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared.")
1462
+        return(NA)
1463
+    }
1388 1464
     if(!keepEvents) { ## is this just a graphical thing? or not?
1389 1465
         df <- df[!duplicated(df[, c(1, 2)]), ]
1390 1466
     }
... ...
@@ -1420,6 +1496,13 @@ plotClonePhylog <- function(x, N = 1, t = "last",
1420 1496
              "very fast, before any clones beyond the initial were ",
1421 1497
              "generated.")
1422 1498
     pc <- phylogClone(x, N, t, keepEvents)
1499
+    ## if(is.na(pc)) {
1500
+    ##     ## This should not be reachable, as caught before
1501
+    ##     ## where we check for nrow of PhylogDF   
1502
+    ##     warning("No clone phylogeny available. Exiting without plotting.")
1503
+    ##     return(NULL)
1504
+    ## }
1505
+        
1423 1506
     l0 <- igraph::layout.reingold.tilford(pc$g)
1424 1507
     if(!timeEvents) {
1425 1508
         plot(pc$g, layout = l0)
... ...
@@ -1502,6 +1585,12 @@ get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) {
1502 1585
 
1503 1586
 get.mut.vector <- function(x, timeSample, typeSample,
1504 1587
                            thresholdWhole, popSizeSample) {
1588
+    if(is.null(x$TotalPopSize)) {
1589
+        warning(paste("It looks like this simulation never completed.",
1590
+                      " Maybe it reached maximum allowed limits.",
1591
+                      " You will get NAs"))
1592
+        return(rep(NA, length(x$geneNames)))
1593
+    }
1505 1594
     the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
1506 1595
     if(the.time < 0) { 
1507 1596
         return(rep(NA, nrow(x$Genotypes)))
... ...
@@ -1686,17 +1775,6 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1686 1775
 
1687 1776
 }
1688 1777
 
1689
-eFinalMf <- function(initSize, s, j) {
1690
-    ## Expected final sizes for McF, when K is set to the default.
1691
-    # j is number of drivers
1692
-    ## as it says, with no passengers
1693
-    ## Set B(d) = D(N)
1694
-    K <- initSize/(exp(1) - 1)
1695
-    return(K * (exp( (1 + s)^j) - 1))
1696
-}
1697
-
1698
-
1699
-
1700 1778
 OncoSimulWide2Long <- function(x) {
1701 1779
     ## Put data in long format, for ggplot et al
1702 1780
     
... ...
@@ -1820,6 +1898,59 @@ plotShannon <- function(z) {
1820 1898
     axis(2)
1821 1899
 }
1822 1900
 
1901
+
1902
+
1903
+is_null_na <- function(x) {
1904
+    ## For arguments, if user passes "undefined" or "not set"
1905
+    ## See also http://stackoverflow.com/a/19655909
1906
+    if(is.function(x)) return(FALSE)
1907
+    if( is.null(x) ||
1908
+        ( (length(x) == 1) && (is.na(x)) ) ||
1909
+        ( (length(x) == 1) && (x == "") ) ## careful here
1910
+       )  {
1911
+        return(TRUE)
1912
+    } else {
1913
+        return(FALSE)
1914
+    }
1915
+}
1916
+
1917
+
1918
+## Not used anymore, but left here in case they become useful.
1919
+## Expected numbers at equilibrium under McFarland's
1920
+## eFinalMf <- function(initSize, s, j) {
1921
+##     ## Expected final sizes for McF, when K is set to the default.
1922
+##     # j is number of drivers
1923
+##     ## as it says, with no passengers
1924
+##     ## Set B(d) = D(N)
1925
+##     K <- initSize/(exp(1) - 1)
1926
+##     return(K * (exp( (1 + s)^j) - 1))
1927
+## }
1928
+
1929
+## mcflE <- function(p, s, initSize) {
1930
+##     K <- initSize/(exp(1) - 1)
1931
+##     ## Expected number at equilibrium
1932
+##     return( K * (exp((1 + s)^p) - 1))
1933
+## }
1934
+
1935
+## mcflEv <- function(p, s, initSize) {
1936
+##     ## expects vectors for p and s
1937
+##     K <- initSize/(exp(1) - 1)
1938
+    
1939
+##     ## Expected number at equilibrium
1940
+##     return( K * (exp(prod((1 + s)^p)) - 1))
1941
+## }
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1823 1954
 ## simpsonI <- function(x) {
1824 1955
 ##     sx <- sum(x)
1825 1956
 ##     p <- x/sx
... ...
@@ -1,8 +1,8 @@
1 1
 # This file was generated by Rcpp::compileAttributes
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-nr_BNB_Algo5 <- function(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery) {
5
-    .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery)
4
+nr_BNB_Algo5 <- function(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list) {
5
+    .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list)
6 6
 }
7 7
 
8 8
 BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) {
... ...
@@ -17,6 +17,10 @@ evalRGenotypeAndMut <- function(rG, rFE, muEF, full2mutator_, verbose, prodNeg)
17 17
     .Call('OncoSimulR_evalRGenotypeAndMut', PACKAGE = 'OncoSimulR', rG, rFE, muEF, full2mutator_, verbose, prodNeg)
18 18
 }
19 19
 
20
+accessibleGenotypes <- function(y, f, numMut, th) {
21
+    .Call('OncoSimulR_accessibleGenotypes', PACKAGE = 'OncoSimulR', y, f, numMut, th)
22
+}
23
+
20 24
 ## readFitnessEffects <- function(rFE, echo) {
21 25
 ##     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
22 26
 ## }
... ...
@@ -192,16 +192,23 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
192 192
 ##                 tfm = tfm))
193 193
 ## }
194 194
 
195
-count_accessible_g <- function(gfm, accessible_th) {
196
-    gaj <- genot_to_adj_mat(gfm)
197
-    gaj <- filter_inaccessible(gaj, accessible_th)
198
-    return(ncol(gaj) - 1)
199
-}
195
+## No longer being used. Used to be in rfitness
196
+## count_accessible_g <- function(gfm, accessible_th) {
197
+##     gaj <- genot_to_adj_mat(gfm)
198
+##     gaj <- filter_inaccessible(gaj, accessible_th)
199
+##     return(ncol(gaj) - 1)
200
+## }
200 201
 
202
+
203
+## There is now C++ code to get just the locations/positions of the
204
+## accessible genotypes
201 205
 filter_inaccessible <- function(x, accessible_th) {
202 206
     ## Return an adjacency matrix with only accessible paths. x is the gaj
203 207
     ## matrix created in the plots. The difference between genotypes
204 208
     ## separated by a hamming distance of 1
209
+
210
+    ## FIXME: could do the x[, -1] before loop and not add the 1
211
+    ## inside while, and do that at end
205 212
     colnames(x) <- rownames(x) <- 1:ncol(x)
206 213
     while(TRUE) {
207 214
         ## remove first column
... ...
@@ -216,6 +223,132 @@ filter_inaccessible <- function(x, accessible_th) {
216 223
     return(x)
217 224
 }
218 225
 
226
+f1 <- function() {}
227
+
228
+x <- 99
229
+
230
+## wrapper to the C++ code
231
+wrap_accessibleGenotypes <- function(x, th) {
232
+    ## x is the fitness matrix, not adjacency matrix
233
+    numMut <- rowSums(x[, -ncol(x)])
234
+    o_numMut <- order(numMut)
235
+    x <- x[o_numMut, ]
236
+    numMut <- numMut[o_numMut]
237
+    
238
+    y <- x[, -ncol(x)]
239
+    storage.mode(y) <- "integer"
240
+
241
+    acc <- accessibleGenotypes(y, x[, ncol(x)],
242
+                               as.integer(numMut),
243
+                               th)
244
+    return(acc[acc > 0])
245
+}
246
+
247
+## A transitional function
248
+faster_accessible_genotypes_R <- function(x, th) {
249
+   rs0 <- rowSums(x[, -ncol(x)])
250
+    x <- x[order(rs0), ]
251
+    rm(rs0)
252
+    
253
+    y <- x[, -ncol(x)]
254
+    f <- x[, ncol(x)]
255
+    rs <- rowSums(y)
256
+
257
+   ## If 0, not accessible
258
+   ## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
259
+   ##                                          mode = "integer")
260
+   
261
+   adm <- matrix(0, nrow = length(rs), ncol = length(rs))
262
+   storage.mode(adm) <- "integer"
263
+   
264
+   ## Most time is gone here
265
+    for(i in 1:length(rs)) { ## i is the current genotype
266
+        candidates <- which(rs == (rs[i] + 1))
267
+        for(j in candidates) {
268
+            if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
269
+                ( (f[j] - f[i]) >= th ) ) {
270
+                ## actually, this is the largest time sink using slam
271
+                adm[i, j] <- 1L
272
+                }
273
+        }
274
+    }
275
+
276
+    colnames(adm) <- rownames(adm) <- 1:ncol(adm)
277
+    admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
278
+    while(TRUE) {
279
+        ## We remove inaccessible cols (genotypes) and the corresponding
280
+        ## rows repeatedly until nothing left to remove; any column left
281
+        ## is therefore accessible throw at least one path.
282
+
283
+        ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
284
+        inacc_col <- which(colSums(admtmp) == 0L)
285
+        if(length(inacc_col) == 0) break;
286
+        inacc_row <- inacc_col + 1 ## recall root row is left
287
+        admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
288
+    }
289
+    return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
290
+
291
+}
292
+
293
+
294
+## ## This uses slam, but that is actually slower because
295
+## ## of the assignment
296
+## faster_accessible_genots_slam <- function(x, th = 0) {
297
+
298
+##     ## Given a genotype matrix, return the genotypes that are accessible
299
+##     ## via creating a directed adjacency matrix between genotypes
300
+##     ## connected (i.e., those that differ by gaining one mutation). 0
301
+##     ## means not connected, 1 means connected.
302
+    
303
+##     ## There is a more general function in OncoSimulR that will give the
304
+##     ## fitness difference. But not doing the difference is faster than
305
+##     ## just setting a value, say 1, if all we want is to keep track of
306
+##     ## accessible ones. And by using only 0/1 we can store only an
307
+##     ## integer. And no na.omits, etc. Is too restricted? Yes. But for
308
+##     ## simulations and computing just accessible genotypes, probably a
309
+##     ## hell of a lot faster.
310
+
311
+##     ## Well, this is not incredibly fast either.
312
+    
313
+##     ## Make sure sorted, so ancestors always before descendants
314
+##     rs0 <- rowSums(x[, -ncol(x)])
315
+##     x <- x[order(rs0), ]
316
+##     rm(rs0)
317
+    
318
+##     y <- x[, -ncol(x)]
319
+##     f <- x[, ncol(x)]
320
+##     rs <- rowSums(y)
321
+
322
+##     ## If 0, not accessible
323
+##     adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
324
+##                                       mode = "integer")
325
+##     for(i in 1:length(rs)) { ## i is the current genotype
326
+##         candidates <- which(rs == (rs[i] + 1))
327
+##         for(j in candidates) {
328
+##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
329
+##             ## if(sumdiff == 1)
330
+##             if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
331
+##                 ( (f[j] - f[i]) >= th ) )
332
+##                 adm[i, j] <- 1L
333
+##         }
334
+##     }
335
+
336
+##     colnames(adm) <- rownames(adm) <- 1:ncol(adm)
337
+##     admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
338
+##     while(TRUE) {
339
+##         ## We remove inaccessible cols (genotypes) and the corresponding
340
+##         ## rows repeatedly until nothing left to remove; any column left
341
+##         ## is therefore accessible throw at least one path.
342
+
343
+##         ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
344
+##         inacc_col <- which(slam::col_sums(admtmp) == 0L)
345
+##         if(length(inacc_col) == 0) break;
346
+##         inacc_row <- inacc_col + 1 ## recall root row is left
347
+##         admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
348
+##     }
349
+##     return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
350
+## }
351
+
219 352
 
220 353
 generate_matrix_genotypes <- function(g) {
221 354
     ## FIXME future: do this for order too? Only if rfitness for order.
... ...
@@ -257,6 +390,8 @@ genot_to_adj_mat <- function(x) {
257 390
     ## that differ by gaining one mutation) with value being the
258 391
     ## difference in fitness between destination and origin
259 392
 
393
+    ## FIXME: code is now in place to do all of this in C++
394
+    
260 395
     ## Make sure sorted, so ancestors always before descendants
261 396
     rs0 <- rowSums(x[, -ncol(x)])
262 397
     x <- x[order(rs0), ]
... ...
@@ -23,14 +23,22 @@
23 23
 
24 24
 to_Magellan <- function(x, file,
25 25
                         max_num_genotypes = 2000) {
26
+    ## This is stupid if we already have, as entry, an object from
27
+    ## rfitness!! to_Fitness_Matrix can be very slow.
26 28
     if(is.null(file)) {
27 29
         file <- tempfile()
28 30
         cat("\n Using file ", file, "\n")
29 31
     }
30
-    gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm
31
-    write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1)
32
-    write.table(gfm, file = file, append = TRUE,
33
-                row.names = FALSE, col.names = FALSE, sep = " ")
32
+    if(inherits(x, "genotype_fitness_matrix")) {
33
+        write(rep(2, ncol(x) - 1), file = file, ncolumns = ncol(x) - 1)
34
+        write.table(x, file = file, append = TRUE,
35
+                    row.names = FALSE, col.names = FALSE, sep = " ")
36
+    } else {
37
+        gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm
38
+        write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1)
39
+        write.table(gfm, file = file, append = TRUE,
40
+                    row.names = FALSE, col.names = FALSE, sep = " ")
41
+    }
34 42
 }
35 43
 
36 44
 to_Fitness_Matrix <- function(x, max_num_genotypes) {
... ...
@@ -1,3 +1,4 @@
1
+
1 2
 ## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2 3
 
3 4
 ## This program is free software: you can redistribute it and/or modify
... ...
@@ -161,7 +162,7 @@ list.of.deps <- function(x) {
161 162
             stop("child not unique")
162 163
     }
163 164
     typeDep <- lookupTypeDep[unique(x$typeDep)]
164
-    if(any(is.na(typeDep)))
165
+    if(any(is_null_na(typeDep)))
165 166
         stop("typeDep value incorrect. Check spelling.")
166 167
     return(list(
167 168
         child = unique(x$child),
... ...
@@ -1436,7 +1437,9 @@ nr_oncoSimul.internal <- function(rFE,
1436 1437
                                   extraTime,
1437 1438
                                   keepPhylog,
1438 1439
                                   detectionProb,
1439
-                                  MMUEF = NULL ## avoid partial matching, and set default
1440
+                                  AND_DrvProbExit,
1441
+                                  MMUEF = NULL, ## avoid partial matching, and set default
1442
+                                  fixation = NULL ## avoid partial matching
1440 1443
                                   ) {
1441 1444
     if(!inherits(rFE, "fitnessEffects"))
1442 1445
         stop(paste("rFE must be an object of class fitnessEffects",
... ...
@@ -1578,6 +1581,28 @@ nr_oncoSimul.internal <- function(rFE,
1578 1581
     ## if( is.null(n2)) n2 <- -9
1579 1582
 
1580 1583
     ## call <- match.call()
1584
+    
1585
+    ## Process the fixed list, if any
1586
+    if(!is_null_na(fixation)) {
1587
+        ng <- namedGenes
1588
+        rownames(ng) <- namedGenes[, "Gene"]
1589
+        ## Usual genotype specification and might allow ordered vectors
1590
+        ## in the future
1591
+        fixation_b <- lapply(fixation, nice.vector.eo, sep = ",")
1592
+        ulf <- unlist(fixation_b)
1593
+        if(any(ulf == ">"))
1594
+            stop("Order effects not allowed (yet?) in fixation.")
1595
+        ulfg <- ng[ulf, 1]
1596
+        if(any(is.na(ulfg)))
1597
+            stop(paste("The 'fixation' list contains genes that are not present",
1598
+                       " in the fitness effects."))
1599
+        ## Sorting here is crucial!!
1600
+        fixation_list <- lapply(fixation_b, function(x) sort(ng[x, 2]))
1601
+    } else {
1602
+        fixation_list <- list()
1603
+    }
1604
+
1605
+    
1581 1606
     return(c(
1582 1607
         nr_BNB_Algo5(rFE = rFE,
1583 1608
                      mu_ = mu,
... ...
@@ -1613,7 +1638,9 @@ nr_oncoSimul.internal <- function(rFE,
1613 1638
                      p2 = dpr["p2"],
1614 1639
                      PDBaseline = dpr["PDBaseline"],
1615 1640
                      cPDetect_i= dpr["cPDetect"],
1616
-                     checkSizePEvery = dpr["checkSizePEvery"]),
1641
+                     checkSizePEvery = dpr["checkSizePEvery"],
1642
+                     AND_DrvProbExit = AND_DrvProbExit,
1643
+                     fixation_list = fixation_list),
1617 1644
         Drivers = list(rFE$drv), ## but when doing pops, these will be repeated
1618 1645
         geneNames = list(names(getNamesID(rFE)))
1619 1646
     ))
... ...
@@ -1753,8 +1780,8 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1753 1780
     default_PDBaseline <- 1.2 * initSize
1754 1781
     default_checkSizePEvery <- 20
1755 1782
     ## No default cPDetect. That is done from p2 and n2 in C++.
1756
-    
1757
-    if((length(x) == 1) && (is.na(x))) {
1783
+    if(is_null_na(x)) {
1784
+    ## if((length(x) == 1) && (is.na(x))) {
1758 1785
         y <- vector()
1759 1786
         y["cPDetect"] <- -9
1760 1787
         y["p2"] <- 9
... ...
@@ -1779,18 +1806,18 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1779 1806
     }
1780 1807
    
1781 1808
     ## This ain't conditional. If not given, always check
1782
-    if( !is.na(x["cPDetect"]) && (sum(!is.na(x["p2"]), !is.na(x["n2"])) >= 1 ))
1809
+    if( !is_null_na(x["cPDetect"]) && (sum(!is_null_na(x["p2"]), !is_null_na(x["n2"])) >= 1 ))
1783 1810
         stop("Specify only cPDetect xor both of p2 and n2")
1784
-    if( (is.na(x["p2"]) + is.na(x["n2"])) == 1 )
1811
+    if( (is_null_na(x["p2"]) + is_null_na(x["n2"])) == 1 )
1785 1812
         stop("If you pass one of n2 or p2, you must also pass the other. ",
1786 1813
              "Otherwise, we would not know what to do.")
1787 1814
 
1788
-    if(is.na(x["PDBaseline"])) {
1815
+    if(is_null_na(x["PDBaseline"])) {
1789 1816
         x["PDBaseline"] <- default_PDBaseline
1790 1817
         if(verbosity > -1)
1791 1818
             message("\n  PDBaseline set to default value of ", default_PDBaseline, "\n")
1792 1819
         }
1793
-    if(is.na(x["checkSizePEvery"])) {
1820
+    if(is_null_na(x["checkSizePEvery"])) {
1794 1821
         x["checkSizePEvery"] <- default_checkSizePEvery
1795 1822
         if(verbosity > -1)
1796 1823
             message("\n  checkSizePEvery set to default value of ",
... ...
@@ -1800,9 +1827,9 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1800 1827
     ## If we get here, we checked consistency of whether cPDetect or p2/n2.
1801 1828
     ## Fill up with defaults the missing values
1802 1829
 
1803
-    if(is.na(x["cPDetect"])) {
1804
-        if(is.na(x["p2"])) {
1805
-            if(!is.na(x["n2"])) stop("Eh? no p2 but n2? Bug")
1830
+    if(is_null_na(x["cPDetect"])) {
1831
+        if(is_null_na(x["p2"])) {
1832
+            if(!is_null_na(x["n2"])) stop("Eh? no p2 but n2? Bug")
1806 1833
             x["n2"] <- default_n2
1807 1834
             x["p2"] <- default_p2
1808 1835
             if(verbosity > -1)
... ...
@@ -1816,14 +1843,14 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1816 1843
     if(x["PDBaseline"] < 0)
1817 1844
         stop("PDBaseline < 0")
1818 1845
     
1819
-    if(!is.na(x["n2"])) {
1846
+    if(!is_null_na(x["n2"])) {
1820 1847
         if(x["n2"] <= x["PDBaseline"])
1821 1848
             stop("n2 <= PDBaseline")
1822 1849
         if(x["p2"] >= 1) stop("p2 >= 1")
1823 1850
         if(x["p2"] <= 0) stop("p2 <= 0")
1824 1851
         x["cPDetect"] <- -9
1825 1852
     } else { ## only if x["cPDetect"] is not NA
1826
-        if(is.na(x["cPDetect"])) stop("eh? you found a bug")## paranoia
1853
+        if(is_null_na(x["cPDetect"])) stop("eh? you found a bug")## paranoia
1827 1854
         x["n2"] <- -9
1828 1855
         x["p2"] <- -9
1829 1856
         if(verbosity > -1)
... ...
@@ -1832,6 +1859,51 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1832 1859
     return(x)
1833 1860
 }
1834 1861
 
1862
+sampledGenotypes <- function(y, genes = NULL) {
1863
+    ## From a popSample object, or a matrix for that matter,
1864
+    ## show the sampled genotypes and their frequencies
1865
+    if(!is.null(genes)) {
1866
+        cols <- which(colnames(y) %in% genes )
1867
+        y <- y[, cols]
1868
+    }
1869
+    nn <- colnames(y)
1870
+    df <- data.frame(table(
1871
+        apply(y, 1, function(z) paste(nn[as.logical(z)], collapse = ", ") )
1872
+    ))
1873
+    gn <- as.character(df[, 1])
1874
+    gn[gn == ""] <- "WT"
1875
+    df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE)
1876
+    return(df)
1877
+}
1878
+
1879
+
1880
+
1881
+list_g_matches_fixed <- function(x, y) {
1882
+    ## Internal function, for testing the fixation output.
1883
+    ## x and y are vectors
1884
+
1885
+    ## x is the set of output genotypes, y the set of fixed
1886
+    ## genotypes/subset of genotypes.
1887
+
1888
+    ## Yes, this function has tests too in test.fixation.R
1889
+    
1890
+    ## All genotypes in x satisfy that they are supersets of at least one
1891
+    ## in y? That is true if, for every element in x, at least one y in
1892
+    ## that x.
1893
+
1894
+    if(is.list(y)) y <- unlist(y)
1895
+    
1896
+    y.nice <- lapply(y, nice.vector.eo, sep = ",")
1897
+    x.nice <- lapply(x, nice.vector.eo, sep = ",")
1898
+
1899
+    fu <- function(u, y.nice)
1900
+        any(unlist(lapply(y.nice, function(z) all(z %in% u))))
1901
+
1902
+    return(all(unlist(lapply(x.nice, fu, y.nice))))
1903
+
1904
+}
1905
+
1906
+
1835 1907
 
1836 1908
 ## emptyFitnessEffects <- function() {
1837 1909
 ##     list(long.rt = list(),
... ...
@@ -61,7 +61,9 @@ rfitness <- function(g, c= 0.5,
61 61
         }
62 62
         m <- cbind(m, Fitness = fi)
63 63
         if(min_accessible_genotypes > 0) {
64
-            num_accessible_genotypes <- count_accessible_g(m, accessible_th)
64
+            ## num_accessible_genotypes <- count_accessible_g(m, accessible_th)
65
+            ## Recall accessibleGenotypes includes the wt, if accessible.
66
+            num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1
65 67
             if(num_accessible_genotypes >= min_accessible_genotypes) {
66 68
                 done <- TRUE
67 69
                 attributes(m) <- c(attributes(m),
... ...
@@ -89,3 +91,7 @@ create_eq_ref <- function(g) {
89 91
     ref <- c(rep(1, nm), rep(0, g - nm))
90 92
     sample(ref)
91 93
 }
94
+
95
+
96
+
97
+
... ...
@@ -1,3 +1,16 @@
1
+Changes in version 2.5.1 (2016-11-12):
2
+	- AND of detectedSizeP and lastMaxDr.
3
+	- fixation as stopping mechanism.
4
+	- sampledGenotypes in user code.
5
+	- clonePhylog et al: deal with never any descendant.
6
+	- samplePop can handle failed simulations graciously.
7
+	- summary.oncosimulpop can handle failed simulations graciously.
8
+	- accessible genotypes now done in C++.
9
+	- OcurringDrivers should not be a factor.
10
+	- samplePop always returns gene names.
11
+	- to_Magellan is much faster with rfitness objects.
12
+	- Several improvements in vignette (English and additional explanations).
13
+
1 14
 Changes in version 2.4.0 (for BioC 3.4):
2 15
 	- Mutator phenotype and gene-specific mutation rates.
3 16
 	- End simulations stochastically as a function of size.	
... ...
@@ -9,7 +22,7 @@ Changes in version 2.4.0 (for BioC 3.4):
9 22
 	- Improved test coverage.
10 23
 	- samplePop: sample at arbitrary sizes.	
11 24
 	- evalAllGenotypes: order = FALSE by default.
12
-
25
+	
13 26
 Changes in version 2.3.17 (2017-09-22):
14 27
 	- random2 for rfitness.
15 28
 	- Vignette: decrease size and running time.
... ...
@@ -114,7 +114,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
114 114
   specifiy anything if you do not want to, and you can pass an empty
115 115
   vector (as \code{character(0)}). The default has changed with respect
116 116
   to v.2.1.3 and previous: it used to be to assume that all
117
-  genes that were not in the \code{noIntGenes} were drivers. The fault
117
+  genes that were not in the \code{noIntGenes} were drivers. The default
118 118
   now is to assume nothing: if you want \code{drvNames} you have
119 119
   to specify them.
120 120
 
... ...
@@ -40,6 +40,8 @@
40 40
                 errorHitMaxTries = TRUE,
41 41
                 verbosity = 0,
42 42
                 initMutant = NULL,
43
+                AND_DrvProbExit = FALSE,
44
+                fixation = NULL,
43 45
                 seed = NULL)
44 46
 
45 47
 oncoSimulPop(Nindiv, fp, model = "Exp", numPassengers = 0, mu = 1e-6,
... ...
@@ -61,6 +63,8 @@ oncoSimulPop(Nindiv, fp, model = "Exp", numPassengers = 0, mu = 1e-6,
61 63
                 errorHitWallTime = TRUE,
62 64
                 errorHitMaxTries = TRUE,
63 65
                 initMutant = NULL,
66
+                AND_DrvProbExit = FALSE,
67
+                fixation = NULL,
64 68
                 verbosity = 0,
65 69
                 mc.cores = detectCores(),
66 70
                 seed = "auto")
... ...
@@ -108,6 +112,8 @@ oncoSimulSample(Nindiv,
108 112
                 typeSample = "whole",
109 113
                 thresholdWhole = 0.5,
110 114
                 initMutant = NULL,
115
+                AND_DrvProbExit = FALSE,
116
+                fixation = NULL,
111 117
                 verbosity  = 1,
112 118
                 showProgress = FALSE,
113 119
                 seed = "auto")
... ...
@@ -289,22 +295,7 @@ This option can not be used with v.1 objects.
289 295
 
290 296
 }
291 297
 
292
-\item{s}{
293
-  Selection coefficient for drivers. 
294
-    Only relevant if using a poset as this is included in the
295
-  fitnessEffects object.
296
-}
297
-\item{sh}{
298
-  Selection coefficient for drivers with restrictions not satisfied. A
299
-  value of 0 means there are no penalties for a driver appearing in a
300
-  clone when its restrictions are not satisfied.
301
-
302
-  To specify "sh=Inf" (in Diaz-Uriarte, 2015) use sh = -1.
303
-
304
-  Only relevant if using a poset as this is included in the
305
-  fitnessEffects object.
306 298
 
307
-}
308 299
 \item{K}{
309 300
   Initial population equilibrium size in the McFarland models.
310 301
 
... ...
@@ -497,10 +488,6 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}.
497 488
 }
498 489
 
499 490
 
500
-
501
-
502
-
503
-
504 491
 \item{verbosity}{ If 0, run silently. Iincreasing values of verbosity
505 492
   provide progressively more information about intermediate steps,
506 493
   possible numerical notes/warnings from the C++ code, etc. Values less
... ...
@@ -529,6 +516,40 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}.
529 516
 \item{showProgress}{If TRUE, provide information, during exection,
530 517
 of the individual done, and the number of attempts and time used.}
531 518
 
519
+\item{AND_DrvProbExit}{If TRUE, cancer will be considered to be reached
520
+  if both the \code{detectionProb} mechanism and \code{detectionDrivers}
521
+  are satisfied. This is and AND, not an OR condition. Using this option
522
+  with \code{fixation} is not allowed (as it does not make much sense).}
523
+
524
+\item{fixation}{If non-NULL, a list or a vector, where each element of
525
+  is a string with a gene or a gene combination. Simulations will stop
526
+  as soon as any of the genes or gene combinations are fixed (i.e.,
527
+  reach frequency of one). If you pass gene combinations, separate genes
528
+  with commas (not '>'); this means order is not (yet?)  supported. This
529
+  way of specifying gene combinations is the same as the one used for
530
+  \code{initMutant} and \code{\link{evalGenotype}}.
531
+
532
+  Using this option with \code{AND_DrvProbExit} is not allowed (as it
533
+  does not make much sense). This option is not allowed either with the
534
+  old v.1 specification.}
535
+
536
+\item{s}{
537
+  Selection coefficient for drivers. 
538
+    Only relevant if using a poset as this is included in the
539
+  fitnessEffects object. This will eventually be deprecated.
540
+}
541
+\item{sh}{
542
+  Selection coefficient for drivers with restrictions not satisfied. A
543
+  value of 0 means there are no penalties for a driver appearing in a
544
+  clone when its restrictions are not satisfied.
545
+
546
+  To specify "sh=Inf" (in Diaz-Uriarte, 2015) use sh = -1.
547
+
548
+  Only relevant if using a poset as this is included in the
549
+  fitnessEffects object. This will eventually be deprecated.
550
+
551
+}
552
+
532 553
 \item{seed}{The seed for the C++ PRNG. You can pass a value. If you set
533 554
   it to NULL, then a seed will be generated in R and passed to C++. If
534 555
   you set it to "auto", then if you are using v.1, the behavior is the
... ...
@@ -612,13 +633,13 @@ of the individual done, and the number of attempts and time used.}
612 633
 
613 634
   Detection of cancer can be a deterministic process, where cancer is
614 635
   always detected (and, thus, simulation ended) when certain conditions
615
-  are met (\code{detectionSize},
616
-  \code{detectionDrivers}). Alternatively, it can be stochastic process
617
-  where probability of detection depends on size. Every so often (see
618
-  below) we assess population size, and detect cancer or not
619
-  probabilistically (comparing the probability of detection for that
620
-  size with a random uniform number). Probability of detection changes
621
-  with population size according to the function
636
+  are met (\code{detectionSize}, \code{detectionDrivers},
637
+  \code{fixation}). Alternatively, it can be stochastic process where
638
+  probability of detection depends on size. Every so often (see below)
639
+  we assess population size, and detect cancer or not probabilistically
640
+  (comparing the probability of detection for that size with a random
641
+  uniform number). Probability of detection changes with population size
642
+  according to the function
622 643
   
623 644
   \deqn{ 1 - e^{-cPDetect (populationsize - PDBaseline)}}.
624 645
 
... ...
@@ -1,6 +1,6 @@
1 1
 \name{samplePop}
2 2
 \alias{samplePop}
3
-
3
+\alias{sampledGenotypes}
4 4
 \title{
5 5
   Obtain a sample from a population of simulations.
6 6
   
... ...
@@ -13,17 +13,26 @@
13 13
   schemes include whole tumor and single cell sampling, and sampling at
14 14
   the end of the tumor progression or during the progression of the
15 15
   disease.
16
+
17
+  
18
+  \code{sampledGenotypes} shows the genotype frequencies from that
19
+  sample. Order effects are ignored.
20
+  
16 21
 }
17 22
 
18 23
 \usage{
19 24
 samplePop(x, timeSample = "last", typeSample = "whole",
20 25
           thresholdWhole = 0.5, geneNames = NULL, popSizeSample = NULL)
26
+
27
+sampledGenotypes(y, genes = NULL)
28
+
21 29
 }
22 30
 
23 31
 \arguments{
24
-  \item{x}{
25
-    An object of class \code{oncosimulpop}.
26
-  }
32
+  \item{x}{An object of class \code{oncosimulpop} or class \code{oncosimul2} (a
33
+  single simulation).}
34
+
35
+  \item{y}{The output from a call to \code{samplePop}.}
27 36
   
28 37
   \item{timeSample}{
29 38
     "last" means to sample each individual in the very last time period
... ...
@@ -65,6 +74,17 @@ samplePop(x, timeSample = "last", typeSample = "whole",
65 74
     This allows you to specify arbitrary sampling schemes with respect
66 75
     to total population size.  }
67 76
 
77
+  \item{genes}{If non-NULL, use only the genes in \code{genes} to create
78
+  the table of genotypes. This can be useful if you only care about the
79
+  genotypes with respect to a subset of genes (say, X), and want to
80
+  collapse with respect to another subset of genes (say, Y), for
81
+  instance if Y is a large set of passenger genes. For example, suppose
82
+  the complete set of genes is 'a', 'b', 'c', 'd', and you specify
83
+  \code{genes = c('a', 'b')}; then, genotypes 'a, b, c' and genotypes
84
+  'a, b, d' will not be shown as different rows in the table of
85
+  frequencies. Likewise, genotypes 'a, c' and genotypes 'a, d' will not
86
+  be shown as different rows.}
87
+
68 88
 }
69 89
 
70 90
 \details{
... ...
@@ -135,13 +155,16 @@ p705 <- examplePosets[["p705"]]
135 155
 ##  leave it at its default value).
136 156
 
137 157
 p1 <- oncoSimulPop(4, p705, mc.cores = 2)
138
-samplePop(p1)
158
+(sp1 <- samplePop(p1))
159
+sampledGenotypes(sp1)
160
+
139 161
 
140 162
 ## Sample at fixed sizes. Notice the requested size
141 163
 ## for the last population is larger than the any population size
142 164
 ## so we get NAs
143 165
 
144
-samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13))
166
+(sp2 <- samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13)))
167
+sampledGenotypes(sp2)
145 168
 
146 169
 
147 170
 ## Now single cell sampling
... ...
@@ -149,8 +172,7 @@ samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13))
149 172
 r1 <- oncoSimulIndiv(p705)
150 173
 samplePop(r1, typeSample = "single")
151 174
 
152
-
153
-
175
+sampledGenotypes(samplePop(r1, typeSample = "single"))
154 176
 
155 177
 }
156 178
 
... ...
@@ -228,6 +228,8 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
228 228
 					const double& checkSizePEvery,
229 229
 					double& nextCheckSizeP,
230 230
 					std::mt19937& ran_gen,
231
+					const bool& AND_DrvProbExit,
232
+					const std::vector<std::vector<int> >& fixation_l,
231 233
 					const double& fatalPopSize = 1e15) {
232 234
   // Fill out, but also compute totPopSize
233 235
   // and return sample summaries for popsize, drivers.
... ...
@@ -254,7 +256,38 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
254 256
   }
255 257
   lastMaxDr = max_ndr;
256 258
 
259
+  // Until fixation done here. Recall we use an OR operation for exiting
260
+  // below.  Could be added to loop above.
261
+  // And we call allGenesinGenotype also above, inside getGenotypeDrivers.
262
+  // So room for speed ups?
257 263
   
264
+  // Since we convert each genotype to a sorted allGenesinGenotype, iterate
265
+  // over that first. Add that pop size if the combination is present in genotype.
266
+  bool fixated = false;
267
+  if(totPopSize > 0) { // Avoid silly things
268
+    if( fixation_l.size() ) {
269
+      std::vector<double> popSize_fixation(fixation_l.size());
270
+      for(size_t i = 0; i < popParams.size(); ++i) {
271
+	std::vector<int> thisg = allGenesinGenotype(Genotypes[i]);
272
+	for(size_t fc = 0; fc != popSize_fixation.size(); ++fc) {
273
+	  // Yes, fixation_l is sorted in R.
274
+	  if(std::includes(thisg.begin(), thisg.end(),
275
+			   fixation_l[fc].begin(), fixation_l[fc].end()) ) {
276
+	    popSize_fixation[fc] += popParams[i].popSize;
277
+	  }
278
+	}
279
+      }
280
+      // Any fixated? But avoid trivial of totPopSize of 0!
281
+      // Now check of > 0 is redundant as we check totPopSize > 0
282
+      double max_popSize_fixation =
283
+	*std::max_element(popSize_fixation.begin(), popSize_fixation.end());
284
+      if( (max_popSize_fixation > 0 ) &&
285
+	  (max_popSize_fixation == totPopSize)) {
286
+	fixated = true;
287
+      }
288
+    }
289
+  }
290
+    
258 291
   if (keepEvery < 0) {
259 292
     storeThis = false;
260 293
   } else if( currentTime >= (lastStoredSample + keepEvery) ) {
... ...
@@ -265,6 +298,7 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
265 298
     simulsDone = true;
266 299
   }
267 300
 
301
+  
268 302
   // FIXME
269 303
   // this is the usual exit condition
270 304
   // (totPopSize >= detectionSize) ||
... ...
@@ -287,28 +321,64 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
287 321
   } else {
288 322
     checkSizePNow = false;
289 323
   }
290
-  
291
-  if(extraTime > 0) {
292
-    if(done_at <  0) {
293
-      if( (totPopSize >= detectionSize) ||
294
-	  ( (lastMaxDr >= detectionDrivers) &&
295
-	    (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
296
-	  ( checkSizePNow &&
297
-	    detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) {
298
-	done_at = currentTime + extraTime;
324
+
325
+
326
+  if(AND_DrvProbExit) {
327
+    // The AND of detectionProb and drivers
328
+    // fixated plays no role here, and cannot be passed from R
329
+    if(extraTime > 0) {
330
+      if(done_at <  0) {
331
+	if( (lastMaxDr >= detectionDrivers) &&
332
+	    (popSizeOverDDr >= minDetectDrvCloneSz) &&
333
+	    checkSizePNow  &&
334
+	    detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) {
335
+	  done_at = currentTime + extraTime;
336
+	}
337
+      } else if (currentTime >= done_at) {
338
+  	simulsDone = true;
339
+  	reachDetection = true; 
299 340
       }
300
-    } else if (currentTime >= done_at) {
301
-	simulsDone = true;
302
-	reachDetection = true; 
341
+    } else if( (lastMaxDr >= detectionDrivers) &&
342
+	       (popSizeOverDDr >= minDetectDrvCloneSz) &&
343
+	       checkSizePNow  &&
344
+	       detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) {
345
+      simulsDone = true;
346
+      reachDetection = true; 
347
+    }
348
+  } else {
349
+    // The usual OR mechanism of each option
350
+    if(extraTime > 0) {
351
+      if(done_at <  0) {
352
+	if( (fixated) ||
353
+	    (totPopSize >= detectionSize) ||
354
+	    ( (lastMaxDr >= detectionDrivers) &&
355
+	      (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
356
+	    ( checkSizePNow  &&
357
+	      detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) {
358
+	  done_at = currentTime + extraTime;
359
+	}
360
+      } else if (currentTime >= done_at) {
361
+  	simulsDone = true;
362
+  	reachDetection = true; 
303 363
       }
304
-  } else if( (totPopSize >= detectionSize) ||
305
-	     ( (lastMaxDr >= detectionDrivers) &&
306
-	       (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
307
-	     ( checkSizePNow &&
308
-	       detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) {
309
-    simulsDone = true;
310
-    reachDetection = true; 
364
+    } else if( (fixated) ||
365
+	       (totPopSize >= detectionSize) ||
366
+	       ( (lastMaxDr >= detectionDrivers) &&
367
+		 (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
368
+	       ( checkSizePNow  &&
369
+		 detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) {
370
+      simulsDone = true;
371
+      reachDetection = true; 
372
+    }
311 373
   }
374
+
375
+  
376
+  // if( checkSizePNow && (lastMaxDr >= detectionDrivers) &&
377
+  // 	       detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) )  {
378
+  //   simulsDone = true;
379
+  //   reachDetection = true; 
380
+  // }
381
+  
312 382
   
313 383
   if(totPopSize >= fatalPopSize) {
314 384
     Rcpp::Rcout << "\n\totPopSize > " << fatalPopSize
... ...
@@ -736,7 +806,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
736 806
 			const std::vector<int>& full2mutator,
737 807
 			const double& cPDetect,
738 808
 			const double& PDBaseline,
739
-			const double& checkSizePEvery) {
809
+			const double& checkSizePEvery,
810
+			const bool& AND_DrvProbExit,
811
+			const std::vector< std::vector<int> >& fixation_l) {
740 812
   
741 813
   double nextCheckSizeP = checkSizePEvery;
742 814
   const int numGenes = fitnessEffects.genomeSize;
... ...
@@ -1566,7 +1638,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1566 1638
 					 PDBaseline,
1567 1639
 					 checkSizePEvery,
1568 1640
 					 nextCheckSizeP,
1569
-					 ran_gen); //keepEvery is for thinning
1641
+					 ran_gen,
1642
+					 AND_DrvProbExit,
1643
+					 fixation_l); //keepEvery is for thinning
1570 1644
       if(verbosity >= 3) {
1571 1645
 	Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() 
1572 1646
 		  << "\n totPopSize after sampling " << totPopSize << "\n";
... ...
@@ -1649,7 +1723,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1649 1723
 			double p2,
1650 1724
 			double PDBaseline,
1651 1725
 			double cPDetect_i,
1652
-			double checkSizePEvery) {
1726
+			double checkSizePEvery,
1727
+			bool AND_DrvProbExit,
1728
+			Rcpp::List fixation_i) {
1653 1729
   // double cPDetect){
1654 1730
   // double n2,
1655 1731
   // double p2,
... ...
@@ -1717,6 +1793,13 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1717 1793
     DP2(muEF.genomeSize);
1718 1794
     throw std::logic_error("full2mutator 0 with mutatorEffects.genomesize != 0");
1719 1795
   }
1796
+
1797
+  // fixation: run until some genotype combinations fixed
1798
+  std::vector < std::vector<int> > fixation_l(fixation_i.size());
1799
+  if( fixation_i.size() != 0 ) {
1800
+    fixation_l = list_to_vector_of_int_vectors(fixation_i);
1801
+  }
1802
+
1720 1803
   
1721 1804
   bool runAgain = true;
1722 1805
   bool reachDetection = false;
... ...
@@ -1797,6 +1880,10 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1797 1880
   
1798 1881
   double currentTime = 0;
1799 1882
   int iter = 0;
1883
+
1884
+  // bool AND_DrvProbExit = ( (cpDetect >= 0) &&
1885
+  // 			     (detectionDrivers < 1e9) &&
1886
+  // 			     (detectionSize < std::numeric_limits<double>::infinity()));
1800 1887
   while(runAgain) {
1801 1888
 
1802 1889
     if(numRuns >= maxNumTries) {
... ...
@@ -1872,7 +1959,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1872 1959
 		  full2mutator,
1873 1960
 		  cPDetect,
1874 1961
 		  PDBaseline,
1875
-		  checkSizePEvery);
1962
+		  checkSizePEvery,
1963
+		  AND_DrvProbExit,
1964
+		  fixation_l);
1876 1965
       ++numRuns;
1877 1966
       forceRerun = false;
1878 1967
     } catch (rerunExcept &e) {
... ...
@@ -5,18 +5,20 @@
5 5
 
6 6
 #include <R_ext/Rdynload.h>
7 7
 
8
-SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP,  SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaseline, SEXP cPDetect_i, SEXP checkSizePEvery);
8
+SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP,  SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP);
9 9
 SEXP OncoSimulR_BNB_Algo5(SEXP restrictTableSEXP, SEXP numDriversSEXP, SEXP numGenesSEXP, SEXP typeCBN_SEXP,  SEXP sSEXP, SEXP deathSEXP, SEXP muSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutantSEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP,  SEXP shSEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP);
10 10
 SEXP OncoSimulR_evalRGenotype(SEXP rGSEXP, SEXP rFESEXP, SEXP verboseSEXP, SEXP prodNegSEXP, SEXP calledBy_SEXP);
11 11
 SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SEXP full2mutator_SEXP, SEXP verboseSEXP, SEXP prodNegSEXP);
12 12
 // SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP);
13
+SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP);
13 14
 
14 15
 // The number is the number of arguments
15 16
 R_CallMethodDef callMethods[]  = {
16
-  {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 35},
17
+  {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 37},
17 18
   {"OncoSimulR_BNB_Algo5", (DL_FUNC) &OncoSimulR_BNB_Algo5, 32},
18 19
   {"OncoSimulR_evalRGenotype", (DL_FUNC) &OncoSimulR_evalRGenotype, 5},
19
-  {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6},  
20
+  {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6},
21
+  {"OncoSimulR_accessibleGenotypes", (DL_FUNC) &OncoSimulR_accessibleGenotypes, 4},
20 22
   //  {"OncoSimulR_readFitnessEffects", (DL_FUNC) &OncoSimulR_readFitnessEffects, 2},
21 23
   {NULL, NULL, 0}
22 24
 };
... ...
@@ -6,8 +6,8 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // nr_BNB_Algo5
9
-Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery);
10
-RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP) {
9
+Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery, bool AND_DrvProbExit, Rcpp::List fixation_list);
10
+RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject __result;
13 13
     Rcpp::RNGScope __rngScope;
... ...
@@ -46,7 +46,9 @@ BEGIN_RCPP
46 46
     Rcpp::traits::input_parameter< double >::type PDBaseline(PDBaselineSEXP);
47 47
     Rcpp::traits::input_parameter< double >::type cPDetect_i(cPDetect_iSEXP);
48 48
     Rcpp::traits::input_parameter< double >::type checkSizePEvery(checkSizePEverySEXP);
49
-    __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery));
49
+    Rcpp::traits::input_parameter< bool >::type AND_DrvProbExit(AND_DrvProbExitSEXP);
50
+    Rcpp::traits::input_parameter< Rcpp::List >::type fixation_list(fixation_listSEXP);
51
+    __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list));
50 52
     return __result;
51 53
 END_RCPP
52 54
 }
... ...
@@ -123,6 +125,24 @@ BEGIN_RCPP
123 125
     return __result;
124 126
 END_RCPP
125 127
 }
128
+
129
+// evalRGenotypeAndMut
130
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th);
131
+RcppExport SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) {
132
+BEGIN_RCPP
133
+    Rcpp::RObject __result;
134
+// Rcpp::RNGScope __rngScope;
135
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP);
136
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP);
137
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP);
138
+ Rcpp::traits::input_parameter< double >::type th(thSEXP);
139
+ __result = Rcpp::wrap(accessibleGenotypes(y, f, numMut, th));
140
+ return __result;
141
+ END_RCPP
142
+}
143
+
144
+
145
+
126 146
 // // readFitnessEffects
127 147
 // void readFitnessEffects(Rcpp::List rFE, bool echo);
128 148
 // RcppExport SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP) {
129 149
new file mode 100644
... ...
@@ -0,0 +1,172 @@
1
+//     Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2
+
3
+//     This program is free software: you can redistribute it and/or modify
4
+//     it under the terms of the GNU General Public License as published by
5
+//     the Free Software Foundation, either version 3 of the License, or
6
+//     (at your option) any later version.
7
+
8
+//     This program is distributed in the hope that it will be useful,
9
+//     but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+//     GNU General Public License for more details.
12
+
13
+//     You should have received a copy of the GNU General Public License
14
+//     along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+#include <Rcpp.h>
17
+// using namespace Rcpp;
18
+
19
+inline int HammingDistance(const Rcpp::IntegerVector& x, const Rcpp::IntegerVector& y) {
20
+  Rcpp::NumericVector diff = Rcpp::abs( x - y );
21
+  return std::accumulate(diff.begin(), diff.end(), 0);
22
+}
23
+
24
+
25
+
26
+// [[Rcpp::export]]
27
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
28
+					Rcpp::NumericVector f,
29
+					Rcpp::IntegerVector numMut, //
30
+					double th) {
31
+  // Return just the indices. Could preserve fitness, but would need
32
+  // another matrix.
33
+  int ng = y.nrow(); //it counts the wt
34
+  Rcpp::IntegerMatrix adm(ng, ng);
35
+  int numMutdiff = 0;
36
+  // I would have thought this would be faster. It ain't.
37
+  // The last genotype never accesses anything.
38
+  // for(int i = 0; i < (ng - 1); ++i) {
39
+  //   // Candidate genotypes to be accessed from i are always of larger
40
+  //   // mutation by 1. And candidates can thus not have smaller index
41
+  //   for(int j = (i + 1); j < ng; ++j) {
42
+  //     if( (numMut(j) == (numMut(i) + 1)) &&
43
+  // 	  ( (f(j) - f(i)) >= th) &&
44
+  // 	  (HammingDistance(y(i, _), y(j, _)) == 1) ) {
45
+  // 	adm(i, j) = 1;
46
+  //     } else if( (numMut(j) > (numMut(i) + 1)) ) {
47
+  // 	break;
48
+  //     }
49
+  //   }
50
+  // }
51
+
52
+  // The last genotype never accesses anything.
53
+  for(int i = 0; i < (ng - 1); ++i) {
54
+    // Candidate genotypes to be accessed from i are always of larger
55
+    // mutation by 1. And candidates can thus not have smaller index
56
+    for(int j = (i + 1); j < ng; ++j) {
57
+      numMutdiff = numMut(j) - numMut(i);
58
+      if( numMutdiff > 1) { // no more to search
59
+  	break; 
60
+      } else if(numMutdiff == 1) {
61
+  	// f(j) - f(i) is faster than HammingDistance
62
+  	// but might lead to more evals?
63
+  	// or fewer, depending on landscape
64
+  	if( ( (f(j) - f(i)) >= th) &&
65
+	    (HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1)
66
+  	    ) {
67
+  	  adm(i, j) = 1;
68
+	  // Rcpp::Rcout << "i = " << i << " j = " << j << " adm " << adm(i,j) << "\n"; 
69
+  	}
70
+      }
71
+    }
72
+  }
73
+
74
+
75
+
76
+  // Slightly different logic from R: Do not resize object; set the row to
77
+  // 0.
78
+  int colsum = 0;
79
+  // int indicator = 0; // indicator != 0 means we set one row to 0
80
+  // so we need to iterate at least once more.
81
+  
82
+  // accessible is the genotype number, not the column!  WT is 1,
83
+  // etc. This makes it easy to keep track of which are accessible.
84
+  Rcpp::IntegerVector accessible = Rcpp::seq_len(ng);
85
+
86
+  // This is doable in one pass
87
+  // while (true) {
88
+  //   indicator = 0;
89
+    for(int k = 1; k < ng; ++k) {
90
+      if(accessible(k) > 0) {
91
+	colsum = std::accumulate(adm(Rcpp::_, k).begin(),
92
+				 adm(Rcpp::_, k).end(), 0);
93
+	if(colsum == 0) { // This genotype ain't reachable
94
+	  // Nothing can be reached from this genotype; fill with 0.
95
+	  adm(k, Rcpp::_) = Rcpp::IntegerVector(ng);
96
+	  accessible(k) = -9;
97
+	  // indicator = 1;
98
+	}
99
+      }
100
+    }
101
+  //   if(indicator == 0) break;
102
+  // }
103
+  return accessible;
104
+}
105
+
106
+
107
+
108
+
109
+// // This would make it easier returning the actual accessible genotypes easily
110
+// // preserving the fitness if needed
111
+// // Not being used now
112
+// // [[Rcpp::export]]
113
+// IntegerVector acc_ge(Rcpp::IntegerMatrix y, Rcpp::NumericVector f,
114
+// 		     Rcpp::IntegerVector numMut,
115
+// 		     int ng, //it counts the wt
116
+// 		     double th) {
117
+  
118
+//   IntegerMatrix adm(ng, ng);
119
+//   int numMutdiff = 0;
120
+  
121
+//   for(int i = 0; i < (ng - 1); ++i) {
122
+//     // Candidates are always of larger mutation by 1
123
+//     for(int j = (i + 1); j < ng; ++j) {
124
+//       numMutdiff = numMut(j) - numMut(i);
125
+//       if(numMutdiff > 1) { // no more to search
126
+// 	break; 
127
+//       } else if(numMutdiff == 1) {
128
+// 	if( ( (f(j) - f(i)) >= th) &&
129
+// 	    (HammingDistance(y(i, _), y(j, _)) == 1) ) {
130
+// 	  adm(i, j) = 1;
131
+// 	}
132
+//       }
133
+//     }
134
+//   }
135
+//   // Keeps root in Rows
136
+//   IntegerMatrix admtmp = adm(Range(0, ng - 1), Range(1, ng - 1));
137
+
138
+//   // Slightly different logic from R: Do not resize object; set the row to
139
+//   // 0.
140
+//   int colsum = 0;
141
+//   int indicator = 0; // indicator != 0 means we set one row to 0
142
+//   // so we need to iterate at least once more.
143
+  
144
+//   // accessible is the genotype number, not the column!  WT is 1,
145
+//   // etc. This makes it easy to keep track of which are accessible.
146
+//   IntegerVector accessible = seq_len(ng - 1) + 1;
147
+  
148
+//   while (true) {
149
+//     indicator = 0;
150
+//     for(int k = 0; k < (ng - 1); ++k) {
151
+//       if(accessible(k) > 0) {
152
+// 	colsum = std::accumulate(admtmp(_, k).begin(),
153
+// 				 admtmp(_, k).end(), 0);
154
+// 	if(colsum == 0) { // This genotype ain't reachable
155
+// 	  // Recall row keeps Root.
156
+// 	  // Nothing can be reached from this genotype; fill with 0.
157
+// 	  admtmp(k + 1, _) = IntegerVector(ng - 1);
158
+// 	  accessible(k) = -9;
159
+// 	  indicator = 1;
160
+// 	}
161
+//       }
162
+//     }
163
+//     if(indicator == 0) break;
164
+//   }
165
+//   return accessible;
166
+// }
167
+
168
+
169
+
170
+
171
+
172
+
... ...
@@ -1332,6 +1332,33 @@ double mutationFromScratch(const std::vector<double>& mu,
1332 1332
 }
1333 1333
 
1334 1334
 
1335
+std::vector < std::vector<int> > list_to_vector_of_int_vectors(Rcpp::List vlist) {
1336
+  // As it says. We check each vector is sorted!
1337
+  std::vector < std::vector<int> > vv(vlist.size());
1338
+  for(int i = 0; i != vlist.size(); ++i) {
1339
+    vv[i] = Rcpp::as<std::vector<int> >(vlist[i]);
1340
+    if( ! is_sorted(vv[i].begin(), vv[i].end()) )
1341
+      throw std::logic_error("Fixation genotypes not sorted. Bug in R code.");
1342
+  }
1343
+  return vv;
1344
+}
1345
+
1346
+// // [[Rcpp::export]]
1347
+// void wrap_list_to_vector_of_int_vectors(Rcpp::List vlist) {
1348
+//   std::vector < std::vector<int> > vo(vlist.size());
1349
+//   vo = list_to_vector_of_int_vectors(vlist);
1350
+//   for(int ii = 0; ii != vo.size(); ++ii) {
1351
+//     Rcpp::Rcout << "\n";
1352
+//     Rcpp::Rcout << " list position " << ii + 1 << ": ";
1353
+//     for(int jj = 0; jj != vo[ii].size(); ++jj ) {
1354
+//       Rcpp::Rcout << vo[ii][jj] << " ";
1355
+//     }
1356
+//   }
1357
+//   Rcpp::Rcout << "\n";
1358
+// }
1359
+
1360
+
1361
+
1335 1362
 // Wrong when/if there are mutator effects. For suppose there wre, and
1336 1363
 // they affected the parent, but no new mutator gene affects the child.
1337 1364
 // We will, however, multiply twice by the mutator effect.  Therefore, we
... ...
@@ -215,6 +215,7 @@ std::vector<double> evalGenotypeFitness(const Genotype& ge,
215 215
 
216 216
 fitnessEffectsAll convertFitnessEffects(Rcpp::List rFE);
217 217
 std::vector<int> getGenotypeDrivers(const Genotype& ge, const std::vector<int>& drv);
218
+std::vector<int> allGenesinGenotype(const Genotype& ge);
218 219
 void print_Genotype(const Genotype& ge);
219 220
 
220 221
 fitness_as_genes fitnessAsGenes(const fitnessEffectsAll& fe);
... ...
@@ -254,6 +255,6 @@ double set_cPDetect(const double n2, const double p2,
254 255
 bool detectedSizeP(const double n, const double cPDetect,
255 256
 		   const double PDBaseline, std::mt19937& ran_gen);
256 257
 
257
-
258
+std::vector < std::vector<int> > list_to_vector_of_int_vectors(Rcpp::List vlist);
258 259
 #endif
259 260
 
260 261
new file mode 100644
... ...
@@ -0,0 +1,240 @@
1
+## This is not to be run by default in BioC servers, only
2
+## on our, or user's, machines
3
+## But using all cores might be bad, and also precludes my logic
4
+## of launching many at the same time.
5
+cat("\n Starting long fixation  at", date(), "\n") 
6
+test_that("Check output is correct", {
7
+    initS <- 200
8
+    u <- 0.2; i <- -0.02; vi <- 0.6; ui <- uv <- -Inf
9
+    od <- allFitnessEffects(
10
+        epistasis = c("u" = u,  "u:i" = ui,
11
+                      "u:v" = uv, "i" = i,
12
+                      "v:-i" = -Inf, "v:i" = vi))
13
+    ## drvNames = c("u", "v"))
14
+    odm <- allMutatorEffects(noIntGenes = c("i" = 50))
15
+    evalAllGenotypesFitAndMut(od, odm, addwt = TRUE)
16
+    initS <- 2000
17
+    cat("\n\n ************** fixation 1:  ***********\n")
18
+    op <- oncoSimulPop(2000, od, muEF = odm, model = "McFL",
19
+                        mu = 1e-4, 
20
+                        onlyCancer = TRUE, finalTime = 5000, detectionSize = NA, detectionProb = NA,
21
+                        initSize = initS, 
22
+                        keepEvery = NA,
23
+                        fixation = c("u", "v"),
24
+                        mc.cores = 2
25
+                        )
26
+    sp <- samplePop(op)
27
+    rsop <- rowSums(sp)
28
+    stopifnot(all(rsop >= 1))
29
+    sg <- sampledGenotypes(sp)
30
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("u", "v")))
31
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
32
+    cat("\n\n ************** fixation 2:  ***********\n")
33
+    op <- oncoSimulPop(2000, od, muEF = odm, model = "McFL",
34
+                        mu = 1e-4, 
35
+                        onlyCancer = TRUE, finalTime = NA, detectionSize = NA, detectionProb = NA,
36
+                        initSize = initS, 
37
+                        keepEvery = NA,
38
+                        fixation = c("u", "i"),
39
+                        mc.cores = 2
40
+                        )
41
+    sp <- samplePop(op)
42
+    rsop <- rowSums(sp)
43
+    stopifnot(all(rsop >= 1))
44
+    sg <- sampledGenotypes(sp)
45
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("u", "i")))
46
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("ui")))
47
+    ## this takes much longer, of course, so only 100
48
+    ## and increase mu and its fitness
49
+    u <- 0.8; i <- -0.02; vi <- 0.6; ui <- uv <- -Inf
50
+    od <- allFitnessEffects(
51
+        epistasis = c("u" = u,  "u:i" = ui,
52
+                      "u:v" = uv, "i" = i,
53
+                      "v:-i" = -Inf, "v:i" = vi))
54
+    ## drvNames = c("u", "v"))
55
+    odm <- allMutatorEffects(noIntGenes = c("i" = 50))
56
+    evalAllGenotypesFitAndMut(od, odm, addwt = TRUE)
57
+    initS <- 2000
58
+    cat("\n\n ************** fixation 3:  ***********\n")    
59
+    op <- oncoSimulPop(2000, od, muEF = odm, model = "McFL",
60
+                        mu = 1e-3, 
61
+                        onlyCancer = TRUE, finalTime = 5000, detectionSize = NA, detectionProb = NA,
62
+                        initSize = initS,
63
+                        keepEvery = NA,
64
+                        fixation = c("u"),
65
+                        mc.cores = 2
66
+                        )
67
+    sp <- samplePop(op)
68
+    rsop <- rowSums(sp)
69
+    stopifnot(all(rsop >= 1))
70
+    sg <- sampledGenotypes(sp)
71
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("u")))
72
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
73
+    ## this takes much longer, of course, 
74
+    ## and increase mu and its fitness
75
+    u <- 0.8; i <- -0.02; vi <- 2.6; ui <- uv <- -Inf
76
+    od <- allFitnessEffects(
77
+        epistasis = c("u" = u,  "u:i" = ui,
78
+                      "u:v" = uv, "i" = i,
79
+                      "v:-i" = -Inf, "v:i" = vi))
80
+    ## drvNames = c("u", "v"))
81
+    odm <- allMutatorEffects(noIntGenes = c("i" = 50))
82
+    evalAllGenotypesFitAndMut(od, odm, addwt = TRUE)
83
+    initS <- 50
84
+    cat("\n\n ************** fixation 4:  ***********\n")    
85
+    op <- oncoSimulPop(200, od, muEF = odm, model = "McFL",
86
+                       mu = 1e-3, 
87
+                       onlyCancer = TRUE, finalTime = 1000, detectionSize = NA, detectionProb = NA,
88
+                       initSize = initS,
89
+                       keepEvery = NA,
90
+                       max.num.tries = 5000,
91
+                       fixation = c("i"),
92
+                       mc.cores = 2
93
+                        )
94
+    sp <- samplePop(op)
95
+    rsop <- rowSums(sp)
96
+    stopifnot(all(rsop >= 1))
97
+    sg <- sampledGenotypes(sp)
98
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("i")))
99
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("iv")))
100
+    ## very slow
101
+    cat("\n\n ************** fixation 5:  ***********\n")    
102
+    op <- oncoSimulPop(200, od, model = "McFL",
103
+                       mu = 1e-3, 
104
+                       onlyCancer = TRUE, finalTime = 1000, detectionSize = NA, detectionProb = NA,
105
+                       initSize = 50,
106
+                       keepEvery = NA,
107
+                       fixation = c("v"),
108
+                       max.num.tries = 10000,
109
+                       mc.cores = 2
110
+                       )
111
+    sp <- samplePop(op)
112
+    rsop <- rowSums(sp)
113
+    stopifnot(all(rsop >= 1))
114
+    sg <- sampledGenotypes(sp)
115
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("v")))
116
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("iv")))
117
+    initS <- 100
118
+    u <- 0.2; i <- -0.02; vi <- 0.6; ui <- uv <- 1.2
119
+    od <- allFitnessEffects(
120
+        epistasis = c("u" = u,  "u:i" = ui,
121
+                      "u:v" = uv, "i" = i,
122
+                      "v:i" = vi))
123
+    odm <- allMutatorEffects(noIntGenes = c("i" = 50))
124
+    evalAllGenotypesFitAndMut(od, odm, addwt = TRUE)
125
+    initS <- 1000
126
+    cat("\n\n ************** fixation 6:  ***********\n")    
127
+    op <- oncoSimulPop(2000, od, muEF = odm, model = "McFL",
128
+                        mu = 1e-3, 
129
+                        onlyCancer = TRUE, finalTime = 5000, detectionSize = NA, detectionProb = NA,
130
+                        initSize = initS,
131
+                        keepEvery = NA,
132
+                        fixation = c("u, v"),
133
+                        mc.cores = 2
134
+                        )
135
+    sp <- samplePop(op)
136
+    rsop <- rowSums(sp)
137
+    stopifnot(all(rsop >= 1))
138
+    sg <- sampledGenotypes(sp)
139
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("u,v")))
140
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
141
+    cat("\n\n ************** fixation 7:  ***********\n")    
142
+    op <- oncoSimulPop(1000, od, model = "McFL",
143
+                        mu = 1e-3, 
144
+                        onlyCancer = TRUE, finalTime = 1000, detectionSize = NA, detectionProb = NA,
145
+                        initSize = initS,
146
+                        keepEvery = NA,
147
+                        fixation = c("u,v"),
148
+                        mc.cores = 2
149
+                        )
150
+    sp <- samplePop(op)
151
+    rsop <- rowSums(sp)
152
+    stopifnot(all(rsop >= 1))
153
+    sg <- sampledGenotypes(sp)
154
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("u,v")))
155
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
156
+    cat("\n\n ************** fixation 8:  ***********\n")    
157
+    op <- oncoSimulPop(200, od, model = "McFL",
158
+                        mu = 1e-3, 
159
+                        onlyCancer = TRUE, finalTime = 1000, detectionSize = NA, detectionProb = NA,
160
+                        initSize = 30,
161
+                        keepEvery = NA,
162
+                        fixation = c("i,v"),
163
+                        mc.cores = 2
164
+                       )
165
+    sp <- samplePop(op)
166
+    rsop <- rowSums(sp)
167
+    stopifnot(all(rsop >= 1))
168
+    sg <- sampledGenotypes(sp)
169
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("i,v")))
170
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
171
+
172
+    
173
+    cat("\n\n ************** fixation 9:  ***********\n")    
174
+    op <- oncoSimulPop(2000, od, model = "McFL",
175
+                       mu = 1e-3, 
176
+                       onlyCancer = TRUE, finalTime = NA, detectionSize = NA, detectionProb = NA,
177
+                       initSize = initS,
178
+                       keepEvery = NA,
179
+                       fixation = c("u, i, v"),
180
+                       mc.cores = 2
181
+                       )
182
+    sp <- samplePop(op)
183
+    rsop <- rowSums(sp)
184
+    stopifnot(all(rsop >= 1))
185
+    sg <- sampledGenotypes(sp)
186
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("v, i, u")))
187
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
188
+
189
+
190
+    
191
+    u <- 0.2; i <- -0.02; vi <- 1.6; ui <- uv <- -Inf
192
+    od <- allFitnessEffects(
193
+        epistasis = c("u" = u,  "u:i" = ui,
194
+                      "u:v" = uv, "i" = i,
195
+                      "v:i" = vi))
196
+    cat("\n\n ************** fixation 10:  ***********\n")    
197
+    op <- oncoSimulPop(2000, od, model = "McFL",
198
+                       mu = 1e-3, 
199
+                       onlyCancer = TRUE, finalTime = NA, detectionSize = NA, detectionProb = NA,
200
+                       initSize = initS,
201
+                       keepEvery = NA,
202
+                       fixation = c("u", "i", "v"),
203
+                       mc.cores = 2
204
+                       )
205
+    sp <- samplePop(op)
206
+    rsop <- rowSums(sp)
207
+    stopifnot(all(rsop >= 1))
208
+    sg <- sampledGenotypes(sp)
209
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("v", "i", "u")))
210
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
211
+
212
+
213
+    u <- 0.2; i <- -0.02; vi <- 1.6; 
214
+    od <- allFitnessEffects(
215
+        epistasis = c("u" = u,  "u:i" = -.9, "v" = 1.2,
216
+                      "u:v" = 2, "i" = i,
217
+                      "v:i" = vi, "u:i:v" = 10))
218
+    evalAllGenotypes(od, addwt = TRUE)
219
+    cat("\n\n ************** fixation 11:  ***********\n")    
220
+    op <- oncoSimulPop(2000, od, model = "McFL",
221
+                       mu = 1e-3, 
222
+                       onlyCancer = TRUE, finalTime = NA, detectionSize = NA, detectionProb = NA,
223
+                       initSize = 100,
224
+                       keepEvery = NA,
225
+                       fixation = c("u,i,v"),
226
+                       mc.cores = 2
227
+                       )
228
+    sp <- samplePop(op)
229
+    rsop <- rowSums(sp)
230
+    stopifnot(all(rsop >= 1))
231
+    sg <- sampledGenotypes(sp)
232
+    expect_true(list_g_matches_fixed(sg[, "Genotype"], c("v, i, u")))
233
+    expect_false(list_g_matches_fixed(sg[, "Genotype"], c("uv")))
234
+
235
+
236
+
237
+    
238
+})
239
+cat("\n Ending long fixation  at", date(), "\n") 
240
+
... ...
@@ -160,4 +160,5 @@ test_that("exercise mu > 1, new format", {
160 160
 
161 161
 
162 162
 
163
+
163 164
 set.seed(NULL)
164 165
new file mode 100644
... ...
@@ -0,0 +1,18 @@
1
+test_that("We obtain same accessible genotypes with different functions", {
2
+    ## More likely to catch bugs if many iters, rather than large matrices
3
+    niter <- 100
4
+    for(i in 1:niter) {
5
+        ## cat("\n i   iteration fast accessible comp ", i)
6
+        for(ng in 2:6) {
7
+            rtmp <- rfitness(ng)
8
+            a1 <- OncoSimulR:::faster_accessible_genotypes_R(rtmp, 0)
9
+            ajm <- OncoSimulR:::genot_to_adj_mat(rtmp)
10
+            a2 <- colnames(OncoSimulR:::filter_inaccessible(ajm, 0))
11
+            a3 <- OncoSimulR:::wrap_accessibleGenotypes(rtmp, 0)
12
+            stopifnot(identical(as.integer(a1), a3))
13
+            stopifnot(identical(as.integer(a2), a3))
14
+            stopifnot(all(a2 ==  a3))
15
+            stopifnot(identical(a2, as.character(a3)))
16
+        }
17
+    } 
18
+})
... ...
@@ -98,6 +98,7 @@ test_that("exercising plotClonePhylog", {
98 98
               plotClonePhylog(tmp, N = 0, timeEvents = TRUE)
99 99
           })
100 100
 
101
+
101 102
 ## the next is slightly slow
102 103
 test_that("exercising the fitnessEffects plotting code", {
103 104
               data(examplesFitnessEffects)
... ...
@@ -487,5 +488,43 @@ test_that("exercise single clone and single driver", {
487 488
 ## plot.stacked(1:2, log10(cbind(c(5, 1), c(5, 11))))
488 489
 ## plot.stacked(1:2, log10(cbind(c(6, 2), c(8, 14))))
489 490
 
491
+
492
+
493
+
494
+test_that("exercising phylogClone", {
495
+    ## Testing an internal function
496
+    data(examplesFitnessEffects)
497
+    for(i in 1:15){ 
498
+              tmp <-  oncoSimulIndiv(examplesFitnessEffects[["o3"]],
499
+                                     model = "McFL", 
500
+                                     mu = 5e-6,
501
+                                     detectionSize = 1e8, 
502
+                                     detectionDrivers = 3,
503
+                                     sampleEvery = 0.03, 
504
+                                     max.num.tries = 10,
505
+                                     keepEvery = 15,
506
+                                     initSize = 20,
507
+                                     finalTime = 1,
508
+                                     onlyCancer = FALSE,
509
+                                     keepPhylog = TRUE)
510
+              OncoSimulR:::phylogClone(tmp)
511
+    }
512
+    for(i in 1:15){ 
513
+        tmp <-  oncoSimulIndiv(examplesFitnessEffects[["cbn2"]],
514
+                               model = "McFL", 
515
+                               mu = 5e-6,
516
+                               detectionSize = 1e8, 
517
+                               detectionDrivers = 3,
518
+                               sampleEvery = 0.03, 
519
+                               max.num.tries = 10,
520
+                               keepEvery = 15,
521
+                               initSize = 20,
522
+                               finalTime = 1,
523
+                               onlyCancer = FALSE,
524
+                               keepPhylog = TRUE)
525
+        OncoSimulR:::phylogClone(tmp)
526
+    }
527
+})
528
+
490 529
 cat(paste("\n Ending exercise-plotting-code at", date()))
491 530
 
... ...
@@ -20,3 +20,23 @@ test_that("Expect output", {
20 20
                                  min_accessible_genotypes = 6)), "Fitness",
21 21
                   fixed = TRUE)
22 22
 })
23
+
24
+
25
+test_that("Minimal tests of generate_matrix_genotypes", {
26
+    ## By induction, if it works for the few first, should work for all,
27
+    ## unless memory issues. And if we go beyond, say, 10 or 12, it can
28
+    ## take long in slow machines.
29
+    for(i in 1:13) {
30
+        tmp <- OncoSimulR:::generate_matrix_genotypes(i)
31
+        stopifnot(nrow(tmp) == (2^i))
32
+        stopifnot(ncol(tmp) == i)
33
+        cstmp <- colSums(tmp)
34
+        lucstmp <- unique(cstmp)
35
+        stopifnot(length(lucstmp) == 1)
36
+        stopifnot(lucstmp[1] == ((2^i)/2)) ## yes, 2^(i - 1) but do full
37
+        ## simple logic
38
+        rm(tmp)
39
+        rm(cstmp)
40
+        rm(lucstmp)
41
+    }
42
+})
23 43
new file mode 100644
... ...
@@ -0,0 +1,430 @@
1
+cat("\n Starting fixation  at", date(), "\n") ## about 4 seconds
2
+test_that("Minimal run", {
3
+    initS <- 2000
4
+    u <- 0.2; i <- -0.02; vi <- 0.6; ui <- uv <- -Inf
5
+    od <- allFitnessEffects(
6
+        epistasis = c("u" = u,  "u:i" = ui,
7
+                      "u:v" = uv, "i" = i,
8
+                      "v:-i" = -Inf, "v:i" = vi))
9
+    ## drvNames = c("u", "v"))
10
+    odm <- allMutatorEffects(noIntGenes = c("i" = 50))
11
+    ## evalAllGenotypesFitAndMut(od, odm, addwt = TRUE)
12
+    initS <- 1000
13
+    ## these should all run
14
+    oncoSimulIndiv(od, muEF = odm, model = "McFL",
15
+                   mu = 1e-4, 
16
+                   onlyCancer = TRUE, finalTime = 15000, detectionSize = NA, detectionProb = NA,
17
+                   initSize = initS, 
18
+                   keepEvery = NA,
19
+                   fixation = c("u", "v")
20
+                   )
21
+    oncoSimulIndiv(od, muEF = odm, model = "McFL",
22
+                   mu = 1e-4, 
23
+                   onlyCancer = TRUE, finalTime = 15000, detectionSize = NA, detectionProb = NA,
24
+                   initSize = initS, 
25
+                   keepEvery = NA,
26
+                   fixation = list("u", "v")
27
+                   )
28
+    oncoSimulPop(2, od, muEF = odm, model = "McFL",
29
+                 mu = 1e-4, 
30
+                 onlyCancer = TRUE, finalTime = 15000, detectionSize = NA, detectionProb = NA,
31
+                 initSize = initS, 
32
+                 keepEvery = NA,
33
+                 fixation = c("u", "v"),
34
+                 mc.cores = 2
35
+                 )
36
+    oncoSimulPop(2, od, muEF = odm, model = "McFL",
37
+                 mu = 1e-4, 
38
+                 onlyCancer = TRUE, finalTime = 15000, detectionSize = NA, detectionProb = NA,
39
+                 initSize = initS, 
40
+                 keepEvery = NA,
41
+                 fixation = list("u", "v"),
42
+                 mc.cores = 2
43
+                 )
44
+    oncoSimulSample(2, od, muEF = odm, model = "McFL",
45
+                    mu = 1e-4, 
46
+                    onlyCancer = TRUE, finalTime = 15000, detectionSize = NA, detectionProb = NA,
47
+                    initSize = 100, 
48
+                    fixation = c("u", "v")
49
+                    )
50
+    ## A gene without fitness effects