Browse code

v.2.5.1. \n - much faster accessible genots \n - AND of drivers and size \n - fixation \n - doc. improvements

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@123922 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 13/11/2016 09:46:27
Showing 29 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.5.0
5
-Date: 2016-09-22
4
+Version: 2.5.1
5
+Date: 2016-11-12
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -9,7 +9,8 @@ export("oncoSimulPop", "oncoSimulIndiv", "samplePop",
9 9
        "evalAllGenotypesFitAndMut",
10 10
        "rfitness",
11 11
        "plotFitnessLandscape",
12
-       "to_Magellan"
12
+       "to_Magellan",
13
+       "sampledGenotypes"
13 14
        )
14 15
 
15 16
 S3method(plot, oncosimul)
... ...
@@ -47,7 +48,8 @@ importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate",
47 48
            "filter")
48 49
 importFrom("smatr", "ma") ## for major axis regression in some tests
49 50
 importFrom("car", "linearHypothesis")
50
-
51
+## importFrom("slam", "simple_triplet_zero_matrix", ## "colapply_simple_triplet_matrix",
52
+##            "col_sums")
51 53
 
52 54
 
53 55
 
... ...
@@ -63,6 +63,8 @@ oncoSimulSample <- function(Nindiv,
63 63
                             typeSample = "whole",
64 64
                             thresholdWhole = 0.5,
65 65
                             initMutant = NULL,
66
+                            AND_DrvProbExit = FALSE,
67
+                            fixation = NULL,
66 68
                             verbosity  = 1,
67 69
                             showProgress = FALSE,
68 70
                             seed = "auto"){
... ...
@@ -190,7 +192,9 @@ oncoSimulSample <- function(Nindiv,
190 192
                                initMutant = initMutant,
191 193
                                keepPhylog = keepPhylog,
192 194
                                mutationPropGrowth = mutationPropGrowth,
193
-                               detectionProb = detectionProb)        
195
+                               detectionProb = detectionProb,
196
+                               AND_DrvProbExit = AND_DrvProbExit,
197
+                               fixation = fixation)        
194 198
         if(tmp$other$UnrecoverExcept) {
195 199
             return(f.out.unrecover.except(tmp))
196 200
         }
... ...
@@ -291,7 +295,7 @@ samplePop <- function(x, timeSample = "last",
291 295
                             popSizeSample = popSizeSample)
292 296
         dim(z) <- c(1, length(z))
293 297
         if(is.null(gN) && (!is.null(x$geneNames)))
294
-            gN <- geneNames
298
+            gN <- x$geneNames
295 299
     }
296 300
     message("\n Subjects by Genes matrix of ",
297 301
         nrow(z), " subjects and ",
... ...
@@ -338,6 +342,8 @@ oncoSimulPop <- function(Nindiv,
338 342
                          errorHitWallTime = TRUE,
339 343
                          errorHitMaxTries = TRUE,
340 344
                          initMutant = NULL,
345
+                         AND_DrvProbExit = FALSE,
346
+                         fixation = NULL,
341 347
                          verbosity  = 0,
342 348
                          mc.cores = detectCores(),
343 349
                          seed = "auto") {
... ...
@@ -379,7 +385,9 @@ oncoSimulPop <- function(Nindiv,
379 385
                         seed = seed, keepPhylog = keepPhylog,
380 386
                         initMutant = initMutant,
381 387
                         mutationPropGrowth = mutationPropGrowth,
382
-                        detectionProb = detectionProb),
388
+                        detectionProb = detectionProb,
389
+                        AND_DrvProbExit = AND_DrvProbExit,
390
+                        fixation = fixation),
383 391
                     mc.cores = mc.cores
384 392
                     )
385 393
     class(pop) <- "oncosimulpop"
... ...
@@ -422,16 +430,33 @@ oncoSimulIndiv <- function(fp,
422 430
                            errorHitMaxTries = TRUE,
423 431
                            verbosity = 0,
424 432
                            initMutant = NULL,
433
+                           AND_DrvProbExit = FALSE,
434
+                           fixation = NULL,
425 435
                            seed = NULL
426 436
                            ) {
427 437
     call <- match.call()
428
-    if(all(c(is.na(detectionProb),
429
-             is.na(detectionSize),
430
-             is.na(detectionDrivers),
431
-             is.na(finalTime))))
438
+    if(all(c(is_null_na(detectionProb),
439
+             is_null_na(detectionSize),
440
+             is_null_na(detectionDrivers),
441
+             is_null_na(finalTime),
442
+             is_null_na(fixation)
443
+             )))
432 444
         stop("At least one stopping condition should be given.",
433 445
              " At least one of detectionProb, detectionSize, detectionDrivers,",
434
-             " finalTime. Otherwise, we'll run forever.")
446
+             " finalTime. Otherwise, we'll run until aborted by max.wall.time,",
447
+             " max.num.tries, and the like.")
448
+
449
+    if(AND_DrvProbExit && (is_null_na(detectionProb) || is_null_na(detectionDrivers)))
450
+        stop("AND_DrvProbExit is TRUE: both of detectionProb and detectionDrivers",
451
+             " must be non NA.")
452
+    if(AND_DrvProbExit && !is_null_na(detectionSize)) {
453
+        warning("With AND_DrvProbExit = TRUE, detectionSize is ignored.")
454
+        detectionSize <- NA
455
+    }
456
+    if(inherits(fp, "fitnessEffects")) {
457
+        s <- sh <- NULL ## force it soon!
458
+    }
459
+
435 460
     ## legacies from poor name choices
436 461
     typeFitness <- switch(model,
437 462
                           "Bozic" = "bozic1",
... ...
@@ -467,7 +492,8 @@ oncoSimulIndiv <- function(fp,
467 492
         if(model %in% c("Bozic", "Exp") )
468 493
             minDetectDrvCloneSz <- 0
469 494
         else if (model %in% c("McFL", "McFarlandLog"))
470
-            minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers)
495
+            minDetectDrvCloneSz <- initSize
496
+        ## minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers)
471 497
         else
472 498
             stop("Unknown model")
473 499
     }
... ...
@@ -484,7 +510,7 @@ oncoSimulIndiv <- function(fp,
484 510
     ## No user-visible magic numbers
485 511
     ## if(is.null(keepEvery))
486 512
     ##     keepEvery <- -9
487
-    if(is.na(keepEvery)) keepEvery <- -9
513
+    if(is_null_na(keepEvery)) keepEvery <- -9
488 514
 
489 515
     
490 516
     if( (keepEvery > 0) & (keepEvery < sampleEvery)) {
... ...
@@ -499,10 +525,11 @@ oncoSimulIndiv <- function(fp,
499 525
     if( (typeFitness == "exp") && (death != 1) )
500 526
         warning("Using fitness exp with death != 1")
501 527
 
502
-
503
-    if(is.na(detectionDrivers)) detectionDrivers <- (2^31) - 1
504
-    if(is.na(detectionSize)) detectionSize <- Inf
505
-    if(is.na(finalTime)) finalTime <- Inf
528
+    if(!is_null_na(detectionDrivers) && (detectionDrivers >= 1e9))
529
+        stop("detectionDrivers > 1e9; this doesn't seem reasonable")
530
+    if(is_null_na(detectionDrivers)) detectionDrivers <- (2^31) - 1
531
+    if(is_null_na(detectionSize)) detectionSize <- Inf
532
+    if(is_null_na(finalTime)) finalTime <- Inf
506 533
     
507 534
     
508 535
     if(!inherits(fp, "fitnessEffects")) {
... ...
@@ -515,12 +542,18 @@ oncoSimulIndiv <- function(fp,
515 542
             stop(m)
516 543
            
517 544
         }
545
+        if(AND_DrvProbExit) {
546
+            stop("The AND_DrvProbExit = TRUE setting is invalid",
547
+                 " with the old poset format.")
548
+        }
518 549
         if(!is.null(muEF))
519
-            stop("Mutator effects cannot be especified with the old poset format")
550
+            stop("Mutator effects cannot be specified with the old poset format.")
520 551
         if( length(initMutant) > 0)  
521 552
             warning("With the old poset format you can no longer use initMutant.",
522 553
                     " The initMutant you passed will be ignored.")
523
-            ## stop("With the old poset, initMutant can only take a single value.")
554
+        ## stop("With the old poset, initMutant can only take a single value.")
555
+        if(!is_null_na(fixation))
556
+            stop("'fixation' cannot be specified with the old poset format.")
524 557
         ## Seeding C++ is now much better in new version
525 558
         if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed
526 559
             ## name is a legacy. This is really the seed for the C++ generator.
... ...
@@ -530,7 +563,7 @@ oncoSimulIndiv <- function(fp,
530 563
         if(verbosity >= 2)
531 564
             cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
532 565
 
533
-        if(!is.na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
566
+        if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
534 567
         ## if(message.v1)
535 568
         ##     message("You are using the old poset format. Consider using the new one.")
536 569
    
... ...
@@ -576,6 +609,7 @@ oncoSimulIndiv <- function(fp,
576 609
                   silent = !verbosity)
577 610
         objClass <- "oncosimul"
578 611
     } else {
612
+        s <- sh <- NULL ## force it.
579 613
         if(numPassengers != 0)
580 614
             warning(paste("Specifying numPassengers has no effect",
581 615
                           " when using fitnessEffects objects. ",
... ...
@@ -594,6 +628,18 @@ oncoSimulIndiv <- function(fp,
594 628
             if(verbosity >= 2)
595 629
                 cat("\n A (high quality) random seed will be generated in C++\n")
596 630
         }
631
+        if(!is_null_na(fixation)) {
632
+            if( (!is.list(fixation)) && (!is.vector(fixation))  )
633
+                stop("'fixation' must be a list or a vector.")
634
+            if(!(all(unlist(lapply(fixation, is.vector)))))
635
+                stop("Each element of 'fixation' must be a single element character vector.")
636
+            if(!(all(unlist(lapply(fixation, class)) == "character")))
637
+                stop("Each element of 'fixation' must be a single element character vector.")
638
+            if(!(all( unlist(lapply(fixation, length)) == 1)))
639
+                stop("Each element of 'fixation' must be a single element character vector.")
640
+            if(AND_DrvProbExit)
641
+                stop("It makes no sense to pass AND_DrvProbExit and a fixation list.")
642
+        }
597 643
         op <- try(nr_oncoSimul.internal(rFE = fp, 
598 644
                                         birth = birth,
599 645
                                         death = death,  
... ...
@@ -625,7 +671,9 @@ oncoSimulIndiv <- function(fp,
625 671
                                         errorHitMaxTries = errorHitMaxTries,
626 672
                                         keepPhylog = keepPhylog,
627 673
                                         MMUEF = muEF,
628
-                                        detectionProb = detectionProb),
674
+                                        detectionProb = detectionProb,
675
+                                        AND_DrvProbExit = AND_DrvProbExit,
676
+                                        fixation = fixation),
629 677
                   silent = !verbosity)
630 678
         objClass <- c("oncosimul", "oncosimul2")
631 679
     }
... ...
@@ -666,7 +714,7 @@ summary.oncosimul <- function(object, ...) {
666 714
         if( (tmp$minDMratio == -99)) tmp$minDMratio <- NA
667 715
         if( (tmp$minBMratio == -99)) tmp$minBMratio <- NA
668 716
         tmp$OccurringDrivers <- object$OccurringDrivers
669
-        return(as.data.frame(tmp))
717
+        return(as.data.frame(tmp, stringsAsFactors = FALSE))
670 718
     }
671 719
 }
672 720
 
... ...
@@ -690,10 +738,29 @@ print.oncosimul <- function(x, ...) {
690 738
 }
691 739
 
692 740
 ## I want this to return things that are storable
741
+## summary.oncosimulpop <- function(object, ...) {
742
+##     as.data.frame(rbindlist(lapply(object, summary)))
743
+## }
744
+
693 745
 summary.oncosimulpop <- function(object, ...) {
694
-    as.data.frame(rbindlist(lapply(object, summary)))
746
+    tmp <- lapply(object, summary)
747
+    rm <- which(unlist(lapply(tmp, function(x) (length(x) == 1) && (is.na(x)))))
748
+    if(length(rm) > 0)
749
+        if(length(rm) < length(object)) {
750
+        warning("Some simulations seem to have failed and will be removed",
751
+                " from the summary. The failed runs are ",
752
+                paste(rm, collapse = ", "),
753
+                ".")
754
+        tmp <- tmp[-rm]
755
+        } else {
756
+            warning("All simulations failed.")
757
+            return(NA)
758
+        }
759
+    as.data.frame(rbindlist(tmp))
695 760
 }
696 761
 
762
+
763
+
697 764
 print.oncosimulpop <- function(x, ...) {
698 765
     cat("\nPopulation of OncoSimul trajectories of",
699 766
         length(x), "individuals. Call :\n")
... ...
@@ -1383,8 +1450,17 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
1383 1450
         stop("Phylogenetic information is only stored with v >= 2")
1384 1451
     z <- which_N_at_T(x, N, t)
1385 1452
     tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all
1386
-                               ## sample size info at each period
1453
+    ## sample size info at each period
1454
+
1455
+    if( (length(tG) == 1) && (tG == "")) {
1456
+        warning("There never was a descendant of WT")
1457
+    }
1458
+    
1387 1459
     df <- x$other$PhylogDF
1460
+    if(nrow(df) == 0) {
1461
+        warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared.")
1462
+        return(NA)
1463
+    }
1388 1464
     if(!keepEvents) { ## is this just a graphical thing? or not?
1389 1465
         df <- df[!duplicated(df[, c(1, 2)]), ]
1390 1466
     }
... ...
@@ -1420,6 +1496,13 @@ plotClonePhylog <- function(x, N = 1, t = "last",
1420 1496
              "very fast, before any clones beyond the initial were ",
1421 1497
              "generated.")
1422 1498
     pc <- phylogClone(x, N, t, keepEvents)
1499
+    ## if(is.na(pc)) {
1500
+    ##     ## This should not be reachable, as caught before
1501
+    ##     ## where we check for nrow of PhylogDF   
1502
+    ##     warning("No clone phylogeny available. Exiting without plotting.")
1503
+    ##     return(NULL)
1504
+    ## }
1505
+        
1423 1506
     l0 <- igraph::layout.reingold.tilford(pc$g)
1424 1507
     if(!timeEvents) {
1425 1508
         plot(pc$g, layout = l0)
... ...
@@ -1502,6 +1585,12 @@ get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) {
1502 1585
 
1503 1586
 get.mut.vector <- function(x, timeSample, typeSample,
1504 1587
                            thresholdWhole, popSizeSample) {
1588
+    if(is.null(x$TotalPopSize)) {
1589
+        warning(paste("It looks like this simulation never completed.",
1590
+                      " Maybe it reached maximum allowed limits.",
1591
+                      " You will get NAs"))
1592
+        return(rep(NA, length(x$geneNames)))
1593
+    }
1505 1594
     the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
1506 1595
     if(the.time < 0) { 
1507 1596
         return(rep(NA, nrow(x$Genotypes)))
... ...
@@ -1686,17 +1775,6 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1686 1775
 
1687 1776
 }
1688 1777
 
1689
-eFinalMf <- function(initSize, s, j) {
1690
-    ## Expected final sizes for McF, when K is set to the default.
1691
-    # j is number of drivers
1692
-    ## as it says, with no passengers
1693
-    ## Set B(d) = D(N)
1694
-    K <- initSize/(exp(1) - 1)
1695
-    return(K * (exp( (1 + s)^j) - 1))
1696
-}
1697
-
1698
-
1699
-
1700 1778
 OncoSimulWide2Long <- function(x) {
1701 1779
     ## Put data in long format, for ggplot et al
1702 1780
     
... ...
@@ -1820,6 +1898,59 @@ plotShannon <- function(z) {
1820 1898
     axis(2)
1821 1899
 }
1822 1900
 
1901
+
1902
+
1903
+is_null_na <- function(x) {
1904
+    ## For arguments, if user passes "undefined" or "not set"
1905
+    ## See also http://stackoverflow.com/a/19655909
1906
+    if(is.function(x)) return(FALSE)
1907
+    if( is.null(x) ||
1908
+        ( (length(x) == 1) && (is.na(x)) ) ||
1909
+        ( (length(x) == 1) && (x == "") ) ## careful here
1910
+       )  {
1911
+        return(TRUE)
1912
+    } else {
1913
+        return(FALSE)
1914
+    }
1915
+}
1916
+
1917
+
1918
+## Not used anymore, but left here in case they become useful.
1919
+## Expected numbers at equilibrium under McFarland's
1920
+## eFinalMf <- function(initSize, s, j) {
1921
+##     ## Expected final sizes for McF, when K is set to the default.
1922
+##     # j is number of drivers
1923
+##     ## as it says, with no passengers
1924
+##     ## Set B(d) = D(N)
1925
+##     K <- initSize/(exp(1) - 1)
1926
+##     return(K * (exp( (1 + s)^j) - 1))
1927
+## }
1928
+
1929
+## mcflE <- function(p, s, initSize) {
1930
+##     K <- initSize/(exp(1) - 1)
1931
+##     ## Expected number at equilibrium
1932
+##     return( K * (exp((1 + s)^p) - 1))
1933
+## }
1934
+
1935
+## mcflEv <- function(p, s, initSize) {
1936
+##     ## expects vectors for p and s
1937
+##     K <- initSize/(exp(1) - 1)
1938
+    
1939
+##     ## Expected number at equilibrium
1940
+##     return( K * (exp(prod((1 + s)^p)) - 1))
1941
+## }
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1823 1954
 ## simpsonI <- function(x) {
1824 1955
 ##     sx <- sum(x)
1825 1956
 ##     p <- x/sx
... ...
@@ -1,8 +1,8 @@
1 1
 # This file was generated by Rcpp::compileAttributes
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-nr_BNB_Algo5 <- function(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery) {
5
-    .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery)
4
+nr_BNB_Algo5 <- function(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list) {
5
+    .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list)
6 6
 }
7 7
 
8 8
 BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) {
... ...
@@ -17,6 +17,10 @@ evalRGenotypeAndMut <- function(rG, rFE, muEF, full2mutator_, verbose, prodNeg)
17 17
     .Call('OncoSimulR_evalRGenotypeAndMut', PACKAGE = 'OncoSimulR', rG, rFE, muEF, full2mutator_, verbose, prodNeg)
18 18
 }
19 19
 
20
+accessibleGenotypes <- function(y, f, numMut, th) {
21
+    .Call('OncoSimulR_accessibleGenotypes', PACKAGE = 'OncoSimulR', y, f, numMut, th)
22
+}
23
+
20 24
 ## readFitnessEffects <- function(rFE, echo) {
21 25
 ##     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
22 26
 ## }
... ...
@@ -192,16 +192,23 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
192 192
 ##                 tfm = tfm))
193 193
 ## }
194 194
 
195
-count_accessible_g <- function(gfm, accessible_th) {
196
-    gaj <- genot_to_adj_mat(gfm)
197
-    gaj <- filter_inaccessible(gaj, accessible_th)
198
-    return(ncol(gaj) - 1)
199
-}
195
+## No longer being used. Used to be in rfitness
196
+## count_accessible_g <- function(gfm, accessible_th) {
197
+##     gaj <- genot_to_adj_mat(gfm)
198
+##     gaj <- filter_inaccessible(gaj, accessible_th)
199
+##     return(ncol(gaj) - 1)
200
+## }
200 201
 
202
+
203
+## There is now C++ code to get just the locations/positions of the
204
+## accessible genotypes
201 205
 filter_inaccessible <- function(x, accessible_th) {
202 206
     ## Return an adjacency matrix with only accessible paths. x is the gaj
203 207
     ## matrix created in the plots. The difference between genotypes
204 208
     ## separated by a hamming distance of 1
209
+
210
+    ## FIXME: could do the x[, -1] before loop and not add the 1
211
+    ## inside while, and do that at end
205 212
     colnames(x) <- rownames(x) <- 1:ncol(x)
206 213
     while(TRUE) {
207 214
         ## remove first column
... ...
@@ -216,6 +223,132 @@ filter_inaccessible <- function(x, accessible_th) {
216 223
     return(x)
217 224
 }
218 225
 
226
+f1 <- function() {}
227
+
228
+x <- 99
229
+
230
+## wrapper to the C++ code
231
+wrap_accessibleGenotypes <- function(x, th) {
232
+    ## x is the fitness matrix, not adjacency matrix
233
+    numMut <- rowSums(x[, -ncol(x)])
234
+    o_numMut <- order(numMut)
235
+    x <- x[o_numMut, ]
236
+    numMut <- numMut[o_numMut]
237
+    
238
+    y <- x[, -ncol(x)]
239
+    storage.mode(y) <- "integer"
240
+
241
+    acc <- accessibleGenotypes(y, x[, ncol(x)],
242
+                               as.integer(numMut),
243
+                               th)
244
+    return(acc[acc > 0])
245
+}
246
+
247
+## A transitional function
248
+faster_accessible_genotypes_R <- function(x, th) {
249
+   rs0 <- rowSums(x[, -ncol(x)])
250
+    x <- x[order(rs0), ]
251
+    rm(rs0)
252
+    
253
+    y <- x[, -ncol(x)]
254
+    f <- x[, ncol(x)]
255
+    rs <- rowSums(y)
256
+
257
+   ## If 0, not accessible
258
+   ## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
259
+   ##                                          mode = "integer")
260
+   
261
+   adm <- matrix(0, nrow = length(rs), ncol = length(rs))
262
+   storage.mode(adm) <- "integer"
263
+   
264
+   ## Most time is gone here
265
+    for(i in 1:length(rs)) { ## i is the current genotype
266
+        candidates <- which(rs == (rs[i] + 1))
267
+        for(j in candidates) {
268
+            if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
269
+                ( (f[j] - f[i]) >= th ) ) {
270
+                ## actually, this is the largest time sink using slam
271
+                adm[i, j] <- 1L
272
+                }
273
+        }
274
+    }
275
+
276
+    colnames(adm) <- rownames(adm) <- 1:ncol(adm)
277
+    admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
278
+    while(TRUE) {
279
+        ## We remove inaccessible cols (genotypes) and the corresponding
280
+        ## rows repeatedly until nothing left to remove; any column left
281
+        ## is therefore accessible throw at least one path.
282
+
283
+        ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
284
+        inacc_col <- which(colSums(admtmp) == 0L)
285
+        if(length(inacc_col) == 0) break;
286
+        inacc_row <- inacc_col + 1 ## recall root row is left
287
+        admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
288
+    }
289
+    return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
290
+
291
+}
292
+
293
+
294
+## ## This uses slam, but that is actually slower because
295
+## ## of the assignment
296
+## faster_accessible_genots_slam <- function(x, th = 0) {
297
+
298
+##     ## Given a genotype matrix, return the genotypes that are accessible
299
+##     ## via creating a directed adjacency matrix between genotypes
300
+##     ## connected (i.e., those that differ by gaining one mutation). 0
301
+##     ## means not connected, 1 means connected.
302
+    
303
+##     ## There is a more general function in OncoSimulR that will give the
304
+##     ## fitness difference. But not doing the difference is faster than
305
+##     ## just setting a value, say 1, if all we want is to keep track of
306
+##     ## accessible ones. And by using only 0/1 we can store only an
307
+##     ## integer. And no na.omits, etc. Is too restricted? Yes. But for
308
+##     ## simulations and computing just accessible genotypes, probably a
309
+##     ## hell of a lot faster.
310
+
311
+##     ## Well, this is not incredibly fast either.
312
+    
313
+##     ## Make sure sorted, so ancestors always before descendants
314
+##     rs0 <- rowSums(x[, -ncol(x)])
315
+##     x <- x[order(rs0), ]
316
+##     rm(rs0)
317
+    
318
+##     y <- x[, -ncol(x)]
319
+##     f <- x[, ncol(x)]
320
+##     rs <- rowSums(y)
321
+
322
+##     ## If 0, not accessible
323
+##     adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
324
+##                                       mode = "integer")
325
+##     for(i in 1:length(rs)) { ## i is the current genotype
326
+##         candidates <- which(rs == (rs[i] + 1))
327
+##         for(j in candidates) {
328
+##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
329
+##             ## if(sumdiff == 1)
330
+##             if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
331
+##                 ( (f[j] - f[i]) >= th ) )
332
+##                 adm[i, j] <- 1L
333
+##         }
334
+##     }
335
+
336
+##     colnames(adm) <- rownames(adm) <- 1:ncol(adm)
337
+##     admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
338
+##     while(TRUE) {
339
+##         ## We remove inaccessible cols (genotypes) and the corresponding
340
+##         ## rows repeatedly until nothing left to remove; any column left
341
+##         ## is therefore accessible throw at least one path.
342
+
343
+##         ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
344
+##         inacc_col <- which(slam::col_sums(admtmp) == 0L)
345
+##         if(length(inacc_col) == 0) break;
346
+##         inacc_row <- inacc_col + 1 ## recall root row is left
347
+##         admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
348
+##     }
349
+##     return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
350
+## }
351
+
219 352
 
220 353
 generate_matrix_genotypes <- function(g) {
221 354
     ## FIXME future: do this for order too? Only if rfitness for order.
... ...
@@ -257,6 +390,8 @@ genot_to_adj_mat <- function(x) {
257 390
     ## that differ by gaining one mutation) with value being the
258 391
     ## difference in fitness between destination and origin
259 392
 
393
+    ## FIXME: code is now in place to do all of this in C++
394
+    
260 395
     ## Make sure sorted, so ancestors always before descendants
261 396
     rs0 <- rowSums(x[, -ncol(x)])
262 397
     x <- x[order(rs0), ]
... ...
@@ -23,14 +23,22 @@
23 23
 
24 24
 to_Magellan <- function(x, file,
25 25
                         max_num_genotypes = 2000) {
26
+    ## This is stupid if we already have, as entry, an object from
27
+    ## rfitness!! to_Fitness_Matrix can be very slow.
26 28
     if(is.null(file)) {
27 29
         file <- tempfile()
28 30
         cat("\n Using file ", file, "\n")
29 31
     }
30
-    gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm
31
-    write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1)
32
-    write.table(gfm, file = file, append = TRUE,
33
-                row.names = FALSE, col.names = FALSE, sep = " ")
32
+    if(inherits(x, "genotype_fitness_matrix")) {
33
+        write(rep(2, ncol(x) - 1), file = file, ncolumns = ncol(x) - 1)
34
+        write.table(x, file = file, append = TRUE,
35
+                    row.names = FALSE, col.names = FALSE, sep = " ")
36
+    } else {
37
+        gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm
38
+        write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1)
39
+        write.table(gfm, file = file, append = TRUE,
40
+                    row.names = FALSE, col.names = FALSE, sep = " ")
41
+    }
34 42
 }
35 43
 
36 44
 to_Fitness_Matrix <- function(x, max_num_genotypes) {
... ...
@@ -1,3 +1,4 @@
1
+
1 2
 ## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2 3
 
3 4
 ## This program is free software: you can redistribute it and/or modify
... ...
@@ -161,7 +162,7 @@ list.of.deps <- function(x) {
161 162
             stop("child not unique")
162 163
     }
163 164
     typeDep <- lookupTypeDep[unique(x$typeDep)]
164
-    if(any(is.na(typeDep)))
165
+    if(any(is_null_na(typeDep)))
165 166
         stop("typeDep value incorrect. Check spelling.")
166 167
     return(list(
167 168
         child = unique(x$child),
... ...
@@ -1436,7 +1437,9 @@ nr_oncoSimul.internal <- function(rFE,
1436 1437
                                   extraTime,
1437 1438
                                   keepPhylog,
1438 1439
                                   detectionProb,
1439
-                                  MMUEF = NULL ## avoid partial matching, and set default
1440
+                                  AND_DrvProbExit,
1441
+                                  MMUEF = NULL, ## avoid partial matching, and set default
1442
+                                  fixation = NULL ## avoid partial matching
1440 1443
                                   ) {
1441 1444
     if(!inherits(rFE, "fitnessEffects"))
1442 1445
         stop(paste("rFE must be an object of class fitnessEffects",
... ...
@@ -1578,6 +1581,28 @@ nr_oncoSimul.internal <- function(rFE,
1578 1581
     ## if( is.null(n2)) n2 <- -9
1579 1582
 
1580 1583
     ## call <- match.call()
1584
+    
1585
+    ## Process the fixed list, if any
1586
+    if(!is_null_na(fixation)) {
1587
+        ng <- namedGenes
1588
+        rownames(ng) <- namedGenes[, "Gene"]
1589
+        ## Usual genotype specification and might allow ordered vectors
1590
+        ## in the future
1591
+        fixation_b <- lapply(fixation, nice.vector.eo, sep = ",")
1592
+        ulf <- unlist(fixation_b)
1593
+        if(any(ulf == ">"))
1594
+            stop("Order effects not allowed (yet?) in fixation.")
1595
+        ulfg <- ng[ulf, 1]
1596
+        if(any(is.na(ulfg)))
1597
+            stop(paste("The 'fixation' list contains genes that are not present",
1598
+                       " in the fitness effects."))
1599
+        ## Sorting here is crucial!!
1600
+        fixation_list <- lapply(fixation_b, function(x) sort(ng[x, 2]))
1601
+    } else {
1602
+        fixation_list <- list()
1603
+    }
1604
+
1605
+    
1581 1606
     return(c(
1582 1607
         nr_BNB_Algo5(rFE = rFE,
1583 1608
                      mu_ = mu,
... ...
@@ -1613,7 +1638,9 @@ nr_oncoSimul.internal <- function(rFE,
1613 1638
                      p2 = dpr["p2"],
1614 1639
                      PDBaseline = dpr["PDBaseline"],
1615 1640
                      cPDetect_i= dpr["cPDetect"],
1616
-                     checkSizePEvery = dpr["checkSizePEvery"]),
1641
+                     checkSizePEvery = dpr["checkSizePEvery"],
1642
+                     AND_DrvProbExit = AND_DrvProbExit,
1643
+                     fixation_list = fixation_list),
1617 1644
         Drivers = list(rFE$drv), ## but when doing pops, these will be repeated
1618 1645
         geneNames = list(names(getNamesID(rFE)))
1619 1646
     ))
... ...
@@ -1753,8 +1780,8 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1753 1780
     default_PDBaseline <- 1.2 * initSize
1754 1781
     default_checkSizePEvery <- 20
1755 1782
     ## No default cPDetect. That is done from p2 and n2 in C++.
1756
-    
1757
-    if((length(x) == 1) && (is.na(x))) {
1783
+    if(is_null_na(x)) {
1784
+    ## if((length(x) == 1) && (is.na(x))) {
1758 1785
         y <- vector()
1759 1786
         y["cPDetect"] <- -9
1760 1787
         y["p2"] <- 9
... ...
@@ -1779,18 +1806,18 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1779 1806
     }
1780 1807
    
1781 1808
     ## This ain't conditional. If not given, always check
1782
-    if( !is.na(x["cPDetect"]) && (sum(!is.na(x["p2"]), !is.na(x["n2"])) >= 1 ))
1809
+    if( !is_null_na(x["cPDetect"]) && (sum(!is_null_na(x["p2"]), !is_null_na(x["n2"])) >= 1 ))
1783 1810
         stop("Specify only cPDetect xor both of p2 and n2")
1784
-    if( (is.na(x["p2"]) + is.na(x["n2"])) == 1 )
1811
+    if( (is_null_na(x["p2"]) + is_null_na(x["n2"])) == 1 )
1785 1812
         stop("If you pass one of n2 or p2, you must also pass the other. ",
1786 1813
              "Otherwise, we would not know what to do.")
1787 1814
 
1788
-    if(is.na(x["PDBaseline"])) {
1815
+    if(is_null_na(x["PDBaseline"])) {
1789 1816
         x["PDBaseline"] <- default_PDBaseline
1790 1817
         if(verbosity > -1)
1791 1818
             message("\n  PDBaseline set to default value of ", default_PDBaseline, "\n")
1792 1819
         }
1793
-    if(is.na(x["checkSizePEvery"])) {
1820
+    if(is_null_na(x["checkSizePEvery"])) {
1794 1821
         x["checkSizePEvery"] <- default_checkSizePEvery
1795 1822
         if(verbosity > -1)
1796 1823
             message("\n  checkSizePEvery set to default value of ",
... ...
@@ -1800,9 +1827,9 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1800 1827
     ## If we get here, we checked consistency of whether cPDetect or p2/n2.
1801 1828
     ## Fill up with defaults the missing values
1802 1829
 
1803
-    if(is.na(x["cPDetect"])) {
1804
-        if(is.na(x["p2"])) {
1805
-            if(!is.na(x["n2"])) stop("Eh? no p2 but n2? Bug")
1830
+    if(is_null_na(x["cPDetect"])) {
1831
+        if(is_null_na(x["p2"])) {
1832
+            if(!is_null_na(x["n2"])) stop("Eh? no p2 but n2? Bug")
1806 1833
             x["n2"] <- default_n2
1807 1834
             x["p2"] <- default_p2
1808 1835
             if(verbosity > -1)
... ...
@@ -1816,14 +1843,14 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1816 1843
     if(x["PDBaseline"] < 0)
1817 1844
         stop("PDBaseline < 0")
1818 1845
     
1819
-    if(!is.na(x["n2"])) {
1846
+    if(!is_null_na(x["n2"])) {
1820 1847
         if(x["n2"] <= x["PDBaseline"])
1821 1848
             stop("n2 <= PDBaseline")
1822 1849
         if(x["p2"] >= 1) stop("p2 >= 1")
1823 1850
         if(x["p2"] <= 0) stop("p2 <= 0")
1824 1851
         x["cPDetect"] <- -9
1825 1852
     } else { ## only if x["cPDetect"] is not NA
1826
-        if(is.na(x["cPDetect"])) stop("eh? you found a bug")## paranoia
1853
+        if(is_null_na(x["cPDetect"])) stop("eh? you found a bug")## paranoia
1827 1854
         x["n2"] <- -9
1828 1855
         x["p2"] <- -9
1829 1856
         if(verbosity > -1)
... ...
@@ -1832,6 +1859,51 @@ detectionProbCheckParse <- function(x, initSize, verbosity) {
1832 1859
     return(x)
1833 1860
 }
1834 1861
 
1862
+sampledGenotypes <- function(y, genes = NULL) {
1863
+    ## From a popSample object, or a matrix for that matter,
1864
+    ## show the sampled genotypes and their frequencies
1865
+    if(!is.null(genes)) {
1866
+        cols <- which(colnames(y) %in% genes )
1867
+        y <- y[, cols]
1868
+    }
1869
+    nn <- colnames(y)
1870
+    df <- data.frame(table(
1871
+        apply(y, 1, function(z) paste(nn[as.logical(z)], collapse = ", ") )
1872
+    ))
1873
+    gn <- as.character(df[, 1])
1874
+    gn[gn == ""] <- "WT"
1875
+    df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE)
1876
+    return(df)
1877
+}
1878
+
1879
+
1880
+
1881
+list_g_matches_fixed <- function(x, y) {
1882
+    ## Internal function, for testing the fixation output.
1883
+    ## x and y are vectors
1884
+
1885
+    ## x is the set of output genotypes, y the set of fixed
1886
+    ## genotypes/subset of genotypes.
1887
+
1888
+    ## Yes, this function has tests too in test.fixation.R
1889
+    
1890
+    ## All genotypes in x satisfy that they are supersets of at least one
1891
+    ## in y? That is true if, for every element in x, at least one y in
1892
+    ## that x.
1893
+
1894
+    if(is.list(y)) y <- unlist(y)
1895
+    
1896
+    y.nice <- lapply(y, nice.vector.eo, sep = ",")
1897
+    x.nice <- lapply(x, nice.vector.eo, sep = ",")
1898
+
1899
+    fu <- function(u, y.nice)
1900
+        any(unlist(lapply(y.nice, function(z) all(z %in% u))))
1901
+
1902
+    return(all(unlist(lapply(x.nice, fu, y.nice))))
1903
+
1904
+}
1905
+
1906
+
1835 1907
 
1836 1908
 ## emptyFitnessEffects <- function() {
1837 1909
 ##     list(long.rt = list(),
... ...
@@ -61,7 +61,9 @@ rfitness <- function(g, c= 0.5,
61 61
         }
62 62
         m <- cbind(m, Fitness = fi)
63 63
         if(min_accessible_genotypes > 0) {
64
-            num_accessible_genotypes <- count_accessible_g(m, accessible_th)
64
+            ## num_accessible_genotypes <- count_accessible_g(m, accessible_th)
65
+            ## Recall accessibleGenotypes includes the wt, if accessible.
66
+            num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1
65 67
             if(num_accessible_genotypes >= min_accessible_genotypes) {
66 68
                 done <- TRUE
67 69
                 attributes(m) <- c(attributes(m),
... ...
@@ -89,3 +91,7 @@ create_eq_ref <- function(g) {
89 91
     ref <- c(rep(1, nm), rep(0, g - nm))
90 92
     sample(ref)
91 93
 }
94
+
95
+
96
+
97
+
... ...
@@ -1,3 +1,16 @@
1
+Changes in version 2.5.1 (2016-11-12):
2
+	- AND of detectedSizeP and lastMaxDr.
3
+	- fixation as stopping mechanism.
4
+	- sampledGenotypes in user code.
5
+	- clonePhylog et al: deal with never any descendant.
6
+	- samplePop can handle failed simulations graciously.
7
+	- summary.oncosimulpop can handle failed simulations graciously.
8
+	- accessible genotypes now done in C++.
9
+	- OcurringDrivers should not be a factor.
10
+	- samplePop always returns gene names.
11
+	- to_Magellan is much faster with rfitness objects.
12
+	- Several improvements in vignette (English and additional explanations).
13
+
1 14
 Changes in version 2.4.0 (for BioC 3.4):
2 15
 	- Mutator phenotype and gene-specific mutation rates.
3 16
 	- End simulations stochastically as a function of size.	
... ...
@@ -9,7 +22,7 @@ Changes in version 2.4.0 (for BioC 3.4):
9 22
 	- Improved test coverage.
10 23
 	- samplePop: sample at arbitrary sizes.	
11 24
 	- evalAllGenotypes: order = FALSE by default.
12
-
25
+	
13 26
 Changes in version 2.3.17 (2017-09-22):
14 27
 	- random2 for rfitness.
15 28
 	- Vignette: decrease size and running time.
... ...
@@ -114,7 +114,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
114 114
   specifiy anything if you do not want to, and you can pass an empty
115 115
   vector (as \code{character(0)}). The default has changed with respect
116 116
   to v.2.1.3 and previous: it used to be to assume that all
117
-  genes that were not in the \code{noIntGenes} were drivers. The fault
117
+  genes that were not in the \code{noIntGenes} were drivers. The default
118 118
   now is to assume nothing: if you want \code{drvNames} you have
119 119
   to specify them.
120 120
 
... ...
@@ -40,6 +40,8 @@
40 40
                 errorHitMaxTries = TRUE,
41 41
                 verbosity = 0,
42 42
                 initMutant = NULL,
43
+                AND_DrvProbExit = FALSE,
44
+                fixation = NULL,
43 45
                 seed = NULL)
44 46
 
45 47
 oncoSimulPop(Nindiv, fp, model = "Exp", numPassengers = 0, mu = 1e-6,
... ...
@@ -61,6 +63,8 @@ oncoSimulPop(Nindiv, fp, model = "Exp", numPassengers = 0, mu = 1e-6,
61 63
                 errorHitWallTime = TRUE,
62 64
                 errorHitMaxTries = TRUE,
63 65
                 initMutant = NULL,
66
+                AND_DrvProbExit = FALSE,
67
+                fixation = NULL,
64 68
                 verbosity = 0,
65 69
                 mc.cores = detectCores(),
66 70
                 seed = "auto")
... ...
@@ -108,6 +112,8 @@ oncoSimulSample(Nindiv,
108 112
                 typeSample = "whole",
109 113
                 thresholdWhole = 0.5,
110 114
                 initMutant = NULL,
115
+                AND_DrvProbExit = FALSE,
116
+                fixation = NULL,
111 117
                 verbosity  = 1,
112 118
                 showProgress = FALSE,
113 119
                 seed = "auto")
... ...
@@ -289,22 +295,7 @@ This option can not be used with v.1 objects.
289 295
 
290 296
 }
291 297
 
292
-\item{s}{
293
-  Selection coefficient for drivers. 
294
-    Only relevant if using a poset as this is included in the
295
-  fitnessEffects object.
296
-}
297
-\item{sh}{
298
-  Selection coefficient for drivers with restrictions not satisfied. A
299
-  value of 0 means there are no penalties for a driver appearing in a
300
-  clone when its restrictions are not satisfied.
301
-
302
-  To specify "sh=Inf" (in Diaz-Uriarte, 2015) use sh = -1.
303
-
304
-  Only relevant if using a poset as this is included in the
305
-  fitnessEffects object.
306 298
 
307
-}
308 299
 \item{K}{
309 300
   Initial population equilibrium size in the McFarland models.
310 301
 
... ...
@@ -497,10 +488,6 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}.
497 488
 }
498 489
 
499 490
 
500
-
501
-
502
-
503
-
504 491
 \item{verbosity}{ If 0, run silently. Iincreasing values of verbosity
505 492
   provide progressively more information about intermediate steps,
506 493
   possible numerical notes/warnings from the C++ code, etc. Values less
... ...
@@ -529,6 +516,40 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}.
529 516
 \item{showProgress}{If TRUE, provide information, during exection,
530 517
 of the individual done, and the number of attempts and time used.}
531 518
 
519
+\item{AND_DrvProbExit}{If TRUE, cancer will be considered to be reached
520
+  if both the \code{detectionProb} mechanism and \code{detectionDrivers}
521
+  are satisfied. This is and AND, not an OR condition. Using this option
522
+  with \code{fixation} is not allowed (as it does not make much sense).}
523
+
524
+\item{fixation}{If non-NULL, a list or a vector, where each element of
525
+  is a string with a gene or a gene combination. Simulations will stop
526
+  as soon as any of the genes or gene combinations are fixed (i.e.,
527
+  reach frequency of one). If you pass gene combinations, separate genes
528
+  with commas (not '>'); this means order is not (yet?)  supported. This
529
+  way of specifying gene combinations is the same as the one used for
530
+  \code{initMutant} and \code{\link{evalGenotype}}.
531
+
532
+  Using this option with \code{AND_DrvProbExit} is not allowed (as it
533
+  does not make much sense). This option is not allowed either with the
534
+  old v.1 specification.}
535
+
536
+\item{s}{
537
+  Selection coefficient for drivers. 
538
+    Only relevant if using a poset as this is included in the
539
+  fitnessEffects object. This will eventually be deprecated.
540
+}
541
+\item{sh}{
542
+  Selection coefficient for drivers with restrictions not satisfied. A
543
+  value of 0 means there are no penalties for a driver appearing in a
544
+  clone when its restrictions are not satisfied.
545
+
546
+  To specify "sh=Inf" (in Diaz-Uriarte, 2015) use sh = -1.
547
+
548
+  Only relevant if using a poset as this is included in the
549
+  fitnessEffects object. This will eventually be deprecated.
550
+
551
+}
552
+
532 553
 \item{seed}{The seed for the C++ PRNG. You can pass a value. If you set
533 554
   it to NULL, then a seed will be generated in R and passed to C++. If
534 555
   you set it to "auto", then if you are using v.1, the behavior is the
... ...
@@ -612,13 +633,13 @@ of the individual done, and the number of attempts and time used.}
612 633
 
613 634
   Detection of cancer can be a deterministic process, where cancer is
614 635
   always detected (and, thus, simulation ended) when certain conditions
615
-  are met (\code{detectionSize},
616
-  \code{detectionDrivers}). Alternatively, it can be stochastic process
617
-  where probability of detection depends on size. Every so often (see
618
-  below) we assess population size, and detect cancer or not
619
-  probabilistically (comparing the probability of detection for that
620
-  size with a random uniform number). Probability of detection changes
621
-  with population size according to the function
636
+  are met (\code{detectionSize}, \code{detectionDrivers},
637
+  \code{fixation}). Alternatively, it can be stochastic process where
638
+  probability of detection depends on size. Every so often (see below)
639
+  we assess population size, and detect cancer or not probabilistically
640
+  (comparing the probability of detection for that size with a random
641
+  uniform number). Probability of detection changes with population size
642
+  according to the function
622 643
   
623 644
   \deqn{ 1 - e^{-cPDetect (populationsize - PDBaseline)}}.
624 645
 
... ...
@@ -1,6 +1,6 @@
1 1
 \name{samplePop}
2 2
 \alias{samplePop}
3
-
3
+\alias{sampledGenotypes}
4 4
 \title{
5 5
   Obtain a sample from a population of simulations.
6 6
   
... ...
@@ -13,17 +13,26 @@
13 13
   schemes include whole tumor and single cell sampling, and sampling at
14 14
   the end of the tumor progression or during the progression of the
15 15
   disease.
16
+
17
+  
18
+  \code{sampledGenotypes} shows the genotype frequencies from that
19
+  sample. Order effects are ignored.
20
+  
16 21
 }
17 22
 
18 23
 \usage{
19 24
 samplePop(x, timeSample = "last", typeSample = "whole",
20 25
           thresholdWhole = 0.5, geneNames = NULL, popSizeSample = NULL)
26
+
27
+sampledGenotypes(y, genes = NULL)
28
+
21 29
 }
22 30
 
23 31
 \arguments{
24
-  \item{x}{
25
-    An object of class \code{oncosimulpop}.
26
-  }
32
+  \item{x}{An object of class \code{oncosimulpop} or class \code{oncosimul2} (a
33
+  single simulation).}
34
+
35
+  \item{y}{The output from a call to \code{samplePop}.}
27 36
   
28 37
   \item{timeSample}{
29 38
     "last" means to sample each individual in the very last time period
... ...
@@ -65,6 +74,17 @@ samplePop(x, timeSample = "last", typeSample = "whole",
65 74
     This allows you to specify arbitrary sampling schemes with respect
66 75
     to total population size.  }
67 76
 
77
+  \item{genes}{If non-NULL, use only the genes in \code{genes} to create
78
+  the table of genotypes. This can be useful if you only care about the
79
+  genotypes with respect to a subset of genes (say, X), and want to
80
+  collapse with respect to another subset of genes (say, Y), for
81
+  instance if Y is a large set of passenger genes. For example, suppose
82
+  the complete set of genes is 'a', 'b', 'c', 'd', and you specify
83
+  \code{genes = c('a', 'b')}; then, genotypes 'a, b, c' and genotypes
84
+  'a, b, d' will not be shown as different rows in the table of
85
+  frequencies. Likewise, genotypes 'a, c' and genotypes 'a, d' will not
86
+  be shown as different rows.}
87
+
68 88
 }
69 89
 
70 90
 \details{
... ...
@@ -135,13 +155,16 @@ p705 <- examplePosets[["p705"]]
135 155
 ##  leave it at its default value).
136 156
 
137 157
 p1 <- oncoSimulPop(4, p705, mc.cores = 2)
138
-samplePop(p1)
158
+(sp1 <- samplePop(p1))
159
+sampledGenotypes(sp1)
160
+
139 161
 
140 162
 ## Sample at fixed sizes. Notice the requested size
141 163
 ## for the last population is larger than the any population size
142 164
 ## so we get NAs
143 165
 
144
-samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13))
166
+(sp2 <- samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13)))
167
+sampledGenotypes(sp2)
145 168
 
146 169
 
147 170
 ## Now single cell sampling
... ...
@@ -149,8 +172,7 @@ samplePop(p1, popSizeSample = c(1e7, 1e6, 4e5, 1e13))
149 172
 r1 <- oncoSimulIndiv(p705)
150 173
 samplePop(r1, typeSample = "single")
151 174
 
152
-
153
-
175
+sampledGenotypes(samplePop(r1, typeSample = "single"))
154 176
 
155 177
 }
156 178
 
... ...
@@ -228,6 +228,8 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
228 228
 					const double& checkSizePEvery,
229 229
 					double& nextCheckSizeP,
230 230
 					std::mt19937& ran_gen,
231
+					const bool& AND_DrvProbExit,
232
+					const std::vector<std::vector<int> >& fixation_l,
231 233
 					const double& fatalPopSize = 1e15) {
232 234
   // Fill out, but also compute totPopSize
233 235
   // and return sample summaries for popsize, drivers.
... ...
@@ -254,7 +256,38 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
254 256
   }
255 257
   lastMaxDr = max_ndr;
256 258
 
259
+  // Until fixation done here. Recall we use an OR operation for exiting
260
+  // below.  Could be added to loop above.
261
+  // And we call allGenesinGenotype also above, inside getGenotypeDrivers.
262
+  // So room for speed ups?
257 263
   
264
+  // Since we convert each genotype to a sorted allGenesinGenotype, iterate
265
+  // over that first. Add that pop size if the combination is present in genotype.
266
+  bool fixated = false;
267
+  if(totPopSize > 0) { // Avoid silly things
268
+    if( fixation_l.size() ) {
269
+      std::vector<double> popSize_fixation(fixation_l.size());
270
+      for(size_t i = 0; i < popParams.size(); ++i) {
271
+	std::vector<int> thisg = allGenesinGenotype(Genotypes[i]);
272
+	for(size_t fc = 0; fc != popSize_fixation.size(); ++fc) {
273
+	  // Yes, fixation_l is sorted in R.
274
+	  if(std::includes(thisg.begin(), thisg.end(),
275
+			   fixation_l[fc].begin(), fixation_l[fc].end()) ) {
276
+	    popSize_fixation[fc] += popParams[i].popSize;
277
+	  }
278
+	}
279
+      }
280
+      // Any fixated? But avoid trivial of totPopSize of 0!
281
+      // Now check of > 0 is redundant as we check totPopSize > 0
282
+      double max_popSize_fixation =
283
+	*std::max_element(popSize_fixation.begin(), popSize_fixation.end());
284
+      if( (max_popSize_fixation > 0 ) &&
285
+	  (max_popSize_fixation == totPopSize)) {
286
+	fixated = true;
287
+      }
288
+    }
289
+  }
290
+    
258 291
   if (keepEvery < 0) {
259 292
     storeThis = false;
260 293
   } else if( currentTime >= (lastStoredSample + keepEvery) ) {
... ...
@@ -265,6 +298,7 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
265 298
     simulsDone = true;
266 299
   }
267 300
 
301
+  
268 302
   // FIXME
269 303
   // this is the usual exit condition
270 304
   // (totPopSize >= detectionSize) ||
... ...
@@ -287,28 +321,64 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
287 321
   } else {
288 322
     checkSizePNow = false;
289 323
   }
290
-  
291
-  if(extraTime > 0) {
292
-    if(done_at <  0) {
293
-      if( (totPopSize >= detectionSize) ||
294
-	  ( (lastMaxDr >= detectionDrivers) &&
295
-	    (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
296
-	  ( checkSizePNow &&
297
-	    detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) {
298
-	done_at = currentTime + extraTime;
324
+
325
+
326
+  if(AND_DrvProbExit) {
327
+    // The AND of detectionProb and drivers
328
+    // fixated plays no role here, and cannot be passed from R
329
+    if(extraTime > 0) {
330
+      if(done_at <  0) {
331
+	if( (lastMaxDr >= detectionDrivers) &&
332
+	    (popSizeOverDDr >= minDetectDrvCloneSz) &&
333
+	    checkSizePNow  &&
334
+	    detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) {
335
+	  done_at = currentTime + extraTime;
336
+	}
337
+      } else if (currentTime >= done_at) {
338
+  	simulsDone = true;
339
+  	reachDetection = true; 
299 340
       }
300
-    } else if (currentTime >= done_at) {
301
-	simulsDone = true;
302
-	reachDetection = true; 
341
+    } else if( (lastMaxDr >= detectionDrivers) &&
342
+	       (popSizeOverDDr >= minDetectDrvCloneSz) &&
343
+	       checkSizePNow  &&
344
+	       detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) ) {
345
+      simulsDone = true;
346
+      reachDetection = true; 
347
+    }
348
+  } else {
349
+    // The usual OR mechanism of each option
350
+    if(extraTime > 0) {
351
+      if(done_at <  0) {
352
+	if( (fixated) ||
353
+	    (totPopSize >= detectionSize) ||
354
+	    ( (lastMaxDr >= detectionDrivers) &&
355
+	      (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
356
+	    ( checkSizePNow  &&
357
+	      detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen))) {
358
+	  done_at = currentTime + extraTime;
359
+	}
360
+      } else if (currentTime >= done_at) {
361
+  	simulsDone = true;
362
+  	reachDetection = true; 
303 363
       }
304
-  } else if( (totPopSize >= detectionSize) ||
305
-	     ( (lastMaxDr >= detectionDrivers) &&
306
-	       (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
307
-	     ( checkSizePNow &&
308
-	       detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) {
309
-    simulsDone = true;
310
-    reachDetection = true; 
364
+    } else if( (fixated) ||
365
+	       (totPopSize >= detectionSize) ||
366
+	       ( (lastMaxDr >= detectionDrivers) &&
367
+		 (popSizeOverDDr >= minDetectDrvCloneSz) ) ||
368
+	       ( checkSizePNow  &&
369
+		 detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen)) ) {
370
+      simulsDone = true;
371
+      reachDetection = true; 
372
+    }
311 373
   }
374
+
375
+  
376
+  // if( checkSizePNow && (lastMaxDr >= detectionDrivers) &&
377
+  // 	       detectedSizeP(totPopSize, cPDetect, PDBaseline, ran_gen) )  {
378
+  //   simulsDone = true;
379
+  //   reachDetection = true; 
380
+  // }
381
+  
312 382
   
313 383
   if(totPopSize >= fatalPopSize) {
314 384
     Rcpp::Rcout << "\n\totPopSize > " << fatalPopSize
... ...
@@ -736,7 +806,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
736 806
 			const std::vector<int>& full2mutator,
737 807
 			const double& cPDetect,
738 808
 			const double& PDBaseline,
739
-			const double& checkSizePEvery) {
809
+			const double& checkSizePEvery,
810
+			const bool& AND_DrvProbExit,
811
+			const std::vector< std::vector<int> >& fixation_l) {
740 812
   
741 813
   double nextCheckSizeP = checkSizePEvery;
742 814
   const int numGenes = fitnessEffects.genomeSize;
... ...
@@ -1566,7 +1638,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1566 1638
 					 PDBaseline,
1567 1639
 					 checkSizePEvery,
1568 1640
 					 nextCheckSizeP,
1569
-					 ran_gen); //keepEvery is for thinning
1641
+					 ran_gen,
1642
+					 AND_DrvProbExit,
1643
+					 fixation_l); //keepEvery is for thinning
1570 1644
       if(verbosity >= 3) {
1571 1645
 	Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() 
1572 1646
 		  << "\n totPopSize after sampling " << totPopSize << "\n";
... ...
@@ -1649,7 +1723,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1649 1723
 			double p2,
1650 1724
 			double PDBaseline,
1651 1725
 			double cPDetect_i,
1652
-			double checkSizePEvery) {
1726
+			double checkSizePEvery,
1727
+			bool AND_DrvProbExit,
1728
+			Rcpp::List fixation_i) {
1653 1729
   // double cPDetect){
1654 1730
   // double n2,
1655 1731
   // double p2,
... ...
@@ -1717,6 +1793,13 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1717 1793
     DP2(muEF.genomeSize);
1718 1794
     throw std::logic_error("full2mutator 0 with mutatorEffects.genomesize != 0");
1719 1795
   }
1796
+
1797
+  // fixation: run until some genotype combinations fixed
1798
+  std::vector < std::vector<int> > fixation_l(fixation_i.size());
1799
+  if( fixation_i.size() != 0 ) {
1800
+    fixation_l = list_to_vector_of_int_vectors(fixation_i);
1801
+  }
1802
+
1720 1803
   
1721 1804
   bool runAgain = true;
1722 1805
   bool reachDetection = false;
... ...
@@ -1797,6 +1880,10 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1797 1880
   
1798 1881
   double currentTime = 0;
1799 1882
   int iter = 0;
1883
+
1884
+  // bool AND_DrvProbExit = ( (cpDetect >= 0) &&
1885
+  // 			     (detectionDrivers < 1e9) &&
1886
+  // 			     (detectionSize < std::numeric_limits<double>::infinity()));
1800 1887
   while(runAgain) {
1801 1888
 
1802 1889
     if(numRuns >= maxNumTries) {
... ...
@@ -1872,7 +1959,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1872 1959
 		  full2mutator,
1873 1960
 		  cPDetect,
1874 1961
 		  PDBaseline,
1875
-		  checkSizePEvery);
1962
+		  checkSizePEvery,
1963
+		  AND_DrvProbExit,
1964
+		  fixation_l);
1876 1965
       ++numRuns;
1877 1966
       forceRerun = false;
1878 1967
     } catch (rerunExcept &e) {
... ...
@@ -5,18 +5,20 @@
5 5
 
6 6
 #include <R_ext/Rdynload.h>
7 7
 
8
-SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP,  SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaseline, SEXP cPDetect_i, SEXP checkSizePEvery);
8
+SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP,  SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP);
9 9
 SEXP OncoSimulR_BNB_Algo5(SEXP restrictTableSEXP, SEXP numDriversSEXP, SEXP numGenesSEXP, SEXP typeCBN_SEXP,  SEXP sSEXP, SEXP deathSEXP, SEXP muSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutantSEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP,  SEXP shSEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP);
10 10
 SEXP OncoSimulR_evalRGenotype(SEXP rGSEXP, SEXP rFESEXP, SEXP verboseSEXP, SEXP prodNegSEXP, SEXP calledBy_SEXP);
11 11
 SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SEXP full2mutator_SEXP, SEXP verboseSEXP, SEXP prodNegSEXP);
12 12
 // SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP);
13
+SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP);
13 14
 
14 15
 // The number is the number of arguments
15 16
 R_CallMethodDef callMethods[]  = {
16
-  {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 35},
17
+  {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 37},
17 18
   {"OncoSimulR_BNB_Algo5", (DL_FUNC) &OncoSimulR_BNB_Algo5, 32},
18 19
   {"OncoSimulR_evalRGenotype", (DL_FUNC) &OncoSimulR_evalRGenotype, 5},
19
-  {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6},  
20
+  {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6},
21
+  {"OncoSimulR_accessibleGenotypes", (DL_FUNC) &OncoSimulR_accessibleGenotypes, 4},
20 22
   //  {"OncoSimulR_readFitnessEffects", (DL_FUNC) &OncoSimulR_readFitnessEffects, 2},
21 23
   {NULL, NULL, 0}
22 24
 };
... ...
@@ -6,8 +6,8 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // nr_BNB_Algo5
9
-Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery);
10
-RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP) {
9
+Rcpp::List nr_BNB_Algo5(Rcpp::List rFE, Rcpp::NumericVector mu_, double death, double initSize, double sampleEvery, double detectionSize, double finalTime, int initSp, int initIt, double seed, int verbosity, int speciesFS, double ratioForce, Rcpp::CharacterVector typeFitness_, int maxram, int mutationPropGrowth, Rcpp::IntegerVector initMutant_, double maxWallTime, double keepEvery, double K, int detectionDrivers, bool onlyCancer, bool errorHitWallTime, int maxNumTries, bool errorHitMaxTries, double minDetectDrvCloneSz, double extraTime, bool keepPhylog, Rcpp::List MMUEF, Rcpp::IntegerVector full2mutator_, double n2, double p2, double PDBaseline, double cPDetect_i, double checkSizePEvery, bool AND_DrvProbExit, Rcpp::List fixation_list);
10
+RcppExport SEXP OncoSimulR_nr_BNB_Algo5(SEXP rFESEXP, SEXP mu_SEXP, SEXP deathSEXP, SEXP initSizeSEXP, SEXP sampleEverySEXP, SEXP detectionSizeSEXP, SEXP finalTimeSEXP, SEXP initSpSEXP, SEXP initItSEXP, SEXP seedSEXP, SEXP verbositySEXP, SEXP speciesFSSEXP, SEXP ratioForceSEXP, SEXP typeFitness_SEXP, SEXP maxramSEXP, SEXP mutationPropGrowthSEXP, SEXP initMutant_SEXP, SEXP maxWallTimeSEXP, SEXP keepEverySEXP, SEXP KSEXP, SEXP detectionDriversSEXP, SEXP onlyCancerSEXP, SEXP errorHitWallTimeSEXP, SEXP maxNumTriesSEXP, SEXP errorHitMaxTriesSEXP, SEXP minDetectDrvCloneSzSEXP, SEXP extraTimeSEXP, SEXP keepPhylogSEXP, SEXP MMUEFSEXP, SEXP full2mutator_SEXP, SEXP n2SEXP, SEXP p2SEXP, SEXP PDBaselineSEXP, SEXP cPDetect_iSEXP, SEXP checkSizePEverySEXP, SEXP AND_DrvProbExitSEXP, SEXP fixation_listSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject __result;
13 13
     Rcpp::RNGScope __rngScope;
... ...
@@ -46,7 +46,9 @@ BEGIN_RCPP
46 46
     Rcpp::traits::input_parameter< double >::type PDBaseline(PDBaselineSEXP);
47 47
     Rcpp::traits::input_parameter< double >::type cPDetect_i(cPDetect_iSEXP);
48 48
     Rcpp::traits::input_parameter< double >::type checkSizePEvery(checkSizePEverySEXP);
49
-    __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery));
49
+    Rcpp::traits::input_parameter< bool >::type AND_DrvProbExit(AND_DrvProbExitSEXP);
50
+    Rcpp::traits::input_parameter< Rcpp::List >::type fixation_list(fixation_listSEXP);
51
+    __result = Rcpp::wrap(nr_BNB_Algo5(rFE, mu_, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_, n2, p2, PDBaseline, cPDetect_i, checkSizePEvery, AND_DrvProbExit, fixation_list));
50 52
     return __result;
51 53
 END_RCPP
52 54
 }
... ...
@@ -123,6 +125,24 @@ BEGIN_RCPP
123 125
     return __result;
124 126
 END_RCPP
125 127
 }
128
+
129
+// evalRGenotypeAndMut
130
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th);
131
+RcppExport SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) {
132
+BEGIN_RCPP
133
+    Rcpp::RObject __result;
134
+// Rcpp::RNGScope __rngScope;
135
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP);
136
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP);
137
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP);
138
+ Rcpp::traits::input_parameter< double >::type th(thSEXP);
139
+ __result = Rcpp::wrap(accessibleGenotypes(y, f, numMut, th));
140
+ return __result;
141
+ END_RCPP
142
+}
143
+
144
+
145
+
126 146
 // // readFitnessEffects
127 147
 // void readFitnessEffects(Rcpp::List rFE, bool echo);
128 148
 // RcppExport SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP) {
129 149
new file mode 100644
... ...
@@ -0,0 +1,172 @@
1
+//     Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2
+
3
+//     This program is free software: you can redistribute it and/or modify
4
+//     it under the terms of the GNU General Public License as published by
5
+//     the Free Software Foundation, either version 3 of the License, or
6
+//     (at your option) any later version.
7
+
8
+//     This program is distributed in the hope that it will be useful,
9
+//     but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+//     GNU General Public License for more details.
12
+
13
+//     You should have received a copy of the GNU General Public License
14
+//     along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+#include <Rcpp.h>
17
+// using namespace Rcpp;
18
+
19
+inline int HammingDistance(const Rcpp::IntegerVector& x, const Rcpp::IntegerVector& y) {
20
+  Rcpp::NumericVector diff = Rcpp::abs( x - y );
21
+  return std::accumulate(diff.begin(), diff.end(), 0);
22
+}
23
+
24
+
25
+
26
+// [[Rcpp::export]]
27
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
28
+					Rcpp::NumericVector f,
29
+					Rcpp::IntegerVector numMut, //
30
+					double th) {
31
+  // Return just the indices. Could preserve fitness, but would need
32
+  // another matrix.
33
+  int ng = y.nrow(); //it counts the wt
34
+  Rcpp::IntegerMatrix adm(ng, ng);
35
+  int numMutdiff = 0;
36
+  // I would have thought this would be faster. It ain't.
37
+  // The last genotype never accesses anything.
38
+  // for(int i = 0; i < (ng - 1); ++i) {
39
+  //   // Candidate genotypes to be accessed from i are always of larger
40
+  //   // mutation by 1. And candidates can thus not have smaller index
41
+  //   for(int j = (i + 1); j < ng; ++j) {
42
+  //     if( (numMut(j) == (numMut(i) + 1)) &&
43
+  // 	  ( (f(j) - f(i)) >= th) &&
44
+  // 	  (HammingDistance(y(i, _), y(j, _)) == 1) ) {
45
+  // 	adm(i, j) = 1;
46
+  //     } else if( (numMut(j) > (numMut(i) + 1)) ) {
47
+  // 	break;
48
+  //     }
49
+  //   }
50
+  // }
51
+
52
+  // The last genotype never accesses anything.
53
+  for(int i = 0; i < (ng - 1); ++i) {
54
+    // Candidate genotypes to be accessed from i are always of larger
55
+    // mutation by 1. And candidates can thus not have smaller index
56
+    for(int j = (i + 1); j < ng; ++j) {
57
+      numMutdiff = numMut(j) - numMut(i);
58
+      if( numMutdiff > 1) { // no more to search
59
+  	break; 
60
+      } else if(numMutdiff == 1) {
61
+  	// f(j) - f(i) is faster than HammingDistance
62
+  	// but might lead to more evals?
63
+  	// or fewer, depending on landscape
64
+  	if( ( (f(j) - f(i)) >= th) &&