R/OncoSimulR.R
b137a4a3
 ## Copyright 2013, 2014, 2015, 2016, 2017 Ramon Diaz-Uriarte
2f97de14
 
 ## This program is free software: you can redistribute it and/or modify
 ## it under the terms of the GNU General Public License as published by
 ## the Free Software Foundation, either version 3 of the License, or
 ## (at your option) any later version.
 
 ## This program is distributed in the hope that it will be useful,
 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ## GNU General Public License for more details.
 
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 
5d9775a3
 ## Does it even make sense to keepPhylog with oncoSimulSample?
 ## Yes, but think what we store.
e693f153
 
 
 oncoSimulSample <- function(Nindiv,
                             fp,
                             model = "Exp",
                             numPassengers = 0,
                             mu = 1e-6,
50a94207
                             muEF = NULL,
e693f153
                             detectionSize = round(runif(Nindiv, 1e5, 1e8)),
                             detectionDrivers = {
                                 if(inherits(fp, "fitnessEffects")) {
                                     if(length(fp$drv)) {
                                         nd <- (2: round(0.75 * length(fp$drv)))
                                     } else {
1b1e5e0c
                                         nd <- 9e6 
e693f153
                                     }
                                 } else {
                                     nd <- (2 : round(0.75 * max(fp)))
                                 }
e6e95f54
                                 if(length(nd) == 1) ## for sample
                                        nd <- c(nd, nd)
e693f153
                                 sample(nd, Nindiv,
                                        replace = TRUE)
                             },
4d70b942
                             detectionProb = NA,
e693f153
                             sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
4d70b942
                                                  0.025),
e693f153
                             initSize = 500,
                             s = 0.1,
                             sh = -1,
                             K = initSize/(exp(1) - 1),
                             minDetectDrvCloneSz = "auto",
                             extraTime = 0,
                             finalTime = 0.25 * 25 * 365,
                             onlyCancer = TRUE,
debcf5f5
                             keepPhylog = FALSE,
                             mutationPropGrowth = ifelse(model == "Bozic",
                                                         FALSE, TRUE),
e693f153
                             max.memory = 2000,
                             max.wall.time.total = 600,
                             max.num.tries.total = 500 * Nindiv,
                             ## well, obviously they are errors
                             ## errorHitWallTime = TRUE,
                             ## errorHitMaxTries = TRUE,
                             typeSample = "whole",
5d9775a3
                             thresholdWhole = 0.5,
0f3a3188
                             initMutant = NULL,
c95df82d
                             AND_DrvProbExit = FALSE,
                             fixation = NULL,
0f3a3188
                             verbosity  = 1,
4d70b942
                             showProgress = FALSE,
5d9775a3
                             seed = "auto"){
e693f153
     ## No longer using mclapply, because of the way we use the limit on
     ## the number of tries.
     
     ## leaving detectionSize and detectionDrivers as they are, produces
     ## the equivalente of uniform sampling. For last, fix a single number
 
1b1e5e0c
     ## detectionDrivers when there are none: had we left it at 0, then
     ## when there are no drivers we would stop at the first sampling
     ## period.
     
     if(Nindiv < 1)
         stop("Nindiv must be >= 1")
5d9775a3
     if(keepPhylog)
         warning(paste("oncoSimulSample does not return the phylogeny",
                       "for now, so there is little point in storing it."))
e6e95f54
 
e693f153
     if(max.num.tries.total < Nindiv)
         stop(paste("You have requested something impossible: ",
                    "max.num.tries.total < Nindiv"))
     attemptsLeft <- max.num.tries.total
     attemptsUsed <- 0
     numToRun <- Nindiv
     pop <- vector(mode = "list", length = Nindiv)
     indiv <- 1
     ## FIXME! really pass params such as extraTime and minDDr as vectors,
     ## or give a warning
     params <- data.frame(seq.int(Nindiv),
                          extraTime = extraTime,
                          minDetectDrvCloneSz = minDetectDrvCloneSz,
                          detectionSize = detectionSize,
                          detectionDrivers = detectionDrivers)[, -1, drop = FALSE]
 
e6e95f54
     ## FIXME: we are not triggering an error, just a message. This is on
     ## purpose, since some of these conditions DO provide useful
     ## output. Do we want these to be errors?
e693f153
     f.out.attempts <- function() {
         message("Run out of attempts")
         return(list(
             popSummary = NA,
             popSample = NA,
             HittedMaxTries = TRUE,
e6e95f54
             HittedWallTime = FALSE,
e693f153
             UnrecoverExcept = FALSE
         ))    
     }    
 
     f.out.time <- function() {
         message("Run out of time")
         return(list(
             popSummary = NA,
             popSample = NA,
             HittedMaxTries = FALSE,
             HittedWallTime = TRUE,
             UnrecoverExcept = FALSE
         ))    
     }    
 
     f.out.attempts.cpp <- function() {
         message("Run out of attempts (in C++)")
         return(list(
             popSummary = NA,
             popSample = NA,
             HittedMaxTries = TRUE,
e6e95f54
             HittedWallTime = FALSE,
e693f153
             UnrecoverExcept = FALSE
         ))    
     }    
 
     f.out.time.cpp <- function() {
         message("Run out of time (in C++)")
         return(list(
             popSummary = NA,
             popSample = NA,
             HittedMaxTries = FALSE,
             HittedWallTime = TRUE,
             UnrecoverExcept = FALSE
         ))    
     }    
 
     f.out.unrecover.except <- function(x) {
         message("Unrecoverable exception (in C++)")
         return(list(
             popSummary = NA,
             popSample = NA,
             HittedMaxTries = NA,
             HittedWallTime = NA,
             UnrecoverExcept = TRUE,
             ExceptionMessage = x$other$ExceptionMessage
         ))    
     }    
    
     
     startTime <- Sys.time()
     while(TRUE) {
         
         possibleAttempts <- attemptsLeft - (numToRun - 1)
         ## I think I do not want a try here.
         tmp <-  oncoSimulIndiv(fp = fp,
                                model = model,
                                numPassengers = numPassengers,
                                mu = mu,
50a94207
                                muEF = muEF,
e693f153
                                detectionSize = params[indiv, "detectionSize"],
                                detectionDrivers = params[indiv, "detectionDrivers"],
                                sampleEvery = sampleEvery,
                                initSize = initSize,
                                s = s,
                                sh = sh,
                                K = K,
                                minDetectDrvCloneSz = params[indiv, "minDetectDrvCloneSz"],
                                extraTime = params[indiv, "extraTime"],
                                finalTime = finalTime,
                                max.memory = max.memory,
                                max.wall.time = max.wall.time.total,
                                max.num.tries = possibleAttempts,
                                verbosity = verbosity,
                                keepEvery = -9,
                                onlyCancer = onlyCancer,
                                errorHitWallTime = TRUE,
5d9775a3
                                errorHitMaxTries = TRUE,
                                seed = seed,
0f3a3188
                                initMutant = initMutant,
debcf5f5
                                keepPhylog = keepPhylog,
4d70b942
                                mutationPropGrowth = mutationPropGrowth,
c95df82d
                                detectionProb = detectionProb,
                                AND_DrvProbExit = AND_DrvProbExit,
                                fixation = fixation)        
e693f153
         if(tmp$other$UnrecoverExcept) {
             return(f.out.unrecover.except(tmp))
         }
         
         pop[[indiv]] <- tmp
         numToRun <- (numToRun - 1)
         attemptsUsed <- attemptsUsed + tmp$other$attemptsUsed
         attemptsLeft <- (max.num.tries.total - attemptsUsed)
50a94207
         if(showProgress) {
             cat("......  Done individual ", indiv,
                 ". Used ", attemptsUsed, "attempts.",
                 ". Running for ", as.double(difftime(Sys.time(),
                                                     startTime, units = "secs")),
                 " secs.\n"
                 )
         }
e693f153
         indiv <- indiv + 1
         
         ## We need to check in exactly this order. Attempts left only
         ## matters if no remaining individuals to run. But C++ might bail
         ## out in exactly the last individual
 
         if(  
             (exists("HittedMaxTries", where = tmp) &&
                  tmp[["HittedMaxTries"]])  ) {
             ## in C++ code
             return(f.out.attempts.cpp())
         } else if(  
             (exists("HittedWallTime", where = tmp) &&
                  tmp[["HittedWallTime"]])  ) {
             ## in C++ code
             return(f.out.time.cpp())
         } else if( indiv > Nindiv ) {
             if(verbosity > 0)
e6e95f54
                 message(paste0("Successfully sampled ", Nindiv, " individuals"))
e693f153
             class(pop) <- "oncosimulpop"
             if(inherits(fp, "fitnessEffects")) {
                 geneNames <- names(getNamesID(fp))
             } else {
                 geneNames <- NULL
             }
             return(list(
                 popSummary = summary(pop),
0f3a3188
                 popSample = samplePop(pop, timeSample = "last",
                                       typeSample = typeSample,
                                       thresholdWhole = thresholdWhole,
                                       geneNames = geneNames),
e693f153
                 attemptsUsed = attemptsUsed,
                 probCancer = Nindiv/attemptsUsed,
                 HittedMaxTries = FALSE,
                 HittedWallTime = FALSE,
                 UnrecoverExcept = FALSE
             ))
         } else if( attemptsLeft <= 0 ) {
e6e95f54
               ## it is very unlikely this will ever happen. 
e693f153
             return(f.out.attempts())
         } else  if( as.double(difftime(Sys.time(), startTime, units = "secs"))
                    > max.wall.time.total ) {
             return(f.out.time())
         } 
     }
 }
 
fc38a875
 add_noise <- function(x, properr) {
     if(properr <= 0) {
         return(x)
     }
     else {
         if(properr > 1)
             stop("Proportion with error cannot be > 1")
         nn <- prod(dim(x))
         flipped <- sample(nn, round(nn * properr))
         x[flipped] <- as.integer(!x[flipped])
         return(x)
     }
 }
e693f153
 
50a94207
 samplePop <- function(x, timeSample = "last",
                       typeSample = "whole",
e693f153
                       thresholdWhole = 0.5,
50a94207
                       geneNames = NULL,
fc38a875
                       popSizeSample = NULL,
                       propError = 0) {
50a94207
     ## timeSample <- match.arg(timeSample)
e693f153
     gN <- geneNames
50a94207
     
     if(!is.null(popSizeSample) && (length(popSizeSample) > 1) &&
        (length(popSizeSample) != length(x))) {
         message("length popSizeSample != number of subjects")
         popSizeSample <- rep(popSizeSample, length.out = length(x))
         }
     ## A hack to prevent Map from crashing with a NULL
     if(is.null(popSizeSample)) popSizeSample <- -99
e693f153
     if(inherits(x, "oncosimulpop")) {
2f97de14
         z <- do.call(rbind,
50a94207
                      Map(get.mut.vector,
                          x = x,
                          timeSample = timeSample,
                          typeSample = typeSample,
                          thresholdWhole = thresholdWhole,
                          popSizeSample = popSizeSample
                          ))
e693f153
         ## We need to check if the object is coming from v.2., to avoid
         ## having to force passing a vector of names
         if(is.null(gN) && (!is.null(x[[1]]$geneNames)))
             gN <- x[[1]]$geneNames
     } else {
2f97de14
         z <- get.mut.vector(x,
                             timeSample = timeSample,
                             typeSample = typeSample,
50a94207
                             thresholdWhole = thresholdWhole,
                             popSizeSample = popSizeSample)
2f97de14
         dim(z) <- c(1, length(z))
e693f153
         if(is.null(gN) && (!is.null(x$geneNames)))
c95df82d
             gN <- x$geneNames
2f97de14
     }
e693f153
     message("\n Subjects by Genes matrix of ",
2f97de14
         nrow(z), " subjects and ",
e693f153
             ncol(z), " genes.\n")
 
fc38a875
     if(propError > 0) {
         z <- add_noise(z, propError)
     }
e693f153
     if(!is.null(gN)) {
         colnames(z) <- gN
     } else {
         colnames(z) <- seq_len(ncol(z))
     }
2f97de14
     return(z)
 }
 
 
 oncoSimulPop <- function(Nindiv,
e693f153
                          fp,
                          model = "Exp",
1b1e5e0c
                          numPassengers = 0,
2f97de14
                          mu = 1e-6,
50a94207
                          muEF = NULL,
2f97de14
                          detectionSize = 1e8,
                          detectionDrivers = 4,
4d70b942
                          detectionProb = NA,
502e447e
                          sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
4d70b942
                                               0.025),
2f97de14
                          initSize = 500,
                          s = 0.1,
                          sh = -1,
                          K = initSize/(exp(1) - 1),
e693f153
                          keepEvery = sampleEvery, 
                          minDetectDrvCloneSz = "auto",
                          extraTime = 0,
                          ## used to be this
                          ## ifelse(model \%in\% c("Bozic", "Exp"), -9,
                          ##                       5 * sampleEvery),
2f97de14
                          finalTime = 0.25 * 25 * 365,
                          onlyCancer = TRUE,
5d9775a3
                          keepPhylog = FALSE,
debcf5f5
                          mutationPropGrowth = ifelse(model == "Bozic",
                                                      FALSE, TRUE),
2f97de14
                          max.memory = 2000,
                          max.wall.time = 200,
e693f153
                          max.num.tries = 500,
                          errorHitWallTime = TRUE,
                          errorHitMaxTries = TRUE,
0f3a3188
                          initMutant = NULL,
c95df82d
                          AND_DrvProbExit = FALSE,
                          fixation = NULL,
2f97de14
                          verbosity  = 0,
5d9775a3
                          mc.cores = detectCores(),
                          seed = "auto") {
2f97de14
 
1b1e5e0c
     if(Nindiv < 1)
         stop("Nindiv must be >= 1")
     
2f97de14
     if(.Platform$OS.type == "windows") {
         if(mc.cores != 1)
             message("You are running Windows. Setting mc.cores = 1")
         mc.cores <- 1
     }
     pop <- mclapply(seq.int(Nindiv),
                     function(x)
                     oncoSimulIndiv(
e693f153
                         fp = fp,
2f97de14
                         model = model,
                         numPassengers = numPassengers,
                         mu = mu,
50a94207
                         muEF = muEF,
2f97de14
                         detectionSize = detectionSize,
                         detectionDrivers = detectionDrivers,
                         sampleEvery = sampleEvery,
                         initSize = initSize,
                         s = s,
                         sh = sh,
                         K = K,
                         keepEvery = keepEvery,
e693f153
                         minDetectDrvCloneSz = minDetectDrvCloneSz,
                         extraTime = extraTime,
2f97de14
                         finalTime = finalTime,
                         onlyCancer = onlyCancer,
                         max.memory = max.memory,
                         max.wall.time = max.wall.time,
e693f153
                         max.num.tries = max.num.tries,
                         errorHitWallTime = errorHitWallTime,
                         errorHitMaxTries = errorHitMaxTries,
5d9775a3
                         verbosity = verbosity,
0f3a3188
                         seed = seed, keepPhylog = keepPhylog,
debcf5f5
                         initMutant = initMutant,
4d70b942
                         mutationPropGrowth = mutationPropGrowth,
c95df82d
                         detectionProb = detectionProb,
                         AND_DrvProbExit = AND_DrvProbExit,
                         fixation = fixation),
2f97de14
                     mc.cores = mc.cores
                     )
     class(pop) <- "oncosimulpop"
     attributes(pop)$call <- match.call()
     return(pop)
 }
 
 ## where is the default K coming from? Here:
 ## log( (K+N)/K  ) = 1; k + n = k * exp(1); k(exp - 1) = n; k = n/(exp - 1)
 
e693f153
 oncoSimulIndiv <- function(fp,
                            model = "Exp",
1b1e5e0c
                            numPassengers = 0,
2f97de14
                            mu = 1e-6,
50a94207
                            muEF = NULL,
2f97de14
                            detectionSize = 1e8,
                            detectionDrivers = 4,
4d70b942
                            detectionProb = NA,
502e447e
                            sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
4d70b942
                                                 0.025),
2f97de14
                            initSize = 500,
                            s = 0.1,
                            sh = -1,
                            K = initSize/(exp(1) - 1),
                            keepEvery = sampleEvery,
e693f153
                            minDetectDrvCloneSz = "auto",
                            extraTime = 0,
                            ## used to be this
                            ## ifelse(model \%in\% c("Bozic", "Exp"), -9,
                            ##                     5 * sampleEvery),
2f97de14
                            finalTime = 0.25 * 25 * 365,
                            onlyCancer = TRUE,
5d9775a3
                            keepPhylog = FALSE,
debcf5f5
                            mutationPropGrowth = ifelse(model == "Bozic",
                                                        FALSE, TRUE),
2f97de14
                            max.memory = 2000,
                            max.wall.time = 200,
e693f153
                            max.num.tries = 500,
                            errorHitWallTime = TRUE,
                            errorHitMaxTries = TRUE,
                            verbosity = 0,
                            initMutant = NULL,
c95df82d
                            AND_DrvProbExit = FALSE,
                            fixation = NULL,
e693f153
                            seed = NULL
2f97de14
                            ) {
     call <- match.call()
c95df82d
     if(all(c(is_null_na(detectionProb),
              is_null_na(detectionSize),
              is_null_na(detectionDrivers),
              is_null_na(finalTime),
              is_null_na(fixation)
              )))
4d70b942
         stop("At least one stopping condition should be given.",
              " At least one of detectionProb, detectionSize, detectionDrivers,",
c95df82d
              " finalTime. Otherwise, we'll run until aborted by max.wall.time,",
              " max.num.tries, and the like.")
 
     if(AND_DrvProbExit && (is_null_na(detectionProb) || is_null_na(detectionDrivers)))
         stop("AND_DrvProbExit is TRUE: both of detectionProb and detectionDrivers",
              " must be non NA.")
     if(AND_DrvProbExit && !is_null_na(detectionSize)) {
         warning("With AND_DrvProbExit = TRUE, detectionSize is ignored.")
         detectionSize <- NA
     }
     if(inherits(fp, "fitnessEffects")) {
         s <- sh <- NULL ## force it soon!
     }
 
2f97de14
     ## legacies from poor name choices
     typeFitness <- switch(model,
                           "Bozic" = "bozic1",
                           "Exp" = "exp",
                           "McFarlandLog" = "mcfarlandlog",
                           "McFL" = "mcfarlandlog",
                           stop("No valid value for model")
                           )
1b1e5e0c
     if(initSize < 1)
         stop("initSize < 1")
     
     if( (K < 1) && (model %in% c("McFL", "McFarlandLog") )) {
         stop("Using McFarland's model: K cannot be < 1")
     }       ##  if ( !(model %in% c("McFL", "McFarlandLog") )) {
             ## K <- 1 ## K is ONLY used for McFarland; set it to 1, to avoid
             ##        ## C++ blowing.
2f97de14
 
     if(typeFitness == "exp") {
         death <- 1
debcf5f5
         ## mutationPropGrowth <- 1
2f97de14
     } else {
         death <- -99
debcf5f5
         ## mutationPropGrowth <- 0
2f97de14
     }
     birth <- -99
 
     if( (typeFitness == "mcfarlandlog") &&
        (sampleEvery > 0.05)) {
         warning("With the McFarland model you often want smaller sampleEvery")
     }
 
e693f153
     if(minDetectDrvCloneSz == "auto") {
         if(model %in% c("Bozic", "Exp") )
             minDetectDrvCloneSz <- 0
         else if (model %in% c("McFL", "McFarlandLog"))
c95df82d
             minDetectDrvCloneSz <- initSize
         ## minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers)
e693f153
         else
             stop("Unknown model")
     }
 
50a94207
     if( (length(mu) > 1) && !inherits(fp, "fitnessEffects"))
         stop("Per-gene mutation rates cannot be used with the old poset format")
 
1b1e5e0c
     if(any(mu < 0)) {
50a94207
         stop("(at least one) mutation rate (mu) is negative")
e693f153
     }
e6e95f54
     ## We do not test for equality to 0. That might be a weird but
     ## legitimate case?
e693f153
 
4d70b942
     ## No user-visible magic numbers
     ## if(is.null(keepEvery))
     ##     keepEvery <- -9
c95df82d
     if(is_null_na(keepEvery)) keepEvery <- -9
4d70b942
 
     
e693f153
     if( (keepEvery > 0) & (keepEvery < sampleEvery)) {
         keepEvery <- sampleEvery
         warning("setting keepEvery <- sampleEvery")
     }
 
debcf5f5
     if( (typeFitness == "bozic1") && (mutationPropGrowth) )
         warning("Using fitness Bozic (bozic1) with mutationPropGrowth = TRUE;",
e693f153
                 "this will have no effect.")
 
     if( (typeFitness == "exp") && (death != 1) )
         warning("Using fitness exp with death != 1")
2f97de14
 
c95df82d
     if(!is_null_na(detectionDrivers) && (detectionDrivers >= 1e9))
         stop("detectionDrivers > 1e9; this doesn't seem reasonable")
     if(is_null_na(detectionDrivers)) detectionDrivers <- (2^31) - 1
     if(is_null_na(detectionSize)) detectionSize <- Inf
     if(is_null_na(finalTime)) finalTime <- Inf
fc38a875
 
     if(is_null_na(sampleEvery)) stop("sampleEvery cannot be NULL or NA")
2f97de14
     
e693f153
     if(!inherits(fp, "fitnessEffects")) {
         if(any(unlist(lapply(list(fp, 
                                   numPassengers,
                                   s, sh), is.null)))) {
             m <- paste("You are using the old poset format.",
                        "You must specify all of poset, numPassengers",
                        "s, and sh.")
             stop(m)
5d9775a3
            
         }
c95df82d
         if(AND_DrvProbExit) {
             stop("The AND_DrvProbExit = TRUE setting is invalid",
                  " with the old poset format.")
         }
50a94207
         if(!is.null(muEF))
c95df82d
             stop("Mutator effects cannot be specified with the old poset format.")
4d70b942
         if( length(initMutant) > 0)  
             warning("With the old poset format you can no longer use initMutant.",
                     " The initMutant you passed will be ignored.")
c95df82d
         ## stop("With the old poset, initMutant can only take a single value.")
         if(!is_null_na(fixation))
             stop("'fixation' cannot be specified with the old poset format.")
5d9775a3
         ## Seeding C++ is now much better in new version
         if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed
             ## name is a legacy. This is really the seed for the C++ generator.
             ## Nope, we cannot use 2^32, because as.integer will fail.
             seed <- as.integer(round(runif(1, min = 0, max = 2^16)))
e693f153
         }
5d9775a3
         if(verbosity >= 2)
             cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
 
c95df82d
         if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
e693f153
         ## if(message.v1)
         ##     message("You are using the old poset format. Consider using the new one.")
    
     
         ## A simulation stops if cancer or finalTime appear, the first
         ## one. But if we set onlyCnacer = FALSE, we also accept simuls
         ## without cancer (or without anything)
         
         op <- try(oncoSimul.internal(poset = fp, ## restrict.table = rt,
                                      ## numGenes = numGenes,
                                      numPassengers = numPassengers,
                                      typeCBN = "CBN",
2f97de14
                                      birth = birth,
                                      s = s,
                                      death = death,  
                                      mu =  mu,  
                                      initSize =  initSize, 
                                      sampleEvery =  sampleEvery,  
                                      detectionSize =  detectionSize, 
                                      finalTime = finalTime, 
                                      initSize_species = 2000, 
                                      initSize_iter = 500, 
e693f153
                                      seed = seed, 
2f97de14
                                      verbosity = verbosity, 
4d70b942
                                      speciesFS = 10000,  
e693f153
                                      ratioForce = 2,
                                      typeFitness = typeFitness,
                                      max.memory = max.memory,
debcf5f5
                                      mutationPropGrowth = mutationPropGrowth,                                   
e693f153
                                      initMutant = -1, 
                                      max.wall.time = max.wall.time,
                                      max.num.tries = max.num.tries,
2f97de14
                                      keepEvery = keepEvery,  
50a94207
                                      ## alpha = 0.0015,  
e693f153
                                      sh = sh,
2f97de14
                                      K = K, 
e693f153
                                      minDetectDrvCloneSz = minDetectDrvCloneSz,
                                      extraTime = extraTime,
                                      detectionDrivers = detectionDrivers,
                                      onlyCancer = onlyCancer,
                                      errorHitWallTime = errorHitWallTime,
                                      errorHitMaxTries = errorHitMaxTries),
2f97de14
                   silent = !verbosity)
e693f153
         objClass <- "oncosimul"
     } else {
c95df82d
         s <- sh <- NULL ## force it.
1b1e5e0c
         if(numPassengers != 0)
             warning(paste("Specifying numPassengers has no effect",
                           " when using fitnessEffects objects. ",
                           " The fitnessEffects objects are much more ",
                           "flexible and you can use, for example,",
                           "the noIntGenes component for passengers."))
5d9775a3
         if(is.null(seed)) {## Passing a null creates a random seed
             ## We use a double, to be able to pass in range > 2^16.
             ## Do not use 0, as that is our way of signaling to C++ to
             ## generate the seed.
             seed <- round(runif(1, min = 1, max = 2^32))
             if(verbosity >= 2)
                 cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
         } else if(seed == "auto") {
             seed <- 0.0
             if(verbosity >= 2)
                 cat("\n A (high quality) random seed will be generated in C++\n")
         }
c95df82d
         if(!is_null_na(fixation)) {
             if( (!is.list(fixation)) && (!is.vector(fixation))  )
                 stop("'fixation' must be a list or a vector.")
             if(!(all(unlist(lapply(fixation, is.vector)))))
                 stop("Each element of 'fixation' must be a single element character vector.")
             if(!(all(unlist(lapply(fixation, class)) == "character")))
                 stop("Each element of 'fixation' must be a single element character vector.")
             if(!(all( unlist(lapply(fixation, length)) == 1)))
                 stop("Each element of 'fixation' must be a single element character vector.")
             if(AND_DrvProbExit)
                 stop("It makes no sense to pass AND_DrvProbExit and a fixation list.")
         }
e693f153
         op <- try(nr_oncoSimul.internal(rFE = fp, 
                                         birth = birth,
                                         death = death,  
                                         mu =  mu,  
                                         initSize =  initSize, 
                                         sampleEvery =  sampleEvery,  
                                         detectionSize =  detectionSize, 
                                         finalTime = finalTime, 
                                         initSize_species = 2000, 
                                         initSize_iter = 500, 
                                         seed = seed, 
                                         verbosity = verbosity, 
4d70b942
                                         speciesFS = 10000,  
e693f153
                                         ratioForce = 2,
                                         typeFitness = typeFitness,
                                         max.memory = max.memory,
debcf5f5
                                         mutationPropGrowth = mutationPropGrowth,                                   
e693f153
                                         initMutant = initMutant, 
                                         max.wall.time = max.wall.time,
                                         max.num.tries = max.num.tries,
                                         keepEvery = keepEvery,  
50a94207
                                         ## alpha = 0.0015,  
e693f153
                                         K = K, 
                                         minDetectDrvCloneSz = minDetectDrvCloneSz,
                                         extraTime = extraTime,
                                         detectionDrivers = detectionDrivers,
                                         onlyCancer = onlyCancer,
                                         errorHitWallTime = errorHitWallTime,
5d9775a3
                                         errorHitMaxTries = errorHitMaxTries,
50a94207
                                         keepPhylog = keepPhylog,
4d70b942
                                         MMUEF = muEF,
c95df82d
                                         detectionProb = detectionProb,
                                         AND_DrvProbExit = AND_DrvProbExit,
                                         fixation = fixation),
e693f153
                   silent = !verbosity)
         objClass <- c("oncosimul", "oncosimul2")
     }
     if(inherits(op, "try-error")) {
         ##         if(length(grep("BAIL OUT NOW", op)))
         stop(paste("Unrecoverable error:", op ))
     }
     if(verbosity >= 2) {
         cat("\n ... finished this run:")
         cat("\n       Total Pop Size = ", op$TotalPopSize)
         cat("\n       Drivers Last = ", op$MaxDriversLast)
         cat("\n       Final Time = ", op$FinalTime, "\n")
2f97de14
     }
e693f153
 
     class(op) <- objClass
2f97de14
     attributes(op)$call <- call
     return(op)
 }
 
 summary.oncosimul <- function(object, ...) {
fc38a875
     ## This should be present even in HittedWallTime and HittedMaxTries
     ## if those are not regarded as errors
     pbp <- ("pops.by.time" %in% names(object) )
e6e95f54
     
fc38a875
     if(object$other$UnrecoverExcept) { ## yes, when bailing out from
e6e95f54
                                      ## except. can have just minimal
                                      ## content
         return(NA)
fc38a875
     } else if( !pbp & (object$HittedWallTime || object$HittedMaxTries) ) {
e693f153
         return(NA)
fc38a875
     } else if ( !pbp & !(object$HittedWallTime || object$HittedMaxTries) ) {
         stop("Eh? No pops.by.time but did not hit wall time or max tries? BUG!")
     } else {
e693f153
         tmp <- object[c("NumClones", "TotalPopSize", "LargestClone",
                         "MaxNumDrivers", "MaxDriversLast",
                         "NumDriversLargestPop", "TotalPresentDrivers",
fc38a875
                         "FinalTime", "NumIter", "HittedWallTime",
                         "HittedMaxTries")]
e693f153
  
         tmp$errorMF <- object$other$errorMF
         tmp$minDMratio <- object$other$minDMratio
         tmp$minBMratio <- object$other$minBMratio
         if( (tmp$errorMF == -99)) tmp$errorMF <- NA
         if( (tmp$minDMratio == -99)) tmp$minDMratio <- NA
         if( (tmp$minBMratio == -99)) tmp$minBMratio <- NA
         tmp$OccurringDrivers <- object$OccurringDrivers
c95df82d
         return(as.data.frame(tmp, stringsAsFactors = FALSE))
e693f153
     }
2f97de14
 }
 
e693f153
 
 
 
2f97de14
 print.oncosimul <- function(x, ...) {
     cat("\nIndividual OncoSimul trajectory with call:\n ")
     print(attributes(x)$call)
     cat("\n")
     print(summary(x))
e693f153
 
     if(inherits(x, "oncosimul2")) {
         ## we know small object
         cat("\n")
         cat("Final population composition:\n")
         df <- data.frame(Genotype = x$GenotypesLabels,
                          N = x$pops.by.time[nrow(x$pops.by.time), -1])
         print(df)
     }
2f97de14
 }
 
 ## I want this to return things that are storable
c95df82d
 ## summary.oncosimulpop <- function(object, ...) {
 ##     as.data.frame(rbindlist(lapply(object, summary)))
 ## }
 
2f97de14
 summary.oncosimulpop <- function(object, ...) {
c95df82d
     tmp <- lapply(object, summary)
     rm <- which(unlist(lapply(tmp, function(x) (length(x) == 1) && (is.na(x)))))
     if(length(rm) > 0)
         if(length(rm) < length(object)) {
         warning("Some simulations seem to have failed and will be removed",
                 " from the summary. The failed runs are ",
                 paste(rm, collapse = ", "),
                 ".")
         tmp <- tmp[-rm]
         } else {
             warning("All simulations failed.")
             return(NA)
         }
     as.data.frame(rbindlist(tmp))
2f97de14
 }
 
c95df82d
 
 
2f97de14
 print.oncosimulpop <- function(x, ...) {
e6e95f54
     cat("\nPopulation of OncoSimul trajectories of",
         length(x), "individuals. Call :\n")
2f97de14
     print(attributes(x)$call)
     cat("\n")
     print(summary(x))
 }
 
 
 plot.oncosimulpop <- function(x, ask = TRUE,
debcf5f5
                               show = "drivers", 
                               type = ifelse(show == "genotypes",
                                             "stacked", "line"),
50a94207
                               col = "auto",
debcf5f5
                               log = ifelse(type == "line", "y", ""),
2f97de14
                               ltyClone = 2:6,
e693f153
                               lwdClone = 0.9,
2f97de14
                               ltyDrivers = 1,
                               lwdDrivers = 3,
                               xlab = "Time units",
                               ylab = "Number of cells",
                               plotClones = TRUE,
                               plotDrivers = TRUE,
                               addtot = FALSE,
                               addtotlwd = 0.5,
debcf5f5
                               ylim = NULL,
                               xlim = NULL,
2f97de14
                               thinData = FALSE,
                               thinData.keep = 0.1,
                               thinData.min = 2,
5d9775a3
                               plotDiversity = FALSE,
debcf5f5
                               order.method = "as.is",
                               stream.center = TRUE,
                               stream.frac.rand = 0.01,
                               stream.spar = 0.2,
                               border = NULL,
                               lwdStackedStream = 1,
                               srange = c(0.4, 1),
                               vrange = c(0.8, 1),
                               breakSortColors = "oe",
                               legend.ncols = "auto",
2f97de14
                               ...
                               ) {
     op <- par(ask = ask)
     on.exit(par(op))
     null <- lapply(x, function(z)
debcf5f5
         plot.oncosimul(z,
                        show = show,
                        type = type,
                        col = col,
                        log = log,
                        ltyClone = ltyClone,
                        lwdClone = lwdClone,
                        ltyDrivers = ltyDrivers,
                        lwdDrivers = lwdDrivers,
                        xlab = xlab,
                        ylab = ylab,
                        plotClones = plotClones,
                        plotDrivers = plotDrivers,
                        addtot = addtot,
                        addtotlwd = addtotlwd,
                        ylim = ylim,
                        xlim = xlim,
                        thinData = thinData,
                        thinData.keep = thinData.keep,
                        thinData.min = thinData.min,
                        plotDiversity = plotDiversity,
                        order.method = order.method,
                        stream.center = stream.center,
                        stream.frac.rand = stream.frac.rand,
                        stream.spar = stream.spar,
                        border = border,
                        lwdStackedStream = lwdStackedStream,
                        srange = srange,
                        vrange = vrange,
                        breakSortColors = breakSortColors,
                        legend.ncols = legend.ncols,
                        ...))
2f97de14
 }
 
 
debcf5f5
 ## plot.oncosimul <- function(x, col = c(8, "orange", 6:1),
 ##                            log = "y",
 ##                            ltyClone = 2:6,
 ##                            lwdClone = 0.9,
 ##                            ltyDrivers = 1,
 ##                            lwdDrivers = 3,
 ##                            xlab = "Time units",
 ##                            ylab = "Number of cells",
 ##                            plotClones = TRUE,
 ##                            plotDrivers = TRUE,
 ##                            addtot = FALSE,
 ##                            addtotlwd = 0.5,
 ##                            yl = NULL,
 ##                            thinData = FALSE,
 ##                            thinData.keep = 0.1,
 ##                            thinData.min = 2,
 ##                            plotDiversity = FALSE,
 ##                            ...
 ##                            ) {
 
 ##     if(thinData)
 ##         x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
 
 ##     ## uvx
 ##     if(!inherits(x, "oncosimul2"))
 ##         ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
 ##     else {
 ##         ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
 ##     }
     
 ##     if(is.null(yl)) {
 ##         if(log %in% c("y", "xy", "yx") )
 ##             yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
 ##         else
 ##             yl <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
 ##     }
 ##     if(plotDiversity) {
 ##         par(fig = c(0, 1, 0.8, 1))
 ##         m1 <- par()$mar
 ##         m <- m1
 ##         m[c(1, 3)] <- c(0, 0.7)
 ##         op <- par(mar = m )
 ##         plotShannon(x)
 ##         par(op)
 ##         m1[c(3)] <- 0.2
 ##         op <- par(mar = m1)
 ##         par(fig = c(0, 1, 0, 0.8), new = TRUE)  
 ##     }
 ##     if(plotClones) {
 ##         plotClones(x,
 ##                    ndr = ndr, 
 ##                    xlab = xlab,
 ##                    ylab = ylab,
 ##                    lty = ltyClone,
 ##                    col = col, 
 ##                    ylim = yl,
 ##                    lwd = lwdClone,
 ##                    axes = FALSE,
 ##                    log = log,
 ##                    ...)
 ##     }
 
 ##     if(plotClones && plotDrivers)
 ##         par(new = TRUE)
     
 ##     if(plotDrivers){
 ##         plotDrivers0(x,
 ##                      ndr,
 ##                      timescale = 1,
 ##                      trim.no.drivers = FALSE,
 ##                      xlab = "", ylab = "",
 ##                      lwd = lwdDrivers,
 ##                      lty = ltyDrivers,
 ##                      col = col, 
 ##                      addtot = addtot,
 ##                      addtotlwd = addtotlwd,
 ##                      log = log, ylim = yl,
 ##                      ...)
 ##     }
     
 ## }
 
 
 plot.oncosimul <- function(x,
                            show = "drivers", 
                            type = ifelse(show == "genotypes",
                                          "stacked", "line"),
                            col = "auto",
                            log = ifelse(type == "line", "y", ""),
2f97de14
                            ltyClone = 2:6,
e693f153
                            lwdClone = 0.9,
2f97de14
                            ltyDrivers = 1,
                            lwdDrivers = 3,
                            xlab = "Time units",
                            ylab = "Number of cells",
                            plotClones = TRUE,
                            plotDrivers = TRUE,
                            addtot = FALSE,
                            addtotlwd = 0.5,
debcf5f5
                            ylim = NULL,
                            xlim = NULL,
2f97de14
                            thinData = FALSE,
                            thinData.keep = 0.1,
                            thinData.min = 2,
5d9775a3
                            plotDiversity = FALSE,
debcf5f5
                            order.method = "as.is",
                            stream.center = TRUE,
                            stream.frac.rand = 0.01,
                            stream.spar = 0.2,
                            border = NULL,
                            lwdStackedStream = 1,
                            srange = c(0.4, 1),
                            vrange = c(0.8, 1),
                            breakSortColors = "oe",
                            legend.ncols = "auto",
2f97de14
                            ...
                            ) {
 
debcf5f5
 
     if(!(type %in% c("stacked", "stream", "line")))
         stop("Type of plot unknown: it must be one of",
              "stacked, stream or line")
 
     if(!(show %in% c("genotypes", "drivers")))
         stop("show must be one of ",
              "genotypes or drivers")
 
     if(!(breakSortColors %in% c("oe", "distave", "random")))
         stop("breakSortColors must be one of ",
              "oe, distave, or random")
 
     
 
     colauto <- FALSE
     if(col == "auto" && (type == "line") && (show == "drivers"))
         col <- c(8, "orange", 6:1)
     if(col == "auto" && (show == "genotypes")) {
         ## For categorical data, I find Dark2, Paired, or Set1 to work best.
         col <- colorRampPalette(brewer.pal(8, "Dark2"))(ncol(x$pops.by.time) - 1)
         colauto <- TRUE
     }
     
     if(show == "genotypes") {
         plotDrivers <- FALSE
         plotClones <- TRUE
     }
     
2f97de14
     if(thinData)
         x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
 
debcf5f5
     if(!is.null(xlim))
         x <- xlim.pop.data(x, xlim)
     
     ## For genotypes, ndr is now the genotypes.  Actually, ndr is now just
     ## a sequence 1:(ncol(y) - 1)
 
     ## The user will want to change the colors, like a colorRamp, etc. Or
     ## rainbow.
 
     ## genotypes and line, always call plotDrivers0
     if(show == "drivers") {
         if(!inherits(x, "oncosimul2"))
             ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
         else {
             ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
         }
     } else { ## show we are showing genotypes
         ndr <- 1:(ncol(x$pops.by.time) - 1)
e693f153
     }
     
debcf5f5
     if((type == "line") && is.null(ylim)) {
2f97de14
         if(log %in% c("y", "xy", "yx") )
debcf5f5
             ylim <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
2f97de14
         else
debcf5f5
             ylim <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
2f97de14
     }
5d9775a3
     if(plotDiversity) {
debcf5f5
         oppd <- par(fig = c(0, 1, 0.8, 1))
         m1 <- par()$mar
         m <- m1
5d9775a3
         m[c(1, 3)] <- c(0, 0.7)
         op <- par(mar = m )
         plotShannon(x)
         par(op)
debcf5f5
         m1[c(3)] <- 0.2
         op <- par(mar = m1)
5d9775a3
         par(fig = c(0, 1, 0, 0.8), new = TRUE)  
     }
debcf5f5
 
     ## Shows its history: plotClones makes plotDrivers0 unneeded with
     ## stacked and stream plots. But now so with line plot.
     ## When showing genotypes, plotDrivers0 with line only used for
     ## showing the legend.
2f97de14
     if(plotClones) {
debcf5f5
         plotClonesSt(x,
                      ndr = ndr,
                      show = show,
                      na.subs = TRUE,
                      log = log,
                      lwd = lwdClone,
                      lty = ifelse(show == "drivers", ltyClone, ltyDrivers),
                      col = col, 
                      order.method = order.method,
                      stream.center = stream.center,
                      stream.frac.rand = stream.frac.rand,
                      stream.spar = stream.spar,
                      border = border,
                      srange = srange,
                      vrange = vrange,
                      type = type,
                      breakSortColors = breakSortColors,
                      colauto = colauto,
                      legend.ncols = legend.ncols,
                      lwdStackedStream = lwdStackedStream,
                      xlab = xlab,
                      ylab = ylab,
                      ylim = ylim,
                      xlim = xlim,
                      ...)
2f97de14
     }
 
debcf5f5
     if(plotClones && plotDrivers && (type == "line"))
2f97de14
         par(new = TRUE)
     
debcf5f5
     if( plotDrivers && (type == "line") ) {
2f97de14
         plotDrivers0(x,
e693f153
                      ndr,
2f97de14
                      timescale = 1,
                      trim.no.drivers = FALSE,
                      xlab = "", ylab = "",
                      lwd = lwdDrivers,
                      lty = ltyDrivers,
                      col = col, 
                      addtot = addtot,
                      addtotlwd = addtotlwd,
debcf5f5
                      log = log, ylim = ylim,
                      xlim = xlim,
                      legend.ncols = legend.ncols,
2f97de14
                      ...)
     }
debcf5f5
     if(plotDiversity) {
         par(oppd)
     }
     
 }
 
 plotClonesSt <- function(z,
                          ndr,
                          show = "drivers",
                          na.subs = TRUE,
                          log = "y",
                          lwd = 1,
                          ## type = "l",
                          lty = 1:8, col = 1:9,
                          order.method = "as.is",
                          stream.center = TRUE,
                          stream.frac.rand = 0.01,
                          stream.spar = 0.2,
                          border = NULL,
                          srange = c(0.4, 1),
                          vrange = c(0.8, 1),
                          type = "stacked",
                          breakSortColors = "oe",
                          colauto = TRUE,
                          legend.ncols = "auto",
                          lwdStackedStream = 1,
                          xlab = "Time units",
                          ylab = "Number of cells",
                          ylim = NULL,
                          xlim = NULL,
                          ...) {
 
     ## if given ndr, we order columns based on ndr, so clones with more
     ## drivers are plotted last
 
     y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
 
     ## Code in stacked and stream plots relies on there being no NAs. Could
     ## change it, but it does not seem reasonable.
     ##  But my original plotting code runs faster and is simpler if 0 are
     ##  dealt as NAs (which also makes log transformations simpler).
     
     if(type %in% c("stacked", "stream") )
         na.subs <- FALSE
     
     if(na.subs){
         y[y == 0] <- NA
     }
     ## if(is.null(ndr))
     ##     stop("Should never have null ndr")
     ## if(!is.null(ndr)) {
         ## could be done above, to avoid creating
         ## more copies
     oo <- order(ndr)
     y <- y[, oo, drop = FALSE]
     ndr <- ndr[oo]
     if(show == "drivers") {
         col <- rep(col, length.out = (1 + max(ndr)))[ndr + 1]
         lty <- rep(lty, length.out = ncol(y))
     } else {
         if(length(col) < max(ndr))
             warning("Repeating colors; you might want to",
                     "pass a col vector of more elements")
         col <- rep(col, length.out = (max(ndr)))[ndr]
     }
     ## }
     if(type == "line") {
         matplot(x = z$pops.by.time[, 1],
                 y = y,
                 log = log, type = "l",
                 col = col, lty = lty,
                 lwd = lwd,
                 xlab = xlab,
                 ylab = ylab,
                 ylim = ylim,
                 xlim = xlim,
                 ...)
         box()
         if(show == "genotypes") {
             if(!inherits(z, "oncosimul2")) {
                 ldrv <- genotypeLabel(z)
             } else {
                 ldrv <- z$GenotypesLabels
             }
             ldrv[ldrv == ""] <- "WT"
             ldrv[ldrv == " _ "] <- "WT"
             if(legend.ncols == "auto") {
                 if(length(ldrv) > 6) legend.ncols <- 2
                 else legend.ncols <- 1
             }
             legend(x = "topleft",
                    title = "Genotypes",
                    lty = lty,
                    col = col, lwd = lwd,
                    legend = ldrv,
                    ncol = legend.ncols)
         }
     } else {
         ymax <- colSums(y)
         if((show == "drivers") || ((show == "genotypes") && (colauto))) {
             cll <- myhsvcols(ndr, ymax, srange = srange, vrange = vrange,
                              breakSortColors = breakSortColors)
         } else {
             cll <- list(colors = col)
         }
         x <- z$pops.by.time[, 1]
         if(grepl("y", log)) {
             stop("It makes little sense to do a stacked/stream",
                  "plot after taking the log of the y data.")
         }
         if(grepl("x", log)) {
             x <- log10(x + 1)
         }
         
         if (type == "stacked") {
             plot.stacked2(x = x,
                           y = y,
                           order.method = order.method,
                           border = border,
                           lwd = lwdStackedStream,
                           col = cll$colors,
                           log = log,
                           xlab = xlab,
                           ylab = ylab,
                           ylim = ylim,
                           xlim = xlim,
                           ...) 
         } else if (type == "stream") {
             plot.stream2(x = x,
                          y = y,
                          order.method = order.method,
                          border = border,
                          lwd = lwdStackedStream,
                          col = cll$colors,
                          frac.rand = stream.frac.rand,
                          spar = stream.spar,
                          center = stream.center,
                          log = log,
                          xlab = xlab,
                          ylab = ylab,
                          ylim = ylim,
                          xlim = xlim,
                          ...)
         }
         if(show == "drivers") {
             if(legend.ncols == "auto") {
                 if(length(cll$colorsLegend$Drivers) > 6) legend.ncols <- 2
                 else legend.ncols <- 1
             }
             legend(x = "topleft",
                    title = "Number of drivers",
                    pch = 15,
                    ## lty = 1,
                    ## lwd = 2,
                    col = cll$colorsLegend$Color,
                    legend = cll$colorsLegend$Drivers,
                    ncol = legend.ncols)
         } else if (show == "genotypes") {
             if(!inherits(z, "oncosimul2")) {
                 ldrv <- genotypeLabel(z)
             } else {
                 ldrv <- z$GenotypesLabels
             }
             ldrv[ldrv == ""] <- "WT"
             ldrv[ldrv == " _ "] <- "WT"            
             if(legend.ncols == "auto") {
                 if(length(ldrv) > 6) legend.ncols <- 2
                 else legend.ncols <- 1
             }
             legend(x = "topleft",
                    title = "Genotypes",
                    pch = 15,
                    col = cll$colors,
                    legend = ldrv,
                    ncol = legend.ncols)
         }
     }
 }
 
 
 
 
 
 relabelLogaxis <- function(side = 2) {
     ## we do a plot but pass the already transformed data; relabel
     ## afterwards.
     po <- axis( side = side, labels = FALSE, tick = FALSE, lwd = 0)
     axis(side = side, labels = 10^po, at = po, tick = TRUE)
 }
 
 
 
 
 myhsvcols <- function(ndr, ymax, srange = c(0.4, 1),
                       vrange = c(0.8, 1),
                       breakSortColors = "oe") {
     ## Generate a set of colors so that:
     ##  - easy to tell when we increase number of drivers
     ##  - reasonably easy to tell in a legend
     ##  - different clones with same number of drivers have "similar" colors
 
     ## I use hsv color specification as this seems the most reasonable.
     
     minor <- table(ndr)
     major <- length(unique(ndr)) ## yeah same as length(minor), but least
                                  ## surprise
     
     h <- seq(from = 0, to = 1, length.out = major + 1)[-1]
     ## do not keep similar hues next to each other
     if(breakSortColors == "oe") {
         oe <- seq_along(h) %% 2
         h <- h[order(oe, h)]
     } else if(breakSortColors == "distave"){
         sl <- seq_along(h)
         h <- h[order(-abs(mean(sl) - sl))]
     } else if(breakSortColors == "random") {
         rr <- order(runif(length(h)))
         h <- h[rr]
     } 
     
     hh <- rep(h, minor)
     
     sr <- unlist(lapply(minor, function(x) 
         seq(from = srange[1], to = srange[2], length.out = x)))
     sv <- unlist(lapply(minor, function(x) 
         seq(from = vrange[1], to = vrange[2], length.out = x))
         )
 
     colors <- hsv(hh, sr, sv)
 
     ## This gives "average" or "median" color for legend
     ## colorsLegend <- aggregate(list(Color = colors), list(Drivers = ndr),
     ##                           function(x)
     ##                               as.character(x[((length(x) %/% 2) + 1 )]))
 
     ## Give the most abundant class color as the legend. Simpler to read
     colorsLegend <- by(data.frame(Color = colors, maxnum = ymax),
                        list(Drivers = ndr),
                        function(x) as.character(x$Color[which.max(x$maxnum)]))
     colorsLegend <- data.frame(Drivers = as.integer(row.names(colorsLegend)),
                                Color = cbind(colorsLegend)[, 1],
                                stringsAsFactors = FALSE)
     ## To show what it would look like
     ## plot(1:(sum(minor)), col = colors, pch = 16, cex = 3)
     ## legend(1, length(ndr), col = colorsLegend$Color, legend = names(minor),
     ##        pch = 16)
 
     return(list(colors = colors,
                 colorsLegend = colorsLegend))
 }
 
 genotypeLabel <- function(x) {
     ## For oncosimul objects, that do not have the GenotypesLabels object
     apply(x$Genotypes, 2,
           function(x) paste(which(x == 1), sep = "", collapse = ", "))
 }
 
 plotDrivers0 <- function(x,
                          ndr,
                          timescale = 1,
                          trim.no.drivers = FALSE,
                          addtot = TRUE,
                          addtotlwd = 2,
                          na.subs = TRUE, log = "y", type = "l",
                          lty = 1:9, col = c(8, "orange", 6:1),
                          lwd = 2,
                          legend.ncols = "auto",
                          ...) {
     ## z <- create.drivers.by.time(x, numDrivers)
     z <- create.drivers.by.time(x, ndr)
     ## We can now never enter here because trim.no.drivers is always FALSE
     ## in call.
     ## if(trim.no.drivers && x$MaxDriversLast) {
     ##     fi <- which(apply(z[, -c(1, 2), drop = FALSE], 1,
     ##                       function(x) sum(x) > 0))[1]
     ##     z <- z[fi:nrow(z), , drop = FALSE]
     ## }
     y <- z[, 2:ncol(z), drop = FALSE]
     if(na.subs){
         y[y == 0] <- NA
     }
 
     ## Likewise, we can never enter here now as timescale fixed at 1. And
     ## this is silly.
     ## if(timescale != 1) {
     ##     time <- timescale * z[, 1]
     ## } else {
     ##     time <- z[, 1]
     ## }
     time <- timescale * z[, 1]
     if(nrow(y) <= 2) type <- "b"
     ## Allow for the weird case of possibly missing drivers
     col2 <- rep(col, length.out = (1 + max(ndr)))
     col2 <- col2[sort(unique(ndr)) + 1]
     matplot(x = time,
             y = y,
             type = type, log = log, lty = lty, col = col2, lwd = lwd,
             ...)
     if(addtot) {
         tot <- rowSums(y, na.rm = TRUE)
         lines(time, tot, col = "black", lty = 1, lwd = addtotlwd)
     }
5d9775a3
     
debcf5f5
     ## This will work even if under the weird case of a driver missing
     ldrv <- unlist(lapply(strsplit(colnames(y), "dr_", fixed = TRUE),
                           function(x) x[2]))
     if(legend.ncols == "auto") {
         if(length(ldrv) > 6) legend.ncols <- 2
         else legend.ncols <- 1
     }
     legend(x = "topleft",
            title = "Number of drivers",
            lty = lty, col = col2, lwd = lwd,
            legend = ldrv,
            ncol = legend.ncols)
     ## legend = (1:ncol(y)) - 1)
2f97de14
 }
 
 
e693f153
 
debcf5f5
 
 
2f97de14
 plotPoset <- function(x, names = NULL, addroot = FALSE,
                       box = FALSE, ...) {
     if(is.null(names)) {
         if(addroot) names <- c("Root", 1:max(x))
         else names <- 1:max(x)
     }
e693f153
     plot(posetToGraph(x, names =  names, addroot = addroot,
                       type = "graphNEL",
                       strictAdjMat = FALSE), ...)
2f97de14
     if(box)
         box()
 }
 
e6e95f54
 ## this function seems to never be used
 ## plotAdjMat <- function(adjmat) {
 ##     plot(as(adjmat, "graphNEL"))
 ## }
e693f153
 
 
 
5d9775a3
 which_N_at_T <- function(x, N = 1, t = "last") {
     if((length(t) == 1) && (t == "last"))
         T <- nrow(x$pops.by.time)
     else if(length(t) == 2) {
         if(t[1] < 0)
             warning("smallest t must be >= 0; setting to 0")
         if(t[1] > t[2])
             stop("t[1] must be <= t[2]")
         if(t[2] > max(x$pops.by.time[, 1]))
             message("t[2] > largest time; setting it to the max")
         T <- which(
             (x$pops.by.time[, 1] >= t[1]) &
                 (x$pops.by.time[, 1] <= t[2]))
     }
     else
         stop("t must be either 'last' or a vector of length 2")
     z <- which(x$pops.by.time[T, -1, drop = FALSE] >= N, arr.ind = TRUE)[, 2]
     z <- unique(z) ## recall we removed first column but we index from first.
     return(z)
 }
 
 phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
     if(!inherits(x, "oncosimul2"))
         stop("Phylogenetic information is only stored with v >= 2")
     z <- which_N_at_T(x, N, t)
     tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all
c95df82d
     ## sample size info at each period
 
     if( (length(tG) == 1) && (tG == "")) {
         warning("There never was a descendant of WT")
     }
     
5d9775a3
     df <- x$other$PhylogDF
c95df82d
     if(nrow(df) == 0) {
fc38a875
         warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared. ",
                 "This also happens if you did not set 'keepPhylog = TRUE'.")
c95df82d
         return(NA)
     }
5d9775a3
     if(!keepEvents) { ## is this just a graphical thing? or not?
         df <- df[!duplicated(df[, c(1, 2)]), ]
     }
     g <- igraph::graph.data.frame(df[, c(1, 2)])
     ## nodes <- match(tG, V(g)$name)
     nodesInP <- unique(unlist(igraph::neighborhood(g, order = 1e9,
                                                    nodes = tG,
                                                    mode = "in")))
     ## Remember that the phylog info can contain clones that are
     ## not in pops.by.time, as they go extinct between creation
     ## and sampling.
     allLabels <- unique(as.character(unlist(df[, c(1, 2)])))
     nodesRm <- setdiff(allLabels, V(g)$name[nodesInP])
     g <- igraph::delete.vertices(g, nodesRm)
     tmp <- list(graph = g, df = df)
     class(tmp) <- c(class(tmp), "phylogClone")
     return(tmp)
     ## trivial to return an adjacency matrix if needed. The keepEvents = FALSE
 }
 
 
 
 plotClonePhylog <- function(x, N = 1, t = "last",
                             timeEvents = FALSE,
                             keepEvents = FALSE,
                             fixOverlap = TRUE,
                             returnGraph = FALSE, ...) {
     if(!inherits(x, "oncosimul2"))
         stop("Phylogenetic information is only stored with v >=2")
     if(nrow(x$other$PhylogDF) == 0)
4d70b942
         stop("It seems you run the simulation with keepPhylog= FALSE. ",
              "This error can also appear if your simulation exited ",
              "very fast, before any clones beyond the initial were ",
              "generated.")
5d9775a3
     pc <- phylogClone(x, N, t, keepEvents)
c95df82d
     ## if(is.na(pc)) {
     ##     ## This should not be reachable, as caught before
     ##     ## where we check for nrow of PhylogDF   
     ##     warning("No clone phylogeny available. Exiting without plotting.")
     ##     return(NULL)
     ## }
         
5d9775a3
     l0 <- igraph::layout.reingold.tilford(pc$g)
     if(!timeEvents) {
         plot(pc$g, layout = l0)
     } else {
         l1 <- l0
         indexAppear <- match(V(pc$g)$name, as.character(pc$df[, 2]))
         firstAppear <- pc$df$time[indexAppear]
         firstAppear[1] <- 0
         l1[, 2] <- (max(firstAppear) - firstAppear)
         if(fixOverlap) {
             dx <- which(duplicated(l1[, 1]))
             if(length(dx)) {
                 ra <- range(l1[, 1])
                 l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
             }
         }
         plot(pc$g, layout = l1)         
     }
     if(returnGraph)
         return(pc$g)
 }
 
 
 
 
 
 
 
 
2f97de14
 ############# The rest are internal functions
 
50a94207
 closest_time <- function(x, size) {
     ## Find the first time when a given size is reached
     sizes <- rowSums(x$pops.by.time[, -1, drop = FALSE])
     candidates <- which(sizes >= size)
     if(length(candidates) == 0) {
         warning(paste("Pop size never >= requested size.",
                       "Thus, there is nothing to sample. You will get NAs"))
         return(-99)
     } else {
         return(candidates[1])
     }
 }
 
2f97de14
 
50a94207
 get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) {
     if( !is.null(popSizeSample) && (popSizeSample >= 0) )  {
         the.time <- closest_time(tmp, popSizeSample)
     } else if(timeSample == "last") {
e6e95f54
         if(tmp$TotalPopSize == 0) {
             warning(paste("Final population size is 0.",
                           "Thus, there is nothing to sample with",
                           "sampling last. You will get NAs"))
             the.time <- -99
         } else {
50a94207
             the.time <- nrow(tmp$pops.by.time)
         }
2f97de14
     } else if (timeSample %in% c("uniform", "unif")) {
e6e95f54
           candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
           
           if (length(candidate.time) == 0) {
               warning(paste("There is not a single sampled time",
f9a38e24
                             "at which there are any mutants with drivers. ",
e6e95f54
                             "Thus, no uniform sampling possible.",
                             "You will get NAs"))
               the.time <- -99
               ## return(rep(NA, nrow(tmp$Genotypes)))
           } else if (length(candidate.time) == 1) {
                 message("Only one sampled time period with mutants.")
                 the.time <- candidate.time
             } else {
                   the.time <- sample(candidate.time, 1)
               }
50a94207
     } else {
e6e95f54
             stop("Unknown timeSample option")
         }
     return(the.time)
2f97de14
 }
 
 
e6e95f54
 get.mut.vector <- function(x, timeSample, typeSample,
50a94207
                            thresholdWhole, popSizeSample) {
c95df82d
     if(is.null(x$TotalPopSize)) {
         warning(paste("It looks like this simulation never completed.",
                       " Maybe it reached maximum allowed limits.",
                       " You will get NAs"))
         return(rep(NA, length(x$geneNames)))
     }
50a94207
     the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
e6e95f54
     if(the.time < 0) { 
         return(rep(NA, nrow(x$Genotypes)))
     } 
     pop <- x$pops.by.time[the.time, -1]
2f97de14
     
e6e95f54
     if(all(pop == 0)) {
         stop("You found a bug: this should never happen")
2f97de14
     }
e6e95f54
     
     if(typeSample %in% c("wholeTumor", "whole")) {
         popSize <- x$PerSampleStats[the.time, 1]
         return( as.numeric((tcrossprod(pop,
0f3a3188
                                        x$Genotypes)/popSize) >= thresholdWhole) )
e6e95f54
     } else if (typeSample %in%  c("singleCell", "single")) {
 
50a94207
         return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
     } else {
         stop("Unknown typeSample option")
     }
2f97de14
 }
 
 
e6e95f54
 
 
 
 
 
 
 
 
2f97de14
 
 
e693f153
 oncoSimul.internal <- function(poset, ## restrict.table,
                                numPassengers, 
                                ## numGenes,
2f97de14
                                typeCBN,
                                birth, 
                                s,
                                death,
                                mu,
                                initSize,
                                sampleEvery,
                                detectionSize,
                                finalTime,
e693f153
                                initSize_species,
                                initSize_iter,
                                seed,
                                verbosity,
                                speciesFS,
                                ratioForce,
                                typeFitness,
                                max.memory,
debcf5f5
                                mutationPropGrowth, ## make it explicit
e693f153
                                initMutant,
                                max.wall.time,
                                keepEvery,
                                alpha,
                                sh,                               
                                K,
                                ## endTimeEvery,
                                detectionDrivers,
                                onlyCancer,
                                errorHitWallTime,
                                max.num.tries,
                                errorHitMaxTries,
                                minDetectDrvCloneSz,
                                extraTime) {
 
     ## the value of 20000, in megabytes, for max.memory sets a limit of ~ 20 GB
   
 
     ## if(keepEvery < sampleEvery)
     ##     warning("setting keepEvery to sampleEvery")
2f97de14
 
e693f153
     ## a backdoor to allow passing restrictionTables directly
     if(inherits(poset, "restrictionTable"))
         restrict.table <- poset
     else
         restrict.table <- poset.to.restrictTable(poset)
     numDrivers <- nrow(restrict.table)
     numGenes <- (numDrivers + numPassengers)
50a94207
     if(numGenes < 2)
         stop("There must be at least two genes (loci) in the fitness effects.",
              "If you only care about a degenerate case with just one,",
              "you can enter a second gene",
              "with fitness effect of zero.")
e693f153
     if(numGenes > 64)
50a94207
         stop("Largest possible number of genes (loci) is 64 for version 1.",
              "You are strongly encouraged to use the new specification",
              "as in version 2.")
e693f153
 
e6e95f54
     ## These can never be set by the user
     ## if(initSize_species < 10) {
     ##     warning("initSize_species too small?")
     ## }
     ## if(initSize_iter < 100) {
     ##     warning("initSize_iter too small?")
     ## }
2f97de14
 
e693f153
     ## numDrivers <- nrow(restrict.table)
2f97de14
     if(length(unique(restrict.table[, 1])) != numDrivers)
         stop("BAIL OUT NOW: length(unique(restrict.table[, 1])) != numDrivers)")
     ddr <- restrict.table[, 1]
     if(any(diff(ddr) != 1))
         stop("BAIL OUT NOW:  any(diff(ddr) != 1")
     ## sanity checks
     if(max(restrict.table[, 1]) != numDrivers)
         stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers")
     if(numDrivers > numGenes)
         stop("BAIL OUT NOW: numDrivers > numGenes")
     
     non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1]
 
 
 
 
502e447e
     ## if( (is.null(endTimeEvery) || (endTimeEvery > 0)) &&
     ##    (typeFitness %in% c("bozic1", "exp") )) {
     ##     warning(paste("endTimeEvery will take a positive value. ",
     ##                   "This will make simulations not stop until the next ",
     ##                   "endTimeEvery has been reached. Thus, in simulations ",
     ##                   "with very fast growth, simulations can take a long ",
     ##                   "time to finish, or can hit the wall time limit. "))
     ## }
     ## if(is.null(endTimeEvery))
     ##     endTimeEvery <- keepEvery
     ## if( (endTimeEvery > 0) && (endTimeEvery %% keepEvery) )
     ##     warning("!(endTimeEvery %% keepEvery)")
2f97de14
     ## a sanity check in restricTable, so no neg. indices for the positive deps
     neg.deps <- function(x) {
         ## checks a row of restrict.table
         numdeps <- x[2]
         if(numdeps) {
             deps <- x[3:(3+numdeps - 1)]
             return(any(deps < 0))
         } else FALSE
     }
     if(any(apply(restrict.table, 1, neg.deps)))
         stop("BAIL OUT NOW: Negative dependencies in restriction table")
 
     ## transpose the table
     rtC <- convertRestrictTable(restrict.table)
 
4d70b942
     
e693f153
     return(c(
1b1e5e0c
         BNB_Algo5(restrictTable = rtC,
         numDrivers = numDrivers,
         numGenes = numGenes,
50a94207
         typeCBN_= typeCBN,
1b1e5e0c
         s = s, 
         death = death,
         mu = mu,
         initSize = initSize,
         sampleEvery = sampleEvery,
         detectionSize = detectionSize,
         finalTime = finalTime,
         initSp = initSize_species,
         initIt = initSize_iter,
         seed = seed,
         verbosity = verbosity,
         speciesFS = speciesFS,
         ratioForce = ratioForce,
50a94207
         typeFitness_ = typeFitness,
1b1e5e0c
         maxram = max.memory,
         mutationPropGrowth = as.integer(mutationPropGrowth),
         initMutant = initMutant,
         maxWallTime = max.wall.time,
         keepEvery = keepEvery,
         sh = sh,
         K = K,
         detectionDrivers = detectionDrivers,
         onlyCancer = onlyCancer,
         errorHitWallTime = errorHitWallTime,
         maxNumTries = max.num.tries,
         errorHitMaxTries = errorHitMaxTries,
         minDetectDrvCloneSz = minDetectDrvCloneSz,
         extraTime = extraTime
e693f153
     ),
50a94207
     NumDrivers = numDrivers
2f97de14
              ))
e693f153
 
2f97de14
 }
 
debcf5f5
 OncoSimulWide2Long <- function(x) {
     ## Put data in long format, for ggplot et al
     
     if(!inherits(x, "oncosimul2")) {
         ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
         genotLabels <- genotypeLabel(x)
     } else {
         ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
         genotLabels <- x$GenotypesLabels
     }
     genotLabels[genotLabels == ""] <- "WT"
     genotLabels[genotLabels == " _ "] <- "WT"
     y <- x$pops.by.time[, 2:ncol(x$pops.by.time), drop = FALSE]
     y[y == 0] <- NA
     
     oo <- order(ndr)
     y <- y[, oo, drop = FALSE]
     ndr <- ndr[oo]
 
     nc <- ncol(y)
     nr <- nrow(y)
     y <- as.vector(y)
     return(data.frame(Time = rep(x$pops.by.time[, 1], nc),
                       Y = y,
                       Drivers = factor(rep(ndr, rep(nr, nc))),
                       Genotype = rep(genotLabels, rep(nr, nc))))
 }
 
 
 
e6e95f54
 ## We are not using this anymore
 ## create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
 ##     if(tmp$NumClones > 1) {
 ##         NumMutations <- apply(tmp$Genotypes, 2, sum)
 ##         muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
 ##                               t(apply(tmp$pops.by.time[, -c(1),
 ##                                                        drop = FALSE], 1,
 ##                                       function(x) tapply(x,
 ##                                                          NumMutations, sum))))
 ##         colnames(muts.by.time)[c(1)] <- "Time"
 ##     } else {
 ##         muts.by.time <- tmp$pops.by.time
 ##     }
 ##     return(muts.by.time)
 ## } 
2f97de14
 
 
e693f153
 create.drivers.by.time <- function(tmp, ndr) {
     ## CountNumDrivers <- apply(tmp$Genotypes[1:numDrivers, ,drop = FALSE], 2, sum)
     CountNumDrivers <- ndr
2f97de14
     if(tmp$NumClones >= 1) {
         if(tmp$NumClones == 1) {
             if(ncol(tmp$pops.by.time) != 2)
                 stop("This is impossible!")
             drivers.by.time <- tmp$pops.by.time
         } else {
             if(length(unique(CountNumDrivers )) > 1) {
                 drivers.by.time <- cbind(tmp$pops.by.time[, c(1),
                                                           drop = FALSE] ,
                                          t(apply(tmp$pops.by.time[, -c(1),
                                                                   drop = FALSE],
                                                  1,
                                                  function(x)
                                                  tapply(x,
                                                         CountNumDrivers,
                                                         sum)))) 
             } else {
                 drivers.by.time <- cbind(tmp$pops.by.time[, c(1),
                                                           drop = FALSE] ,
                                          rowSums(tmp$pops.by.time[, -c(1),
                                                                   drop =FALSE]))
             }
         }
         colnames(drivers.by.time) <- c("Time",
                                        paste("dr_",
                                              colnames(drivers.by.time)[-c(1)],
                                              sep = ""))
     } else {
         drivers.by.time <- NULL
     }
     return(drivers.by.time)
 } 
 
 
 
 ## For plotting, this helps decrease huge file sizes, while still showing
 ## the start of each clone, if it was originally recorded.
 
 thin.pop.data <- function(x, keep = 0.1, min.keep = 3) {
     norig <- nrow(x$pops.by.time)
     keep1 <- round(seq.int(from = 1, to = norig,
                            length.out = round(norig * keep)))
     keep2 <- apply(x$pops.by.time[, -1, drop = FALSE],
                    1, function(x) any((x[x > 0] < min.keep)))
     keep <- sort(union(keep1, keep2))
     x$pops.by.time <- x$pops.by.time[keep, , drop = FALSE]
     return(x)
 }
 
debcf5f5
 xlim.pop.data <- function(x, xlim) {
     x$pops.by.time <- x$pops.by.time[
     (x$pops.by.time[, 1] >= xlim[1]) &
     (x$pops.by.time[, 1] <= xlim[2]),   ]
     return(x)
 }
 
 
5d9775a3
 shannonI <- function(x) {
     sx <- sum(x)
     p <- x/sx
     p <- p[p > 0]
     return(-sum(p * log(p)))
 }
 
 plotShannon <- function(z) {
     h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE],
                1, shannonI)
     plot(x = z$pops.by.time[, 1],
          y = h, type = "l", xlab = "", ylab = "H", axes = FALSE)
     box()
     axis(2)
 }
 
c95df82d
 
 
 is_null_na <- function(x) {
     ## For arguments, if user passes "undefined" or "not set"
     ## See also http://stackoverflow.com/a/19655909
     if(is.function(x)) return(FALSE)
     if( is.null(x) ||
         ( (length(x) == 1) && (is.na(x)) ) ||
         ( (length(x) == 1) && (x == "") ) ## careful here
        )  {
         return(TRUE)
     } else {
         return(FALSE)
     }
 }
 
 
 ## Not used anymore, but left here in case they become useful.
 ## Expected numbers at equilibrium under McFarland's
 ## eFinalMf <- function(initSize, s, j) {
 ##     ## Expected final sizes for McF, when K is set to the default.
 ##     # j is number of drivers
 ##     ## as it says, with no passengers
 ##     ## Set B(d) = D(N)
 ##     K <- initSize/(exp(1) - 1)
 ##     return(K * (exp( (1 + s)^j) - 1))
 ## }
 
 ## mcflE <- function(p, s, initSize) {
 ##     K <- initSize/(exp(1) - 1)
 ##     ## Expected number at equilibrium
 ##     return( K * (exp((1 + s)^p) - 1))
 ## }
 
 ## mcflEv <- function(p, s, initSize) {
 ##     ## expects vectors for p and s
 ##     K <- initSize/(exp(1) - 1)
     
 ##     ## Expected number at equilibrium
 ##     return( K * (exp(prod((1 + s)^p)) - 1))
 ## }
 
 
 
 
 
 
 
 
 
 
 
 
5d9775a3
 ## simpsonI <- function(x) {
 ##     sx <- sum(x)
 ##     p <- x/sx
 ##     p <- p[p > 0]
 ##     return(sum(p^2)))
 ## }
 
 ## plotSimpson <- function(z) {
     
 ##     h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE],
 ##                1, shannonI)
 ##     plot(x = z$pops.by.time[, 1],
 ##          y = h, lty = "l", xlab = "", ylab = "H")
 ## }
2f97de14
 
 
debcf5f5
 ## plotClones <- function(z, ndr = NULL, na.subs = TRUE,
 ##                        log = "y", type = "l",
 ##                        lty = 1:8, col = 1:9, ...) {
2f97de14
 
debcf5f5
 ##     ## if given ndr, we order columns based on ndr, so clones with more
 ##     ## drivers are plotted last
2f97de14
 
debcf5f5
 ##     y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
2f97de14
     
debcf5f5
 ##     if(na.subs){
 ##         y[y == 0] <- NA
 ##     }
 ##     if(!is.null(ndr)) {
 ##         ## could be done above, to avoid creating
 ##         ## more copies
 ##         oo <- order(ndr)
 ##         y <- y[, oo, drop = FALSE]
 ##         ndr <- ndr[oo]
 ##         col <- col[ndr + 1]
 ##     }
 ##     matplot(x = z$pops.by.time[, 1],
 ##             y = y,
 ##             log = log, type = type,
 ##             col = col, lty = lty,
 ##             ...)
 ##     box()
 ## }
e6e95f54
 
2f97de14
 
 
 
 
e6e95f54
 ## No longer used
 ## rtNoDep <- function(numdrivers) {
 ##     ## create a restriction table with no dependencies
 ##     x <- matrix(nrow = numdrivers, ncol = 3)
 ##     x[, 1] <- 1:numdrivers
 ##     x[, 2] <- 0
 ##     x[, 3] <- -9
 ##     return(x)
 ## }
2f97de14
 
e6e95f54
 
 ## Simulate from generative model. This is a quick function, and is most
 ## likely wrong! Never used for anything.
 
 ## simposet <- function(poset, p) {
 ##     ## if (length(parent.nodes) != length (child.nodes)){
 ##     ##     print("An Error Occurred")
 ##     ## }
 ##     ##    else {
 ##     num.genes <- max(poset) - 1 ## as root is not a gene
 ##     genotype <-t(c(1, rep(NA, num.genes)))
 ##     colnames(genotype) <- as.character(0:num.genes)
2f97de14
     
     
e6e95f54
 ##     poset$runif <- runif(nrow(poset))
 ##     ## this.relation.prob.OK could be done outside, but having it inside
 ##     ## the loop would allow to use different thresholds for different
 ##     ## relationships
 ##     for (i in (1:nrow(poset))) {
 ##         child <- poset[i, 2]
 ##         this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p)
 ##         the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. 
 ##         if (is.na(genotype[child])){
 ##             genotype[child] <- this.relation.prob.OK * the.parent  
 ##         }
 ##         else
 ##             genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent)
 ##     }
 ##     ##    }
502e447e
     
e6e95f54
 ##     return(genotype)
 ## }
2f97de14
 
 
e693f153
 ## to plot and adjacency matrix in this context can do
 ## plotPoset(intAdjMatToPoset(adjMat))
 ## where intAdjMatToPoset is from best oncotree code: generate-random-trees.
 ## No! the above is simpler
e6e95f54
 
 
 
 
 ## get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
 ##     ## Obtain, from  results from a simulation run, the vector
 ##     ## of 0/1 corresponding to each gene.
     
 ##     ## threshold is the min. proportion for a mutation to be detected
 ##     ## We are doing whole tumor sampling here, as in Sprouffske
 
 ##     ## timeSample: do we sample at end, or at a time point, chosen
 ##     ## randomly, from all those with at least one driver?
     
 ##     if(timeSample == "last") {
 ##         if(tmp$TotalPopSize == 0)
 ##             warning(paste("Final population size is 0.",
 ##                           "Thus, there is nothing to sample with ",
 ##                           "sampling last. You will get NAs"))
 ##         return(as.numeric(
 ##             (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
 ##                         tmp$Genotypes)/tmp$TotalPopSize) > threshold))
 ##     } else if (timeSample %in% c("uniform", "unif")) {
 ##           candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
           
 ##           if (length(candidate.time) == 0) {
 ##               warning(paste("There is not a single sampled time",
 ##                             "at which there are any mutants.",
 ##                             "Thus, no uniform sampling possible.",
 ##                             "You will get NAs"))
 ##               return(rep(NA, nrow(tmp$Genotypes)))
 ##           } else if (length(candidate.time) == 1) {
 ##                 the.time <- candidate.time
 ##             } else {
 ##                   the.time <- sample(candidate.time, 1)
 ##               }
 ##           pop <- tmp$pops.by.time[the.time, -1]
 ##           popSize <- tmp$PerSampleStats[the.time, 1]
 ##           ## if(popSize == 0)
 ##           ##     warning(paste("Population size at this time is 0.",
 ##           ##                   "Thus, there is nothing to sample at this time point.",
 ##           ##                   "You will get NAs"))
 ##           return( as.numeric((tcrossprod(pop,
 ##                                        tmp$Genotypes)/popSize) > threshold) )
 ##       }
 ## }
 
 
 
 ##           the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
 ##           if(length(the.time) == 0) {
 ##               warning(paste("There are no clones with drivers at any time point.",
 ##                             "No uniform sampling possible.",
 ##                             "You will get a vector of NAs."))
 ##             return(rep(NA, nrow(tmp$Genotypes)))  
 ##           }
 ## get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
 ##     ## No threshold, as single cell.
 
 ##     ## timeSample: do we sample at end, or at a time point, chosen
 ##     ## randomly, from all those with at least one driver?
     
 ##     if(timeSample == "last") {
 ##         the.time <- nrow(tmp$pops.by.time)
 ##     } else if (timeSample %in% c("uniform", "unif")) {
 ##          candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
          
 ##          if (length(candidate.time) == 0) {
 ##              warning(paste("There is not a single sampled time",
 ##                            "at which there are any mutants.",
 ##                            "Thus, no uniform sampling possible.",
 ##                            "You will get NAs"))
 ##              return(rep(NA, nrow(tmp$Genotypes)))
 ##          } else if (length(candidate.time) == 1) {
 ##                the.time <- candidate.time
 ##            } else {
 ##                  the.time <- sample(candidate.time, 1)
 ##              }
 
 ##      }
 ##     pop <- tmp$pops.by.time[the.time, -1]
 ##     ##       popSize <- tmp$PerSampleStats[the.time, 1]
 ##     ## genot <- sample(seq_along(pop), 1, prob = pop)
 ##     if(all(pop == 0)) {
 ##         warning(paste("All clones have a population size of 0",
 ##                       "at the chosen time. Nothing to sample.",
 ##                       "You will get NAs"))
 ##         return(rep(NA, nrow(tmp$Genotypes)))
 ##     } else {
 ##           return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
 ##       }
 ## }
 
 
 ## get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
 ##                            thresholdWhole = 0.5) {
 ##     if(typeSample %in% c("wholeTumor", "whole")) {
 ##         get.mut.vector.whole(x, timeSample = timeSample,
 ##                              threshold = thresholdWhole)
 ##     } else if(typeSample %in%  c("singleCell", "single")) {
 ##         get.mut.vector.singlecell(x, timeSample = timeSample)
 ##     }
 ## }
 
 
 
 
 
 ## plotClonePhylog <- function(x, timeEvent = FALSE,
 ##                             showEvents = TRUE,
 ##                             fixOverlap = TRUE) {
 ##     if(!inherits(x, "oncosimul2"))
 ##         stop("Phylogenetic information is only stored with v >=2")
 ##     if(nrow(x$other$PhylogDF) == 0)
 ##         stop("It seems you run the simulation with keepPhylog= FALSE")
 ##     ## requireNamespace("igraph")
 ##     df <- x$other$PhylogDF
 ##     if(!showEvents) {
 ##         df <- df[!duplicated(df[, c(1, 2)]), ]
 ##     }
 ##     g <- igraph::graph.data.frame(df)
 ##     l0 <- igraph::layout.reingold.tilford(g)
 ##     if(!timeEvent) {
 ##         plot(g, layout = l0)
 ##     } else {
 ##         l1 <- l0
 ##         indexAppear <- match(V(g)$name, as.character(df[, 2]))
 ##         firstAppear <- df$time[indexAppear]
 ##         firstAppear[1] <- 0
 ##         l1[, 2] <- (max(firstAppear) - firstAppear)
 ##         if(fixOverlap) {
 ##             dx <- which(duplicated(l1[, 1]))
 ##             if(length(dx)) {
 ##                 ra <- range(l1[, 1])
 ##                 l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
 ##             }
 ##         }
 ##         plot(g, layout = l1)         
 ##     }
 ## }