Browse code

Adds OncoSimulR, missMethyl, ballgown and STAN to the repos.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@92963 bc3139a8-67e5-0310-9ffc-ced21a209358

Marc Carlson authored on 28/07/2014 22:43:51
Showing 20 changed files

1 1
new file mode 100644
... ...
@@ -0,0 +1,21 @@
1
+Package: OncoSimulR
2
+Type: Package
3
+Title: Simulation of cancer progresion with order restrictions
4
+Version: 0.99.2
5
+Date: 2014-07-14
6
+Author: Ramon Diaz-Uriarte.
7
+Maintainer: Ramon Diaz-Uriarte <rdiaz02@gmail.com>
8
+Description: Functions for simulating and plotting cancer progression
9
+        data, including drivers and passengers, and allowing for order
10
+        restrictions. Simulations use continuous-time models (based on
11
+        Bozic et al., 2010 and McFarland et al., 2013) and fitness
12
+        functions account for possible restrictions in the order of
13
+        accumulation of mutations.
14
+biocViews: BiologicalQuestion, SomaticMutation
15
+License: GPL (>= 3)
16
+Depends: R (>= 3.1.0)
17
+Imports: Rcpp (>= 0.11.1), parallel, data.table, graph, Rgraphviz
18
+Suggests: BiocStyle, knitr, Oncotree
19
+LinkingTo: Rcpp
20
+VignetteBuilder: knitr
21
+SystemRequirements: C++11
0 22
new file mode 100644
... ...
@@ -0,0 +1,21 @@
1
+useDynLib(OncoSimulR, .registration=TRUE)
2
+## useDynLib(OncoSimulR)
3
+
4
+export("oncoSimulPop", "oncoSimulIndiv", "samplePop", "plotPoset")
5
+
6
+S3method(plot, oncosimul)
7
+S3method(print, oncosimul)
8
+S3method(summary, oncosimul)
9
+S3method(plot, oncosimulpop)
10
+S3method(summary, oncosimulpop)
11
+S3method(print, oncosimulpop)
12
+
13
+
14
+importFrom("data.table", rbindlist, .rbind.data.table) 
15
+importFrom(Rcpp, evalCpp)
16
+import(graph)
17
+import(Rgraphviz)
18
+importFrom("parallel", mclapply, detectCores)
19
+
20
+
21
+
0 22
new file mode 100644
... ...
@@ -0,0 +1,861 @@
1
+## Copyright 2013, 2014 Ramon Diaz-Uriarte
2
+
3
+## This program is free software: you can redistribute it and/or modify
4
+## it under the terms of the GNU General Public License as published by
5
+## the Free Software Foundation, either version 3 of the License, or
6
+## (at your option) any later version.
7
+
8
+## This program is distributed in the hope that it will be useful,
9
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+## GNU General Public License for more details.
12
+
13
+## You should have received a copy of the GNU General Public License
14
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+
17
+samplePop <- function(x, timeSample = "last", typeSample = "whole",
18
+                      thresholdWhole = 0.5) {
19
+    if(inherits(x, "oncosimulpop"))
20
+        z <- do.call(rbind,
21
+                     lapply(x,
22
+                            get.mut.vector,
23
+                            timeSample = timeSample,
24
+                            typeSample = typeSample,
25
+                            thresholdWhole = thresholdWhole))
26
+    else {
27
+        z <- get.mut.vector(x,
28
+                            timeSample = timeSample,
29
+                            typeSample = typeSample,
30
+                            thresholdWhole = thresholdWhole)
31
+        dim(z) <- c(1, length(z))
32
+    }
33
+    cat("\n Subjects by Genes matrix of ",
34
+        nrow(z), " subjects and ",
35
+        ncol(z), " genes:\n")
36
+    colnames(z) <- paste0("G.", seq_len(ncol(z)))
37
+    return(z)
38
+}
39
+
40
+
41
+oncoSimulPop <- function(Nindiv,
42
+                         poset,
43
+                         model = "Bozic",
44
+                         numPassengers = 30,
45
+                         mu = 1e-6,
46
+                         detectionSize = 1e8,
47
+                         detectionDrivers = 4,
48
+                         sampleEvery = 1,
49
+                         initSize = 500,
50
+                         s = 0.1,
51
+                         sh = -1,
52
+                         K = initSize/(exp(1) - 1),
53
+                         keepEvery = sampleEvery,
54
+                         finalTime = 0.25 * 25 * 365,
55
+                         onlyCancer = TRUE,
56
+                         max.memory = 2000,
57
+                         max.wall.time = 200,
58
+                         verbosity  = 0,
59
+                         mc.cores = detectCores()) {
60
+
61
+    if(.Platform$OS.type == "windows") {
62
+        if(mc.cores != 1)
63
+            message("You are running Windows. Setting mc.cores = 1")
64
+        mc.cores <- 1
65
+    }
66
+    pop <- mclapply(seq.int(Nindiv),
67
+                    function(x)
68
+                    oncoSimulIndiv(
69
+                        poset = poset,
70
+                        model = model,
71
+                        numPassengers = numPassengers,
72
+                        mu = mu,
73
+                        detectionSize = detectionSize,
74
+                        detectionDrivers = detectionDrivers,
75
+                        sampleEvery = sampleEvery,
76
+                        initSize = initSize,
77
+                        s = s,
78
+                        sh = sh,
79
+                        K = K,
80
+                        keepEvery = keepEvery,
81
+                        finalTime = finalTime,
82
+                        onlyCancer = onlyCancer,
83
+                        max.memory = max.memory,
84
+                        max.wall.time = max.wall.time,
85
+                        verbosity = verbosity),
86
+                    mc.cores = mc.cores
87
+                    )
88
+    class(pop) <- "oncosimulpop"
89
+    attributes(pop)$call <- match.call()
90
+    return(pop)
91
+}
92
+
93
+## where is the default K coming from? Here:
94
+## log( (K+N)/K  ) = 1; k + n = k * exp(1); k(exp - 1) = n; k = n/(exp - 1)
95
+
96
+oncoSimulIndiv <- function(poset,
97
+                           model = "Bozic",
98
+                           numPassengers = 30,
99
+                           mu = 1e-6,
100
+                           detectionSize = 1e8,
101
+                           detectionDrivers = 4,
102
+                           sampleEvery = 1,
103
+                           initSize = 500,
104
+                           s = 0.1,
105
+                           sh = -1,
106
+                           K = initSize/(exp(1) - 1),
107
+                           keepEvery = sampleEvery,
108
+                           finalTime = 0.25 * 25 * 365,
109
+                           onlyCancer = TRUE,
110
+                           max.memory = 2000,
111
+                           max.wall.time = 200,
112
+                           verbosity = 0
113
+                           ) {
114
+    call <- match.call()
115
+    rt <- poset.to.restrictTable(poset)
116
+
117
+
118
+    numDrivers <- nrow(rt)
119
+    numGenes <- (numDrivers + numPassengers)
120
+    
121
+    if(numGenes > 64)
122
+        stop("Largest possible number of genes is 64")
123
+
124
+    if(keepEvery < sampleEvery)
125
+        warning("setting keepEvery to sampleEvery")
126
+
127
+
128
+    ## legacies from poor name choices
129
+    typeFitness <- switch(model,
130
+                          "Bozic" = "bozic1",
131
+                          "Exp" = "exp",
132
+                          "McFarlandLog" = "mcfarlandlog",
133
+                          "McFL" = "mcfarlandlog",
134
+                          stop("No valid value for model")
135
+                          )
136
+
137
+    if(typeFitness == "exp") {
138
+        death <- 1
139
+        mutatorGenotype <- 1
140
+    } else {
141
+        death <- -99
142
+        mutatorGenotype <- 0
143
+    }
144
+    birth <- -99
145
+
146
+    if( (typeFitness == "mcfarlandlog") &&
147
+       (sampleEvery > 0.05)) {
148
+        warning("With the McFarland model you often want smaller sampleEvery")
149
+    }
150
+    
151
+    if(typeFitness == "mcfarlandlog") {
152
+        endTimeEvery <- keepEvery
153
+    } else {
154
+        endTimeEvery <- -9
155
+    }
156
+
157
+
158
+
159
+    ## A simulation stops if cancer or finalTime appear, the first
160
+    ## one. But if we set onlyCnacer = FALSE, we also accept simuls
161
+    ## without cancer (or without anything)
162
+    
163
+    doneSimuls <- FALSE
164
+    while(!doneSimuls) {
165
+        op <- try(oncoSimul.internal(restrict.table = rt,
166
+                                     numGenes = numGenes,
167
+                                     typeFitness = typeFitness,
168
+                                     typeCBN = "-99",
169
+                                     birth = birth,
170
+                                     s = s,
171
+                                     sh = sh,
172
+                                     death = death,  
173
+                                     mu =  mu,  
174
+                                     initSize =  initSize, 
175
+                                     sampleEvery =  sampleEvery,  
176
+                                     detectionSize =  detectionSize, 
177
+                                     mutatorGenotype = mutatorGenotype,  
178
+                                     finalTime = finalTime, 
179
+                                     initSize_species = 2000, 
180
+                                     initSize_iter = 500, 
181
+                                     seed_gsl = NULL, 
182
+                                     verbosity = verbosity, 
183
+                                     initMutant = -1, 
184
+                                     speciesFS = 40000,  
185
+                                     ratioForce = 2,  
186
+                                     max.memory = max.memory, 
187
+                                     max.wall.time = max.wall.time, 
188
+                                     keepEvery = keepEvery,  
189
+                                     alpha = 0.0015,  
190
+                                     K = K, 
191
+                                     endTimeEvery = endTimeEvery, 
192
+                                     finalDrivers = detectionDrivers),
193
+                  silent = !verbosity)
194
+
195
+        if(!inherits(op, "try-error")) {
196
+            if(verbosity >= 2) {
197
+                cat("\n ... finished this run:")
198
+                cat("\n       Total Pop Size = ", op$TotalPopSize)
199
+                cat("\n       Drivers Last = ", op$MaxDriversLast)
200
+                cat("\n       Final Time = ", op$FinalTime)
201
+            }
202
+            if(onlyCancer) {
203
+                doneSimuls <- reachCancer(op, ndr = detectionDrivers,
204
+                                          detectionSize = detectionSize,
205
+                                          maxPopSize = 1e15)
206
+            } else {
207
+                doneSimuls <- TRUE
208
+            }
209
+            if(verbosity >= 2) {
210
+                if(doneSimuls)
211
+                    cat("\n ... Keeping this one\n")
212
+                else
213
+                    cat("\n ... Cancer not reached\n")
214
+            }
215
+        } else {
216
+            if(length(grep("BAIL OUT NOW", op)))
217
+                stop("Unrecoverable error")
218
+            if(verbosity >= 2)
219
+                cat("\nSimulation aborted because of numerical/other issues.",
220
+                    "Proceeding to next one.\n")
221
+        }
222
+    }
223
+    class(op) <- "oncosimul"
224
+    attributes(op)$call <- call
225
+    return(op)
226
+}
227
+
228
+
229
+summary.oncosimul <- function(object, ...) {
230
+    tmp <- object[c("NumClones", "TotalPopSize", "LargestClone",
231
+                    "MaxNumDrivers", "MaxDriversLast",
232
+                    "NumDriversLargestPop", "TotalPresentDrivers",
233
+                    "FinalTime", "NumIter", "HittedWallTime")]
234
+    tmp$errorMF <- object$other$errorMF
235
+    if(tmp$errorMF == -99) tmp$errorMF <- NA
236
+    tmp$OccurringDrivers <- object$OccurringDrivers
237
+    return(as.data.frame(tmp))
238
+}
239
+
240
+print.oncosimul <- function(x, ...) {
241
+    cat("\nIndividual OncoSimul trajectory with call:\n ")
242
+    print(attributes(x)$call)
243
+    cat("\n")
244
+    print(summary(x))
245
+}
246
+
247
+## I want this to return things that are storable
248
+summary.oncosimulpop <- function(object, ...) {
249
+    as.data.frame(rbindlist(lapply(object, summary)))
250
+}
251
+
252
+print.oncosimulpop <- function(x, ...) {
253
+    cat("\nPopulation of OncoSimul trajectories of ",
254
+        length(x), " individuals. Call :\n")
255
+    print(attributes(x)$call)
256
+    cat("\n")
257
+    print(summary(x))
258
+}
259
+
260
+
261
+plot.oncosimulpop <- function(x, ask = TRUE,
262
+                              col = c(8, "orange", 6:1),
263
+                              log = "y",
264
+                              ltyClone = 2:6,
265
+                              lwdClone = 0.2,
266
+                              ltyDrivers = 1,
267
+                              lwdDrivers = 3,
268
+                              xlab = "Time units",
269
+                              ylab = "Number of cells",
270
+                              plotClones = TRUE,
271
+                              plotDrivers = TRUE,
272
+                              addtot = FALSE,
273
+                              addtotlwd = 0.5,
274
+                              yl = NULL,
275
+                              thinData = FALSE,
276
+                              thinData.keep = 0.1,
277
+                              thinData.min = 2,
278
+                              ...
279
+                              ) {
280
+    op <- par(ask = ask)
281
+    on.exit(par(op))
282
+    null <- lapply(x, function(z)
283
+                   plot.oncosimul(z,
284
+                                  col = col,
285
+                                  log = log,
286
+                                  ltyClone = ltyClone,
287
+                                  lwdClone = lwdClone,
288
+                                  ltyDrivers = ltyDrivers,
289
+                                  lwdDrivers = lwdDrivers,
290
+                                  xlab = xlab,
291
+                                  ylab = ylab,
292
+                                  plotClones = plotClones,
293
+                                  plotDrivers = plotDrivers,
294
+                                  addtot = addtot,
295
+                                  addtotlwd = addtotlwd,
296
+                                  yl = yl,
297
+                                  thinData = thinData,
298
+                                  thinData.keep = thinData.keep,
299
+                                  thinData.min = thinData.min,
300
+                                  ...))
301
+}
302
+
303
+
304
+plot.oncosimul <- function(x, col = c(8, "orange", 6:1),
305
+                           log = "y",
306
+                           ltyClone = 2:6,
307
+                           lwdClone = 0.2,
308
+                           ltyDrivers = 1,
309
+                           lwdDrivers = 3,
310
+                           xlab = "Time units",
311
+                           ylab = "Number of cells",
312
+                           plotClones = TRUE,
313
+                           plotDrivers = TRUE,
314
+                           addtot = FALSE,
315
+                           addtotlwd = 0.5,
316
+                           yl = NULL,
317
+                           thinData = FALSE,
318
+                           thinData.keep = 0.1,
319
+                           thinData.min = 2,
320
+                           ...
321
+                           ) {
322
+
323
+    if(thinData)
324
+        x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
325
+    
326
+    ndr <- apply(x$Genotypes[1:x$NumDrivers, , drop = FALSE], 2, sum)
327
+
328
+    if(is.null(yl)) {
329
+        if(log %in% c("y", "xy", "yx") )
330
+            yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
331
+        else
332
+            yl <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
333
+    }
334
+    if(plotClones) {
335
+        plotClones(x,
336
+                   ndr = ndr, 
337
+                   xlab = xlab,
338
+                   ylab = ylab,
339
+                   lty = ltyClone,
340
+                   col = col, 
341
+                   ylim = yl,
342
+                   lwd = lwdClone,
343
+                   axes = FALSE,
344
+                   log = log,
345
+                   ...)
346
+    }
347
+
348
+    if(plotClones && plotDrivers)
349
+        par(new = TRUE)
350
+    
351
+    if(plotDrivers){
352
+        plotDrivers0(x,
353
+                     timescale = 1,
354
+                     trim.no.drivers = FALSE,
355
+                     xlab = "", ylab = "",
356
+                     lwd = lwdDrivers,
357
+                     lty = ltyDrivers,
358
+                     col = col, 
359
+                     addtot = addtot,
360
+                     addtotlwd = addtotlwd,
361
+                     log = log, ylim = yl,
362
+                     ...)
363
+    }
364
+}
365
+
366
+
367
+plotPoset <- function(x, names = NULL, addroot = FALSE,
368
+                      box = FALSE, ...) {
369
+    if(is.null(names)) {
370
+        if(addroot) names <- c("Root", 1:max(x))
371
+        else names <- 1:max(x)
372
+    }
373
+    plot(posetToGraph(x, names, addroot), ...)
374
+    if(box)
375
+        box()
376
+}
377
+
378
+############# The rest are internal functions
379
+
380
+get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
381
+    ## Obtain, from  results from a simulation run, the vector
382
+    ## of 0/1 corresponding to each gene.
383
+    
384
+    ## threshold is the min. proportion for a mutation to be detected
385
+    ## We are doing whole tumor sampling here, as in Sprouffske
386
+
387
+    ## timeSample: do we sample at end, or at a time point, chosen
388
+    ## randomly, from all those with at least one driver?
389
+    
390
+    
391
+    if(timeSample == "last") {
392
+        return(as.numeric(
393
+            (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
394
+                        tmp$Genotypes)/tmp$TotalPopSize) > threshold))
395
+    } else if (timeSample %in% c("uniform", "unif")) {
396
+        the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
397
+        pop <- tmp$pops.by.time[the.time, -1]
398
+        popSize <- tmp$PerSampleStats[the.time, 1]
399
+        return( as.numeric((tcrossprod(pop,
400
+                                       tmp$Genotypes)/popSize) > threshold) )
401
+    }
402
+}
403
+
404
+
405
+get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
406
+    ## No threshold, as single cell.
407
+
408
+    ## timeSample: do we sample at end, or at a time point, chosen
409
+    ## randomly, from all those with at least one driver?
410
+    
411
+    if(timeSample == "last") {
412
+        the.time <- nrow(tmp$pops.by.time)
413
+    } else if (timeSample %in% c("uniform", "unif")) {
414
+        the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
415
+    }
416
+    pop <- tmp$pops.by.time[the.time, -1]
417
+    ##       popSize <- tmp$PerSampleStats[the.time, 1]
418
+    ## genot <- sample(seq_along(pop), 1, prob = pop)
419
+    return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
420
+}
421
+
422
+
423
+get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
424
+                           thresholdWhole = 0.5) {
425
+    if(typeSample %in% c("wholeTumor", "whole")) {
426
+        get.mut.vector.whole(x, timeSample = timeSample,
427
+                             threshold = thresholdWhole)
428
+    } else if(typeSample %in%  c("singleCell", "single")) {
429
+        get.mut.vector.singlecell(x, timeSample = timeSample)
430
+    }
431
+}
432
+
433
+
434
+
435
+
436
+
437
+
438
+
439
+reachCancer <- function(x, ndr = 0, detectionSize = 0,
440
+                        maxPopSize = 1e15) {
441
+    return(
442
+        ( ((x$TotalPopSize >= detectionSize) ||
443
+           (x$MaxDriversLast >= ndr)) &&
444
+         ## (x$ti_dbl_min == 0) && ## silly, since now impossible
445
+         (x$TotalPopSize < maxPopSize) ## numerical issues here
446
+         ))
447
+}
448
+
449
+oncoSimul.internal <- function(restrict.table,
450
+                               numGenes,
451
+                               typeFitness,
452
+                               typeCBN,
453
+                               birth, 
454
+                               s,
455
+                               sh,
456
+                               death,
457
+                               mu,
458
+                               initSize,
459
+                               sampleEvery,
460
+                               detectionSize,
461
+                               mutatorGenotype,
462
+                               finalTime,
463
+                               initSize_species = 2000,
464
+                               initSize_iter = 500,
465
+                               seed_gsl = NULL,
466
+                               verbosity = 1,
467
+                               initMutant = -1,
468
+                               speciesFS = 40000,
469
+                               ratioForce = 2,
470
+                               max.memory = 20000,
471
+                               max.wall.time = 3600,
472
+                               keepEvery = 20,
473
+                               alpha = 0.0015,
474
+                               K = 1000,
475
+                               endTimeEvery = NULL,
476
+                               finalDrivers = 1000) {
477
+
478
+    if(initSize_species < 10) {
479
+        warning("initSize_species too small?")
480
+    }
481
+    if(initSize_iter < 100) {
482
+        warning("initSize_iter too small?")
483
+    }
484
+    if(keepEvery < sampleEvery)
485
+        warning("setting keepEvery to sampleEvery")
486
+    if(is.null(seed_gsl)) {## passing a null creates a random seed
487
+        ## name is a legacy. This is really the seed for the C++ generator.
488
+        ## Not a user modifieble argument for now, though.
489
+        seed_gsl <- as.integer(round(runif(1, min = 0, max = 2^16)))
490
+        if(verbosity >= 2)
491
+            cat(paste("\n Using ", seed_gsl, " as seed for C++ generator\n"))
492
+    }
493
+
494
+    numDrivers <- nrow(restrict.table)
495
+    if(length(unique(restrict.table[, 1])) != numDrivers)
496
+        stop("BAIL OUT NOW: length(unique(restrict.table[, 1])) != numDrivers)")
497
+    ddr <- restrict.table[, 1]
498
+    if(any(diff(ddr) != 1))
499
+        stop("BAIL OUT NOW:  any(diff(ddr) != 1")
500
+    ## sanity checks
501
+    if(max(restrict.table[, 1]) != numDrivers)
502
+        stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers")
503
+    if(numDrivers > numGenes)
504
+        stop("BAIL OUT NOW: numDrivers > numGenes")
505
+    
506
+    non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1]
507
+
508
+
509
+    if( (typeFitness == "bozic1") && (mutatorGenotype) )
510
+        warning("Using fitness bozic1 with mutatorGenotype;",
511
+                "this will have no effect.")
512
+
513
+    if( (typeFitness == "exp") && (death != 1) )
514
+        warning("Using fitness exp with death != 1")
515
+
516
+
517
+    if( (is.null(endTimeEvery) || (endTimeEvery > 0)) &&
518
+       (typeFitness %in% c("bozic1", "exp") )) {
519
+        warning(paste("endTimeEvery will take a positive value. ",
520
+                      "This will make simulations not stop until the next ",
521
+                      "endTimeEvery has been reached. Thus, in simulations ",
522
+                      "with very fast growth, simulations can take a long ",
523
+                      "time to finish, or can hit the wall time limit. "))
524
+    }
525
+    if(is.null(endTimeEvery))
526
+        endTimeEvery <- keepEvery
527
+    if( (endTimeEvery > 0) && (endTimeEvery %% keepEvery) )
528
+        warning("!(endTimeEvery %% keepEvery)")
529
+    ## a sanity check in restricTable, so no neg. indices for the positive deps
530
+    neg.deps <- function(x) {
531
+        ## checks a row of restrict.table
532
+        numdeps <- x[2]
533
+        if(numdeps) {
534
+            deps <- x[3:(3+numdeps - 1)]
535
+            return(any(deps < 0))
536
+        } else FALSE
537
+    }
538
+    if(any(apply(restrict.table, 1, neg.deps)))
539
+        stop("BAIL OUT NOW: Negative dependencies in restriction table")
540
+
541
+    ## transpose the table
542
+    rtC <- convertRestrictTable(restrict.table)
543
+
544
+    
545
+    ## return the matching call? call <- match.call()
546
+    ## and then return(c(.Call(), call))
547
+    call <- match.call()
548
+    return(c(.Call(
549
+        ## "Algorithm5",
550
+        "C_Algorithm5",
551
+        rtC,
552
+        numDrivers,
553
+        numGenes,
554
+        typeCBN,
555
+        birth, 
556
+        s, 
557
+        death,
558
+        mu,
559
+        initSize,
560
+        sampleEvery,
561
+        detectionSize,
562
+        finalTime,
563
+        initSize_species,
564
+        initSize_iter,
565
+        seed_gsl,
566
+        verbosity,
567
+        speciesFS,
568
+        ratioForce,
569
+        typeFitness,
570
+        max.memory,
571
+        mutatorGenotype,
572
+        initMutant,
573
+        max.wall.time,
574
+        keepEvery,
575
+        alpha,
576
+        sh,
577
+        K,
578
+        endTimeEvery,
579
+        finalDrivers),
580
+             NumDrivers = numDrivers
581
+             ))
582
+}
583
+
584
+
585
+create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
586
+    if(tmp$NumClones > 1) {
587
+        NumMutations <- apply(tmp$Genotypes, 2, sum)
588
+        muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
589
+                              t(apply(tmp$pops.by.time[, -c(1),
590
+                                                       drop = FALSE], 1,
591
+                                      function(x) tapply(x,
592
+                                                         NumMutations, sum))))
593
+        colnames(muts.by.time)[c(1)] <- "Time"
594
+    } else {
595
+        muts.by.time <- tmp$pops.by.time
596
+    }
597
+    return(muts.by.time)
598
+} 
599
+
600
+
601
+create.drivers.by.time <- function(tmp, numDrivers) {
602
+    CountNumDrivers <- apply(tmp$Genotypes[1:numDrivers, ,drop = FALSE], 2, sum)
603
+    if(tmp$NumClones >= 1) {
604
+        if(tmp$NumClones == 1) {
605
+            if(ncol(tmp$pops.by.time) != 2)
606
+                stop("This is impossible!")
607
+            drivers.by.time <- tmp$pops.by.time
608
+        } else {
609
+            if(length(unique(CountNumDrivers )) > 1) {
610
+                drivers.by.time <- cbind(tmp$pops.by.time[, c(1),
611
+                                                          drop = FALSE] ,
612
+                                         t(apply(tmp$pops.by.time[, -c(1),
613
+                                                                  drop = FALSE],
614
+                                                 1,
615
+                                                 function(x)
616
+                                                 tapply(x,
617
+                                                        CountNumDrivers,
618
+                                                        sum)))) 
619
+            } else {
620
+                drivers.by.time <- cbind(tmp$pops.by.time[, c(1),
621
+                                                          drop = FALSE] ,
622
+                                         rowSums(tmp$pops.by.time[, -c(1),
623
+                                                                  drop =FALSE]))
624
+            }
625
+        }
626
+        colnames(drivers.by.time) <- c("Time",
627
+                                       paste("dr_",
628
+                                             colnames(drivers.by.time)[-c(1)],
629
+                                             sep = ""))
630
+    } else {
631
+        drivers.by.time <- NULL
632
+    }
633
+    return(drivers.by.time)
634
+} 
635
+
636
+
637
+
638
+## For plotting, this helps decrease huge file sizes, while still showing
639
+## the start of each clone, if it was originally recorded.
640
+
641
+thin.pop.data <- function(x, keep = 0.1, min.keep = 3) {
642
+    norig <- nrow(x$pops.by.time)
643
+    keep1 <- round(seq.int(from = 1, to = norig,
644
+                           length.out = round(norig * keep)))
645
+    keep2 <- apply(x$pops.by.time[, -1, drop = FALSE],
646
+                   1, function(x) any((x[x > 0] < min.keep)))
647
+    keep <- sort(union(keep1, keep2))
648
+    x$pops.by.time <- x$pops.by.time[keep, , drop = FALSE]
649
+    return(x)
650
+}
651
+
652
+
653
+
654
+plotClones <- function(z, ndr = NULL, na.subs = TRUE,
655
+                       log = "y", type = "l",
656
+                       lty = 1:8, col = 1:9, ...) {
657
+
658
+    ## if given ndr, we order columns based on ndr, so clones with more
659
+    ## drivers are plotted last
660
+
661
+    y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
662
+    
663
+    if(na.subs){
664
+        y[y == 0] <- NA
665
+    }
666
+    if(!is.null(ndr)) {
667
+        ## could be done above, to avoid creating
668
+        ## more copies
669
+        oo <- order(ndr)
670
+        y <- y[, oo, drop = FALSE]
671
+        ndr <- ndr[oo]
672
+        col <- col[ndr + 1]
673
+    }
674
+    matplot(x = z$pops.by.time[, 1],
675
+            y = y,
676
+            log = log, type = type,
677
+            col = col, lty = lty,
678
+            ...)
679
+}
680
+
681
+
682
+plotDrivers0 <- function(x,
683
+                         timescale = 4,
684
+                         trim.no.drivers = TRUE,
685
+                         addtot = TRUE,
686
+                         addtotlwd = 2,
687
+                         na.subs = TRUE, log = "y", type = "l",
688
+                         lty = 1:9, col = c(8, "orange", 6:1),
689
+                         lwd = 2,
690
+                         ...) {
691
+
692
+    z <- create.drivers.by.time(x, x$NumDrivers)
693
+    if(trim.no.drivers && x$MaxDriversLast) {
694
+        fi <- which(apply(z[, -c(1, 2), drop = FALSE], 1,
695
+                          function(x) sum(x) > 0))[1]
696
+        z <- z[fi:nrow(z), , drop = FALSE]
697
+    }
698
+    y <- z[, 2:ncol(z), drop = FALSE]
699
+    if(na.subs){
700
+        y[y == 0] <- NA
701
+    }
702
+    if(timescale != 1) {
703
+        time <- timescale * z[, 1]
704
+    } else {
705
+        time <- z[, 1]
706
+    }
707
+    if(nrow(y) <= 2) type <- "b"
708
+    matplot(x = time,
709
+            y = y,
710
+            type = type, log = log, lty = lty, col = col, lwd = lwd,
711
+            ...)
712
+    if(addtot) {
713
+        tot <- rowSums(y, na.rm = TRUE)
714
+        lines(time, tot, col = "black", lty = 1, lwd = addtotlwd)
715
+    }
716
+    legend(x = "topleft",
717
+           title = "Number of drivers",
718
+           lty = lty, col = col, lwd = lwd,
719
+           legend = (1:ncol(y)) - 1)
720
+}
721
+
722
+
723
+rtNoDep <- function(numdrivers) {
724
+    ## create a restriction table with no dependencies
725
+    x <- matrix(nrow = numdrivers, ncol = 3)
726
+    x[, 1] <- 1:numdrivers
727
+    x[, 2] <- 0
728
+    x[, 3] <- -9
729
+    return(x)
730
+}
731
+
732
+
733
+
734
+convertRestrictTable <- function(x) {
735
+    t.restrictTable <- matrix(as.integer(x),
736
+                              ncol = nrow(x), byrow = TRUE)
737
+
738
+    t.restrictTable[-2, ] <- t.restrictTable[-2, ] - 1
739
+    return(t.restrictTable)
740
+}
741
+
742
+
743
+
744
+
745
+
746
+adjmat.to.restrictTable <- function(x) {
747
+    ## we have the zero
748
+    ## x <- x[-1, -1]
749
+    if(!is.null(colnames(x))) {
750
+        ## FIXME: this makes sense with numeric labels for columns, but
751
+        ## not ow.
752
+        oi <- order(as.numeric(colnames(x)))
753
+        if(any(oi != (1:ncol(x)))) {
754
+            warning("Reordering adjacency matrix")
755
+            x <- x[oi, oi]
756
+        }
757
+    }
758
+    
759
+    num.deps <- colSums(x)
760
+    max.n.deps <- max(num.deps)
761
+    rt <- matrix(-9, nrow = nrow(x),
762
+                 ncol = max.n.deps + 2)
763
+    for(i in 1:ncol(x)) {
764
+        if( num.deps[ i ])
765
+            rt[i , 1:(2 + num.deps[ i ])] <- c(i, num.deps[i ],
766
+                                               which(x[, i ] != 0))
767
+        else
768
+            rt[i , 1:2] <- c(i , 0)
769
+    }
770
+    return(rt)
771
+}
772
+
773
+
774
+
775
+poset.to.restrictTable <- function(x) {
776
+    x1 <- posetToGraph(x, names = 1:max(x), addroot = FALSE, type = "adjmat")
777
+    adjmat.to.restrictTable(x1)
778
+}
779
+
780
+
781
+## have a graph without a null??
782
+
783
+posetToGraph <- function(x, names,
784
+                         addroot = FALSE,
785
+                         type = "graphNEL") {
786
+    ## Intermediate nodes, if no ancestor or descendant, need not
787
+    ## be in the poset.
788
+    ## Any node with index largest than any node with ancestor or descendant
789
+    ## needs to be in the file.
790
+
791
+    ## E.g., rbind(c(1,2), c(4, 5)) will get three as a child-less and
792
+    ## parent-less node.  But to place 6 you need c(6, NA)
793
+
794
+    
795
+    ## We could use something like in run.oncotree,
796
+    ## as a poset also is a set of parent-child,
797
+    ## and we would then need to add the root connections
798
+    ## as is done below in no.ancestor.
799
+
800
+    ## But we do not for now. Note we show lonely nodes, which oncotrees
801
+    ## do not.  wait: when using root, we do not have "lonely nodes"
802
+    ## anymore.  But that is irrelevant for metrics based on transitive
803
+    ## closure. Note for Diff, etc.
804
+
805
+    ## In fact, this is all OK, but is confussing, because I can
806
+    ## have two kinds of posets: ones that are full, with NAs, etc, if
807
+    ## needed. Others that are not, but are fixed by the code here. But if
808
+    ## using the later, the user needs to make sure that the last node is
809
+    ## in the poset. This can be used as a shortcut trick, but in the docs
810
+    ## I do not do it, as it is bad practice.
811
+    
812
+    m <- length(names) 
813
+    m1 <- matrix(0, nrow = m, ncol = m)
814
+    colnames(m1) <- names
815
+    rownames(m1) <- names
816
+    if(is.null(dim(x)) ) {
817
+        if(length(x) != 1)
818
+            stop("If poset is not a matrix, it must be a vector of length 1")
819
+    } else { ## a matrix so a non-null poset
820
+        ## a debugging check. 
821
+        if(nrow(x) > 0) {
822
+            if(addroot) {
823
+                if(max(x, na.rm = TRUE) != (m - 1))
824
+                    stop("\n in poset, max(x) != (m - 1)")
825
+            } else { ## this was missing!
826
+                if(max(x, na.rm = TRUE) != m)
827
+                    stop("\n in poset, max(x) != m")
828
+            }
829
+        }
830
+        if(nrow(x) > 0) {
831
+            if(addroot)
832
+                m1[x + 1] <- 1
833
+            else
834
+                m1[x] <- 1
835
+        }
836
+        if((length(names) > 1) & addroot) {
837
+            no.ancestor <- which(apply(m1, 2, function(x) all(x == 0)))
838
+            no.ancestor <- no.ancestor[-1]
839
+            m1[cbind(1, no.ancestor)] <- 1
840
+        } ## o.w. do nothing
841
+    }
842
+    if(type == "adjmat") return(m1)
843
+    else if (type == "graphNEL") return(as(m1, "graphNEL"))
844
+    ## does not show the labels
845
+    ## else if (type == "igraph") return(graph.adjacency(m1))
846
+}
847
+
848
+
849
+
850
+
851
+
852
+
853
+
854
+
855
+
856
+
857
+
858
+
859
+
860
+
861
+
0 862
new file mode 100644
1 863
Binary files /dev/null and b/data/examplePosets.RData differ
2 864
new file mode 100644
... ...
@@ -0,0 +1,11 @@
1
+Changes in version 0.99.2 (2014-07-14)
2
+	- Consistently using indentation in .Rd files.
3
+
4
+Changes in version 0.99.1 (2014-07-14)
5
+	- Minor changes for BioConductor submission: 
6
+	    - extended description
7
+	    - minor changes to vignette
8
+	    - improved documentation of posets
9
+
10
+Changes in version 0.99.0 (2014-06-26)
11
+	- First version submitted to BioConductor.
0 12
new file mode 100644
... ...
@@ -0,0 +1,124 @@
1
+#include <Rcpp.h>
2
+
3
+using namespace Rcpp ;
4
+
5
+// [[Rcpp::export]]
6
+void precissionLoss(NumericVector nin_){
7
+  // We are storing population sizes as doubles.
8
+  // Should not loose any precission up to 2^53 - 1
9
+  // (e.g., http://stackoverflow.com/a/1848762)
10
+  // but double check if optims break it.
11
+  // Note that the original code by Mather stores it as int.
12
+
13
+  // Problems are likely to arise soon, with 4.5e15, because
14
+  // of rbinom. See notes in example-binom-problems.cpp
15
+  double a, b, c, d;
16
+  int e, f;
17
+  a = pow(2, 52) + 1.0;	
18
+  b = pow(2, 52); // 2^53 a little over 9*1e15
19
+  c = (9.0 * 1e15) + 1.0;
20
+  d = (9.0 * 1e15);
21
+
22
+  e = static_cast<int>(a - b);
23
+  f = static_cast<int>(c - d);
24
+
25
+  if( a == b) std::cout << "WARNING!!!! \n Precission loss: a == b\n";
26
+  if( !(a > b)) std::cout << "WARNING!!!! \n Precission loss: !(a > b)\n";
27
+  if(c == d) std::cout << "WARNING!!!! \n Precission loss: c == d\n";
28
+  if( !(c > d)) std::cout << "WARNING!!!! \n Precission loss: !(c > d)\n";
29
+  if( e != 1 ) std::cout << "WARNING!!!! \n Precission loss: e != 1\n";
30
+  if( f != 1 ) std::cout << "WARNING!!!! \n Precission loss: f != 1\n";
31
+
32
+  // Now the binomial issue
33
+  double nin, r;
34
+  nin = as<double>(nin_);
35
+  r = floor(nin + 0.5);
36
+  if( r != nin ) {
37
+    std::cout << "\n r != nin \n";
38
+    std::cout << "\n r - nin is " << (r - nin) << "\n";
39
+  }
40
+}
41
+// precissionLoss(1 + 5e15)
42
+
43
+// [[Rcpp::export]]
44
+double Bin(NumericVector num, NumericVector p) {
45
+  double n = as<double>(num);
46
+  std::cout << "\n Num = " << n << "\n";
47
+  return ::Rf_rbinom(n, as<double>(p));
48
+}
49
+// [[Rcpp::export]]
50
+double Bin1(NumericVector num, NumericVector p) {
51
+  double n = as<double>(num);
52
+  n = n + 1.0;
53
+  std::cout << "\n Num = " << n << "\n";
54
+  return ::Rf_rbinom(n, as<double>(p));
55
+}
56
+// very strange: Bin can do 9e15, but this fails with 1e15 and above.
57
+// but not if we add large numbers, say 10000. See below
58
+// with Bin2
59
+
60
+// [[Rcpp::export]]
61
+double Bin11(NumericVector num, NumericVector p) {
62
+  double n = as<double>(num);
63
+  double oldn = n;
64
+  ++n;
65
+  std::cout << "\n Num = " << n << "\n";
66
+  std::cout << "\n Num - oldn " << (n - oldn) << "\n";
67
+
68
+  return ::Rf_rbinom(n, as<double>(p));
69
+}
70
+// [[Rcpp::export]]
71
+double Bin2(NumericVector num, NumericVector a, NumericVector p) {
72
+  double n = num(0); //as<double>(num);
73
+  double aa = a(0);
74
+  double oldn = n;
75
+  n += aa;
76
+  std::cout << "\n n = " << n << "\n";
77
+  std::cout << "\n n - oldn = " << (n - oldn) << "\n";
78
+  double b1 = ::Rf_rbinom(n, as<double>(p));
79
+  double b2 = ::Rf_rbinom(oldn, as<double>(p));
80
+
81
+  std::cout << "\n with small number " << b2;
82
+  std::cout << "\n with large number " << b1 <<"\n";
83
+
84
+  // The binom code in binom.c
85
+  double nin, r;
86
+  nin = n;
87
+  r = floor(nin + 0.5);
88
+  if( r != nin ) {
89
+    std::cout << "\n r != nin \n";
90
+    std::cout << "\n r - nin is " << (r - nin) << "\n";
91
+  }
92
+  return b1;
93
+}
94
+
95
+
96
+
97
+
98
+// Problem is in largest integer storable,
99
+// but then we add a 0.5, ...
100
+// So we can only go up to 2^52.
101
+// run precissionLoss with 4e15, 5e15, 2^52, 2^53
102
+
103
+
104
+// Notice this: Bin2(4e15, 1 + 1e15, .4)
105
+// But this works Bin2(5e15, 2, .4)
106
+// Though this breaks Bin2(5e15, 1, .4)
107
+
108
+
109
+// See rbinom.c for the cause of the problem.
110
+// It is     r = floor(nin + 0.5);
111
+//    if (r != nin) ML_ERR_return_NAN;
112
+
113
+// This is with odd numbers, not even. E.g.: if x > 5e15 and odd, it fails.
114
+// 6e15 -> x; y <- (x + 1); floor(y + 0.5) == y 
115
+
116
+// [[Rcpp::export]]
117
+double NBin(NumericVector num, NumericVector p) {
118
+  return ::Rf_rnbinom(as<double>(num), as<double>(p));
119
+}
120
+
121
+
122
+// check we do not get overflows as follows
123
+// a <- Bin1(4e15, 1); b <- Bin(4e15, 1); a - b
124
+
0 125
new file mode 100644
... ...
@@ -0,0 +1,69 @@
1
+\name{examplePosets}
2
+\alias{examplePosets}
3
+\docType{data}
4
+\title{
5
+Example posets
6
+}
7
+
8
+\description{
9
+
10
+  Some example posets. For simplicity, all the posets are in a single
11
+  list. You can access each poset by accessing each element of the
12
+  list. The first digit or pair of digits denotes the number of nodes.
13
+  
14
+  
15
+  Poset 1101 is the same as the one in Gerstung et al., 2009 (figure
16
+  2A, poset 2). Poset 701 is the same as the one in Gerstung et al.,
17
+  2011 (figure 2B, left, the pancreatic cancer poset). Those posets
18
+  were entered manually at the command line: see \code{\link{poset}}.
19
+  
20
+  
21
+
22
+}
23
+\usage{data("examplePosets")}
24
+\format{
25
+The format is:
26
+List of 13
27
+$ p1101: num [1:10, 1:2] 1 1 3 3 3 7 7 8 9 10 ...
28
+$ p1102: num [1:9, 1:2] 1 1 3 3 3 7 7 9 10 2 ...
29
+$ p1103: num [1:9, 1:2] 1 1 3 3 3 7 7 8 10 2 ...
30
+$ p1104: num [1:9, 1:2] 1 1 3 3 7 7 9 2 10 2 ...
31
+$ p901 : num [1:8, 1:2] 1 2 4 5 7 8 5 1 2 3 ...
32
+$ p902 : num [1:6, 1:2] 1 2 4 5 7 5 2 3 5 6 ...
33
+$ p903 : num [1:6, 1:2] 1 2 5 7 8 1 2 3 6 8 ...
34
+$ p904 : num [1:6, 1:2] 1 4 5 5 1 7 2 5 8 6 ...
35
+$ p701 : num [1:9, 1:2] 1 1 1 1 2 3 4 4 5 2 ...
36
+$ p702 : num [1:6, 1:2] 1 1 1 1 2 4 2 3 4 5 ...
37
+$ p703 : num [1:6, 1:2] 1 1 1 1 3 5 2 3 4 5 ...
38
+$ p704 : num [1:6, 1:2] 1 1 1 1 4 5 2 3 4 5 ...
39
+$ p705 : num [1:6, 1:2] 1 2 1 1 1 2 2 5 4 6 ...
40
+}
41
+\source{
42
+  Gerstung et al., 2009. Quantifying cancer progression with conjunctive
43
+  Bayesian networks. \emph{Bioinformatics}, 21: 2809--2815.
44
+  
45
+  Gerstung et al., 2011. The Temporal Order of Genetic and Pathway
46
+  Alterations in Tumorigenesis. \emph{PLoS ONE}, 6.
47
+  
48
+}
49
+
50
+\seealso{
51
+  \code{\link{poset}}
52
+}
53
+
54
+
55
+\examples{
56
+data(examplePosets)
57
+
58
+## Plot all of them
59
+par(mfrow = c(3, 5))
60
+
61
+invisible(sapply(names(examplePosets),
62
+                 function(x) {plotPoset(examplePosets[[x]],
63
+                     main = x,
64
+                     box = TRUE)}))
65
+
66
+}
67
+\keyword{datasets}
68
+
69
+
0 70
new file mode 100644
... ...
@@ -0,0 +1,335 @@
1
+\name{oncoSimulIndiv}
2
+\alias{oncoSimulIndiv}
3
+\alias{oncoSimulPop}
4
+\alias{print.oncosimul}
5
+\alias{print.oncosimulpop}
6
+\alias{summary.oncosimul}
7
+\alias{summary.oncosimulpop}
8
+
9
+
10
+
11
+\title{
12
+  Simulate tumor progression for one or more individuals.
13
+}
14
+\description{
15
+  Simulate tumor progression including possible restrictions in the
16
+  order of driver mutations. Optionally add passenger
17
+  mutations. Simulation is done using the BNB algorithm of Mather et
18
+  al., 2012.
19
+}
20
+\usage{
21
+ oncoSimulIndiv(poset, model = "Bozic", numPassengers = 30, mu = 1e-6,
22
+                detectionSize = 1e8, detectionDrivers = 4,
23
+                sampleEvery = 1, initSize = 500, s = 0.1, sh = -1,
24
+                K = initSize/(exp(1) - 1), keepEvery = sampleEvery,
25
+                finalTime = 0.25 * 25 * 365, onlyCancer = TRUE,
26
+                max.memory = 2000, max.wall.time = 200,
27
+                verbosity = 0)
28
+
29
+ oncoSimulPop(Nindiv, poset, model = "Bozic", numPassengers = 30, mu = 1e-6,
30
+                detectionSize = 1e8, detectionDrivers = 4,
31
+                sampleEvery = 1, initSize = 500, s = 0.1, sh = -1,
32
+                K = initSize/(exp(1) - 1), keepEvery = sampleEvery,
33
+                finalTime = 0.25 * 25 * 365, onlyCancer = TRUE,
34
+                max.memory = 2000, max.wall.time = 200,
35
+                verbosity = 0, mc.cores = detectCores())
36
+
37
+
38
+}
39
+
40
+\arguments{
41
+  
42
+  \item{Nindiv}{Number of individuals or number of different
43
+    trajectories to simulate.}
44
+  \item{poset}{
45
+    
46
+    The poset that specifies the order restrictions. See \code{\link{poset}}.
47
+
48
+}
49
+\item{model}{
50
+  One of "Bozic", "Exp", "McFarlandLog" (the last one can be abbreviated
51
+  to "McFL").
52
+
53
+}
54
+\item{numPassengers}{
55
+  
56
+  The number of passenger genes.The total number of genes (drivers plus
57
+  passengers) must be smaller than 64.
58
+  
59
+  All driver genes should be included in the poset (even if they depend
60
+  on no one and no one depends on them), and will be numbered from 1 to
61
+  the total number of driver genes. Thus, passenger genes will be
62
+  numbered from (number of driver genes + 1):(number of drivers + number
63
+  of passengers).
64
+
65
+}
66
+\item{mu}{
67
+  Mutation rate.
68
+
69
+}
70
+\item{detectionSize}{
71
+  What is the minimal number of cells for cancer to be detected.
72
+
73
+}
74
+\item{detectionDrivers}{
75
+  The minimal number of drivers present in any clone for cancer to be detected.
76
+
77
+}
78
+\item{sampleEvery}{
79
+  
80
+  How often the whole population is sampled. This is not the same as the
81
+  interval between successive samples that keep stored (for that, see
82
+  \code{keepEvery}).
83
+
84
+  For very fast growing clones, you might need to have a small value
85
+  here to minimize possible numerical problems (such as huge increase in
86
+  population size between two successive samples that can then lead to
87
+  problems for random number generators). Likewise, for models with
88
+  density dependence (such as McF) this value should be very small.
89
+
90
+}
91
+\item{initSize}{
92
+  Initial population size.
93
+
94
+}
95
+\item{s}{
96
+  Selection coefficient for drivers.
97
+
98
+}
99
+\item{sh}{
100
+  Selection coefficient for drivers with restrictions not satisfied. A
101
+  value of 0 means there are no penalties for a driver appearing in a
102
+  clone when its restrictions are not satisfied.
103
+
104
+  To specify "sh=Inf" (in Diaz-Uriarte, 2014) use sh = -1.
105
+
106
+}
107
+\item{K}{
108
+  Initial population equilibrium size in the McFarland models.
109
+
110
+}
111
+\item{keepEvery}{
112
+  Time interval between successive whole population samples that are
113
+  actually stored. This must be larger or equal to sampleEvery. If keepEvery is
114
+  not a multiple integer of sampleEvery, the keepEvery in use will be the
115
+  smallest multiple integer of keepEvery larger than the specified
116
+  keepEvery.
117
+
118
+  If you want nice plots, set \code{sampleEvery} and \code{keepEvery} to
119
+  small values (say, 1 or 0.5). Otherwise, you can use a
120
+  \code{sampleEvery} of 1 but a \code{keepEvery} of 15, so that the
121
+  return objects are not huge.
122
+
123
+}
124
+\item{finalTime}{
125
+  What is the maximum number of time units that the simulation can run.
126
+
127
+}
128
+\item{onlyCancer}{
129
+  Return only simulations that reach cancer?
130
+
131
+  If set to TRUE, only simulations that satisfy the detectionDrivers or
132
+  the detectionSize will be returned (i.e., the simulation will be
133
+  repeated until one which meets that cancer requirements is
134
+  met). Otherwise, the simulation is returned regardless of final
135
+  population size or number of drivers in any clone and this includes
136
+  simulations where the population goes extinct.
137
+
138
+}
139
+\item{max.memory}{
140
+  The largest size (in MB) of the matrix of Populations by Time. If it
141
+  creating it would use more than this amount of memory, it is not
142
+  created. This prevents you from accidentally passing parameters that
143
+  will return an enormous object.
144
+
145
+}
146
+\item{max.wall.time}{
147
+  Maximum wall time for each individual simulation run. If the
148
+  simulation is not done in this time, it is aborted.
149
+
150
+}
151
+\item{verbosity}{
152
+  If 0, run as silently as possible. Otherwise, increasing values of
153
+  verbosity provide progressively more information about intermediate
154
+  steps, possible numerical notes/warnings from the C++ code, etc.
155
+
156
+}
157
+
158
+\item{mc.cores}{Number of cores to use when simulating more than one
159
+  individual (i.e., when calling oncoSimulPop).}
160
+
161
+}
162
+\details{
163
+
164
+  The basic simulation algorithm implemented is the BNB one of Mather et
165
+  al., 2012, where I have added modifications to fitness based on the
166
+  restrictions in the order of mutations.
167
+
168
+  Full details about the algorithm are provided in Mather et al.,
169
+  2012. The evolutionary models, including references, and the rest of
170
+  the parameters are explained in Diaz-Uriarte, 2014, especially in the
171
+  Supplementary Material. The model called "Bozic" is based on Bozic et
172
+  al., 2010, and the model called "McFarland" in McFarland et al., 2013.
173
+
174
+
175
+  oncoSimulPop simply calls oncoSimulIndiv multiple times. When run on
176
+  POSIX systems, it can use multiple cores (via mclapply).
177
+
178
+
179
+  The \code{summary} methods for these classes return some of the return
180
+  values (see next) as a one-row (for class oncosimul) or multiple row
181
+  (for class oncosimulpop) data frame.  The \code{print} methods for
182
+  these classes simply print the summary.
183
+
184
+}
185
+\value{
186
+  
187
+  For \code{oncoSimulIndiv} a list, of class "oncosimul", with the
188
+  following components:
189
+  
190
+    \item{pops.by.time }{A matrix of the population sizes of the clones,
191
+    with clones in columns and time in row. Not all clones are shown here,
192
+    only those that were present in at least on of the keepEvery samples.}
193
+  
194
+  \item{NumClones   }{Total number of clones in the above matrix.  }
195
+
196
+  \item{TotalPopSize}{Total population size at the end.}
197
+
198
+  \item{Genotypes}{A matrix of genotypes. For each of the clones in the
199
+    pops.by.time matrix, its genotype, with a 0 if the gene is not
200
+    mutated and a 1 if it is mutated.}
201
+  
202
+  \item{MaxNumDrivers}{The largest number of mutated driver genes ever
203
+    seen in the simulation in any clone.}
204
+
205
+  \item{MaxDriversLast}{The largest number of mutated drivers in any
206
+    clone     at the   end of the simulation.}
207
+
208
+  \item{NumDriversLargestPop}{The number of mutated driver genes in the
209
+    clone with largest population size. }
210
+
211
+  \item{LargestClone}{Population size of the clone with largest number
212
+    of population size.}
213
+
214
+  \item{PropLargestPopLast}{Ratio of LargestClone/TotalPopSize}
215
+
216
+  \item{FinalTime}{The time (in time units) at the end of the
217
+    simulation.}
218
+
219
+  \item{NumIter}{The number of iterations of the BNB algorithm.}
220
+
221
+  \item{HittedWallTime}{TRUE if we reached the limit of max.wall.time. FALSE
222
+    otherwise.}
223
+
224
+  \item{TotalPresentDrivers}{The total number of mutated driver genes,
225
+    whether or not in the same clone. The number of elements in
226
+    \code{OccurringDrivers}, below.}
227
+
228
+  \item{CountByDriver}{A vector of length number of drivers, with the
229
+    count of the number of clones that have that driver mutated.}
230
+
231
+  \item{OccurringDrivers}{The actual number of drivers mutated.}
232
+
233
+  \item{PerSampleStats}{A 5 column matrix with a row for each sampling
234
+    period. The columns are: total population size, population size of the
235
+    largest clone, the ratio of the two, the largest number of drivers in
236
+    any clone, and the number of drivers in the clone with the largest
237
+    population size.}
238
+  
239
+  \item{other}{A list that contains statistics for an estimate of the
240
+    simulation error when using the McFarland model. The relevant value
241
+    is errorMF, which is -99 unless in the McFarland model. For the
242
+    McFarland model it is the largest difference of successive death
243
+    rates.}
244
+  
245
+
246
+  For \code{oncoSimulPop} a list of length \code{Nindiv}, and of class
247
+  \code{"oncosimulpop"}, where each element of the list is itself a
248
+  list, of class \code{oncosimul}, with components as described above.
249
+  
250
+
251
+}
252
+\references{
253
+
254
+  Bozic, I., et al., (2010). Accumulation of driver and passenger
255
+  mutations during tumor progression. \emph{ Proceedings of the National
256
+  Academy of Sciences of the United States of America\/}, \bold{107},
257
+  18545--18550.
258
+  
259
+  Diaz-Uriarte, R. (2014). Inferring restrictions in the temporal order
260
+  of mutations during tumor progression: effects of passenger mutations,
261
+  evolutionary models, and
262
+  sampling. \url{http://dx.doi.org/10.1101/005587}
263
+  
264
+  McFarland, C.~D. et al. (2013). Impact of deleterious passenger
265
+  mutations on cancer progression.  \emph{Proceedings of the National
266
+  Academy of Sciences of the United States of America\/}, \bold{110}(8),
267
+  2910--5.
268
+
269
+  Mather, W.~H., Hasty, J., and Tsimring, L.~S. (2012). Fast stochastic
270
+  algorithm for simulating evolutionary population dynamics.
271
+  \emph{Bioinformatics (Oxford, England)\/}, \bold{28}(9), 1230--1238.