git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@92963 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,21 @@ |
1 |
+Package: OncoSimulR |
|
2 |
+Type: Package |
|
3 |
+Title: Simulation of cancer progresion with order restrictions |
|
4 |
+Version: 0.99.2 |
|
5 |
+Date: 2014-07-14 |
|
6 |
+Author: Ramon Diaz-Uriarte. |
|
7 |
+Maintainer: Ramon Diaz-Uriarte <rdiaz02@gmail.com> |
|
8 |
+Description: Functions for simulating and plotting cancer progression |
|
9 |
+ data, including drivers and passengers, and allowing for order |
|
10 |
+ restrictions. Simulations use continuous-time models (based on |
|
11 |
+ Bozic et al., 2010 and McFarland et al., 2013) and fitness |
|
12 |
+ functions account for possible restrictions in the order of |
|
13 |
+ accumulation of mutations. |
|
14 |
+biocViews: BiologicalQuestion, SomaticMutation |
|
15 |
+License: GPL (>= 3) |
|
16 |
+Depends: R (>= 3.1.0) |
|
17 |
+Imports: Rcpp (>= 0.11.1), parallel, data.table, graph, Rgraphviz |
|
18 |
+Suggests: BiocStyle, knitr, Oncotree |
|
19 |
+LinkingTo: Rcpp |
|
20 |
+VignetteBuilder: knitr |
|
21 |
+SystemRequirements: C++11 |
0 | 22 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,21 @@ |
1 |
+useDynLib(OncoSimulR, .registration=TRUE) |
|
2 |
+## useDynLib(OncoSimulR) |
|
3 |
+ |
|
4 |
+export("oncoSimulPop", "oncoSimulIndiv", "samplePop", "plotPoset") |
|
5 |
+ |
|
6 |
+S3method(plot, oncosimul) |
|
7 |
+S3method(print, oncosimul) |
|
8 |
+S3method(summary, oncosimul) |
|
9 |
+S3method(plot, oncosimulpop) |
|
10 |
+S3method(summary, oncosimulpop) |
|
11 |
+S3method(print, oncosimulpop) |
|
12 |
+ |
|
13 |
+ |
|
14 |
+importFrom("data.table", rbindlist, .rbind.data.table) |
|
15 |
+importFrom(Rcpp, evalCpp) |
|
16 |
+import(graph) |
|
17 |
+import(Rgraphviz) |
|
18 |
+importFrom("parallel", mclapply, detectCores) |
|
19 |
+ |
|
20 |
+ |
|
21 |
+ |
0 | 22 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,861 @@ |
1 |
+## Copyright 2013, 2014 Ramon Diaz-Uriarte |
|
2 |
+ |
|
3 |
+## This program is free software: you can redistribute it and/or modify |
|
4 |
+## it under the terms of the GNU General Public License as published by |
|
5 |
+## the Free Software Foundation, either version 3 of the License, or |
|
6 |
+## (at your option) any later version. |
|
7 |
+ |
|
8 |
+## This program is distributed in the hope that it will be useful, |
|
9 |
+## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+## GNU General Public License for more details. |
|
12 |
+ |
|
13 |
+## You should have received a copy of the GNU General Public License |
|
14 |
+## along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+ |
|
16 |
+ |
|
17 |
+samplePop <- function(x, timeSample = "last", typeSample = "whole", |
|
18 |
+ thresholdWhole = 0.5) { |
|
19 |
+ if(inherits(x, "oncosimulpop")) |
|
20 |
+ z <- do.call(rbind, |
|
21 |
+ lapply(x, |
|
22 |
+ get.mut.vector, |
|
23 |
+ timeSample = timeSample, |
|
24 |
+ typeSample = typeSample, |
|
25 |
+ thresholdWhole = thresholdWhole)) |
|
26 |
+ else { |
|
27 |
+ z <- get.mut.vector(x, |
|
28 |
+ timeSample = timeSample, |
|
29 |
+ typeSample = typeSample, |
|
30 |
+ thresholdWhole = thresholdWhole) |
|
31 |
+ dim(z) <- c(1, length(z)) |
|
32 |
+ } |
|
33 |
+ cat("\n Subjects by Genes matrix of ", |
|
34 |
+ nrow(z), " subjects and ", |
|
35 |
+ ncol(z), " genes:\n") |
|
36 |
+ colnames(z) <- paste0("G.", seq_len(ncol(z))) |
|
37 |
+ return(z) |
|
38 |
+} |
|
39 |
+ |
|
40 |
+ |
|
41 |
+oncoSimulPop <- function(Nindiv, |
|
42 |
+ poset, |
|
43 |
+ model = "Bozic", |
|
44 |
+ numPassengers = 30, |
|
45 |
+ mu = 1e-6, |
|
46 |
+ detectionSize = 1e8, |
|
47 |
+ detectionDrivers = 4, |
|
48 |
+ sampleEvery = 1, |
|
49 |
+ initSize = 500, |
|
50 |
+ s = 0.1, |
|
51 |
+ sh = -1, |
|
52 |
+ K = initSize/(exp(1) - 1), |
|
53 |
+ keepEvery = sampleEvery, |
|
54 |
+ finalTime = 0.25 * 25 * 365, |
|
55 |
+ onlyCancer = TRUE, |
|
56 |
+ max.memory = 2000, |
|
57 |
+ max.wall.time = 200, |
|
58 |
+ verbosity = 0, |
|
59 |
+ mc.cores = detectCores()) { |
|
60 |
+ |
|
61 |
+ if(.Platform$OS.type == "windows") { |
|
62 |
+ if(mc.cores != 1) |
|
63 |
+ message("You are running Windows. Setting mc.cores = 1") |
|
64 |
+ mc.cores <- 1 |
|
65 |
+ } |
|
66 |
+ pop <- mclapply(seq.int(Nindiv), |
|
67 |
+ function(x) |
|
68 |
+ oncoSimulIndiv( |
|
69 |
+ poset = poset, |
|
70 |
+ model = model, |
|
71 |
+ numPassengers = numPassengers, |
|
72 |
+ mu = mu, |
|
73 |
+ detectionSize = detectionSize, |
|
74 |
+ detectionDrivers = detectionDrivers, |
|
75 |
+ sampleEvery = sampleEvery, |
|
76 |
+ initSize = initSize, |
|
77 |
+ s = s, |
|
78 |
+ sh = sh, |
|
79 |
+ K = K, |
|
80 |
+ keepEvery = keepEvery, |
|
81 |
+ finalTime = finalTime, |
|
82 |
+ onlyCancer = onlyCancer, |
|
83 |
+ max.memory = max.memory, |
|
84 |
+ max.wall.time = max.wall.time, |
|
85 |
+ verbosity = verbosity), |
|
86 |
+ mc.cores = mc.cores |
|
87 |
+ ) |
|
88 |
+ class(pop) <- "oncosimulpop" |
|
89 |
+ attributes(pop)$call <- match.call() |
|
90 |
+ return(pop) |
|
91 |
+} |
|
92 |
+ |
|
93 |
+## where is the default K coming from? Here: |
|
94 |
+## log( (K+N)/K ) = 1; k + n = k * exp(1); k(exp - 1) = n; k = n/(exp - 1) |
|
95 |
+ |
|
96 |
+oncoSimulIndiv <- function(poset, |
|
97 |
+ model = "Bozic", |
|
98 |
+ numPassengers = 30, |
|
99 |
+ mu = 1e-6, |
|
100 |
+ detectionSize = 1e8, |
|
101 |
+ detectionDrivers = 4, |
|
102 |
+ sampleEvery = 1, |
|
103 |
+ initSize = 500, |
|
104 |
+ s = 0.1, |
|
105 |
+ sh = -1, |
|
106 |
+ K = initSize/(exp(1) - 1), |
|
107 |
+ keepEvery = sampleEvery, |
|
108 |
+ finalTime = 0.25 * 25 * 365, |
|
109 |
+ onlyCancer = TRUE, |
|
110 |
+ max.memory = 2000, |
|
111 |
+ max.wall.time = 200, |
|
112 |
+ verbosity = 0 |
|
113 |
+ ) { |
|
114 |
+ call <- match.call() |
|
115 |
+ rt <- poset.to.restrictTable(poset) |
|
116 |
+ |
|
117 |
+ |
|
118 |
+ numDrivers <- nrow(rt) |
|
119 |
+ numGenes <- (numDrivers + numPassengers) |
|
120 |
+ |
|
121 |
+ if(numGenes > 64) |
|
122 |
+ stop("Largest possible number of genes is 64") |
|
123 |
+ |
|
124 |
+ if(keepEvery < sampleEvery) |
|
125 |
+ warning("setting keepEvery to sampleEvery") |
|
126 |
+ |
|
127 |
+ |
|
128 |
+ ## legacies from poor name choices |
|
129 |
+ typeFitness <- switch(model, |
|
130 |
+ "Bozic" = "bozic1", |
|
131 |
+ "Exp" = "exp", |
|
132 |
+ "McFarlandLog" = "mcfarlandlog", |
|
133 |
+ "McFL" = "mcfarlandlog", |
|
134 |
+ stop("No valid value for model") |
|
135 |
+ ) |
|
136 |
+ |
|
137 |
+ if(typeFitness == "exp") { |
|
138 |
+ death <- 1 |
|
139 |
+ mutatorGenotype <- 1 |
|
140 |
+ } else { |
|
141 |
+ death <- -99 |
|
142 |
+ mutatorGenotype <- 0 |
|
143 |
+ } |
|
144 |
+ birth <- -99 |
|
145 |
+ |
|
146 |
+ if( (typeFitness == "mcfarlandlog") && |
|
147 |
+ (sampleEvery > 0.05)) { |
|
148 |
+ warning("With the McFarland model you often want smaller sampleEvery") |
|
149 |
+ } |
|
150 |
+ |
|
151 |
+ if(typeFitness == "mcfarlandlog") { |
|
152 |
+ endTimeEvery <- keepEvery |
|
153 |
+ } else { |
|
154 |
+ endTimeEvery <- -9 |
|
155 |
+ } |
|
156 |
+ |
|
157 |
+ |
|
158 |
+ |
|
159 |
+ ## A simulation stops if cancer or finalTime appear, the first |
|
160 |
+ ## one. But if we set onlyCnacer = FALSE, we also accept simuls |
|
161 |
+ ## without cancer (or without anything) |
|
162 |
+ |
|
163 |
+ doneSimuls <- FALSE |
|
164 |
+ while(!doneSimuls) { |
|
165 |
+ op <- try(oncoSimul.internal(restrict.table = rt, |
|
166 |
+ numGenes = numGenes, |
|
167 |
+ typeFitness = typeFitness, |
|
168 |
+ typeCBN = "-99", |
|
169 |
+ birth = birth, |
|
170 |
+ s = s, |
|
171 |
+ sh = sh, |
|
172 |
+ death = death, |
|
173 |
+ mu = mu, |
|
174 |
+ initSize = initSize, |
|
175 |
+ sampleEvery = sampleEvery, |
|
176 |
+ detectionSize = detectionSize, |
|
177 |
+ mutatorGenotype = mutatorGenotype, |
|
178 |
+ finalTime = finalTime, |
|
179 |
+ initSize_species = 2000, |
|
180 |
+ initSize_iter = 500, |
|
181 |
+ seed_gsl = NULL, |
|
182 |
+ verbosity = verbosity, |
|
183 |
+ initMutant = -1, |
|
184 |
+ speciesFS = 40000, |
|
185 |
+ ratioForce = 2, |
|
186 |
+ max.memory = max.memory, |
|
187 |
+ max.wall.time = max.wall.time, |
|
188 |
+ keepEvery = keepEvery, |
|
189 |
+ alpha = 0.0015, |
|
190 |
+ K = K, |
|
191 |
+ endTimeEvery = endTimeEvery, |
|
192 |
+ finalDrivers = detectionDrivers), |
|
193 |
+ silent = !verbosity) |
|
194 |
+ |
|
195 |
+ if(!inherits(op, "try-error")) { |
|
196 |
+ if(verbosity >= 2) { |
|
197 |
+ cat("\n ... finished this run:") |
|
198 |
+ cat("\n Total Pop Size = ", op$TotalPopSize) |
|
199 |
+ cat("\n Drivers Last = ", op$MaxDriversLast) |
|
200 |
+ cat("\n Final Time = ", op$FinalTime) |
|
201 |
+ } |
|
202 |
+ if(onlyCancer) { |
|
203 |
+ doneSimuls <- reachCancer(op, ndr = detectionDrivers, |
|
204 |
+ detectionSize = detectionSize, |
|
205 |
+ maxPopSize = 1e15) |
|
206 |
+ } else { |
|
207 |
+ doneSimuls <- TRUE |
|
208 |
+ } |
|
209 |
+ if(verbosity >= 2) { |
|
210 |
+ if(doneSimuls) |
|
211 |
+ cat("\n ... Keeping this one\n") |
|
212 |
+ else |
|
213 |
+ cat("\n ... Cancer not reached\n") |
|
214 |
+ } |
|
215 |
+ } else { |
|
216 |
+ if(length(grep("BAIL OUT NOW", op))) |
|
217 |
+ stop("Unrecoverable error") |
|
218 |
+ if(verbosity >= 2) |
|
219 |
+ cat("\nSimulation aborted because of numerical/other issues.", |
|
220 |
+ "Proceeding to next one.\n") |
|
221 |
+ } |
|
222 |
+ } |
|
223 |
+ class(op) <- "oncosimul" |
|
224 |
+ attributes(op)$call <- call |
|
225 |
+ return(op) |
|
226 |
+} |
|
227 |
+ |
|
228 |
+ |
|
229 |
+summary.oncosimul <- function(object, ...) { |
|
230 |
+ tmp <- object[c("NumClones", "TotalPopSize", "LargestClone", |
|
231 |
+ "MaxNumDrivers", "MaxDriversLast", |
|
232 |
+ "NumDriversLargestPop", "TotalPresentDrivers", |
|
233 |
+ "FinalTime", "NumIter", "HittedWallTime")] |
|
234 |
+ tmp$errorMF <- object$other$errorMF |
|
235 |
+ if(tmp$errorMF == -99) tmp$errorMF <- NA |
|
236 |
+ tmp$OccurringDrivers <- object$OccurringDrivers |
|
237 |
+ return(as.data.frame(tmp)) |
|
238 |
+} |
|
239 |
+ |
|
240 |
+print.oncosimul <- function(x, ...) { |
|
241 |
+ cat("\nIndividual OncoSimul trajectory with call:\n ") |
|
242 |
+ print(attributes(x)$call) |
|
243 |
+ cat("\n") |
|
244 |
+ print(summary(x)) |
|
245 |
+} |
|
246 |
+ |
|
247 |
+## I want this to return things that are storable |
|
248 |
+summary.oncosimulpop <- function(object, ...) { |
|
249 |
+ as.data.frame(rbindlist(lapply(object, summary))) |
|
250 |
+} |
|
251 |
+ |
|
252 |
+print.oncosimulpop <- function(x, ...) { |
|
253 |
+ cat("\nPopulation of OncoSimul trajectories of ", |
|
254 |
+ length(x), " individuals. Call :\n") |
|
255 |
+ print(attributes(x)$call) |
|
256 |
+ cat("\n") |
|
257 |
+ print(summary(x)) |
|
258 |
+} |
|
259 |
+ |
|
260 |
+ |
|
261 |
+plot.oncosimulpop <- function(x, ask = TRUE, |
|
262 |
+ col = c(8, "orange", 6:1), |
|
263 |
+ log = "y", |
|
264 |
+ ltyClone = 2:6, |
|
265 |
+ lwdClone = 0.2, |
|
266 |
+ ltyDrivers = 1, |
|
267 |
+ lwdDrivers = 3, |
|
268 |
+ xlab = "Time units", |
|
269 |
+ ylab = "Number of cells", |
|
270 |
+ plotClones = TRUE, |
|
271 |
+ plotDrivers = TRUE, |
|
272 |
+ addtot = FALSE, |
|
273 |
+ addtotlwd = 0.5, |
|
274 |
+ yl = NULL, |
|
275 |
+ thinData = FALSE, |
|
276 |
+ thinData.keep = 0.1, |
|
277 |
+ thinData.min = 2, |
|
278 |
+ ... |
|
279 |
+ ) { |
|
280 |
+ op <- par(ask = ask) |
|
281 |
+ on.exit(par(op)) |
|
282 |
+ null <- lapply(x, function(z) |
|
283 |
+ plot.oncosimul(z, |
|
284 |
+ col = col, |
|
285 |
+ log = log, |
|
286 |
+ ltyClone = ltyClone, |
|
287 |
+ lwdClone = lwdClone, |
|
288 |
+ ltyDrivers = ltyDrivers, |
|
289 |
+ lwdDrivers = lwdDrivers, |
|
290 |
+ xlab = xlab, |
|
291 |
+ ylab = ylab, |
|
292 |
+ plotClones = plotClones, |
|
293 |
+ plotDrivers = plotDrivers, |
|
294 |
+ addtot = addtot, |
|
295 |
+ addtotlwd = addtotlwd, |
|
296 |
+ yl = yl, |
|
297 |
+ thinData = thinData, |
|
298 |
+ thinData.keep = thinData.keep, |
|
299 |
+ thinData.min = thinData.min, |
|
300 |
+ ...)) |
|
301 |
+} |
|
302 |
+ |
|
303 |
+ |
|
304 |
+plot.oncosimul <- function(x, col = c(8, "orange", 6:1), |
|
305 |
+ log = "y", |
|
306 |
+ ltyClone = 2:6, |
|
307 |
+ lwdClone = 0.2, |
|
308 |
+ ltyDrivers = 1, |
|
309 |
+ lwdDrivers = 3, |
|
310 |
+ xlab = "Time units", |
|
311 |
+ ylab = "Number of cells", |
|
312 |
+ plotClones = TRUE, |
|
313 |
+ plotDrivers = TRUE, |
|
314 |
+ addtot = FALSE, |
|
315 |
+ addtotlwd = 0.5, |
|
316 |
+ yl = NULL, |
|
317 |
+ thinData = FALSE, |
|
318 |
+ thinData.keep = 0.1, |
|
319 |
+ thinData.min = 2, |
|
320 |
+ ... |
|
321 |
+ ) { |
|
322 |
+ |
|
323 |
+ if(thinData) |
|
324 |
+ x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min) |
|
325 |
+ |
|
326 |
+ ndr <- apply(x$Genotypes[1:x$NumDrivers, , drop = FALSE], 2, sum) |
|
327 |
+ |
|
328 |
+ if(is.null(yl)) { |
|
329 |
+ if(log %in% c("y", "xy", "yx") ) |
|
330 |
+ yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) |
|
331 |
+ else |
|
332 |
+ yl <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum))) |
|
333 |
+ } |
|
334 |
+ if(plotClones) { |
|
335 |
+ plotClones(x, |
|
336 |
+ ndr = ndr, |
|
337 |
+ xlab = xlab, |
|
338 |
+ ylab = ylab, |
|
339 |
+ lty = ltyClone, |
|
340 |
+ col = col, |
|
341 |
+ ylim = yl, |
|
342 |
+ lwd = lwdClone, |
|
343 |
+ axes = FALSE, |
|
344 |
+ log = log, |
|
345 |
+ ...) |
|
346 |
+ } |
|
347 |
+ |
|
348 |
+ if(plotClones && plotDrivers) |
|
349 |
+ par(new = TRUE) |
|
350 |
+ |
|
351 |
+ if(plotDrivers){ |
|
352 |
+ plotDrivers0(x, |
|
353 |
+ timescale = 1, |
|
354 |
+ trim.no.drivers = FALSE, |
|
355 |
+ xlab = "", ylab = "", |
|
356 |
+ lwd = lwdDrivers, |
|
357 |
+ lty = ltyDrivers, |
|
358 |
+ col = col, |
|
359 |
+ addtot = addtot, |
|
360 |
+ addtotlwd = addtotlwd, |
|
361 |
+ log = log, ylim = yl, |
|
362 |
+ ...) |
|
363 |
+ } |
|
364 |
+} |
|
365 |
+ |
|
366 |
+ |
|
367 |
+plotPoset <- function(x, names = NULL, addroot = FALSE, |
|
368 |
+ box = FALSE, ...) { |
|
369 |
+ if(is.null(names)) { |
|
370 |
+ if(addroot) names <- c("Root", 1:max(x)) |
|
371 |
+ else names <- 1:max(x) |
|
372 |
+ } |
|
373 |
+ plot(posetToGraph(x, names, addroot), ...) |
|
374 |
+ if(box) |
|
375 |
+ box() |
|
376 |
+} |
|
377 |
+ |
|
378 |
+############# The rest are internal functions |
|
379 |
+ |
|
380 |
+get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) { |
|
381 |
+ ## Obtain, from results from a simulation run, the vector |
|
382 |
+ ## of 0/1 corresponding to each gene. |
|
383 |
+ |
|
384 |
+ ## threshold is the min. proportion for a mutation to be detected |
|
385 |
+ ## We are doing whole tumor sampling here, as in Sprouffske |
|
386 |
+ |
|
387 |
+ ## timeSample: do we sample at end, or at a time point, chosen |
|
388 |
+ ## randomly, from all those with at least one driver? |
|
389 |
+ |
|
390 |
+ |
|
391 |
+ if(timeSample == "last") { |
|
392 |
+ return(as.numeric( |
|
393 |
+ (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1], |
|
394 |
+ tmp$Genotypes)/tmp$TotalPopSize) > threshold)) |
|
395 |
+ } else if (timeSample %in% c("uniform", "unif")) { |
|
396 |
+ the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1) |
|
397 |
+ pop <- tmp$pops.by.time[the.time, -1] |
|
398 |
+ popSize <- tmp$PerSampleStats[the.time, 1] |
|
399 |
+ return( as.numeric((tcrossprod(pop, |
|
400 |
+ tmp$Genotypes)/popSize) > threshold) ) |
|
401 |
+ } |
|
402 |
+} |
|
403 |
+ |
|
404 |
+ |
|
405 |
+get.mut.vector.singlecell <- function(tmp, timeSample = "last") { |
|
406 |
+ ## No threshold, as single cell. |
|
407 |
+ |
|
408 |
+ ## timeSample: do we sample at end, or at a time point, chosen |
|
409 |
+ ## randomly, from all those with at least one driver? |
|
410 |
+ |
|
411 |
+ if(timeSample == "last") { |
|
412 |
+ the.time <- nrow(tmp$pops.by.time) |
|
413 |
+ } else if (timeSample %in% c("uniform", "unif")) { |
|
414 |
+ the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1) |
|
415 |
+ } |
|
416 |
+ pop <- tmp$pops.by.time[the.time, -1] |
|
417 |
+ ## popSize <- tmp$PerSampleStats[the.time, 1] |
|
418 |
+ ## genot <- sample(seq_along(pop), 1, prob = pop) |
|
419 |
+ return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)]) |
|
420 |
+} |
|
421 |
+ |
|
422 |
+ |
|
423 |
+get.mut.vector <- function(x, timeSample = "whole", typeSample = "last", |
|
424 |
+ thresholdWhole = 0.5) { |
|
425 |
+ if(typeSample %in% c("wholeTumor", "whole")) { |
|
426 |
+ get.mut.vector.whole(x, timeSample = timeSample, |
|
427 |
+ threshold = thresholdWhole) |
|
428 |
+ } else if(typeSample %in% c("singleCell", "single")) { |
|
429 |
+ get.mut.vector.singlecell(x, timeSample = timeSample) |
|
430 |
+ } |
|
431 |
+} |
|
432 |
+ |
|
433 |
+ |
|
434 |
+ |
|
435 |
+ |
|
436 |
+ |
|
437 |
+ |
|
438 |
+ |
|
439 |
+reachCancer <- function(x, ndr = 0, detectionSize = 0, |
|
440 |
+ maxPopSize = 1e15) { |
|
441 |
+ return( |
|
442 |
+ ( ((x$TotalPopSize >= detectionSize) || |
|
443 |
+ (x$MaxDriversLast >= ndr)) && |
|
444 |
+ ## (x$ti_dbl_min == 0) && ## silly, since now impossible |
|
445 |
+ (x$TotalPopSize < maxPopSize) ## numerical issues here |
|
446 |
+ )) |
|
447 |
+} |
|
448 |
+ |
|
449 |
+oncoSimul.internal <- function(restrict.table, |
|
450 |
+ numGenes, |
|
451 |
+ typeFitness, |
|
452 |
+ typeCBN, |
|
453 |
+ birth, |
|
454 |
+ s, |
|
455 |
+ sh, |
|
456 |
+ death, |
|
457 |
+ mu, |
|
458 |
+ initSize, |
|
459 |
+ sampleEvery, |
|
460 |
+ detectionSize, |
|
461 |
+ mutatorGenotype, |
|
462 |
+ finalTime, |
|
463 |
+ initSize_species = 2000, |
|
464 |
+ initSize_iter = 500, |
|
465 |
+ seed_gsl = NULL, |
|
466 |
+ verbosity = 1, |
|
467 |
+ initMutant = -1, |
|
468 |
+ speciesFS = 40000, |
|
469 |
+ ratioForce = 2, |
|
470 |
+ max.memory = 20000, |
|
471 |
+ max.wall.time = 3600, |
|
472 |
+ keepEvery = 20, |
|
473 |
+ alpha = 0.0015, |
|
474 |
+ K = 1000, |
|
475 |
+ endTimeEvery = NULL, |
|
476 |
+ finalDrivers = 1000) { |
|
477 |
+ |
|
478 |
+ if(initSize_species < 10) { |
|
479 |
+ warning("initSize_species too small?") |
|
480 |
+ } |
|
481 |
+ if(initSize_iter < 100) { |
|
482 |
+ warning("initSize_iter too small?") |
|
483 |
+ } |
|
484 |
+ if(keepEvery < sampleEvery) |
|
485 |
+ warning("setting keepEvery to sampleEvery") |
|
486 |
+ if(is.null(seed_gsl)) {## passing a null creates a random seed |
|
487 |
+ ## name is a legacy. This is really the seed for the C++ generator. |
|
488 |
+ ## Not a user modifieble argument for now, though. |
|
489 |
+ seed_gsl <- as.integer(round(runif(1, min = 0, max = 2^16))) |
|
490 |
+ if(verbosity >= 2) |
|
491 |
+ cat(paste("\n Using ", seed_gsl, " as seed for C++ generator\n")) |
|
492 |
+ } |
|
493 |
+ |
|
494 |
+ numDrivers <- nrow(restrict.table) |
|
495 |
+ if(length(unique(restrict.table[, 1])) != numDrivers) |
|
496 |
+ stop("BAIL OUT NOW: length(unique(restrict.table[, 1])) != numDrivers)") |
|
497 |
+ ddr <- restrict.table[, 1] |
|
498 |
+ if(any(diff(ddr) != 1)) |
|
499 |
+ stop("BAIL OUT NOW: any(diff(ddr) != 1") |
|
500 |
+ ## sanity checks |
|
501 |
+ if(max(restrict.table[, 1]) != numDrivers) |
|
502 |
+ stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers") |
|
503 |
+ if(numDrivers > numGenes) |
|
504 |
+ stop("BAIL OUT NOW: numDrivers > numGenes") |
|
505 |
+ |
|
506 |
+ non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1] |
|
507 |
+ |
|
508 |
+ |
|
509 |
+ if( (typeFitness == "bozic1") && (mutatorGenotype) ) |
|
510 |
+ warning("Using fitness bozic1 with mutatorGenotype;", |
|
511 |
+ "this will have no effect.") |
|
512 |
+ |
|
513 |
+ if( (typeFitness == "exp") && (death != 1) ) |
|
514 |
+ warning("Using fitness exp with death != 1") |
|
515 |
+ |
|
516 |
+ |
|
517 |
+ if( (is.null(endTimeEvery) || (endTimeEvery > 0)) && |
|
518 |
+ (typeFitness %in% c("bozic1", "exp") )) { |
|
519 |
+ warning(paste("endTimeEvery will take a positive value. ", |
|
520 |
+ "This will make simulations not stop until the next ", |
|
521 |
+ "endTimeEvery has been reached. Thus, in simulations ", |
|
522 |
+ "with very fast growth, simulations can take a long ", |
|
523 |
+ "time to finish, or can hit the wall time limit. ")) |
|
524 |
+ } |
|
525 |
+ if(is.null(endTimeEvery)) |
|
526 |
+ endTimeEvery <- keepEvery |
|
527 |
+ if( (endTimeEvery > 0) && (endTimeEvery %% keepEvery) ) |
|
528 |
+ warning("!(endTimeEvery %% keepEvery)") |
|
529 |
+ ## a sanity check in restricTable, so no neg. indices for the positive deps |
|
530 |
+ neg.deps <- function(x) { |
|
531 |
+ ## checks a row of restrict.table |
|
532 |
+ numdeps <- x[2] |
|
533 |
+ if(numdeps) { |
|
534 |
+ deps <- x[3:(3+numdeps - 1)] |
|
535 |
+ return(any(deps < 0)) |
|
536 |
+ } else FALSE |
|
537 |
+ } |
|
538 |
+ if(any(apply(restrict.table, 1, neg.deps))) |
|
539 |
+ stop("BAIL OUT NOW: Negative dependencies in restriction table") |
|
540 |
+ |
|
541 |
+ ## transpose the table |
|
542 |
+ rtC <- convertRestrictTable(restrict.table) |
|
543 |
+ |
|
544 |
+ |
|
545 |
+ ## return the matching call? call <- match.call() |
|
546 |
+ ## and then return(c(.Call(), call)) |
|
547 |
+ call <- match.call() |
|
548 |
+ return(c(.Call( |
|
549 |
+ ## "Algorithm5", |
|
550 |
+ "C_Algorithm5", |
|
551 |
+ rtC, |
|
552 |
+ numDrivers, |
|
553 |
+ numGenes, |
|
554 |
+ typeCBN, |
|
555 |
+ birth, |
|
556 |
+ s, |
|
557 |
+ death, |
|
558 |
+ mu, |
|
559 |
+ initSize, |
|
560 |
+ sampleEvery, |
|
561 |
+ detectionSize, |
|
562 |
+ finalTime, |
|
563 |
+ initSize_species, |
|
564 |
+ initSize_iter, |
|
565 |
+ seed_gsl, |
|
566 |
+ verbosity, |
|
567 |
+ speciesFS, |
|
568 |
+ ratioForce, |
|
569 |
+ typeFitness, |
|
570 |
+ max.memory, |
|
571 |
+ mutatorGenotype, |
|
572 |
+ initMutant, |
|
573 |
+ max.wall.time, |
|
574 |
+ keepEvery, |
|
575 |
+ alpha, |
|
576 |
+ sh, |
|
577 |
+ K, |
|
578 |
+ endTimeEvery, |
|
579 |
+ finalDrivers), |
|
580 |
+ NumDrivers = numDrivers |
|
581 |
+ )) |
|
582 |
+} |
|
583 |
+ |
|
584 |
+ |
|
585 |
+create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5 |
|
586 |
+ if(tmp$NumClones > 1) { |
|
587 |
+ NumMutations <- apply(tmp$Genotypes, 2, sum) |
|
588 |
+ muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE], |
|
589 |
+ t(apply(tmp$pops.by.time[, -c(1), |
|
590 |
+ drop = FALSE], 1, |
|
591 |
+ function(x) tapply(x, |
|
592 |
+ NumMutations, sum)))) |
|
593 |
+ colnames(muts.by.time)[c(1)] <- "Time" |
|
594 |
+ } else { |
|
595 |
+ muts.by.time <- tmp$pops.by.time |
|
596 |
+ } |
|
597 |
+ return(muts.by.time) |
|
598 |
+} |
|
599 |
+ |
|
600 |
+ |
|
601 |
+create.drivers.by.time <- function(tmp, numDrivers) { |
|
602 |
+ CountNumDrivers <- apply(tmp$Genotypes[1:numDrivers, ,drop = FALSE], 2, sum) |
|
603 |
+ if(tmp$NumClones >= 1) { |
|
604 |
+ if(tmp$NumClones == 1) { |
|
605 |
+ if(ncol(tmp$pops.by.time) != 2) |
|
606 |
+ stop("This is impossible!") |
|
607 |
+ drivers.by.time <- tmp$pops.by.time |
|
608 |
+ } else { |
|
609 |
+ if(length(unique(CountNumDrivers )) > 1) { |
|
610 |
+ drivers.by.time <- cbind(tmp$pops.by.time[, c(1), |
|
611 |
+ drop = FALSE] , |
|
612 |
+ t(apply(tmp$pops.by.time[, -c(1), |
|
613 |
+ drop = FALSE], |
|
614 |
+ 1, |
|
615 |
+ function(x) |
|
616 |
+ tapply(x, |
|
617 |
+ CountNumDrivers, |
|
618 |
+ sum)))) |
|
619 |
+ } else { |
|
620 |
+ drivers.by.time <- cbind(tmp$pops.by.time[, c(1), |
|
621 |
+ drop = FALSE] , |
|
622 |
+ rowSums(tmp$pops.by.time[, -c(1), |
|
623 |
+ drop =FALSE])) |
|
624 |
+ } |
|
625 |
+ } |
|
626 |
+ colnames(drivers.by.time) <- c("Time", |
|
627 |
+ paste("dr_", |
|
628 |
+ colnames(drivers.by.time)[-c(1)], |
|
629 |
+ sep = "")) |
|
630 |
+ } else { |
|
631 |
+ drivers.by.time <- NULL |
|
632 |
+ } |
|
633 |
+ return(drivers.by.time) |
|
634 |
+} |
|
635 |
+ |
|
636 |
+ |
|
637 |
+ |
|
638 |
+## For plotting, this helps decrease huge file sizes, while still showing |
|
639 |
+## the start of each clone, if it was originally recorded. |
|
640 |
+ |
|
641 |
+thin.pop.data <- function(x, keep = 0.1, min.keep = 3) { |
|
642 |
+ norig <- nrow(x$pops.by.time) |
|
643 |
+ keep1 <- round(seq.int(from = 1, to = norig, |
|
644 |
+ length.out = round(norig * keep))) |
|
645 |
+ keep2 <- apply(x$pops.by.time[, -1, drop = FALSE], |
|
646 |
+ 1, function(x) any((x[x > 0] < min.keep))) |
|
647 |
+ keep <- sort(union(keep1, keep2)) |
|
648 |
+ x$pops.by.time <- x$pops.by.time[keep, , drop = FALSE] |
|
649 |
+ return(x) |
|
650 |
+} |
|
651 |
+ |
|
652 |
+ |
|
653 |
+ |
|
654 |
+plotClones <- function(z, ndr = NULL, na.subs = TRUE, |
|
655 |
+ log = "y", type = "l", |
|
656 |
+ lty = 1:8, col = 1:9, ...) { |
|
657 |
+ |
|
658 |
+ ## if given ndr, we order columns based on ndr, so clones with more |
|
659 |
+ ## drivers are plotted last |
|
660 |
+ |
|
661 |
+ y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE] |
|
662 |
+ |
|
663 |
+ if(na.subs){ |
|
664 |
+ y[y == 0] <- NA |
|
665 |
+ } |
|
666 |
+ if(!is.null(ndr)) { |
|
667 |
+ ## could be done above, to avoid creating |
|
668 |
+ ## more copies |
|
669 |
+ oo <- order(ndr) |
|
670 |
+ y <- y[, oo, drop = FALSE] |
|
671 |
+ ndr <- ndr[oo] |
|
672 |
+ col <- col[ndr + 1] |
|
673 |
+ } |
|
674 |
+ matplot(x = z$pops.by.time[, 1], |
|
675 |
+ y = y, |
|
676 |
+ log = log, type = type, |
|
677 |
+ col = col, lty = lty, |
|
678 |
+ ...) |
|
679 |
+} |
|
680 |
+ |
|
681 |
+ |
|
682 |
+plotDrivers0 <- function(x, |
|
683 |
+ timescale = 4, |
|
684 |
+ trim.no.drivers = TRUE, |
|
685 |
+ addtot = TRUE, |
|
686 |
+ addtotlwd = 2, |
|
687 |
+ na.subs = TRUE, log = "y", type = "l", |
|
688 |
+ lty = 1:9, col = c(8, "orange", 6:1), |
|
689 |
+ lwd = 2, |
|
690 |
+ ...) { |
|
691 |
+ |
|
692 |
+ z <- create.drivers.by.time(x, x$NumDrivers) |
|
693 |
+ if(trim.no.drivers && x$MaxDriversLast) { |
|
694 |
+ fi <- which(apply(z[, -c(1, 2), drop = FALSE], 1, |
|
695 |
+ function(x) sum(x) > 0))[1] |
|
696 |
+ z <- z[fi:nrow(z), , drop = FALSE] |
|
697 |
+ } |
|
698 |
+ y <- z[, 2:ncol(z), drop = FALSE] |
|
699 |
+ if(na.subs){ |
|
700 |
+ y[y == 0] <- NA |
|
701 |
+ } |
|
702 |
+ if(timescale != 1) { |
|
703 |
+ time <- timescale * z[, 1] |
|
704 |
+ } else { |
|
705 |
+ time <- z[, 1] |
|
706 |
+ } |
|
707 |
+ if(nrow(y) <= 2) type <- "b" |
|
708 |
+ matplot(x = time, |
|
709 |
+ y = y, |
|
710 |
+ type = type, log = log, lty = lty, col = col, lwd = lwd, |
|
711 |
+ ...) |
|
712 |
+ if(addtot) { |
|
713 |
+ tot <- rowSums(y, na.rm = TRUE) |
|
714 |
+ lines(time, tot, col = "black", lty = 1, lwd = addtotlwd) |
|
715 |
+ } |
|
716 |
+ legend(x = "topleft", |
|
717 |
+ title = "Number of drivers", |
|
718 |
+ lty = lty, col = col, lwd = lwd, |
|
719 |
+ legend = (1:ncol(y)) - 1) |
|
720 |
+} |
|
721 |
+ |
|
722 |
+ |
|
723 |
+rtNoDep <- function(numdrivers) { |
|
724 |
+ ## create a restriction table with no dependencies |
|
725 |
+ x <- matrix(nrow = numdrivers, ncol = 3) |
|
726 |
+ x[, 1] <- 1:numdrivers |
|
727 |
+ x[, 2] <- 0 |
|
728 |
+ x[, 3] <- -9 |
|
729 |
+ return(x) |
|
730 |
+} |
|
731 |
+ |
|
732 |
+ |
|
733 |
+ |
|
734 |
+convertRestrictTable <- function(x) { |
|
735 |
+ t.restrictTable <- matrix(as.integer(x), |
|
736 |
+ ncol = nrow(x), byrow = TRUE) |
|
737 |
+ |
|
738 |
+ t.restrictTable[-2, ] <- t.restrictTable[-2, ] - 1 |
|
739 |
+ return(t.restrictTable) |
|
740 |
+} |
|
741 |
+ |
|
742 |
+ |
|
743 |
+ |
|
744 |
+ |
|
745 |
+ |
|
746 |
+adjmat.to.restrictTable <- function(x) { |
|
747 |
+ ## we have the zero |
|
748 |
+ ## x <- x[-1, -1] |
|
749 |
+ if(!is.null(colnames(x))) { |
|
750 |
+ ## FIXME: this makes sense with numeric labels for columns, but |
|
751 |
+ ## not ow. |
|
752 |
+ oi <- order(as.numeric(colnames(x))) |
|
753 |
+ if(any(oi != (1:ncol(x)))) { |
|
754 |
+ warning("Reordering adjacency matrix") |
|
755 |
+ x <- x[oi, oi] |
|
756 |
+ } |
|
757 |
+ } |
|
758 |
+ |
|
759 |
+ num.deps <- colSums(x) |
|
760 |
+ max.n.deps <- max(num.deps) |
|
761 |
+ rt <- matrix(-9, nrow = nrow(x), |
|
762 |
+ ncol = max.n.deps + 2) |
|
763 |
+ for(i in 1:ncol(x)) { |
|
764 |
+ if( num.deps[ i ]) |
|
765 |
+ rt[i , 1:(2 + num.deps[ i ])] <- c(i, num.deps[i ], |
|
766 |
+ which(x[, i ] != 0)) |
|
767 |
+ else |
|
768 |
+ rt[i , 1:2] <- c(i , 0) |
|
769 |
+ } |
|
770 |
+ return(rt) |
|
771 |
+} |
|
772 |
+ |
|
773 |
+ |
|
774 |
+ |
|
775 |
+poset.to.restrictTable <- function(x) { |
|
776 |
+ x1 <- posetToGraph(x, names = 1:max(x), addroot = FALSE, type = "adjmat") |
|
777 |
+ adjmat.to.restrictTable(x1) |
|
778 |
+} |
|
779 |
+ |
|
780 |
+ |
|
781 |
+## have a graph without a null?? |
|
782 |
+ |
|
783 |
+posetToGraph <- function(x, names, |
|
784 |
+ addroot = FALSE, |
|
785 |
+ type = "graphNEL") { |
|
786 |
+ ## Intermediate nodes, if no ancestor or descendant, need not |
|
787 |
+ ## be in the poset. |
|
788 |
+ ## Any node with index largest than any node with ancestor or descendant |
|
789 |
+ ## needs to be in the file. |
|
790 |
+ |
|
791 |
+ ## E.g., rbind(c(1,2), c(4, 5)) will get three as a child-less and |
|
792 |
+ ## parent-less node. But to place 6 you need c(6, NA) |
|
793 |
+ |
|
794 |
+ |
|
795 |
+ ## We could use something like in run.oncotree, |
|
796 |
+ ## as a poset also is a set of parent-child, |
|
797 |
+ ## and we would then need to add the root connections |
|
798 |
+ ## as is done below in no.ancestor. |
|
799 |
+ |
|
800 |
+ ## But we do not for now. Note we show lonely nodes, which oncotrees |
|
801 |
+ ## do not. wait: when using root, we do not have "lonely nodes" |
|
802 |
+ ## anymore. But that is irrelevant for metrics based on transitive |
|
803 |
+ ## closure. Note for Diff, etc. |
|
804 |
+ |
|
805 |
+ ## In fact, this is all OK, but is confussing, because I can |
|
806 |
+ ## have two kinds of posets: ones that are full, with NAs, etc, if |
|
807 |
+ ## needed. Others that are not, but are fixed by the code here. But if |
|
808 |
+ ## using the later, the user needs to make sure that the last node is |
|
809 |
+ ## in the poset. This can be used as a shortcut trick, but in the docs |
|
810 |
+ ## I do not do it, as it is bad practice. |
|
811 |
+ |
|
812 |
+ m <- length(names) |
|
813 |
+ m1 <- matrix(0, nrow = m, ncol = m) |
|
814 |
+ colnames(m1) <- names |
|
815 |
+ rownames(m1) <- names |
|
816 |
+ if(is.null(dim(x)) ) { |
|
817 |
+ if(length(x) != 1) |
|
818 |
+ stop("If poset is not a matrix, it must be a vector of length 1") |
|
819 |
+ } else { ## a matrix so a non-null poset |
|
820 |
+ ## a debugging check. |
|
821 |
+ if(nrow(x) > 0) { |
|
822 |
+ if(addroot) { |
|
823 |
+ if(max(x, na.rm = TRUE) != (m - 1)) |
|
824 |
+ stop("\n in poset, max(x) != (m - 1)") |
|
825 |
+ } else { ## this was missing! |
|
826 |
+ if(max(x, na.rm = TRUE) != m) |
|
827 |
+ stop("\n in poset, max(x) != m") |
|
828 |
+ } |
|
829 |
+ } |
|
830 |
+ if(nrow(x) > 0) { |
|
831 |
+ if(addroot) |
|
832 |
+ m1[x + 1] <- 1 |
|
833 |
+ else |
|
834 |
+ m1[x] <- 1 |
|
835 |
+ } |
|
836 |
+ if((length(names) > 1) & addroot) { |
|
837 |
+ no.ancestor <- which(apply(m1, 2, function(x) all(x == 0))) |
|
838 |
+ no.ancestor <- no.ancestor[-1] |
|
839 |
+ m1[cbind(1, no.ancestor)] <- 1 |
|
840 |
+ } ## o.w. do nothing |
|
841 |
+ } |
|
842 |
+ if(type == "adjmat") return(m1) |
|
843 |
+ else if (type == "graphNEL") return(as(m1, "graphNEL")) |
|
844 |
+ ## does not show the labels |
|
845 |
+ ## else if (type == "igraph") return(graph.adjacency(m1)) |
|
846 |
+} |
|
847 |
+ |
|
848 |
+ |
|
849 |
+ |
|
850 |
+ |
|
851 |
+ |
|
852 |
+ |
|
853 |
+ |
|
854 |
+ |
|
855 |
+ |
|
856 |
+ |
|
857 |
+ |
|
858 |
+ |
|
859 |
+ |
|
860 |
+ |
|
861 |
+ |
2 | 864 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,11 @@ |
1 |
+Changes in version 0.99.2 (2014-07-14) |
|
2 |
+ - Consistently using indentation in .Rd files. |
|
3 |
+ |
|
4 |
+Changes in version 0.99.1 (2014-07-14) |
|
5 |
+ - Minor changes for BioConductor submission: |
|
6 |
+ - extended description |
|
7 |
+ - minor changes to vignette |
|
8 |
+ - improved documentation of posets |
|
9 |
+ |
|
10 |
+Changes in version 0.99.0 (2014-06-26) |
|
11 |
+ - First version submitted to BioConductor. |
0 | 12 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,124 @@ |
1 |
+#include <Rcpp.h> |
|
2 |
+ |
|
3 |
+using namespace Rcpp ; |
|
4 |
+ |
|
5 |
+// [[Rcpp::export]] |
|
6 |
+void precissionLoss(NumericVector nin_){ |
|
7 |
+ // We are storing population sizes as doubles. |
|
8 |
+ // Should not loose any precission up to 2^53 - 1 |
|
9 |
+ // (e.g., http://stackoverflow.com/a/1848762) |
|
10 |
+ // but double check if optims break it. |
|
11 |
+ // Note that the original code by Mather stores it as int. |
|
12 |
+ |
|
13 |
+ // Problems are likely to arise soon, with 4.5e15, because |
|
14 |
+ // of rbinom. See notes in example-binom-problems.cpp |
|
15 |
+ double a, b, c, d; |
|
16 |
+ int e, f; |
|
17 |
+ a = pow(2, 52) + 1.0; |
|
18 |
+ b = pow(2, 52); // 2^53 a little over 9*1e15 |
|
19 |
+ c = (9.0 * 1e15) + 1.0; |
|
20 |
+ d = (9.0 * 1e15); |
|
21 |
+ |
|
22 |
+ e = static_cast<int>(a - b); |
|
23 |
+ f = static_cast<int>(c - d); |
|
24 |
+ |
|
25 |
+ if( a == b) std::cout << "WARNING!!!! \n Precission loss: a == b\n"; |
|
26 |
+ if( !(a > b)) std::cout << "WARNING!!!! \n Precission loss: !(a > b)\n"; |
|
27 |
+ if(c == d) std::cout << "WARNING!!!! \n Precission loss: c == d\n"; |
|
28 |
+ if( !(c > d)) std::cout << "WARNING!!!! \n Precission loss: !(c > d)\n"; |
|
29 |
+ if( e != 1 ) std::cout << "WARNING!!!! \n Precission loss: e != 1\n"; |
|
30 |
+ if( f != 1 ) std::cout << "WARNING!!!! \n Precission loss: f != 1\n"; |
|
31 |
+ |
|
32 |
+ // Now the binomial issue |
|
33 |
+ double nin, r; |
|
34 |
+ nin = as<double>(nin_); |
|
35 |
+ r = floor(nin + 0.5); |
|
36 |
+ if( r != nin ) { |
|
37 |
+ std::cout << "\n r != nin \n"; |
|
38 |
+ std::cout << "\n r - nin is " << (r - nin) << "\n"; |
|
39 |
+ } |
|
40 |
+} |
|
41 |
+// precissionLoss(1 + 5e15) |
|
42 |
+ |
|
43 |
+// [[Rcpp::export]] |
|
44 |
+double Bin(NumericVector num, NumericVector p) { |
|
45 |
+ double n = as<double>(num); |
|
46 |
+ std::cout << "\n Num = " << n << "\n"; |
|
47 |
+ return ::Rf_rbinom(n, as<double>(p)); |
|
48 |
+} |
|
49 |
+// [[Rcpp::export]] |
|
50 |
+double Bin1(NumericVector num, NumericVector p) { |
|
51 |
+ double n = as<double>(num); |
|
52 |
+ n = n + 1.0; |
|
53 |
+ std::cout << "\n Num = " << n << "\n"; |
|
54 |
+ return ::Rf_rbinom(n, as<double>(p)); |
|
55 |
+} |
|
56 |
+// very strange: Bin can do 9e15, but this fails with 1e15 and above. |
|
57 |
+// but not if we add large numbers, say 10000. See below |
|
58 |
+// with Bin2 |
|
59 |
+ |
|
60 |
+// [[Rcpp::export]] |
|
61 |
+double Bin11(NumericVector num, NumericVector p) { |
|
62 |
+ double n = as<double>(num); |
|
63 |
+ double oldn = n; |
|
64 |
+ ++n; |
|
65 |
+ std::cout << "\n Num = " << n << "\n"; |
|
66 |
+ std::cout << "\n Num - oldn " << (n - oldn) << "\n"; |
|
67 |
+ |
|
68 |
+ return ::Rf_rbinom(n, as<double>(p)); |
|
69 |
+} |
|
70 |
+// [[Rcpp::export]] |
|
71 |
+double Bin2(NumericVector num, NumericVector a, NumericVector p) { |
|
72 |
+ double n = num(0); //as<double>(num); |
|
73 |
+ double aa = a(0); |
|
74 |
+ double oldn = n; |
|
75 |
+ n += aa; |
|
76 |
+ std::cout << "\n n = " << n << "\n"; |
|
77 |
+ std::cout << "\n n - oldn = " << (n - oldn) << "\n"; |
|
78 |
+ double b1 = ::Rf_rbinom(n, as<double>(p)); |
|
79 |
+ double b2 = ::Rf_rbinom(oldn, as<double>(p)); |
|
80 |
+ |
|
81 |
+ std::cout << "\n with small number " << b2; |
|
82 |
+ std::cout << "\n with large number " << b1 <<"\n"; |
|
83 |
+ |
|
84 |
+ // The binom code in binom.c |
|
85 |
+ double nin, r; |
|
86 |
+ nin = n; |
|
87 |
+ r = floor(nin + 0.5); |
|
88 |
+ if( r != nin ) { |
|
89 |
+ std::cout << "\n r != nin \n"; |
|
90 |
+ std::cout << "\n r - nin is " << (r - nin) << "\n"; |
|
91 |
+ } |
|
92 |
+ return b1; |
|
93 |
+} |
|
94 |
+ |
|
95 |
+ |
|
96 |
+ |
|
97 |
+ |
|
98 |
+// Problem is in largest integer storable, |
|
99 |
+// but then we add a 0.5, ... |
|
100 |
+// So we can only go up to 2^52. |
|
101 |
+// run precissionLoss with 4e15, 5e15, 2^52, 2^53 |
|
102 |
+ |
|
103 |
+ |
|
104 |
+// Notice this: Bin2(4e15, 1 + 1e15, .4) |
|
105 |
+// But this works Bin2(5e15, 2, .4) |
|
106 |
+// Though this breaks Bin2(5e15, 1, .4) |
|
107 |
+ |
|
108 |
+ |
|
109 |
+// See rbinom.c for the cause of the problem. |
|
110 |
+// It is r = floor(nin + 0.5); |
|
111 |
+// if (r != nin) ML_ERR_return_NAN; |
|
112 |
+ |
|
113 |
+// This is with odd numbers, not even. E.g.: if x > 5e15 and odd, it fails. |
|
114 |
+// 6e15 -> x; y <- (x + 1); floor(y + 0.5) == y |
|
115 |
+ |
|
116 |
+// [[Rcpp::export]] |
|
117 |
+double NBin(NumericVector num, NumericVector p) { |
|
118 |
+ return ::Rf_rnbinom(as<double>(num), as<double>(p)); |
|
119 |
+} |
|
120 |
+ |
|
121 |
+ |
|
122 |
+// check we do not get overflows as follows |
|
123 |
+// a <- Bin1(4e15, 1); b <- Bin(4e15, 1); a - b |
|
124 |
+ |
0 | 125 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,69 @@ |
1 |
+\name{examplePosets} |
|
2 |
+\alias{examplePosets} |
|
3 |
+\docType{data} |
|
4 |
+\title{ |
|
5 |
+Example posets |
|
6 |
+} |
|
7 |
+ |
|
8 |
+\description{ |
|
9 |
+ |
|
10 |
+ Some example posets. For simplicity, all the posets are in a single |
|
11 |
+ list. You can access each poset by accessing each element of the |
|
12 |
+ list. The first digit or pair of digits denotes the number of nodes. |
|
13 |
+ |
|
14 |
+ |
|
15 |
+ Poset 1101 is the same as the one in Gerstung et al., 2009 (figure |
|
16 |
+ 2A, poset 2). Poset 701 is the same as the one in Gerstung et al., |
|
17 |
+ 2011 (figure 2B, left, the pancreatic cancer poset). Those posets |
|
18 |
+ were entered manually at the command line: see \code{\link{poset}}. |
|
19 |
+ |
|
20 |
+ |
|
21 |
+ |
|
22 |
+} |
|
23 |
+\usage{data("examplePosets")} |
|
24 |
+\format{ |
|
25 |
+The format is: |
|
26 |
+List of 13 |
|
27 |
+$ p1101: num [1:10, 1:2] 1 1 3 3 3 7 7 8 9 10 ... |
|
28 |
+$ p1102: num [1:9, 1:2] 1 1 3 3 3 7 7 9 10 2 ... |
|
29 |
+$ p1103: num [1:9, 1:2] 1 1 3 3 3 7 7 8 10 2 ... |
|
30 |
+$ p1104: num [1:9, 1:2] 1 1 3 3 7 7 9 2 10 2 ... |
|
31 |
+$ p901 : num [1:8, 1:2] 1 2 4 5 7 8 5 1 2 3 ... |
|
32 |
+$ p902 : num [1:6, 1:2] 1 2 4 5 7 5 2 3 5 6 ... |
|
33 |
+$ p903 : num [1:6, 1:2] 1 2 5 7 8 1 2 3 6 8 ... |
|
34 |
+$ p904 : num [1:6, 1:2] 1 4 5 5 1 7 2 5 8 6 ... |
|
35 |
+$ p701 : num [1:9, 1:2] 1 1 1 1 2 3 4 4 5 2 ... |
|
36 |
+$ p702 : num [1:6, 1:2] 1 1 1 1 2 4 2 3 4 5 ... |
|
37 |
+$ p703 : num [1:6, 1:2] 1 1 1 1 3 5 2 3 4 5 ... |
|
38 |
+$ p704 : num [1:6, 1:2] 1 1 1 1 4 5 2 3 4 5 ... |
|
39 |
+$ p705 : num [1:6, 1:2] 1 2 1 1 1 2 2 5 4 6 ... |
|
40 |
+} |
|
41 |
+\source{ |
|
42 |
+ Gerstung et al., 2009. Quantifying cancer progression with conjunctive |
|
43 |
+ Bayesian networks. \emph{Bioinformatics}, 21: 2809--2815. |
|
44 |
+ |
|
45 |
+ Gerstung et al., 2011. The Temporal Order of Genetic and Pathway |
|
46 |
+ Alterations in Tumorigenesis. \emph{PLoS ONE}, 6. |
|
47 |
+ |
|
48 |
+} |
|
49 |
+ |
|
50 |
+\seealso{ |
|
51 |
+ \code{\link{poset}} |
|
52 |
+} |
|
53 |
+ |
|
54 |
+ |
|
55 |
+\examples{ |
|
56 |
+data(examplePosets) |
|
57 |
+ |
|
58 |
+## Plot all of them |
|
59 |
+par(mfrow = c(3, 5)) |
|
60 |
+ |
|
61 |
+invisible(sapply(names(examplePosets), |
|
62 |
+ function(x) {plotPoset(examplePosets[[x]], |
|
63 |
+ main = x, |
|
64 |
+ box = TRUE)})) |
|
65 |
+ |
|
66 |
+} |
|
67 |
+\keyword{datasets} |
|
68 |
+ |
|
69 |
+ |
0 | 70 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,335 @@ |
1 |
+\name{oncoSimulIndiv} |
|
2 |
+\alias{oncoSimulIndiv} |
|
3 |
+\alias{oncoSimulPop} |
|
4 |
+\alias{print.oncosimul} |
|
5 |
+\alias{print.oncosimulpop} |
|
6 |
+\alias{summary.oncosimul} |
|
7 |
+\alias{summary.oncosimulpop} |
|
8 |
+ |
|
9 |
+ |
|
10 |
+ |
|
11 |
+\title{ |
|
12 |
+ Simulate tumor progression for one or more individuals. |
|
13 |
+} |
|
14 |
+\description{ |
|
15 |
+ Simulate tumor progression including possible restrictions in the |
|
16 |
+ order of driver mutations. Optionally add passenger |
|
17 |
+ mutations. Simulation is done using the BNB algorithm of Mather et |
|
18 |
+ al., 2012. |
|
19 |
+} |
|
20 |
+\usage{ |
|
21 |
+ oncoSimulIndiv(poset, model = "Bozic", numPassengers = 30, mu = 1e-6, |
|
22 |
+ detectionSize = 1e8, detectionDrivers = 4, |
|
23 |
+ sampleEvery = 1, initSize = 500, s = 0.1, sh = -1, |
|
24 |
+ K = initSize/(exp(1) - 1), keepEvery = sampleEvery, |
|
25 |
+ finalTime = 0.25 * 25 * 365, onlyCancer = TRUE, |
|
26 |
+ max.memory = 2000, max.wall.time = 200, |
|
27 |
+ verbosity = 0) |
|
28 |
+ |
|
29 |
+ oncoSimulPop(Nindiv, poset, model = "Bozic", numPassengers = 30, mu = 1e-6, |
|
30 |
+ detectionSize = 1e8, detectionDrivers = 4, |
|
31 |
+ sampleEvery = 1, initSize = 500, s = 0.1, sh = -1, |
|
32 |
+ K = initSize/(exp(1) - 1), keepEvery = sampleEvery, |
|
33 |
+ finalTime = 0.25 * 25 * 365, onlyCancer = TRUE, |
|
34 |
+ max.memory = 2000, max.wall.time = 200, |
|
35 |
+ verbosity = 0, mc.cores = detectCores()) |
|
36 |
+ |
|
37 |
+ |
|
38 |
+} |
|
39 |
+ |
|
40 |
+\arguments{ |
|
41 |
+ |
|
42 |
+ \item{Nindiv}{Number of individuals or number of different |
|
43 |
+ trajectories to simulate.} |
|
44 |
+ \item{poset}{ |
|
45 |
+ |
|
46 |
+ The poset that specifies the order restrictions. See \code{\link{poset}}. |
|
47 |
+ |
|
48 |
+} |
|
49 |
+\item{model}{ |
|
50 |
+ One of "Bozic", "Exp", "McFarlandLog" (the last one can be abbreviated |
|
51 |
+ to "McFL"). |
|
52 |
+ |
|
53 |
+} |
|
54 |
+\item{numPassengers}{ |
|
55 |
+ |
|
56 |
+ The number of passenger genes.The total number of genes (drivers plus |
|
57 |
+ passengers) must be smaller than 64. |
|
58 |
+ |
|
59 |
+ All driver genes should be included in the poset (even if they depend |
|
60 |
+ on no one and no one depends on them), and will be numbered from 1 to |
|
61 |
+ the total number of driver genes. Thus, passenger genes will be |
|
62 |
+ numbered from (number of driver genes + 1):(number of drivers + number |
|
63 |
+ of passengers). |
|
64 |
+ |
|
65 |
+} |
|
66 |
+\item{mu}{ |
|
67 |
+ Mutation rate. |
|
68 |
+ |
|
69 |
+} |
|
70 |
+\item{detectionSize}{ |
|
71 |
+ What is the minimal number of cells for cancer to be detected. |
|
72 |
+ |
|
73 |
+} |
|
74 |
+\item{detectionDrivers}{ |
|
75 |
+ The minimal number of drivers present in any clone for cancer to be detected. |
|
76 |
+ |
|
77 |
+} |
|
78 |
+\item{sampleEvery}{ |
|
79 |
+ |
|
80 |
+ How often the whole population is sampled. This is not the same as the |
|
81 |
+ interval between successive samples that keep stored (for that, see |
|
82 |
+ \code{keepEvery}). |
|
83 |
+ |
|
84 |
+ For very fast growing clones, you might need to have a small value |
|
85 |
+ here to minimize possible numerical problems (such as huge increase in |
|
86 |
+ population size between two successive samples that can then lead to |
|
87 |
+ problems for random number generators). Likewise, for models with |
|
88 |
+ density dependence (such as McF) this value should be very small. |
|
89 |
+ |
|
90 |
+} |
|
91 |
+\item{initSize}{ |
|
92 |
+ Initial population size. |
|
93 |
+ |
|
94 |
+} |
|
95 |
+\item{s}{ |
|
96 |
+ Selection coefficient for drivers. |
|
97 |
+ |
|
98 |
+} |
|
99 |
+\item{sh}{ |
|
100 |
+ Selection coefficient for drivers with restrictions not satisfied. A |
|
101 |
+ value of 0 means there are no penalties for a driver appearing in a |
|
102 |
+ clone when its restrictions are not satisfied. |
|
103 |
+ |
|
104 |
+ To specify "sh=Inf" (in Diaz-Uriarte, 2014) use sh = -1. |
|
105 |
+ |
|
106 |
+} |
|
107 |
+\item{K}{ |
|
108 |
+ Initial population equilibrium size in the McFarland models. |
|
109 |
+ |
|
110 |
+} |
|
111 |
+\item{keepEvery}{ |
|
112 |
+ Time interval between successive whole population samples that are |
|
113 |
+ actually stored. This must be larger or equal to sampleEvery. If keepEvery is |
|
114 |
+ not a multiple integer of sampleEvery, the keepEvery in use will be the |
|
115 |
+ smallest multiple integer of keepEvery larger than the specified |
|
116 |
+ keepEvery. |
|
117 |
+ |
|
118 |
+ If you want nice plots, set \code{sampleEvery} and \code{keepEvery} to |
|
119 |
+ small values (say, 1 or 0.5). Otherwise, you can use a |
|
120 |
+ \code{sampleEvery} of 1 but a \code{keepEvery} of 15, so that the |
|
121 |
+ return objects are not huge. |
|
122 |
+ |
|
123 |
+} |
|
124 |
+\item{finalTime}{ |
|
125 |
+ What is the maximum number of time units that the simulation can run. |
|
126 |
+ |
|
127 |
+} |
|
128 |
+\item{onlyCancer}{ |
|
129 |
+ Return only simulations that reach cancer? |
|
130 |
+ |
|
131 |
+ If set to TRUE, only simulations that satisfy the detectionDrivers or |
|
132 |
+ the detectionSize will be returned (i.e., the simulation will be |
|
133 |
+ repeated until one which meets that cancer requirements is |
|
134 |
+ met). Otherwise, the simulation is returned regardless of final |
|
135 |
+ population size or number of drivers in any clone and this includes |
|
136 |
+ simulations where the population goes extinct. |
|
137 |
+ |
|
138 |
+} |
|
139 |
+\item{max.memory}{ |
|
140 |
+ The largest size (in MB) of the matrix of Populations by Time. If it |
|
141 |
+ creating it would use more than this amount of memory, it is not |
|
142 |
+ created. This prevents you from accidentally passing parameters that |
|
143 |
+ will return an enormous object. |
|
144 |
+ |
|
145 |
+} |
|
146 |
+\item{max.wall.time}{ |
|
147 |
+ Maximum wall time for each individual simulation run. If the |
|
148 |
+ simulation is not done in this time, it is aborted. |
|
149 |
+ |
|
150 |
+} |
|
151 |
+\item{verbosity}{ |
|
152 |
+ If 0, run as silently as possible. Otherwise, increasing values of |
|
153 |
+ verbosity provide progressively more information about intermediate |
|
154 |
+ steps, possible numerical notes/warnings from the C++ code, etc. |
|
155 |
+ |
|
156 |
+} |
|
157 |
+ |
|
158 |
+\item{mc.cores}{Number of cores to use when simulating more than one |
|
159 |
+ individual (i.e., when calling oncoSimulPop).} |
|
160 |
+ |
|
161 |
+} |
|
162 |
+\details{ |
|
163 |
+ |
|
164 |
+ The basic simulation algorithm implemented is the BNB one of Mather et |
|
165 |
+ al., 2012, where I have added modifications to fitness based on the |
|
166 |
+ restrictions in the order of mutations. |
|
167 |
+ |
|
168 |
+ Full details about the algorithm are provided in Mather et al., |
|
169 |
+ 2012. The evolutionary models, including references, and the rest of |
|
170 |
+ the parameters are explained in Diaz-Uriarte, 2014, especially in the |
|
171 |
+ Supplementary Material. The model called "Bozic" is based on Bozic et |
|
172 |
+ al., 2010, and the model called "McFarland" in McFarland et al., 2013. |
|
173 |
+ |
|
174 |
+ |
|
175 |
+ oncoSimulPop simply calls oncoSimulIndiv multiple times. When run on |
|
176 |
+ POSIX systems, it can use multiple cores (via mclapply). |
|
177 |
+ |
|
178 |
+ |
|
179 |
+ The \code{summary} methods for these classes return some of the return |
|
180 |
+ values (see next) as a one-row (for class oncosimul) or multiple row |
|
181 |
+ (for class oncosimulpop) data frame. The \code{print} methods for |
|
182 |
+ these classes simply print the summary. |
|
183 |
+ |
|
184 |
+} |
|
185 |
+\value{ |
|
186 |
+ |
|
187 |
+ For \code{oncoSimulIndiv} a list, of class "oncosimul", with the |
|
188 |
+ following components: |
|
189 |
+ |
|
190 |
+ \item{pops.by.time }{A matrix of the population sizes of the clones, |
|
191 |
+ with clones in columns and time in row. Not all clones are shown here, |
|
192 |
+ only those that were present in at least on of the keepEvery samples.} |
|
193 |
+ |
|
194 |
+ \item{NumClones }{Total number of clones in the above matrix. } |
|
195 |
+ |
|
196 |
+ \item{TotalPopSize}{Total population size at the end.} |
|
197 |
+ |
|
198 |
+ \item{Genotypes}{A matrix of genotypes. For each of the clones in the |
|
199 |
+ pops.by.time matrix, its genotype, with a 0 if the gene is not |
|
200 |
+ mutated and a 1 if it is mutated.} |
|
201 |
+ |
|
202 |
+ \item{MaxNumDrivers}{The largest number of mutated driver genes ever |
|
203 |
+ seen in the simulation in any clone.} |
|
204 |
+ |
|
205 |
+ \item{MaxDriversLast}{The largest number of mutated drivers in any |
|
206 |
+ clone at the end of the simulation.} |
|
207 |
+ |
|
208 |
+ \item{NumDriversLargestPop}{The number of mutated driver genes in the |
|
209 |
+ clone with largest population size. } |
|
210 |
+ |
|
211 |
+ \item{LargestClone}{Population size of the clone with largest number |
|
212 |
+ of population size.} |
|
213 |
+ |
|
214 |
+ \item{PropLargestPopLast}{Ratio of LargestClone/TotalPopSize} |
|
215 |
+ |
|
216 |
+ \item{FinalTime}{The time (in time units) at the end of the |
|
217 |
+ simulation.} |
|
218 |
+ |
|
219 |
+ \item{NumIter}{The number of iterations of the BNB algorithm.} |
|
220 |
+ |
|
221 |
+ \item{HittedWallTime}{TRUE if we reached the limit of max.wall.time. FALSE |
|
222 |
+ otherwise.} |
|
223 |
+ |
|
224 |
+ \item{TotalPresentDrivers}{The total number of mutated driver genes, |
|
225 |
+ whether or not in the same clone. The number of elements in |
|
226 |
+ \code{OccurringDrivers}, below.} |
|
227 |
+ |
|
228 |
+ \item{CountByDriver}{A vector of length number of drivers, with the |
|
229 |
+ count of the number of clones that have that driver mutated.} |
|
230 |
+ |
|
231 |
+ \item{OccurringDrivers}{The actual number of drivers mutated.} |
|
232 |
+ |
|
233 |
+ \item{PerSampleStats}{A 5 column matrix with a row for each sampling |
|
234 |
+ period. The columns are: total population size, population size of the |
|
235 |
+ largest clone, the ratio of the two, the largest number of drivers in |
|
236 |
+ any clone, and the number of drivers in the clone with the largest |
|
237 |
+ population size.} |
|
238 |
+ |
|
239 |
+ \item{other}{A list that contains statistics for an estimate of the |
|
240 |
+ simulation error when using the McFarland model. The relevant value |
|
241 |
+ is errorMF, which is -99 unless in the McFarland model. For the |
|
242 |
+ McFarland model it is the largest difference of successive death |
|
243 |
+ rates.} |
|
244 |
+ |
|
245 |
+ |
|
246 |
+ For \code{oncoSimulPop} a list of length \code{Nindiv}, and of class |
|
247 |
+ \code{"oncosimulpop"}, where each element of the list is itself a |
|
248 |
+ list, of class \code{oncosimul}, with components as described above. |
|
249 |
+ |
|
250 |
+ |
|
251 |
+} |
|
252 |
+\references{ |
|
253 |
+ |
|
254 |
+ Bozic, I., et al., (2010). Accumulation of driver and passenger |
|
255 |
+ mutations during tumor progression. \emph{ Proceedings of the National |
|
256 |
+ Academy of Sciences of the United States of America\/}, \bold{107}, |
|
257 |
+ 18545--18550. |
|
258 |
+ |
|
259 |
+ Diaz-Uriarte, R. (2014). Inferring restrictions in the temporal order |
|
260 |
+ of mutations during tumor progression: effects of passenger mutations, |
|
261 |
+ evolutionary models, and |
|
262 |
+ sampling. \url{http://dx.doi.org/10.1101/005587} |
|
263 |
+ |
|
264 |
+ McFarland, C.~D. et al. (2013). Impact of deleterious passenger |
|
265 |
+ mutations on cancer progression. \emph{Proceedings of the National |
|
266 |
+ Academy of Sciences of the United States of America\/}, \bold{110}(8), |
|
267 |
+ 2910--5. |
|
268 |
+ |
|
269 |
+ Mather, W.~H., Hasty, J., and Tsimring, L.~S. (2012). Fast stochastic |
|
270 |
+ algorithm for simulating evolutionary population dynamics. |
|
271 |
+ \emph{Bioinformatics (Oxford, England)\/}, \bold{28}(9), 1230--1238. |
|