Browse code

v.2.5.2 - Lots and lots of addition to vignette including benchmarks. - Diversity of sampled genotypes. - Genotyping error can be added in samplePop. - LOD and POM (lines of descent, path of maximum, sensu Szendro et al.). - simOGraph can also out rT data frames. - Better (and better explained) estimates of simulation error for McFL.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@124982 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 10/12/2016 16:05:05
Showing36 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.5.1
5
-Date: 2016-11-12
4
+Version: 2.5.2
5
+Date: 2016-12-10
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -11,6 +11,8 @@ export("oncoSimulPop", "oncoSimulIndiv", "samplePop",
11 11
        "plotFitnessLandscape",
12 12
        "to_Magellan",
13 13
        "sampledGenotypes"
14
+       , "POM", "LOD"
15
+       , "diversityPOM", "diversityLOD"
14 16
        )
15 17
 
16 18
 S3method(plot, oncosimul)
... ...
@@ -23,6 +25,19 @@ S3method(plot, fitnessEffects)
23 25
 S3method(plot, genotype_fitness_matrix)
24 26
 S3method(plot, evalAllGenotypes)
25 27
 S3method(plot, evalAllGenotypesMut)
28
+S3method(print, sampledGenotypes)
29
+
30
+S3method(POM, oncosimul2)
31
+S3method(POM, oncosimulpop)
32
+S3method(LOD, oncosimul2)
33
+S3method(LOD, oncosimulpop)
34
+
35
+
36
+## S3method(summary, oncosimul_lod_list)
37
+## S3method(summary, oncosimul_pom_list)
38
+
39
+
40
+
26 41
 
27 42
 import(ggplot2)
28 43
 importFrom("ggrepel", geom_text_repel, geom_label_repel)
29 44
new file mode 100644
... ...
@@ -0,0 +1,168 @@
1
+## Copyright 2016 Ramon Diaz-Uriarte
2
+
3
+## This program is free software: you can redistribute it and/or modify
4
+## it under the terms of the GNU General Public License as published by
5
+## the Free Software Foundation, either version 3 of the License, or
6
+## (at your option) any later version.
7
+
8
+## This program is distributed in the hope that it will be useful,
9
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+## GNU General Public License for more details.
12
+
13
+## You should have received a copy of the GNU General Public License
14
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+
17
+
18
+## Functions to obtain LOD and POM similar to Szendro et al., 2014, PNAS.
19
+genot_max <- function(x) {
20
+    x$GenotypesLabels[which.max(x$pops.by.time[nrow(x$pops.by.time), -1])]
21
+}
22
+
23
+LOD.internal <- function(x) {
24
+    ## Not identical to LOD of Szendro because:
25
+    
26
+    ##  a) I think we want all paths, not just their single LOD, which I
27
+    ##  think they might use out of convenience.
28
+
29
+    ##  b) For me it is a mess, a complicated mess, to use their LOD as
30
+    ##  they define it and there are many ambiguities in how to define it
31
+    ##  in continuous time.
32
+
33
+    ## This also means that single simulation might yield multiple LODs
34
+
35
+    ## keepEvents is FALSE to make this object as small as possible.
36
+    if(is.null(x$pops.by.time)) {
37
+        warning("Missing needed components. This might be a failed simulation.",
38
+                " Returning NA.")
39
+        return(list(all_paths = NA, lod_single = NA))
40
+    }
41
+    pc <- phylogClone(x, keepEvents = FALSE)
42
+    
43
+    if((length(pc) == 1) && (is.na(pc))) {
44
+        return(list(all_paths = NA,
45
+                    lod_single = "No_descendants"))
46
+    }
47
+    pcg <- pc$graph
48
+    end <- genot_max(x)
49
+    all_paths <- igraph::all_simple_paths(pcg, from = "", to = end, mode = "out")
50
+    ## the next is partially redundant
51
+    ## graph_to_end <- igraph::make_ego_graph(pcg, order = 1e9, nodes = end,
52
+    ##                                        mode = "in")
53
+    ## if(length(graph_to_end) != 1) stop("length(graph_to_end) > 1")
54
+    ## I am not sure if I should keep the last one. Redundant
55
+
56
+    ## This gives a single path and it is the first entry into each
57
+    ## destination. But we do not check there is no extinction afterwards.
58
+    ## The closest to the single Szendro LOD
59
+    if(end == "") {
60
+        ## Max is WT
61
+        lod_single <- "WT_is_end"
62
+    } else {
63
+        singlep <- pc$df
64
+        singlep[, 1] <- as.character(singlep[, 1])
65
+        singlep[, 2] <- as.character(singlep[, 2])
66
+        singlep <- singlep[ do.call(order, singlep[, c(2, 3)]), ]
67
+        singlep <- singlep[!duplicated(singlep[, 2]), ]
68
+        gsingle <- igraph::graph_from_data_frame(singlep)
69
+        lod_single <- igraph::all_simple_paths(gsingle, from = "", to = end, mode = "out")
70
+        if(length(lod_single) != 1) stop("lod_single != 1")
71
+    }
72
+    return(list(all_paths = all_paths,
73
+                lod_single = lod_single[[1]])) ##, graph_to_end = graph_to_end[[1]]))
74
+                ## graph_phylog_clone = pcg))
75
+}
76
+
77
+
78
+POM.internal <- function(x) {
79
+    if(is.null(x$pops.by.time)) {
80
+        warning("Missing needed components. This might be a failed simulation.",
81
+                " Returning NA.")
82
+        return(NA)
83
+    }
84
+    x$GenotypesLabels[rle(apply(x$pops.by.time[, -1, drop = FALSE], 1, which.max))$values]
85
+}
86
+
87
+## First do, over a set of simulations, sim:
88
+## l_lod_single <- mclapply(sim, LODs)
89
+## l_poms <- mclapply(sim, POM)
90
+
91
+
92
+diversityLOD <- function(llod) {
93
+    nn <- names(llod[[1]])
94
+    if( is_null_na(nn) ||
95
+        !(nn == c("all_paths", "lod_single")))
96
+        stop("Object must be a list of LODs")
97
+    pathstr <- unlist(lapply(llod, function(x) paste(names(x$lod_single),
98
+                                                     collapse = "_")))
99
+    shannonI(table(pathstr))
100
+}
101
+
102
+diversityPOM <- function(lpom) {
103
+    if(!inherits(lpom, "list"))
104
+        stop("Object must be a list of POMs")
105
+    ## if(!inherits(x, "oncosimul_lod_list"))
106
+    ##     stop("This is not a list of POMs")
107
+    pomstr <- unlist(lapply(lpom, function(x) paste(x, collapse = "_")))
108
+    shannonI(table(pomstr))
109
+}
110
+
111
+## a legacy
112
+diversity_POM <- diversityPOM
113
+diversity_LOD <- diversityLOD
114
+
115
+## POM.oncosimul2 <- POM.internal
116
+## LOD2.oncosimul2 <- LOD.internal
117
+
118
+POM <- function(x) {
119
+    UseMethod("POM", x)
120
+}
121
+
122
+LOD <- function(x) {
123
+    UseMethod("LOD", x)
124
+}
125
+
126
+POM.oncosimul2 <- function(x) return(POM.internal(x))
127
+LOD.oncosimul2 <- function(x) return(LOD.internal(x))
128
+
129
+POM.oncosimulpop <- function(x) return(lapply(x, POM.internal))
130
+LOD.oncosimulpop <- function(x) return(lapply(x, LOD.internal))
131
+
132
+## POM.oncosimul2 <- function(x) {
133
+##     out <- POM.internal(x)
134
+##     class(out) <- c(class(out), "oncosimul_pom")
135
+##     return(out)
136
+## }
137
+
138
+## LOD.oncosimul2 <- function(x) {
139
+##     out <- LOD.internal(x)
140
+##     class(out) <- c(class(out), "oncosimul_lod")
141
+##     return(out)
142
+## }
143
+
144
+
145
+## POM.oncosimulpop <- function(x) {
146
+##     out <- lapply(x, POM.internal)
147
+##     class(out) <- c(class(out), "oncosimul_pom_list")
148
+##     return(out)
149
+## }
150
+
151
+## LOD.oncosimulpop <- function(x) {
152
+##     out <- lapply(x, LOD.internal)
153
+##     class(out) <- c(class(out), "oncosimul_lod_list")
154
+##     return(out)
155
+## }
156
+
157
+
158
+## summary.oncosimul_lod_list <- function(x) {
159
+##     cat("List of ", length(x), " simulations\n.")
160
+##     cat("Shannon's diversity (entropy) = ", diversityLOD(x), "\n")
161
+## }
162
+
163
+## summary.oncosimul_pom_list <- function(x) {
164
+##     cat("List of ", length(x), " simulations\n.")
165
+##     cat("Shannon's diversity (entropy) = ", diversityPOM(x), "\n")
166
+## }
167
+
168
+
... ...
@@ -258,12 +258,26 @@ oncoSimulSample <- function(Nindiv,
258 258
     }
259 259
 }
260 260
 
261
+add_noise <- function(x, properr) {
262
+    if(properr <= 0) {
263
+        return(x)
264
+    }
265
+    else {
266
+        if(properr > 1)
267
+            stop("Proportion with error cannot be > 1")
268
+        nn <- prod(dim(x))
269
+        flipped <- sample(nn, round(nn * properr))
270
+        x[flipped] <- as.integer(!x[flipped])
271
+        return(x)
272
+    }
273
+}
261 274
 
262 275
 samplePop <- function(x, timeSample = "last",
263 276
                       typeSample = "whole",
264 277
                       thresholdWhole = 0.5,
265 278
                       geneNames = NULL,
266
-                      popSizeSample = NULL) {
279
+                      popSizeSample = NULL,
280
+                      propError = 0) {
267 281
     ## timeSample <- match.arg(timeSample)
268 282
     gN <- geneNames
269 283
     
... ...
@@ -301,6 +315,9 @@ samplePop <- function(x, timeSample = "last",
301 315
         nrow(z), " subjects and ",
302 316
             ncol(z), " genes.\n")
303 317
 
318
+    if(propError > 0) {
319
+        z <- add_noise(z, propError)
320
+    }
304 321
     if(!is.null(gN)) {
305 322
         colnames(z) <- gN
306 323
     } else {
... ...
@@ -530,7 +547,8 @@ oncoSimulIndiv <- function(fp,
530 547
     if(is_null_na(detectionDrivers)) detectionDrivers <- (2^31) - 1
531 548
     if(is_null_na(detectionSize)) detectionSize <- Inf
532 549
     if(is_null_na(finalTime)) finalTime <- Inf
533
-    
550
+
551
+    if(is_null_na(sampleEvery)) stop("sampleEvery cannot be NULL or NA")
534 552
     
535 553
     if(!inherits(fp, "fitnessEffects")) {
536 554
         if(any(unlist(lapply(list(fp, 
... ...
@@ -694,18 +712,24 @@ oncoSimulIndiv <- function(fp,
694 712
 }
695 713
 
696 714
 summary.oncosimul <- function(object, ...) {
715
+    ## This should be present even in HittedWallTime and HittedMaxTries
716
+    ## if those are not regarded as errors
717
+    pbp <- ("pops.by.time" %in% names(object) )
697 718
     
698
-    if(object$other$UnrecoverExcept) ## yes, when bailing out from
719
+    if(object$other$UnrecoverExcept) { ## yes, when bailing out from
699 720
                                      ## except. can have just minimal
700 721
                                      ## content
701 722
         return(NA)
702
-    else if (object$HittedWallTime || object$HittedMaxTries)
723
+    } else if( !pbp & (object$HittedWallTime || object$HittedMaxTries) ) {
703 724
         return(NA)
704
-    else {
725
+    } else if ( !pbp & !(object$HittedWallTime || object$HittedMaxTries) ) {
726
+        stop("Eh? No pops.by.time but did not hit wall time or max tries? BUG!")
727
+    } else {
705 728
         tmp <- object[c("NumClones", "TotalPopSize", "LargestClone",
706 729
                         "MaxNumDrivers", "MaxDriversLast",
707 730
                         "NumDriversLargestPop", "TotalPresentDrivers",
708
-                        "FinalTime", "NumIter", "HittedWallTime")]
731
+                        "FinalTime", "NumIter", "HittedWallTime",
732
+                        "HittedMaxTries")]
709 733
  
710 734
         tmp$errorMF <- object$other$errorMF
711 735
         tmp$minDMratio <- object$other$minDMratio
... ...
@@ -1458,7 +1482,8 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
1458 1482
     
1459 1483
     df <- x$other$PhylogDF
1460 1484
     if(nrow(df) == 0) {
1461
-        warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared.")
1485
+        warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared. ",
1486
+                "This also happens if you did not set 'keepPhylog = TRUE'.")
1462 1487
         return(NA)
1463 1488
     }
1464 1489
     if(!keepEvents) { ## is this just a graphical thing? or not?
... ...
@@ -15,10 +15,22 @@
15 15
 
16 16
 
17 17
 
18
-simOGraph <- function(n, h = 4, conjunction = TRUE, nparents = 3,
18
+simOGraph <- function(n, h = ifelse(n >= 4, 4, n),
19
+                      conjunction = TRUE, nparents = 3,
19 20
                       multilevelParent = TRUE,
20 21
                       removeDirectIndirect = TRUE,
21
-                      rootName = "Root") {
22
+                      rootName = "Root",
23
+                      geneNames = seq.int(n),
24
+                      out = c("adjmat", "rT"),
25
+                      s = 0.1,
26
+                      sh = -0.1,
27
+                      typeDep = "AND") {
28
+    out <- match.arg(out)
29
+    if(!is_null_na(geneNames)) {
30
+        stopifnot(length(geneNames) == n)
31
+    } else {
32
+        geneNames <- 1:n
33
+    }
22 34
     ## Returns an adjacency matrix
23 35
     if(h > n)
24 36
         stop("h > n")
... ...
@@ -65,13 +77,22 @@ simOGraph <- function(n, h = 4, conjunction = TRUE, nparents = 3,
65 77
         adjMat[1, 2:(n+1) ] <- 1L
66 78
     }
67 79
     
68
-    colnames(adjMat) <- rownames(adjMat) <- c(rootName, 1:n)
69
-    
80
+    colnames(adjMat) <- rownames(adjMat) <- c(rootName, geneNames)
81
+   
70 82
 
71 83
     ## Prune to remove indirect connections
72 84
     if(multilevelParent & removeDirectIndirect)
73 85
         adjMat <- removeIndirectConnections(adjMat)
74
-    return(adjMat)
86
+    if(out == "adjmat")
87
+        return(adjMat)
88
+    else {
89
+        df <- igraph::as_data_frame(igraph::graph.adjacency(adjMat))
90
+        colnames(df) <- c("parent", "child")
91
+        df$s <- s
92
+        df$sh <- sh
93
+        df$typeDep <- typeDep
94
+        return(df)
95
+    }
75 96
 }
76 97
 
77 98
 ## simOG <- simOGraph
... ...
@@ -1873,9 +1873,16 @@ sampledGenotypes <- function(y, genes = NULL) {
1873 1873
     gn <- as.character(df[, 1])
1874 1874
     gn[gn == ""] <- "WT"
1875 1875
     df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE)
1876
+    attributes(df)$ShannonI <- shannonI(df$Freq)
1877
+    class(df) <- c("sampledGenotypes", class(df))
1876 1878
     return(df)
1877 1879
 }
1878 1880
 
1881
+print.sampledGenotypes <- function(x, ...) {
1882
+    print.data.frame(x, ...)
1883
+    cat("\n Shannon's diversity (entropy) of sampled genotypes: ")
1884
+    cat(attributes(x)$ShannonI, "\n")
1885
+}
1879 1886
 
1880 1887
 
1881 1888
 list_g_matches_fixed <- function(x, y) {
1882 1889
new file mode 100644
1883 1890
Binary files /dev/null and b/data/benchmark_1.RData differ
1884 1891
new file mode 100644
1885 1892
Binary files /dev/null and b/data/benchmark_1_0.05.RData differ
1886 1893
new file mode 100644
1887 1894
Binary files /dev/null and b/data/benchmark_2.RData differ
1888 1895
new file mode 100644
1889 1896
Binary files /dev/null and b/data/benchmark_3.RData differ
... ...
@@ -1,3 +1,12 @@
1
+Changes in version 2.5.2 (2016-12-10):
2
+        - Lots and lots of addition to vignette including benchmarks.
3
+        - Diversity of sampled genotypes.
4
+	- Genotyping error can be added in samplePop.
5
+	- LOD and POM (lines of descent, path of maximum, sensu Szendro et
6
+	  al.).
7
+	- simOGraph can also out rT data frames.
8
+	- Better (and better explained) estimates of simulation error for McFL.
9
+
1 10
 Changes in version 2.5.1 (2016-11-12):
2 11
 	- AND of detectedSizeP and lastMaxDr.
3 12
 	- fixation as stopping mechanism.
4 13
new file mode 100644
... ...
@@ -0,0 +1,475 @@
1
+### Benchmark runs. This produces "benchmark_1.RData"
2
+
3
+
4
+rm(list = ls())
5
+set.seed(NULL)
6
+
7
+library(OncoSimulR)
8
+
9
+######################################################################
10
+######################################################################
11
+######################################################################
12
+######################################################################
13
+
14
+
15
+system_summary <- function() {
16
+    return(list(versioninfo = version,
17
+                memimfo = system("free", intern = TRUE),
18
+                cpuinfo = system("cat /proc/cpuinfo | grep 'model name'", intern = TRUE),
19
+                packageinfo = paste("OncoSimulR, ", packageVersion("OncoSimulR")),
20
+##                nodeinfo = Sys.info()$nodename,
21
+##                nodelinuxinfo = paste(Sys.info()$sysname, Sys.info()$release),
22
+                dateinfo = date()))
23
+}
24
+
25
+stats_simuls <- function(sim) {
26
+    ## sim is an oncoSimulPop output
27
+    tmp <- do.call("rbind",
28
+                   lapply(sim, function(x) c(NumClones = x$NumClones,
29
+                                      NumIter = x$NumIter,
30
+                                      FinalTime = x$FinalTime,
31
+                                      TotalPopSize = x$TotalPopSize,
32
+                                      Attempts = x$other$attemptsUsed)))
33
+    unlist(lapply(as.data.frame(tmp), summary))
34
+}
35
+
36
+all_sim_stats <- function(...) {
37
+    ## Returns a single data frame with all benchmark info
38
+    ## in ... pass the names of the objects
39
+    names <- as.character(as.list(match.call())[-c(1)])
40
+    m1 <- do.call("rbind", lapply(list(...), stats_simuls))
41
+    rownames(m1) <- names
42
+    Numindiv <- unlist(lapply(list(...), length))
43
+    time <- unlist(lapply(paste0("t_", names), get))
44
+    size <- unlist(lapply(list(...), object.size))
45
+    df <- data.frame(m1, size, time, Numindiv)
46
+    df$time_per_simul <- df$time/df$Numindiv
47
+    df$size_mb_per_simul <- df$size/(df$Numindiv * 1024^2)
48
+    attributes(df)$system_summary <- system_summary()
49
+    return(df)
50
+}
51
+
52
+######################################################################
53
+######################################################################
54
+######################################################################
55
+######################################################################
56
+
57
+
58
+pancr <- allFitnessEffects(
59
+    data.frame(parent = c("Root", rep("KRAS", 4), 
60
+                   "SMAD4", "CDNK2A", 
61
+                   "TP53", "TP53", "MLL3"),
62
+               child = c("KRAS","SMAD4", "CDNK2A", 
63
+                   "TP53", "MLL3",
64
+                   rep("PXDN", 3), rep("TGFBR2", 2)),
65
+               s = 0.1,
66
+               sh = -0.9,
67
+               typeDep = "MN"),
68
+    drvNames = c("KRAS", "SMAD4", "CDNK2A", "TP53", 
69
+	             "MLL3", "TGFBR2", "PXDN"))
70
+
71
+
72
+Nindiv <- 100
73
+
74
+
75
+## keepEvery = 1
76
+t_exp1 <- system.time(
77
+    exp1 <- oncoSimulPop(Nindiv, pancr, 
78
+                            detectionProb = "default", 
79
+                            detectionSize = NA,
80
+                            detectionDrivers = NA,
81
+                            finalTime = NA,
82
+                            keepPhylog = TRUE, keepEvery = 1,
83
+                            model = "Exp", 
84
+                            mc.cores = 1))["elapsed"]
85
+
86
+
87
+t_mc1 <- system.time(
88
+    mc1 <- oncoSimulPop(Nindiv, pancr, 
89
+                           detectionProb = "default", 
90
+                           detectionSize = NA,
91
+                           detectionDrivers = NA,
92
+                           finalTime = NA,
93
+                           keepPhylog = TRUE, keepEvery = 1,                                  
94
+                           model = "McFL", 
95
+                           mc.cores = 1))["elapsed"]
96
+
97
+## keepEvery = NA
98
+t_exp2 <- system.time(
99
+    exp2 <- oncoSimulPop(Nindiv, pancr, 
100
+                            detectionProb = "default", 
101
+                            detectionSize = NA,
102
+                            detectionDrivers = NA,
103
+                            finalTime = NA,
104
+                            keepPhylog = TRUE, keepEvery = NA, 
105
+                            model = "Exp", 
106
+                            mc.cores = 1))["elapsed"]
107
+
108
+
109
+t_mc2 <- system.time(
110
+    mc2 <- oncoSimulPop(Nindiv, pancr, 
111
+                           detectionProb = "default", 
112
+                           detectionSize = NA,
113
+                           detectionDrivers = NA,
114
+                           finalTime = NA,
115
+                           keepPhylog = TRUE, keepEvery = NA,
116
+                           model = "McFL", 
117
+                           mc.cores = 1))["elapsed"]
118
+
119
+
120
+
121
+### exp3 to exp6
122
+t_exp3 <- system.time(
123
+    exp3 <- oncoSimulPop(Nindiv, pancr, 
124
+                            detectionProb = c(PDBaseline = 5e4,
125
+                                              p2 = 0.1, n2 = 5e5,
126
+                                              checkSizePEvery = 20), 
127
+                            detectionSize = NA,
128
+                            detectionDrivers = NA,
129
+                            finalTime = NA,
130
+                            keepPhylog = TRUE, keepEvery = 1, 
131
+                            model = "Exp", 
132
+                            mc.cores = 1))["elapsed"]
133
+
134
+t_exp4 <- system.time(
135
+    exp4 <- oncoSimulPop(Nindiv, pancr, 
136
+                            detectionProb = c(PDBaseline = 5e4,
137
+                                              p2 = 0.1, n2 = 5e5,
138
+                                              checkSizePEvery = 20), 
139
+                            detectionSize = NA,
140
+                            detectionDrivers = NA,
141
+                            finalTime = NA,
142
+                            keepPhylog = TRUE, keepEvery = NA, 
143
+                            model = "Exp", 
144
+                            mc.cores = 1))["elapsed"]
145
+
146
+
147
+
148
+t_exp5 <- system.time(
149
+    exp5 <- oncoSimulPop(Nindiv, pancr, 
150
+                            detectionProb = c(PDBaseline = 5e5,
151
+                                              p2 = 0.1, n2 = 5e7,
152
+                                              checkSizePEvery = 20), 
153
+                            detectionSize = NA,
154
+                            detectionDrivers = NA,
155
+                            finalTime = NA,
156
+                            keepPhylog = TRUE, keepEvery = 1, 
157
+                            model = "Exp", 
158
+                            mc.cores = 1))["elapsed"]
159
+
160
+t_exp6 <- system.time(
161
+    exp6 <- oncoSimulPop(Nindiv, pancr, 
162
+                            detectionProb = c(PDBaseline = 5e5,
163
+                                              p2 = 0.1, n2 = 5e7,
164
+                                              checkSizePEvery = 20), 
165
+                            detectionSize = NA,
166
+                            detectionDrivers = NA,
167
+                            finalTime = NA,
168
+                            keepPhylog = TRUE, keepEvery = NA, 
169
+                            model = "Exp", 
170
+                            mc.cores = 1))["elapsed"]
171
+
172
+
173
+####################################################
174
+###### onlyCancer = FALSE
175
+
176
+
177
+## keepEvery = 1
178
+t_exp1_noc <- system.time(
179
+    exp1_noc <- oncoSimulPop(Nindiv, pancr, 
180
+                            detectionProb = "default", 
181
+                            detectionSize = NA,
182
+                            detectionDrivers = NA,
183
+                            finalTime = NA, onlyCancer = FALSE,
184
+                            keepPhylog = TRUE, keepEvery = 1,
185
+                            model = "Exp", 
186
+                            mc.cores = 1))["elapsed"]
187
+t_mc1_noc <- system.time(
188
+    mc1_noc <- oncoSimulPop(Nindiv, pancr, 
189
+                           detectionProb = "default", 
190
+                           detectionSize = NA,
191
+                           detectionDrivers = NA,
192
+                           finalTime = NA, onlyCancer = FALSE,
193
+                           keepPhylog = TRUE, keepEvery = 1,                                  
194
+                           model = "McFL", 
195
+                           mc.cores = 1))["elapsed"]
196
+## keepEvery = NA
197
+t_exp2_noc <- system.time(
198
+    exp2_noc <- oncoSimulPop(Nindiv, pancr, 
199
+                            detectionProb = "default", 
200
+                            detectionSize = NA,
201
+                            detectionDrivers = NA,
202
+                            finalTime = NA, onlyCancer = FALSE,
203
+                            keepPhylog = TRUE, keepEvery = NA, 
204
+                            model = "Exp", 
205
+                            mc.cores = 1))["elapsed"]
206
+t_mc2_noc <- system.time(
207
+    mc2_noc <- oncoSimulPop(Nindiv, pancr, 
208
+                           detectionProb = "default", 
209
+                           detectionSize = NA,
210
+                           detectionDrivers = NA,
211
+                           finalTime = NA, onlyCancer = FALSE,
212
+                           keepPhylog = TRUE, keepEvery = NA,
213
+                           model = "McFL", 
214
+                           mc.cores = 1))["elapsed"]
215
+### exp3_noc to exp6_noc
216
+t_exp3_noc <- system.time(
217
+    exp3_noc <- oncoSimulPop(Nindiv, pancr, 
218
+                            detectionProb = c(PDBaseline = 5e4,
219
+                                              p2 = 0.1, n2 = 5e5,
220
+                                              checkSizePEvery = 20), 
221
+                            detectionSize = NA,
222
+                            detectionDrivers = NA,
223
+                            finalTime = NA, onlyCancer = FALSE,
224
+                            keepPhylog = TRUE, keepEvery = 1, 
225
+                            model = "Exp", 
226
+                            mc.cores = 1))["elapsed"]
227
+t_exp4_noc <- system.time(
228
+    exp4_noc <- oncoSimulPop(Nindiv, pancr, 
229
+                            detectionProb = c(PDBaseline = 5e4,
230
+                                              p2 = 0.1, n2 = 5e5,
231
+                                              checkSizePEvery = 20), 
232
+                            detectionSize = NA,
233
+                            detectionDrivers = NA,
234
+                            finalTime = NA, onlyCancer = FALSE,
235
+                            keepPhylog = TRUE, keepEvery = NA, 
236
+                            model = "Exp", 
237
+                            mc.cores = 1))["elapsed"]
238
+t_exp5_noc <- system.time(
239
+    exp5_noc <- oncoSimulPop(Nindiv, pancr, 
240
+                            detectionProb = c(PDBaseline = 5e5,
241
+                                              p2 = 0.1, n2 = 5e7,
242
+                                              checkSizePEvery = 20), 
243
+                            detectionSize = NA,
244
+                            detectionDrivers = NA,
245
+                            finalTime = NA, onlyCancer = FALSE,
246
+                            keepPhylog = TRUE, keepEvery = 1, 
247
+                            model = "Exp", 
248
+                            mc.cores = 1))["elapsed"]
249
+t_exp6_noc <- system.time(
250
+    exp6_noc <- oncoSimulPop(Nindiv, pancr, 
251
+                            detectionProb = c(PDBaseline = 5e5,
252
+                                              p2 = 0.1, n2 = 5e7,
253
+                                              checkSizePEvery = 20), 
254
+                            detectionSize = NA,
255
+                            detectionDrivers = NA,
256
+                            finalTime = NA, onlyCancer = FALSE,
257
+                            keepPhylog = TRUE, keepEvery = NA, 
258
+                            model = "Exp", 
259
+                            mc.cores = 1))["elapsed"]
260
+
261
+benchmark_1 <- all_sim_stats(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6,
262
+                         exp1_noc, mc1_noc, exp2_noc, mc2_noc, exp3_noc, exp4_noc, exp5_noc, exp6_noc
263
+                         )
264
+
265
+## Add the info about key settings
266
+benchmark_1$keepEvery <- c(1, 1, NA, NA, 1, NA, 1, NA,
267
+                           1, 1, NA, NA, 1, NA, 1, NA)
268
+benchmark_1$PDBaseline <- c(rep(1.2 * 500, 4), 5e4, 5e4, 5e5, 5e5,
269
+                            rep(1.2 * 500, 4), 5e4, 5e4, 5e5, 5e5)
270
+benchmark_1$n2 <- c(rep(2 * 500, 4), 5e5, 5e5, 5e7, 5e7,
271
+                    rep(2 * 500, 4), 5e5, 5e5, 5e7, 5e7)
272
+benchmark_1$onlyCancer <- c(rep(TRUE, 8), rep(FALSE, 8))
273
+
274
+save(file = "../../data/benchmark_1.RData", benchmark_1)
275
+
276
+
277
+gc()
278
+
279
+## ######################################################################
280
+## ######################################################################
281
+## ###############                                         ##############
282
+## ###############      If you do it by hand ...           ##############
283
+## ###############                                         ##############
284
+## ######################################################################
285
+## ######################################################################
286
+
287
+
288
+
289
+## cat("\n\n\n t_exp1 = ", t_exp1, "\n")
290
+## object.size(exp1)/(Nindiv * 1024^2)
291
+## cat("\n\n")
292
+## summary(unlist(lapply(exp1, "[[", "NumClones")))
293
+## summary(unlist(lapply(exp1, "[[", "NumIter")))
294
+## summary(unlist(lapply(exp1, "[[", "FinalTime")))
295
+## summary(unlist(lapply(exp1, "[[", "TotalPopSize")))
296
+
297
+
298
+## cat("\n\n")
299
+## cat("\n\n\n t_mc1 = ", t_mc1, "\n")
300
+## object.size(mc1)/(Nindiv * 1024^2)
301
+## cat("\n\n")
302
+## summary(unlist(lapply(mc1, "[[", "NumClones")))
303
+## summary(unlist(lapply(mc1, "[[", "NumIter")))
304
+## summary(unlist(lapply(mc1, "[[", "FinalTime")))
305
+## summary(unlist(lapply(mc1, "[[", "TotalPopSize")))
306
+
307
+## cat("\n\n")
308
+## cat("\n\n\n t_exp2 = ", t_exp2, "\n")
309
+## object.size(exp2)/(Nindiv * 1024^2)
310
+## cat("\n\n")
311
+## summary(unlist(lapply(exp2, "[[", "NumClones")))
312
+## summary(unlist(lapply(exp2, "[[", "NumIter")))
313
+## summary(unlist(lapply(exp2, "[[", "FinalTime")))
314
+## summary(unlist(lapply(exp2, "[[", "TotalPopSize")))
315
+
316
+## cat("\n\n")
317
+## cat("\n\n\n t_mc2 = ", t_mc2, "\n")
318
+## object.size(mc2)/(Nindiv * 1024^2)
319
+## cat("\n\n")
320
+## summary(unlist(lapply(mc2, "[[", "NumClones")))
321
+## summary(unlist(lapply(mc2, "[[", "NumIter")))
322
+## summary(unlist(lapply(mc2, "[[", "FinalTime")))
323
+## summary(unlist(lapply(mc2, "[[", "TotalPopSize")))
324
+
325
+
326
+
327
+## cat("\n\n")
328
+## cat("\n\n\n t_exp3 = ", t_exp3, "\n")
329
+## object.size(exp3)/(Nindiv * 1024^2)
330
+## cat("\n\n")
331
+## summary(unlist(lapply(exp3, "[[", "NumClones")))
332
+## summary(unlist(lapply(exp3, "[[", "NumIter")))
333
+## summary(unlist(lapply(exp3, "[[", "FinalTime")))
334
+## summary(unlist(lapply(exp3, "[[", "TotalPopSize")))
335
+
336
+
337
+## cat("\n\n")
338
+## cat("\n\n\n t_exp4 = ", t_exp4, "\n")
339
+## object.size(exp4)/(Nindiv * 1024^2)
340
+## cat("\n\n")
341
+## summary(unlist(lapply(exp4, "[[", "NumClones")))
342
+## summary(unlist(lapply(exp4, "[[", "NumIter")))
343
+## summary(unlist(lapply(exp4, "[[", "FinalTime")))
344
+## summary(unlist(lapply(exp4, "[[", "TotalPopSize")))
345
+
346
+
347
+## cat("\n\n")
348
+## cat("\n\n\n t_exp5 = ", t_exp5, "\n")
349
+## object.size(exp5)/(Nindiv * 1024^2)
350
+## cat("\n\n")
351
+## summary(unlist(lapply(exp5, "[[", "NumClones")))
352
+## summary(unlist(lapply(exp5, "[[", "NumIter")))
353
+## summary(unlist(lapply(exp5, "[[", "FinalTime")))
354
+## summary(unlist(lapply(exp5, "[[", "TotalPopSize")))
355
+
356
+
357
+## cat("\n\n")
358
+## cat("\n\n\n t_exp6 = ", t_exp6, "\n")
359
+## object.size(exp6)/(Nindiv * 1024^2)
360
+## cat("\n\n")
361
+## summary(unlist(lapply(exp6, "[[", "NumClones")))
362
+## summary(unlist(lapply(exp6, "[[", "NumIter")))
363
+## summary(unlist(lapply(exp6, "[[", "FinalTime")))
364
+## summary(unlist(lapply(exp6, "[[", "TotalPopSize")))
365
+
366
+
367
+## ## Median runs until cancer
368
+
369
+## lapply(list(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6),
370
+##        function(y) median(unlist(lapply(y, function(x) x$other$attemptsUsed))))
371
+
372
+
373
+
374
+
375
+
376
+
377
+## cat("\n\n\n t_exp1_noc = ", t_exp1_noc, "\n")
378
+## object.size(exp1_noc)/(Nindiv * 1024^2)
379
+## cat("\n\n")
380
+## summary(unlist(lapply(exp1_noc, "[[", "NumClones")))
381
+## summary(unlist(lapply(exp1_noc, "[[", "NumIter")))
382
+## summary(unlist(lapply(exp1_noc, "[[", "FinalTime")))
383
+## summary(unlist(lapply(exp1_noc, "[[", "TotalPopSize")))
384
+
385
+
386
+## cat("\n\n")
387
+## cat("\n\n\n t_mc1_noc = ", t_mc1_noc, "\n")
388
+## object.size(mc1_noc)/(Nindiv * 1024^2)
389
+## cat("\n\n")
390
+## summary(unlist(lapply(mc1_noc, "[[", "NumClones")))
391
+## summary(unlist(lapply(mc1_noc, "[[", "NumIter")))
392
+## summary(unlist(lapply(mc1_noc, "[[", "FinalTime")))
393
+## summary(unlist(lapply(mc1_noc, "[[", "TotalPopSize")))
394
+
395
+## cat("\n\n")
396
+## cat("\n\n\n t_exp2_noc = ", t_exp2_noc, "\n")
397
+## object.size(exp2_noc)/(Nindiv * 1024^2)
398
+## cat("\n\n")
399
+## summary(unlist(lapply(exp2_noc, "[[", "NumClones")))
400
+## summary(unlist(lapply(exp2_noc, "[[", "NumIter")))
401
+## summary(unlist(lapply(exp2_noc, "[[", "FinalTime")))
402
+## summary(unlist(lapply(exp2_noc, "[[", "TotalPopSize")))
403
+
404
+## cat("\n\n")
405
+## cat("\n\n\n t_mc2_noc = ", t_mc2_noc, "\n")
406
+## object.size(mc2_noc)/(Nindiv * 1024^2)
407
+## cat("\n\n")
408
+## summary(unlist(lapply(mc2_noc, "[[", "NumClones")))
409
+## summary(unlist(lapply(mc2_noc, "[[", "NumIter")))
410
+## summary(unlist(lapply(mc2_noc, "[[", "FinalTime")))
411
+## summary(unlist(lapply(mc2_noc, "[[", "TotalPopSize")))
412
+
413
+
414
+
415
+## cat("\n\n")
416
+## cat("\n\n\n t_exp3_noc = ", t_exp3_noc, "\n")
417
+## object.size(exp3_noc)/(Nindiv * 1024^2)
418
+## cat("\n\n")
419
+## summary(unlist(lapply(exp3_noc, "[[", "NumClones")))
420
+## summary(unlist(lapply(exp3_noc, "[[", "NumIter")))
421
+## summary(unlist(lapply(exp3_noc, "[[", "FinalTime")))
422
+## summary(unlist(lapply(exp3_noc, "[[", "TotalPopSize")))
423
+
424
+
425
+## cat("\n\n")
426
+## cat("\n\n\n t_exp4_noc = ", t_exp4_noc, "\n")
427
+## object.size(exp4_noc)/(Nindiv * 1024^2)
428
+## cat("\n\n")
429
+## summary(unlist(lapply(exp4_noc, "[[", "NumClones")))
430
+## summary(unlist(lapply(exp4_noc, "[[", "NumIter")))
431
+## summary(unlist(lapply(exp4_noc, "[[", "FinalTime")))
432
+## summary(unlist(lapply(exp4_noc, "[[", "TotalPopSize")))
433
+
434
+
435
+## cat("\n\n")
436
+## cat("\n\n\n t_exp5_noc = ", t_exp5_noc, "\n")
437
+## object.size(exp5_noc)/(Nindiv * 1024^2)
438
+## cat("\n\n")
439
+## summary(unlist(lapply(exp5_noc, "[[", "NumClones")))
440
+## summary(unlist(lapply(exp5_noc, "[[", "NumIter")))
441
+## summary(unlist(lapply(exp5_noc, "[[", "FinalTime")))
442
+## summary(unlist(lapply(exp5_noc, "[[", "TotalPopSize")))
443
+
444
+
445
+## cat("\n\n")
446
+## cat("\n\n\n t_exp6_noc = ", t_exp6_noc, "\n")
447
+## object.size(exp6_noc)/(Nindiv * 1024^2)
448
+## cat("\n\n")
449
+## summary(unlist(lapply(exp6_noc, "[[", "NumClones")))
450
+## summary(unlist(lapply(exp6_noc, "[[", "NumIter")))
451
+## summary(unlist(lapply(exp6_noc, "[[", "FinalTime")))
452
+## summary(unlist(lapply(exp6_noc, "[[", "TotalPopSize")))
453
+
454
+
455
+## ## Median runs until cancer
456
+
457
+## lapply(list(exp1_noc, mc1_noc, exp2_noc, mc2_noc, exp3_noc, exp4_noc, exp5_noc, exp6_noc),
458
+##        function(y) median(unlist(lapply(y, function(x) x$other$attemptsUsed))))
459
+
460
+
461
+
462
+
463
+
464
+## ## bench1 <- data.frame(
465
+## ##     time = c(t_exp1, t_mc1, t_exp2, t_mc2, t_exp3, t_exp4, t_exp5, t_exp6),
466
+## ##     size = unlist(lapply(list(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6),
467
+## ##                          object.size))/(Nindiv * 1024^2),
468
+    
469
+## ##     lapply(list(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6),
470
+## ##            function(y) median(unlist(lapply(y, function(x) x$other$attemptsUsed))))
471
+## ## )
472
+
473
+
474
+
475
+
0 476
new file mode 100644
... ...
@@ -0,0 +1,484 @@
1
+### Benchmark runs. This produces "benchmark_1.RData"
2
+
3
+
4
+rm(list = ls())
5
+set.seed(NULL)
6
+
7
+library(OncoSimulR)
8
+
9
+######################################################################
10
+######################################################################
11
+######################################################################
12
+######################################################################
13
+
14
+
15
+system_summary <- function() {
16
+    return(list(versioninfo = version,
17
+                memimfo = system("free", intern = TRUE),
18
+                cpuinfo = system("cat /proc/cpuinfo | grep 'model name'", intern = TRUE),
19
+                packageinfo = paste("OncoSimulR, ", packageVersion("OncoSimulR")),
20
+##                nodeinfo = Sys.info()$nodename,
21
+##                nodelinuxinfo = paste(Sys.info()$sysname, Sys.info()$release),
22
+                dateinfo = date()))
23
+}
24
+stats_simuls <- function(sim) {
25
+    ## sim is an oncoSimulPop output
26
+    trf <- function(x) {
27
+        tt <- try(c(NumClones = x$NumClones,
28
+                    NumIter = x$NumIter,
29
+                    FinalTime = x$FinalTime,
30
+                    TotalPopSize = x$TotalPopSize,
31
+                    Attempts = x$other$attemptsUsed))
32
+        if(!inherits(tt, "try-error")) {
33
+            return(tt)
34
+        } else {
35
+            return(c(NumClones = NA,
36
+                    NumIter = NA,
37
+                    FinalTime = NA,
38
+                    TotalPopSize = NA,
39
+                    Attempts = NA))
40
+        }
41
+    }
42
+    tmp <- try(do.call("rbind", lapply(sim, trf)))
43
+    unlist(lapply(as.data.frame(tmp), summary))
44
+}
45
+
46
+all_sim_stats <- function(...) {
47
+    ## Returns a single data frame with all benchmark info
48
+    ## in ... pass the names of the objects
49
+    names <- as.character(as.list(match.call())[-c(1)])
50
+    m1 <- do.call("rbind", lapply(list(...), stats_simuls))
51
+    rownames(m1) <- names
52
+    Numindiv <- unlist(lapply(list(...), length))
53
+    time <- unlist(lapply(paste0("t_", names), get))
54
+    size <- unlist(lapply(list(...), object.size))
55
+    df <- data.frame(m1, size, time, Numindiv)
56
+    df$time_per_simul <- df$time/df$Numindiv
57
+    df$size_mb_per_simul <- df$size/(df$Numindiv * 1024^2)
58
+    attributes(df)$system_summary <- system_summary()
59
+    return(df)
60
+}
61
+
62
+######################################################################
63
+######################################################################
64
+######################################################################
65
+######################################################################
66
+
67
+
68
+pancr <- allFitnessEffects(
69
+    data.frame(parent = c("Root", rep("KRAS", 4), 
70
+                   "SMAD4", "CDNK2A", 
71
+                   "TP53", "TP53", "MLL3"),
72
+               child = c("KRAS","SMAD4", "CDNK2A", 
73
+                   "TP53", "MLL3",
74
+                   rep("PXDN", 3), rep("TGFBR2", 2)),
75
+               s = 0.05,
76
+               sh = -0.9,
77
+               typeDep = "MN"),
78
+    drvNames = c("KRAS", "SMAD4", "CDNK2A", "TP53", 
79
+	             "MLL3", "TGFBR2", "PXDN"))
80
+
81
+
82
+Nindiv <- 100
83
+
84
+
85
+## keepEvery = 1
86
+t_exp1 <- system.time(
87
+    exp1 <- oncoSimulPop(Nindiv, pancr, 
88
+                            detectionProb = "default", 
89
+                            detectionSize = NA,
90
+                            detectionDrivers = NA,
91
+                            finalTime = NA,
92
+                            keepPhylog = TRUE, keepEvery = 1,
93
+                            model = "Exp", 
94
+                            mc.cores = 1))["elapsed"]
95
+
96
+
97
+t_mc1 <- system.time(
98
+    mc1 <- oncoSimulPop(Nindiv, pancr, 
99
+                           detectionProb = "default", 
100
+                           detectionSize = NA,
101
+                           detectionDrivers = NA,
102
+                           finalTime = NA,
103
+                           keepPhylog = TRUE, keepEvery = 1,                                  
104
+                           model = "McFL", 
105
+                           mc.cores = 1))["elapsed"]
106
+
107
+## keepEvery = NA
108
+t_exp2 <- system.time(
109
+    exp2 <- oncoSimulPop(Nindiv, pancr, 
110
+                            detectionProb = "default", 
111
+                            detectionSize = NA,
112
+                            detectionDrivers = NA,
113
+                            finalTime = NA,
114
+                            keepPhylog = TRUE, keepEvery = NA, 
115
+                            model = "Exp", 
116
+                            mc.cores = 1))["elapsed"]
117
+
118
+
119
+t_mc2 <- system.time(
120
+    mc2 <- oncoSimulPop(Nindiv, pancr, 
121
+                           detectionProb = "default", 
122
+                           detectionSize = NA,
123
+                           detectionDrivers = NA,
124
+                           finalTime = NA,
125
+                           keepPhylog = TRUE, keepEvery = NA,
126
+                           model = "McFL", 
127
+                           mc.cores = 1))["elapsed"]
128
+
129
+
130
+
131
+### exp3 to exp6
132
+t_exp3 <- system.time(
133
+    exp3 <- oncoSimulPop(Nindiv, pancr, 
134
+                            detectionProb = c(PDBaseline = 5e4,
135
+                                              p2 = 0.1, n2 = 5e5,
136
+                                              checkSizePEvery = 20), 
137
+                            detectionSize = NA,
138
+                            detectionDrivers = NA,
139
+                            finalTime = NA,
140
+                            keepPhylog = TRUE, keepEvery = 1, 
141
+                            model = "Exp", 
142
+                            mc.cores = 1))["elapsed"]
143
+
144
+t_exp4 <- system.time(
145
+    exp4 <- oncoSimulPop(Nindiv, pancr, 
146
+                            detectionProb = c(PDBaseline = 5e4,
147
+                                              p2 = 0.1, n2 = 5e5,
148
+                                              checkSizePEvery = 20), 
149
+                            detectionSize = NA,
150
+                            detectionDrivers = NA,
151
+                            finalTime = NA,
152
+                            keepPhylog = TRUE, keepEvery = NA, 
153
+                            model = "Exp", 
154
+                            mc.cores = 1))["elapsed"]
155
+
156
+
157
+
158
+t_exp5 <- system.time(
159
+    exp5 <- oncoSimulPop(Nindiv, pancr, 
160
+                            detectionProb = c(PDBaseline = 5e5,
161
+                                              p2 = 0.1, n2 = 5e7,
162
+                                              checkSizePEvery = 20), 
163
+                            detectionSize = NA,
164
+                            detectionDrivers = NA,
165
+                            finalTime = NA,
166
+                            keepPhylog = TRUE, keepEvery = 1, 
167
+                            model = "Exp", 
168
+                            mc.cores = 1))["elapsed"]
169
+
170
+t_exp6 <- system.time(
171
+    exp6 <- oncoSimulPop(Nindiv, pancr, 
172
+                            detectionProb = c(PDBaseline = 5e5,
173
+                                              p2 = 0.1, n2 = 5e7,
174
+                                              checkSizePEvery = 20), 
175
+                            detectionSize = NA,
176
+                            detectionDrivers = NA,
177
+                            finalTime = NA,
178
+                            keepPhylog = TRUE, keepEvery = NA, 
179
+                            model = "Exp", 
180
+                            mc.cores = 1))["elapsed"]
181
+
182
+
183
+####################################################
184
+###### onlyCancer = FALSE
185
+
186
+
187
+## keepEvery = 1
188
+t_exp1_noc <- system.time(
189
+    exp1_noc <- oncoSimulPop(Nindiv, pancr, 
190
+                            detectionProb = "default", 
191
+                            detectionSize = NA,
192
+                            detectionDrivers = NA,
193
+                            finalTime = NA, onlyCancer = FALSE,
194
+                            keepPhylog = TRUE, keepEvery = 1,
195
+                            model = "Exp", 
196
+                            mc.cores = 1))["elapsed"]
197
+t_mc1_noc <- system.time(
198
+    mc1_noc <- oncoSimulPop(Nindiv, pancr, 
199
+                           detectionProb = "default", 
200
+                           detectionSize = NA,
201
+                           detectionDrivers = NA,
202
+                           finalTime = NA, onlyCancer = FALSE,
203
+                           keepPhylog = TRUE, keepEvery = 1,                                  
204
+                           model = "McFL", 
205
+                           mc.cores = 1))["elapsed"]
206
+## keepEvery = NA
207
+t_exp2_noc <- system.time(
208
+    exp2_noc <- oncoSimulPop(Nindiv, pancr, 
209
+                            detectionProb = "default", 
210
+                            detectionSize = NA,
211
+                            detectionDrivers = NA,
212
+                            finalTime = NA, onlyCancer = FALSE,
213
+                            keepPhylog = TRUE, keepEvery = NA, 
214
+                            model = "Exp", 
215
+                            mc.cores = 1))["elapsed"]
216
+t_mc2_noc <- system.time(
217
+    mc2_noc <- oncoSimulPop(Nindiv, pancr, 
218
+                           detectionProb = "default", 
219
+                           detectionSize = NA,
220
+                           detectionDrivers = NA,
221
+                           finalTime = NA, onlyCancer = FALSE,
222
+                           keepPhylog = TRUE, keepEvery = NA,
223
+                           model = "McFL", 
224
+                           mc.cores = 1))["elapsed"]
225
+### exp3_noc to exp6_noc
226
+t_exp3_noc <- system.time(
227
+    exp3_noc <- oncoSimulPop(Nindiv, pancr, 
228
+                            detectionProb = c(PDBaseline = 5e4,
229
+                                              p2 = 0.1, n2 = 5e5,
230
+                                              checkSizePEvery = 20), 
231
+                            detectionSize = NA,
232
+                            detectionDrivers = NA,
233
+                            finalTime = NA, onlyCancer = FALSE,
234
+                            keepPhylog = TRUE, keepEvery = 1, 
235
+                            model = "Exp", 
236
+                            mc.cores = 1))["elapsed"]
237
+t_exp4_noc <- system.time(
238
+    exp4_noc <- oncoSimulPop(Nindiv, pancr, 
239
+                            detectionProb = c(PDBaseline = 5e4,
240
+                                              p2 = 0.1, n2 = 5e5,
241
+                                              checkSizePEvery = 20), 
242
+                            detectionSize = NA,
243
+                            detectionDrivers = NA,
244
+                            finalTime = NA, onlyCancer = FALSE,
245
+                            keepPhylog = TRUE, keepEvery = NA, 
246
+                            model = "Exp", 
247
+                            mc.cores = 1))["elapsed"]
248
+t_exp5_noc <- system.time(
249
+    exp5_noc <- oncoSimulPop(Nindiv, pancr, 
250
+                            detectionProb = c(PDBaseline = 5e5,
251
+                                              p2 = 0.1, n2 = 5e7,
252
+                                              checkSizePEvery = 20), 
253
+                            detectionSize = NA,
254
+                            detectionDrivers = NA,
255
+                            finalTime = NA, onlyCancer = FALSE,
256
+                            keepPhylog = TRUE, keepEvery = 1, 
257
+                            model = "Exp", 
258
+                            mc.cores = 1))["elapsed"]
259
+t_exp6_noc <- system.time(
260
+    exp6_noc <- oncoSimulPop(Nindiv, pancr, 
261
+                            detectionProb = c(PDBaseline = 5e5,
262
+                                              p2 = 0.1, n2 = 5e7,
263
+                                              checkSizePEvery = 20), 
264
+                            detectionSize = NA,
265
+                            detectionDrivers = NA,
266
+                            finalTime = NA, onlyCancer = FALSE,
267
+                            keepPhylog = TRUE, keepEvery = NA, 
268
+                            model = "Exp", 
269
+                            mc.cores = 1))["elapsed"]
270
+
271
+benchmark_1_0.05 <- all_sim_stats(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6,
272
+                         exp1_noc, mc1_noc, exp2_noc, mc2_noc, exp3_noc, exp4_noc, exp5_noc, exp6_noc
273
+                         )
274
+
275
+## Add the info about key settings
276
+benchmark_1_0.05$keepEvery <- c(1, 1, NA, NA, 1, NA, 1, NA,
277
+                           1, 1, NA, NA, 1, NA, 1, NA)
278
+benchmark_1_0.05$PDBaseline <- c(rep(1.2 * 500, 4), 5e4, 5e4, 5e5, 5e5,
279
+                            rep(1.2 * 500, 4), 5e4, 5e4, 5e5, 5e5)
280
+benchmark_1_0.05$n2 <- c(rep(2 * 500, 4), 5e5, 5e5, 5e7, 5e7,
281
+                    rep(2 * 500, 4), 5e5, 5e5, 5e7, 5e7)
282
+benchmark_1_0.05$onlyCancer <- c(rep(TRUE, 8), rep(FALSE, 8))
283
+save(file = "../../data/benchmark_1_0.05.RData", benchmark_1_0.05)
284
+
285
+
286
+gc()
287
+
288
+## ######################################################################
289
+## ######################################################################
290
+## ###############                                         ##############
291
+## ###############      If you do it by hand ...           ##############
292
+## ###############                                         ##############
293
+## ######################################################################
294
+## ######################################################################
295
+
296
+
297
+
298
+## cat("\n\n\n t_exp1 = ", t_exp1, "\n")
299
+## object.size(exp1)/(Nindiv * 1024^2)
300
+## cat("\n\n")
301
+## summary(unlist(lapply(exp1, "[[", "NumClones")))
302
+## summary(unlist(lapply(exp1, "[[", "NumIter")))
303
+## summary(unlist(lapply(exp1, "[[", "FinalTime")))
304
+## summary(unlist(lapply(exp1, "[[", "TotalPopSize")))
305
+
306
+
307
+## cat("\n\n")
308
+## cat("\n\n\n t_mc1 = ", t_mc1, "\n")
309
+## object.size(mc1)/(Nindiv * 1024^2)
310
+## cat("\n\n")
311
+## summary(unlist(lapply(mc1, "[[", "NumClones")))
312
+## summary(unlist(lapply(mc1, "[[", "NumIter")))
313
+## summary(unlist(lapply(mc1, "[[", "FinalTime")))
314
+## summary(unlist(lapply(mc1, "[[", "TotalPopSize")))
315
+
316
+## cat("\n\n")
317
+## cat("\n\n\n t_exp2 = ", t_exp2, "\n")
318
+## object.size(exp2)/(Nindiv * 1024^2)
319
+## cat("\n\n")
320
+## summary(unlist(lapply(exp2, "[[", "NumClones")))
321
+## summary(unlist(lapply(exp2, "[[", "NumIter")))
322
+## summary(unlist(lapply(exp2, "[[", "FinalTime")))
323
+## summary(unlist(lapply(exp2, "[[", "TotalPopSize")))
324
+
325
+## cat("\n\n")
326
+## cat("\n\n\n t_mc2 = ", t_mc2, "\n")
327
+## object.size(mc2)/(Nindiv * 1024^2)
328
+## cat("\n\n")
329
+## summary(unlist(lapply(mc2, "[[", "NumClones")))
330
+## summary(unlist(lapply(mc2, "[[", "NumIter")))
331
+## summary(unlist(lapply(mc2, "[[", "FinalTime")))
332
+## summary(unlist(lapply(mc2, "[[", "TotalPopSize")))
333
+
334
+
335
+
336
+## cat("\n\n")
337
+## cat("\n\n\n t_exp3 = ", t_exp3, "\n")
338
+## object.size(exp3)/(Nindiv * 1024^2)
339
+## cat("\n\n")
340
+## summary(unlist(lapply(exp3, "[[", "NumClones")))
341
+## summary(unlist(lapply(exp3, "[[", "NumIter")))
342
+## summary(unlist(lapply(exp3, "[[", "FinalTime")))
343
+## summary(unlist(lapply(exp3, "[[", "TotalPopSize")))
344
+
345
+
346
+## cat("\n\n")
347
+## cat("\n\n\n t_exp4 = ", t_exp4, "\n")
348
+## object.size(exp4)/(Nindiv * 1024^2)
349
+## cat("\n\n")
350
+## summary(unlist(lapply(exp4, "[[", "NumClones")))
351
+## summary(unlist(lapply(exp4, "[[", "NumIter")))
352
+## summary(unlist(lapply(exp4, "[[", "FinalTime")))
353
+## summary(unlist(lapply(exp4, "[[", "TotalPopSize")))
354
+
355
+
356
+## cat("\n\n")
357
+## cat("\n\n\n t_exp5 = ", t_exp5, "\n")
358
+## object.size(exp5)/(Nindiv * 1024^2)
359
+## cat("\n\n")
360
+## summary(unlist(lapply(exp5, "[[", "NumClones")))
361
+## summary(unlist(lapply(exp5, "[[", "NumIter")))
362
+## summary(unlist(lapply(exp5, "[[", "FinalTime")))
363
+## summary(unlist(lapply(exp5, "[[", "TotalPopSize")))
364
+
365
+
366
+## cat("\n\n")
367
+## cat("\n\n\n t_exp6 = ", t_exp6, "\n")
368
+## object.size(exp6)/(Nindiv * 1024^2)
369
+## cat("\n\n")
370
+## summary(unlist(lapply(exp6, "[[", "NumClones")))
371
+## summary(unlist(lapply(exp6, "[[", "NumIter")))
372
+## summary(unlist(lapply(exp6, "[[", "FinalTime")))
373
+## summary(unlist(lapply(exp6, "[[", "TotalPopSize")))
374
+
375
+
376
+## ## Median runs until cancer
377
+
378
+## lapply(list(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6),
379
+##        function(y) median(unlist(lapply(y, function(x) x$other$attemptsUsed))))
380
+
381
+
382
+
383
+
384
+
385
+
386
+## cat("\n\n\n t_exp1_noc = ", t_exp1_noc, "\n")
387
+## object.size(exp1_noc)/(Nindiv * 1024^2)
388
+## cat("\n\n")
389
+## summary(unlist(lapply(exp1_noc, "[[", "NumClones")))
390
+## summary(unlist(lapply(exp1_noc, "[[", "NumIter")))
391
+## summary(unlist(lapply(exp1_noc, "[[", "FinalTime")))
392
+## summary(unlist(lapply(exp1_noc, "[[", "TotalPopSize")))
393
+
394
+
395
+## cat("\n\n")
396
+## cat("\n\n\n t_mc1_noc = ", t_mc1_noc, "\n")
397
+## object.size(mc1_noc)/(Nindiv * 1024^2)
398
+## cat("\n\n")
399
+## summary(unlist(lapply(mc1_noc, "[[", "NumClones")))
400
+## summary(unlist(lapply(mc1_noc, "[[", "NumIter")))
401
+## summary(unlist(lapply(mc1_noc, "[[", "FinalTime")))
402
+## summary(unlist(lapply(mc1_noc, "[[", "TotalPopSize")))
403
+
404
+## cat("\n\n")
405
+## cat("\n\n\n t_exp2_noc = ", t_exp2_noc, "\n")
406
+## object.size(exp2_noc)/(Nindiv * 1024^2)
407
+## cat("\n\n")
408
+## summary(unlist(lapply(exp2_noc, "[[", "NumClones")))
409
+## summary(unlist(lapply(exp2_noc, "[[", "NumIter")))
410
+## summary(unlist(lapply(exp2_noc, "[[", "FinalTime")))
411
+## summary(unlist(lapply(exp2_noc, "[[", "TotalPopSize")))
412
+
413
+## cat("\n\n")
414
+## cat("\n\n\n t_mc2_noc = ", t_mc2_noc, "\n")
415
+## object.size(mc2_noc)/(Nindiv * 1024^2)
416
+## cat("\n\n")
417
+## summary(unlist(lapply(mc2_noc, "[[", "NumClones")))
418
+## summary(unlist(lapply(mc2_noc, "[[", "NumIter")))
419
+## summary(unlist(lapply(mc2_noc, "[[", "FinalTime")))
420
+## summary(unlist(lapply(mc2_noc, "[[", "TotalPopSize")))
421
+
422
+
423
+
424
+## cat("\n\n")
425
+## cat("\n\n\n t_exp3_noc = ", t_exp3_noc, "\n")
426
+## object.size(exp3_noc)/(Nindiv * 1024^2)
427
+## cat("\n\n")
428
+## summary(unlist(lapply(exp3_noc, "[[", "NumClones")))
429
+## summary(unlist(lapply(exp3_noc, "[[", "NumIter")))
430
+## summary(unlist(lapply(exp3_noc, "[[", "FinalTime")))
431
+## summary(unlist(lapply(exp3_noc, "[[", "TotalPopSize")))
432
+
433
+
434
+## cat("\n\n")
435
+## cat("\n\n\n t_exp4_noc = ", t_exp4_noc, "\n")
436
+## object.size(exp4_noc)/(Nindiv * 1024^2)
437
+## cat("\n\n")
438
+## summary(unlist(lapply(exp4_noc, "[[", "NumClones")))
439
+## summary(unlist(lapply(exp4_noc, "[[", "NumIter")))
440
+## summary(unlist(lapply(exp4_noc, "[[", "FinalTime")))
441
+## summary(unlist(lapply(exp4_noc, "[[", "TotalPopSize")))
442
+
443
+
444
+## cat("\n\n")
445
+## cat("\n\n\n t_exp5_noc = ", t_exp5_noc, "\n")
446
+## object.size(exp5_noc)/(Nindiv * 1024^2)
447
+## cat("\n\n")
448
+## summary(unlist(lapply(exp5_noc, "[[", "NumClones")))
449
+## summary(unlist(lapply(exp5_noc, "[[", "NumIter")))
450
+## summary(unlist(lapply(exp5_noc, "[[", "FinalTime")))
451
+## summary(unlist(lapply(exp5_noc, "[[", "TotalPopSize")))
452
+
453
+
454
+## cat("\n\n")
455
+## cat("\n\n\n t_exp6_noc = ", t_exp6_noc, "\n")
456
+## object.size(exp6_noc)/(Nindiv * 1024^2)
457
+## cat("\n\n")
458
+## summary(unlist(lapply(exp6_noc, "[[", "NumClones")))
459
+## summary(unlist(lapply(exp6_noc, "[[", "NumIter")))
460
+## summary(unlist(lapply(exp6_noc, "[[", "FinalTime")))
461
+## summary(unlist(lapply(exp6_noc, "[[", "TotalPopSize")))
462
+
463
+
464
+## ## Median runs until cancer
465
+
466
+## lapply(list(exp1_noc, mc1_noc, exp2_noc, mc2_noc, exp3_noc, exp4_noc, exp5_noc, exp6_noc),
467
+##        function(y) median(unlist(lapply(y, function(x) x$other$attemptsUsed))))
468
+
469
+
470
+
471
+
472
+
473
+## ## bench1 <- data.frame(
474
+## ##     time = c(t_exp1, t_mc1, t_exp2, t_mc2, t_exp3, t_exp4, t_exp5, t_exp6),
475
+## ##     size = unlist(lapply(list(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6),
476
+## ##                          object.size))/(Nindiv * 1024^2),
477
+    
478
+## ##     lapply(list(exp1, mc1, exp2, mc2, exp3, exp4, exp5, exp6),
479
+## ##            function(y) median(unlist(lapply(y, function(x) x$other$attemptsUsed))))
480
+## ## )
481
+
482
+
483
+
484
+
0 485
new file mode 100644
... ...
@@ -0,0 +1,223 @@
1
+### Benchmark runs. This produces "benchmark_1.RData"
2
+
3
+
4
+rm(list = ls())
5
+set.seed(NULL)
6
+
7
+library(OncoSimulR)
8
+
9
+######################################################################
10
+######################################################################
11
+######################################################################
12
+######################################################################
13
+system_summary <- function() {
14
+    return(list(versioninfo = version,
15
+                memimfo = system("free", intern = TRUE),
16
+                cpuinfo = system("cat /proc/cpuinfo | grep 'model name'", intern = TRUE),
17
+                packageinfo = paste("OncoSimulR, ", packageVersion("OncoSimulR")),
18
+##                nodeinfo = Sys.info()$nodename,
19
+##                nodelinuxinfo = paste(Sys.info()$sysname, Sys.info()$release),
20
+                dateinfo = date()))
21
+}
22
+
23
+stats_simuls <- function(sim) {
24
+    ## sim is an oncoSimulPop output
25
+    trf <- function(x) {
26
+        tt <- try(c(NumClones = x$NumClones,
27
+                    NumIter = x$NumIter,
28
+                    FinalTime = x$FinalTime,
29
+                    TotalPopSize = x$TotalPopSize,
30
+                    Attempts = x$other$attemptsUsed))
31
+        if(!inherits(tt, "try-error")) {
32
+            return(tt)
33
+        } else {
34
+            return(c(NumClones = NA,
35
+                    NumIter = NA,
36
+                    FinalTime = NA,
37
+                    TotalPopSize = NA,
38
+                    Attempts = NA))
39
+        }
40
+    }
41
+    tmp <- try(do.call("rbind", lapply(sim, trf)))
42
+    unlist(lapply(as.data.frame(tmp), summary))
43
+}
44
+
45
+all_sim_stats <- function(...) {
46
+    ## Returns a single data frame with all benchmark info
47
+    ## in ... pass the names of the objects
48
+    names <- as.character(as.list(match.call())[-c(1)])
49
+    m1 <- do.call("rbind", lapply(list(...), stats_simuls))
50
+    ## the function will break next if something failed in previous
51
+    rownames(m1) <- names
52
+    Numindiv <- unlist(lapply(list(...), length))
53
+    time <- unlist(lapply(paste0("t_", names), get))
54
+    size <- unlist(lapply(list(...), object.size))
55
+    df <- data.frame(m1, size, time, Numindiv)
56
+    df$time_per_simul <- df$time/df$Numindiv
57
+    df$size_mb_per_simul <- df$size/(df$Numindiv * 1024^2)
58
+    attributes(df)$system_summary <- system_summary()
59
+    return(df)
60
+}
61
+
62
+
63
+all_sim_stats_single <- function(sim, tsim, name) {
64
+    ## Like previous, but for a single simulation. We avoid the dangerous
65
+    ## dynGet that we would nedd to obtain the "t_"
66
+
67
+    ## Returns a single data
68
+    ## frame with all benchmark info in ... pass the names of the objects
69
+    ## names <- as.character(as.list(match.call())[-c(1)])
70
+    names <- name
71
+    m1 <- data.frame(as.list(stats_simuls(sim)))
72
+    ## the function will break next if something failed in previous
73
+    rownames(m1) <- names
74
+    Numindiv <- length(sim)
75
+    time <- tsim
76
+    size <- as.numeric(object.size(sim))
77
+    df <- data.frame(m1, size, time, Numindiv)
78
+    df$time_per_simul <- df$time/df$Numindiv
79
+    df$size_mb_per_simul <- df$size/(df$Numindiv * 1024^2)
80
+    df$name <- names
81
+    ## attributes(df)$system_summary <- system_summary()
82
+    return(df)
83
+}
84
+
85
+
86
+######################################################################
87
+######################################################################
88
+######################################################################
89
+######################################################################
90
+####################                             #####################
91
+####################   Fitness specifications    #####################
92
+####################                             #####################
93
+######################################################################
94
+######################################################################
95
+######################################################################
96
+######################################################################
97
+
98
+
99
+pancr <- allFitnessEffects(
100
+    data.frame(parent = c("Root", rep("KRAS", 4), 
101
+                   "SMAD4", "CDNK2A", 
102
+                   "TP53", "TP53", "MLL3"),
103
+               child = c("KRAS","SMAD4", "CDNK2A", 
104
+                   "TP53", "MLL3",
105
+                   rep("PXDN", 3), rep("TGFBR2", 2)),
106
+               s = 0.1,
107
+               sh = -0.9,
108
+               typeDep = "MN"),
109
+    drvNames = c("KRAS", "SMAD4", "CDNK2A", "TP53", 
110
+	             "MLL3", "TGFBR2", "PXDN"))
111
+
112
+
113
+## Random fitness landscape with 6 genes 
114
+rfl6 <- rfitness(6, min_accessible_genotypes = 50)
115
+attributes(rfl6)$accessible_genotypes ## How many actually accessible
116
+rf6 <- allFitnessEffects(genotFitness = rfl6)
117
+
118
+
119
+## Random fitness landscape with 12 genes
120
+rfl12 <- rfitness(12, min_accessible_genotypes = 200)
121
+attributes(rfl12)$accessible_genotypes ## How many actually accessible
122
+rf12 <- allFitnessEffects(genotFitness = rfl12)
123
+
124
+
125
+
126
+
127
+## Independent genes; positive fitness from exponential distribution
128
+## mean around 0.1, and negative from exponential with mean around 0.02.
129
+## Half positive, half negative
130
+ng <- 200
131
+re_200 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
132
+
133
+ng <- 500
134
+re_500 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
135
+
136
+ng <- 2000
137
+re_2000 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
138
+
139
+ng <- 4000
140
+re_4000 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
141
+
142
+
143
+
144
+cases <- expand.grid(Model = c("Exp", "McFL"),
145
+                     fitness = c("pancr", "rf6", "rf12", 
146
+                                 "re_200", "re_500", "re_2000", "re_4000"),
147
+                     stringsAsFactors = FALSE)
148
+
149
+
150
+
151
+sim_bench <- function(Model, fitness, Nindiv, ...) {
152
+    cat("\n\n\n")
153
+    cat("***  Doing ", Model, " ", fitness)
154
+    
155
+    t_tmp <- system.time({
156
+        if(Model == "Exp") {
157
+            tmp <- oncoSimulPop(Nindiv,
158
+                                get(fitness),
159
+                                detectionProb = NA, 
160
+                                detectionSize = 1e6,
161
+                                initSize = 500,
162
+                                detectionDrivers = NA,
163
+                                keepPhylog = TRUE,
164
+                                model = "Exp",
165
+                                errorHitWallTime = FALSE,
166
+                                errorHitMaxTries = FALSE,
167
+                                finalTime = 5000,
168
+                                onlyCancer = FALSE,
169
+                                mc.cores = 1,
170
+                                sampleEvery = 0.5,
171
+                                ...)
172
+        } else {
173
+            initSize <- 1000
174
+            tmp <- oncoSimulPop(Nindiv,
175
+                                get(fitness),
176
+                                detectionProb = c(
177
+                                    PDBaseline = 1.4 * initSize,
178
+                                    n2 = 2 * initSize,
179
+                                    p2 = 0.1,
180
+                                    checkSizePEvery = 4),
181
+                                initSize = initSize,
182
+                                detectionSize = NA,
183
+                                detectionDrivers = NA,
184
+                                keepPhylog = TRUE,
185
+                                model = "McFL",
186
+                                errorHitWallTime = FALSE,
187
+                                errorHitMaxTries = FALSE,
188
+                                finalTime = 5000,
189
+                                max.wall.time = 10,
190
+                                onlyCancer = FALSE,
191
+                                mc.cores = 1,
192
+                                ...)
193
+        }
194
+    })["elapsed"]
195
+
196
+    cat("\n\n\n t_tmp = ", t_tmp, "\n")
197
+    print(object.size(tmp)/(Nindiv * 1024^2))
198
+    cat("\n\n")
199
+    print(summary(unlist(lapply(tmp, "[[", "NumClones"))))
200
+    print(summary(unlist(lapply(tmp, "[[", "NumIter"))))
201
+    print(summary(unlist(lapply(tmp, "[[", "FinalTime"))))
202
+    print(summary(unlist(lapply(tmp, "[[", "TotalPopSize"))))
203
+    name <- paste(Model, fitness, sep = "_")
204
+    df <- all_sim_stats_single(tmp, t_tmp, name)
205
+    df$Model <- Model
206
+    df$fitness <- fitness
207
+    return(df)
208
+}
209
+
210
+
211
+
212
+Nindiv <- 100
213
+
214
+benchmark_2 <- dplyr::bind_rows(Map(sim_bench,
215
+                             cases[, 1], cases[, 2], Nindiv,
216
+                             keepEvery = 1))
217
+
218
+attributes(benchmark_2)$system_summary <- system_summary()
219
+
220
+save(file = "../../data/benchmark_2.RData", benchmark_2)
221
+
222
+
223
+gc()
0 224
new file mode 100644
... ...
@@ -0,0 +1,223 @@
1
+### Benchmark runs. This produces "benchmark_1.RData"
2
+
3
+
4
+rm(list = ls())
5
+set.seed(NULL)
6
+
7
+library(OncoSimulR)
8
+
9
+######################################################################
10
+######################################################################
11
+######################################################################
12
+######################################################################
13
+system_summary <- function() {
14
+    return(list(versioninfo = version,
15
+                memimfo = system("free", intern = TRUE),
16
+                cpuinfo = system("cat /proc/cpuinfo | grep 'model name'", intern = TRUE),
17
+                packageinfo = paste("OncoSimulR, ", packageVersion("OncoSimulR")),
18
+##                nodeinfo = Sys.info()$nodename,
19
+##                nodelinuxinfo = paste(Sys.info()$sysname, Sys.info()$release),
20
+                dateinfo = date()))
21
+}
22
+
23
+stats_simuls <- function(sim) {
24
+    ## sim is an oncoSimulPop output
25
+    trf <- function(x) {
26
+        tt <- try(c(NumClones = x$NumClones,
27
+                    NumIter = x$NumIter,
28
+                    FinalTime = x$FinalTime,
29
+                    TotalPopSize = x$TotalPopSize,
30
+                    Attempts = x$other$attemptsUsed))
31
+        if(!inherits(tt, "try-error")) {
32
+            return(tt)
33
+        } else {
34
+            return(c(NumClones = NA,
35
+                    NumIter = NA,
36
+                    FinalTime = NA,
37
+                    TotalPopSize = NA,
38
+                    Attempts = NA))
39
+        }
40
+    }
41
+    tmp <- try(do.call("rbind", lapply(sim, trf)))
42
+    unlist(lapply(as.data.frame(tmp), summary))
43
+}
44
+
45
+all_sim_stats <- function(...) {
46
+    ## Returns a single data frame with all benchmark info
47
+    ## in ... pass the names of the objects
48
+    names <- as.character(as.list(match.call())[-c(1)])
49
+    m1 <- do.call("rbind", lapply(list(...), stats_simuls))
50
+    ## the function will break next if something failed in previous
51
+    rownames(m1) <- names
52
+    Numindiv <- unlist(lapply(list(...), length))
53
+    time <- unlist(lapply(paste0("t_", names), get))
54
+    size <- unlist(lapply(list(...), object.size))
55
+    df <- data.frame(m1, size, time, Numindiv)
56
+    df$time_per_simul <- df$time/df$Numindiv
57
+    df$size_mb_per_simul <- df$size/(df$Numindiv * 1024^2)
58
+    attributes(df)$system_summary <- system_summary()
59
+    return(df)
60
+}
61
+
62
+
63
+all_sim_stats_single <- function(sim, tsim, name) {
64
+    ## Like previous, but for a single simulation. We avoid the dangerous
65
+    ## dynGet that we would nedd to obtain the "t_"
66
+
67
+    ## Returns a single data
68
+    ## frame with all benchmark info in ... pass the names of the objects
69
+    ## names <- as.character(as.list(match.call())[-c(1)])
70
+    names <- name
71
+    m1 <- data.frame(as.list(stats_simuls(sim)))
72
+    ## the function will break next if something failed in previous
73
+    rownames(m1) <- names
74
+    Numindiv <- length(sim)
75
+    time <- tsim
76
+    size <- as.numeric(object.size(sim))
77
+    df <- data.frame(m1, size, time, Numindiv)
78
+    df$time_per_simul <- df$time/df$Numindiv
79
+    df$size_mb_per_simul <- df$size/(df$Numindiv * 1024^2)
80
+    df$name <- names
81
+    ## attributes(df)$system_summary <- system_summary()
82
+    return(df)
83
+}
84
+
85
+
86
+######################################################################
87
+######################################################################
88
+######################################################################
89
+######################################################################
90
+####################                             #####################
91
+####################   Fitness specifications    #####################
92
+####################                             #####################
93
+######################################################################
94
+######################################################################
95
+######################################################################
96
+######################################################################
97
+
98
+
99
+pancr <- allFitnessEffects(
100
+    data.frame(parent = c("Root", rep("KRAS", 4), 
101
+                   "SMAD4", "CDNK2A", 
102
+                   "TP53", "TP53", "MLL3"),
103
+               child = c("KRAS","SMAD4", "CDNK2A", 
104
+                   "TP53", "MLL3",
105
+                   rep("PXDN", 3), rep("TGFBR2", 2)),
106
+               s = 0.1,
107
+               sh = -0.9,
108
+               typeDep = "MN"),
109
+    drvNames = c("KRAS", "SMAD4", "CDNK2A", "TP53", 
110
+	             "MLL3", "TGFBR2", "PXDN"))
111
+
112
+
113
+## Random fitness landscape with 6 genes 
114
+rfl6 <- rfitness(6, min_accessible_genotypes = 50)
115
+attributes(rfl6)$accessible_genotypes ## How many actually accessible
116
+rf6 <- allFitnessEffects(genotFitness = rfl6)
117
+
118
+
119
+## Random fitness landscape with 12 genes
120
+rfl12 <- rfitness(12, min_accessible_genotypes = 200)
121
+attributes(rfl12)$accessible_genotypes ## How many actually accessible
122
+rf12 <- allFitnessEffects(genotFitness = rfl12)
123
+
124
+
125
+
126
+
127
+## Independent genes; positive fitness from exponential distribution
128
+## mean around 0.1, and negative from exponential with mean around 0.02.
129
+## Half positive, half negative
130
+ng <- 200
131
+re_200 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
132
+
133
+ng <- 500
134
+re_500 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
135
+
136
+ng <- 2000
137
+re_2000 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
138
+
139
+ng <- 4000
140
+re_4000 <- allFitnessEffects(noIntGenes = c(rexp(ng/2, 10), -rexp(ng/2, 50)))
141
+
142
+
143
+
144
+cases <- expand.grid(Model = c("Exp", "McFL"),
145
+                     fitness = c("pancr", "rf6", "rf12", 
146
+                                 "re_200", "re_500", "re_2000", "re_4000"),
147
+                     stringsAsFactors = FALSE)
148
+
149
+
150
+
151
+sim_bench <- function(Model, fitness, Nindiv, ...) {
152
+    cat("\n\n\n")
153
+    cat("***  Doing ", Model, " ", fitness)
154
+    
155
+    t_tmp <- system.time({
156
+        if(Model == "Exp") {
157
+            tmp <- oncoSimulPop(Nindiv,
158
+                                get(fitness),
159
+                                detectionProb = NA, 
160
+                                detectionSize = 1e5,
161
+                                initSize = 500,
162
+                                detectionDrivers = NA,
163
+                                keepPhylog = TRUE,
164
+                                model = "Exp",
165
+                                errorHitWallTime = FALSE,
166
+                                errorHitMaxTries = FALSE,
167
+                                finalTime = 25000,
168
+                                onlyCancer = TRUE,
169
+                                mc.cores = 1,
170
+                                sampleEvery = 0.5,
171
+                                ...)
172
+        } else {
173
+            initSize <- 1000
174
+            tmp <- oncoSimulPop(Nindiv,
175
+                                get(fitness),
176
+                                detectionProb = c(
177
+                                    PDBaseline = 1.4 * initSize,
178
+                                    n2 = 2 * initSize,
179
+                                    p2 = 0.1,
180
+                                    checkSizePEvery = 4),
181
+                                initSize = initSize,
182
+                                detectionSize = NA,
183
+                                detectionDrivers = NA,
184
+                                keepPhylog = TRUE,
185
+                                model = "McFL",
186
+                                errorHitWallTime = FALSE,
187
+                                errorHitMaxTries = FALSE,
188
+                                finalTime = 25000,
189
+                                max.wall.time = 10,
190
+                                onlyCancer = TRUE,
191
+                                mc.cores = 1,
192
+                                ...)
193
+        }
194
+    })["elapsed"]
195
+
196
+    cat("\n\n\n t_tmp = ", t_tmp, "\n")
197
+    print(object.size(tmp)/(Nindiv * 1024^2))
198
+    cat("\n\n")
199
+    print(summary(unlist(lapply(tmp, "[[", "NumClones"))))
200
+    print(summary(unlist(lapply(tmp, "[[", "NumIter"))))
201
+    print(summary(unlist(lapply(tmp, "[[", "FinalTime"))))
202
+    print(summary(unlist(lapply(tmp, "[[", "TotalPopSize"))))
203
+    name <- paste(Model, fitness, sep = "_")
204
+    df <- all_sim_stats_single(tmp, t_tmp, name)
205
+    df$Model <- Model
206
+    df$fitness <- fitness
207
+    return(df)
208
+}
209
+
210
+
211
+
212
+Nindiv <- 100
213
+
214
+benchmark_3 <- dplyr::bind_rows(Map(sim_bench,
215
+                             cases[, 1], cases[, 2], Nindiv,
216
+                             keepEvery = 1))
217
+
218
+attributes(benchmark_3)$system_summary <- system_summary()
219
+
220
+save(file = "../../data/benchmark_3.RData", benchmark_3)
221
+
222
+
223
+gc()
0 224
new file mode 100644
... ...
@@ -0,0 +1,290 @@
1
+## Example code used in the vignette, but not executed there.
2
+
3
+## This can use memory rather quickly. You might want to rm objects and
4
+## gc().
5
+
6
+library(OncoSimulR)
7
+rm(list = ls()); gc()
8
+
9
+ng <- 10000
10
+u <- allFitnessEffects(noIntGenes = c(rep(0.1, ng/2), rep(-0.1, ng/2)))
11
+
12
+t_e_10000 <- system.time(e_10000 <- oncoSimulPop(5,
13
+                                                 u,
14
+                                                 model = "Exp",
15
+                                                 mu = 1e-7,
16
+                                                 detectionSize = 1e6,
17
+                                                 detectionDrivers = NA,
18
+                                                 detectionProb = NA,
19
+                                                 keepPhylog = TRUE,
20
+                                                 onlyCancer = FALSE,
21
+                                                 mutationPropGrowth = TRUE,
22
+                                                 mc.cores = 1
23
+                                ))
24
+t_e_10000
25
+summary(e_10000)[, c(1:3, 8, 9)]
26
+print(object.size(e_10000), units = "MB")
27
+
28
+
29
+
30
+
31
+t_e_10000b <- system.time(e_10000b <- oncoSimulPop(5,
32
+                                                   u,
33
+                                                   model = "Exp",
34
+                                                   mu = 1e-7,
35
+                                                   detectionSize = 1e6,
36
+                                                   detectionDrivers = NA,
37
+                                                   detectionProb = NA,
38
+                                                   keepPhylog = TRUE,
39
+                                                   onlyCancer = FALSE,
40
+                                                   keepEvery = NA,
41
+                                                   mutationPropGrowth = TRUE,
42
+                                                   mc.cores = 1
43
+                                ))
44
+t_e_10000b
45
+summary(e_10000b)[, c(1:3, 8, 9)]
46
+print(object.size(e_10000b), units = "MB")
47
+
48
+
49
+
50
+rm(list = ls()); gc()
51
+ng <- 50000
52
+u <- allFitnessEffects(noIntGenes = c(rep(0.1, ng/2), rep(-0.1, ng/2)))
53
+
54
+t_e_50000 <- system.time(e_50000 <- oncoSimulPop(5,
55
+                                                 u,
56
+                                                 model = "Exp",
57
+                                                 mu = 1e-7,
58
+                                                 detectionSize = 1e6,
59
+                                                 detectionDrivers = NA,
60
+                                                 detectionProb = NA,
61
+                                                 keepPhylog = TRUE,
62
+                                                 onlyCancer = FALSE,
63
+                                                 keepEvery = NA,
64
+                                                 mutationPropGrowth = FALSE,
65
+                                                 mc.cores = 1
66
+                                                 ))
67
+
68
+t_e_50000
69
+
70
+summary(e_50000)[, c(1:3, 8, 9)]
71
+
72
+print(object.size(e_50000), units = "MB")
73
+
74
+
75
+rm(list = ls()); gc()
76
+ng <- 50000
77
+u <- allFitnessEffects(noIntGenes = c(rep(0.1, ng/2), rep(-0.1, ng/2)))
78
+
79
+t_e_50000np <- system.time(e_50000np <- oncoSimulPop(5,
80
+                                                 u,
81
+                                                 model = "Exp",
82
+                                                 mu = 1e-7,
83
+                                                 detectionSize = 1e6,
84
+                                                 detectionDrivers = NA,
85
+                                                 detectionProb = NA,
86
+                                                 keepPhylog = TRUE,
87
+                                                 onlyCancer = FALSE,
88
+                                                 keepEvery = 1,
89
+                                                 mutationPropGrowth = FALSE,
90
+                                                 mc.cores = 1
91
+                                                 ))
92
+
93
+t_e_50000np
94
+
95
+summary(e_50000np)[, c(1:3, 8, 9)]
96
+
97
+print(object.size(e_50000np), units = "MB")
98
+
99
+
100
+
101
+rm(list = ls()); gc()
102
+ng <- 50000
103
+u <- allFitnessEffects(noIntGenes = c(rep(0.1, ng/2), rep(-0.1, ng/2)))
104
+
105
+t_e_50000c <- system.time(e_50000c <- oncoSimulPop(5,
106
+                                                 u,
107
+                                                 model = "Exp",
108
+                                                 mu = 1e-7,
109
+                                                 detectionSize = 1e6,
110
+                                                 detectionDrivers = NA,
111
+                                                 detectionProb = NA,
112
+                                                 keepPhylog = TRUE,
113
+                                                 onlyCancer = FALSE,
114
+                                                 keepEvery = NA,
115
+                                                 mutationPropGrowth = TRUE,
116
+                                                 mc.cores = 1
117
+                                                 ))
118
+
119
+t_e_50000c
120
+
121
+summary(e_50000c)[, c(1:3, 8, 9)]
122
+
123
+print(object.size(e_50000c), units = "MB")
124
+
125
+
126
+
127
+#### McFL
128
+
129
+
130
+rm(list = ls()); gc()
131
+ng <- 50000
132
+u <- allFitnessEffects(noIntGenes = c(rep(0.1, ng/2), rep(-0.1, ng/2)))
133
+
134
+t_mc_50000_nmpg <- system.time(mc_50000_nmpg <- oncoSimulPop(5,
135
+                                                   u,
136
+                                                   model = "McFL",
137
+                                                   mu = 1e-7,