git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@120278 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: OncoSimulR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Forward Genetic Simulation of Cancer Progresion with Epistasis |
4 |
-Version: 2.3.12 |
|
5 |
-Date: 2016-08-10 |
|
4 |
+Version: 2.3.13 |
|
5 |
+Date: 2016-08-19 |
|
6 | 6 |
Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"), |
7 | 7 |
email = "rdiaz02@gmail.com"), |
8 | 8 |
person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com")) |
... | ... |
@@ -30,7 +30,7 @@ URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cance |
30 | 30 |
BugReports: https://github.com/rdiaz02/OncoSimul/issues |
31 | 31 |
Depends: R (>= 3.3.0) |
32 | 32 |
Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel |
33 |
-Suggests: BiocStyle, knitr, Oncotree, testthat (>= 1.0.0) |
|
33 |
+Suggests: BiocStyle, knitr, Oncotree, testthat (>= 1.0.0), rmarkdown, bookdown |
|
34 | 34 |
LinkingTo: Rcpp |
35 | 35 |
VignetteBuilder: knitr |
36 | 36 |
|
... | ... |
@@ -1,13 +1,32 @@ |
1 |
-citHeader("If you use OncoSimulR, please cite OncoSimulR itself. Note that a former version of OncoSimulR has been used in a large comparative study of methods to infer restrictions, published in BMC Bioinformatics; you might want to cite that too, if appropriate.") |
|
1 |
+citHeader("If you use OncoSimulR, please cite the OncoSimulR bioRxiv paper.", |
|
2 |
+ " A former version of OncoSimulR has been used in a large", |
|
3 |
+ " comparative study of methods to infer restrictions,", |
|
4 |
+ " published in BMC Bioinformatics; you might want to cite that too,", |
|
5 |
+ " if appropriate, such as when referring to using evolutionary simulations", |
|
6 |
+ " to assess oncogenetic tree methods performance.") |
|
2 | 7 |
|
3 |
-citEntry(entry="Manual", |
|
8 |
+ |
|
9 |
+citEntry(entry="Article", |
|
4 | 10 |
author = "R Diaz-Uriarte", |
5 |
- title = "OncoSimulR: Forward Genetic Simulation of Cancer Progresion with Epistasis.", |
|
6 |
- year = "2015", |
|
7 |
- note = "R package version 2.0.0", |
|
11 |
+ title = "OncoSimulR: genetic simulation of cancer progression with arbitrary epistasis and mutator genes.", |
|
12 |
+ journal = "bioRxiv", |
|
13 |
+ year = "2016", |
|
14 |
+ doi = "10.1101/069500", |
|
15 |
+ publisher = "Cold Spring Harbor Labs Journals", |
|
16 |
+ url = "http://biorxiv.org/content/early/2016/08/14/069500", |
|
8 | 17 |
textVersion = paste("R Diaz-Uriarte.", |
9 |
- "OncoSimulR: Forward Genetic Simulation of Cancer Progresion with Epistasis. 2015. BioConductor package version 2.0.0 ") |
|
10 |
-) |
|
18 |
+ "OncoSimulR: genetic simulation of cancer progression with arbitrary epistasis and mutator genes.", |
|
19 |
+ " 2016. bioRxiv, http://dx.doi.org/10.1101/069500.") |
|
20 |
+ ) |
|
21 |
+ |
|
22 |
+## citEntry(entry="Manual", |
|
23 |
+## author = "R Diaz-Uriarte", |
|
24 |
+## title = "OncoSimulR: Forward Genetic Simulation of Cancer Progresion with Epistasis.", |
|
25 |
+## year = "2015", |
|
26 |
+## note = "R package version 2.0.0", |
|
27 |
+## textVersion = paste("R Diaz-Uriarte.", |
|
28 |
+## "OncoSimulR: Forward Genetic Simulation of Cancer Progresion with Epistasis. 2015. BioConductor package version 2.0.0 ") |
|
29 |
+## ) |
|
11 | 30 |
|
12 | 31 |
|
13 | 32 |
## citHeader("A former version of OncoSimulR has been used in this paper:") |
... | ... |
@@ -323,9 +323,9 @@ of \code{sampleEvery} that is larger than or equal to \code{keepEvery}. |
323 | 323 |
%% keepEvery. |
324 | 324 |
|
325 | 325 |
If you want nice plots, set \code{sampleEvery} and \code{keepEvery} to |
326 |
- small values (say, 1 or 0.5). Otherwise, you can use a |
|
326 |
+ small values (say, 5 or 2). Otherwise, you can use a |
|
327 | 327 |
\code{sampleEvery} of 1 but a \code{keepEvery} of 15, so that the |
328 |
- return objects are not huge. |
|
328 |
+ return objects are not huge and the code runs a lot faster. |
|
329 | 329 |
|
330 | 330 |
|
331 | 331 |
Setting \code{keepEvery = NA} means we only keep the very last |
332 | 332 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,5956 @@ |
1 |
+--- |
|
2 |
+title: "OncoSimulR: forward genetic simulation in asexual populations with arbitrary epistatic interactions and a focus on modeling tumor progression." |
|
3 |
+author: " |
|
4 |
+ |
|
5 |
+ Ramon Diaz-Uriarte\\ |
|
6 |
+ |
|
7 |
+ Dept. Biochemistry, Universidad Autónoma de Madrid, Instituto de |
|
8 |
+ Investigaciones Biomédicas 'Alberto Sols' (UAM-CSIC), Madrid, |
|
9 |
+ Spain.\\ |
|
10 |
+ |
|
11 |
+ <rdiaz02@gmail.com>, <http://ligarto.org/rdiaz> |
|
12 |
+ " |
|
13 |
+date: "`r paste0(Sys.Date(),'. OncoSimulR version ', packageVersion('OncoSimulR'), '. Revision: ', system('git rev-parse --short HEAD', intern = TRUE))`" |
|
14 |
+output: |
|
15 |
+ bookdown::html_document2: |
|
16 |
+ toc: yes |
|
17 |
+ toc_float: true |
|
18 |
+ css: custom4.css |
|
19 |
+classoption: a4paper |
|
20 |
+geometry: margin=3cm |
|
21 |
+fontsize: 12pt |
|
22 |
+bibliography: OncoSimulR.bib |
|
23 |
+link-citations: true |
|
24 |
+vignette: > |
|
25 |
+ %\VignetteIndexEntry{OncoSimulR: forward genetic simulation in asexual populations with arbitrary epistatic interactions and a focus on modeling tumor progression.} |
|
26 |
+ %\VignetteEngine{knitr::rmarkdown} |
|
27 |
+ %\VignettePackage{OncoSimulR} |
|
28 |
+ %\VignetteEngine{knitr::rmarkdown} |
|
29 |
+ %\VignetteEncoding{UTF-8} |
|
30 |
+ %\VignetteKeywords{OncoSimulR simulation cancer oncogenetic trees} |
|
31 |
+ %\VignetteDepends{OncoSimulR} |
|
32 |
+--- |
|
33 |
+ |
|
34 |
+<!-- ## <rdiaz02@gmail.com>, <http://ligarto.org/rdiaz> --> |
|
35 |
+<!-- ## date: "`r Sys.Date()`" --> |
|
36 |
+ |
|
37 |
+<!-- ## Bioc html_document no refs --> |
|
38 |
+<!-- ## Bioc html_document2 too wide margins and really ugly --> |
|
39 |
+<!-- ## Simplest is to use bookdown and add BioC CSS --> |
|
40 |
+<!-- ## Rmdformats is really neat, but no support for \@ref --> |
|
41 |
+<!-- ## PDF as: render("OncoSimulR.Rmd", output_format = bookdown::pdf_document2(toc = TRUE, toc_depth = 4, keep_tex = TRUE, includes = includes("before_body" = "my-header.tex"))) --> |
|
42 |
+ |
|
43 |
+ |
|
44 |
+<!-- Fomr https://github.com/rstudio/bookdown/issues/153 --> |
|
45 |
+<script type="text/x-mathjax-config"> |
|
46 |
+MathJax.Hub.Config({ |
|
47 |
+ TeX: { equationNumbers: { autoNumber: "AMS" } } |
|
48 |
+}); |
|
49 |
+</script> |
|
50 |
+ |
|
51 |
+ |
|
52 |
+```{r setup, include=FALSE} |
|
53 |
+## use collapse for bookdowan, to collapse all the source and output |
|
54 |
+## blocks from one code chunk into a single block |
|
55 |
+knitr::opts_chunk$set(echo = TRUE, collapse = TRUE) |
|
56 |
+options(width = 70) |
|
57 |
+require(BiocStyle) |
|
58 |
+``` |
|
59 |
+ |
|
60 |
+<!-- bookdown::pdf_document2 seems to produce the tex even if failures --> |
|
61 |
+ |
|
62 |
+ |
|
63 |
+<!-- This did not work --> |
|
64 |
+<!-- bookdown::pdf_document2: --> |
|
65 |
+<!-- template: template2.tex --> |
|
66 |
+<!-- keep_tex: true --> |
|
67 |
+<!-- toc: true --> |
|
68 |
+ |
|
69 |
+<!-- I convert Rfunction to italics on typewriter --> |
|
70 |
+<!-- sed -i 's/\\Rfunction{\([^}]\+\)}/*`\1`*/g' p22.md --> |
|
71 |
+ |
|
72 |
+ |
|
73 |
+ |
|
74 |
+# Introduction {#intro} |
|
75 |
+ |
|
76 |
+OncoSimulR was originally developed to simulate tumor progression |
|
77 |
+using several models of tumor progression with emphasis on allowing |
|
78 |
+users to set restrictions in the accumulation of mutations as |
|
79 |
+specified, for example, by Oncogenetic Trees |
|
80 |
+[OT: @Desper1999JCB; @Szabo2008] or Conjunctive Bayesian Networks |
|
81 |
+[CBN: @Beerenwinkel2007; @Gerstung2009; @Gerstung2011], with the |
|
82 |
+possibility of adding passenger mutations to the simulations and |
|
83 |
+allowing for several types of sampling. |
|
84 |
+ |
|
85 |
+ |
|
86 |
+ |
|
87 |
+ |
|
88 |
+Since then, OncoSimulR has been vastly extended to allow you to |
|
89 |
+specify other types of restrictions in the accumulation of genes, as |
|
90 |
+such as the XOR models of @Korsunsky2014 or the "semimonotone" model |
|
91 |
+of Farahani and Lagergren [@Farahani2013]. Moreover, different |
|
92 |
+fitness effects related to the order in which mutations appear can |
|
93 |
+also be incorporated, involving arbitrary numbers of genes. This is |
|
94 |
+different from "restrictions in the order of accumulation of |
|
95 |
+mutations". With order effects, shown empirically in a recent cancer |
|
96 |
+paper by Ortmann and collaborators [@Ortmann2015], the effect of |
|
97 |
+having both mutations "A" and "B" differs depending on whether "A" |
|
98 |
+appeared before or after "B". More generally, now OncoSimulR also |
|
99 |
+allows you to specify arbitrary epistatic interactions between |
|
100 |
+arbitrary collections of genes and to model, for example, synthetic |
|
101 |
+mortality or synthetic viability (again, involving an arbitrary |
|
102 |
+number of genes, some of which might also depend on other genes, or |
|
103 |
+show order effects with other genes). Moreover, it is possible to |
|
104 |
+specify the above interactions in terms of modules, not genes. This |
|
105 |
+idea is discussed in, for example, @Raphael2014a and @Gerstung2011: |
|
106 |
+the restrictions encoded in, say, CBNs or OT can be considered to |
|
107 |
+apply not to genes, but to modules, where each module is a set of |
|
108 |
+genes (and the intersection between modules is the empty set) that |
|
109 |
+performs a specific biological function. Modules, then, play the |
|
110 |
+role of a "union operation" over the set of genes in a module. In |
|
111 |
+addition, arbitrary numbers of genes without interactions (and with |
|
112 |
+fitness effects coming from any distribution you might want) are |
|
113 |
+also possible. |
|
114 |
+ |
|
115 |
+ |
|
116 |
+The models so far implemented are all continuous time models, which |
|
117 |
+are simulated using the BNB algorithm of @Mather2012. The core of |
|
118 |
+the code is implemented in C++, providing for fast execution. To |
|
119 |
+help with simulation studies, code to simulate random graphs of the |
|
120 |
+kind often seen in CBN, OTs, etc, is also available. Finally, |
|
121 |
+OncoSimulR also allows for the generation of random fitness |
|
122 |
+landscapes and the representation of fitness landscapes. |
|
123 |
+ |
|
124 |
+## Key features of OncoSimulR {#key} |
|
125 |
+ |
|
126 |
+As mentioned above, OncoSimulR is now a very general package for forward |
|
127 |
+genetic simulation, with applicability well beyond tumor progression. This |
|
128 |
+is a summary of some of the key features: |
|
129 |
+ |
|
130 |
+<!-- FIXME: add the tables of the poster --> |
|
131 |
+ |
|
132 |
+ |
|
133 |
+ |
|
134 |
+* You can specify arbitrary interactions between genes, with arbitrary fitness effects, with explicit support for: |
|
135 |
+ - Restrictions in the accumulations of mutations, as specified by |
|
136 |
+ Oncogenetic Trees (OTs), Conjunctive Bayesian Networks (CBNs), |
|
137 |
+ semimonotone progression networks, and XOR relationships. |
|
138 |
+ |
|
139 |
+ - Epistatic interactions, including, but not limited to, synthetic |
|
140 |
+ viability and synthetic lethality. |
|
141 |
+ - Order effects. |
|
142 |
+ |
|
143 |
+* You can add passenger mutations. |
|
144 |
+* You can add mutator/antimutator effects. |
|
145 |
+* Fitness and mutation rates can be gene-specific. |
|
146 |
+* More generally, you can add arbitrary numbers of non-interacting |
|
147 |
+ genes with arbitrary fitness effects. |
|
148 |
+ |
|
149 |
+* You can allow for deviations from the OT, CBN, semimonotone, and |
|
150 |
+ XOR models, specifying a penalty for such deviations (the $s_h$ |
|
151 |
+ parameter). |
|
152 |
+ |
|
153 |
+* You can conduct multiple simulations, and sample from them with |
|
154 |
+ different temporal schemes and using both whole tumor or single cell |
|
155 |
+ sampling. |
|
156 |
+ |
|
157 |
+* Right now, three different models are available, two that lead |
|
158 |
+ to exponential growth, one of them loosely based on @Bozic2010, and |
|
159 |
+ another that leads to logistic-like growth, based on @McFarland2013. |
|
160 |
+ |
|
161 |
+<!-- * Code in C++ is available (though not yet callable from R) for --> |
|
162 |
+<!-- using several other models, including the one from @Beerenwinkel2007b. --> |
|
163 |
+ |
|
164 |
+* You can use very large numbers of genes (e.g., see an example of |
|
165 |
+ 50000 in section \@ref(mcf50070) ). |
|
166 |
+ |
|
167 |
+* Simulations are generally very fast as I use C++ to implement |
|
168 |
+ the BNB algorithm. |
|
169 |
+ |
|
170 |
+* You can obtain the true sequence of events and the phylogenetic |
|
171 |
+ relationships between clones. |
|
172 |
+ |
|
173 |
+* You can generate random fitness landscapes (under the House of |
|
174 |
+ Cards, Rough Mount Fuji, or additive models, or combinations of the |
|
175 |
+ former) and use those landscapes as input to the simulation |
|
176 |
+ functions. |
|
177 |
+ |
|
178 |
+* You can plot fitness landscapes. |
|
179 |
+ |
|
180 |
+ |
|
181 |
+ |
|
182 |
+ |
|
183 |
+Further details about the motivation for wanting to simulate data this way |
|
184 |
+in the context of tumor progression can be found in |
|
185 |
+@Diaz-Uriarte2015, where additional comments about model parameters |
|
186 |
+and caveats are discussed. |
|
187 |
+ |
|
188 |
+Are there similar programs? The Java program by @Reiter2013a offers |
|
189 |
+somewhat similar functionality to the previous version of |
|
190 |
+OncoSimulR, but it is restricted to at most four drivers (whereas |
|
191 |
+v.1 of OncoSimulR allowed for up to 64), you cannot use arbitrary |
|
192 |
+CBNs or OTs (or XORs or semimonotone graphs) to specify |
|
193 |
+restrictions, there is no allowance for passengers, and a single |
|
194 |
+type of model (a discrete time Galton-Watson process) is |
|
195 |
+implemented. The current functionality of OncoSimulR goes well |
|
196 |
+beyond the the previous version (and, thus, also the TPT of |
|
197 |
+[@Reiter2013a]). We now allow you to specify all types of fitness |
|
198 |
+effects in other general forward genetic simulators such as FFPopSim |
|
199 |
+[@Zanini2012], and some that, to our knowledge (e.g., order effects) |
|
200 |
+are not available from any genetics simulator. In addition, the |
|
201 |
+"lego approach" to flexibly combine different fitness specifications |
|
202 |
+is also unique. |
|
203 |
+ |
|
204 |
+ |
|
205 |
+ |
|
206 |
+ |
|
207 |
+<!-- These are some tables that might help to get an idea of the --> |
|
208 |
+<!-- functionality; they come from this --> |
|
209 |
+<!-- poster[http://dx.doi.org/10.7490/f1000research.1112860.1](http://dx.doi.org/10.7490/f1000research.1112860.1) --> |
|
210 |
+<!-- presented at ECCB 2016. --> |
|
211 |
+ |
|
212 |
+<!-- I need to define a custom block. See --> |
|
213 |
+<!-- https://bookdown.org/yihui/bookdown/custom-blocks.html. Later --> |
|
214 |
+ |
|
215 |
+ |
|
216 |
+ |
|
217 |
+## Steps in using OncoSimulR |
|
218 |
+ |
|
219 |
+ |
|
220 |
+Using this package will often involve the following steps: |
|
221 |
+ |
|
222 |
+ |
|
223 |
+1. Specify the fitness effects: sections \@ref(specfit) and \@ref(litex). |
|
224 |
+ |
|
225 |
+2. Simulate cancer progression: section \@ref(simul). You can simulate |
|
226 |
+ for a single subject or for a set of subjects. You will need to: |
|
227 |
+ |
|
228 |
+ - Decide on a model. This basically amounts to choosing a model with |
|
229 |
+ exponential growth ("Exp" or "Bozic") or a model with |
|
230 |
+ gompertz-like growth ("McFL"). If exponential growth, you can choose |
|
231 |
+ whether the the effects of mutations operate on the death rate |
|
232 |
+ ("Bozic") or the birth rate ("Exp")[^1] |
|
233 |
+ |
|
234 |
+ - Specify other parameters of the simulation. In particular, decide |
|
235 |
+ when to stop the simulation, mutation rates, etc. |
|
236 |
+ |
|
237 |
+ Of course, at least for initial playing around, you can use the |
|
238 |
+ defaults. |
|
239 |
+ |
|
240 |
+3. Sample from the simulated data and do something with those simulated |
|
241 |
+ data (e.g., fit an OT model to them). What you do with the data, |
|
242 |
+ however, is outside the scope of this package. |
|
243 |
+ |
|
244 |
+ |
|
245 |
+ |
|
246 |
+[^1]:It is of course possible to do this with the gompertz-like models, |
|
247 |
+ but there probably is little reason to do it. @McFarland2013 discuss |
|
248 |
+ this has little effect on their results, for example. In addition, |
|
249 |
+ decreasing the death rate will more easily lead to numerical |
|
250 |
+ problems as shown in section \@ref(ex-0-death). |
|
251 |
+ |
|
252 |
+ |
|
253 |
+ |
|
254 |
+ |
|
255 |
+Before anything else, let us load the package. We also explicitly load |
|
256 |
+`r Biocpkg("graph")` and `r CRANpkg("igraph")` for the vignette to work (you |
|
257 |
+do not need that for your usual interactive work). And I set the default |
|
258 |
+color for vertices in igraph. |
|
259 |
+ |
|
260 |
+```{r, results="hide",message=FALSE, echo=TRUE, include=TRUE} |
|
261 |
+library(OncoSimulR) |
|
262 |
+library(graph) |
|
263 |
+library(igraph) |
|
264 |
+igraph_options(vertex.color = "SkyBlue2") |
|
265 |
+``` |
|
266 |
+ |
|
267 |
+```{r, echo=FALSE, results='hide'} |
|
268 |
+options(width = 68) |
|
269 |
+``` |
|
270 |
+ |
|
271 |
+To be explicit, what version are we running? |
|
272 |
+```{r} |
|
273 |
+packageVersion("OncoSimulR") |
|
274 |
+``` |
|
275 |
+ |
|
276 |
+ |
|
277 |
+ |
|
278 |
+## Two quick examples {#quickexample} |
|
279 |
+ |
|
280 |
+Following the above we will run two examples. First a model with a few |
|
281 |
+genes and **epistasis**: |
|
282 |
+ |
|
283 |
+```{r, fig.width=6.5, fig.height=10} |
|
284 |
+## 1. Fitness effects: here we specify an |
|
285 |
+## epistatic model with modules. |
|
286 |
+sa <- 0.1 |
|
287 |
+sb <- -0.2 |
|
288 |
+sab <- 0.25 |
|
289 |
+sac <- -0.1 |
|
290 |
+sbc <- 0.25 |
|
291 |
+sv2 <- allFitnessEffects(epistasis = c("-A : B" = sb, |
|
292 |
+ "A : -B" = sa, |
|
293 |
+ "A : C" = sac, |
|
294 |
+ "A:B" = sab, |
|
295 |
+ "-A:B:C" = sbc), |
|
296 |
+ geneToModule = c( |
|
297 |
+ "A" = "a1, a2", |
|
298 |
+ "B" = "b", |
|
299 |
+ "C" = "c"), |
|
300 |
+ drvNames = c("a1", "a2", "b", "c")) |
|
301 |
+evalAllGenotypes(sv2, addwt = TRUE) |
|
302 |
+ |
|
303 |
+## 2. Simulate the data. Here we use the "McFL" model and set |
|
304 |
+## explicitly parameters for mutation rate, initial size and size |
|
305 |
+## of the population that will end the simulations, etc |
|
306 |
+ |
|
307 |
+RNGkind("Mersenne-Twister") |
|
308 |
+set.seed(983) |
|
309 |
+ep1 <- oncoSimulIndiv(sv2, model = "McFL", |
|
310 |
+ mu = 5e-6, |
|
311 |
+ sampleEvery = 0.02, |
|
312 |
+ keepEvery = 0.5, |
|
313 |
+ initSize = 2000, |
|
314 |
+ finalTime = 3000, |
|
315 |
+ onlyCancer = FALSE) |
|
316 |
+``` |
|
317 |
+ |
|
318 |
+ |
|
319 |
+ |
|
320 |
+<!-- % set.seed(9) --> |
|
321 |
+<!-- % ep1 <- oncoSimulIndiv(sv2, model = "McFL", --> |
|
322 |
+<!-- % mu = 5e-6, --> |
|
323 |
+<!-- % sampleEvery = 0.02, --> |
|
324 |
+<!-- % keepEvery = 0.5, --> |
|
325 |
+<!-- % initSize = 2000, --> |
|
326 |
+<!-- % finalTime = 3000, --> |
|
327 |
+<!-- % onlyCancer = FALSE) --> |
|
328 |
+ |
|
329 |
+<!-- %% <<fig.width=6.5, fig.height=10>>= --> |
|
330 |
+<!-- %% ## 1. Fitness effects: here we specify a --> |
|
331 |
+<!-- %% ## epistatic model with modules. --> |
|
332 |
+<!-- %% sa <- 0.1 --> |
|
333 |
+<!-- %% sb <- -0.2 --> |
|
334 |
+<!-- %% sab <- 0.25 --> |
|
335 |
+<!-- %% sac <- -0.1 --> |
|
336 |
+<!-- %% sbc <- 0.25 --> |
|
337 |
+<!-- %% sv2 <- allFitnessEffects(epistasis = c("-A : B" = sb, --> |
|
338 |
+<!-- %% "A : -B" = sa, --> |
|
339 |
+<!-- %% "A : C" = sac, --> |
|
340 |
+<!-- %% "A:B" = sab, --> |
|
341 |
+<!-- %% "-A:B:C" = sbc), --> |
|
342 |
+<!-- %% geneToModule = c( --> |
|
343 |
+<!-- %% "Root" = "Root", --> |
|
344 |
+<!-- %% "A" = "a1, a2", --> |
|
345 |
+<!-- %% "B" = "b", --> |
|
346 |
+<!-- %% "C" = "c")) --> |
|
347 |
+<!-- %% evalAllGenotypes(sv2, order = FALSE, addwt = TRUE) --> |
|
348 |
+ |
|
349 |
+<!-- %% ## 2. Simulate the data. Here we use the "McFL" model and set explicitly --> |
|
350 |
+<!-- %% ## parameters for mutation rate, final and initial sizes, etc. --> |
|
351 |
+<!-- %% RNGkind("Mersenne-Twister") --> |
|
352 |
+<!-- %% set.seed(983) --> |
|
353 |
+<!-- %% ep1 <- oncoSimulIndiv(sv2, model = "McFL", --> |
|
354 |
+<!-- %% mu = 5e-6, --> |
|
355 |
+<!-- %% sampleEvery = 0.02, --> |
|
356 |
+<!-- %% keepEvery = 0.5, --> |
|
357 |
+<!-- %% initSize = 2000, --> |
|
358 |
+<!-- %% finalTime = 3000, --> |
|
359 |
+<!-- %% onlyCancer = FALSE) --> |
|
360 |
+<!-- %% @ --> |
|
361 |
+ |
|
362 |
+ |
|
363 |
+```{r iep1x1,fig.width=6.5, fig.height=9.5} |
|
364 |
+## 3. We will not analyze those data any further. We will only plot |
|
365 |
+## them. For the sake of a small plot, we thin the data. |
|
366 |
+par(mfrow = c(2, 1)) |
|
367 |
+plot(ep1, show = "drivers", xlim = c(0, 1500), |
|
368 |
+ thinData = TRUE, thinData.keep = 0.5) |
|
369 |
+## Increase ylim and legend.ncols to avoid overlap of |
|
370 |
+## legend with rest of figure |
|
371 |
+plot(ep1, show = "genotypes", ylim = c(0, 4500), |
|
372 |
+ legend.ncols = 4, |
|
373 |
+ xlim = c(0, 1500), |
|
374 |
+ thinData = TRUE, thinData.keep = 0.5) |
|
375 |
+ |
|
376 |
+``` |
|
377 |
+ |
|
378 |
+ |
|
379 |
+ |
|
380 |
+ |
|
381 |
+As a second example, we will use a model where we specify |
|
382 |
+**restrictions in the order of accumulation of mutations** using the |
|
383 |
+pancreatic cancer poset in @Gerstung2011 (see more |
|
384 |
+details in section \@ref(pancreas)): |
|
385 |
+ |
|
386 |
+```{r} |
|
387 |
+## 1. Fitness effects: |
|
388 |
+pancr <- allFitnessEffects( |
|
389 |
+ data.frame(parent = c("Root", rep("KRAS", 4), |
|
390 |
+ "SMAD4", "CDNK2A", |
|
391 |
+ "TP53", "TP53", "MLL3"), |
|
392 |
+ child = c("KRAS","SMAD4", "CDNK2A", |
|
393 |
+ "TP53", "MLL3", |
|
394 |
+ rep("PXDN", 3), rep("TGFBR2", 2)), |
|
395 |
+ s = 0.1, |
|
396 |
+ sh = -0.9, |
|
397 |
+ typeDep = "MN"), |
|
398 |
+ drvNames = c("KRAS", "SMAD4", "CDNK2A", "TP53", |
|
399 |
+ "MLL3", "TGFBR2", "PXDN")) |
|
400 |
+ |
|
401 |
+## Plot the DAG of the fitnessEffects object |
|
402 |
+plot(pancr) |
|
403 |
+``` |
|
404 |
+```{r, fig.width=6.5, fig.height=10} |
|
405 |
+## 2. Simulate from it. |
|
406 |
+ |
|
407 |
+set.seed(4) ## Fix the seed, so we can repeat it |
|
408 |
+ep2 <- oncoSimulIndiv(pancr, model = "McFL", |
|
409 |
+ mu = 1e-6, |
|
410 |
+ sampleEvery = 0.02, |
|
411 |
+ keepEvery = 1, |
|
412 |
+ initSize = 1000, |
|
413 |
+ finalTime = 10000, |
|
414 |
+ onlyCancer = FALSE) |
|
415 |
+``` |
|
416 |
+ |
|
417 |
+<!-- % set.seed(6) ## Fix the seed, so we can repeat it --> |
|
418 |
+<!-- % ep2 <- oncoSimulIndiv(pancr, model = "McFL", --> |
|
419 |
+<!-- % mu = 1e-6, --> |
|
420 |
+<!-- % sampleEvery = 0.02, --> |
|
421 |
+<!-- % keepEvery = 1, --> |
|
422 |
+<!-- % initSize = 2000, --> |
|
423 |
+<!-- % finalTime = 10000, --> |
|
424 |
+<!-- % onlyCancer = FALSE) |
|
425 |
+--> |
|
426 |
+```{r iep2x2,fig.width=6.5, fig.height=9} |
|
427 |
+## 3. What genotypes and drivers we get? And play with limits |
|
428 |
+## to show only parts of the data. We also thin them. |
|
429 |
+ |
|
430 |
+par(mfrow = c(2, 1)) |
|
431 |
+par(cex = 0.7) |
|
432 |
+plot(ep2, show = "genotypes", xlim = c(1000, 8000), |
|
433 |
+ ylim = c(0, 2400), |
|
434 |
+ thinData = TRUE, thinData.keep = 0.5) |
|
435 |
+plot(ep2, show = "drivers", addtot = TRUE, |
|
436 |
+ xlim = c(400, 1800), |
|
437 |
+ thinData = TRUE, thinData.keep = 0.2) |
|
438 |
+ |
|
439 |
+``` |
|
440 |
+ |
|
441 |
+ |
|
442 |
+ |
|
443 |
+## How long does it take? How fast is OncoSimulR? How much space do thereturn objects take? {#timings} |
|
444 |
+ |
|
445 |
+ |
|
446 |
+How long simulations take depend on several factors: |
|
447 |
+ |
|
448 |
+ |
|
449 |
+* Your hardware, of course. |
|
450 |
+* The evolutionary model: using the "McFL" model is often much |
|
451 |
+ slower (this model includes density dependence and is more complex). |
|
452 |
+* The granularity of how often you keep data (`keepEvery` |
|
453 |
+ argument). Note that the default, which is to keep as often as you |
|
454 |
+ sample (so that we preserve all history) can lead to very slow execution |
|
455 |
+ times. |
|
456 |
+* The mutation rate, because higher mutation rates lead to more |
|
457 |
+ clones, and more clones mean we need to iterate over, well, more clones, |
|
458 |
+ and keep larger data structures. |
|
459 |
+ |
|
460 |
+* The fitness specification (more complex fitness specifications tend |
|
461 |
+ to be slightly slower). |
|
462 |
+* Whether or not you keep the complete clone history (this affects |
|
463 |
+ mainly size of return object, not speed). |
|
464 |
+* The stopping conditions. |
|
465 |
+ |
|
466 |
+ |
|
467 |
+ |
|
468 |
+To give you an idea, we will use the above examples, with the |
|
469 |
+`detectionProb` stopping mechanism (see \@ref(detectprob)), two |
|
470 |
+different growth models (exponential and McFarland) and will obtain |
|
471 |
+100 simulations. The results I show are for a laptop with an 8-core |
|
472 |
+Intel Xeon E3-1505M CPU, running Debian GNU/Linux. |
|
473 |
+ |
|
474 |
+```{r, eval=FALSE} |
|
475 |
+ |
|
476 |
+system.time( |
|
477 |
+ ep1_exp <- oncoSimulPop(100, sv2, |
|
478 |
+ detectionProb = "default", |
|
479 |
+ detectionSize = NA, |
|
480 |
+ detectionDrivers = NA, |
|
481 |
+ finalTime = NA, |
|
482 |
+ keepEvery = 5, |
|
483 |
+ model = "Exp", |
|
484 |
+ sampleEvery = 0.02, |
|
485 |
+ initSize = 500, |
|
486 |
+ mc.cores = detectCores())) |
|
487 |
+ |
|
488 |
+system.time( |
|
489 |
+ ep1_mc <- oncoSimulPop(100, sv2, |
|
490 |
+ detectionProb = "default", |
|
491 |
+ detectionSize = NA, |
|
492 |
+ detectionDrivers = NA, |
|
493 |
+ finalTime = NA, |
|
494 |
+ keepEvery = 5, |
|
495 |
+ model = "McFL", |
|
496 |
+ sampleEvery = 0.02, |
|
497 |
+ initSize = 2000, |
|
498 |
+ mc.cores = detectCores())) |
|
499 |
+ |
|
500 |
+system.time( |
|
501 |
+ ep2_exp <- oncoSimulPop(100, pancr, |
|
502 |
+ detectionProb = "default", |
|
503 |
+ detectionSize = NA, |
|
504 |
+ detectionDrivers = NA, |
|
505 |
+ finalTime = NA, |
|
506 |
+ keepEvery = 5, |
|
507 |
+ model = "Exp", |
|
508 |
+ sampleEvery = 0.02, |
|
509 |
+ initSize = 500, |
|
510 |
+ mc.cores = detectCores())) |
|
511 |
+ |
|
512 |
+system.time( |
|
513 |
+ ep2_mc <- oncoSimulPop(100, pancr, |
|
514 |
+ detectionProb = "default", |
|
515 |
+ detectionSize = NA, |
|
516 |
+ detectionDrivers = NA, |
|
517 |
+ finalTime = NA, |
|
518 |
+ keepEvery = 5, |
|
519 |
+ model = "McFL", |
|
520 |
+ sampleEvery = 0.02, |
|
521 |
+ initSize = 2000, |
|
522 |
+ mc.cores = detectCores())) |
|
523 |
+ |
|
524 |
+``` |
|
525 |
+ |
|
526 |
+And we look at the sizes of objects using: |
|
527 |
+ |
|
528 |
+ |
|
529 |
+``` {r, eval=FALSE} |
|
530 |
+print(object.size(ep1_exp), units = "MB") |
|
531 |
+``` |
|
532 |
+ |
|
533 |
+<!-- ```{r, echo=FALSE, results='hide'} --> |
|
534 |
+<!-- dftm1 <- data.frame(Simulation = c("ep1_exp", "ep1_mc", "ep2_exp","ep2_mc"), --> |
|
535 |
+<!-- ElapsedTime = c(0.8, 6.6, 1.1, 5.8), Size = c(1.6, 16.6, 1.6, 20.6)) --> |
|
536 |
+ |
|
537 |
+<!-- ``` --> |
|
538 |
+ |
|
539 |
+The above runs yield the following: |
|
540 |
+ |
|
541 |
+<!-- ```{r, results='show', echo=FALSE} --> |
|
542 |
+<!-- knitr::kable(dftm1, col.names = c("Simulation", "Elapsed Time (s)", "Object Size (MB)")) --> |
|
543 |
+ |
|
544 |
+<!-- ``` --> |
|
545 |
+ |
|
546 |
+ |
|
547 |
+ |
|
548 |
+<!-- |Simulation | Elapsed Time (s)| Object Size (MB)| --> |
|
549 |
+<!-- |:----------|----------------:|----------------:| --> |
|
550 |
+<!-- |ep1_exp | 0.8| 1.6| --> |
|
551 |
+<!-- |ep1_mc | 6.6| 16.6| --> |
|
552 |
+<!-- |ep2_exp | 1.1| 1.6| --> |
|
553 |
+<!-- |ep2_mc | 5.8| 20.6| --> |
|
554 |
+ |
|
555 |
+---------------------------------------------------------- |
|
556 |
+Simulation Elapsed Time (s) Object Size (MB) |
|
557 |
+---------- --------------------- --------------------- |
|
558 |
+ep1_exp 0.8 1.6 |
|
559 |
+ |
|
560 |
+ep1_mc 6.6 16.6 |
|
561 |
+ |
|
562 |
+ep2_exp 1.1 1.6 |
|
563 |
+ |
|
564 |
+ep2_mc 5.8 20.6 |
|
565 |
+---------- --------------------- --------------------- |
|
566 |
+ |
|
567 |
+ |
|
568 |
+ |
|
569 |
+ |
|
570 |
+ |
|
571 |
+ |
|
572 |
+<!-- The above yields the following: --> |
|
573 |
+ |
|
574 |
+<!-- \begin{tabular}[h]{lll} --> |
|
575 |
+<!-- \hline --> |
|
576 |
+<!-- Simulation&Elapsed wall time (s) &Object size (MB)\\ --> |
|
577 |
+<!-- \hline --> |
|
578 |
+<!-- ep1\_exp & 0.8 & 1.6 \\ --> |
|
579 |
+<!-- ep1\_mc & 6.6 & 16.8 \\ --> |
|
580 |
+<!-- ep2\_exp & 1.1 & 1.6 \\ --> |
|
581 |
+<!-- ep2\_mc & 5.8 & 20.6 \\ --> |
|
582 |
+<!-- \hline --> |
|
583 |
+<!-- \end{tabular} --> |
|
584 |
+ |
|
585 |
+ |
|
586 |
+There are large differences in speed between models and sizes of objects |
|
587 |
+differ too: in these cases, the simulations using the "McFL" model run |
|
588 |
+for much longer (2000 to 2000 time units, vs. 100 to 1500 of the "Exp" |
|
589 |
+models) and also had a larger number of clones returned (5 to 16 vs. 1 to |
|
590 |
+3). |
|
591 |
+ |
|
592 |
+ |
|
593 |
+ |
|
594 |
+## Citing OncoSimulR and other documentation {#citing} |
|
595 |
+ |
|
596 |
+In R, you can do |
|
597 |
+```{r} |
|
598 |
+citation("OncoSimulR") |
|
599 |
+``` |
|
600 |
+which will tell you how to cite the package. |
|
601 |
+ |
|
602 |
+ |
|
603 |
+This is the URL for the bioRxiv |
|
604 |
+paper: [http://biorxiv.org/content/early/2016/08/14/069500](http://biorxiv.org/content/early/2016/08/14/069500). You |
|
605 |
+can also take a look at this |
|
606 |
+poster, [http://dx.doi.org/10.7490/f1000research.1112860.1](http://dx.doi.org/10.7490/f1000research.1112860.1), |
|
607 |
+presented at ECCB 2016. |
|
608 |
+ |
|
609 |
+ |
|
610 |
+ |
|
611 |
+ |
|
612 |
+ |
|
613 |
+## Versions {#versions} |
|
614 |
+ |
|
615 |
+In this vignette and the documentation I often refer to version 1 (v.1) |
|
616 |
+and version 2 of OncoSimulR. Version 1 is the version available up to, and |
|
617 |
+including, BioConductor v. 3.1. Version 2 of OncoSimulR is available |
|
618 |
+starting from BioConductor 3.2 (and, of course, available too from |
|
619 |
+development versions of BioC). |
|
620 |
+So, if you are using the current stable or development version of |
|
621 |
+BioConductor, or you grab the sources from github |
|
622 |
+(<https://github.com/rdiaz02/OncoSimul>) you are using what we call |
|
623 |
+version 2. |
|
624 |
+ |
|
625 |
+ |
|
626 |
+**The functionality of version 1 will soon be removed.** |
|
627 |
+ |
|
628 |
+ |
|
629 |
+ |
|
630 |
+ |
|
631 |
+# Specifying fitness effects {#specfit} |
|
632 |
+ |
|
633 |
+## Introduction to the specification of fitness effects {#introfit} |
|
634 |
+ |
|
635 |
+With OncoSimulR you can specify different types of effects on fitness: |
|
636 |
+ |
|
637 |
+ |
|
638 |
+ |
|
639 |
+* A special type of epistatic effect that is particularly amenable |
|
640 |
+ to be represented as a graph. In this graph, having, say, "B" be a |
|
641 |
+ child of "A" means that B can only accumulate if A is already |
|
642 |
+ present. This is what OT [@Desper1999JCB; @Szabo2008], CBN |
|
643 |
+ [@Beerenwinkel2007; @Gerstung2009; @Gerstung2011], progression |
|
644 |
+ networks [@Farahani2013], and other similar models |
|
645 |
+ [@Korsunsky2014] generally mean. Details are provided in section |
|
646 |
+ \@ref(posetslong). Note that this is not an order effect |
|
647 |
+ (discussed below): the fitness of a genotype from this DAGs is a |
|
648 |
+ function of whether or not the restrictions in the graph are |
|
649 |
+ satisfied, not the historical sequence of how they were satisfied. |
|
650 |
+ |
|
651 |
+* Effects where the order in which mutations are acquired matters, as |
|
652 |
+ illustrated in section \@ref(oe). There is, in fact, empirical evidence |
|
653 |
+ of these effects [@Ortmann2015]. For instance, the fitness of |
|
654 |
+ genotype "A, B" would differ depending on whether A or B was acquired |
|
655 |
+ first. |
|
656 |
+ |
|
657 |
+ |
|
658 |
+* General epistatic effects (e.g., section \@ref(epi)), including |
|
659 |
+ synthetic viability (e.g., section \@ref(sv)) and synthetic |
|
660 |
+ lethality/mortality (e.g., section \@ref(sl)). |
|
661 |
+ |
|
662 |
+ |
|
663 |
+* Genes that have independent effects on fitness (section \@ref(noint)). |
|
664 |
+ |
|
665 |
+ |
|
666 |
+ |
|
667 |
+ |
|
668 |
+Modules (see section \@ref(modules0)) allow you to specify any of the above |
|
669 |
+effects (except those for genes without interactions, as it would not make |
|
670 |
+sense there) in terms of modules (sets of genes), not individual genes. We |
|
671 |
+will introduce them right after \@ref(posetslong), and continue using them |
|
672 |
+thereafter. |
|
673 |
+ |
|
674 |
+ |
|
675 |
+A guiding design principle of OncoSimulR is to try to make the |
|
676 |
+specification of those effects as simple as possible but also as flexible |
|
677 |
+as possible. Thus, there are two main ways of specifying fitness effects: |
|
678 |
+ |
|
679 |
+ |
|
680 |
+* Combining different types of effects in a single specification. For |
|
681 |
+ instance, you can combine epistasis with order effects with no |
|
682 |
+ interaction genes with modules. What you would do here is specify the |
|
683 |
+ effects that different mutations (or their combinations) have on fitness |
|
684 |
+ (the fitness effects) and then have OncoSimulR take care of combining |
|
685 |
+ them as if each of these were lego pieces. We will refer to this as the |
|
686 |
+ **lego system of fitness effects**. |
|
687 |
+ |
|
688 |
+ |
|
689 |
+* Explicitly passing to OncoSimulR a mapping of genotypes to |
|
690 |
+ fitness. Here you specify the fitness of each genotype. We will refer to |
|
691 |
+ this as the **explicit mapping of genotypes to fitness**. |
|
692 |
+ |
|
693 |
+ |
|
694 |
+ |
|
695 |
+Both approaches have advantages and disadvantages. Here I emphasize some |
|
696 |
+relevant differences. |
|
697 |
+ |
|
698 |
+* With the lego system you can specify huge genomes with an enormous |
|
699 |
+ variety of interactions, since the possible genotypes are not |
|
700 |
+ constructed in advance. You would not be able to do this with the |
|
701 |
+ explicit mapping of genotypes to fitness if you wanted to, say, |
|
702 |
+ construct that mapping for a modest genotype of 500 genes (you'd have |
|
703 |
+ more genotypes than particles in the Universe). |
|
704 |
+ |
|
705 |
+* For many models/data you often intuitively start with the fitness of |
|
706 |
+ the genotypes, not the fitness consequences of the different |
|
707 |
+ mutations. In these cases, you'd need to do the math to specify the |
|
708 |
+ terms you want if you used the lego system. |
|
709 |
+ |
|
710 |
+* Sometimes you already have a moderate size genotype $\rightarrow$ |
|
711 |
+ fitness mapping and you certainly do not want to do the math by |
|
712 |
+ hand: here the lego system would be painful to use. |
|
713 |
+ |
|
714 |
+* But sometimes we do think in terms of "the effects on fitness of |
|
715 |
+ such and such mutations are" and that immediately calls for the lego |
|
716 |
+ system, where you focus on the effects, and let OncoSimulR take care of |
|
717 |
+ doing the math of combining. |
|
718 |
+ |
|
719 |
+* If you want to use order effects, you must use the lego system (at |
|
720 |
+ least for now). |
|
721 |
+ |
|
722 |
+* If you want to specify modules, you must use the lego system (the |
|
723 |
+ explicit mapping of genotypes is, by its very nature, ill-suited for |
|
724 |
+ this). |
|
725 |
+ |
|
726 |
+* The lego system might help you see what your model really means: in |
|
727 |
+ many cases, you can obtain fairly succinct specification of complex |
|
728 |
+ fitness models with just a few terms. Similarly, depending on what your |
|
729 |
+ emphasis is, you can often specify the same fitness landscape in several |
|
730 |
+ different ways. |
|
731 |
+ |
|
732 |
+ |
|
733 |
+ |
|
734 |
+ |
|
735 |
+Regardless of the route, you need to get that information into |
|
736 |
+ OncoSimulR's functions. The main function we will use is |
|
737 |
+ *`allFitnessEffects`*: this is the function in charge of reading |
|
738 |
+ the fitness specifications. We also need to discuss how, what, and where |
|
739 |
+ you have to pass to *`allFitnessEffects`*. |
|
740 |
+ |
|
741 |
+ |
|
742 |
+### Explicit mapping of genotypes to fitness {#explicitmap} |
|
743 |
+ |
|
744 |
+Conceptually, the simplest way to specify fitness is to specify the |
|
745 |
+mapping of all genotypes to fitness explicitly. An example will make |
|
746 |
+this clear. Let's suppose you have a simple two-gene scenario, so a |
|
747 |
+total of four genotypes, and you have a data frame with genotypes |
|
748 |
+and fitness, where genoytpes are specified as character vectors, |
|
749 |
+with mutated genes separated by commas: |
|
750 |
+ |
|
751 |
+```{r} |
|
752 |
+m4 <- data.frame(G = c("WT", "A", "B", "A, B"), F = c(1, 2, 3, 4)) |
|
753 |
+``` |
|
754 |
+ |
|
755 |
+Now, let's give that to the *`allFitnessEffects`* function: |
|
756 |
+ |
|
757 |
+```{r} |
|
758 |
+fem4 <- allFitnessEffects(genotFitness = m4) |
|
759 |
+``` |
|
760 |
+(The message is just telling you what the program guessed you |
|
761 |
+wanted.) |
|
762 |
+ |
|
763 |
+ |
|
764 |
+That's it. You can plot that fitnessEffects object |
|
765 |
+```{r, out.width="6cm", out.height = "6cm"} |
|
766 |
+plot(fem4) |
|
767 |
+``` |
|
768 |
+ |
|
769 |
+In this case, that plot is not very interesting (compare with the |
|
770 |
+`plot(pancr)` we saw in \@ref(quickexample) or the plots in |
|
771 |
+\@ref(posetslong)). |
|
772 |
+ |
|
773 |
+You can also check what OncoSimulR thinks the fitnesses are, with the |
|
774 |
+*`evalAllGenotypes`* function that we will use repeatedly below |
|
775 |
+(of course, here we should see the same fitnesses we entered): |
|
776 |
+ |
|
777 |
+```{r} |
|
778 |
+evalAllGenotypes(fem4, addwt = TRUE) |
|
779 |
+``` |
|
780 |
+ |
|
781 |
+ |
|
782 |
+And you can plot the fitness landscape: |
|
783 |
+ |
|
784 |
+```{r} |
|
785 |
+plotFitnessLandscape(evalAllGenotypes(fem4)) |
|
786 |
+``` |
|
787 |
+ |
|
788 |
+ |
|
789 |
+To specify the mapping you can also use a matrix (or data frame) with |
|
790 |
+$g + 1$ columns; each of the first $g$ columns contains a 1 or a 0 |
|
791 |
+indicating that the gene of that column is mutated or not. Column $g+ 1$ |
|
792 |
+contains the fitness values. And you do not even need to specify all the |
|
793 |
+genotypes (we will assume that the missing genotypes have fitness 1): |
|
794 |
+ |
|
795 |
+```{r, out.width="6cm", out.height = "6cm"} |
|
796 |
+m6 <- cbind(c(1, 1), c(1, 0), c(2, 3)) |
|
797 |
+fem6 <- allFitnessEffects(genotFitness = m6) |
|
798 |
+evalAllGenotypes(fem6, addwt = TRUE) |
|
799 |
+plot(fem6) |
|
800 |
+``` |
|
801 |
+ |
|
802 |
+```{r, fig.width=6.5, fig.height = 6.5} |
|
803 |
+plotFitnessLandscape(evalAllGenotypes(fem6)) |
|
804 |
+``` |
|
805 |
+ |
|
806 |
+ |
|
807 |
+This way of giving a fitness specification to OncoSimulR might be |
|
808 |
+ideal if you directly generate random mappings of genotypes to |
|
809 |
+fitness (or random fitness landscapes), as we will do in section |
|
810 |
+\@ref(gener-fit-land). |
|
811 |
+ |
|
812 |
+ |
|
813 |
+We will see an example of this way of passing fitness again in |
|
814 |
+\@ref(bauer), where will compare it with the lego system. |
|
815 |
+ |
|
816 |
+<!-- % Please see the documentation of *`allFitnessEffects`* for further --> |
|
817 |
+<!-- % details and examples. --> |
|
818 |
+ |
|
819 |
+ |
|
820 |
+ |
|
821 |
+ |
|
822 |
+ |
|
823 |
+<!-- % This can be done with OncoSimulR (e.g., see sections \@ref(e2), \@ref(e3) --> |
|
824 |
+<!-- % and \@ref(theminus) or the example in \@ref(weis1b)), but this only makes --> |
|
825 |
+<!-- % sense for subsets of the genes or for very small genotypes, as you --> |
|
826 |
+<!-- % probably do not want to be explicit about the mapping of $2^k$ genotypes --> |
|
827 |
+<!-- % to fitness when $k$ is larger than, say, four or five, and definitely not --> |
|
828 |
+<!-- % when $k$ is 10. --> |
|
829 |
+ |
|
830 |
+ |
|
831 |
+### How to specify fitness effects with the lego system {#howfit} |
|
832 |
+ |
|
833 |
+<!-- % A guiding design principle of OncoSimulR is to try to make the --> |
|
834 |
+<!-- % specification of those effects as simple as possible but also as flexible --> |
|
835 |
+<!-- % as possible. --> |
|
836 |
+ |
|
837 |
+ |
|
838 |
+An alternative general approach followed in many genetic simulators |
|
839 |
+is to specify how particular combinations of alleles modify the |
|
840 |
+wildtype genotype or the genotype that contains the individual |
|
841 |
+effects of the interacting genes (e.g., see equation 1 in the |
|
842 |
+supplementary material for FFPopSim, @Zanini2012). For example, if |
|
843 |
+we specify that a mutation in "A" contributes 0.04, a mutation in |
|
844 |
+"B" contributes 0.03, and the double mutation "A:B" contributes 0.1, |
|
845 |
+that means that the fitness of the "A, B" genotype (the genotype |
|
846 |
+with A and B mutated) is that of the wildtype (1, by default), plus |
|
847 |
+(actually, times ---see section \@ref(numfit)) the effects of having |
|
848 |
+A mutated, plus (times) the effects of having B mutated, plus |
|
849 |
+(times) the effects of "A:B" both being mutated. |
|
850 |
+ |
|
851 |
+ |
|
852 |
+We will see below that with the "lego system" it is possible to do |
|
853 |
+something very similar to the explicit mapping of section |
|
854 |
+\@ref(explicitmap). But this will sometimes require a more cumbersome |
|
855 |
+notation (and sometimes also will require your doing some math). We will see |
|
856 |
+examples in sections \@ref(e2), \@ref(e3) and \@ref(theminus) or the example |
|
857 |
+in \@ref(weis1b). But then, if we can be explicit about (at least some of) |
|
858 |
+the mappings $genotype \rightarrow fitness$, how are these procedures |
|
859 |
+different? When you use the "lego system" you can combine both a partial |
|
860 |
+explicit mapping of genotypes to fitness with arbitrary fitness effects of |
|
861 |
+other genes/modules. In other words, with the "lego system" OncoSimulR |
|
862 |
+makes it simple to be explicit about the mapping of specific genotypes, |
|
863 |
+while also using the "how this specific effects modifies previous |
|
864 |
+effects" logic, leading to a flexible specification. This also means that |
|
865 |
+in many cases the same fitness effects can be specified in several |
|
866 |
+different ways. |
|
867 |
+ |
|
868 |
+Most of the rest of this section is devoted to explaining how to combine |
|
869 |
+those pieces. Before that, however, we need to discuss the fitness model |
|
870 |
+we use. |
|
871 |
+ |
|
872 |
+ |
|
873 |
+ |
|
874 |
+<!-- % As we will see in the examples (e.g., see sections \@ref(e2), \@ref(e3), --> |
|
875 |
+<!-- % \@ref(exlong)) OncoSimulR makes it simple to be explicit about the mapping --> |
|
876 |
+<!-- % of specific genotypes, while also using the ``how this specific effects --> |
|
877 |
+<!-- % modifies previous effects" logic, leading to a flexible --> |
|
878 |
+<!-- % specification. This also means that in many cases the same fitness --> |
|
879 |
+<!-- % effects can be specified in several different ways. --> |
|
880 |
+ |
|
881 |
+ |
|
882 |
+## Numeric values of fitness effects {#numfit} |
|
883 |
+ |
|
884 |
+We evaluate fitness using the usual [@Zanini2012; @Gillespie1993; |
|
885 |
+ @Beerenwinkel2007; @Datta2013] multiplicative model: fitness is |
|
886 |
+$\prod (1 + s_i)$ where $s_i$ is the fitness effect of gene (or gene |
|
887 |
+interaction) $i$. In all models except Bozic, this fitness refers to the |
|
888 |
+growth rate (the death rate being fixed to 1[^2]). |
|
889 |
+The original model of @McFarland2013 has a slightly different |
|
890 |
+parameterization, but you can go easily from one to the other (see section |
|
891 |
+\@ref(mcfl)). |
|
892 |
+ |
|
893 |
+For the Bozic model [@Bozic2010], however, the birth rate is set to |
|
894 |
+1, and the death rate then becomes $\prod (1 - s_i)$. |
|
895 |
+ |
|
896 |
+[^2]: You can change this if you really want to. |
|
897 |
+ |
|
898 |
+ |
|
899 |
+### McFarland parameterization {#mcfl} |
|
900 |
+ |
|
901 |
+In the original model of @McFarland2013, the effects of drivers |
|
902 |
+contribute to the numerator of the birth rate, and those of the |
|
903 |
+(deleterious) passengers to the denominator as: $\frac{(1 + |
|
904 |
+s)^D}{(1 - s_p)^P}$, where $D$ and $P$ are, respectively, the total |
|
905 |
+number of drivers and passengers in a genotype, and here the fitness |
|
906 |
+effects of all drivers is the same ($s$) and that of all passengers |
|
907 |
+the same too ($s_p$). However, we can map from this ratio to the |
|
908 |
+usual product of terms by using a different value of $s_p$, that we |
|
909 |
+will call $s_{pp} = -s_p/(1 + s_p)$ (see @McFarland2014-phd, his |
|
910 |
+eq. 2.1 in p.9). This reparameterization applies to v.2. In v.1 we |
|
911 |
+use the same parameterization as in the original one in |
|
912 |
+@McFarland2013. |
|
913 |
+ |
|
914 |
+ |
|
915 |
+ |
|
916 |
+### No viability of clones and types of models {#noviab} |
|
917 |
+ |
|
918 |
+For all models where fitness affects directly the birth rate (for now, all |
|
919 |
+except Bozic), if you specify that some event (say, mutating gene A) has |
|
920 |
+$s_A \le -1$, if that event happens then birth rate becomes zero which is |
|
921 |
+taken to indicate that the clone is not even viable and thus disappears |
|
922 |
+immediately without any chance for mutation[^3]. |
|
923 |
+ |
|
924 |
+[^3]:This is a shortcut that we take because we think that it is |
|
925 |
+ what you mean. Note, however, that technically a clone with birth |
|
926 |
+ rate of 0 might have a non-zero probability of mutating before |
|
927 |
+ becoming extinct because in the continuous time model we use |
|
928 |
+ mutation is not linked to reproduction. In the present code, we |
|
929 |
+ are not allowing for any mutation when birth rate is 0. There are |
|
930 |
+ other options, but none which I find really better. An alternative |
|
931 |
+ implementation makes a clone immediately extinct if and only if |
|
932 |
+ any of the $s_i = -\infty$. However, we still need to handle the |
|
933 |
+ case with $s_i < -1$ as a special case. We either make it |
|
934 |
+ identical to the case with any $s_i = -\infty$ or for any $s_i > |
|
935 |
+ -\infty$ we set $(1 + s_i) = \max(0, 1 + s_i)$ (i.e., if $s_i < |
|
936 |
+ -1$ then $(1 + s_i) = 0$), to avoid obtaining negative birth rates |
|
937 |
+ (that make no sense) and the problem of multiplying an even number |
|
938 |
+ of negative numbers. I think only the second would make sense as |
|
939 |
+ an alternative. |
|
940 |
+ |
|
941 |
+Models based on Bozic, however, have a birth rate of 1 and mutations |
|
942 |
+affect the death rate. In this case, a death rate larger than birth rate, |
|
943 |
+per se, does not signal immediate extinction and, moreover, even for death |
|
944 |
+rates that are a few times larger than birth rates, the clone could mutate |
|
945 |
+before becoming extinct[^4]. |
|
946 |
+ |
|
947 |
+[^4]:We said "a few times". For a clone of population size 1 ---which is |
|
948 |
+the size at which all clones start from mutation---, if death rate is, |
|
949 |
+say, 90 but birth rate is 1, the probability of mutating before becoming |
|
950 |
+extinct is very, very close to zero for all reasonable values of mutation |
|
951 |
+rate}. How do we signal immediate extinction or no viability in this case? |
|
952 |
+You can set the value of $s = -\infty$. |
|
953 |
+ |
|
954 |
+<!-- %% Setting a value of --> |
|
955 |
+<!-- %% $s < -90$ has the same effect. --> |
|
956 |
+ |
|
957 |
+In general, if you want to identify some mutations or some |
|
958 |
+combinations of mutations as leading to immediate extinction (i.e., |
|
959 |
+no viability), of the affected clone, set it to $-\infty$ as this |
|
960 |
+would work even if how birth rates of 0 are handled changes. Most |
|
961 |
+examples below evaluate fitness by its effects on the birth |
|
962 |
+rate. You can see one where we do it both ways in Section |
|
963 |
+\@ref(fit-neg-pos). |
|
964 |
+ |
|
965 |
+ |
|
966 |
+ |
|
967 |
+<!-- \footnote{In the C++ --> |
|
968 |
+<!-- code there is a different model, not directly callable from R for now, --> |
|
969 |
+<!-- called ``bozic2" that is slightly different. These comments apply to --> |
|
970 |
+<!-- the model that is right now callable from R --> |
|
971 |
+ |
|
972 |
+ |
|
973 |
+## Genes without interactions {#noint} |
|
974 |
+ |
|
975 |
+This is a imple scenario. Each gene $i$ has a fitness effect $s_i$ if |
|
976 |
+mutated. The $s_i$ can come from any distribution you want. As an example |
|
977 |
+let's use three genes. We know there are no order effects, but we will |
|
978 |
+also see what happens if we examine genotypes as ordered. |
|
979 |
+ |
|
980 |
+```{r} |
|
981 |
+ |
|
982 |
+ai1 <- evalAllGenotypes(allFitnessEffects( |
|
983 |
+ noIntGenes = c(0.05, -.2, .1)), order = FALSE) |
|
984 |
+``` |
|
985 |
+ |
|
986 |
+ |
|
987 |
+We can easily verify the first results: |
|
988 |
+ |
|
989 |
+```{r} |
|
990 |
+ai1 |
|
991 |
+``` |
|
992 |
+ |
|
993 |
+```{r} |
|
994 |
+all(ai1[, "Fitness"] == c( (1 + .05), (1 - .2), (1 + .1), |
|
995 |
+ (1 + .05) * (1 - .2), |
|
996 |
+ (1 + .05) * (1 + .1), |
|
997 |
+ (1 - .2) * (1 + .1), |
|
998 |
+ (1 + .05) * (1 - .2) * (1 + .1))) |
|
999 |
+ |
|
1000 |
+``` |
|
1001 |
+ |
|
1002 |
+And we can see that considering the order of mutations (see section |
|
1003 |
+\@ref(oe)) makes no difference: |
|
1004 |
+ |
|
1005 |
+```{r} |
|
1006 |
+(ai2 <- evalAllGenotypes(allFitnessEffects( |
|
1007 |
+ noIntGenes = c(0.05, -.2, .1)), order = TRUE, |
|
1008 |
+ addwt = TRUE)) |
|
1009 |
+ |
|
1010 |
+``` |
|
1011 |
+ |
|
1012 |
+(The meaning of the notation in the output table is as follows: "WT" |
|
1013 |
+denotes the wild-type, or non-mutated clone. The notation $x > y$ means |
|
1014 |
+that a mutation in "x" happened before a mutation in "y". A genotype |
|
1015 |
+$x > y\ \_\ z$ means that a mutation in "x" happened before a |
|
1016 |
+mutation in "y"; there is also a mutation in "z", but that is a gene |
|
1017 |
+for which order does not matter). |
|
1018 |
+ |
|
1019 |
+ |
|
1020 |
+ |
|
1021 |
+And what if I want genes without interactions but I want modules (see |
|
1022 |
+section \@ref(modules0))? Go to section \@ref(mod-no-epi). |
|
1023 |
+ |
|
1024 |
+ |
|
1025 |
+ |
|
1026 |
+## Using DAGs: Restrictions in the order of mutations as extended posets {#posetslong} |
|
1027 |
+ |
|
1028 |
+ |
|
1029 |
+### AND, OR, XOR relationships {#andorxor} |
|
1030 |
+ |
|
1031 |
+The literature on oncogenetic trees, CBNs, etc, has used graphs as a way |
|
1032 |
+of showing the restrictions in the order in which mutations can |
|
1033 |
+accumulate. The meaning of "convergent arrows" in these graphs, however, |
|
1034 |
+differs. In Figure 1 of @Korsunsky2014 we are shown a simple diagram |
|
1035 |
+that illustrates the three basic different meanings of convergent arrows |
|
1036 |
+using two parental nodes. We will illustrate it here with three. Suppose |
|
1037 |
+we focus on node "g" in the following figure (we will create it shortly) |
|
1038 |
+ |
|
1039 |
+```{r, fig.height=4} |
|
1040 |
+data(examplesFitnessEffects) |
|
1041 |
+plot(examplesFitnessEffects[["cbn1"]]) |
|
1042 |
+``` |
|
1043 |
+ |
|
1044 |
+ |
|
1045 |
+* In relationships of the type used in **Conjunctive Bayesian |
|
1046 |
+ Networks (CBN)** [e.g., @Gerstung2009], we are modeling an **AND** |
|
1047 |
+ relationship, also called **CMPN** by @Korsunsky2014 or |
|
1048 |
+ **monotone** relationship by @Farahani2013. If the relationship in |
|
1049 |
+ the graph is fully respected, then "g" will only appear if all of |
|
1050 |
+ "c", "d", and "e" are already mutated. |
|
1051 |
+ |
|
1052 |
+* **Semimonotone** relationships *sensu* |
|
1053 |
+ @Farahani2013 or **DMPN** *sensu* |
|
1054 |
+ @Korsunsky2014 are **OR** relationships: "g" will appear if |
|
1055 |
+ one or more of "c", "d", or "e" are already mutated. |
|
1056 |
+ |
|
1057 |
+* **XMPN** relationships [@Korsunsky2014] are **XOR** |
|
1058 |
+ relationships: "g" will be present only if exactly one of "c", |
|
1059 |
+ "d", or "e" is present. |
|
1060 |
+ |
|
1061 |
+ |
|
1062 |
+ |
|
1063 |
+Note that oncogenetic trees [@Desper1999JCB; @Szabo2008] need not |
|
1064 |
+deal with the above distinctions, since the DAGs are trees: no node has |
|
1065 |
+more than one incoming connection or more than one parent[^5]. |
|
1066 |
+ |
|
1067 |
+[^5]: OTs and CBNs have some other technical differences about the |
|
1068 |
+underlying model they assume, such as the exponential waiting time in |
|
1069 |
+CBNs. We will not discuss them here. |
|
1070 |
+ |
|
1071 |
+ |
|
1072 |
+To have a flexible way of specifying all of these restrictions, we will |
|
1073 |
+want to be able to say what kind of dependency each child |
|
1074 |
+node has on its parents. |
|
1075 |
+ |
|
1076 |
+ |
|
1077 |
+### Fitness effects {#fitnessposets} |
|
1078 |
+ |
|
1079 |
+Those DAGs specify dependencies and, as explained in |
|
1080 |
+@Diaz-Uriarte2015, it is simple to map them to a simple evolutionary |
|
1081 |
+model: any set of mutations that does not conform to the restrictions |
|
1082 |
+encoded in the graph will have a fitness of 0. However, we might not want |
|
1083 |
+to require absolute compliance with the DAG. This means we might want to |
|
1084 |
+allow deviations from the DAG with a corresponding penalization that is, |
|
1085 |
+however, not identical to setting fitness to 0 [again, see |
|
1086 |
+@Diaz-Uriarte2015]. This we can do by being explicit about the |
|
1087 |
+fitness effects of the deviations from the restrictions encoded in the |
|
1088 |
+DAG. We will use below a column of `s` for the fitness effect when |
|
1089 |
+the restrictions are satisfied and a column of `sh` when they are |
|
1090 |
+not. (See also \@ref(numfit) for the details about the meaning of the |
|
1091 |
+fitness effects). |
|
1092 |
+ |
|
1093 |
+ |
|
1094 |
+That way of specifying fitness effects makes it also trivial to use the |
|
1095 |
+model in @Hjelm2006 where all mutations might be allowed to occur, |
|
1096 |
+but the presence of some mutations increases the probability of occurrence |
|
1097 |
+of other mutations. For example, the values of `sh` could be all |
|
1098 |
+small positive ones (or for mildly deleterious effects, small negative |
|
1099 |
+numbers), while the values of `s` are much larger positive numbers. |
|
1100 |
+ |
|
1101 |
+ |
|
1102 |
+### Extended posets |
|
1103 |
+ |
|
1104 |
+In version 1 of this package we used posets in the sense of |
|
1105 |
+@Beerenwinkel2007 and @Gerstung2009, as explained in section |
|
1106 |
+\@ref(poset) and in the help for *`poset`*. Here, we continue using |
|
1107 |
+two columns, that specify parents and children, but we add columns |
|
1108 |
+for the specific values of fitness effects (both s and sh ---i.e., |
|
1109 |
+fitness effects for what happens when restrictions are and are not |
|
1110 |
+satisfied) and for the type of dependency as explained in section |
|
1111 |
+\@ref(andorxor). |
|
1112 |
+ |
|
1113 |
+ |
|
1114 |
+ |
|
1115 |
+We can now illustrate the specification of different fitness effects |
|
1116 |
+using DAGs. |
|
1117 |
+ |
|
1118 |
+### DAGs: A first conjunction (AND) example {#cbn1} |
|
1119 |
+ |
|
1120 |
+```{r} |
|
1121 |
+ |
|
1122 |
+cs <- data.frame(parent = c(rep("Root", 4), "a", "b", "d", "e", "c"), |
|
1123 |
+ child = c("a", "b", "d", "e", "c", "c", rep("g", 3)), |
|
1124 |
+ s = 0.1, |
|
1125 |
+ sh = -0.9, |
|
1126 |
+ typeDep = "MN") |
|
1127 |
+ |
|
1128 |
+cbn1 <- allFitnessEffects(cs) |
|
1129 |
+ |
|
1130 |
+``` |
|
1131 |
+ |
|
1132 |
+(We skip one letter, just to show that names need not be consecutive or |
|
1133 |
+have any particular order.) |
|
1134 |
+ |
|
1135 |
+ |
|
1136 |
+We can get a graphical representation using the default "graphNEL" |
|
1137 |
+```{r, fig.height=3} |
|
1138 |
+plot(cbn1) |
|
1139 |
+``` |
|
1140 |
+ |
|
1141 |
+or one using "igraph": |
|
1142 |
+```{r, fig.height=5} |
|
1143 |
+plot(cbn1, "igraph") |
|
1144 |
+``` |
|
1145 |
+ |
|
1146 |
+<!-- %% The vignette crashes if I try to use the layout. --> |
|
1147 |
+ |
|
1148 |
+Since we have a parent and children, the reingold.tilford layout is |
|
1149 |
+probably the best here, so you might want to use that: |
|
1150 |
+ |
|
1151 |
+```{r, fig.height=5} |
|
1152 |
+library(igraph) ## to make the reingold.tilford layout available |
|
1153 |
+plot(cbn1, "igraph", layout = layout.reingold.tilford) |
|
1154 |
+``` |
|
1155 |
+ |
|
1156 |
+ |
|
1157 |
+ |
|
1158 |
+And what is the fitness of all genotypes? |
|
1159 |
+ |
|
1160 |
+```{r} |
|
1161 |
+gfs <- evalAllGenotypes(cbn1, order = FALSE, addwt = TRUE) |
|
1162 |
+ |
|
1163 |
+gfs[1:15, ] |
|
1164 |
+``` |
|
1165 |
+ |
|
1166 |
+You can verify that for each genotype, if a mutation is present without |
|
1167 |
+all of its dependencies present, you get a $(1 - 0.9)$ multiplier, and you |
|
1168 |
+get a $(1 + 0.1)$ multiplier for all the rest with its direct parents |
|
1169 |
+satisfied. For example, genotypes "a", or "b", or "d", or "e" have |
|
1170 |
+fitness $(1 + 0.1)$, genotype "a, b, c" has fitness $(1 + 0.1)^3$, but |
|
1171 |
+genotype "a, c" has fitness $(1 + 0.1) (1 - 0.9) = 0.11$. |
|
1172 |
+ |
|
1173 |
+ |
|
1174 |
+ |
|
1175 |
+### DAGs: A second conjunction example {#cbn2} |
|
1176 |
+ |
|
1177 |
+ |
|
1178 |
+Let's try a first attempt at a somewhat more complex example, where the |
|
1179 |
+fitness consequences of different genes differ. |
|
1180 |
+```{r} |
|
1181 |
+ |
|
1182 |
+c1 <- data.frame(parent = c(rep("Root", 4), "a", "b", "d", "e", "c"), |
|
1183 |
+ child = c("a", "b", "d", "e", "c", "c", rep("g", 3)), |
|
1184 |
+ s = c(0.01, 0.02, 0.03, 0.04, 0.1, 0.1, rep(0.2, 3)), |
|
1185 |
+ sh = c(rep(0, 4), c(-.1, -.2), c(-.05, -.06, -.07)), |
|
1186 |
+ typeDep = "MN") |
|
1187 |
+ |
|
1188 |
+try(fc1 <- allFitnessEffects(c1)) |
|
1189 |
+ |
|
1190 |
+``` |
|
1191 |
+If you try this, you'll get an error. There is an error because the |
|
1192 |
+"sh" varies within a child, and we do not allow that for a |
|
1193 |
+poset-type specification, as it is ambiguous. If you need arbitrary |
|
1194 |
+fitness values for arbitrary combinations of genotypes, you can |
|
1195 |
+specify them using epistatic effects as in section \@ref(epi) and |
|
1196 |
+order effects as in section \@ref(oe). |
|
1197 |
+ |
|
1198 |
+Why do we need to specify as many "s" and "sh" as there are rows (or a |
|
1199 |
+single one, that gets expanded to those many) when the "s" and "sh" |
|
1200 |
+are properties of the child node, not of the edges? Because, for ease, we |
|
1201 |
+use a data.frame. |
|
1202 |
+ |
|
1203 |
+<!-- %% (By the way, yes, we convert all factors to strings in the parent, child, --> |
|
1204 |
+<!-- %% and typeDep columns, so no need to specify `stringsAsFactor = TRUE`). --> |
|
1205 |
+ |
|
1206 |
+ |
|
1207 |
+ |
|
1208 |
+We fix the error in our specification. Notice that the "sh" is not set |
|
1209 |
+to $-1$ in these examples. If you want strict compliance with the poset |
|
1210 |
+restrictions, you should set $sh = -1$ or, better yet, $sh = -\infty$ (see |
|
1211 |
+section \@ref(noviab)), but having an $sh > -1$ will lead to fitnesses that |
|
1212 |
+are $> 0$ and, thus, is a way of modeling small deviations from the poset |
|
1213 |
+[see discussion in @Diaz-Uriarte2015]. |
|
1214 |
+ |
|
1215 |
+<!-- %% In these examples, the reason to set "sh" to values larger than $-1$ and --> |
|
1216 |
+<!-- %% different among the genes is to allow us to easily see the actual, --> |
|
1217 |
+<!-- %% different, terms that enter into the multiplication of the fitness effects --> |
|
1218 |
+<!-- %% (and, also, to make it easier to catch bugs). --> |
|
1219 |
+ |
|
1220 |
+ |
|
1221 |
+Note that for those nodes that depend only on "Root" the type of |
|
1222 |
+dependency is irrelevant. |
|
1223 |
+ |
|
1224 |
+```{r} |
|
1225 |
+c1 <- data.frame(parent = c(rep("Root", 4), "a", "b", "d", "e", "c"), |
|
1226 |
+ child = c("a", "b", "d", "e", "c", "c", rep("g", 3)), |
|
1227 |
+ s = c(0.01, 0.02, 0.03, 0.04, 0.1, 0.1, rep(0.2, 3)), |
|
1228 |
+ sh = c(rep(0, 4), c(-.9, -.9), rep(-.95, 3)), |
|
1229 |
+ typeDep = "MN") |
|
1230 |
+ |
|
1231 |
+cbn2 <- allFitnessEffects(c1) |
|
1232 |
+ |
|
1233 |
+``` |
|
1234 |
+ |
|
1235 |
+<!-- %% We can get a graphical representation using the default "graphNEL" --> |
|
1236 |
+<!-- %% <<fig.height=3>>= --> |
|
1237 |
+<!-- %% plot(cbn2) --> |
|
1238 |
+<!-- %% @ --> |
|
1239 |
+ |
|
1240 |
+<!-- %% or one using "igraph": --> |
|
1241 |
+<!-- %% <<fig.height=5>>= --> |
|
1242 |
+<!-- %%plot(cbn2, "igraph", layout = layout.reingold.tilford) --> |
|
1243 |
+<!-- %% @ --> |
|
1244 |
+ |
|
1245 |
+<!-- %% (since this is a tree, the reingold.tilford layout is probably the best here). --> |
|
1246 |
+ |
|
1247 |
+We could get graphical representations but the figures would |
|
1248 |
+be the same as in the example in section \@ref(cbn1), since the structure |
|
1249 |
+has not changed, only the numeric values. |
|
1250 |
+ |
|
1251 |
+What is the fitness of all possible genotypes? Here, order of events |
|
1252 |
+*per se* does not matter, beyond that considered in the poset. In |
|
1253 |
+other words, the fitness of genotype "a, b, c" is the same no matter how |
|
1254 |
+we got to "a, b, c". What matters is whether or not the genes on which |
|
1255 |
+each of "a", "b", and "c" depend are present or not (I only show the first |
|
1256 |
+10 genotypes) |
|
1257 |
+ |
|
1258 |
+```{r} |
|
1259 |
+gcbn2 <- evalAllGenotypes(cbn2, order = FALSE) |
|
1260 |
+gcbn2[1:10, ] |
|
1261 |
+``` |
|
1262 |
+ |
|
1263 |
+ |
|
1264 |
+Of course, if we were to look at genotypes but taking into account order |
|
1265 |
+of occurrence of mutations, we would see no differences |
|
1266 |
+ |
|
1267 |
+```{r} |
|
1268 |
+gcbn2o <- evalAllGenotypes(cbn2, order = TRUE, max = 1956) |
|
1269 |
+gcbn2o[1:10, ] |
|
1270 |
+``` |
|
1271 |
+ |
|
1272 |
+(The $max = 1956$ is there so that we show all the genotypes, even |
|
1273 |
+if they are more than 256, the default.) |
|
1274 |
+ |
|
1275 |
+You can check the output and verify things are as they should. For instance: |
|
1276 |
+ |
|
1277 |
+```{r} |
|
1278 |
+all.equal( |
|
1279 |
+ gcbn2[c(1:21, 22, 28, 41, 44, 56, 63 ) , "Fitness"], |
|
1280 |
+ c(1.01, 1.02, 0.1, 1.03, 1.04, 0.05, |
|
1281 |
+ 1.01 * c(1.02, 0.1, 1.03, 1.04, 0.05), |
|
1282 |
+ 1.02 * c(0.10, 1.03, 1.04, 0.05), |
|
1283 |
+ 0.1 * c(1.03, 1.04, 0.05), |
|
1284 |
+ 1.03 * c(1.04, 0.05), |
|
1285 |
+ 1.04 * 0.05, |
|
1286 |
+ 1.01 * 1.02 * 1.1, |
|
1287 |
+ 1.01 * 0.1 * 0.05, |
|
1288 |
+ 1.03 * 1.04 * 0.05, |
|
1289 |
+ 1.01 * 1.02 * 1.1 * 0.05, |
|
1290 |
+ 1.03 * 1.04 * 1.2 * 0.1, ## notice this |
|
1291 |
+ 1.01 * 1.02 * 1.03 * 1.04 * 1.1 * 1.2 |
|
1292 |
+ )) |
|
1293 |
+``` |
|
1294 |
+ |
|
1295 |
+A particular one that is important to understand is genotype with |
|
1296 |
+mutated genes "c, d, e, g": |
|
1297 |
+ |
|
1298 |
+```{r} |
|
1299 |
+gcbn2[56, ] |
|
1300 |
+all.equal(gcbn2[56, "Fitness"], 1.03 * 1.04 * 1.2 * 0.10) |
|
1301 |
+``` |
|
1302 |
+ |
|