git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@105123 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: OncoSimulR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Forward Genetic Simulation of Cancer Progression with Epistasis |
4 |
-Version: 1.99.0 |
|
5 |
-Date: 2015-06-16 |
|
4 |
+Version: 1.99.1 |
|
5 |
+Date: 2015-06-18 |
|
6 | 6 |
Author: Ramon Diaz-Uriarte. |
7 | 7 |
Maintainer: Ramon Diaz-Uriarte <rdiaz02@gmail.com> |
8 | 8 |
Description: Functions for forward population genetic simulation in |
... | ... |
@@ -17,6 +17,7 @@ Description: Functions for forward population genetic simulation in |
17 | 17 |
realizations of the simulations. |
18 | 18 |
biocViews: BiologicalQuestion, SomaticMutation |
19 | 19 |
License: GPL (>= 3) |
20 |
+URL: https://github.com/rdiaz02/OncoSimul, http://ligarto.org/rdiaz |
|
20 | 21 |
Depends: R (>= 3.1.0) |
21 | 22 |
Imports: Rcpp (>= 0.11.1), parallel, data.table, graph, Rgraphviz, gtools, igraph |
22 | 23 |
Suggests: BiocStyle, knitr, Oncotree, testthat |
... | ... |
@@ -1,3 +1,7 @@ |
1 |
+Changes in version 1.99.1 (2015-06-18): |
|
2 |
+ - Try to compile in Windoze with the SSTR again. |
|
3 |
+ - Reduce size of RData objects with resaveRdaFiles. |
|
4 |
+ |
|
1 | 5 |
Changes in version 1.99.0 (2015-06-17): |
2 | 6 |
- Many MAJOR changes: we are done moving to v.2 |
3 | 7 |
- New way of specifying restrictions (v.2) that allows |
... | ... |
@@ -448,12 +448,22 @@ std::string genotypeToIntString(const std::vector<int>& genotypeV, |
448 | 448 |
|
449 | 449 |
|
450 | 450 |
for(auto const &g : order_int) { |
451 |
+#ifdef _WIN32 |
|
452 |
+ order_part += (comma + SSTR(g)); |
|
453 |
+#endif |
|
454 |
+#ifndef _WIN32 |
|
451 | 455 |
order_part += (comma + std::to_string(g)); |
456 |
+#endif |
|
452 | 457 |
comma = ", "; |
453 | 458 |
} |
454 | 459 |
comma = ""; |
455 | 460 |
for(auto const &g : rest_int) { |
461 |
+#ifdef _WIN32 |
|
462 |
+ rest += (comma + SSTR(g)); |
|
463 |
+#endif |
|
464 |
+#ifndef _WIN32 |
|
456 | 465 |
rest += (comma + std::to_string(g)); |
466 |
+#endif |
|
457 | 467 |
comma = ", "; |
458 | 468 |
} |
459 | 469 |
if(fg.orderG.size()) { |
... | ... |
@@ -693,7 +693,12 @@ static inline void whichDrivers(int& totalPresentDrivers, |
693 | 693 |
std::string comma = ""; |
694 | 694 |
for(size_t i = 0; i < countByDriver.size(); ++i) { |
695 | 695 |
if(countByDriver[i] > 0) { |
696 |
- strDrivers += (comma + std::to_string(i + 1)); |
|
696 |
+#ifdef _WIN32 |
|
697 |
+ strDrivers += (comma + SSTR(i + 1)); |
|
698 |
+#endif |
|
699 |
+#ifndef _WIN32 |
|
700 |
+ strDrivers += (comma + std::to_string(i + 1)); //SSTR(i + 1)); |
|
701 |
+#endif |
|
697 | 702 |
comma = ", "; |
698 | 703 |
++totalPresentDrivers; |
699 | 704 |
} |
... | ... |
@@ -85,8 +85,16 @@ double ti_nextTime_tmax_2_st(const spParamsP& spP, |
85 | 85 |
|
86 | 86 |
// FIXME: should never happen |
87 | 87 |
if(spP.popSize <= 0.0) { |
88 |
+ |
|
89 |
+#ifdef _WIN32 |
|
88 | 90 |
throw std::range_error("ti: popSize <= 0. spP.popSize = " |
91 |
+ + SSTR(spP.popSize)); |
|
92 |
+#endif |
|
93 |
+ |
|
94 |
+#ifndef _WIN32 |
|
95 |
+ throw std::range_error("ti: popSize <= 0. spP.popSize = " |
|
89 | 96 |
+ std::to_string(spP.popSize)); |
97 |
+#endif |
|
90 | 98 |
} |
91 | 99 |
// long double invpop = 1/spP.popSize; |
92 | 100 |
// long double r; |
... | ... |
@@ -16,6 +16,14 @@ |
16 | 16 |
/* } */ |
17 | 17 |
|
18 | 18 |
|
19 |
+// The Windows compiler in BioC is pre 4.8.0, so std::to_string is not available |
|
20 |
+// From http://stackoverflow.com/a/5590404 |
|
21 |
+#define SSTR( x ) dynamic_cast< std::ostringstream & >( \ |
|
22 |
+ ( std::ostringstream() << std::dec << x ) ).str() |
|
23 |
+ |
|
24 |
+ |
|
25 |
+ |
|
26 |
+ |
|
19 | 27 |
#ifdef DEBUGW |
20 | 28 |
#define ASSERT(x) { \ |
21 | 29 |
if (! (x)) { \ |
... | ... |
@@ -175,7 +175,7 @@ summary of some of the key features: |
175 | 175 |
on McFarland et al.\ \cite{McFarland2013}. |
176 | 176 |
|
177 | 177 |
\item Code in C++ is available (though not yet callable from R) for |
178 |
- using several other models, including the one from \cite{Beerenwinkel2007}. |
|
178 |
+ using several other models, including the one from \cite{Beerenwinkel2007b}. |
|
179 | 179 |
|
180 | 180 |
\item Simulations are generally very fast as I use C++ to implement |
181 | 181 |
the BNB algorithm. |
... | ... |
@@ -254,16 +254,6 @@ library(igraph) |
254 | 254 |
With OncoSimulR you can specify different types of effects on fitness: |
255 | 255 |
|
256 | 256 |
\begin{itemize} |
257 |
-\item General epistatic effects (e.g., section \ref{epi}), including |
|
258 |
- synthetic viability (e.g., section \ref{sv}) and synthetic |
|
259 |
- lethality/mortality (e.g., section \ref{sl}). |
|
260 |
- |
|
261 |
- |
|
262 |
-\item Effects where the order in which mutations are acquired matter, as |
|
263 |
- illustrated in section \ref{oe}. There is, in fact, empirical evidence |
|
264 |
- of these effects \cite{Ortmann2015}. For instance, the fitness of |
|
265 |
- genotype ``A, B'' would differ depending on whether A or B was acquired |
|
266 |
- first. |
|
267 | 257 |
|
268 | 258 |
\item A special type of epistatic effects that is particularly amenable to |
269 | 259 |
being represented as a graph. In this graph, having, say, ``B'' be a child |
... | ... |
@@ -271,21 +261,37 @@ With OncoSimulR you can specify different types of effects on fitness: |
271 | 261 |
is what OT \cite{Desper1999JCB, Szabo2008}, CBN \cite{Beerenwinkel2007, |
272 | 262 |
Gerstung2009, Gerstung2011}, progression networks |
273 | 263 |
\cite{FarahaniLagergren2013}, and other similar models |
274 |
- \cite{Korsunsky2014} mean. Note that this is not an order effect: the |
|
275 |
- fitness of a genotype is a function of whether or not the restrictions |
|
276 |
- in the graph are satisfied, not the historical sequence by how they were |
|
277 |
- satisfied. |
|
264 |
+ \cite{Korsunsky2014} mean. Details are provided in section |
|
265 |
+ \ref{posetslong}. Note that this is not an order effect (discussed |
|
266 |
+ below): the fitness of a genotype from these DAGs is a function of |
|
267 |
+ whether or not the restrictions in the graph are satisfied, not the |
|
268 |
+ historical sequence in which they were satisfied. |
|
269 |
+ |
|
270 |
+\item Effects where the order in which mutations are acquired matters, as |
|
271 |
+ illustrated in section \ref{oe}. There is, in fact, empirical evidence |
|
272 |
+ of these effects \cite{Ortmann2015}. For instance, the fitness of |
|
273 |
+ genotype ``A, B'' would differ depending on whether A or B was acquired |
|
274 |
+ first. |
|
278 | 275 |
|
279 |
-\item Genes that have independent effects on fitness. |
|
276 |
+ |
|
277 |
+\item General epistatic effects (e.g., section \ref{epi}), including |
|
278 |
+ synthetic viability (e.g., section \ref{sv}) and synthetic |
|
279 |
+ lethality/mortality (e.g., section \ref{sl}). |
|
280 |
+ |
|
281 |
+ |
|
282 |
+\item Genes that have independent effects on fitness (section \ref{noint}). |
|
280 | 283 |
|
281 | 284 |
\end{itemize} |
282 | 285 |
|
283 | 286 |
|
284 |
-Modules (see section \ref{modules0}) allow you to specify the above |
|
287 |
+Modules (see section \ref{modules0}) allow you to specify any of the above |
|
285 | 288 |
effects (except those for genes without interactions, as it would not make |
286 |
-sense there) in terms of modules (sets of genes), not individual genes. |
|
289 |
+sense there) in terms of modules (sets of genes), not individual genes. We |
|
290 |
+will introduce them right after section \ref{posetslong}, and continue using them |
|
291 |
+thereafter. |
|
292 |
+ |
|
287 | 293 |
|
288 |
-\subsubsection{How to specify those effects}\label{howfit} |
|
294 |
+\subsubsection{How to specify fitness effects}\label{howfit} |
|
289 | 295 |
|
290 | 296 |
A guiding design principle of OncoSimulR is to try to make the |
291 | 297 |
specification of those effects as simple as possible but also as flexible |
... | ... |
@@ -473,11 +479,20 @@ of other mutations. For example, the values of \texttt{sh} could be all |
473 | 479 |
small positive ones (or for mildly deleterious effects, small negative |
474 | 480 |
numbers), while the values of \texttt{s} are much larger positive numbers. |
475 | 481 |
|
482 |
+\subsubsection{Extended posets} |
|
483 |
+In version 1 of this package we used posets in the sense of |
|
484 |
+\cite{Beerenwinkel2007, Gerstung2009} as explained in section \ref{poset} |
|
485 |
+and in the help for \Rfunction{poset}. Here, we continue using two |
|
486 |
+columns, that specify parents and children, but we add columns for the |
|
487 |
+specific values of fitness effects (both s and sh ---i.e., fitness effects |
|
488 |
+for what happens when restrictions are and are not satisfied) and for the |
|
489 |
+type of dependency as explained in section \ref{andorxor}. |
|
490 |
+ |
|
476 | 491 |
|
477 | 492 |
|
478 | 493 |
We can now illustrate the specification of different fitness effects. |
479 | 494 |
|
480 |
-\subsubsection{A first conjunction example}\label{cbn1} |
|
495 |
+\subsubsection{A first conjunction (AND) example}\label{cbn1} |
|
481 | 496 |
|
482 | 497 |
<<>>= |
483 | 498 |
|
... | ... |
@@ -564,9 +579,10 @@ and typeDep columns, so no need to specify \texttt{stringsAsFactor = TRUE}). |
564 | 579 |
|
565 | 580 |
We fix the error in our specification. Notice that the ``sh'' is not set |
566 | 581 |
to $-1$ in these examples. If you want strict compliance with the poset |
567 |
-restrictions, you should set $sh = -1$, but having an $sh > -1$ will lead |
|
568 |
-to fitnesses that are $> 0$ and, thus, is a way of modeling small |
|
569 |
-deviations from the poset (see discussion in \cite{Diaz-Uriarte2015}). |
|
582 |
+restrictions, you should set $sh = -1$ or, better yet, $sh = -\infty$ (see |
|
583 |
+section \ref{noviab}), but having an $sh > -1$ will lead to fitnesses that |
|
584 |
+are $> 0$ and, thus, is a way of modeling small deviations from the poset |
|
585 |
+(see discussion in \cite{Diaz-Uriarte2015}). |
|
570 | 586 |
|
571 | 587 |
In these examples, the reason to set ``sh'' to values larger than $-1$ and |
572 | 588 |
different among the genes is to allow us to easily see the actual, |
... | ... |
@@ -588,20 +604,21 @@ cbn2 <- allFitnessEffects(c1) |
588 | 604 |
|
589 | 605 |
@ |
590 | 606 |
|
591 |
-We can get a graphical representation using the default ``graphNEL'' |
|
592 |
-<<fig.height=3>>= |
|
593 |
-plot(cbn2) |
|
594 |
-@ |
|
607 |
+%% We can get a graphical representation using the default ``graphNEL'' |
|
608 |
+%% <<fig.height=3>>= |
|
609 |
+%% plot(cbn2) |
|
610 |
+%% @ |
|
595 | 611 |
|
596 |
-or one using ``igraph'': |
|
597 |
-<<fig.height=5>>= |
|
598 |
-plot(cbn2, "igraph", layout = layout.reingold.tilford) |
|
599 |
-@ |
|
612 |
+%% or one using ``igraph'': |
|
613 |
+%% <<fig.height=5>>= |
|
614 |
+%% plot(cbn2, "igraph", layout = layout.reingold.tilford) |
|
615 |
+%% @ |
|
600 | 616 |
|
601 |
-(since this is a tree, the reingold.tilford layout is probably the best here). |
|
617 |
+%% (since this is a tree, the reingold.tilford layout is probably the best here). |
|
602 | 618 |
|
603 |
-This figures, of course, are the same as in the example in section |
|
604 |
-\ref{cbn1}, since the structure has not changed, only the numeric values. |
|
619 |
+We could get graphical representations but the figures, of course, would |
|
620 |
+be the same as in the example in section \ref{cbn1}, since the structure |
|
621 |
+has not changed, only the numeric values. |
|
605 | 622 |
|
606 | 623 |
What is the fitness of all possible genotypes? Here, order of events |
607 | 624 |
\textit{per se} does not matter, beyond that considered in the poset. In |
... | ... |
@@ -779,7 +796,7 @@ plot(fp3) |
779 | 796 |
|
780 | 797 |
We can also use ``igraph'': |
781 | 798 |
|
782 |
-<<fig.height=5>>= |
|
799 |
+<<fig.height=6>>= |
|
783 | 800 |
plot(fp3, "igraph", layout.reingold.tilford) |
784 | 801 |
@ |
785 | 802 |
|
... | ... |
@@ -974,7 +991,7 @@ plot(fp4m, expandModules = TRUE) |
974 | 991 |
|
975 | 992 |
or |
976 | 993 |
|
977 |
-<<fig.height=6>>= |
|
994 |
+<<fig.height=7>>= |
|
978 | 995 |
plot(fp4m, "igraph", layout = layout.reingold.tilford, |
979 | 996 |
expandModules = TRUE) |
980 | 997 |
|
... | ... |
@@ -1284,6 +1301,7 @@ type, but each single mutant is lethal. |
1284 | 1301 |
M&M& (1 + s)\\ |
1285 | 1302 |
\hline |
1286 | 1303 |
\end{tabular} |
1304 |
+ |
|
1287 | 1305 |
where ``wt'' denotes wild type and ``M'' denotes mutant. |
1288 | 1306 |
|
1289 | 1307 |
|
... | ... |
@@ -1737,7 +1755,7 @@ fea <- allFitnessEffects(rT = p4, epistasis = epist, orderEffects = oe, |
1737 | 1755 |
|
1738 | 1756 |
How does it look? |
1739 | 1757 |
|
1740 |
-<<fig.height=5.5>>= |
|
1758 |
+<<fig.height=6.5>>= |
|
1741 | 1759 |
plot(fea) |
1742 | 1760 |
@ |
1743 | 1761 |
|
... | ... |
@@ -1749,12 +1767,12 @@ plot(fea, "igraph") |
1749 | 1767 |
|
1750 | 1768 |
|
1751 | 1769 |
We can, if we want, expand the modules using a ``graphNEL'' graph |
1752 |
-<<fig.height=5>>= |
|
1770 |
+<<fig.height=6.5>>= |
|
1753 | 1771 |
plot(fea, expandModules = TRUE) |
1754 | 1772 |
@ |
1755 | 1773 |
|
1756 | 1774 |
or an ``igraph'' one |
1757 |
-<<fig.height=7>>= |
|
1775 |
+<<fig.height=7.>>= |
|
1758 | 1776 |
plot(fea, "igraph", expandModules = TRUE) |
1759 | 1777 |
@ |
1760 | 1778 |
|
... | ... |
@@ -2249,7 +2267,7 @@ pancreatic cancer poset in Gerstung et al.\ \cite{Gerstung2011} (their |
2249 | 2267 |
figure 2B, left). We use directly the names of the genes, since that is |
2250 | 2268 |
immediately supported by the new version. |
2251 | 2269 |
|
2252 |
-<<>>= |
|
2270 |
+<<fig.width=4>>= |
|
2253 | 2271 |
|
2254 | 2272 |
pancr <- allFitnessEffects( |
2255 | 2273 |
data.frame(parent = c("Root", rep("KRAS", 4), |
... | ... |
@@ -2262,6 +2280,7 @@ pancr <- allFitnessEffects( |
2262 | 2280 |
sh = -0.9, |
2263 | 2281 |
typeDep = "MN")) |
2264 | 2282 |
|
2283 |
+plot(pancr) |
|
2265 | 2284 |
@ |
2266 | 2285 |
|
2267 | 2286 |
Of course the ``s'' and ``sh'' are set arbitrarily here. |
... | ... |
@@ -2352,6 +2371,31 @@ summary(colSums(mcfLs$Genotypes[-(1:70), ])) |
2352 | 2371 |
|
2353 | 2372 |
|
2354 | 2373 |
|
2374 |
+ |
|
2375 |
+%% <<>>= |
|
2376 |
+ |
|
2377 |
+%% nin <- 50000 |
|
2378 |
+%% ne <- 10 |
|
2379 |
+%% s <- 0.1 |
|
2380 |
+%% sj <- -0.05 |
|
2381 |
+%% nn <- sapply(c("A", "B"), function(x) paste0(seq.int(ne), x)) |
|
2382 |
+%% int <- apply(nn, 1, function(x) paste(x, collapse = " : ")) |
|
2383 |
+%% single <- as.vector(nn) |
|
2384 |
+%% epi <- c(rep(s, length(single)), rep(sj, length(int))) |
|
2385 |
+%% names(epi) <- c(single, int) |
|
2386 |
+%% ee <- allFitnessEffects(epistasis = epi, |
|
2387 |
+%% noIntGenes = rexp(nin, 20)) |
|
2388 |
+ |
|
2389 |
+ |
|
2390 |
+%% see <- oncoSimulIndiv(ee, model = "Exp", detectionDrivers = 1000, |
|
2391 |
+%% sampleEvery = 10, |
|
2392 |
+%% keepEvery = -9) |
|
2393 |
+ |
|
2394 |
+%% @ |
|
2395 |
+ |
|
2396 |
+ |
|
2397 |
+ |
|
2398 |
+ |
|
2355 | 2399 |
\subsection{Simulation with a conjunction example}\label{s-cbn1} |
2356 | 2400 |
|
2357 | 2401 |
We will simulate using the simple CBN-like restrictions of |
... | ... |
@@ -2884,7 +2928,7 @@ proposed and named before; please let me know, best if with a reference. |
2884 | 2928 |
|
2885 | 2929 |
|
2886 | 2930 |
Should we remove direct connections if there are indirect? Or, should we |
2887 |
-set \texttt{removeDirectIndirect = TRUE}? Except for zz \cite{FarahaniLagergren2013}, |
|
2931 |
+set \texttt{removeDirectIndirect = TRUE}? Except for \cite{FarahaniLagergren2013}, |
|
2888 | 2932 |
none of the DAGs I've seen in the context of CBNs, oncogenetic trees, etc, |
2889 | 2933 |
include both direct and indirect connections between nodes. If these |
2890 | 2934 |
exist, reasoning about the model can be harder. For example, with CBN (AND |
... | ... |
@@ -760,3 +760,17 @@ |
760 | 760 |
url = {http://cran.r-project.org/package=Oncotree}, |
761 | 761 |
} |
762 | 762 |
|
763 |
+@article{Beerenwinkel2007b, |
|
764 |
+author = {Beerenwinkel, Niko and Antal, Tibor and Dingli, David and Traulsen, Arne and Kinzler, Kenneth W and Velculescu, Victor E and Vogelstein, Bert and Nowak, Martin A}, |
|
765 |
+doi = {10.1371/journal.pcbi.0030225}, |
|
766 |
+issn = {1553-7358}, |
|
767 |
+journal = {PLoS computational biology}, |
|
768 |
+month = nov, |
|
769 |
+number = {11}, |
|
770 |
+pages = {e225}, |
|
771 |
+pmid = {17997597}, |
|
772 |
+title = {{Genetic progression and the waiting time to cancer.}}, |
|
773 |
+url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2065895\&tool=pmcentrez\&rendertype=abstract}, |
|
774 |
+volume = {3}, |
|
775 |
+year = {2007} |
|
776 |
+} |
... | ... |
@@ -1,15 +1,15 @@ |
1 | 1 |
\usepackage[% |
2 |
- shash={a73655b}, |
|
3 |
- lhash={a73655b1bb5d38bc7299c7c3a3ffc264f70a5ba8}, |
|
4 |
- authname={ramon diaz-uriarte (at Bufo)}, |
|
2 |
+ shash={74c0951}, |
|
3 |
+ lhash={74c095145b7ffe852573b83738a4f50628c1006b}, |
|
4 |
+ authname={Ramon Diaz-Uriarte (at Coleonyx)}, |
|
5 | 5 |
authemail={rdiaz02@gmail.com}, |
6 |
- authsdate={2015-06-17}, |
|
7 |
- authidate={2015-06-17 19:14:51 +0200}, |
|
8 |
- authudate={1434561291}, |
|
9 |
- commname={ramon diaz-uriarte (at Bufo)}, |
|
6 |
+ authsdate={2015-06-18}, |
|
7 |
+ authidate={2015-06-18 15:07:19 +0200}, |
|
8 |
+ authudate={1434632839}, |
|
9 |
+ commname={Ramon Diaz-Uriarte (at Coleonyx)}, |
|
10 | 10 |
commemail={rdiaz02@gmail.com}, |
11 |
- commsdate={2015-06-17}, |
|
12 |
- commidate={2015-06-17 19:14:51 +0200}, |
|
13 |
- commudate={1434561291}, |
|
14 |
- refnames={ (HEAD, splitfitness)} |
|
11 |
+ commsdate={2015-06-18}, |
|
12 |
+ commidate={2015-06-18 15:07:19 +0200}, |
|
13 |
+ commudate={1434632839}, |
|
14 |
+ refnames={ (HEAD, origin/master, origin/HEAD)} |
|
15 | 15 |
]{gitsetinfo} |
16 | 16 |
\ No newline at end of file |