Browse code

2.13.2

ramon diaz-uriarte (at Phelsuma) authored on 18/03/2019 11:11:47
Showing8 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.13.1
5
-Date: 2019-02-07
4
+Version: 2.13.2
5
+Date: 2019-03-18
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -18,8 +18,8 @@ Description: Functions for forward population genetic simulation in
18 18
     genes (to model mutator phenotypes). Simulations
19 19
     use continuous-time models and can include driver and passenger genes
20 20
     and modules.  Also included are functions for: simulating random DAGs
21
-    of the type found in Oncogenetic Tress, Conjunctive Bayesian Networks,
22
-    and other tumor progression models; plotting and sampling from
21
+    of the type found in Oncogenetic Trees, Conjunctive Bayesian Networks,
22
+    and other cancer progression models; plotting and sampling from
23 23
     single or multiple realizations of the simulations, including
24 24
     single-cell sampling; plotting the parent-child relationships of the
25 25
     clones; generating random fitness landscapes (Rough Mount Fuji, House
... ...
@@ -1,3 +1,8 @@
1
+Changes in version 2.13.2 (2019-03-18):
2
+	- changes in behavior of sample
3
+	  (see NEWS and https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17494) were
4
+	  leading to failures of some tests. Using RNGversion in some tests.
5
+
1 6
 Changes in version 2.13.1 (2019-02-07):
2 7
 	- bumped to one over the BioC-3.9 version
3 8
 
... ...
@@ -141,7 +141,8 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
141 141
     instance, a row "A, B" would mean the genotype with both A and B mutated.
142 142
   }
143 143
   In all cases, fitness must be \code{>= 0}. If any possible genotype is
144
-  missing, its fitness is assumed to be 1.
144
+  missing, its fitness is assumed to be 0, except for WT (if WT is
145
+    missing, its fitness is assumed to be 1 ---see examples).
145 146
 }
146 147
 
147 148
 
... ...
@@ -406,6 +407,19 @@ plotFitnessLandscape(evalAllGenotypes(fe2, order = FALSE))
406 407
 plotFitnessLandscape(fe2)
407 408
 
408 409
 
410
+###### Defaults for missing genotypes
411
+
412
+## As a two-column data frame
413
+
414
+(m8 <- data.frame(G = c("A, B, C", "B"), F = c(3, 2)))
415
+evalAllGenotypes(allFitnessEffects(genotFitness = m8), addwt = TRUE)
416
+
417
+## As a matrix 
418
+
419
+(m9 <- rbind(c(0, 1, 0, 1, 4), c(1, 0, 1, 0, 1.5)))
420
+evalAllGenotypes(allFitnessEffects(genotFitness = m9), addwt = TRUE)
421
+
422
+
409 423
 ## Reinitialize the seed
410 424
 set.seed(NULL)
411 425
 }
... ...
@@ -2,6 +2,8 @@ inittime <- Sys.time()
2 2
 cat(paste("\n Starting test.Z-all-fitness at", date(), "\n"))
3 3
 
4 4
 RNGkind("Mersenne-Twister")
5
+## in 3.6.0 change in sample: see https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17494
6
+suppressWarnings(RNGversion("3.5.1"))
5 7
 
6 8
 test_that("catches identical gene names in different modules", {
7 9
     ## long, because it uses a complex and easy to miss problem
... ...
@@ -76,11 +76,14 @@ test_that("all genes must be in geneToModule even if present in fitness", {
76 76
 })
77 77
 
78 78
 
79
-test_that("fitness and mutator effects evaluation of actual values, long example",  {
79
+test_that("fit. mut. eff. values, long ex",  {
80 80
     ## Based on "long example OK" in "test.all-fitness.R"
81 81
     ## Fitness and mutator effects are evaluated OK when modules, epist, etc,
82 82
     ## are made differently in mutator and fitness and modules even share names 
83 83
     RNGkind("Mersenne-Twister")
84
+    ## in 3.6.0 change in sample: see https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17494
85
+    suppressWarnings(RNGversion("3.5.1"))
86
+
84 87
     p4 <- data.frame(parent = c(rep("Root", 4), "A", "B", "D", "E", "C", "F"),
85 88
                      child = c("A", "B", "D", "E", "C", "C", "F", "F", "G", "G"),
86 89
                      s = c(0.01, 0.02, 0.03, 0.04, 0.1, 0.1, 0.2, 0.2, 0.3, 0.3),
... ...
@@ -107,6 +107,44 @@ test_that("genotFitness not combined with others", {
107 107
 })
108 108
 
109 109
 
110
+test_that("Missing genotypes defaults: WT 1, others 0", {
111
+    (m8 <- data.frame(G = c("A, B, C", "B"), F = c(3, 2)))
112
+    exp_m8 <- data.frame(
113
+        Genotype = c("WT", "A", "B", "C", "A, B", "A, C",
114
+                     "B, C", "A, B, C"),
115
+        Fitness = c(1, 0, 2, rep(0, 4), 3),
116
+        stringsAsFactors = FALSE)
117
+    
118
+    o_m8 <- evalAllGenotypes(allFitnessEffects(genotFitness = m8),
119
+                     addwt = TRUE)
120
+    ## as exp_m8 does not have the evalAllGenotypes attribute
121
+    expect_equivalent(o_m8, exp_m8)
122
+
123
+    
124
+
125
+    (m9 <- rbind(
126
+         c(0, 1, 1, 0, 2),
127
+         c(1, 1, 0, 0, 4),
128
+         c(1, 0, 1, 0, 1.5)
129
+     ))
130
+    
131
+    exp_m9 <- data.frame(
132
+        Genotype = c("WT", "A", "B", "C", "D",
133
+                     "A, B", "A, C", "A, D", 
134
+                     "B, C", "B, D", "C, D",
135
+                     "A, B, C", "A, B, D", "A, C, D", "B, C, D",
136
+                     "A, B, C, D"),
137
+        Fitness = c(1, rep(0, 4),
138
+                    4, 1.5, 0, 2,
139
+                    rep(0, 7)),
140
+        stringsAsFactors = FALSE)
141
+
142
+    o_m9 <- evalAllGenotypes(allFitnessEffects(genotFitness = m9), addwt = TRUE)
143
+
144
+    expect_equivalent(o_m9, exp_m9)
145
+    
146
+})
147
+
110 148
 cat(paste("\n Ending genotFitness at", date(), "\n"))
111 149
 cat(paste("  Took ", round(difftime(Sys.time(), inittime, units = "secs"), 2), "\n\n"))
112 150
 rm(inittime)
... ...
@@ -3598,12 +3598,12 @@ And you can plot the fitness landscape:
3598 3598
 plotFitnessLandscape(evalAllGenotypes(fem4))
3599 3599
 ``` 
3600 3600
 
3601
-
3602 3601
 To specify the mapping you can also use a matrix (or data frame) with
3603 3602
 $g + 1$ columns; each of the first $g$ columns contains a 1 or a 0
3604 3603
 indicating that the gene of that column is mutated or not. Column $g+ 1$
3605 3604
 contains the fitness values. And you do not even need to specify all the
3606
-genotypes (we will assume that the missing genotypes have fitness 1):
3605
+genotypes: the missing genotypes are assigned a fitness 0 ---except
3606
+for the WT genotype which, if missing, is assigned a fitness of 1:
3607 3607
 
3608 3608
 ```{r}
3609 3609
 m6 <- cbind(c(1, 1), c(1, 0), c(2, 3))
... ...
@@ -3726,7 +3726,7 @@ we use.
3726 3726
 
3727 3727
 <!-- % As we will see in the examples (e.g., see sections \@ref(e2), \@ref(e3), -->
3728 3728
 <!-- % \@ref(exlong)) OncoSimulR makes it simple to be explicit about the mapping -->
3729
-<!-- % of specific genotypes, while also using the ``how this specific effects -->
3729
+<!-- % of specific genotypes, while also using the "how this specific effects -->
3730 3730
 <!-- % modifies previous effects" logic, leading to a flexible -->
3731 3731
 <!-- % specification. This also means that in many cases the same fitness -->
3732 3732
 <!-- % effects can be specified in several different ways. -->
... ...
@@ -4203,7 +4203,7 @@ all.equal(gcbn2[56, "Fitness"], 1.03 * 1.04 * 1.2 * 0.10)
4203 4203
 ``` 
4204 4204
 
4205 4205
 where "g" is taken as if its dependencies are satisfied (as "c",
4206
-``d", and "e" are present) even when the dependencies of "c" are not
4206
+"d", and "e" are present) even when the dependencies of "c" are not
4207 4207
 satisfied (and that is why the term for "c" is 0.9).
4208 4208
 
4209 4209
 
... ...
@@ -4277,7 +4277,7 @@ gxor1 <- evalAllGenotypes(xor1, order = FALSE)
4277 4277
 
4278 4278
 Whenever "c" is present with both "a" and "b", the fitness component
4279 4279
 for "c" will be $(1 - 0.1)$. Similarly for "g" (if more than one of
4280
-``d", "e", or "c" is present, it will show as $(1 - 0.05)$). For example:
4280
+"d", "e", or "c" is present, it will show as $(1 - 0.05)$). For example:
4281 4281
 
4282 4282
 ```{r}
4283 4283
 gxor1[c(22, 41), ] 
... ...
@@ -4895,7 +4895,7 @@ But we can also use a specification where we do not use the "-". That
4895 4895
 requires a different numerical value of the interaction, because now, as
4896 4896
 we are rewriting the interaction term as genotype "A is mutant, B is
4897 4897
 mutant" the double mutant will incorporate the effects of "A mutant",
4898
-``B mutant" and "both A and B mutants". We can define a new $s_2$ that
4898
+"B mutant" and "both A and B mutants". We can define a new $s_2$ that
4899 4899
 satisfies $(1 + s_{ab}) = (1 + s_a) (1 + s_b) (1 + s_2)$ so
4900 4900
 $(1 + s_2) = (1 + s_{ab})/((1 + s_a) (1 + s_b))$ and therefore specify as:
4901 4901
 
... ...
@@ -4977,7 +4977,7 @@ evalAllGenotypes(E3A, order = FALSE, addwt = FALSE)
4977 4977
 We needed to pass the $s_{ac}$ coefficient explicitly, even if it that
4978 4978
 term was just the product. We can try to avoid using the "-", however
4979 4979
 (but we will need to do other calculations). For simplicity, I use capital
4980
-``S" in what follows where the letters differ from the previous
4980
+"S" in what follows where the letters differ from the previous
4981 4981
 specification:
4982 4982
 
4983 4983
 
... ...
@@ -5100,7 +5100,7 @@ depicted above (several genes in module A, for example) actually is one of
5100 5100
 interaction: the members of "A" are combined using an "OR" operator
5101 5101
 (i.e., the fitness consequences of having one or more genes of A mutated
5102 5102
 are the same), not just simply multiplying their fitness; similarly for
5103
-``B". This is why no interaction genes also mean no modules allowed.
5103
+"B". This is why no interaction genes also mean no modules allowed.
5104 5104
 
5105 5105
 So how do you get what you want in this case?  Enter the names of
5106 5106
 the modules in the `epistasis` component but have no term for ":"
... ...
@@ -5298,7 +5298,7 @@ evalAllGenotypes(sv, order = FALSE, addwt = TRUE, model = "Bozic")
5298 5298
 ```
5299 5299
 
5300 5300
 What gives here? The simulation code would alert you of this (see section
5301
-\@ref(ex-0-death)) in this particular case because there are ``-1",
5301
+\@ref(ex-0-death)) in this particular case because there are "-1",
5302 5302
 which might indicate that this is not what you want. The problem is that
5303 5303
 you probably want the Death rate to be infinity (the birth rate was 0, so
5304 5304
 no clone viability, when we used birth rates ---section \@ref(noviab)).
... ...
@@ -6516,14 +6516,16 @@ you need to decide:
6516 6516
 
6517 6517
 ## Can I start the simulation from a specific mutant? {#initmut}
6518 6518
 
6519
-You bet. <!-- In v.1 you can only give the initial mutant as one with a single -->
6520
-<!-- mutated gene. --> In version 2 you can specify the genotype for the
6519
+You bet. In version 2 you can specify the genotype for the
6521 6520
 initial mutant with the same flexibility as in `evalGenotype`. Here we show
6522 6521
 a couple of examples (we use the representation of the parent-child
6523 6522
 relationships ---discussed in section \@ref(phylog)--- of the clones so that
6524 6523
 you can see which clones appear, and from which, and check that we are not
6525 6524
 making mistakes).
6526 6525
 
6526
+<!-- In v.1 you can only give the initial mutant as one with a single -->
6527
+<!-- mutated gene. -->
6528
+
6527 6529
 <!-- %% o3init <- allFitnessEffects(orderEffects = c( -->
6528 6530
 <!-- %%                             "M > D > F" = 0.99, -->
6529 6531
 <!-- %%                             "D > M > F" = 0.2, -->
... ...
@@ -6660,7 +6662,7 @@ drivers, whichever comes first (if any of these happen before
6660 6662
 
6661 6663
 In the `onlyCancer = TRUE` case, what happens if we reach
6662 6664
 `finalTime` (or the population size becomes zero) before any of the
6663
-``reach cancer" conditions have been fulfilled?  The simulation will be
6665
+"reach cancer" conditions have been fulfilled?  The simulation will be
6664 6666
 repeated again, within the following limits:
6665 6667
 
6666 6668
 
... ...
@@ -6932,7 +6934,7 @@ example, suppose we have a five loci genotype and suppose that you want to
6932 6934
 stop the simulations only if genotypes "A", "B, E", or "A, B, C, D, E"
6933 6935
 reach fixation. You do not want to stop it if, say, genotype "A, B, E"
6934 6936
 reaches fixation. To specify genotypes, you prepend the genotype
6935
-combinations with a ``_,'', and that tells OncoSimulR that you want
6937
+combinations with a "_,", and that tells OncoSimulR that you want
6936 6938
 fixation of **genotypes**, not just gene combinations.
6937 6939
 
6938 6940
 An example of the differences between the mechanisms can be seen from
... ...
@@ -7039,7 +7041,7 @@ fixation. These must be successive sampling periods without interruptions
7039 7041
 (i.e., a single period when the condition is not fulfilled will set the
7040 7042
 counter to 0). This can help to exclude short, transitional, local maxima
7041 7043
 that are quickly replaced by other genotypes. (The default is 50, but this
7042
-is probably too small for ``real life'' usage).
7044
+is probably too small for "real life" usage).
7043 7045
 
7044 7046
 `fixation_min_size`: you might only want to consider fixation to have
7045 7047
 happened if a minimal size has been reached (this can help weed out local
... ...
@@ -7701,7 +7703,7 @@ streamgraph(lb1, Genotype, Y, Time, scale = "continuous",
7701 7703
 <!-- %% \verb= %>% =, the pipe operator. Something like  -->
7702 7704
 <!-- %% \begin{verbatim} -->
7703 7705
 <!-- %% streamgraph(lb1, Genotype, Y, Time, scale = "continuous",   -->
7704
-<!-- %%            offset = "zero") \%>\%                                                                                                                                sg_legend(show=TRUE, label=``Genotype: ")  -->
7706
+<!-- %%            offset = "zero") \%>\%                                                                                                                                sg_legend(show=TRUE, label="Genotype: ")  -->
7705 7707
 <!-- %% \end{verbatim} -->
7706 7708
 
7707 7709
 <!-- %% but it gives me problems with knitr, etc). -->
... ...
@@ -8312,7 +8314,7 @@ par(op)
8312 8314
 ## Parent-child relationships from multiple runs {#phylogmult}
8313 8315
 
8314 8316
 If you use `oncoSimulPop` you can store and plot the
8315
-``phylogenies" of the different runs:
8317
+"phylogenies" of the different runs:
8316 8318
 
8317 8319
 ```{r}
8318 8320
 
... ...
@@ -8759,7 +8761,7 @@ increase the computational burden by many orders of magnitude.
8759 8761
 <!-- > pruned to free computer memory.  When the simulation is completed, -->
8760 8762
 <!-- > the properties of the origination and fixation point processes may -->
8761 8763
 <!-- > be studied by 'climbing' the tree and recording the origination and -->
8762
-<!-- > fixation times of sites." -->
8764
+<!-- > fixation times of sites. -->
8763 8765
 
8764 8766
 ### `sampleEvery`, `keepPhylog`, and pruning {#prune}
8765 8767
 
... ...
@@ -9362,5 +9364,5 @@ set.seed(NULL)
9362 9364
 <!-- fill-column: 68 -->
9363 9365
 <!-- End: -->
9364 9366
 
9365
-<!--  LocalWords:  genotype Genotyping
9366
- -->
9367
+<!--  LocalWords:  genotype Genotyping -->
9368
+
... ...
@@ -1,15 +1,15 @@
1 1
 \usepackage[%
2
-		shash={fd5f72e},
3
-		lhash={fd5f72ece0484508a63cf408dbe6ab2cdb5cb5b2},
4
-		authname={Ramon Diaz-Uriarte (at Coleonyx)},
2
+		shash={501dfef},
3
+		lhash={501dfefb4ffa60570ca4dcdeebff305d9b2942fd},
4
+		authname={ramon diaz-uriarte (at Phelsuma)},
5 5
 		authemail={rdiaz02@gmail.com},
6
-		authsdate={2018-05-04},
7
-		authidate={2018-05-04 18:44:29 +0200},
8
-		authudate={1525452269},
9
-		commname={Ramon Diaz-Uriarte (at Coleonyx)},
6
+		authsdate={2019-03-18},
7
+		authidate={2019-03-18 08:53:11 +0100},
8
+		authudate={1552895591},
9
+		commname={ramon diaz-uriarte (at Phelsuma)},
10 10
 		commemail={rdiaz02@gmail.com},
11
-		commsdate={2018-05-04},
12
-		commidate={2018-05-04 18:44:29 +0200},
13
-		commudate={1525452269},
11
+		commsdate={2019-03-18},
12
+		commidate={2019-03-18 08:53:11 +0100},
13
+		commudate={1552895591},
14 14
 		refnames={ (HEAD -> master, origin/master, origin/HEAD)}
15 15
 	]{gitsetinfo}
16 16
\ No newline at end of file