Browse code

3.99.1: interventions, death with fdf, user variables

ramon diaz-uriarte (at Phelsuma) authored on 25/06/2022 14:24:13
Showing 1 changed files
... ...
@@ -29,8 +29,9 @@
29 29
 
30 30
 allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
31 31
   noIntGenes = NULL, geneToModule = NULL, drvNames = NULL,
32
-  genotFitness = NULL,  keepInput = TRUE, frequencyDependentFitness =
33
-  FALSE, frequencyType = NA)
32
+  genotFitness = NULL,  keepInput = TRUE, frequencyDependentBirth =
33
+  FALSE, frequencyDependentDeath = FALSE, frequencyDependentFitness,
34
+  frequencyType = NA, deathSpec = FALSE)
34 35
 
35 36
 allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
36 37
                    geneToModule = NULL,
... ...
@@ -75,7 +76,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
75 76
       three would have the same effects if a node that connects to root
76 77
       only connects to root).}
77 78
   }
78
-  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
79
+  This parameter is not used if \code{frequencyDependentBirth} is TRUE.
79 80
   }
80 81
   \item{epistasis}{
81 82
     A named numeric vector. The names identify the relationship, and the
... ...
@@ -83,7 +84,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
83 84
     genes or modules involved is separated by a ":". A negative sign
84 85
     denotes the absence of that term.
85 86
 
86
-    This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
87
+    This parameter is not used if \code{frequencyDependentBirth} is TRUE.
87 88
   }
88 89
   \item{orderEffects}{
89 90
     A named numeric vector, as for \code{epistasis}. A ">" separates the
... ...
@@ -91,7 +92,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
91 92
   that the relationship is satisfied when mutation U has happened before
92 93
   mutation Z.
93 94
 
94
-  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
95
+  This parameter is not used if \code{frequencyDependentBirth} is TRUE.
95 96
 }
96 97
 \item{noIntGenes}{
97 98
   A numeric vector (optionally named) with the fitness coefficients (or
... ...
@@ -103,7 +104,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
103 104
   Of course, avoid using potentially confusing characters in the
104 105
   names. In particular, "," and ">" are not allowed as gene names.
105 106
 
106
-  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
107
+  This parameter is not used if \code{frequencyDependentBirth} is TRUE.
107 108
 }
108 109
 
109 110
 \item{geneToModule}{
... ...
@@ -117,7 +118,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
117 118
   must necessarily contain, in the first position, "Root" (since the
118 119
   restriction table contains a node named "Root"). See examples below.
119 120
     
120
-  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
121
+  This parameter is not used if \code{frequencyDependentBirth} is TRUE.
121 122
 }
122 123
 
123 124
 \item{drvNames}{The names of genes that are considered drivers. This is
... ...
@@ -135,37 +136,39 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
135 136
 }
136 137
 
137 138
 \item{genotFitness}{A matrix or data frame that contains explicitly the
138
-  mapping of genotypes to fitness. For now, we only allow epistasis-like
139
-  relations between genes (so you cannot code order effects this way).
139
+  mapping of genotypes to birth and optionally death. For now, we only 
140
+  allow epistasis-like relations between genes (so you cannot code
141
+  order effects this way).
140 142
 
141 143
   Genotypes can be specified in two ways:
142 144
   \itemize{
143 145
     
144
-    \item As a matrix (or data frame) with g + 1 columns (where g >
145
-    1). Each of the first g columns contains a 1 or a 0 indicating that
146
+    \item As a matrix (or data frame) with g + 1 columns or g + 2 columns,
147
+	depending on whether death is specified or not (where g > 1). Each of the first 
148
+	g columns contains a 1 or a 0 indicating that
146 149
     the gene of that column is mutated or not. Column g+ 1 contains the
147
-    fitness values. This is, for instance, the output you will get from
150
+    birth values. This is, for instance, the output you will get from
148 151
     \code{\link{rfitness}}. If the matrix has all columns named, those
149 152
     will be used for the names of the genes. Of course, except for
150 153
     column or row names, all entries in this matrix or data frame must
151
-    be numeric, except when \code{frequencyDependentFitness} is TRUE.
152
-    In this case, last column must be character and contains fitness
154
+    be numeric, except when \code{frequencyDependentBirth} is TRUE.
155
+    In this case, last column must be character and contains birth
153 156
     equations.
154 157
     
155
-    \item As a two column data frame. The second column is fitness, and
158
+    \item As a two column data frame. The second column is birth, and
156 159
     the first column are genotypes, given as a character vector. For
157 160
     instance, a row "A, B" would mean the genotype with both A and B
158
-    mutated. If \code{frequencyDependentFitness} is TRUE both columns 
161
+    mutated. If \code{frequencyDependentBirth} is TRUE both columns 
159 162
     must be character vectors.
160 163
   }
161 164
   
162
-  When \code{frequencyDependentFitness = FALSE}, fitness must be
165
+  When \code{frequencyDependentBirth = FALSE}, fitness must be
163 166
   \code{>= 0}. If any possible genotype is missing, its fitness is
164 167
   assumed to be 0, except for WT (if WT is missing, its fitness is
165 168
   assumed to be 1 ---see examples); this also applies to
166 169
   frequency-dependent fitness.
167 170
 
168
-  In contrast, if \code{frequencyDependentFitness = TRUE}, the Fitness
171
+  In contrast, if \code{frequencyDependentBirth = TRUE}, the Fitness
169 172
   column must contain the fitness specification equations, like
170 173
   characters, using as variables the frequencies (absolute or relative)
171 174
   of the all possible genotypes. We use "f" to denote relative
... ...
@@ -198,15 +201,26 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
198 201
   internal representation. But if you want, you can set it to FALSE and
199 202
   the object will be a little bit smaller.}
200 203
 
201
-  \item{frequencyDependentFitness}{
204
+  \item{frequencyDependentBirth}{
202 205
   If FALSE, the default value, all downstream work will be realised in a
203 206
   way not related to frequency dependent fitness situations. That implies
204 207
   that fitness specifications are fixed, except death rate in case of
205 208
   McFarland model (see \code{\link{oncoSimulIndiv}} for more details). If
206 209
   TRUE, you are in a frequency dependent fitness situation, where fitness
207 210
   specification equations must be passed as characters at 
208
-  \code{genotFitness}.
209
-}
211
+  \code{genotFitness}.}
212
+  
213
+  \item{frequencyDependentDeath}{
214
+  If FALSE, the default value, all downstream work will be realised in a
215
+  way not related to frequency dependent fitness situations. That implies
216
+  that fitness specifications are fixed, except death rate in case of
217
+  McFarland model (see \code{\link{oncoSimulIndiv}} for more details). If
218
+  TRUE, you are in a frequency dependent fitness situation, where fitness
219
+  specification equations must be passed as characters at 
220
+  \code{genotFitness}.}
221
+
222
+  \item{frequencyDependentFitness}{Deprecated name, kept for the old nomenclature; use \code{frequencyDependentBirth} instead.}
223
+
210 224
 
211 225
   %% \item{frequencyType}{frequencyType is a character that specify wether 
212 226
   %%   we are using absolute or relatives frequecies and can take tree values
... ...
@@ -220,6 +234,12 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
220 234
   frequencies, or "rel", for relative ones. Remember that you must
221 235
   use "f" for relative frequency and "n" for absolute in
222 236
   \code{genotFitness}. Set to NA for non-frequency-dependent fitness. }
237
+  
238
+  \item{deathSpec}{
239
+  If FALSE, the default value, all downstream work will be realised in a
240
+  way in which we assume that death is not specified by the user in \code{genotFitness}.
241
+  If TRUE, that means that death was specified by the user.
242
+  }
223 243
 
224 244
 }
225 245
 
... ...
@@ -243,27 +263,31 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
243 263
   no direct effect on fitness, but that affect mutation rate, you MUST
244 264
   specify them in the call to \code{allFitnessEffects}, for instance as
245 265
   \code{noIntGenes} with an effect of 0. When you run the simulations in
246
-  \code{frequencyDependentFitness} = TRUE only fitness effects are
247
-  allowed, and must be codified in \code{genotFitness}.
266
+  \code{frequencyDependentBirth} = TRUE or \code{frequencyDependentDeath} = TRUE
267
+  only fitness effects are allowed, and must be codified in \code{genotFitness}.
248 268
 
249 269
   If you use \code{genotFitness} then you cannot pass modules,
250 270
   noIntgenes, epistasis, or rT. This makes sense, because using
251 271
   \code{genotFitness} is saying
252
-  "this is the mapping of genotypes to fitness. Period", so we should
272
+  "this is the mapping of genotypes to birth and maybe death. Period", so we should
253 273
   not allow further modifications from other terms. This is always the
254
-  case when \code{frequencyDependentFitness} = TRUE.
274
+  case when \code{frequencyDependentBirth} = TRUE or
275
+  \code{frequencyDependentDeath} = TRUE.
255 276
 
256 277
   If you use \code{genotFitness} you need to be careful when you use
257 278
   Bozic's model (as you get a death rate of 0).
258 279
 
259 280
 
260 281
   If you use \code{genotFitness} note that we force the WT (wildtype) to
261
-  always be 1 so fitnesses are rescaled in case of 
262
-  \code{frequencyDependentFitness = FALSE}. In contrast, when 
263
-  \code{frequencyDependentFitness = TRUE} you are free to 
264
-  determine the fitness as a function of the frequencies of the genotypes
265
-  (see \code{genotFitness} and the vignette).
282
+  always be 1 so birth rates (death rates) are rescaled in case of 
283
+  \code{frequencyDependentBirth = FALSE} (\code{frequencyDependentDeath = FALSE}).
284
+  In contrast, when \code{frequencyDependentBirth = TRUE} (\code{frequencyDependentDeath = TRUE})
285
+  you are free to determine the birth rate (death rate) as a function of the frequencies of the
286
+  genotypes (see \code{genotFitness} and the vignette).
287
+
266 288
 
289
+  When using \code{genotFitness}, any genotype with a fitness <= 1e-9 is removed from the table of genotypes, thus 
290
+    making it a non-viable genotype during simulations. 
267 291
 }
268 292
 
269 293
 
... ...
@@ -320,18 +344,30 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
320 344
   frequency variables necessary for the C++ code. The "fvars".
321 345
 }
322 346
 
323
-  \item{frequencyDependentFitness}{TRUE or FALSE as we have explained
347
+  \item{frequencyDependentBirth}{TRUE or FALSE as we have explained
348
+  before.
349
+  }
350
+  
351
+  \item{frequencyDependentDeath}{TRUE or FALSE as we have explained
324 352
   before.
325 353
   }
326 354
   
355
+  \item{frequencyDependentFitness}{DEPRECATED. Use \code{frequencyDependentBirth} instead of
356
+  \code{frequencyDependentFitness}, which is kept only for the old nomenclature.
357
+  }
358
+  
327 359
   \item{frequencyType}{A character string "abs" or "rel" (or NULL).
328 360
   }
329
-
330
-   \item{full_FDF_spec}{For frequency-dependent fitness, a complete
361
+	
362
+  \item{deathSpec}{TRUE or FALSE as we have explained
363
+  before.
364
+  }
365
+  
366
+   \item{full_FDF_spec}{For frequency-dependent birth (death), a complete
331 367
   data frame showing the genotypes (as matrix, letters, and "fvars") and the
332
-  fitness specification, in terms of the original specification
333
-  (Fitness_as_letters) and with genotypes mapped to numbers according to
334
-  the "fvars" (Fitness_as_fvars). If fitness was originally specified in
368
+  birth (death) specification, in terms of the original specification
369
+  (Birth_as_letters (Death_as_letters)) and with genotypes mapped to numbers according to
370
+  the "fvars" (Birth_as_fvars (Death_as_fvars)). If birth (death) was originally specified in
335 371
   terms of numbers, these two columns will be identical. All the
336 372
   information in this data frame is implicitly above, but this
337 373
   simplifies checking that you are doing what you think you are doing.
... ...
@@ -545,17 +581,17 @@ evalAllGenotypes(allFitnessEffects(genotFitness = m9),
545 581
                  addwt = TRUE)
546 582
 
547 583
 
548
-#########  Frequency Dependent Fitness
584
+#########  Frequency Dependent Birth
549 585
 genofit <- data.frame(A = c(0, 1, 0, 1),
550 586
                       B = c(0, 0, 1, 1),
551
-                      Fitness = c("max(3, 2*f_)",
587
+                      Birth = c("max(3, 2*f_)",
552 588
                                   "max(1.5, 3*(f_ + f_1))",
553 589
                                   "max(2, 3*(f_ + f_2))",
554 590
                                   "max(2, 5*f_ - 0.5*( f_1 + f_2) + 15*f_1_2)"),
555 591
                       stringsAsFactors = FALSE)
556 592
 
557 593
 afe <- allFitnessEffects(genotFitness = genofit,
558
-                         frequencyDependentFitness = TRUE,
594
+                         frequencyDependentBirth = TRUE,
559 595
                          frequencyType = "rel")
560 596
                          
561 597
 ## Plotting fitness landscape in case of spPopSizes = c(5000, 2500, 3000, 7500)
Browse code

version 2.99.1: frequency-dependent fitness functionality

ramon diaz-uriarte (at Phelsuma) authored on 10/12/2020 11:41:53
Showing 1 changed files
... ...
@@ -16,6 +16,11 @@
16 16
   mapping of genes to modules, return the complete specification of how
17 17
   mutations affect the mutation rate.
18 18
   
19
+  This function can be used also to produce the fitness specification
20
+  needed to run simulations in a frequency dependent fitness way. In that
21
+  situation we presume that the effects must be considered as fitness effects
22
+  and never as mutator effects (see \code{details} for more info).
23
+
19 24
   The output of these functions is not intended for user consumption,
20 25
   but as a way of preparing data to be sent to the C++ code.  }
21 26
 
... ...
@@ -24,7 +29,8 @@
24 29
 
25 30
 allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
26 31
   noIntGenes = NULL, geneToModule = NULL, drvNames = NULL,
27
-  genotFitness = NULL,  keepInput = TRUE)
32
+  genotFitness = NULL,  keepInput = TRUE, frequencyDependentFitness =
33
+  FALSE, frequencyType = NA)
28 34
 
29 35
 allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
30 36
                    geneToModule = NULL,
... ...
@@ -69,18 +75,23 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
69 75
       three would have the same effects if a node that connects to root
70 76
       only connects to root).}
71 77
   }
78
+  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
72 79
   }
73 80
   \item{epistasis}{
74 81
     A named numeric vector. The names identify the relationship, and the
75 82
     numeric value is the fitness (or mutator) effect. For the names, each of the
76 83
     genes or modules involved is separated by a ":". A negative sign
77 84
     denotes the absence of that term.
85
+
86
+    This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
78 87
   }
79 88
   \item{orderEffects}{
80 89
     A named numeric vector, as for \code{epistasis}. A ">" separates the
81 90
   names of the genes of modules of a relationship, so that "U > Z" means
82 91
   that the relationship is satisfied when mutation U has happened before
83 92
   mutation Z.
93
+
94
+  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
84 95
 }
85 96
 \item{noIntGenes}{
86 97
   A numeric vector (optionally named) with the fitness coefficients (or
... ...
@@ -91,6 +102,8 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
91 102
 
92 103
   Of course, avoid using potentially confusing characters in the
93 104
   names. In particular, "," and ">" are not allowed as gene names.
105
+
106
+  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
94 107
 }
95 108
 
96 109
 \item{geneToModule}{
... ...
@@ -103,8 +116,9 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
103 116
   argument, and you used a restriction table, the \code{geneToModule} 
104 117
   must necessarily contain, in the first position, "Root" (since the
105 118
   restriction table contains a node named "Root"). See examples below.
106
-}
107 119
     
120
+  This paramenter is not used if  \code{frequencyDependentFitness} is TRUE.
121
+}
108 122
 
109 123
 \item{drvNames}{The names of genes that are considered drivers. This is
110 124
   only used for: a) deciding when to stop the simulations, in case you
... ...
@@ -134,16 +148,47 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
134 148
     \code{\link{rfitness}}. If the matrix has all columns named, those
135 149
     will be used for the names of the genes. Of course, except for
136 150
     column or row names, all entries in this matrix or data frame must
137
-    be numeric.
151
+    be numeric, except when \code{frequencyDependentFitness} is TRUE.
152
+    In this case, last column must be character and contains fitness
153
+    equations.
138 154
     
139 155
     \item As a two column data frame. The second column is fitness, and
140 156
     the first column are genotypes, given as a character vector. For
141
-    instance, a row "A, B" would mean the genotype with both A and B mutated.
157
+    instance, a row "A, B" would mean the genotype with both A and B
158
+    mutated. If \code{frequencyDependentFitness} is TRUE both columns 
159
+    must be character vectors.
160
+  }
161
+  
162
+  When \code{frequencyDependentFitness = FALSE}, fitness must be
163
+  \code{>= 0}. If any possible genotype is missing, its fitness is
164
+  assumed to be 0, except for WT (if WT is missing, its fitness is
165
+  assumed to be 1 ---see examples); this also applies to
166
+  frequency-dependent fitness.
167
+
168
+  In contrast, if \code{frequencyDependentFitness = TRUE}, the Fitness
169
+  column must contain the fitness specification equations, like
170
+  characters, using as variables the frequencies (absolute or relative)
171
+  of all the possible genotypes. We use "f" to denote relative
172
+  frequencies and "n" for absolute. Letter "N" (UPPER CASE) is reserved
173
+  to denote total population size, thus f=n/N for each possible
174
+  genotype.  Relative frequency variables must be f_ for wild type, f_1
175
+  or f_A if first gene is mutated, f_2 or f_B if is the case for the
176
+  second one, f_1_2 or f_A_B, if both the first and second genes are
177
+  mutated, and so on. For anything beyond the trivially simple, using
178
+  letters (not numbers) is strongly recommended. Note also that you need
179
+  not specify the fitness of every genotype (those missing are assumed
180
+  to have a fitness of 0), nor do you need to pass the WT genotype. See
181
+  the vignette for many examples.
182
+
183
+  If we want to use absolute numbers (absolute frequencies), just
184
+  replace "f" with "n". The choice between relative or absolute
185
+  frequencies may be specified also in \code{frequencyType} or, if using
186
+  the default (auto) it can be automatically inferred.
187
+
188
+  Mathematical operations and symbols allowed are described in the
189
+  documentation of C++'s library ExprTk that is used to parse and
190
+  evaluate the fitness equations (see references for more information).
142 191
   }
143
-  In all cases, fitness must be \code{>= 0}. If any possible genotype is
144
-  missing, its fitness is assumed to be 0, except for WT (if WT is
145
-    missing, its fitness is assumed to be 1 ---see examples).
146
-}
147 192
 
148 193
 
149 194
 \item{keepInput}{
... ...
@@ -152,14 +197,39 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
152 197
   decode, say, the restriction table from the data frame than from the
153 198
   internal representation. But if you want, you can set it to FALSE and
154 199
   the object will be a little bit smaller.}
200
+
201
+  \item{frequencyDependentFitness}{
202
+  If FALSE, the default value, all downstream work will be realised in a
203
+  way not related to frequency depedent fitness situations. That implies
204
+  that fitness specifications are fixed, except death rate in case of
205
+  McFarland model (see \code{\link{oncoSimulIndiv}} for more details). If
206
+  TRUE, you are in a frequency dependent fitness situation, where fitness
207
+  specification ecuations must be passed as characters at 
208
+  \code{genotFitness}.
209
+}
210
+
211
+  %% \item{frequencyType}{frequencyType is a character that specify wether 
212
+  %%   we are using absolute or relatives frequecies and can take tree values
213
+  %%   depending on \code{frequencyDependentFitness}. When FALSE is 
214
+  %%   "unemployed" and for TRUE we can use "abs", for absolute frequencies,
215
+  %%   or "rel", for relative ones. Remember that you must to use "f" for
216
+  %% relative frequency and "n" for absolute in \code{genoFitness}.
217
+  \item{frequencyType}{frequencyType is a character that specifies whether
218
+  we are using absolute or relative frequencies and can take three values
219
+  depending on \code{frequencyDependentFitness}. Use "abs", for absolute
220
+  frequencies, or "rel", for relative ones. Remember that you must
221
+  use "f" for relative frequency and "n" for absolute in
222
+  \code{genotFitness}. Set to NA for non-frequency-dependent fitness. }
223
+
155 224
 }
156 225
 
157 226
 \details{
158 227
   \code{allFitnessEffects} is used for extremely flexible specification of fitness
159 228
   and mutator effects, including posets, XOR relationships, synthetic mortality and
160 229
   synthetic viability, arbitrary forms of epistatis, arbitrary forms of
161
-  order effects, etc. Please, see the vignette for detailed and
162
-  commented examples.
230
+  order effects, etc. \code{allFitnessEffects} produces the output necessary
231
+  to pass to the C++ code the fitness/mutator specifications to run simulations.
232
+  Please, see the vignette for detailed and commented examples.
163 233
 
164 234
   \code{allMutatorEffects} provide the same flexibility, but without
165 235
   order and posets (this might be included in the future, but I have
... ...
@@ -172,22 +242,27 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
172 242
   \code{allFitnessEffects} object. If you want to have genes that have
173 243
   no direct effect on fitness, but that affect mutation rate, you MUST
174 244
   specify them in the call to \code{allFitnessEffects}, for instance as
175
-  \code{noIntGenes} with an effect of 0.
176
-
245
+  \code{noIntGenes} with an effect of 0. When you run the simulations in
246
+  \code{frequencyDependentFitness} = TRUE only fitness effects are
247
+  allowed, and must be codified in \code{genotFitness}.
177 248
 
178 249
   If you use \code{genotFitness} then you cannot pass modules,
179 250
   noIntgenes, epistasis, or rT. This makes sense, because using
180 251
   \code{genotFitness} is saying
181 252
   "this is the mapping of genotypes to fitness. Period", so we should
182
-  not allow further modifications from other terms.
253
+  not allow further modifications from other terms. This is always the
254
+  case when \code{frequencyDependentFitness} = TRUE.
183 255
 
184 256
   If you use \code{genotFitness} you need to be careful when you use
185 257
   Bozic's model (as you get a death rate of 0).
186 258
 
187 259
 
188 260
   If you use \code{genotFitness} note that we force the WT (wildtype) to
189
-  always be 1 so fitnesses are rescaled. 
190
-
261
+  always be 1 so fitnesses are rescaled in case of 
262
+  \code{frequencyDependentFitness = FALSE}. In contrast, when 
263
+  \code{frequencyDependentFitness = TRUE} you are free to 
264
+  determine the fitness as a function of the frequencies of the genotypes
265
+  (see \code{genotFitness} and the vignette).
191 266
 
192 267
 }
193 268
 
... ...
@@ -226,18 +301,58 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
226 301
 
227 302
   \item{noIntGenes}{If \code{keepInput} is TRUE, the original 
228 303
     noIntGenes.}
304
+
305
+  \item{fitnessLandscape}{A data.frame that contains number of genes + 1 columns,
306
+  where the first columns are the genes (1 if mutated and 0 if not) and the last
307
+  one contains the fitnesses.
308
+  }
309
+
310
+  \item{fitnessLandscape_df}{A data.frame with the same information of \code{fitnessLandscape},
311
+  but in this case there are only two columns: Genotype, that has genotypes as vectors
312
+  codified as characters, and Fitness.
313
+  }
314
+
315
+  \item{fitnessLandscape_gene_id}{A data.frame with two columns (Gene and GeneNumID),
316
+  that map by rows genes as letters (Gene) with genes as numbers (GeneNumID).
317
+  }
318
+
319
+  \item{fitnessLandscapeVariables}{A character vector that contains the
320
+  frequency variables necessary for the C++ code. The "fvars".
229 321
 }
322
+
323
+  \item{frequencyDependentFitness}{TRUE or FALSE as we have explained
324
+  before.
325
+  }
326
+  
327
+  \item{frequencyType}{A character string "abs" or "rel" (or NULL).
328
+  }
329
+
330
+   \item{full_FDF_spec}{For frequency-dependent fitness, a complete
331
+  data frame showing the genotypes (as matrix, letters, and "fvars") and the
332
+  fitness specification, in terms of the original specification
333
+  (Fitness_as_letters) and with genotypes mapped to numbers according to
334
+  the "fvars" (Fitness_as_fvars). If fitness was originally specified in
335
+  terms of numbers, these two columns will be identical. All the
336
+  information in this data frame is implicitly above, but this
337
+  simplifies checking that you are doing what you think you are doing.
338
+  }
339
+
340
+}
341
+
230 342
 \references{
231 343
     Diaz-Uriarte, R. (2015). Identifying restrictions in the order of
232 344
   accumulation of mutations during tumor progression: effects of
233 345
   passengers, evolutionary models, and sampling
234
-  \url{http://www.biomedcentral.com/1471-2105/16/41/abstract}
346
+  \url{http://www.biomedcentral.com/1471-2105/16/41/abstract}.
235 347
 
236 348
     McFarland, C.~D. et al. (2013). Impact of deleterious passenger
237 349
   mutations on cancer progression.  \emph{Proceedings of the National
238 350
   Academy of Sciences of the United States of America\/}, \bold{110}(8),
239 351
   2910--5.
240 352
 
353
+    Partow, A. ExprTk: C++ Mathematical Expression Library (MIT Open
354
+    Source License). \url{http://www.partow.net/programming/exprtk/}.
355
+
241 356
 
242 357
 }
243 358
 
... ...
@@ -255,7 +370,15 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
255 370
   characters because you know those characters have special meanings to
256 371
   separate names or indicate epistasis or order relationships.  Right
257 372
   now, using those characters as names is caught (and result in
258
-  stopping) if passed as names for noIntGenes.  }
373
+  stopping) if passed as names for noIntGenes.
374
+
375
+  At the moment, the variables you need to specify in the fitness
376
+  equations when you are in a frequency dependent fitness situation are
377
+  fixed as we have explained in \code{genotFitness}. Perhaps using
378
+  different and strange combinations of "f_" or "n_" followed by letters
379
+  and numbers you could confuse the R parser, but never the C++ one. For
380
+  a correct performance please be aware of this.
381
+  }
259 382
 
260 383
 
261 384
 
... ...
@@ -265,7 +388,9 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
265 388
 
266 389
 \seealso{
267 390
   
268
-  \code{\link{evalGenotype}}, \code{\link{oncoSimulIndiv}},
391
+  \code{\link{evalGenotype}},
392
+  \code{\link{evalAllGenotypes}},
393
+  \code{\link{oncoSimulIndiv}},
269 394
   \code{\link{plot.fitnessEffects}},
270 395
   \code{\link{evalGenotypeFitAndMut}},
271 396
   \code{\link{rfitness}},
... ...
@@ -394,7 +519,7 @@ evalAllGenotypes(fem6, addwt = TRUE, order = FALSE)
394 519
 ## Plotting a fitness landscape
395 520
 
396 521
 fe2 <- allFitnessEffects(noIntGenes =
397
-                         c(a1 = 0.1, 
522
+                         c(a1 = 0.1,
398 523
                            b1 = 0.01,
399 524
                            c1 = 0.3))
400 525
 
... ...
@@ -407,18 +532,34 @@ plotFitnessLandscape(evalAllGenotypes(fe2, order = FALSE))
407 532
 plotFitnessLandscape(fe2)
408 533
 
409 534
 
410
-###### Defaults for missing genotypes
411 535
 
536
+###### Defaults for missing genotypes
412 537
 ## As a two-column data frame
413
-
414 538
 (m8 <- data.frame(G = c("A, B, C", "B"), F = c(3, 2)))
415
-evalAllGenotypes(allFitnessEffects(genotFitness = m8), addwt = TRUE)
539
+evalAllGenotypes(allFitnessEffects(genotFitness = m8),
540
+                 addwt = TRUE)
416 541
 
417 542
 ## As a matrix 
418
-
419 543
 (m9 <- rbind(c(0, 1, 0, 1, 4), c(1, 0, 1, 0, 1.5)))
420
-evalAllGenotypes(allFitnessEffects(genotFitness = m9), addwt = TRUE)
421
-
544
+evalAllGenotypes(allFitnessEffects(genotFitness = m9),
545
+                 addwt = TRUE)
546
+
547
+
548
+#########  Frequency Dependent Fitness
549
+genofit <- data.frame(A = c(0, 1, 0, 1),
550
+                      B = c(0, 0, 1, 1),
551
+                      Fitness = c("max(3, 2*f_)",
552
+                                  "max(1.5, 3*(f_ + f_1))",
553
+                                  "max(2, 3*(f_ + f_2))",
554
+                                  "max(2, 5*f_ - 0.5*( f_1 + f_2) + 15*f_1_2)"),
555
+                      stringsAsFactors = FALSE)
556
+
557
+afe <- allFitnessEffects(genotFitness = genofit,
558
+                         frequencyDependentFitness = TRUE,
559
+                         frequencyType = "rel")
560
+                         
561
+##Ploting fitness landscape in case of spPopSizes = c(5000, 2500, 3000, 7500)
562
+plotFitnessLandscape(evalAllGenotypes(afe, spPopSizes = c(5000, 2500, 3000, 7500)))
422 563
 
423 564
 ## Reinitialize the seed
424 565
 set.seed(NULL)
Browse code

2.13.2

ramon diaz-uriarte (at Phelsuma) authored on 18/03/2019 11:11:47
Showing 1 changed files
... ...
@@ -141,7 +141,8 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
141 141
     instance, a row "A, B" would mean the genotype with both A and B mutated.
142 142
   }
143 143
   In all cases, fitness must be \code{>= 0}. If any possible genotype is
144
-  missing, its fitness is assumed to be 1.
144
+  missing, its fitness is assumed to be 0, except for WT (if WT is
145
+    missing, its fitness is assumed to be 1 ---see examples).
145 146
 }
146 147
 
147 148
 
... ...
@@ -406,6 +407,19 @@ plotFitnessLandscape(evalAllGenotypes(fe2, order = FALSE))
406 407
 plotFitnessLandscape(fe2)
407 408
 
408 409
 
410
+###### Defaults for missing genotypes
411
+
412
+## As a two-column data frame
413
+
414
+(m8 <- data.frame(G = c("A, B, C", "B"), F = c(3, 2)))
415
+evalAllGenotypes(allFitnessEffects(genotFitness = m8), addwt = TRUE)
416
+
417
+## As a matrix 
418
+
419
+(m9 <- rbind(c(0, 1, 0, 1, 4), c(1, 0, 1, 0, 1.5)))
420
+evalAllGenotypes(allFitnessEffects(genotFitness = m9), addwt = TRUE)
421
+
422
+
409 423
 ## Reinitialize the seed
410 424
 set.seed(NULL)
411 425
 }
Browse code

2.5.8; handle trivial cases in genotFitness

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@125266 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 17/12/2016 12:59:03
Showing 1 changed files
... ...
@@ -127,12 +127,14 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
127 127
   Genotypes can be specified in two ways:
128 128
   \itemize{
129 129
     
130
-    \item As a matrix (or data frame) with g + 1 columns. Each of the
131
-    first g columns contains a 1 or a 0 indicating that the gene of that
132
-    column is mutated or not. Column g+ 1 contains the fitness
133
-    values. This is, for instance, the output you will get from
130
+    \item As a matrix (or data frame) with g + 1 columns (where g >
131
+    1). Each of the first g columns contains a 1 or a 0 indicating that
132
+    the gene of that column is mutated or not. Column g+ 1 contains the
133
+    fitness values. This is, for instance, the output you will get from
134 134
     \code{\link{rfitness}}. If the matrix has all columns named, those
135
-    will be used for the names of the genes.
135
+    will be used for the names of the genes. Of course, except for
136
+    column or row names, all entries in this matrix or data frame must
137
+    be numeric.
136 138
     
137 139
     \item As a two column data frame. The second column is fitness, and
138 140
     the first column are genotypes, given as a character vector. For
Browse code

v.2.5.1. \n - much faster accessible genots \n - AND of drivers and size \n - fixation \n - doc. improvements

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@123922 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 13/11/2016 09:46:27
Showing 1 changed files
... ...
@@ -114,7 +114,7 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
114 114
   specify anything if you do not want to, and you can pass an empty
115 115
   vector (as \code{character(0)}). The default has changed with respect
116 116
   to v.2.1.3 and previous: it used to be to assume that all
117
-  genes that were not in the \code{noIntGenes} were drivers. The fault
117
+  genes that were not in the \code{noIntGenes} were drivers. The default
118 118
   now is to assume nothing: if you want \code{drvNames} you have
119 119
   to specify them.
120 120
 
Browse code

v.2.3.7; detectionProb mechanism;\n documentation enhancements;\n more tests

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@119150 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 05/07/2016 15:59:56
Showing 1 changed files
... ...
@@ -127,10 +127,12 @@ allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
127 127
   Genotypes can be specified in two ways:
128 128
   \itemize{
129 129
     
130
-    \item As a matrix (or data frame) with g + 1 columns. Each of the first g columns contains a 1 or a 0 indicating that the
131
-    gene of that column is mutated or not. Column g+ 1 contains the
132
-    fitness values. This is, for instance, the output you will get from
133
-    \code{\link{rfitness}}.
130
+    \item As a matrix (or data frame) with g + 1 columns. Each of the
131
+    first g columns contains a 1 or a 0 indicating that the gene of that
132
+    column is mutated or not. Column g+ 1 contains the fitness
133
+    values. This is, for instance, the output you will get from
134
+    \code{\link{rfitness}}. If the matrix has all columns named, those
135
+    will be used for the names of the genes.
134 136
     
135 137
     \item As a two column data frame. The second column is fitness, and
136 138
     the first column are genotypes, given as a character vector. For
Browse code

v.2.3.5; to_Magellan

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@118942 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 25/06/2016 00:53:22
Showing 1 changed files
... ...
@@ -389,9 +389,9 @@ evalAllGenotypes(fem6, addwt = TRUE, order = FALSE)
389 389
 ## Plotting a fitness landscape
390 390
 
391 391
 fe2 <- allFitnessEffects(noIntGenes =
392
-                         c(a1 = 0.1, a2 = 0.2,
393
-                           b1 = 0.01, b2 = 0.3, b3 = 0.2,
394
-                           c1 = 0.3, c2 = -0.2))
392
+                         c(a1 = 0.1, 
393
+                           b1 = 0.01,
394
+                           c1 = 0.3))
395 395
 
396 396
 plot(evalAllGenotypes(fe2, order = FALSE))
397 397
 
Browse code

v. 2.3.3.\n mutator, fitness landscapes, rfitness, and many other changes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@118909 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 23/06/2016 16:43:51
Showing 1 changed files
... ...
@@ -1,23 +1,38 @@
1 1
 \name{allFitnessEffects}
2 2
 \alias{allFitnessEffects}
3
-\title{Create fitness effects specification from restrictions,
4
-  epistasis, and order effects.
5
-}
3
+\alias{allMutatorEffects}
4
+
5
+\title{Create fitness and mutation effects specification from
6
+  restrictions, epistasis, and order effects.  }
7
+
6 8
 \description{
7 9
   Given one or more of a set of poset restrictions, epistatic
8 10
   interactions, order effects, and genes without interactions, as well
9 11
   as, optionally, a mapping of genes to modules, return the complete
10 12
   fitness specification.
11 13
 
12
-  The output of this function is not intended for user consumption, but
13
-  as a way of preparing data to be sent to the C++ code.  }
14
+  For mutator effects, given one or more of a set of epistatic
15
+  interactions and genes without interactions, as well as, optionally, a
16
+  mapping of genes to modules, return the complete specification of how
17
+  mutations affect the mutation rate.
18
+  
19
+  The output of these functions is not intended for user consumption,
20
+  but as a way of preparing data to be sent to the C++ code.  }
14 21
 
15 22
 
16 23
 \usage{
17 24
 
18 25
 allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
19
-  noIntGenes = NULL, geneToModule = NULL, drvNames = NULL, keepInput =
20
-  TRUE) }
26
+  noIntGenes = NULL, geneToModule = NULL, drvNames = NULL,
27
+  genotFitness = NULL,  keepInput = TRUE)
28
+
29
+allMutatorEffects(epistasis = NULL, noIntGenes = NULL,
30
+                   geneToModule = NULL,
31
+                  keepInput =  TRUE)
32
+}
33
+
34
+
35
+
21 36
 
22 37
 \arguments{
23 38
   \item{rT}{A restriction table that is an extended version of a poset 
... ...
@@ -49,7 +64,6 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
49 64
 	  relationship to be satisfied. Specify it as "XOR" or "xmpn" or
50 65
 	  "XMPN".}
51 66
       }
52
-
53 67
       In addition, for the nodes that depend only on the root node, you
54 68
       can use "--" or "-" if you want (though using any of the other
55 69
       three would have the same effects if a node that connects to root
... ...
@@ -58,7 +72,7 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
58 72
   }
59 73
   \item{epistasis}{
60 74
     A named numeric vector. The names identify the relationship, and the
61
-    numeric value is the fitness effect. For the names, each of the
75
+    numeric value is the fitness (or mutator) effect. For the names, each of the
62 76
     genes or modules involved is separated by a ":". A negative sign
63 77
     denotes the absence of that term.
64 78
   }
... ...
@@ -69,7 +83,8 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
69 83
   mutation Z.
70 84
 }
71 85
 \item{noIntGenes}{
72
-  A numeric vector (optionally named) with the fitness coefficients of genes
86
+  A numeric vector (optionally named) with the fitness coefficients (or
87
+      mutator multiplier factor) of genes
73 88
   (only genes, not modules) that show no interactions. These genes
74 89
   cannot be part of modules. But you can specify modules that have
75 90
   no epistatic interactions. See examples and vignette.
... ...
@@ -97,8 +112,34 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
97 112
   \code{\link{oncoSimulIndiv}}); b) for summarization purposes (e.g.,
98 113
   how many drivers are mutated); c) in figures. But you need not
99 114
   specify anything if you do not want to, and you can pass an empty
100
-  vector (as \code{character(0)}). The default is to assume that all
101
-  genes that are not in the \code{noIntGenes} are drivers.}
115
+  vector (as \code{character(0)}). The default has changed with respect
116
+  to v.2.1.3 and previous: it used to be to assume that all
117
+  genes that were not in the \code{noIntGenes} were drivers. The fault
118
+  now is to assume nothing: if you want \code{drvNames} you have
119
+  to specify them.
120
+
121
+}
122
+
123
+\item{genotFitness}{A matrix or data frame that contains explicitly the
124
+  mapping of genotypes to fitness. For now, we only allow epistasis-like
125
+  relations between genes (so you cannot code order effects this way).
126
+
127
+  Genotypes can be specified in two ways:
128
+  \itemize{
129
+    
130
+    \item As a matrix (or data frame) with g + 1 columns. Each of the first g columns contains a 1 or a 0 indicating that the
131
+    gene of that column is mutated or not. Column g+ 1 contains the
132
+    fitness values. This is, for instance, the output you will get from
133
+    \code{\link{rfitness}}.
134
+    
135
+    \item As a two column data frame. The second column is fitness, and
136
+    the first column are genotypes, given as a character vector. For
137
+    instance, a row "A, B" would mean the genotype with both A and B mutated.
138
+  }
139
+  In all cases, fitness must be \code{>= 0}. If any possible genotype is
140
+  missing, its fitness is assumed to be 1.
141
+}
142
+
102 143
 
103 144
 \item{keepInput}{
104 145
   If TRUE, whether to keep the original input. This is only useful for
... ...
@@ -107,17 +148,50 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
107 148
   internal representation. But if you want, you can set it to FALSE and
108 149
   the object will be a little bit smaller.}
109 150
 }
151
+
110 152
 \details{
111
-  This function is used for extremely flexible specification of fitness
112
-  effects, including posets, XOR relationships, synthetic mortality and
153
+  \code{allFitnessEffects} is used for extremely flexible specification of fitness
154
+  and mutator effects, including posets, XOR relationships, synthetic mortality and
113 155
   synthetic viability, arbitrary forms of epistasis, arbitrary forms of
114 156
   order effects, etc. Please, see the vignette for detailed and
115 157
   commented examples.
158
+
159
+  \code{allMutatorEffects} provides the same flexibility, but without
160
+  order and posets (this might be included in the future, but I have
161
+  seen no empirical or theoretical argument for their existence or
162
+  relevance as of now, so I do not add them to minimize unneeded complexity).
163
+
164
+  If you use both for simulations in the same call to, say,
165
+  \code{\link{oncoSimulIndiv}}, all the genes specified in
166
+  \code{allMutatorEffects} MUST be included in the
167
+  \code{allFitnessEffects} object. If you want to have genes that have
168
+  no direct effect on fitness, but that affect mutation rate, you MUST
169
+  specify them in the call to \code{allFitnessEffects}, for instance as
170
+  \code{noIntGenes} with an effect of 0.
171
+
172
+
173
+  If you use \code{genotFitness} then you cannot pass modules,
174
+  noIntGenes, epistasis, or rT. This makes sense, because using
175
+  \code{genotFitness} is saying
176
+  "this is the mapping of genotypes to fitness. Period", so we should
177
+  not allow further modifications from other terms.
178
+
179
+  If you use \code{genotFitness} you need to be careful when you use
180
+  Bozic's model (as you get a death rate of 0).
181
+
182
+
183
+  If you use \code{genotFitness} note that we force the WT (wildtype) to
184
+  always be 1 so fitnesses are rescaled. 
185
+
186
+
116 187
 }
117 188
 
189
+
118 190
 \value{
119
-  An object of class "fitnessEffects". This is just a list, but it is not
120
-  intended for human consumption.  The components are:
191
+  
192
+  An object of class "fitnessEffects" or "mutatorEffects". This is just
193
+  a list, but it is not intended for human consumption.  The components
194
+  are:
121 195
 
122 196
   \item{long.rt}{The restriction table in "long format", so as to be
123 197
     easy to parse by the C++ code.}
... ...
@@ -178,11 +252,20 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
178 252
   now, using those characters as names is caught (and result in
179 253
   stopping) if passed as names for noIntGenes.  }
180 254
 
255
+
256
+
257
+
181 258
 \author{ Ramon Diaz-Uriarte
182 259
 }
183 260
 
184 261
 \seealso{
185
-  \code{\link{evalGenotype}}, \code{\link{oncoSimulIndiv}}, \code{\link{plot.fitnessEffects}}
262
+  
263
+  \code{\link{evalGenotype}}, \code{\link{oncoSimulIndiv}},
264
+  \code{\link{plot.fitnessEffects}},
265
+  \code{\link{evalGenotypeFitAndMut}},
266
+  \code{\link{rfitness}},
267
+  \code{\link{plotFitnessLandscape}}
268
+
186 269
 }
187 270
 \examples{
188 271
 ## A simple poset or CBN-like example
... ...
@@ -240,6 +323,87 @@ fnme <- allFitnessEffects(epistasis = c("A" = 0.1,
240 323
 
241 324
 evalAllGenotypes(fnme, order = FALSE, addwt = TRUE)
242 325
 
326
+
327
+## Epistasis for fitness and simple mutator effects
328
+
329
+fe <- allFitnessEffects(epistasis = c("a : b" = 0.3,
330
+                                          "b : c" = 0.5),
331
+                            noIntGenes = c("e" = 0.1))
332
+
333
+fm <- allMutatorEffects(noIntGenes = c("a" = 10,
334
+                                       "c" = 5))
335
+
336
+evalAllGenotypesFitAndMut(fe, fm, order = FALSE)
337
+
338
+
339
+## Simple fitness effects (noIntGenes) and modules
340
+## for mutators
341
+
342
+fe2 <- allFitnessEffects(noIntGenes =
343
+                         c(a1 = 0.1, a2 = 0.2,
344
+                           b1 = 0.01, b2 = 0.3, b3 = 0.2,
345
+                           c1 = 0.3, c2 = -0.2))
346
+
347
+fm2 <- allMutatorEffects(epistasis = c("A" = 5,
348
+                                       "B" = 10,
349
+                                       "C" = 3),
350
+                         geneToModule = c("A" = "a1, a2",
351
+                                          "B" = "b1, b2, b3",
352
+                                          "C" = "c1, c2"))
353
+
354
+evalAllGenotypesFitAndMut(fe2, fm2, order = FALSE)
355
+
356
+
357
+
358
+## Passing fitness directly, a complete fitness specification
359
+## with a two column data frame with genotypes as character vectors