Browse code

3.99.1: interventions, death with fdf, user variables

ramon diaz-uriarte (at Phelsuma) authored on 25/06/2022 14:24:13
Showing 1 changed files
... ...
@@ -50,10 +50,27 @@ to_Magellan <- function(x, file,
50 50
 to_Fitness_Matrix <- function(x, max_num_genotypes) {
51 51
     ## A general converter. Ready to be used by plotFitnessLandscape and
52 52
     ## Magellan exporter.
53
-
53
+    
54
+    ## Very bad, but there is no other way
55
+    
56
+    
54 57
     ## FIXME: really, some of this is inefficient. Very. Fix it.
55 58
     if( (inherits(x, "genotype_fitness_matrix")) ||
56 59
         ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) {
60
+        
61
+        if("Fitness" %in% colnames(x)) {
62
+            afe <- evalAllGenotypes(allFitnessEffects(
63
+                genotFitness = x, frequencyDependentFitness = FALSE
64
+                ##, epistasis = from_genotype_fitness(x)
65
+            ),
66
+            order = FALSE, addwt = TRUE, max = max_num_genotypes)
67
+        } else {
68
+            afe <- evalAllGenotypes(allFitnessEffects(
69
+                genotFitness = x
70
+                ##, epistasis = from_genotype_fitness(x)
71
+            ),
72
+            order = FALSE, addwt = TRUE, max = max_num_genotypes)
73
+        }
57 74
         ## Why this? We go back and forth twice. We need both things. We
58 75
         ## could construct the afe below by appropriately pasting the
59 76
         ## columns names
... ...
@@ -62,11 +79,7 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
62 79
 
63 80
         ## This could use fmatrix_to_afe, above!!!
64 81
         ## Major change as of flfast: no longer using from_genotype_fitness
65
-        afe <- evalAllGenotypes(allFitnessEffects(
66
-            genotFitness = x
67
-            ##, epistasis = from_genotype_fitness(x)
68
-        ),
69
-            order = FALSE, addwt = TRUE, max = max_num_genotypes)
82
+        
70 83
 
71 84
         ## Might not be needed with the proper gfm object (so gmf <- x)
72 85
         ## but is needed if arbitrary matrices.
... ...
@@ -85,10 +98,26 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
85 98
         x <- x[, c(1, 2)]
86 99
         if(x[1, "Genotype"] != "WT") {
87 100
             ## Yes, because we expect this present below
88
-            x <- rbind(data.frame(Genotype = "WT",
89
-                                  Fitness = 1,
90
-                                  stringsAsFactors = FALSE),
91
-                       x)
101
+            if ("Death" %in% colnames(x)) {
102
+                x <- rbind(data.frame(Genotype = "WT",
103
+                                      Birth = 1, Death = 1,
104
+                                      stringsAsFactors = FALSE),
105
+                           x)
106
+            } else {
107
+                
108
+                if("Fitness" %in% colnames(x)) {
109
+                    x <- rbind(data.frame(Genotype = "WT",
110
+                                          Fitness = 1,
111
+                                          stringsAsFactors = FALSE),
112
+                               x)
113
+                } else {
114
+                    x <- rbind(data.frame(Genotype = "WT",
115
+                                          Birth = 1,
116
+                                          stringsAsFactors = FALSE),
117
+                               x)
118
+                }
119
+            }
120
+            
92 121
         }
93 122
         afe <- x
94 123
         ## in case we pass an evalAllgenotypesfitandmut
... ...
@@ -113,19 +142,29 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
113 142
 ## rfitness, do nothing. Well, it actually does something.
114 143
 
115 144
 to_genotFitness_std <- function(x,
116
-                                frequencyDependentFitness = FALSE,
117
-                                frequencyType = frequencyType,
145
+                                frequencyDependentBirth = FALSE,
146
+                                frequencyDependentDeath = FALSE,
147
+                                frequencyDependentFitness = NULL,
148
+                                frequencyType = NA,
149
+                                deathSpec = FALSE,
118 150
                                 simplify = TRUE,
119
-                                min_filter_fitness = 1e-9,
151
+                                min_filter_birth_death = 1e-9,
120 152
                                 sort_gene_names = TRUE) {
121 153
     ## Would break with output from allFitnessEffects and
122 154
     ## output from allGenotypeAndMut
123 155
 
124 156
     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
125 157
         stop("Input must inherit from matrix or data.frame.")
126
-
127
-    if(ncol(x) > 2) {
128
-        if (!frequencyDependentFitness){
158
+    
159
+    if (frequencyDependentDeath && !deathSpec) {
160
+        deathSpec = TRUE
161
+        warning("Assumming death is being specified. Setting deathSpec to TRUE.")
162
+    }
163
+    # Has to be 0's and 1's specification without freq_dep_birth and no death
164
+    # or without freq_dep_birth and without freq_dep_death.
165
+    
166
+    if((ncol(x) > 2 && !deathSpec) || ncol(x) > 3) {
167
+        if (!frequencyDependentBirth && !frequencyDependentDeath){
129 168
             if(inherits(x, "matrix")) {
130 169
                 if(!is.numeric(x))
131 170
                     stop("A genotype fitness matrix/data.frame must be numeric.")
... ...
@@ -139,16 +178,57 @@ to_genotFitness_std <- function(x,
139 178
             if(ncol(x) == 0){
140 179
                 stop("You have an empty data.frame")
141 180
             }
142
-            if(!all(unlist(lapply(x[,-ncol(x)], is.numeric)))){
143
-                stop("All columns except last one must be numeric.")
181
+            if (frequencyDependentBirth && frequencyDependentDeath) {
182
+                if(!all(unlist(lapply(x[,-c((ncol(x)-1):ncol(x))], is.numeric)))){
183
+                    stop("All columns except from the last two must be numeric.")
184
+                }
185
+                
186
+                if(is.factor(x[, ncol(x)])) {
187
+                    warning("Last column of genotype birth-death is a factor. ",
188
+                            "Converting to character.")
189
+                    x[, ncol(x)] <- as.character(x[, ncol(x)])
190
+                }
191
+                
192
+                if(is.factor(x[, ncol(x)-1])) {
193
+                    warning("Second last column of genotype birth-death is a factor. ",
194
+                            "Converting to character.")
195
+                    x[, ncol(x)-1] <- as.character(x[, ncol(x)-1])
196
+                }
197
+                if(!all(unlist(lapply(x[, ncol(x)], is.character)))){
198
+                    stop("All elements in last column must be character.")
199
+                }
200
+                
201
+                if(!all(unlist(lapply(x[, ncol(x)-1], is.character)))){
202
+                    stop("All elements in second last column must be character.")
203
+                }
144 204
             }
145
-            if(is.factor(x[, ncol(x)])) {
146
-                warning("Last column of genotype fitness is a factor. ",
147
-                        "Converting to character.")
148
-                x[, ncol(x)] <- as.character(x[, ncol(x)])
205
+            
206
+            else if(frequencyDependentBirth && deathSpec) {
207
+                if(!all(unlist(lapply(x[,-(ncol(x)-1)], is.numeric)))){
208
+                    stop("All columns except from the second last must be numeric.")
209
+                }
210
+                if(is.factor(x[, ncol(x)-1])) {
211
+                    warning("Second last column of genotype birth-death is a factor. ",
212
+                            "Converting to character.")
213
+                    x[, ncol(x)-1] <- as.character(x[, ncol(x)-1])
214
+                }
215
+                if(!all(unlist(lapply(x[, ncol(x)-1], is.character)))){
216
+                    stop("All elements in second last column must be character.")
217
+                }
149 218
             }
150
-            if(!all(unlist(lapply(x[, ncol(x)], is.character)))){
151
-                stop("All elements in last column must be character.")
219
+            
220
+            else if(frequencyDependentDeath || (frequencyDependentBirth && !deathSpec)) {
221
+                if(!all(unlist(lapply(x[,-ncol(x)], is.numeric)))){
222
+                    stop("All columns except from the last one must be numeric.")
223
+                }
224
+                if(is.factor(x[, ncol(x)])) {
225
+                    warning("Last column of genotype birth-death is a factor. ",
226
+                            "Converting to character.")
227
+                    x[, ncol(x)] <- as.character(x[, ncol(x)])
228
+                }
229
+                if(!all(unlist(lapply(x[, ncol(x)], is.character)))){
230
+                    stop("All elements in last column must be character.")
231
+                }
152 232
             }
153 233
         }
154 234
 
... ...
@@ -158,8 +238,14 @@ to_genotFitness_std <- function(x,
158 238
         ## return(genot_fitness_to_epistasis(x))
159 239
         if(any(duplicated(colnames(x))))
160 240
             stop("duplicated column names")
161
-
162
-        cnfl <- which(colnames(x)[-ncol(x)] == "")
241
+        
242
+        if(deathSpec) {
243
+            cnfl <- which(colnames(x)[-c((ncol(x)-1):ncol(x))] == "")
244
+        }
245
+        else {
246
+            cnfl <- which(colnames(x)[-ncol(x)] == "")
247
+        }
248
+        
163 249
         if(length(cnfl)) {
164 250
             freeletter <- setdiff(LETTERS, colnames(x))[1]
165 251
             if(length(freeletter) == 0) stop("Renaiming failed")
... ...
@@ -168,35 +254,73 @@ to_genotFitness_std <- function(x,
168 254
         }
169 255
         if(!is.null(colnames(x)) && sort_gene_names) {
170 256
             ncx <- ncol(x)
171
-            cnx <- colnames(x)[-ncx]
257
+            if(deathSpec) {
258
+                cnx <- colnames(x)[-c((ncx-1):ncx)]
259
+            }
260
+            else {
261
+                cnx <- colnames(x)[-ncx]
262
+            }
263
+            
172 264
             ocnx <- gtools::mixedorder(cnx)
173 265
             if(!(identical(cnx[ocnx], cnx))) {
174 266
                 message("Sorting gene column names alphabetically")
175
-                x <- cbind(x[, ocnx, drop = FALSE], Fitness = x[, (ncx)])
267
+                
268
+                if(!is.null(frequencyDependentFitness)) {
269
+                    x <- cbind(x[, ocnx, drop = FALSE], Fitness = x[, (ncx)])
270
+                } else {
271
+                    x <- cbind(x[, ocnx, drop = FALSE], Birth = x[, (ncx)])
272
+                }
273
+                
176 274
             }
177 275
         }
178 276
 
179 277
         if(is.null(colnames(x))) {
180
-            ncx <- (ncol(x) - 1)
278
+            if(deathSpec) {
279
+                ncx <- (ncol(x) - 2)
280
+            }
281
+            else {
282
+                ncx <- (ncol(x) - 1)
283
+            }
284
+            
181 285
             message("No column names: assigning gene names from LETTERS")
182 286
             if(ncx > length(LETTERS))
183 287
                 stop("More genes than LETTERS; please give gene names",
184 288
                      " as you see fit.")
185
-            colnames(x) <- c(LETTERS[1:ncx], "Fitness")
289
+            if(deathSpec) {
290
+                colnames(x) <- c(LETTERS[1:ncx], "Birth", "Death")
291
+            }
292
+            
293
+            else {
294
+                
295
+                if (!is.null(frequencyDependentFitness)) {
296
+                    colnames(x) <- c(LETTERS[1:ncx], "Fitness")
297
+                } else {
298
+                    colnames(x) <- c(LETTERS[1:ncx], "Birth")
299
+                }
300
+                    
301
+            }
302
+            
303
+        }
304
+        
305
+        if(deathSpec) {
306
+            if(!all(as.matrix(x[, -c((ncol(x)-1):ncol(x))]) %in% c(0, 1) ))
307
+                stop("First ncol - 2 entries not in {0, 1}.")
308
+        }
309
+        else {
310
+            if(!all(as.matrix(x[, -ncol(x)]) %in% c(0, 1) ))
311
+                stop("First ncol - 1 entries not in {0, 1}.")
186 312
         }
187
-        if(!all(as.matrix(x[, -ncol(x)]) %in% c(0, 1) ))
188
-            stop("First ncol - 1 entries not in {0, 1}.")
189 313
 
190 314
     } else {
191 315
 
192 316
         if(!inherits(x, "data.frame"))
193
-            stop("genotFitness: if two-column must be data frame")
317
+            stop("genotFitness: if genotype is specified, it must be data frame")
194 318
         if(ncol(x) == 0){
195 319
             stop("You have an empty data.frame")
196 320
         }
197 321
         ## Make sure no factors
198 322
         if(is.factor(x[, 1])) {
199
-            warning("First column of genotype fitness is a factor. ",
323
+            warning("First column of genotype birth-death is a factor. ",
200 324
                     "Converting to character.")
201 325
             x[, 1] <- as.character(x[, 1])
202 326
         }
... ...
@@ -221,61 +345,147 @@ to_genotFitness_std <- function(x,
221 345
         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
222 346
             ## the second case above corresponds to passing just single letter genotypes
223 347
             ## as there is not a single marker
224
-            x <- x[, c(1, 2), drop = FALSE]
225
-            if(!all(colnames(x) == c("Genotype", "Fitness"))) {
226
-                message("Column names of object not Genotype and Fitness.",
227
-                        " Renaming them assuming that is what you wanted")
228
-                colnames(x) <- c("Genotype", "Fitness")
348
+            if(deathSpec) {
349
+                x <- x[, c(1, 2, 3), drop = FALSE]
350
+                if(!all(colnames(x) == c("Genotype", "Birth", "Death"))) {
351
+                    message("Column names of object not Genotype, Birth and Death.",
352
+                            " Renaming them assuming that is what you wanted")
353
+                    colnames(x) <- c("Genotype", "Birth", "Death")
354
+                }
355
+            }
356
+            else {
357
+                x <- x[, c(1, 2), drop = FALSE]
358
+                if (!is.null(frequencyDependentFitness)) {
359
+                    if(!all(colnames(x) == c("Genotype", "Fitness"))) {
360
+                        message("Column names of object not Genotype and Birth",
361
+                                " Renaming them assuming that is what you wanted")
362
+                        colnames(x) <- c("Genotype", "Fitness")
363
+                    }
364
+                    
365
+                } else {
366
+                    if(!all(colnames(x) == c("Genotype", "Birth"))) {
367
+                        message("Column names of object not Genotype and Birth",
368
+                                " Renaming them assuming that is what you wanted")
369
+                        colnames(x) <- c("Genotype", "Birth")
370
+                    }
371
+                }
372
+                
229 373
             }
374
+            
375
+            
230 376
             if((!omarker) && (!emarker) && (!nogoodepi)) {
231 377
                 message("All single-gene genotypes as input to to_genotFitness_std")
232 378
             }
233 379
             ## Yes, we need to do this to  scale the fitness and put the "-"
234
-            if(frequencyDependentFitness){
380
+            if(frequencyDependentBirth || frequencyDependentDeath){
235 381
                 anywt <- which(x[, 1] == "WT")
236 382
                 if (length(anywt) > 1){
237
-                    stop("WT should not appear more than once in fitness specification")
383
+                    stop("WT should not appear more than once in birth-death specification")
238 384
                 }
239
-                if(is.factor(x[, ncol(x)])) {
240
-                    warning("Second column of genotype fitness is a factor. ",
241
-                            "Converting to character.")
242
-                    x[, ncol(x)] <- as.character(x[, ncol(x)])
385
+                
386
+                if (frequencyDependentBirth) {
387
+                    if(is.factor(x[, 2])) {
388
+                        warning("Second column of genotype birth-death is a factor. ",
389
+                                "Converting to character.")
390
+                        x[, 2] <- as.character(x[, 2])
391
+                    }
392
+                }
393
+                if (frequencyDependentDeath) {
394
+                    if(is.factor(x[, 3])) {
395
+                        warning("Third column of genotype birth-death is a factor. ",
396
+                                "Converting to character.")
397
+                        x[, 3] <- as.character(x[, 3])
398
+                    }
243 399
                 }
400
+                
244 401
             }
245 402
 
246
-            x <- allGenotypes_to_matrix(x, frequencyDependentFitness)
403
+            x <- allGenotypes_to_matrix(x, frequencyDependentBirth,
404
+                                        frequencyDependentDeath, 
405
+                                        frequencyDependentFitness, deathSpec)
247 406
         }
248 407
     }
249
-    ## And, yes, scale all fitnesses by that of the WT
408
+    ## And, yes, scale all births and deaths by that of the WT
250 409
 
251
-    if (!frequencyDependentFitness){
252
-        whichroot <- which(rowSums(x[, -ncol(x), drop = FALSE]) == 0)
410
+    if (!frequencyDependentBirth && !frequencyDependentDeath){
411
+        if (deathSpec) {
412
+            whichroot <- which(rowSums(x[, -c((ncol(x)-1):ncol(x)), drop = FALSE]) == 0)
413
+        }
414
+        else {
415
+            whichroot <- which(rowSums(x[, -ncol(x), drop = FALSE]) == 0)
416
+        }
417
+        
253 418
         if(length(whichroot) == 0) {
254
-            warning("No wildtype in the fitness landscape!!! Adding it with fitness 1.")
255
-            x <- rbind(c(rep(0, ncol(x) - 1), 1), x)
256
-        } else if(x[whichroot, ncol(x)] != 1) {
257
-            warning("Fitness of wildtype != 1.",
258
-                    " Dividing all fitnesses by fitness of wildtype.")
259
-            vwt <- x[whichroot, ncol(x)]
260
-            x[, ncol(x)] <- x[, ncol(x)]/vwt
419
+            if(deathSpec) {
420
+                warning("No wildtype in the fitness landscape!!! Adding it with birth and death 1.")
421
+                x <- rbind(c(rep(0, ncol(x) - 2), 1, 1), x)
422
+            }
423
+            else {
424
+                warning("No wildtype in the fitness landscape!!! Adding it with birth 1.")
425
+                x <- rbind(c(rep(0, ncol(x) - 1), 1), x)
426
+            }
427
+            
428
+        } else {
429
+            if(x[whichroot, ncol(x)] != 1) {
430
+                if(deathSpec) {
431
+                    warning("Death of wildtype != 1.",
432
+                            " Dividing all deaths by death of wildtype.")
433
+                }
434
+                else {
435
+                    warning("Birth of wildtype != 1.",
436
+                            " Dividing all births by birth of wildtype.")
437
+                }
438
+                
439
+                vwt <- x[whichroot, ncol(x)]
440
+                x[, ncol(x)] <- x[, ncol(x)]/vwt
441
+            }
442
+            
443
+            if (deathSpec && x[whichroot, ncol(x)-1] != 1) {
444
+                warning("Birth of wildtype != 1.",
445
+                        " Dividing all births by birth of wildtype.")
446
+                
447
+                vwt <- x[whichroot, ncol(x)-1]
448
+                x[, ncol(x)-1] <- x[, ncol(x)-1]/vwt
449
+            }
261 450
         }
451
+        
452
+        if(!is.null(frequencyDependentFitness))
453
+            colnames(x)[which(colnames(x) == "Birth")] <- "Fitness"
262 454
     }
263 455
 
264 456
     if(any(is.na(x)))
265 457
         stop("NAs in fitness matrix")
266
-    if(!frequencyDependentFitness) {
458
+    
459
+    if(!frequencyDependentBirth && !frequencyDependentDeath) {
267 460
         if(is.data.frame(x)) 
268 461
             x <- as.matrix(x)
269 462
         stopifnot(inherits(x, "matrix"))
270
-
271
-        if(simplify) {
272
-            x <- x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE]  
463
+    }
464
+    
465
+    if(simplify) {
466
+        if ((!frequencyDependentBirth && !deathSpec) || (!frequencyDependentDeath && deathSpec)) {
467
+            x <- x[x[, ncol(x)] > min_filter_birth_death, , drop = FALSE]
273 468
         }
274
-        class(x) <- c("matrix", "genotype_fitness_matrix")
275
-    } else { ## frequency-dependent fitness
469
+        
470
+        if (!frequencyDependentBirth && deathSpec) {
471
+            x <- x[x[, ncol(x)-1] > min_filter_birth_death, , drop = FALSE]
472
+        }
473
+        
474
+    }
475
+    
476
+    if (!frequencyDependentBirth && !frequencyDependentDeath)
477
+        class(x) <- c("matrix", "genotype_birth_death_matrix")
478
+    
479
+    if(frequencyDependentBirth) { ## frequency-dependent fitness
276 480
         
277 481
         if(frequencyType == "auto"){
278
-            ch <- paste(as.character(x[, ncol(x)]), collapse = "")
482
+            if (deathSpec) {
483
+                ch <- paste(as.character(x[, ncol(x)-1]), collapse = "")
484
+            }
485
+            else {
486
+                ch <- paste(as.character(x[, ncol(x)]), collapse = "")
487
+            }
488
+            
279 489
             if( grepl("f_", ch, fixed = TRUE) ){
280 490
                 frequencyType = "rel"
281 491
                 pattern <- stringr::regex("f_(\\d*_*)*")
... ...
@@ -291,18 +501,62 @@ to_genotFitness_std <- function(x,
291 501
         } else {
292 502
             pattern <- stringr::regex("f_(\\d*_*)*")
293 503
         }
294
-
295
-        regularExpressionVector <-
504
+        
505
+        if(deathSpec) {
506
+            regularExpressionVectorBirth <-
507
+                unique(unlist(lapply(x[, ncol(x)-1],
508
+                                     function(z) {stringr::str_extract_all(string = z,
509
+                                                                           pattern = pattern)})))
510
+            
511
+            if((!all(regularExpressionVectorBirth %in% fVariablesN(ncol(x) - 2, frequencyType))) |
512
+               !(length(intersect(regularExpressionVectorBirth,
513
+                                  fVariablesN(ncol(x) - 2, frequencyType)) >= 1) )){
514
+                stop("There are some errors in birth column")
515
+            }
516
+        }
517
+        else {
518
+            regularExpressionVectorBirth <-
519
+                unique(unlist(lapply(x[, ncol(x)],
520
+                                     function(z) {stringr::str_extract_all(string = z,
521
+                                                                           pattern = pattern)})))
522
+            if((!all(regularExpressionVectorBirth %in% fVariablesN(ncol(x) - 1, frequencyType))) |
523
+               !(length(intersect(regularExpressionVectorBirth,
524
+                                  fVariablesN(ncol(x) - 1, frequencyType)) >= 1) )){
525
+                stop("There are some errors in birth column")
526
+            }
527
+        }
528
+    }
529
+    
530
+    if(frequencyDependentDeath) { ## frequency-dependent fitness
531
+        
532
+        if(frequencyType == "auto"){
533
+            ch <- paste(as.character(x[, ncol(x)]), collapse = "")
534
+            
535
+            if( grepl("f_", ch, fixed = TRUE) ){
536
+                frequencyType = "rel"
537
+                pattern <- stringr::regex("f_(\\d*_*)*")
538
+                
539
+            } else if ( grepl("n_", ch, fixed = TRUE) ){
540
+                frequencyType = "abs"
541
+                pattern <- stringr::regex("n_(\\d*_*)*")
542
+                
543
+            } else { stop("No pattern found when frequencyTypeDeath set to 'auto'") }
544
+            
545
+        } else if(frequencyType == "abs"){
546
+            pattern <- stringr::regex("n_(\\d*_*)*")
547
+        } else {
548
+            pattern <- stringr::regex("f_(\\d*_*)*")
549
+        }
550
+        
551
+        regularExpressionVectorDeath <-
296 552
             unique(unlist(lapply(x[, ncol(x)],
297 553
                                  function(z) {stringr::str_extract_all(string = z,
298 554
                                                                        pattern = pattern)})))
299
-        
300
-        if((!all(regularExpressionVector %in% fVariablesN(ncol(x) - 1, frequencyType))) |
301
-           !(length(intersect(regularExpressionVector,
555
+        if((!all(regularExpressionVectorDeath %in% fVariablesN(ncol(x) - 1, frequencyType))) |
556
+           !(length(intersect(regularExpressionVectorDeath,
302 557
                               fVariablesN(ncol(x) - 1, frequencyType)) >= 1) )){
303
-            stop("There are some errors in fitness column")
558
+            stop("There are some errors in death column")
304 559
         }
305
-
306 560
     }
307 561
     return(x)
308 562
 }
... ...
@@ -373,28 +627,44 @@ genot_fitness_to_epistasis <- function(x) {
373 627
 
374 628
 
375 629
 allGenotypes_to_matrix <- function(x,
376
-                                   frequencyDependentFitness = FALSE) {
630
+                                   frequencyDependentBirth = FALSE,
631
+                                   frequencyDependentDeath = FALSE,
632
+                                   frequencyDependentFitness = NULL,
633
+                                   deathSpec = FALSE) {
377 634
     ## Makes no sense to allow passing order: the matrix would have
378 635
     ## repeated rows. A > B and B > A both have exactly A and B
379 636
 
380 637
     ## Take output of evalAllGenotypes or identical data frame and return
381 638
     ## a matrix with 0/1 in a column for each gene and a final column of
382 639
     ## Fitness
383
-
640
+    
641
+    if(!is.null(frequencyDependentFitness))
642
+        frequencyDependentBirth <- frequencyDependentFitness
643
+    
384 644
     if (is.factor(x[, 1])) {
385 645
         warning(
386
-            "First column of genotype fitness is a factor. ",
646
+            "First column of genotype birth-death is a factor. ",
387 647
             "Converting to character."
388 648
         )
389 649
         x[, 1] <- as.character(x[, 1])
390 650
     }
391
-    if (frequencyDependentFitness) {
392
-        if (is.factor(x[, ncol(x)])) {
651
+    if (frequencyDependentBirth) {
652
+        if (is.factor(x[, 2])) {
653
+            warning(
654
+                "Second column of genotype birth-death is a factor. ",
655
+                "Converting to character."
656
+            )
657
+            x[, 2] <- as.character(x[, 2])
658
+        }
659
+    }
660
+    
661
+    if (frequencyDependentDeath) {
662
+        if (is.factor(x[, 3])) {
393 663
             warning(
394
-                "Second column of genotype fitness is a factor. ",
664
+                "Third column of genotype birth-death is a factor. ",
395 665
                 "Converting to character."
396 666
             )
397
-            x[, ncol(x)] <- as.character(x[, ncol(x)])
667
+            x[, 3] <- as.character(x[, 3])
398 668
         }
399 669
     }
400 670
 
... ...
@@ -402,15 +672,30 @@ allGenotypes_to_matrix <- function(x,
402 672
     anywt <- which(x[, 1] == "WT")
403 673
     if (length(anywt) > 1) stop("More than 1 WT")
404 674
     if (length(anywt) == 1) {
405
-        fwt <- x[anywt, 2]
675
+        bwt <- x[anywt, 2]
676
+        if (deathSpec) {
677
+            dwt <- x[anywt, 3]
678
+        }
679
+        
406 680
         x <- x[-anywt, ]
407 681
         ## Trivial case of passing just a WT?
408 682
     } else {
409
-        if (!frequencyDependentFitness) {
410
-            warning("No WT genotype. Setting its fitness to 1.")
411
-            fwt <- 1
683
+        if (!frequencyDependentBirth && !frequencyDependentDeath) {
684
+            bwt <- 1
685
+            if(deathSpec) {
686
+                dwt <- 1
687
+                warning("No WT genotype. Setting its birth and death to 1.")
688
+            }
689
+            else {
690
+                warning("No WT genotype. Setting its birth to 1.")
691
+            }
692
+            
693
+            
412 694
         } else {
413
-            fwt <- NA
695
+            bwt <- NA
696
+            
697
+            if(deathSpec) 
698
+                dwt <- NA
414 699
             ##   message("No WT genotype in FDF: setting it to 0.")
415 700
         }
416 701
     }
... ...
@@ -436,23 +721,60 @@ allGenotypes_to_matrix <- function(x,
436 721
     for (i in 1:nrow(m)) {
437 722
         m[i, the_match[[i]]] <- 1
438 723
     }
439
-    m <- cbind(m, x[, 2])
440
-    colnames(m) <- c(all_genes, "Fitness")
441
-    if(!is.na(fwt))
442
-        m <- rbind(c(rep(0, length(all_genes)), fwt), m)
724
+    if(deathSpec) {
725
+        m <- cbind(m, x[, 2], x[, 3])
726
+        colnames(m) <- c(all_genes, "Birth", "Death")
727
+    }
728
+    else {
729
+        m <- cbind(m, x[, 2])
730
+        
731
+        if (!is.null(frequencyDependentFitness)) {
732
+            colnames(m) <- c(all_genes, "Fitness")
733
+        } else {
734
+            colnames(m) <- c(all_genes, "Birth")
735
+        }
736
+    }
737
+    
738
+    
739
+    if(!is.na(bwt))
740
+        if(deathSpec) {
741
+            m <- rbind(c(rep(0, length(all_genes)), bwt, dwt), m)
742
+        }
743
+        else{
744
+            m <- rbind(c(rep(0, length(all_genes)), bwt), m)
745
+        }
746
+        
443 747
 
444
-    if (frequencyDependentFitness) {
748
+    if (frequencyDependentBirth || frequencyDependentDeath) {
445 749
         m <- as.data.frame(m)
446 750
         m[, 1:length(all_genes)] <- apply(
447 751
             m[, 1:length(all_genes), drop = FALSE],
448 752
             2,
449 753
             as.numeric
450 754
         )
451
-        m[, ncol(m)] <- as.character(m[, ncol(m)])
755
+        if(frequencyDependentBirth) {
756
+            m[, length(all_genes)+1] <- as.character(m[, length(all_genes)+1])
757
+        }
758
+        else {
759
+            m[, length(all_genes)+1] <- as.numeric(m[, length(all_genes)+1])
760
+        }
761
+        
762
+        if(frequencyDependentDeath) {
763
+            m[, length(all_genes)+2] <- as.character(m[, length(all_genes)+2])
764
+        }
765
+        else if(deathSpec){
766
+            m[, length(all_genes)+2] <- as.numeric(m[, length(all_genes)+2])
767
+        }
768
+        
452 769
     }
453 770
     ## Ensure sorted
454 771
     ## m <- data.frame(m)
455
-    rs <- rowSums(m[, -ncol(m), drop = FALSE])
772
+    if(deathSpec) {
773
+        rs <- rowSums(m[, -c((ncol(m)-1):ncol(m)), drop = FALSE])
774
+    }
775
+    else {
776
+        rs <- rowSums(m[, -ncol(m), drop = FALSE])
777
+    }
456 778
     m <- m[order(rs), , drop = FALSE]
457 779
     ## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ]
458 780
     return(m)
Browse code

v. 2.99.9: fixed one-gene cases, nem in Readme, typos; removin unused code and v.1 from long tests

ramon diaz-uriarte (at Phelsuma) authored on 22/04/2021 10:27:49
Showing 1 changed files
... ...
@@ -307,90 +307,6 @@ to_genotFitness_std <- function(x,
307 307
     return(x)
308 308
 }
309 309
 
310
-## Deprecated after flfast
311
-## to_genotFitness_std is faster and has better error checking
312
-## and is very similar and does not use
313
-## the genot_fitness_to_epistasis, which is not reasonable anymore.
314
-
315
-## from_genotype_fitness <- function(x) {
316
-##     ## Would break with output from allFitnessEffects and
317
-##     ## output from allGenotypeAndMut
318
-
319
-##     ## For the very special and weird case of
320
-##     ## a matrix but only a single gene so with a 0 and 1
321
-##     ## No, this is a silly and meaningless case.
322
-##     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
323
-
324
-##     ## } else  blabla:
325
-
326
-##     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
327
-##         stop("Input must inherit from matrix or data.frame.")
328
-
329
-##     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
330
-##     ##     stop(paste0("Genotype fitness input either two-column data frame",
331
-##     ##          " or a numeric matrix with > 2 columns."))
332
-##     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
333
-##     ##     stop(paste0("It looks like you have a matrix for a single genotype",
334
-##     ##                 " of a single gene. For this degenerate cases use",
335
-##     ##                 " a data frame specification."))
336
-
337
-##     if(ncol(x) > 2) {
338
-##         if(inherits(x, "matrix")) {
339
-##             if(!is.numeric(x))
340
-##                 stop("A genotype fitness matrix/data.frame must be numeric.")
341
-##         } else if(inherits(x, "data.frame")) {
342
-##             if(!all(unlist(lapply(x, is.numeric))))
343
-##                 stop("A genotype fitness matrix/data.frame must be numeric.")
344
-##         }
345
-
346
-##         ## We are expecting here a matrix of 0/1 where columns are genes
347
-##         ## except for the last column, that is Fitness
348
-##         ## Of course, can ONLY work with epistastis, NOT order
349
-##         return(genot_fitness_to_epistasis(x))
350
-##     } else {
351
-##         if(!inherits(x, "data.frame"))
352
-##             stop("genotFitness: if two-column must be data frame")
353
-##         ## Make sure no factors
354
-##         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
355
-##         ## Make sure no numbers
356
-##         if(any(is.numeric(x[, 1])))
357
-##             stop(paste0("genotFitness: first column of data frame is numeric.",
358
-##                         " Ambiguous and suggests possible error. If sure,",
359
-##                         " enter that column as character"))
360
-
361
-##         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
362
-##         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
363
-##         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
364
-##         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
365
-##         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
366
-##         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
367
-##         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
368
-##         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
369
-##         if(omarker) {
370
-##             ## do something. To be completed
371
-##             stop("This code not yet ready")
372
-##             ## You can pass to allFitnessEffects genotype -> fitness mappings that
373
-##             ## involve epistasis and order. But they must have different
374
-##             ## genes. Otherwise, it is not manageable.
375
-##         }
376
-##         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
377
-##             ## the second case above corresponds to passing just single letter genotypes
378
-##             ## as there is not a single marker
379
-##             x <- x[, c(1, 2), drop = FALSE]
380
-##             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
381
-##                 message("Column names of object not Genotype and Fitness.",
382
-##                         " Renaming them assuming that is what you wanted")
383
-##                 colnames(x) <- c("Genotype", "Fitness")
384
-##             }
385
-##             if((!omarker) && (!emarker) && (!nogoodepi)) {
386
-##                 message("All single-gene genotypes as input to from_genotype_fitness")
387
-##             }
388
-##             ## Yes, we need to do this to  scale the fitness and put the "-"
389
-##             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
390
-##         }
391
-##     }
392
-## }
393
-
394 310
 
395 311
 
396 312
 
... ...
@@ -504,7 +420,13 @@ allGenotypes_to_matrix <- function(x,
504 420
     )
505 421
 
506 422
     all_genes <- sort(unique(unlist(splitted_genots)))
507
-
423
+    if(length(all_genes) < 2) stop(paste("There must be at least two genes (loci)",
424
+                                         "in the fitness effects.",
425
+                                         "If you only care about a case with",
426
+                                         "a single one (or none) enter gene(s)",
427
+                                         "with a fitness effect of zero.",
428
+                                         "For freq.dep.fitness, create another ",
429
+                                         "genotype that always has fitness zero."))
508 430
     m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes))
509 431
     the_match <- lapply(
510 432
         splitted_genots,
... ...
@@ -522,7 +444,7 @@ allGenotypes_to_matrix <- function(x,
522 444
     if (frequencyDependentFitness) {
523 445
         m <- as.data.frame(m)
524 446
         m[, 1:length(all_genes)] <- apply(
525
-            m[, 1:length(all_genes)],
447
+            m[, 1:length(all_genes), drop = FALSE],
526 448
             2,
527 449
             as.numeric
528 450
         )
... ...
@@ -607,78 +529,6 @@ Magellan_stats <- function(x, max_num_genotypes = 2000,
607 529
 }
608 530
 
609 531
 
610
-## Former version, that always tries to give a vector
611
-## and that uses an external executable.
612
-## Magellan_stats and Magellan_draw cannot be tested
613
-## routinely, as they depend on external software
614
-Magellan_stats_former <- function(x, max_num_genotypes = 2000,
615
-                           verbose = FALSE,
616
-                           use_log = TRUE,
617
-                           short = TRUE,
618
-                           fl_statistics = "fl_statistics",
619
-                           replace_missing = FALSE) { # nocov start
620
-    ## I always use
621
-    ## if(!is.null(x) && is.null(file))
622
-    ##     stop("one of object or file name")
623
-    ## if(is.null(file))
624
-    fn <- tempfile()
625
-    fnret <- tempfile()
626
-    if(verbose)
627
-        cat("\n Using input file", fn, " and output file ", fnret, "\n")
628
-
629
-    if(use_log) {
630
-        logarg <- "-l"
631
-    } else {
632
-        logarg <- NULL
633
-    }
634
-    if(short) {
635
-        shortarg <- "-s"
636
-    } else {
637
-        shortarg <- NULL
638
-    }
639
-
640
-    if(replace_missing) {
641
-        zarg <- "-z"
642
-    } else {
643
-        zarg <- NULL
644
-    }
645
-
646
-    to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
647
-    ## newer versions of Magellan provide some extra values to standard output
648
-    call_M <- system2(fl_statistics,
649
-                      args = paste(fn, shortarg, logarg, zarg, "-o", fnret),
650
-                      stdout = NULL)
651
-    if(short) {
652
-        ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
653
-
654
-        tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
655
-        ## Make names more explicit, but check we have what we think we have
656
-        ## New versions of Magellan produce different output apparently of variable length
657
-        stopifnot(length(tmp) >= 23) ## 23) ## variable length
658
-        stopifnot(identical(names(tmp)[1:13], ## only some
659
-                            c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s",
660
-                              "nchains", "nsteps", "nori", "depth", "magn", "sign",
661
-                              "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
662
-        ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
663
-        if(length(tmp) >= 24) ## the new version
664
-            stopifnot(identical(names(tmp)[c(20, 24)],
665
-                                c("steps_m", "mProbOpt_0")))
666
-        ## steps_m: the mean number of steps over the entire landscape to
667
-        ## reach the global optimum
668
-        ## mProbOpt_0: The mean probability to
669
-        ## reach that optimum (again averaged over the entire landscape).
670
-        ## but if there are multiple optima, there are many of these
671
-        names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
672
-                        "r/s","n_chains", "n_steps", "n_origins", "max_depth",
673
-                        "epist_magn", "epist_sign", "epist_rsign")## ,
674
-                        ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
675
-                        ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
676
-        ## "std_dev_pairs", "var_outdegree")
677
-    } else {
678
-        tmp <- readLines(fnret)
679
-    }
680
-    return(tmp)
681
-} # nocov end
682 532
 
683 533
 Magellan_draw <- function(x, max_num_genotypes = 2000,
684 534
                           verbose = FALSE,
Browse code

2.99.4

ramon diaz-uriarte (at Phelsuma) authored on 17/12/2020 15:07:07
Showing 1 changed files
... ...
@@ -47,18 +47,6 @@ to_Magellan <- function(x, file,
47 47
 
48 48
 
49 49
 
50
-## ## genotype_fitness_matrix -> fitness landscape as data frame
51
-## fmatrix_to_afe <- function(x) {
52
-##     stopifnot(inherits(x, "genotype_fitness_matrix"))
53
-##     y <- x[, -ncol(x)]
54
-##     nn <- apply(y, 1,
55
-##                 function(u) paste(sort(colnames(y)[as.logical(u)]),
56
-##                                   collapse = ", "))
57
-##     nn[nn == ""] <- "WT"
58
-##     return(data.frame(Genotype = nn, Fitness = x[, ncol(x)],
59
-##            stringsAsFactors = FALSE))
60
-## }
61
-
62 50
 to_Fitness_Matrix <- function(x, max_num_genotypes) {
63 51
     ## A general converter. Ready to be used by plotFitnessLandscape and
64 52
     ## Magellan exporter.
... ...
@@ -124,217 +112,181 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
124 112
 ## but if we are passed a fitness landscapes as produced by
125 113
 ## rfitness, do nothing. Well, it actually does something.
126 114
 
127
-
128