Browse code

v. 2.9.2 - LOD: using only the strict Szendro et al. meaning. - POM: computed in C++. - Using fitness landscape directly when given as input (no conversion to epistasis)

ramon diaz-uriarte (at Phelsuma) authored on 24/11/2017 12:41:48
Showing 36 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.9.0
5
-Date: 2017-09-27
4
+Version: 2.9.2
5
+Date: 2017-11-24
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -51,8 +51,8 @@ importFrom("igraph", igraph.to.graphNEL, graph.data.frame, V, E,
51 51
 import(graph)
52 52
 import(Rgraphviz)
53 53
 importFrom("parallel", mclapply, detectCores, mcMap)
54
-importFrom("gtools", combinations, permutations)
55
-
54
+importFrom("gtools", combinations, permutations, mixedorder)
55
+## importFrom("compare", compare)
56 56
 importFrom("graphics", "axis", "box", "legend", "matplot", "par", "polygon")
57 57
 importFrom("methods", "as")
58 58
 importFrom("stats", "na.omit", "runif", "smooth.spline")
... ...
@@ -1,4 +1,4 @@
1
-## Copyright 2016 Ramon Diaz-Uriarte
1
+## Copyright 2016, 2017 Ramon Diaz-Uriarte
2 2
 
3 3
 ## This program is free software: you can redistribute it and/or modify
4 4
 ## it under the terms of the GNU General Public License as published by
... ...
@@ -14,64 +14,115 @@
14 14
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 15
 
16 16
 
17
+## Note that, in contrast to POM, LOD is not well defined if the population
18
+## becomes extinct.
17 19
 
18 20
 ## Functions to obtain LOD and POM similar to Szendro et al., 2014, PNAS.
19 21
 genot_max <- function(x) {
20 22
     x$GenotypesLabels[which.max(x$pops.by.time[nrow(x$pops.by.time), -1])]
21 23
 }
22 24
 
23
-LOD.internal <- function(x) {
24
-    ## Not identical to LOD of Szendro because:
25
-    
26
-    ##  a) I think we want all paths, not just their single LOD, which I
27
-    ##  think they might use out of convenience.
28 25
 
29
-    ##  b) For me it is a mess, a complicated mess, to use their LOD as
30
-    ##  they define it and there are many ambiguities in how to define it
31
-    ##  in continuous time.
32 26
 
33
-    ## This also means that single simulation might yield multiple LODs
27
+## Filter the PhylogDF so we obtain LOD, sensu stricto.
34 28
 
35
-    ## keepEvents is FALSE to make this object as small as possible.
29
+## No longer necessary with LOD from C++ removing duplicates
30
+## ## Now this is coming from LOD_DF, which already only has
31
+## ## implicitly pop_size_child == 0
32
+## Only used in one function use for testing
33
+filter_phylog_df_LOD <- function(y) {
34
+    keep <- !rev(duplicated(rev(y$child)))
35
+    return(y[keep, ])
36
+}
37
+
38
+
39
+
40
+## ## Filter the PhylogDF so we obtain LOD, sensu stricto.
41
+## ## For the old version
42
+## filter_phylog_df_LOD_with_n <- function(y) {
43
+##     y <- y[y$pop_size_child == 0, , drop = FALSE]
44
+##     keep <- !rev(duplicated(rev(y$child)))
45
+##     return(y[keep, ])
46
+## }
47
+
48
+
49
+## from phylogClone, key parts for the LOD strict structure
50
+phcl_from_lod <- function(df, x) {
51
+    ## no longer necessary
52
+    ## ## the !keepEvents. Which I move here to speed things up.
53
+    ## df <- df[!duplicated(df[, c(1, 2)]), , drop = FALSE]
54
+    
55
+    tG <- unique(c(as.character(df[, 1]), as.character(df[, 2])))
56
+    ## ## Do as in phylogClone. So that we have the same nodes
57
+    ## ## in LOD all and not LOD all?
58
+    ## z <- which_N_at_T(x, N = 1, t = "last")
59
+    ## tG <- x$GenotypesLabels[z]
60
+    
61
+    ## ## FIXME: aren't these two warnings redundant or aliased?
62
+    ## ## yes, partlt
63
+    ##  I think this can never happen now
64
+    ## if ((length(tG) == 1) && (tG == "")) {
65
+    ##     warning("There never was a descendant of WT")
66
+    ## }
67
+    if (nrow(df) == 0) {
68
+        warning("LOD structure has 0 rows: no descendants of initMutant ever appeared. ")
69
+        return(NA)
70
+    }
71
+    g <- igraph::graph.data.frame(df[, c(1, 2)])
72
+    nodesInP <- unique(unlist(igraph::neighborhood(g, order = 1e+09, 
73
+                                                   nodes = tG, mode = "in")))
74
+    allLabels <- unique(as.character(unlist(df[, c(1, 2)])))
75
+    nodesRm <- setdiff(allLabels, V(g)$name[nodesInP])
76
+    g <- igraph::delete.vertices(g, nodesRm)
77
+    tmp <- list(graph = g, df = df)
78
+    class(tmp) <- c(class(tmp), "phylogClone")
79
+    return(tmp)
80
+}
81
+
82
+LOD.internal <- function(x) {
36 83
     if(is.null(x$pops.by.time)) {
37 84
         warning("Missing needed components. This might be a failed simulation.",
38 85
                 " Returning NA.")
39 86
         return(list(all_paths = NA, lod_single = NA))
40 87
     }
41
-    pc <- phylogClone(x, keepEvents = FALSE)
42
-    
88
+    if (!inherits(x, "oncosimul2")) 
89
+        stop("LOD information is only stored with v >= 2")
90
+    ## y <- filter_phylog_df_LOD(x$other$LOD_DF)
91
+    y <- x$other$LOD_DF
92
+    pc <- phcl_from_lod(y)
93
+    ## need eval for oncoSimulPop calls and for LOD_as_path
94
+    initMutant <- x$InitMutant
95
+
96
+    ## No descendants means that: never a descendant.
97
+    ## Note the same that the init mutant be the final state.
43 98
     if((length(pc) == 1) && (is.na(pc))) {
44
-        return(list(all_paths = NA,
45
-                    lod_single = "No_descendants"))
99
+        lod <- "No_descendants"
100
+        ## bail out here. We do not need the rest.
101
+        if(initMutant != "")
102
+            attributes(lod)$initMutant <- initMutant
103
+        return(lod)
46 104
     }
105
+    
47 106
     pcg <- pc$graph
48 107
     end <- genot_max(x)
49
-    all_paths <- igraph::all_simple_paths(pcg, from = "", to = end, mode = "out")
50
-    ## the next is partially redundant
51
-    ## graph_to_end <- igraph::make_ego_graph(pcg, order = 1e9, nodes = end,
52
-    ##                                        mode = "in")
53
-    ## if(length(graph_to_end) != 1) stop("length(graph_to_end) > 1")
54
-    ## I am not sure if I should keep the last one. Redundant
55
-
56
-    ## This gives a single path and it is the first entry into each
57
-    ## destination. But we do not check there is no extinction afterwards.
58
-    ## The closest to the single Szendro LOD
59
-    if(end == "") {
60
-        ## Max is WT
61
-        lod_single <- "WT_is_end"
108
+    
109
+    if(end == initMutant) {
110
+        if(initMutant == "") {
111
+            stinitm <- "WT"
112
+        } else {
113
+            stinitm <- paste0("initMutant(", initMutant, ")")
114
+        }
115
+        lod <- paste0(stinitm, "_is_end")
62 116
     } else {
63
-        singlep <- pc$df
64
-        singlep[, 1] <- as.character(singlep[, 1])
65
-        singlep[, 2] <- as.character(singlep[, 2])
66
-        singlep <- singlep[ do.call(order, singlep[, c(2, 3)]), ]
67
-        singlep <- singlep[!duplicated(singlep[, 2]), ]
68
-        gsingle <- igraph::graph_from_data_frame(singlep)
69
-        lod_single <- igraph::all_simple_paths(gsingle, from = "", to = end, mode = "out")
70
-        if(length(lod_single) != 1) stop("lod_single != 1")
117
+        all_paths <- igraph::all_simple_paths(pcg, from = initMutant, to = end,
118
+                                              mode = "out")
119
+        if(length(all_paths) > 1)
120
+            stop("length(all_paths) > 1???")
121
+        lod <- igraph::as_ids(all_paths[[1]])
71 122
     }
72
-    return(list(all_paths = all_paths,
73
-                lod_single = lod_single[[1]])) ##, graph_to_end = graph_to_end[[1]]))
74
-                ## graph_phylog_clone = pcg))
123
+    if(initMutant != "")
124
+        attributes(lod)$initMutant <- initMutant
125
+    return(lod)
75 126
 }
76 127
 
77 128
 
... ...
@@ -80,30 +131,71 @@ POM.internal <- function(x) {
80 131
         warning("Missing needed components. This might be a failed simulation.",
81 132
                 " Returning NA.")
82 133
         return(NA)
134
+    } else {
135
+        x$other$POM
83 136
     }
84
-    x$GenotypesLabels[rle(apply(x$pops.by.time[, -1, drop = FALSE], 1, which.max))$values]
85 137
 }
86 138
 
87
-## First do, over a set of simulations, sim:
88
-## l_lod_single <- mclapply(sim, LODs)
89
-## l_poms <- mclapply(sim, POM)
90 139
 
91 140
 
92 141
 diversityLOD <- function(llod) {
93
-    nn <- names(llod[[1]])
142
+    ## nn <- names(llod[[1]])
143
+    nn <- llod[[1]]
94 144
     if( is_null_na(nn) ||
95
-        !(nn == c("all_paths", "lod_single")))
145
+        !(is.list(llod)))
96 146
         stop("Object must be a list of LODs")
97
-    pathstr <- unlist(lapply(llod, function(x) paste(names(x$lod_single),
147
+    pathstr <- unlist(lapply(llod, function(x) paste(x,
98 148
                                                      collapse = "_")))
99 149
     shannonI(table(pathstr))
100 150
 }
101 151
 
152
+## diversityLOD <- function(llod) {
153
+##     nn <- names(llod[[1]])
154
+##     if( is_null_na(nn) ||
155
+##         !(is.list(llod)))
156
+##         stop("Object must be a list of LODs")
157
+##     pathstr <- unlist(lapply(llod, function(x) paste(names(x),
158
+##                                                      collapse = "_")))
159
+##     shannonI(table(pathstr))
160
+## }
161
+
162
+LOD_as_path <- function(llod) {
163
+    path_l <- function(u) {
164
+        if(length(u) == 1) {
165
+            if(is.null(attributes(u)$initMutant))
166
+                initMutant <- ""
167
+            else
168
+                initMutant <- attributes(u)$initMutant
169
+            if(initMutant == "") initMutant <- "WT"
170
+            if(grepl("_is_end", u))
171
+                return(initMutant)
172
+            if(u == "No_descendants")
173
+                return(initMutant)
174
+        } else {
175
+            ## Deal with "" meaning WT
176
+            ## the_names <- names(u)
177
+            the_names <- u
178
+            the_names_wt <- which(the_names == "")
179
+            
180
+            if(length(the_names_wt)) {
181
+                if(length(the_names_wt) > 1) stop("more than 1 WT?!")
182
+                if(the_names_wt > 1) stop("WT in position not 1?!")
183
+                the_names[the_names_wt] <- "WT"
184
+            }
185
+            return(paste(the_names, collapse = " -> ")) 
186
+        }
187
+    }
188
+    if(!is.list(llod))
189
+        pathstr <- path_l(llod)
190
+    else
191
+        pathstr <- unlist(lapply(llod, path_l))
192
+    return(pathstr)
193
+}
194
+## We would just need a LOD_as_DAG
195
+
102 196
 diversityPOM <- function(lpom) {
103 197
     if(!inherits(lpom, "list"))
104 198
         stop("Object must be a list of POMs")
105
-    ## if(!inherits(x, "oncosimul_lod_list"))
106
-    ##     stop("This is not a list of POMs")
107 199
     pomstr <- unlist(lapply(lpom, function(x) paste(x, collapse = "_")))
108 200
     shannonI(table(pomstr))
109 201
 }
... ...
@@ -112,13 +204,12 @@ diversityPOM <- function(lpom) {
112 204
 diversity_POM <- diversityPOM
113 205
 diversity_LOD <- diversityLOD
114 206
 
115
-## POM.oncosimul2 <- POM.internal
116
-## LOD2.oncosimul2 <- LOD.internal
117 207
 
118 208
 POM <- function(x) {
119 209
     UseMethod("POM", x)
120 210
 }
121 211
 
212
+
122 213
 LOD <- function(x) {
123 214
     UseMethod("LOD", x)
124 215
 }
... ...
@@ -129,6 +220,9 @@ LOD.oncosimul2 <- function(x) return(LOD.internal(x))
129 220
 POM.oncosimulpop <- function(x) return(lapply(x, POM.internal))
130 221
 LOD.oncosimulpop <- function(x) return(lapply(x, LOD.internal))
131 222
 
223
+
224
+
225
+
132 226
 ## POM.oncosimul2 <- function(x) {
133 227
 ##     out <- POM.internal(x)
134 228
 ##     class(out) <- c(class(out), "oncosimul_pom")
... ...
@@ -166,3 +260,169 @@ LOD.oncosimulpop <- function(x) return(lapply(x, LOD.internal))
166 260
 ## }
167 261
 
168 262
 
263
+
264
+POM_pre_2.9.2 <- function(x) {
265
+    if(is.null(x$pops.by.time)) {
266
+        warning("Missing needed components. This might be a failed simulation.",
267
+                " Returning NA.")
268
+        return(NA)
269
+    }
270
+    x$GenotypesLabels[rle(apply(x$pops.by.time[, -1, drop = FALSE],
271
+                                1, which.max))$values]
272
+}
273
+
274
+
275
+
276
+LOD.internal_pre_2.9.2 <- function(x, strict) {
277
+    ## Not identical to LOD of Szendro because:
278
+    
279
+    ##  a) I think we want all paths, not just their single LOD, which I
280
+    ##  think they might use out of convenience.
281
+
282
+    ##  b) For me it is a mess, a complicated mess, to use their LOD as
283
+    ##  they define it and there are many ambiguities in how to define it
284
+    ##  in continuous time.
285
+
286
+    ## This also means that single simulation might yield multiple LODs
287
+
288
+    ## keepEvents is FALSE to make this object as small as possible.
289
+    if(is.null(x$pops.by.time)) {
290
+        warning("Missing needed components. This might be a failed simulation.",
291
+                " Returning NA.")
292
+        return(list(all_paths = NA, lod_single = NA))
293
+    }
294
+    if(strict) {
295
+        if (!inherits(x, "oncosimul2")) 
296
+            stop("LOD information is only stored with v >= 2")
297
+        y <- filter_phylog_df_LOD(x$other$LOD_DF)
298
+        pc <- phcl_from_lod(y)
299
+    } else {
300
+        pc <- phylogClone(x, keepEvents = FALSE)
301
+    }
302
+    ## need eval for oncoSimulPop calls and for LOD_as_path
303
+    initMutant <- x$InitMutant
304
+    
305
+    if((length(pc) == 1) && (is.na(pc))) {
306
+        lodlist <- list(all_paths = NA,
307
+                        lod_single = "No_descendants")
308
+        ## bail out here. We do not need the rest.
309
+        if(initMutant != "")
310
+            attributes(lodlist)$initMutant <- initMutant
311
+        return(lodlist)
312
+    }
313
+    
314
+    pcg <- pc$graph
315
+    end <- genot_max(x)
316
+    
317
+    ## if(!is.null(eval(attributes(x)$call$initMutant))) {
318
+    ##     initMutant <- eval(attributes(s7)$call$initMutant)
319
+    ## } else {
320
+    ##     initMutant <- ""
321
+    ## }
322
+    ## browser()
323
+    if(end == initMutant) {
324
+        if(initMutant == "") {
325
+            stinitm <- "WT"
326
+        } else {
327
+            stinitm <- paste0("initMutant(", initMutant, ")")
328
+        }
329
+        lod_single <- paste0(stinitm, "_is_end")
330
+        all_paths <- list(lod_single)
331
+    } else {
332
+        all_paths <- igraph::all_simple_paths(pcg, from = initMutant, to = end,
333
+                                              mode = "out")
334
+       
335
+        if(!strict) {
336
+            ## the next is partially redundant
337
+            ## graph_to_end <- igraph::make_ego_graph(pcg, order = 1e9, nodes = end,
338
+            ##                                        mode = "in")
339
+            ## if(length(graph_to_end) != 1) stop("length(graph_to_end) > 1")
340
+            ## I am not sure if I should keep the last one. Redundant
341
+            
342
+            ## This gives a single path and it is the first entry into each
343
+            ## destination. But we do not check there is no extinction afterwards.
344
+            ## The closest to the single Szendro LOD
345
+            singlep <- pc$df
346
+            singlep[, 1] <- as.character(singlep[, 1])
347
+            singlep[, 2] <- as.character(singlep[, 2])
348
+            singlep <- singlep[ do.call(order, singlep[, c(2, 3)]), ]
349
+            singlep <- singlep[!duplicated(singlep[, 2]), ]
350
+            gsingle <- igraph::graph_from_data_frame(singlep)
351
+            lod_single <- igraph::all_simple_paths(gsingle, from = initMutant,
352
+                                                   to = end, mode = "out")
353
+            if(length(lod_single) != 1) stop("lod_single != 1")
354
+        }
355
+    }
356
+    if(strict) {
357
+        if(length(all_paths) > 1)
358
+            stop("length(all_paths) > 1???")
359
+        lodlist <- list(all_paths = NA,
360
+                    lod_single = all_paths[[1]])
361
+    } else {
362
+        lodlist <- list(all_paths = all_paths,
363
+                    lod_single = lod_single[[1]])
364
+    }
365
+    if(initMutant != "")
366
+        attributes(lodlist)$initMutant <- initMutant
367
+    return(lodlist)
368
+}
369
+
370
+
371
+
372
+## LOD_as_path_pre_2.9.2 <- function(llod) {
373
+##     path_l <- function(u) {
374
+##         if(length(u$lod_single) == 1) {
375
+##             if(is.null(attributes(u)$initMutant))
376
+##                 initMutant <- ""
377
+##             else
378
+##                 initMutant <- attributes(u)$initMutant
379
+##             if(initMutant == "") initMutant <- "WT"
380
+##             if(grepl("_is_end", u$lod_single))
381
+##                 return(initMutant)
382
+##             if(u$lod_single == "No_descendants")
383
+##                 return(initMutant)
384
+##         } else {
385
+##             ## Deal with "" meaning WT
386
+##             the_names <- names(u$lod_single)
387
+##             the_names_wt <- which(the_names == "")
388
+            
389
+##             if(length(the_names_wt)) {
390
+##                 if(length(the_names_wt) > 1) stop("more than 1 WT?!")
391
+##                 if(the_names_wt > 1) stop("WT in position not 1?!")
392
+##                 the_names[the_names_wt] <- "WT"
393
+##             }
394
+##             return(paste(the_names, collapse = " -> ")) 
395
+##             ## return(paste0("WT", paste(names(u$lod_single),
396
+##             ##                           collapse = " -> ")) )
397
+##         }
398
+##     }
399
+##     if(identical(names(llod), c("all_paths", "lod_single")))
400
+##         pathstr <- path_l(llod)
401
+##     else {
402
+##         ## should be a list
403
+##         pathstr <- unlist(lapply(llod, path_l))
404
+##     }
405
+##     return(pathstr)
406
+##     ## pathstr <- unlist(lapply(llod, function(x) paste(names(x$lod_single),
407
+##     ##                                                  collapse = " -> ")))
408
+##     ## return(paste0("WT", pathstr))
409
+## }
410
+
411
+
412
+
413
+LOD.oncosimul2_pre_2.9.2 <- function(x, strict = TRUE)
414
+    return(LOD.internal_pre_2.9.2(x, strict))
415
+
416
+## LOD.oncosimulpop_pre_2.9.2 <- function(x, strict = TRUE)
417
+##     return(lapply(x, LOD.internal_pre_2.9.2, strict))
418
+
419
+
420
+
421
+
422
+
423
+
424
+## Note for self: we could get all the LODs per simulation in the strict
425
+## sense of those never becoming extinct if we subset the phylogClone
426
+## object to children in which if we arrive at the children at any two
427
+## times t and t+k, we retain only rows where any time > t is such that
428
+## the popsize is > 0. But this is not worth it now.
... ...
@@ -1475,7 +1475,7 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
1475 1475
     z <- which_N_at_T(x, N, t)
1476 1476
     tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all
1477 1477
     ## sample size info at each period
1478
-
1478
+    ## FIXME: aren't this and the next warnings redundant or aliased?
1479 1479
     if( (length(tG) == 1) && (tG == "")) {
1480 1480
         warning("There never was a descendant of WT")
1481 1481
     }
... ...
@@ -53,9 +53,14 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
53 53
         ## columns names
54 54
         ## if( (is.null(colnames(x))) || any(grepl("^$", colnames(x))))
55 55
         ##    stop("Matrix x must have column names")
56
+
57
+        ## Major change as of flfast: no longer using from_genotype_fitness
56 58
         afe <- evalAllGenotypes(allFitnessEffects(
57
-            epistasis = from_genotype_fitness(x)),
59
+            genotFitness = x
60
+            ##, epistasis = from_genotype_fitness(x)
61
+        ),
58 62
             order = FALSE, addwt = TRUE, max = max_num_genotypes)
63
+
59 64
         ## Might not be needed with the proper gfm object (so gmf <- x)
60 65
         ## but is needed if arbitrary matrices.
61 66
         gfm <- allGenotypes_to_matrix(afe) 
... ...
@@ -96,8 +101,13 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
96 101
     return(list(gfm = gfm, afe = afe))
97 102
 }   
98 103
 
104
+## Based on from_genotype_fitness
105
+## but if we are passed a fitness landscapes as produced by
106
+## rfitness, do nothing
99 107
 
100
-from_genotype_fitness <- function(x) {
108
+to_genotFitness_std <- function(x, simplify = TRUE,
109
+                                min_filter_fitness = 1e-9,
110
+                                sort_gene_names = TRUE) {
101 111
     ## Would break with output from allFitnessEffects and
102 112
     ## output from allGenotypeAndMut
103 113
     
... ...
@@ -131,12 +141,47 @@ from_genotype_fitness <- function(x) {
131 141
         ## We are expecting here a matrix of 0/1 where columns are genes
132 142
         ## except for the last column, that is Fitness
133 143
         ## Of course, can ONLY work with epistastis, NOT order
134
-        return(genot_fitness_to_epistasis(x))
144
+        ## return(genot_fitness_to_epistasis(x))
145
+        if(any(duplicated(colnames(x))))
146
+            stop("duplicated column names")
147
+        
148
+        cnfl <- which(colnames(x)[-ncol(x)] == "")
149
+        if(length(cnfl)) {
150
+            freeletter <- setdiff(LETTERS, colnames(x))[1]
151
+            if(length(freeletter) == 0) stop("Renaiming failed")
152
+            warning("One column named ''. Renaming to ", freeletter)
153
+            colnames(x)[cnfl] <- freeletter
154
+        }
155
+        if(!is.null(colnames(x)) && sort_gene_names) {
156
+            ncx <- ncol(x)
157
+            cnx <- colnames(x)[-ncx]
158
+            ocnx <- gtools::mixedorder(cnx)
159
+            if(!(identical(cnx[ocnx], cnx))) {
160
+                message("Sorting gene column names alphabetically")
161
+                x <- cbind(x[, ocnx, drop = FALSE], Fitness = x[, (ncx)])
162
+            }
163
+        }
164
+        
165
+        if(is.null(colnames(x))) {
166
+            ncx <- (ncol(x) - 1)
167
+            message("No column names: assigning gene names from LETTERS")
168
+            if(ncx > length(LETTERS))
169
+                stop("More genes than LETTERS; please give gene names",
170
+                     " as you see fit.")
171
+            colnames(x) <- c(LETTERS[1:ncx], "Fitness")
172
+        }
173
+        
174
+        if(!all(as.matrix(x[, -ncol(x)]) %in% c(0, 1) ))
175
+            stop("First ncol - 1 entries not in {0, 1}.")
135 176
     } else {
136 177
         if(!inherits(x, "data.frame"))
137 178
             stop("genotFitness: if two-column must be data frame")
138 179
         ## Make sure no factors
139
-        if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
180
+        if(is.factor(x[, 1])) {
181
+            warning("First column of genotype fitness is a factor. ",
182
+                    "Converting to character.")
183
+            x[, 1] <- as.character(x[, 1])
184
+            }
140 185
         ## Make sure no numbers
141 186
         if(any(is.numeric(x[, 1])))
142 187
             stop(paste0("genotFitness: first column of data frame is numeric.",
... ...
@@ -168,16 +213,121 @@ from_genotype_fitness <- function(x) {
168 213
                 colnames(x) <- c("Genotype", "Fitness")
169 214
             }
170 215
             if((!omarker) && (!emarker) && (!nogoodepi)) {
171
-                message("All single-gene genotypes as input to from_genotype_fitness")
216
+                message("All single-gene genotypes as input to to_genotFitness_std")
172 217
             }
173 218
             ## Yes, we need to do this to  scale the fitness and put the "-"
174
-            return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
219
+            x <- allGenotypes_to_matrix(x)
175 220
         }
176 221
     }
222
+    ## And, yes, scale all fitnesses by that of the WT
223
+    whichroot <- which(rowSums(x[, -ncol(x), drop = FALSE]) == 0)
224
+    if(length(whichroot) == 0) {
225
+        warning("No wildtype in the fitness landscape!!! Adding it with fitness 1.")
226
+        x <- rbind(c(rep(0, ncol(x) - 1), 1), x)
227
+    } else if(x[whichroot, ncol(x)] != 1) {
228
+        warning("Fitness of wildtype != 1.",
229
+                " Dividing all fitnesses by fitness of wildtype.")
230
+        vwt <- x[whichroot, ncol(x)]
231
+        x[, ncol(x)] <- x[, ncol(x)]/vwt
232
+    }
233
+    if(any(is.na(x)))
234
+        stop("NAs in fitness matrix")
235
+    if(simplify) {
236
+        return(x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE])
237
+    } else {
238
+        return(x)
239
+    }
177 240
 }
178 241
 
242
+## Deprecated after flfast
243
+## to_genotFitness_std is faster and has better error checking
244
+## and is very similar and does not use
245
+## the genot_fitness_to_epistasis, which is not reasonable anymore.
179 246
 
180
-
247
+## from_genotype_fitness <- function(x) {
248
+##     ## Would break with output from allFitnessEffects and
249
+##     ## output from allGenotypeAndMut
250
+    
251
+##     ## For the very special and weird case of
252
+##     ## a matrix but only a single gene so with a 0 and 1
253
+##     ## No, this is a silly and meaningless case.
254
+##     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
255
+    
256
+##     ## } else  blabla: 
257
+    
258
+##     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
259
+##         stop("Input must inherit from matrix or data.frame.")
260
+    
261
+##     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
262
+##     ##     stop(paste0("Genotype fitness input either two-column data frame",
263
+##     ##          " or a numeric matrix with > 2 columns."))
264
+##     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
265
+##     ##     stop(paste0("It looks like you have a matrix for a single genotype",
266
+##     ##                 " of a single gene. For this degenerate cases use",
267
+##     ##                 " a data frame specification."))
268
+    
269
+##     if(ncol(x) > 2) {
270
+##         if(inherits(x, "matrix")) {
271
+##             if(!is.numeric(x))
272
+##                 stop("A genotype fitness matrix/data.frame must be numeric.")
273
+##         } else if(inherits(x, "data.frame")) {
274
+##             if(!all(unlist(lapply(x, is.numeric))))
275
+##                 stop("A genotype fitness matrix/data.frame must be numeric.")
276
+##         }
277
+        
278
+##         ## We are expecting here a matrix of 0/1 where columns are genes
279
+##         ## except for the last column, that is Fitness
280
+##         ## Of course, can ONLY work with epistastis, NOT order
281
+##         return(genot_fitness_to_epistasis(x))
282
+##     } else {
283
+##         if(!inherits(x, "data.frame"))
284
+##             stop("genotFitness: if two-column must be data frame")
285
+##         ## Make sure no factors
286
+##         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
287
+##         ## Make sure no numbers
288
+##         if(any(is.numeric(x[, 1])))
289
+##             stop(paste0("genotFitness: first column of data frame is numeric.",
290
+##                         " Ambiguous and suggests possible error. If sure,",
291
+##                         " enter that column as character"))
292
+        
293
+##         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
294
+##         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
295
+##         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
296
+##         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
297
+##         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
298
+##         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
299
+##         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
300
+##         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
301
+##         if(omarker) {
302
+##             ## do something. To be completed
303
+##             stop("This code not yet ready")
304
+##             ## You can pass to allFitnessEffects genotype -> fitness mappings that
305
+##             ## involve epistasis and order. But they must have different
306
+##             ## genes. Otherwise, it is not manageable.
307
+##         }
308
+##         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
309
+##             ## the second case above corresponds to passing just single letter genotypes
310
+##             ## as there is not a single marker
311
+##             x <- x[, c(1, 2), drop = FALSE]
312
+##             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
313
+##                 message("Column names of object not Genotype and Fitness.",
314
+##                         " Renaming them assuming that is what you wanted")
315
+##                 colnames(x) <- c("Genotype", "Fitness")
316
+##             }
317
+##             if((!omarker) && (!emarker) && (!nogoodepi)) {
318
+##                 message("All single-gene genotypes as input to from_genotype_fitness")
319
+##             }
320
+##             ## Yes, we need to do this to  scale the fitness and put the "-"
321
+##             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
322
+##         }
323
+##     }
324
+## }
325
+
326
+
327
+
328
+
329
+
330
+## No longer used for real
181 331
 genot_fitness_to_epistasis <- function(x) {
182 332
     ## FIXME future:
183 333
 
... ...
@@ -208,6 +358,7 @@ genot_fitness_to_epistasis <- function(x) {
208 358
     fwt <- 1
209 359
     if(length(wt) == 1)
210 360
         fwt <- f[wt]
361
+    ## No longer being used when we pass fitness landscapse: flfast
211 362
     if(!isTRUE(all.equal(fwt, 1))) {
212 363
         message("Fitness of wildtype != 1. ",
213 364
                 "Dividing all fitnesses by fitness of wildtype.")
... ...
@@ -245,6 +396,11 @@ allGenotypes_to_matrix <- function(x) {
245 396
     ## a matrix with 0/1 in a column for each gene and a final column of
246 397
     ## Fitness
247 398
 
399
+    if(is.factor(x[, 1])) {
400
+        warning("First column of genotype fitness is a factor. ",
401
+                "Converting to character.")
402
+        x[, 1] <- as.character(x[, 1])
403
+    }
248 404
     ## A WT can be specified with string "WT"
249 405
     anywt <- which(x[, 1] == "WT")
250 406
     if(length(anywt) > 1) stop("More than 1 WT")
... ...
@@ -253,6 +409,7 @@ allGenotypes_to_matrix <- function(x) {
253 409
         x <- x[-anywt, ]
254 410
         ## Trivial case of passing just a WT?
255 411
     } else {
412
+        warning("No WT genotype. Setting its fitness to 1.")
256 413
         fwt <- 1
257 414
     }
258 415
     splitted_genots <- lapply(x$Genotype,
... ...
@@ -25,6 +25,8 @@ allMutatorEffects <- function(epistasis = NULL,
25 25
                               keepInput = TRUE) {
26 26
     ## This is on purpose to prevent using a rT or orderEffects. Those are
27 27
     ## not tested to work with mutator.
28
+
29
+    ## Neither do we accept a fitness landscape object either for now.
28 30
     allFitnessORMutatorEffects(
29 31
         rT = NULL,
30 32
         epistasis = epistasis,
... ...
@@ -373,13 +373,18 @@ checkRT <- function(mdeps) {
373 373
 
374 374
 getNamesID <- function(fp) {
375 375
     ## Return a lookup table for names based on numeric IDs
376
-    idname <- c(fp$geneModule$GeneNumID,  fp$long.geneNoInt$GeneNumID)
377
-    names(idname) <- c(fp$geneModule$Gene, fp$long.geneNoInt$Gene)
376
+    idname <- c(fp$geneModule$GeneNumID,
377
+                fp$long.geneNoInt$GeneNumID,
378
+                fp$fitnessLandscape_gene_id$GeneNumID)
379
+    names(idname) <- c(fp$geneModule$Gene,
380
+                       fp$long.geneNoInt$Gene,
381
+                       fp$fitnessLandscape_gene_id$Gene)
378 382
     return(idname[-1]) ## remove Root!!
379 383
 }
380 384
 
381 385
 
382
-getGeneIDNum <- function(geneModule, geneNoInt, drv, sort = TRUE) {
386
+getGeneIDNum <- function(geneModule, geneNoInt, fitnessLandscape_gene_id,
387
+                         drv, sort = TRUE) {
383 388
     ## It returns the genes, as NumID, in the given vector with names drv
384 389
     ## initMutant uses this, for simplicity, without sorting, but noInt
385 390
     ## are always sorted
... ...
@@ -388,24 +393,26 @@ getGeneIDNum <- function(geneModule, geneNoInt, drv, sort = TRUE) {
388 393
 
389 394
     ## Yes, we must do it twice because we do not know before hand which
390 395
     ## is which. This makes sure no NA. Period.
391
-    if(any(is.na( match(drv, c(geneModule$Gene, geneNoInt$Gene))))) {
396
+    if(any(is.na( match(drv, c(geneModule$Gene, geneNoInt$Gene,
397
+                               fitnessLandscape_gene_id$Gene))))) {
392 398
         stop(paste("For driver or initMutant you have passed genes",
393 399
                    "not in the fitness table."))
394 400
     }
395 401
     
396 402
     indicesM <- as.vector(na.omit(match( drv, geneModule$Gene)))
397 403
     indicesI <- as.vector(na.omit(sort(match( drv, geneNoInt$Gene))))
404
+    indicesF <- as.vector(na.omit(sort(match( drv, fitnessLandscape_gene_id$Gene))))
398 405
     if(sort) {
399 406
         indicesM <- sort(indicesM)
400 407
     }
401 408
     return(c(
402 409
         geneModule$GeneNumID[indicesM],
403
-        geneNoInt$GeneNumID[indicesI])
410
+        geneNoInt$GeneNumID[indicesI],
411
+        fitnessLandscape_gene_id$GeneNumID[indicesF])
404 412
     )
405 413
 }
406 414
 
407 415
 
408
-
409 416
 allFitnessORMutatorEffects <- function(rT = NULL,
410 417
                                        epistasis = NULL,
411 418
                                        orderEffects = NULL,
... ...
@@ -413,6 +420,7 @@ allFitnessORMutatorEffects <- function(rT = NULL,
413 420
                                        geneToModule = NULL,
414 421
                                        drvNames = NULL,
415 422
                                        keepInput = TRUE,
423
+                                       genotFitness = NULL,
416 424
                                        ## refFE = NULL,
417 425
                                        calledBy = NULL) {
418 426
     ## From allFitnessEffects. Generalized so we deal with Fitness
... ...
@@ -442,7 +450,8 @@ allFitnessORMutatorEffects <- function(rT = NULL,
442 450
     
443 451
     if(calledBy == "allMutatorEffects") {
444 452
         ## very paranoid check
445
-        if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
453
+        if( !is.null(rT) || !is.null(orderEffects) ||
454
+            !is.null(drvNames) || !is.null(genotFitness))
446 455
             stop("allMutatorEffects called with forbidden arguments.",
447 456
                  "Is this an attempt to subvert the function?")
448 457
     }
... ...
@@ -551,9 +560,34 @@ allFitnessORMutatorEffects <- function(rT = NULL,
551 560
     } else {
552 561
         geneNoInt <- data.frame()
553 562
     }
554
-
563
+    
564
+    if(is.null(genotFitness)) {
565
+        genotFitness <- matrix(NA, nrow = 0, ncol = 1)
566
+        fitnessLandscape_df <- data.frame()
567
+        fitnessLandscape_gene_id <- data.frame()
568
+    } else {
569
+        ## Yes, I am duplicating stuff for now.
570
+        ## This makes life simpler in C++:
571
+        ## In the map, the key is the genotype name, as
572
+        ## cnn <- colnames(genotFitness)[-ncol(genotFitness)]
573
+        cnn <- 1:(ncol(genotFitness) - 1)
574
+        gfn <- apply(genotFitness[, -ncol(genotFitness), drop = FALSE], 1,
575
+                     function(x) paste(cnn[as.logical(x)],
576
+                                       collapse = ", "))
577
+        ## rownames(genotFitness) <- gfn
578
+        fitnessLandscape_df <-
579
+            data.frame(Genotype = gfn,
580
+                       Fitness = genotFitness[, ncol(genotFitness)],
581
+                       stringsAsFactors = FALSE)
582
+        fitnessLandscape_gene_id <- data.frame(
583
+            Gene = colnames(genotFitness)[-ncol(genotFitness)],
584
+            GeneNumID = cnn,
585
+            stringsAsFactors = FALSE)
586
+        
587
+    }
588
+    
555 589
     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
556
-             nrow(geneNoInt)) == 0)
590
+             nrow(geneNoInt) + nrow(genotFitness)) == 0)
557 591
         stop("You have specified nothing!")
558 592
 
559 593
     if(calledBy == "allFitnessEffects") {
... ...
@@ -566,7 +600,8 @@ allFitnessORMutatorEffects <- function(rT = NULL,
566 600
         graphE <- NULL
567 601
     }
568 602
     if(!is.null(drvNames)) {
569
-        drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
603
+        drv <- unique(getGeneIDNum(geneModule, geneNoInt, fitnessLandscape_gene_id,
604
+                                   drvNames))
570 605
         ## drivers should never be in the geneNoInt; Why!!!???
571 606
         ## Catch the problem. This is an overkill,
572 607
         ## so since we catch the issue, we could leave the geneNoInt. But
... ...
@@ -583,10 +618,11 @@ allFitnessORMutatorEffects <- function(rT = NULL,
583 618
         ## drv <- geneModule$GeneNumID[-1]
584 619
         drv <- vector(mode = "integer", length = 0L)
585 620
     }
586
-    
621
+  
587 622
     if(!keepInput) {
588 623
         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
589 624
     }
625
+
590 626
     out <- list(long.rt = long.rt,
591 627
                 long.epistasis = long.epistasis,
592 628
                 long.orderEffects = long.orderEffects,
... ...
@@ -599,7 +635,10 @@ allFitnessORMutatorEffects <- function(rT = NULL,
599 635
                 rT = rT,
600 636
                 epistasis = epistasis,
601 637
                 orderEffects = orderEffects,
602
-                noIntGenes = noIntGenes                
638
+                noIntGenes = noIntGenes,
639
+                fitnessLandscape = genotFitness,
640
+                fitnessLandscape_df = fitnessLandscape_df,
641
+                fitnessLandscape_gene_id = fitnessLandscape_gene_id
603 642
                 )
604 643
     if(calledBy == "allFitnessEffects") {
605 644
         class(out) <- c("fitnessEffects")
... ...
@@ -609,6 +648,210 @@ allFitnessORMutatorEffects <- function(rT = NULL,
609 648
     return(out)
610 649
 }
611 650
 
651
+## Former version, with fitness landscape
652
+## allFitnessORMutatorEffects <- function(rT = NULL,
653
+##                                        epistasis = NULL,
654
+##                                        orderEffects = NULL,
655
+##                                        noIntGenes = NULL,
656
+##                                        geneToModule = NULL,
657
+##                                        drvNames = NULL,
658
+##                                        keepInput = TRUE,
659
+##                                        ## refFE = NULL,
660
+##                                        calledBy = NULL) {
661
+##     ## From allFitnessEffects. Generalized so we deal with Fitness
662
+##     ## and mutator.
663
+    
664
+##     ## restrictions: the usual rt
665
+
666
+##     ## epistasis: as it says, with the ":"
667
+
668
+##     ## orderEffects: the ">"
669
+    
670
+##     ## All of the above can be genes or can be modules (if you pass a
671
+##     ## geneToModule)
672
+
673
+##     ## rest: rest of genes, with fitness
674
+
675
+
676
+##     ## For epistasis and order effects we create the output object but
677
+##     ## missing the numeric ids of genes. With rT we do it in one go, as we
678
+##     ## already know the mapping of genes to numeric ids. We could do the
679
+##     ## same in epistasis and order, but we would be splitting twice
680
+##     ## (whereas for rT extracting the names is very simple).
681
+
682
+##     ## called appropriately?
683
+##     if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
684
+##         stop("How did you call this function?. Bug.")
685
+    
686
+##     if(calledBy == "allMutatorEffects") {
687
+##         ## very paranoid check
688
+##         if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
689
+##             stop("allMutatorEffects called with forbidden arguments.",
690
+##                  "Is this an attempt to subvert the function?")
691
+##     }
692
+    
693
+##     rtNames <- NULL
694
+##     epiNames <- NULL
695
+##     orNames <- NULL
696
+##     if(!is.null(rT)) {
697
+##         ## This is really ugly, but to prevent the stringsAsFactors I need it here:
698
+##         rT$parent <- as.character(rT$parent)
699
+##         rT$child <- as.character(rT$child)
700
+##         rT$typeDep <- as.character(rT$typeDep)
701
+##         rtNames <- unique(c(rT$parent, rT$child))
702
+##     }
703
+##     if(!is.null(epistasis)) {
704
+##         long.epistasis <- to.long.epist.order(epistasis, ":")
705
+##         ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
706
+##         ## deal with the possible negative signs
707
+##         epiNames <- setdiff(unique(
708
+##             unlist(lapply(long.epistasis,
709
+##                           function(x) lapply(x$ids,
710
+##                                              function(z) strsplit(z, "^-"))))),
711
+##                             "")
712
+##     } else {
713
+##         long.epistasis <- list()
714
+##     }
715
+##     if(!is.null(orderEffects)) {
716
+##         long.orderEffects <- to.long.epist.order(orderEffects, ">")
717
+##         orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
718
+##     } else {
719
+##         long.orderEffects <- list()
720
+##     }
721
+##     allModuleNames <- unique(c(rtNames, epiNames, orNames))
722
+##     if(is.null(geneToModule)) {
723
+##         gMOneToOne <- TRUE
724
+##         geneToModule <- geneModuleNull(allModuleNames)
725
+##     } else {
726
+##         gMOneToOne <- FALSE
727
+##         if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
728
+##             stop(paste("Some values in geneToModule not present in any of",
729
+##                        " rT, epistasis, or order effects"))
730
+##         if(any(is.na(match(allModuleNames, names(geneToModule)))))
731
+##             stop(paste("Some values in rT, epistasis, ",
732
+##                        "or order effects not in geneToModule"))
733
+##     }
734
+##     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
735
+    
736
+##     idm <- unique(geneModule$ModuleNumID)
737
+##     names(idm) <- unique(geneModule$Module)
738
+
739
+##     if(!is.null(rT)) {
740
+##         checkRT(rT)
741
+##         long.rt <- to.long.rt(rT, idm)
742
+##     } else {
743
+##         long.rt <- list() ## yes, we want an object of length 0
744
+##     }
745
+
746
+##     ## Append the numeric ids to epistasis and order
747
+##     if(!is.null(epistasis)) {
748
+##         long.epistasis <- lapply(long.epistasis,
749
+##                                  function(x)
750
+##                                      addIntID.epist.order(x, idm,
751
+##                                                           sort = TRUE,
752
+##                                                           sign = TRUE))
753
+##     }
754
+##     if(!is.null(orderEffects)) {
755
+##         long.orderEffects <- lapply(long.orderEffects,
756
+##                                     function(x)
757
+##                                         addIntID.epist.order(x, idm,
758
+##                                                              sort = FALSE,
759
+##                                                              sign = FALSE))
760
+##     }
761
+    
762
+##     if(!is.null(noIntGenes)) {
763
+##         if(inherits(noIntGenes, "character")) {
764
+##             wm <- paste("noIntGenes is a character vector.",
765
+##                         "This is probably not what you want, and will",
766
+##                         "likely result in an error downstream.",
767
+##                         "You can get messages like",
768
+##                         " 'not compatible with requested type', and others.",
769
+##                         "We are stopping.")
770
+##             stop(wm)
771
+##         }
772
+            
773
+##         mg <- max(geneModule[, "GeneNumID"])
774
+##         gnum <- seq_along(noIntGenes) + mg
775
+##         if(!is.null(names(noIntGenes))) {
776
+##             ng <- names(noIntGenes)
777
+##             if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE)
778
+##                 || grepl(":", ng, fixed = TRUE))
779
+##                 stop("The name of some noIntGenes contain a ',' or a '>' or a ':'")
780
+##             if(any(ng %in% geneModule[, "Gene"] ))
781
+##                 stop("A gene in noIntGenes also present in the other terms")
782
+##             if(any(duplicated(ng)))
783
+##                 stop("Duplicated gene names in geneNoInt")
784
+##             if(any(is.na(ng)))
785
+##                 stop("In noIntGenes some genes have names, some don't.",
786
+##                      " Name all of them, or name none of them.")
787
+##         } else {
788
+##             ng <- gnum
789
+##         }
790
+##         geneNoInt <- data.frame(Gene = as.character(ng),
791
+##                                 GeneNumID = gnum,
792
+##                                 s = noIntGenes,
793
+##                                 stringsAsFactors = FALSE)
794
+##     } else {
795
+##         geneNoInt <- data.frame()
796
+##     }
797
+
798
+##     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
799
+##              nrow(geneNoInt)) == 0)
800
+##         stop("You have specified nothing!")
801
+
802
+##     if(calledBy == "allFitnessEffects") {
803
+##         if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
804
+##             graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
805
+##         } else {
806
+##             graphE <- NULL
807
+##         }
808
+##     } else {
809
+##         graphE <- NULL
810
+##     }
811
+##     if(!is.null(drvNames)) {
812
+##         drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
813
+##         ## drivers should never be in the geneNoInt; Why!!!???
814
+##         ## Catch the problem. This is an overkill,
815
+##         ## so since we catch the issue, we could leave the geneNoInt. But
816
+##         ## that should not be there in this call.
817
+##         ## if(any(drvNames %in% geneNoInt$Gene)) {
818
+##         ##     stop(paste("At least one gene in drvNames is a geneNoInt gene.",
819
+##         ##                "That is not allowed.",
820
+##         ##                "If that gene is a driver, pass it as gene in the epistasis",
821
+##         ##                "component."))
822
+##         ## }
823
+##         ## drv <- getGeneIDNum(geneModule, NULL, drvNames)
824
+##     } else {
825
+##         ## we used to have this default
826
+##         ## drv <- geneModule$GeneNumID[-1]
827
+##         drv <- vector(mode = "integer", length = 0L)
828
+##     }
829
+    
830
+##     if(!keepInput) {
831
+##         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
832
+##     }
833
+##     out <- list(long.rt = long.rt,
834
+##                 long.epistasis = long.epistasis,
835
+##                 long.orderEffects = long.orderEffects,
836
+##                 long.geneNoInt = geneNoInt,
837
+##                 geneModule = geneModule,
838
+##                 gMOneToOne = gMOneToOne,
839
+##                 geneToModule = geneToModule,
840
+##                 graph = graphE,
841
+##                 drv = drv,
842
+##                 rT = rT,
843
+##                 epistasis = epistasis,
844
+##                 orderEffects = orderEffects,
845
+##                 noIntGenes = noIntGenes                
846
+##                 )
847
+##     if(calledBy == "allFitnessEffects") {
848
+##         class(out) <- c("fitnessEffects")
849
+##     } else if(calledBy == "allMutatorEffects") {
850
+##         class(out) <- c("mutatorEffects")
851
+##     }
852
+##     return(out)
853
+## }
854
+
612 855
 
613 856
 allFitnessEffects <- function(rT = NULL,
614 857
                               epistasis = NULL,
... ...
@@ -628,7 +871,11 @@ allFitnessEffects <- function(rT = NULL,
628 871
                  " you cannot pass any of rT, epistasis, orderEffects",
629 872
                  " noIntGenes or geneToModule.")
630 873
         }
631
-        epistasis <- from_genotype_fitness(genotFitness)
874
+
875
+        genotFitness_std <- to_genotFitness_std(genotFitness, simplify = TRUE)
876
+        ## epistasis <- from_genotype_fitness(genotFitness)
877
+    } else {
878
+        genotFitness_std <- NULL
632 879
     }
633 880
     allFitnessORMutatorEffects(
634 881
         rT = rT,
... ...
@@ -638,9 +885,41 @@ allFitnessEffects <- function(rT = NULL,
638 885
         geneToModule = geneToModule,
639 886
         drvNames = drvNames,
640 887
         keepInput = keepInput,
888
+        genotFitness = genotFitness_std,
641 889
         calledBy = "allFitnessEffects")
642 890
 }
643 891
 
892
+## Former version
893
+## allFitnessEffects <- function(rT = NULL,
894
+##                               epistasis = NULL,
895
+##                               orderEffects = NULL,
896
+##                               noIntGenes = NULL,
897
+##                               geneToModule = NULL,
898
+##                               drvNames = NULL,
899
+##                               genotFitness = NULL,
900
+##                               keepInput = TRUE) {
901
+
902
+##     if(!is.null(genotFitness)) {
903
+##         if(!is.null(rT) || !is.null(epistasis) ||
904
+##            !is.null(orderEffects) || !is.null(noIntGenes) ||
905
+##            !is.null(geneToModule)) {
906
+##             stop("You have a non-null genotFitness.",
907
+##                  " If you pass the complete genotype to fitness mapping",
908
+##                  " you cannot pass any of rT, epistasis, orderEffects",
909
+##                  " noIntGenes or geneToModule.")
910
+##         }
911
+##         epistasis <- from_genotype_fitness(genotFitness)
912
+##     }
913
+##     allFitnessORMutatorEffects(
914
+##         rT = rT,
915
+##         epistasis = epistasis,
916
+##         orderEffects = orderEffects,
917
+##         noIntGenes = noIntGenes,
918
+##         geneToModule = geneToModule,
919
+##         drvNames = drvNames,
920
+##         keepInput = keepInput,
921
+##         calledBy = "allFitnessEffects")
922
+## }
644 923
 
645 924
 ## allFitnessEffects <- function(rT = NULL,
646 925
 ##                               epistasis = NULL,
... ...
@@ -867,6 +1146,18 @@ evalGenotypeORMut <- function(genotype,
867 1146
 
868 1147
     if( !(calledBy_ %in% c("evalGenotype", "evalGenotypeMut") ))
869 1148
         stop("How did you call this function?. Bug.")
1149
+
1150
+    ## fmEffects could be a mutator effect
1151
+    if(!exists("fitnessLandscape_gene_id", where = fmEffects)) {
1152
+        fmEffects$fitnessLandscape_df <- data.frame()
1153
+        fmEffects$fitnessLandscape_gene_id <- data.frame()
1154
+    }
1155
+
1156
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1157
+        (nrow(fmEffects$fitnessLandscape_df) > 0)) {
1158
+        warning("Bozic model passing a fitness landscape will not work",
1159
+                    " for now.")
1160
+    }
870 1161
     
871 1162
     if(echo)
872 1163
         cat(paste("Genotype: ", genotype))
... ...
@@ -877,9 +1168,11 @@ evalGenotypeORMut <- function(genotype,
877 1168
             genotype <- nice.vector.eo(genotype, ",")
878 1169
         }
879 1170
         all.g.nums <- c(fmEffects$geneModule$GeneNumID,
880
-                        fmEffects$long.geneNoInt$GeneNumID)
1171
+                        fmEffects$long.geneNoInt$GeneNumID,
1172
+                        fmEffects$fitnessLandscape_gene_id$GeneNumID)
881 1173
         all.g.names <- c(fmEffects$geneModule$Gene,
882
-                         fmEffects$long.geneNoInt$Gene)
1174
+                         fmEffects$long.geneNoInt$Gene,
1175
+                         fmEffects$fitnessLandscape_gene_id$Gene)
883 1176
         genotype <- all.g.nums[match(genotype, all.g.names)]
884 1177
     }
885 1178
     if(any(is.na(genotype)))
... ...
@@ -939,6 +1232,17 @@ evalGenotypeFitAndMut <- function(genotype,
939 1232
                                   verbose = FALSE,
940 1233
                                   echo = FALSE,
941 1234
                                   model = "") {
1235
+    
1236
+    ## Must deal with objects from previous, pre flfast, modifications
1237
+    if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
1238
+        fitnessEffects$fitnessLandscape_df <- data.frame()
1239
+        fitnessEffects$fitnessLandscape_gene_id <- data.frame()
1240
+    }
1241
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1242
+        (nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
1243
+        warning("Bozic model passing a fitness landscape will not work",
1244
+                    " for now.")
1245
+    }
942 1246
     prodNeg <- FALSE
943 1247
     ## Next is from evalGenotypeAndMut
944 1248
     if(echo)
... ...
@@ -950,9 +1254,11 @@ evalGenotypeFitAndMut <- function(genotype,
950 1254
             genotype <- nice.vector.eo(genotype, ",")
951 1255
         }
952 1256
         all.g.nums <- c(fitnessEffects$geneModule$GeneNumID,
953
-                        fitnessEffects$long.geneNoInt$GeneNumID)
1257
+                        fitnessEffects$long.geneNoInt$GeneNumID,
1258
+                        fitnessEffects$fitnessLandscape_gene_id$GeneNumID)
954 1259
         all.g.names <- c(fitnessEffects$geneModule$Gene,
955
-                         fitnessEffects$long.geneNoInt$Gene)
1260
+                         fitnessEffects$long.geneNoInt$Gene,
1261
+                         fitnessEffects$fitnessLandscape_gene_id$Gene)
956 1262
         genotype <- all.g.nums[match(genotype, all.g.names)]
957 1263
     }
958 1264
     if(any(is.na(genotype)))
... ...
@@ -1050,6 +1356,8 @@ evalAllGenotypesORMut <- function(fmEffects,
1050 1356
         stop("You are trying to get the mutator effects of a fitness specification. ",
1051 1357
              "You did not pass an object of class mutatorEffects.")
1052 1358
 
1359
+    
1360
+    
1053 1361
     ## if(!minimal)
1054 1362
     allg <- generateAllGenotypes(fitnessEffects = fmEffects,
1055 1363
                                  order = order, max = max)
... ...
@@ -1133,6 +1441,17 @@ evalAllGenotypesORMut <- function(fmEffects,
1133 1441
 evalAllGenotypes <- function(fitnessEffects, order = FALSE, max = 256,
1134 1442
                              addwt = FALSE,
1135 1443
                              model = "") {
1444
+    ## Must deal with objects from previous, pre flfast, modifications
1445
+    if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
1446
+        fitnessEffects$fitnessLandscape_df <- data.frame()
1447
+        fitnessEffects$fitnessLandscape_gene_id <- data.frame()
1448
+    }
1449
+
1450
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1451
+        (nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
1452
+        warning("Bozic model passing a fitness landscape will not work",
1453
+                    " for now.")
1454
+    }
1136 1455
     evalAllGenotypesORMut(
1137 1456
         fmEffects = fitnessEffects,
1138 1457
         order = order,
... ...
@@ -1149,7 +1468,11 @@ generateAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256) {
1149 1468
                                        function(x) choose(n, x) * factorial(x)))}
1150 1469
     else
1151 1470
         tot <- function(n) {2^n}
1152
-    nn <- nrow(fitnessEffects$geneModule) -1  + nrow(fitnessEffects$long.geneNoInt)
1471
+    
1472
+    nn <- nrow(fitnessEffects$geneModule) -1  +
1473
+        nrow(fitnessEffects$long.geneNoInt) +
1474
+        nrow(fitnessEffects$fitnessLandscape_gene_id)
1475
+    
1153 1476
     tnn <- tot(nn)
1154 1477
     if(tnn > max) {
1155 1478
         m <- paste("There are", tnn, "genotypes.")
... ...
@@ -1176,7 +1499,8 @@ generateAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256) {
1176 1499
     }
1177 1500
     genotNums <- list.of.vectors(genotNums)
1178 1501
     names <- c(fitnessEffects$geneModule$Gene[-1],
1179
-               fitnessEffects$long.geneNoInt$Gene)
1502
+               fitnessEffects$long.geneNoInt$Gene,
1503
+               fitnessEffects$fitnessLandscape_gene_id$Gene)
1180 1504
     
1181 1505
     genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]),
1182 1506
                                 function(z)
... ...
@@ -1207,6 +1531,18 @@ evalAllGenotypesFitAndMut <- function(fitnessEffects, mutatorEffects,
1207 1531
     } else {
1208 1532
         prodNeg <- FALSE
1209 1533
     }
1534
+
1535
+
1536
+    ## Must deal with objects from previous, pre flfast, modifications
1537
+    if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
1538
+        fitnessEffects$fitnessLandscape_df <- data.frame()
1539
+        fitnessEffects$fitnessLandscape_gene_id <- data.frame()
1540
+    }
1541
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1542
+        (nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
1543
+        warning("Bozic model passing a fitness landscape will not work",
1544
+                    " for now.")
1545
+    } 
1210 1546
     
1211 1547
     full2mutator_ <- matchGeneIDs(mutatorEffects,
1212 1548
                                   fitnessEffects)$Reduced
... ...
@@ -1344,7 +1680,10 @@ plot.fitnessEffects <- function(x, type = "graphNEL",
1344 1680
     ## layout.reingold.tilford if really a tree
1345 1681
     ## o.w. it will use the default
1346 1682
     g <- x$graph
1347
-    
1683
+    if(is.null(g))
1684
+        stop("This fitnessEffects object can not be ploted this way.",
1685
+             " It is probably one with fitness landscape specification, ",
1686
+             " so you might want to plot the fitness landscape instead.")
1348 1687
     if(type == "igraph") {
1349 1688
         if(expandModules && (!x$gMOneToOne)) {
1350 1689
             ## vlabels <- fe$geneToModule[vertex.attributes(g)$name]
... ...
@@ -1462,6 +1801,14 @@ nr_oncoSimul.internal <- function(rFE,
1462 1801
              " there must be at least two gene/loci).")
1463 1802
     }
1464 1803
 
1804
+    ## Must deal with objects from previous, pre flfast, modifications
1805
+    ## Could move this way down to the bottom, right before
1806
+    ## .Call
1807
+    if(!exists("fitnessLandscape_gene_id", where = rFE)) {
1808
+        rFE$fitnessLandscape_df <- data.frame()
1809
+        rFE$fitnessLandscape_gene_id <- data.frame()
1810
+    }
1811
+    
1465 1812
     namedGenes <- allNamedGenes(rFE)
1466 1813
 
1467 1814
     if( length(mu) > 1) {
... ...
@@ -1489,13 +1836,15 @@ nr_oncoSimul.internal <- function(rFE,
1489 1836
                        "mutation rate in the human genome is about 1e-11 to 1e-9."))
1490 1837
     }
1491 1838
     if(!is.null(initMutant)) {
1839
+        initMutantString <- initMutant
1492 1840
        if(length(grep(">", initMutant))) {
1493 1841
             initMutant <- nice.vector.eo(initMutant, ">")
1494 1842
         } else if(length(grep(",", initMutant))) {
1495 1843
             initMutant <- nice.vector.eo(initMutant, ",")
1496 1844
         }
1497 1845
         initMutant <- getGeneIDNum(rFE$geneModule,
1498
-                             rFE$long.geneNoInt,
1846
+                                   rFE$long.geneNoInt,
1847
+                                   rFE$fitnessLandscape_gene_id,
1499 1848
                                    initMutant,
1500 1849
                              FALSE)
1501 1850
        if(length(initMutant) >= countGenesFe(rFE)) {
... ...
@@ -1505,6 +1854,7 @@ nr_oncoSimul.internal <- function(rFE,
1505 1854
        
1506 1855
     } else {
1507 1856
         initMutant <- vector(mode = "integer")
1857
+        initMutantString <- ""
1508 1858
     }
1509 1859
     ## these are never user options
1510 1860
     ## if(initSize_species < 10) {
... ...
@@ -1515,6 +1865,14 @@ nr_oncoSimul.internal <- function(rFE,
1515 1865
     ## }
1516 1866
 
1517 1867
     if(typeFitness %in% c("bozic1", "bozic2")) {
1868
+        if(nrow(rFE$fitnessLandscape_df) > 0)
1869
+            warning("Bozic model passing a fitness landscape will not work",
1870
+                    " for now.")
1871
+        ## FIXME: bozic and fitness landscape
1872
+        ## the issue is that in the C++ code we directly do
1873
+        ## s = birth rate - 1
1874
+        ## but we would need something different
1875
+        ## Can be done going through epistasis, etc.
1518 1876
         thesh <- unlist(lapply(rFE$long.rt, function(x) x$sh))
1519 1877
         ## thes <- unlist(lapply(rFE$long.rt, function(x) x$s))
1520 1878
         thes <- unlist(c(lapply(rFE$long.rt, function(x) x$s),
... ...
@@ -1602,7 +1960,6 @@ nr_oncoSimul.internal <- function(rFE,
1602 1960
         fixation_list <- list()
1603 1961
     }
1604 1962
 
1605
-    
1606 1963
     return(c(
1607 1964
         nr_BNB_Algo5(rFE = rFE,
1608 1965
                      mu_ = mu,
... ...
@@ -1642,14 +1999,21 @@ nr_oncoSimul.internal <- function(rFE,
1642 1999
                      AND_DrvProbExit = AND_DrvProbExit,
1643 2000
                      fixation_list = fixation_list),
1644 2001
         Drivers = list(rFE$drv), ## but when doing pops, these will be repeated
1645
-        geneNames = list(names(getNamesID(rFE)))
2002
+        geneNames = list(names(getNamesID(rFE))),
2003
+        InitMutant = initMutantString
1646 2004
     ))
1647 2005
 }
1648 2006
 
1649 2007
 
1650 2008
 countGenesFe <- function(fe) {
1651 2009
     ## recall geneModule has Root always
1652
-    nrow(fe$geneModule) + nrow(fe$long.geneNoInt) - 1
2010
+    ## We want to be able to use objects that did not have
2011
+    ## the fitness landscape component
2012
+    if(exists("fitnessLandscape_gene_id", where = fe))
2013
+        return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) +
2014
+            nrow(fe$fitnessLandscape_gene_id) - 1)
2015
+    else
2016
+        return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) - 1)
1653 2017
 }
1654 2018
 
1655 2019
 allNamedGenes <- function(fe){
... ...
@@ -1668,14 +2032,29 @@ allNamedGenes <- function(fe){
1668 2032
     ##         stop("When using per-gene mutation rates the ",
1669 2033
     ##              "no interaction genes must be named ",
1670 2034
     ##              "(i.e., the noIntGenes vector must have names).")
1671
-    
1672
-    v1 <- fe$geneModule[, c("Gene", "GeneNumID")]
1673
-    if(nrow(fe$long.geneNoInt)) {
1674
-        v1 <- rbind(v1,
1675
-                    fe$long.geneNoInt[, c("Gene", "GeneNumID")])
2035
+
2036
+    ## accommodate objects w/o fitnessLandscape
2037
+    if(!is.null(fe$fitnessLandscape) && nrow(fe$fitnessLandscape)) {
2038
+        gn <-
2039
+            gtools::mixedsort(
2040
+                        colnames(fe$fitnessLandscape)[-ncol(fe$fitnessLandscape)])
2041
+        v1 <- data.frame(Gene = gn, GeneNumID = seq.int(length(gn)),
2042
+                        stringsAsFactors = FALSE)
2043
+    } else {
2044
+        v1 <- fe$geneModule[, c("Gene", "GeneNumID")]
2045
+        if(nrow(fe$long.geneNoInt)) {
2046
+            v1 <- rbind(v1,
2047
+                        fe$long.geneNoInt[, c("Gene", "GeneNumID")])
2048
+        }
2049
+        if(any(v1[, "Gene"] == "Root"))
2050
+            v1 <- v1[-which(v1[, "Gene"] == "Root"), ]
1676 2051
     }
1677
-    v1 <- v1[-which(v1[, "Gene"] == "Root"), ]
1678 2052
     rownames(v1) <- NULL
2053
+    if(any(v1$Gene == "WT")) {
2054
+        warning("A gene is named WT. You can expect problems ",
2055
+                "because we use WT to denote the wildtype ",
2056
+                "genotype. You might want to change it.")
2057
+    }
1679 2058
     return(v1)
1680 2059
 }
1681 2060
 
... ...
@@ -1,3 +1,11 @@
1
+Changes in version 2.9.2 (2017-11-24):
2
+	- LOD: using only the strict Szendro et al. meaning.