Browse code

v. 2.9.2 - LOD: using only the strict Szendro et al. meaning. - POM: computed in C++. - Using fitness landscape directly when given as input (no conversion to epistasis)

ramon diaz-uriarte (at Phelsuma) authored on 24/11/2017 12:41:48
Showing36 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.9.0
5
-Date: 2017-09-27
4
+Version: 2.9.2
5
+Date: 2017-11-24
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -51,8 +51,8 @@ importFrom("igraph", igraph.to.graphNEL, graph.data.frame, V, E,
51 51
 import(graph)
52 52
 import(Rgraphviz)
53 53
 importFrom("parallel", mclapply, detectCores, mcMap)
54
-importFrom("gtools", combinations, permutations)
55
-
54
+importFrom("gtools", combinations, permutations, mixedorder)
55
+## importFrom("compare", compare)
56 56
 importFrom("graphics", "axis", "box", "legend", "matplot", "par", "polygon")
57 57
 importFrom("methods", "as")
58 58
 importFrom("stats", "na.omit", "runif", "smooth.spline")
... ...
@@ -1,4 +1,4 @@
1
-## Copyright 2016 Ramon Diaz-Uriarte
1
+## Copyright 2016, 2017 Ramon Diaz-Uriarte
2 2
 
3 3
 ## This program is free software: you can redistribute it and/or modify
4 4
 ## it under the terms of the GNU General Public License as published by
... ...
@@ -14,64 +14,115 @@
14 14
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 15
 
16 16
 
17
+## Note that, in contrast to POM, LOD is not well defined if the population
18
+## becomes extinct.
17 19
 
18 20
 ## Functions to obtain LOD and POM similar to Szendro et al., 2014, PNAS.
19 21
 genot_max <- function(x) {
20 22
     x$GenotypesLabels[which.max(x$pops.by.time[nrow(x$pops.by.time), -1])]
21 23
 }
22 24
 
23
-LOD.internal <- function(x) {
24
-    ## Not identical to LOD of Szendro because:
25
-    
26
-    ##  a) I think we want all paths, not just their single LOD, which I
27
-    ##  think they might use out of convenience.
28 25
 
29
-    ##  b) For me it is a mess, a complicated mess, to use their LOD as
30
-    ##  they define it and there are many ambiguities in how to define it
31
-    ##  in continuous time.
32 26
 
33
-    ## This also means that single simulation might yield multiple LODs
27
+## Filter the PhylogDF so we obtain LOD, sensu stricto.
34 28
 
35
-    ## keepEvents is FALSE to make this object as small as possible.
29
+## No longer necessary with LOD from C++ removing duplicates
30
+## ## Now this is coming from LOD_DF, which already only has
31
+## ## implicitly pop_size_child == 0
32
+## Only used in one function use for testing
33
+filter_phylog_df_LOD <- function(y) {
34
+    keep <- !rev(duplicated(rev(y$child)))
35
+    return(y[keep, ])
36
+}
37
+
38
+
39
+
40
+## ## Filter the PhylogDF so we obtain LOD, sensu stricto.
41
+## ## For the old version
42
+## filter_phylog_df_LOD_with_n <- function(y) {
43
+##     y <- y[y$pop_size_child == 0, , drop = FALSE]
44
+##     keep <- !rev(duplicated(rev(y$child)))
45
+##     return(y[keep, ])
46
+## }
47
+
48
+
49
+## from phylogClone, key parts for the LOD strict structure
50
+phcl_from_lod <- function(df, x) {
51
+    ## no longer necessary
52
+    ## ## the !keepEvents. Which I move here to speed things up.
53
+    ## df <- df[!duplicated(df[, c(1, 2)]), , drop = FALSE]
54
+    
55
+    tG <- unique(c(as.character(df[, 1]), as.character(df[, 2])))
56
+    ## ## Do as in phylogClone. So that we have the same nodes
57
+    ## ## in LOD all and not LOD all?
58
+    ## z <- which_N_at_T(x, N = 1, t = "last")
59
+    ## tG <- x$GenotypesLabels[z]
60
+    
61
+    ## ## FIXME: aren't these two warnings redundant or aliased?
62
+    ## ## yes, partlt
63
+    ##  I think this can never happen now
64
+    ## if ((length(tG) == 1) && (tG == "")) {
65
+    ##     warning("There never was a descendant of WT")
66
+    ## }
67
+    if (nrow(df) == 0) {
68
+        warning("LOD structure has 0 rows: no descendants of initMutant ever appeared. ")
69
+        return(NA)
70
+    }
71
+    g <- igraph::graph.data.frame(df[, c(1, 2)])
72
+    nodesInP <- unique(unlist(igraph::neighborhood(g, order = 1e+09, 
73
+                                                   nodes = tG, mode = "in")))
74
+    allLabels <- unique(as.character(unlist(df[, c(1, 2)])))
75
+    nodesRm <- setdiff(allLabels, V(g)$name[nodesInP])
76
+    g <- igraph::delete.vertices(g, nodesRm)
77
+    tmp <- list(graph = g, df = df)
78
+    class(tmp) <- c(class(tmp), "phylogClone")
79
+    return(tmp)
80
+}
81
+
82
+LOD.internal <- function(x) {
36 83
     if(is.null(x$pops.by.time)) {
37 84
         warning("Missing needed components. This might be a failed simulation.",
38 85
                 " Returning NA.")
39 86
         return(list(all_paths = NA, lod_single = NA))
40 87
     }
41
-    pc <- phylogClone(x, keepEvents = FALSE)
42
-    
88
+    if (!inherits(x, "oncosimul2")) 
89
+        stop("LOD information is only stored with v >= 2")
90
+    ## y <- filter_phylog_df_LOD(x$other$LOD_DF)
91
+    y <- x$other$LOD_DF
92
+    pc <- phcl_from_lod(y)
93
+    ## need eval for oncoSimulPop calls and for LOD_as_path
94
+    initMutant <- x$InitMutant
95
+
96
+    ## No descendants means that: never a descendant.
97
+    ## Note the same that the init mutant be the final state.
43 98
     if((length(pc) == 1) && (is.na(pc))) {
44
-        return(list(all_paths = NA,
45
-                    lod_single = "No_descendants"))
99
+        lod <- "No_descendants"
100
+        ## bail out here. We do not need the rest.
101
+        if(initMutant != "")
102
+            attributes(lod)$initMutant <- initMutant
103
+        return(lod)
46 104
     }
105
+    
47 106
     pcg <- pc$graph
48 107
     end <- genot_max(x)
49
-    all_paths <- igraph::all_simple_paths(pcg, from = "", to = end, mode = "out")
50
-    ## the next is partially redundant
51
-    ## graph_to_end <- igraph::make_ego_graph(pcg, order = 1e9, nodes = end,
52
-    ##                                        mode = "in")
53
-    ## if(length(graph_to_end) != 1) stop("length(graph_to_end) > 1")
54
-    ## I am not sure if I should keep the last one. Redundant
55
-
56
-    ## This gives a single path and it is the first entry into each
57
-    ## destination. But we do not check there is no extinction afterwards.
58
-    ## The closest to the single Szendro LOD
59
-    if(end == "") {
60
-        ## Max is WT
61
-        lod_single <- "WT_is_end"
108
+    
109
+    if(end == initMutant) {
110
+        if(initMutant == "") {
111
+            stinitm <- "WT"
112
+        } else {
113
+            stinitm <- paste0("initMutant(", initMutant, ")")
114
+        }
115
+        lod <- paste0(stinitm, "_is_end")
62 116
     } else {
63
-        singlep <- pc$df
64
-        singlep[, 1] <- as.character(singlep[, 1])
65
-        singlep[, 2] <- as.character(singlep[, 2])
66
-        singlep <- singlep[ do.call(order, singlep[, c(2, 3)]), ]
67
-        singlep <- singlep[!duplicated(singlep[, 2]), ]
68
-        gsingle <- igraph::graph_from_data_frame(singlep)
69
-        lod_single <- igraph::all_simple_paths(gsingle, from = "", to = end, mode = "out")
70
-        if(length(lod_single) != 1) stop("lod_single != 1")
117
+        all_paths <- igraph::all_simple_paths(pcg, from = initMutant, to = end,
118
+                                              mode = "out")
119
+        if(length(all_paths) > 1)
120
+            stop("length(all_paths) > 1???")
121
+        lod <- igraph::as_ids(all_paths[[1]])
71 122
     }
72
-    return(list(all_paths = all_paths,
73
-                lod_single = lod_single[[1]])) ##, graph_to_end = graph_to_end[[1]]))
74
-                ## graph_phylog_clone = pcg))
123
+    if(initMutant != "")
124
+        attributes(lod)$initMutant <- initMutant
125
+    return(lod)
75 126
 }
76 127
 
77 128
 
... ...
@@ -80,30 +131,71 @@ POM.internal <- function(x) {
80 131
         warning("Missing needed components. This might be a failed simulation.",
81 132
                 " Returning NA.")
82 133
         return(NA)
134
+    } else {
135
+        x$other$POM
83 136
     }
84
-    x$GenotypesLabels[rle(apply(x$pops.by.time[, -1, drop = FALSE], 1, which.max))$values]
85 137
 }
86 138
 
87
-## First do, over a set of simulations, sim:
88
-## l_lod_single <- mclapply(sim, LODs)
89
-## l_poms <- mclapply(sim, POM)
90 139
 
91 140
 
92 141
 diversityLOD <- function(llod) {
93
-    nn <- names(llod[[1]])
142
+    ## nn <- names(llod[[1]])
143
+    nn <- llod[[1]]
94 144
     if( is_null_na(nn) ||
95
-        !(nn == c("all_paths", "lod_single")))
145
+        !(is.list(llod)))
96 146
         stop("Object must be a list of LODs")
97
-    pathstr <- unlist(lapply(llod, function(x) paste(names(x$lod_single),
147
+    pathstr <- unlist(lapply(llod, function(x) paste(x,
98 148
                                                      collapse = "_")))
99 149
     shannonI(table(pathstr))
100 150
 }
101 151
 
152
+## diversityLOD <- function(llod) {
153
+##     nn <- names(llod[[1]])
154
+##     if( is_null_na(nn) ||
155
+##         !(is.list(llod)))
156
+##         stop("Object must be a list of LODs")
157
+##     pathstr <- unlist(lapply(llod, function(x) paste(names(x),
158
+##                                                      collapse = "_")))
159
+##     shannonI(table(pathstr))
160
+## }
161
+
162
+LOD_as_path <- function(llod) {
163
+    path_l <- function(u) {
164
+        if(length(u) == 1) {
165
+            if(is.null(attributes(u)$initMutant))
166
+                initMutant <- ""
167
+            else
168
+                initMutant <- attributes(u)$initMutant
169
+            if(initMutant == "") initMutant <- "WT"
170
+            if(grepl("_is_end", u))
171
+                return(initMutant)
172
+            if(u == "No_descendants")
173
+                return(initMutant)
174
+        } else {
175
+            ## Deal with "" meaning WT
176
+            ## the_names <- names(u)
177
+            the_names <- u
178
+            the_names_wt <- which(the_names == "")
179
+            
180
+            if(length(the_names_wt)) {
181
+                if(length(the_names_wt) > 1) stop("more than 1 WT?!")
182
+                if(the_names_wt > 1) stop("WT in position not 1?!")
183
+                the_names[the_names_wt] <- "WT"
184
+            }
185
+            return(paste(the_names, collapse = " -> ")) 
186
+        }
187
+    }
188
+    if(!is.list(llod))
189
+        pathstr <- path_l(llod)
190
+    else
191
+        pathstr <- unlist(lapply(llod, path_l))
192
+    return(pathstr)
193
+}
194
+## We would just need a LOD_as_DAG
195
+
102 196
 diversityPOM <- function(lpom) {
103 197
     if(!inherits(lpom, "list"))
104 198
         stop("Object must be a list of POMs")
105
-    ## if(!inherits(x, "oncosimul_lod_list"))
106
-    ##     stop("This is not a list of POMs")
107 199
     pomstr <- unlist(lapply(lpom, function(x) paste(x, collapse = "_")))
108 200
     shannonI(table(pomstr))
109 201
 }
... ...
@@ -112,13 +204,12 @@ diversityPOM <- function(lpom) {
112 204
 diversity_POM <- diversityPOM
113 205
 diversity_LOD <- diversityLOD
114 206
 
115
-## POM.oncosimul2 <- POM.internal
116
-## LOD2.oncosimul2 <- LOD.internal
117 207
 
118 208
 POM <- function(x) {
119 209
     UseMethod("POM", x)
120 210
 }
121 211
 
212
+
122 213
 LOD <- function(x) {
123 214
     UseMethod("LOD", x)
124 215
 }
... ...
@@ -129,6 +220,9 @@ LOD.oncosimul2 <- function(x) return(LOD.internal(x))
129 220
 POM.oncosimulpop <- function(x) return(lapply(x, POM.internal))
130 221
 LOD.oncosimulpop <- function(x) return(lapply(x, LOD.internal))
131 222
 
223
+
224
+
225
+
132 226
 ## POM.oncosimul2 <- function(x) {
133 227
 ##     out <- POM.internal(x)
134 228
 ##     class(out) <- c(class(out), "oncosimul_pom")
... ...
@@ -166,3 +260,169 @@ LOD.oncosimulpop <- function(x) return(lapply(x, LOD.internal))
166 260
 ## }
167 261
 
168 262
 
263
+
264
+POM_pre_2.9.2 <- function(x) {
265
+    if(is.null(x$pops.by.time)) {
266
+        warning("Missing needed components. This might be a failed simulation.",
267
+                " Returning NA.")
268
+        return(NA)
269
+    }
270
+    x$GenotypesLabels[rle(apply(x$pops.by.time[, -1, drop = FALSE],
271
+                                1, which.max))$values]
272
+}
273
+
274
+
275
+
276
+LOD.internal_pre_2.9.2 <- function(x, strict) {
277
+    ## Not identical to LOD of Szendro because:
278
+    
279
+    ##  a) I think we want all paths, not just their single LOD, which I
280
+    ##  think they might use out of convenience.
281
+
282
+    ##  b) For me it is a mess, a complicated mess, to use their LOD as
283
+    ##  they define it and there are many ambiguities in how to define it
284
+    ##  in continuous time.
285
+
286
+    ## This also means that single simulation might yield multiple LODs
287
+
288
+    ## keepEvents is FALSE to make this object as small as possible.
289
+    if(is.null(x$pops.by.time)) {
290
+        warning("Missing needed components. This might be a failed simulation.",
291
+                " Returning NA.")
292
+        return(list(all_paths = NA, lod_single = NA))
293
+    }
294
+    if(strict) {
295
+        if (!inherits(x, "oncosimul2")) 
296
+            stop("LOD information is only stored with v >= 2")
297
+        y <- filter_phylog_df_LOD(x$other$LOD_DF)
298
+        pc <- phcl_from_lod(y)
299
+    } else {
300
+        pc <- phylogClone(x, keepEvents = FALSE)
301
+    }
302
+    ## need eval for oncoSimulPop calls and for LOD_as_path
303
+    initMutant <- x$InitMutant
304
+    
305
+    if((length(pc) == 1) && (is.na(pc))) {
306
+        lodlist <- list(all_paths = NA,
307
+                        lod_single = "No_descendants")
308
+        ## bail out here. We do not need the rest.
309
+        if(initMutant != "")
310
+            attributes(lodlist)$initMutant <- initMutant
311
+        return(lodlist)
312
+    }
313
+    
314
+    pcg <- pc$graph
315
+    end <- genot_max(x)
316
+    
317
+    ## if(!is.null(eval(attributes(x)$call$initMutant))) {
318
+    ##     initMutant <- eval(attributes(s7)$call$initMutant)
319
+    ## } else {
320
+    ##     initMutant <- ""
321
+    ## }
322
+    ## browser()
323
+    if(end == initMutant) {
324
+        if(initMutant == "") {
325
+            stinitm <- "WT"
326
+        } else {
327
+            stinitm <- paste0("initMutant(", initMutant, ")")
328
+        }
329
+        lod_single <- paste0(stinitm, "_is_end")
330
+        all_paths <- list(lod_single)
331
+    } else {
332
+        all_paths <- igraph::all_simple_paths(pcg, from = initMutant, to = end,
333
+                                              mode = "out")
334
+       
335
+        if(!strict) {
336
+            ## the next is partially redundant
337
+            ## graph_to_end <- igraph::make_ego_graph(pcg, order = 1e9, nodes = end,
338
+            ##                                        mode = "in")
339
+            ## if(length(graph_to_end) != 1) stop("length(graph_to_end) > 1")
340
+            ## I am not sure if I should keep the last one. Redundant
341
+            
342
+            ## This gives a single path and it is the first entry into each
343
+            ## destination. But we do not check there is no extinction afterwards.
344
+            ## The closest to the single Szendro LOD
345
+            singlep <- pc$df
346
+            singlep[, 1] <- as.character(singlep[, 1])
347
+            singlep[, 2] <- as.character(singlep[, 2])
348
+            singlep <- singlep[ do.call(order, singlep[, c(2, 3)]), ]
349
+            singlep <- singlep[!duplicated(singlep[, 2]), ]
350
+            gsingle <- igraph::graph_from_data_frame(singlep)
351
+            lod_single <- igraph::all_simple_paths(gsingle, from = initMutant,
352
+                                                   to = end, mode = "out")
353
+            if(length(lod_single) != 1) stop("lod_single != 1")
354
+        }
355
+    }
356
+    if(strict) {
357
+        if(length(all_paths) > 1)
358
+            stop("length(all_paths) > 1???")
359
+        lodlist <- list(all_paths = NA,
360
+                    lod_single = all_paths[[1]])
361
+    } else {
362
+        lodlist <- list(all_paths = all_paths,
363
+                    lod_single = lod_single[[1]])
364
+    }
365
+    if(initMutant != "")
366
+        attributes(lodlist)$initMutant <- initMutant
367
+    return(lodlist)
368
+}
369
+
370
+
371
+
372
+## LOD_as_path_pre_2.9.2 <- function(llod) {
373
+##     path_l <- function(u) {
374
+##         if(length(u$lod_single) == 1) {
375
+##             if(is.null(attributes(u)$initMutant))
376
+##                 initMutant <- ""
377
+##             else
378
+##                 initMutant <- attributes(u)$initMutant
379
+##             if(initMutant == "") initMutant <- "WT"
380
+##             if(grepl("_is_end", u$lod_single))
381
+##                 return(initMutant)
382
+##             if(u$lod_single == "No_descendants")
383
+##                 return(initMutant)
384
+##         } else {
385
+##             ## Deal with "" meaning WT
386
+##             the_names <- names(u$lod_single)
387
+##             the_names_wt <- which(the_names == "")
388
+            
389
+##             if(length(the_names_wt)) {
390
+##                 if(length(the_names_wt) > 1) stop("more than 1 WT?!")
391
+##                 if(the_names_wt > 1) stop("WT in position not 1?!")
392
+##                 the_names[the_names_wt] <- "WT"
393
+##             }
394
+##             return(paste(the_names, collapse = " -> ")) 
395
+##             ## return(paste0("WT", paste(names(u$lod_single),
396
+##             ##                           collapse = " -> ")) )
397
+##         }
398
+##     }
399
+##     if(identical(names(llod), c("all_paths", "lod_single")))
400
+##         pathstr <- path_l(llod)
401
+##     else {
402
+##         ## should be a list
403
+##         pathstr <- unlist(lapply(llod, path_l))
404
+##     }
405
+##     return(pathstr)
406
+##     ## pathstr <- unlist(lapply(llod, function(x) paste(names(x$lod_single),
407
+##     ##                                                  collapse = " -> ")))
408
+##     ## return(paste0("WT", pathstr))
409
+## }
410
+
411
+
412
+
413
+LOD.oncosimul2_pre_2.9.2 <- function(x, strict = TRUE)
414
+    return(LOD.internal_pre_2.9.2(x, strict))
415
+
416
+## LOD.oncosimulpop_pre_2.9.2 <- function(x, strict = TRUE)
417
+##     return(lapply(x, LOD.internal_pre_2.9.2, strict))
418
+
419
+
420
+
421
+
422
+
423
+
424
+## Note for self: we could get all the LODs per simulation in the strict
425
+## sense of those never becoming extinct if we subset the phylogClone
426
+## object to children in which if we arrive at the children at any two
427
+## times t and t+k, we retain only rows where any time > t is such that
428
+## the popsize is > 0. But this is not worth it now.
... ...
@@ -1475,7 +1475,7 @@ phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
1475 1475
     z <- which_N_at_T(x, N, t)
1476 1476
     tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all
1477 1477
     ## sample size info at each period
1478
-
1478
+    ## FIXME: aren't this and the next warnings redundant or aliased?
1479 1479
     if( (length(tG) == 1) && (tG == "")) {
1480 1480
         warning("There never was a descendant of WT")
1481 1481
     }
... ...
@@ -53,9 +53,14 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
53 53
         ## columns names
54 54
         ## if( (is.null(colnames(x))) || any(grepl("^$", colnames(x))))
55 55
         ##    stop("Matrix x must have column names")
56
+
57
+        ## Major change as of flfast: no longer using from_genotype_fitness
56 58
         afe <- evalAllGenotypes(allFitnessEffects(
57
-            epistasis = from_genotype_fitness(x)),
59
+            genotFitness = x
60
+            ##, epistasis = from_genotype_fitness(x)
61
+        ),
58 62
             order = FALSE, addwt = TRUE, max = max_num_genotypes)
63
+
59 64
         ## Might not be needed with the proper gfm object (so gmf <- x)
60 65
         ## but is needed if arbitrary matrices.
61 66
         gfm <- allGenotypes_to_matrix(afe) 
... ...
@@ -96,8 +101,13 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
96 101
     return(list(gfm = gfm, afe = afe))
97 102
 }   
98 103
 
104
+## Based on from_genotype_fitness
105
+## but if we are passed a fitness landscapes as produced by
106
+## rfitness, do nothing
99 107
 
100
-from_genotype_fitness <- function(x) {
108
+to_genotFitness_std <- function(x, simplify = TRUE,
109
+                                min_filter_fitness = 1e-9,
110
+                                sort_gene_names = TRUE) {
101 111
     ## Would break with output from allFitnessEffects and
102 112
     ## output from allGenotypeAndMut
103 113
     
... ...
@@ -131,12 +141,47 @@ from_genotype_fitness <- function(x) {
131 141
         ## We are expecting here a matrix of 0/1 where columns are genes
132 142
         ## except for the last column, that is Fitness
133 143
         ## Of course, can ONLY work with epistastis, NOT order
134
-        return(genot_fitness_to_epistasis(x))
144
+        ## return(genot_fitness_to_epistasis(x))
145
+        if(any(duplicated(colnames(x))))
146
+            stop("duplicated column names")
147
+        
148
+        cnfl <- which(colnames(x)[-ncol(x)] == "")
149
+        if(length(cnfl)) {
150
+            freeletter <- setdiff(LETTERS, colnames(x))[1]
151
+            if(length(freeletter) == 0) stop("Renaiming failed")
152
+            warning("One column named ''. Renaming to ", freeletter)
153
+            colnames(x)[cnfl] <- freeletter
154
+        }
155
+        if(!is.null(colnames(x)) && sort_gene_names) {
156
+            ncx <- ncol(x)
157
+            cnx <- colnames(x)[-ncx]
158
+            ocnx <- gtools::mixedorder(cnx)
159
+            if(!(identical(cnx[ocnx], cnx))) {
160
+                message("Sorting gene column names alphabetically")
161
+                x <- cbind(x[, ocnx, drop = FALSE], Fitness = x[, (ncx)])
162
+            }
163
+        }
164
+        
165
+        if(is.null(colnames(x))) {
166
+            ncx <- (ncol(x) - 1)
167
+            message("No column names: assigning gene names from LETTERS")
168
+            if(ncx > length(LETTERS))
169
+                stop("More genes than LETTERS; please give gene names",
170
+                     " as you see fit.")
171
+            colnames(x) <- c(LETTERS[1:ncx], "Fitness")
172
+        }
173
+        
174
+        if(!all(as.matrix(x[, -ncol(x)]) %in% c(0, 1) ))
175
+            stop("First ncol - 1 entries not in {0, 1}.")
135 176
     } else {
136 177
         if(!inherits(x, "data.frame"))
137 178
             stop("genotFitness: if two-column must be data frame")
138 179
         ## Make sure no factors
139
-        if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
180
+        if(is.factor(x[, 1])) {
181
+            warning("First column of genotype fitness is a factor. ",
182
+                    "Converting to character.")
183
+            x[, 1] <- as.character(x[, 1])
184
+            }
140 185
         ## Make sure no numbers
141 186
         if(any(is.numeric(x[, 1])))
142 187
             stop(paste0("genotFitness: first column of data frame is numeric.",
... ...
@@ -168,16 +213,121 @@ from_genotype_fitness <- function(x) {
168 213
                 colnames(x) <- c("Genotype", "Fitness")
169 214
             }
170 215
             if((!omarker) && (!emarker) && (!nogoodepi)) {
171
-                message("All single-gene genotypes as input to from_genotype_fitness")
216
+                message("All single-gene genotypes as input to to_genotFitness_std")
172 217
             }
173 218
             ## Yes, we need to do this to  scale the fitness and put the "-"
174
-            return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
219
+            x <- allGenotypes_to_matrix(x)
175 220
         }
176 221
     }
222
+    ## And, yes, scale all fitnesses by that of the WT
223
+    whichroot <- which(rowSums(x[, -ncol(x), drop = FALSE]) == 0)
224
+    if(length(whichroot) == 0) {
225
+        warning("No wildtype in the fitness landscape!!! Adding it with fitness 1.")
226
+        x <- rbind(c(rep(0, ncol(x) - 1), 1), x)
227
+    } else if(x[whichroot, ncol(x)] != 1) {
228
+        warning("Fitness of wildtype != 1.",
229
+                " Dividing all fitnesses by fitness of wildtype.")
230
+        vwt <- x[whichroot, ncol(x)]
231
+        x[, ncol(x)] <- x[, ncol(x)]/vwt
232
+    }
233
+    if(any(is.na(x)))
234
+        stop("NAs in fitness matrix")
235
+    if(simplify) {
236
+        return(x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE])
237
+    } else {
238
+        return(x)
239
+    }
177 240
 }
178 241
 
242
+## Deprecated after flfast
243
+## to_genotFitness_std is faster and has better error checking
244
+## and is very similar and does not use
245
+## the genot_fitness_to_epistasis, which is not reasonable anymore.
179 246
 
180
-
247
+## from_genotype_fitness <- function(x) {
248
+##     ## Would break with output from allFitnessEffects and
249
+##     ## output from allGenotypeAndMut
250
+    
251
+##     ## For the very special and weird case of
252
+##     ## a matrix but only a single gene so with a 0 and 1
253
+##     ## No, this is a silly and meaningless case.
254
+##     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
255
+    
256
+##     ## } else  blabla: 
257
+    
258
+##     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
259
+##         stop("Input must inherit from matrix or data.frame.")
260
+    
261
+##     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
262
+##     ##     stop(paste0("Genotype fitness input either two-column data frame",
263
+##     ##          " or a numeric matrix with > 2 columns."))
264
+##     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
265
+##     ##     stop(paste0("It looks like you have a matrix for a single genotype",
266
+##     ##                 " of a single gene. For this degenerate cases use",
267
+##     ##                 " a data frame specification."))
268
+    
269
+##     if(ncol(x) > 2) {
270
+##         if(inherits(x, "matrix")) {
271
+##             if(!is.numeric(x))
272
+##                 stop("A genotype fitness matrix/data.frame must be numeric.")
273
+##         } else if(inherits(x, "data.frame")) {
274
+##             if(!all(unlist(lapply(x, is.numeric))))
275
+##                 stop("A genotype fitness matrix/data.frame must be numeric.")
276
+##         }
277
+        
278
+##         ## We are expecting here a matrix of 0/1 where columns are genes
279
+##         ## except for the last column, that is Fitness
280
+##         ## Of course, can ONLY work with epistastis, NOT order
281
+##         return(genot_fitness_to_epistasis(x))
282
+##     } else {
283
+##         if(!inherits(x, "data.frame"))
284
+##             stop("genotFitness: if two-column must be data frame")
285
+##         ## Make sure no factors
286
+##         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
287
+##         ## Make sure no numbers
288
+##         if(any(is.numeric(x[, 1])))
289
+##             stop(paste0("genotFitness: first column of data frame is numeric.",
290
+##                         " Ambiguous and suggests possible error. If sure,",
291
+##                         " enter that column as character"))
292
+        
293
+##         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
294
+##         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
295
+##         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
296
+##         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
297
+##         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
298
+##         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
299
+##         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
300
+##         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
301
+##         if(omarker) {
302
+##             ## do something. To be completed
303
+##             stop("This code not yet ready")
304
+##             ## You can pass to allFitnessEffects genotype -> fitness mappings that
305
+##             ## involve epistasis and order. But they must have different
306
+##             ## genes. Otherwise, it is not manageable.
307
+##         }
308
+##         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
309
+##             ## the second case above corresponds to passing just single letter genotypes
310
+##             ## as there is not a single marker
311
+##             x <- x[, c(1, 2), drop = FALSE]
312
+##             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
313
+##                 message("Column names of object not Genotype and Fitness.",
314
+##                         " Renaming them assuming that is what you wanted")
315
+##                 colnames(x) <- c("Genotype", "Fitness")
316
+##             }
317
+##             if((!omarker) && (!emarker) && (!nogoodepi)) {
318
+##                 message("All single-gene genotypes as input to from_genotype_fitness")
319
+##             }
320
+##             ## Yes, we need to do this to  scale the fitness and put the "-"
321
+##             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
322
+##         }
323
+##     }
324
+## }
325
+
326
+
327
+
328
+
329
+
330
+## No longer used for real
181 331
 genot_fitness_to_epistasis <- function(x) {
182 332
     ## FIXME future:
183 333
 
... ...
@@ -208,6 +358,7 @@ genot_fitness_to_epistasis <- function(x) {
208 358
     fwt <- 1
209 359
     if(length(wt) == 1)
210 360
         fwt <- f[wt]
361
+    ## No longer being used when we pass fitness landscapse: flfast
211 362
     if(!isTRUE(all.equal(fwt, 1))) {
212 363
         message("Fitness of wildtype != 1. ",
213 364
                 "Dividing all fitnesses by fitness of wildtype.")
... ...
@@ -245,6 +396,11 @@ allGenotypes_to_matrix <- function(x) {
245 396
     ## a matrix with 0/1 in a column for each gene and a final column of
246 397
     ## Fitness
247 398
 
399
+    if(is.factor(x[, 1])) {
400
+        warning("First column of genotype fitness is a factor. ",
401
+                "Converting to character.")
402
+        x[, 1] <- as.character(x[, 1])
403
+    }
248 404
     ## A WT can be specified with string "WT"
249 405
     anywt <- which(x[, 1] == "WT")
250 406
     if(length(anywt) > 1) stop("More than 1 WT")
... ...
@@ -253,6 +409,7 @@ allGenotypes_to_matrix <- function(x) {
253 409
         x <- x[-anywt, ]
254 410
         ## Trivial case of passing just a WT?
255 411
     } else {
412
+        warning("No WT genotype. Setting its fitness to 1.")
256 413
         fwt <- 1
257 414
     }
258 415
     splitted_genots <- lapply(x$Genotype,
... ...
@@ -25,6 +25,8 @@ allMutatorEffects <- function(epistasis = NULL,
25 25
                               keepInput = TRUE) {
26 26
     ## This is on purpose to prevent using a rT or orderEffects. Those are
27 27
     ## not tested to work with mutator.
28
+
29
+    ## Neither do we accept a fitness landscape object either for now.
28 30
     allFitnessORMutatorEffects(
29 31
         rT = NULL,
30 32
         epistasis = epistasis,
... ...
@@ -373,13 +373,18 @@ checkRT <- function(mdeps) {
373 373
 
374 374
 getNamesID <- function(fp) {
375 375
     ## Return a lookup table for names based on numeric IDs
376
-    idname <- c(fp$geneModule$GeneNumID,  fp$long.geneNoInt$GeneNumID)
377
-    names(idname) <- c(fp$geneModule$Gene, fp$long.geneNoInt$Gene)
376
+    idname <- c(fp$geneModule$GeneNumID,
377
+                fp$long.geneNoInt$GeneNumID,
378
+                fp$fitnessLandscape_gene_id$GeneNumID)
379
+    names(idname) <- c(fp$geneModule$Gene,
380
+                       fp$long.geneNoInt$Gene,
381
+                       fp$fitnessLandscape_gene_id$Gene)
378 382
     return(idname[-1]) ## remove Root!!
379 383
 }
380 384
 
381 385
 
382
-getGeneIDNum <- function(geneModule, geneNoInt, drv, sort = TRUE) {
386
+getGeneIDNum <- function(geneModule, geneNoInt, fitnessLandscape_gene_id,
387
+                         drv, sort = TRUE) {
383 388
     ## It returns the genes, as NumID, in the given vector with names drv
384 389
     ## initMutant uses this, for simplicity, without sorting, but noInt
385 390
     ## are always sorted
... ...
@@ -388,24 +393,26 @@ getGeneIDNum <- function(geneModule, geneNoInt, drv, sort = TRUE) {
388 393
 
389 394
     ## Yes, we must do it twice because we do not know before hand which
390 395
     ## is which. This makes sure no NA. Period.
391
-    if(any(is.na( match(drv, c(geneModule$Gene, geneNoInt$Gene))))) {
396
+    if(any(is.na( match(drv, c(geneModule$Gene, geneNoInt$Gene,
397
+                               fitnessLandscape_gene_id$Gene))))) {
392 398
         stop(paste("For driver or initMutant you have passed genes",
393 399
                    "not in the fitness table."))
394 400
     }
395 401
     
396 402
     indicesM <- as.vector(na.omit(match( drv, geneModule$Gene)))
397 403
     indicesI <- as.vector(na.omit(sort(match( drv, geneNoInt$Gene))))
404
+    indicesF <- as.vector(na.omit(sort(match( drv, fitnessLandscape_gene_id$Gene))))
398 405
     if(sort) {
399 406
         indicesM <- sort(indicesM)
400 407
     }
401 408
     return(c(
402 409
         geneModule$GeneNumID[indicesM],
403
-        geneNoInt$GeneNumID[indicesI])
410
+        geneNoInt$GeneNumID[indicesI],
411
+        fitnessLandscape_gene_id$GeneNumID[indicesF])
404 412
     )
405 413
 }
406 414
 
407 415
 
408
-
409 416
 allFitnessORMutatorEffects <- function(rT = NULL,
410 417
                                        epistasis = NULL,
411 418
                                        orderEffects = NULL,
... ...
@@ -413,6 +420,7 @@ allFitnessORMutatorEffects <- function(rT = NULL,
413 420
                                        geneToModule = NULL,
414 421
                                        drvNames = NULL,
415 422
                                        keepInput = TRUE,
423
+                                       genotFitness = NULL,
416 424
                                        ## refFE = NULL,
417 425
                                        calledBy = NULL) {
418 426
     ## From allFitnessEffects. Generalized so we deal with Fitness
... ...
@@ -442,7 +450,8 @@ allFitnessORMutatorEffects <- function(rT = NULL,
442 450
     
443 451
     if(calledBy == "allMutatorEffects") {
444 452
         ## very paranoid check
445
-        if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
453
+        if( !is.null(rT) || !is.null(orderEffects) ||
454
+            !is.null(drvNames) || !is.null(genotFitness))
446 455
             stop("allMutatorEffects called with forbidden arguments.",
447 456
                  "Is this an attempt to subvert the function?")
448 457
     }
... ...
@@ -551,9 +560,34 @@ allFitnessORMutatorEffects <- function(rT = NULL,
551 560
     } else {
552 561
         geneNoInt <- data.frame()
553 562
     }
554
-
563
+    
564
+    if(is.null(genotFitness)) {
565
+        genotFitness <- matrix(NA, nrow = 0, ncol = 1)
566
+        fitnessLandscape_df <- data.frame()
567
+        fitnessLandscape_gene_id <- data.frame()
568
+    } else {
569
+        ## Yes, I am duplicating stuff for now.
570
+        ## This makes life simpler in C++:
571
+        ## In the map, the key is the genotype name, as
572
+        ## cnn <- colnames(genotFitness)[-ncol(genotFitness)]
573
+        cnn <- 1:(ncol(genotFitness) - 1)
574
+        gfn <- apply(genotFitness[, -ncol(genotFitness), drop = FALSE], 1,
575
+                     function(x) paste(cnn[as.logical(x)],
576
+                                       collapse = ", "))
577
+        ## rownames(genotFitness) <- gfn
578
+        fitnessLandscape_df <-
579
+            data.frame(Genotype = gfn,
580
+                       Fitness = genotFitness[, ncol(genotFitness)],
581
+                       stringsAsFactors = FALSE)
582
+        fitnessLandscape_gene_id <- data.frame(
583
+            Gene = colnames(genotFitness)[-ncol(genotFitness)],
584
+            GeneNumID = cnn,
585
+            stringsAsFactors = FALSE)
586
+        
587
+    }
588
+    
555 589
     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
556
-             nrow(geneNoInt)) == 0)
590
+             nrow(geneNoInt) + nrow(genotFitness)) == 0)
557 591
         stop("You have specified nothing!")
558 592
 
559 593
     if(calledBy == "allFitnessEffects") {
... ...
@@ -566,7 +600,8 @@ allFitnessORMutatorEffects <- function(rT = NULL,
566 600
         graphE <- NULL
567 601
     }
568 602
     if(!is.null(drvNames)) {
569
-        drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
603
+        drv <- unique(getGeneIDNum(geneModule, geneNoInt, fitnessLandscape_gene_id,
604
+                                   drvNames))
570 605
         ## drivers should never be in the geneNoInt; Why!!!???
571 606
         ## Catch the problem. This is an overkill,
572 607
         ## so since we catch the issue, we could leave the geneNoInt. But
... ...
@@ -583,10 +618,11 @@ allFitnessORMutatorEffects <- function(rT = NULL,
583 618
         ## drv <- geneModule$GeneNumID[-1]
584 619
         drv <- vector(mode = "integer", length = 0L)
585 620
     }
586
-    
621
+  
587 622
     if(!keepInput) {
588 623
         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
589 624
     }
625
+
590 626
     out <- list(long.rt = long.rt,
591 627
                 long.epistasis = long.epistasis,
592 628
                 long.orderEffects = long.orderEffects,
... ...
@@ -599,7 +635,10 @@ allFitnessORMutatorEffects <- function(rT = NULL,
599 635
                 rT = rT,
600 636
                 epistasis = epistasis,
601 637
                 orderEffects = orderEffects,
602
-                noIntGenes = noIntGenes                
638
+                noIntGenes = noIntGenes,
639
+                fitnessLandscape = genotFitness,
640
+                fitnessLandscape_df = fitnessLandscape_df,
641
+                fitnessLandscape_gene_id = fitnessLandscape_gene_id
603 642
                 )
604 643
     if(calledBy == "allFitnessEffects") {
605 644
         class(out) <- c("fitnessEffects")
... ...
@@ -609,6 +648,210 @@ allFitnessORMutatorEffects <- function(rT = NULL,
609 648
     return(out)
610 649
 }
611 650
 
651
+## Former version, with fitness landscape
652
+## allFitnessORMutatorEffects <- function(rT = NULL,
653
+##                                        epistasis = NULL,
654
+##                                        orderEffects = NULL,
655
+##                                        noIntGenes = NULL,
656
+##                                        geneToModule = NULL,
657
+##                                        drvNames = NULL,
658
+##                                        keepInput = TRUE,
659
+##                                        ## refFE = NULL,
660
+##                                        calledBy = NULL) {
661
+##     ## From allFitnessEffects. Generalized so we deal with Fitness
662
+##     ## and mutator.
663
+    
664
+##     ## restrictions: the usual rt
665
+
666
+##     ## epistasis: as it says, with the ":"
667
+
668
+##     ## orderEffects: the ">"
669
+    
670
+##     ## All of the above can be genes or can be modules (if you pass a
671
+##     ## geneToModule)
672
+
673
+##     ## rest: rest of genes, with fitness
674
+
675
+
676
+##     ## For epistasis and order effects we create the output object but
677
+##     ## missing the numeric ids of genes. With rT we do it in one go, as we
678
+##     ## already know the mapping of genes to numeric ids. We could do the
679
+##     ## same in epistasis and order, but we would be splitting twice
680
+##     ## (whereas for rT extracting the names is very simple).
681
+
682
+##     ## called appropriately?
683
+##     if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
684
+##         stop("How did you call this function?. Bug.")
685
+    
686
+##     if(calledBy == "allMutatorEffects") {
687
+##         ## very paranoid check
688
+##         if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
689
+##             stop("allMutatorEffects called with forbidden arguments.",
690
+##                  "Is this an attempt to subvert the function?")
691
+##     }
692
+    
693
+##     rtNames <- NULL
694
+##     epiNames <- NULL
695
+##     orNames <- NULL
696
+##     if(!is.null(rT)) {
697
+##         ## This is really ugly, but to prevent the stringsAsFactors I need it here:
698
+##         rT$parent <- as.character(rT$parent)
699
+##         rT$child <- as.character(rT$child)
700
+##         rT$typeDep <- as.character(rT$typeDep)
701
+##         rtNames <- unique(c(rT$parent, rT$child))
702
+##     }
703
+##     if(!is.null(epistasis)) {
704
+##         long.epistasis <- to.long.epist.order(epistasis, ":")
705
+##         ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
706
+##         ## deal with the possible negative signs
707
+##         epiNames <- setdiff(unique(
708
+##             unlist(lapply(long.epistasis,
709
+##                           function(x) lapply(x$ids,
710
+##                                              function(z) strsplit(z, "^-"))))),
711
+##                             "")
712
+##     } else {
713
+##         long.epistasis <- list()
714
+##     }
715
+##     if(!is.null(orderEffects)) {
716
+##         long.orderEffects <- to.long.epist.order(orderEffects, ">")
717
+##         orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
718
+##     } else {
719
+##         long.orderEffects <- list()
720
+##     }
721
+##     allModuleNames <- unique(c(rtNames, epiNames, orNames))
722
+##     if(is.null(geneToModule)) {
723
+##         gMOneToOne <- TRUE
724
+##         geneToModule <- geneModuleNull(allModuleNames)
725
+##     } else {
726
+##         gMOneToOne <- FALSE
727
+##         if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
728
+##             stop(paste("Some values in geneToModule not present in any of",
729
+##                        " rT, epistasis, or order effects"))
730
+##         if(any(is.na(match(allModuleNames, names(geneToModule)))))
731
+##             stop(paste("Some values in rT, epistasis, ",
732
+##                        "or order effects not in geneToModule"))
733
+##     }
734
+##     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
735
+    
736
+##     idm <- unique(geneModule$ModuleNumID)
737
+##     names(idm) <- unique(geneModule$Module)
738
+
739
+##     if(!is.null(rT)) {
740
+##         checkRT(rT)
741
+##         long.rt <- to.long.rt(rT, idm)
742
+##     } else {
743
+##         long.rt <- list() ## yes, we want an object of length 0
744
+##     }
745
+
746
+##     ## Append the numeric ids to epistasis and order
747
+##     if(!is.null(epistasis)) {
748
+##         long.epistasis <- lapply(long.epistasis,
749
+##                                  function(x)
750
+##                                      addIntID.epist.order(x, idm,
751
+##                                                           sort = TRUE,
752
+##                                                           sign = TRUE))
753
+##     }
754
+##     if(!is.null(orderEffects)) {
755
+##         long.orderEffects <- lapply(long.orderEffects,
756
+##                                     function(x)
757
+##                                         addIntID.epist.order(x, idm,
758
+##                                                              sort = FALSE,
759
+##                                                              sign = FALSE))
760
+##     }
761
+    
762
+##     if(!is.null(noIntGenes)) {
763
+##         if(inherits(noIntGenes, "character")) {
764
+##             wm <- paste("noIntGenes is a character vector.",
765
+##                         "This is probably not what you want, and will",
766
+##                         "likely result in an error downstream.",
767
+##                         "You can get messages like",
768
+##                         " 'not compatible with requested type', and others.",
769
+##                         "We are stopping.")
770
+##             stop(wm)
771
+##         }
772
+            
773
+##         mg <- max(geneModule[, "GeneNumID"])
774
+##         gnum <- seq_along(noIntGenes) + mg
775
+##         if(!is.null(names(noIntGenes))) {
776
+##             ng <- names(noIntGenes)
777
+##             if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE)
778
+##                 || grepl(":", ng, fixed = TRUE))
779
+##                 stop("The name of some noIntGenes contain a ',' or a '>' or a ':'")
780
+##             if(any(ng %in% geneModule[, "Gene"] ))
781
+##                 stop("A gene in noIntGenes also present in the other terms")
782
+##             if(any(duplicated(ng)))
783
+##                 stop("Duplicated gene names in geneNoInt")
784
+##             if(any(is.na(ng)))
785
+##                 stop("In noIntGenes some genes have names, some don't.",
786
+##                      " Name all of them, or name none of them.")
787
+##         } else {
788
+##             ng <- gnum
789
+##         }
790
+##         geneNoInt <- data.frame(Gene = as.character(ng),
791
+##                                 GeneNumID = gnum,
792
+##                                 s = noIntGenes,
793
+##                                 stringsAsFactors = FALSE)
794
+##     } else {
795
+##         geneNoInt <- data.frame()
796
+##     }
797
+
798
+##     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
799
+##              nrow(geneNoInt)) == 0)
800
+##         stop("You have specified nothing!")
801
+
802
+##     if(calledBy == "allFitnessEffects") {
803
+##         if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
804
+##             graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
805
+##         } else {
806
+##             graphE <- NULL
807
+##         }
808
+##     } else {
809
+##         graphE <- NULL
810
+##     }
811
+##     if(!is.null(drvNames)) {
812
+##         drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
813
+##         ## drivers should never be in the geneNoInt; Why!!!???
814
+##         ## Catch the problem. This is an overkill,
815
+##         ## so since we catch the issue, we could leave the geneNoInt. But
816
+##         ## that should not be there in this call.
817
+##         ## if(any(drvNames %in% geneNoInt$Gene)) {
818
+##         ##     stop(paste("At least one gene in drvNames is a geneNoInt gene.",
819
+##         ##                "That is not allowed.",
820
+##         ##                "If that gene is a driver, pass it as gene in the epistasis",
821
+##         ##                "component."))
822
+##         ## }
823
+##         ## drv <- getGeneIDNum(geneModule, NULL, drvNames)
824
+##     } else {
825
+##         ## we used to have this default
826
+##         ## drv <- geneModule$GeneNumID[-1]
827
+##         drv <- vector(mode = "integer", length = 0L)
828
+##     }
829
+    
830
+##     if(!keepInput) {
831
+##         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
832
+##     }
833
+##     out <- list(long.rt = long.rt,
834
+##                 long.epistasis = long.epistasis,
835
+##                 long.orderEffects = long.orderEffects,
836
+##                 long.geneNoInt = geneNoInt,
837
+##                 geneModule = geneModule,
838
+##                 gMOneToOne = gMOneToOne,
839
+##                 geneToModule = geneToModule,
840
+##                 graph = graphE,
841
+##                 drv = drv,
842
+##                 rT = rT,
843
+##                 epistasis = epistasis,
844
+##                 orderEffects = orderEffects,
845
+##                 noIntGenes = noIntGenes                
846
+##                 )
847
+##     if(calledBy == "allFitnessEffects") {
848
+##         class(out) <- c("fitnessEffects")
849
+##     } else if(calledBy == "allMutatorEffects") {
850
+##         class(out) <- c("mutatorEffects")
851
+##     }
852
+##     return(out)
853
+## }
854
+
612 855
 
613 856
 allFitnessEffects <- function(rT = NULL,
614 857
                               epistasis = NULL,
... ...
@@ -628,7 +871,11 @@ allFitnessEffects <- function(rT = NULL,
628 871
                  " you cannot pass any of rT, epistasis, orderEffects",
629 872
                  " noIntGenes or geneToModule.")
630 873
         }
631
-        epistasis <- from_genotype_fitness(genotFitness)
874
+
875
+        genotFitness_std <- to_genotFitness_std(genotFitness, simplify = TRUE)
876
+        ## epistasis <- from_genotype_fitness(genotFitness)
877
+    } else {
878
+        genotFitness_std <- NULL
632 879
     }
633 880
     allFitnessORMutatorEffects(
634 881
         rT = rT,
... ...
@@ -638,9 +885,41 @@ allFitnessEffects <- function(rT = NULL,
638 885
         geneToModule = geneToModule,
639 886
         drvNames = drvNames,
640 887
         keepInput = keepInput,
888
+        genotFitness = genotFitness_std,
641 889
         calledBy = "allFitnessEffects")
642 890
 }
643 891
 
892
+## Former version
893
+## allFitnessEffects <- function(rT = NULL,
894
+##                               epistasis = NULL,
895
+##                               orderEffects = NULL,
896
+##                               noIntGenes = NULL,
897
+##                               geneToModule = NULL,
898
+##                               drvNames = NULL,
899
+##                               genotFitness = NULL,
900
+##                               keepInput = TRUE) {
901
+
902
+##     if(!is.null(genotFitness)) {
903
+##         if(!is.null(rT) || !is.null(epistasis) ||
904
+##            !is.null(orderEffects) || !is.null(noIntGenes) ||
905
+##            !is.null(geneToModule)) {
906
+##             stop("You have a non-null genotFitness.",
907
+##                  " If you pass the complete genotype to fitness mapping",
908
+##                  " you cannot pass any of rT, epistasis, orderEffects",
909
+##                  " noIntGenes or geneToModule.")
910
+##         }
911
+##         epistasis <- from_genotype_fitness(genotFitness)
912
+##     }
913
+##     allFitnessORMutatorEffects(
914
+##         rT = rT,
915
+##         epistasis = epistasis,
916
+##         orderEffects = orderEffects,
917
+##         noIntGenes = noIntGenes,
918
+##         geneToModule = geneToModule,
919
+##         drvNames = drvNames,
920
+##         keepInput = keepInput,
921
+##         calledBy = "allFitnessEffects")
922
+## }
644 923
 
645 924
 ## allFitnessEffects <- function(rT = NULL,
646 925
 ##                               epistasis = NULL,
... ...
@@ -867,6 +1146,18 @@ evalGenotypeORMut <- function(genotype,
867 1146
 
868 1147
     if( !(calledBy_ %in% c("evalGenotype", "evalGenotypeMut") ))
869 1148
         stop("How did you call this function?. Bug.")
1149
+
1150
+    ## fmEffects could be a mutator effect
1151
+    if(!exists("fitnessLandscape_gene_id", where = fmEffects)) {
1152
+        fmEffects$fitnessLandscape_df <- data.frame()
1153
+        fmEffects$fitnessLandscape_gene_id <- data.frame()
1154
+    }
1155
+
1156
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1157
+        (nrow(fmEffects$fitnessLandscape_df) > 0)) {
1158
+        warning("Bozic model passing a fitness landscape will not work",
1159
+                    " for now.")
1160
+    }
870 1161
     
871 1162
     if(echo)
872 1163
         cat(paste("Genotype: ", genotype))
... ...
@@ -877,9 +1168,11 @@ evalGenotypeORMut <- function(genotype,
877 1168
             genotype <- nice.vector.eo(genotype, ",")
878 1169
         }
879 1170
         all.g.nums <- c(fmEffects$geneModule$GeneNumID,
880
-                        fmEffects$long.geneNoInt$GeneNumID)
1171
+                        fmEffects$long.geneNoInt$GeneNumID,
1172
+                        fmEffects$fitnessLandscape_gene_id$GeneNumID)
881 1173
         all.g.names <- c(fmEffects$geneModule$Gene,
882
-                         fmEffects$long.geneNoInt$Gene)
1174
+                         fmEffects$long.geneNoInt$Gene,
1175
+                         fmEffects$fitnessLandscape_gene_id$Gene)
883 1176
         genotype <- all.g.nums[match(genotype, all.g.names)]
884 1177
     }
885 1178
     if(any(is.na(genotype)))
... ...
@@ -939,6 +1232,17 @@ evalGenotypeFitAndMut <- function(genotype,
939 1232
                                   verbose = FALSE,
940 1233
                                   echo = FALSE,
941 1234
                                   model = "") {
1235
+    
1236
+    ## Must deal with objects from previous, pre flfast, modifications
1237
+    if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
1238
+        fitnessEffects$fitnessLandscape_df <- data.frame()
1239
+        fitnessEffects$fitnessLandscape_gene_id <- data.frame()
1240
+    }
1241
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1242
+        (nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
1243
+        warning("Bozic model passing a fitness landscape will not work",
1244
+                    " for now.")
1245
+    }
942 1246
     prodNeg <- FALSE
943 1247
     ## Next is from evalGenotypeAndMut
944 1248
     if(echo)
... ...
@@ -950,9 +1254,11 @@ evalGenotypeFitAndMut <- function(genotype,
950 1254
             genotype <- nice.vector.eo(genotype, ",")
951 1255
         }
952 1256
         all.g.nums <- c(fitnessEffects$geneModule$GeneNumID,
953
-                        fitnessEffects$long.geneNoInt$GeneNumID)
1257
+                        fitnessEffects$long.geneNoInt$GeneNumID,
1258
+                        fitnessEffects$fitnessLandscape_gene_id$GeneNumID)
954 1259
         all.g.names <- c(fitnessEffects$geneModule$Gene,
955
-                         fitnessEffects$long.geneNoInt$Gene)
1260
+                         fitnessEffects$long.geneNoInt$Gene,
1261
+                         fitnessEffects$fitnessLandscape_gene_id$Gene)
956 1262
         genotype <- all.g.nums[match(genotype, all.g.names)]
957 1263
     }
958 1264
     if(any(is.na(genotype)))
... ...
@@ -1050,6 +1356,8 @@ evalAllGenotypesORMut <- function(fmEffects,
1050 1356
         stop("You are trying to get the mutator effects of a fitness specification. ",
1051 1357
              "You did not pass an object of class mutatorEffects.")
1052 1358
 
1359
+    
1360
+    
1053 1361
     ## if(!minimal)
1054 1362
     allg <- generateAllGenotypes(fitnessEffects = fmEffects,
1055 1363
                                  order = order, max = max)
... ...
@@ -1133,6 +1441,17 @@ evalAllGenotypesORMut <- function(fmEffects,
1133 1441
 evalAllGenotypes <- function(fitnessEffects, order = FALSE, max = 256,
1134 1442
                              addwt = FALSE,
1135 1443
                              model = "") {
1444
+    ## Must deal with objects from previous, pre flfast, modifications
1445
+    if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
1446
+        fitnessEffects$fitnessLandscape_df <- data.frame()
1447
+        fitnessEffects$fitnessLandscape_gene_id <- data.frame()
1448
+    }
1449
+
1450
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1451
+        (nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
1452
+        warning("Bozic model passing a fitness landscape will not work",
1453
+                    " for now.")
1454
+    }
1136 1455
     evalAllGenotypesORMut(
1137 1456
         fmEffects = fitnessEffects,
1138 1457
         order = order,
... ...
@@ -1149,7 +1468,11 @@ generateAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256) {
1149 1468
                                        function(x) choose(n, x) * factorial(x)))}
1150 1469
     else
1151 1470
         tot <- function(n) {2^n}
1152
-    nn <- nrow(fitnessEffects$geneModule) -1  + nrow(fitnessEffects$long.geneNoInt)
1471
+    
1472
+    nn <- nrow(fitnessEffects$geneModule) -1  +
1473
+        nrow(fitnessEffects$long.geneNoInt) +
1474
+        nrow(fitnessEffects$fitnessLandscape_gene_id)
1475
+    
1153 1476
     tnn <- tot(nn)
1154 1477
     if(tnn > max) {
1155 1478
         m <- paste("There are", tnn, "genotypes.")
... ...
@@ -1176,7 +1499,8 @@ generateAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256) {
1176 1499
     }
1177 1500
     genotNums <- list.of.vectors(genotNums)
1178 1501
     names <- c(fitnessEffects$geneModule$Gene[-1],
1179
-               fitnessEffects$long.geneNoInt$Gene)
1502
+               fitnessEffects$long.geneNoInt$Gene,
1503
+               fitnessEffects$fitnessLandscape_gene_id$Gene)
1180 1504
     
1181 1505
     genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]),
1182 1506
                                 function(z)
... ...
@@ -1207,6 +1531,18 @@ evalAllGenotypesFitAndMut <- function(fitnessEffects, mutatorEffects,
1207 1531
     } else {
1208 1532
         prodNeg <- FALSE
1209 1533
     }
1534
+
1535
+
1536
+    ## Must deal with objects from previous, pre flfast, modifications
1537
+    if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
1538
+        fitnessEffects$fitnessLandscape_df <- data.frame()
1539
+        fitnessEffects$fitnessLandscape_gene_id <- data.frame()
1540
+    }
1541
+    if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
1542
+        (nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
1543
+        warning("Bozic model passing a fitness landscape will not work",
1544
+                    " for now.")
1545
+    } 
1210 1546
     
1211 1547
     full2mutator_ <- matchGeneIDs(mutatorEffects,
1212 1548
                                   fitnessEffects)$Reduced
... ...
@@ -1344,7 +1680,10 @@ plot.fitnessEffects <- function(x, type = "graphNEL",
1344 1680
     ## layout.reingold.tilford if really a tree
1345 1681
     ## o.w. it will use the default
1346 1682
     g <- x$graph
1347
-    
1683
+    if(is.null(g))
1684
+        stop("This fitnessEffects object can not be ploted this way.",
1685
+             " It is probably one with fitness landscape specification, ",
1686
+             " so you might want to plot the fitness landscape instead.")
1348 1687
     if(type == "igraph") {
1349 1688
         if(expandModules && (!x$gMOneToOne)) {
1350 1689
             ## vlabels <- fe$geneToModule[vertex.attributes(g)$name]
... ...
@@ -1462,6 +1801,14 @@ nr_oncoSimul.internal <- function(rFE,
1462 1801
              " there must be at least two gene/loci).")
1463 1802
     }
1464 1803
 
1804
+    ## Must deal with objects from previous, pre flfast, modifications
1805
+    ## Could move this way down to the bottom, right before
1806
+    ## .Call
1807
+    if(!exists("fitnessLandscape_gene_id", where = rFE)) {
1808
+        rFE$fitnessLandscape_df <- data.frame()
1809
+        rFE$fitnessLandscape_gene_id <- data.frame()
1810
+    }
1811
+    
1465 1812
     namedGenes <- allNamedGenes(rFE)
1466 1813
 
1467 1814
     if( length(mu) > 1) {
... ...
@@ -1489,13 +1836,15 @@ nr_oncoSimul.internal <- function(rFE,
1489 1836
                        "mutation rate in the human genome is about 1e-11 to 1e-9."))
1490 1837
     }
1491 1838
     if(!is.null(initMutant)) {
1839
+        initMutantString <- initMutant
1492 1840
        if(length(grep(">", initMutant))) {
1493 1841
             initMutant <- nice.vector.eo(initMutant, ">")
1494 1842
         } else if(length(grep(",", initMutant))) {
1495 1843
             initMutant <- nice.vector.eo(initMutant, ",")
1496 1844
         }
1497 1845
         initMutant <- getGeneIDNum(rFE$geneModule,
1498
-                             rFE$long.geneNoInt,
1846
+                                   rFE$long.geneNoInt,
1847
+                                   rFE$fitnessLandscape_gene_id,
1499 1848
                                    initMutant,
1500 1849
                              FALSE)
1501 1850
        if(length(initMutant) >= countGenesFe(rFE)) {
... ...
@@ -1505,6 +1854,7 @@ nr_oncoSimul.internal <- function(rFE,
1505 1854
        
1506 1855
     } else {
1507 1856
         initMutant <- vector(mode = "integer")
1857
+        initMutantString <- ""
1508 1858
     }
1509 1859
     ## these are never user options
1510 1860
     ## if(initSize_species < 10) {
... ...
@@ -1515,6 +1865,14 @@ nr_oncoSimul.internal <- function(rFE,
1515 1865
     ## }
1516 1866
 
1517 1867
     if(typeFitness %in% c("bozic1", "bozic2")) {
1868
+        if(nrow(rFE$fitnessLandscape_df) > 0)
1869
+            warning("Bozic model passing a fitness landscape will not work",
1870
+                    " for now.")
1871
+        ## FIXME: bozic and fitness landscape
1872
+        ## the issue is that in the C++ code we directly do
1873
+        ## s = birth rate - 1
1874
+        ## but we would need something different
1875
+        ## Can be done going through epistasis, etc.
1518 1876
         thesh <- unlist(lapply(rFE$long.rt, function(x) x$sh))
1519 1877
         ## thes <- unlist(lapply(rFE$long.rt, function(x) x$s))
1520 1878
         thes <- unlist(c(lapply(rFE$long.rt, function(x) x$s),
... ...
@@ -1602,7 +1960,6 @@ nr_oncoSimul.internal <- function(rFE,
1602 1960
         fixation_list <- list()
1603 1961
     }
1604 1962
 
1605
-    
1606 1963
     return(c(
1607 1964
         nr_BNB_Algo5(rFE = rFE,
1608 1965
                      mu_ = mu,
... ...
@@ -1642,14 +1999,21 @@ nr_oncoSimul.internal <- function(rFE,
1642 1999
                      AND_DrvProbExit = AND_DrvProbExit,
1643 2000
                      fixation_list = fixation_list),
1644 2001
         Drivers = list(rFE$drv), ## but when doing pops, these will be repeated
1645
-        geneNames = list(names(getNamesID(rFE)))
2002
+        geneNames = list(names(getNamesID(rFE))),
2003
+        InitMutant = initMutantString
1646 2004
     ))
1647 2005
 }
1648 2006
 
1649 2007
 
1650 2008
 countGenesFe <- function(fe) {
1651 2009
     ## recall geneModule has Root always
1652
-    nrow(fe$geneModule) + nrow(fe$long.geneNoInt) - 1
2010
+    ## We want to be able to use objects that did not have
2011
+    ## the fitness landscape component
2012
+    if(exists("fitnessLandscape_gene_id", where = fe))
2013
+        return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) +
2014
+            nrow(fe$fitnessLandscape_gene_id) - 1)
2015
+    else
2016
+        return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) - 1)
1653 2017
 }
1654 2018
 
1655 2019
 allNamedGenes <- function(fe){
... ...
@@ -1668,14 +2032,29 @@ allNamedGenes <- function(fe){
1668 2032
     ##         stop("When using per-gene mutation rates the ",
1669 2033
     ##              "no interaction genes must be named ",
1670 2034
     ##              "(i.e., the noIntGenes vector must have names).")
1671
-    
1672
-    v1 <- fe$geneModule[, c("Gene", "GeneNumID")]
1673
-    if(nrow(fe$long.geneNoInt)) {
1674
-        v1 <- rbind(v1,
1675
-                    fe$long.geneNoInt[, c("Gene", "GeneNumID")])
2035
+
2036
+    ## accommodate objects w/o fitnessLandscape
2037
+    if(!is.null(fe$fitnessLandscape) && nrow(fe$fitnessLandscape)) {
2038
+        gn <-
2039
+            gtools::mixedsort(
2040
+                        colnames(fe$fitnessLandscape)[-ncol(fe$fitnessLandscape)])
2041
+        v1 <- data.frame(Gene = gn, GeneNumID = seq.int(length(gn)),
2042
+                        stringsAsFactors = FALSE)
2043
+    } else {
2044
+        v1 <- fe$geneModule[, c("Gene", "GeneNumID")]
2045
+        if(nrow(fe$long.geneNoInt)) {
2046
+            v1 <- rbind(v1,
2047
+                        fe$long.geneNoInt[, c("Gene", "GeneNumID")])
2048
+        }
2049
+        if(any(v1[, "Gene"] == "Root"))
2050
+            v1 <- v1[-which(v1[, "Gene"] == "Root"), ]
1676 2051
     }
1677
-    v1 <- v1[-which(v1[, "Gene"] == "Root"), ]
1678 2052
     rownames(v1) <- NULL
2053
+    if(any(v1$Gene == "WT")) {
2054
+        warning("A gene is named WT. You can expect problems ",
2055
+                "because we use WT to denote the wildtype ",
2056
+                "genotype. You might want to change it.")
2057
+    }
1679 2058
     return(v1)
1680 2059
 }
1681 2060
 
... ...
@@ -1,3 +1,11 @@
1
+Changes in version 2.9.2 (2017-11-24):
2
+	- LOD: using only the strict Szendro et al. meaning.
3
+	- POM: computed in C++.
4
+	
5
+Changes in version 2.9.1 (2017-11-10):
6
+	- Using fitness landscape directly when given as input (no
7
+          conversion to epistasis)
8
+
1 9
 Changes in version 2.7.2 (2017-09-27):
2 10
 	- genot_to_adj_mat in C++.
3 11
 	- fast_peaks (for no backmutation cases).
... ...
@@ -34,8 +34,11 @@ diversityLOD(llod)
34 34
 
35 35
 \arguments{ \item{x}{An object of class \code{oncosimulpop} (version >=
36 36
   2, so simulations with the old poset specification will not work) or
37
-  class \code{oncosimul2} (a single simulation). For \code{LOD}
38
-  simulations must have been run with \code{keepPhylog = TRUE}.}
37
+  class \code{oncosimul2} (a single simulation). }
38
+
39
+%% \item{strict}{If TRUE, a single LOD as in Szendro et al. See Details.
40
+%%   If FALSE, simulations must have been run with \code{keepPhylog = TRUE}
41
+%%   to compute all possible LODs (see Details).}
39 42
 
40 43
 \item{lpom}{A list of POMs, as returned from \code{POM} on an object of
41 44
   class \code{oncosimulpop}.}
... ...
@@ -49,35 +52,50 @@ diversityLOD(llod)
49 52
 \details{
50 53
 
51 54
   Lines of Descent (LOD) and Path of the Maximum (POM) were defined in
52
-  Szendro et al. (2013) and I follow those definitions here as closely
53
-  as possible, as applied to a process in continuous time with sampling
54
-  at user-specified periods.
55
-
56
-  For POM, the results can depend strongly on how often we sample and
57
-  keep samples (i.e., the \code{sampleEvery} and \code{keepEvery}
58
-  arguments to \code{oncoSimulIndiv} and \code{oncoSimulPop}), since the
59
-  POM is computed from the values stored in the \code{pops.by.time}
60
-  matrix. This also explains why it is generally meaningless to use POM on
61
-  \code{oncoSimulSample} runs: these only keep the very last sample.
62
-
63
-
64
-  For LOD my implementation is not exactly identical to the definition
65
-  given in p. 572 of Szendro et al. (2013). First, in case this might be
66
-  useful, for each simulation I keep all the paths that
67
-  "(...) arrive at the most populated genotype at the final time" (first
68
-  paragraph in p. 572 of Szendro et al.), whereas they only keep one
69
-  (see second column of p. 572). However, I do provide a single LOD for
70
-  each run, too. This is the first path to arrive at the genotype that
71
-  eventually becomes the most populated genotype at the final time (and,
72
-  in this sense, agrees with the LOD of Szendro et al.). However, in
73
-  contrast to what is apparently done in Szendro
74
-  ("A given genotype may undergo several episodes of colonization and extinction that are stored by the algorithm, and the last episode before the colonization of the final state is used to construct the step."),
75
-  I do not check that this genotype (which is the one that will become
76
-  the most populated at final time) does not become extinct before the
77
-  final colonization. So there could be other paths (all in
78
-  \code{all_paths}) that are actually the one(s) that are colonizers of
79
-  the most populated genotype (with no extinction before the final
80
-  colonization).
55
+  Szendro et al. (2013) and I follow those definitions here, as applied
56
+  to a process in continuous time with sampling at user-specified
57
+  periods.
58
+
59
+  For POM, the results can depend strongly on how often we sample (i.e.,
60
+  the \code{sampleEvery} argument to \code{oncoSimulIndiv} and
61
+  \code{oncoSimulPop}), since the POM is computed by finding the clone
62
+  with largest population size whenever we sample.%% from the values
63
+  %% stored in the \code{pops.by.time} matrix.
64
+  This also explains why
65
+  it is generally meaningless to use POM on \code{oncoSimulSample} runs:
66
+  these only keep the very last sample.
67
+
68
+
69
+  For LOD, %% when using \code{strict = TRUE}, 
70
+  a single LOD per simulation
71
+  is returned, with the same meaning as that in p. 572 of Szendro et
72
+  al. (2013). "A given genotype may undergo several episodes of colonization and extinction that are stored by the algorithm, and the last episode before the colonization of the final state is used to construct the step.",
73
+  and I check that this genotype (which is the one that will become the
74
+  most populated at final time) does not become extinct before the final
75
+  colonization.
76
+
77
+  %% If \code{strict = FALSE}, and if you have run the simulations with
78
+  %% \code{keepPhylog = TRUE}, then a I return both \code{all_paths} and
79
+  %% \code{lod_single}, with meanings as follow.  First, in case this might
80
+  %% be useful, for each simulation I keep all the paths that
81
+  %% "(...) arrive at the most populated genotype at the final time" (first
82
+  %% paragraph in p. 572 of Szendro et al.), and these are stored in
83
+  %% \code{all_paths}.  When \code{strict = FALSE} I also provide another
84
+  %% single LOD for each run, too. This is the first path to arrive at the
85
+  %% genotype that eventually becomes the most populated genotype at the
86
+  %% final time (and, in this sense, agrees with the LOD of Szendro et
87
+  %% al.). However, in contrast to what is done in Szendro
88
+  %% ("A given genotype may undergo several episodes of colonization and extinction that are stored by the algorithm, and the last episode before the colonization of the final state is used to construct the step.")
89
+  %% and when \code{strict = TRUE}, I do not check that this genotype
90
+  %% (which is the one that will become the most populated at final time)
91
+  %% does not become extinct before the final colonization. So there could
92
+  %% be other paths (all in \code{all_paths}) that are actually the one(s)
93
+  %% that are colonizers of the most populated genotype (with no extinction
94
+  %% before the final colonization).
95
+
96
+  Note \emph{breaking changes}: for LOD we used to return all lines of
97
+  descent in a given simulation. In v. 2.9.1 we also returned the LOD
98
+  as explained above. Now we only return the LOD as defined above.
81 99
   
82 100
   
83 101
 }
... ...
@@ -89,18 +107,29 @@ diversityLOD(llod)
89 107
   the ordered set of genotypes that contain the largest subpopulation at
90 108
   the times of sampling.
91 109
 
92
-  For \code{LOD}, if \code{x} is a single simulation, a two-element
93
-  list. The first, \code{all_paths}, contains all paths to the
94
-  maximum. The second, \code{lod_single}, contain the single LOD which
95
-  is closest in meaning to the original definition of Szendro et
96
-  al. (See "Details"). If \code{x} is a list (population)  of
97
-  simulations, then a list where each element is a two-element list, as
98
-  just explained. All the lists contain objects of class "igraph.vs" (an
99
-  igraph vertex sequence: see \code{\link[igraph]{vertex_attr}}).  
110
+  For \code{LOD}, if \code{x} is a single simulation, the line of
111
+  descent as defined above (either an object of class "igraph.vs" (an
112
+  igraph vertex sequence: see \code{\link[igraph]{vertex_attr}}) or a
113
+  character vector if there were no descendants). If \code{x} is a list
114
+  (population) of simulations, then a list where each element is a list
115
+  as just explained.
116
+
117
+  %% a two-element
118
+  %% list. If \code{strict = TRUE}, only \code{lod_single} is returned. If
119
+  %% \code{strict = FALSE} (and simulations were run with \code{keepPhylog
120
+  %% = TRUE}), \code{all_paths} contains all paths to the maximum, and
121
+  %% \code{lod_single} contains the single LOD which first arrives at the
122
+  %% maximum.
123
+
124
+  %% If \code{x} is a list (population) of simulations, then a list
125
+  %% where each element is a two-element list, as just explained.
126
+  %% All the lists
127
+  %% contain objects of class "igraph.vs" (an igraph vertex sequence: see
128
+  %% \code{\link[igraph]{vertex_attr}}).
100 129
  
101 130
   For \code{diversityLOD} and \code{diversityPOM} a single element
102
-  vector with the Shannon's diversity (entropy) of the \code{lod_single}
103
-  (for \code{diversityLOD}) or of the POMs (for \code{diversityPOM}).
131
+  vector with the Shannon's diversity (entropy) of the LODs (for
132
+  \code{diversityLOD}) or of the POMs (for \code{diversityPOM}).
104 133
 
105 134
 }
106 135
 
... ...
@@ -138,8 +167,8 @@ pancr <- allFitnessEffects(data.frame(parent = c("Root", rep("KRAS", 4), "SMAD4"
138 167
                                       typeDep = "MN"))
139 168
 
140 169
 
141
-pancr1 <- oncoSimulIndiv(pancr, model = "Exp", keepPhylog = TRUE)
142
-pancr8 <- oncoSimulPop(8, pancr, model = "Exp", keepPhylog = TRUE,
170
+pancr1 <- oncoSimulIndiv(pancr, model = "Exp")
171
+pancr8 <- oncoSimulPop(8, pancr, model = "Exp",
143 172
                        mc.cores = 2)
144 173
 
145 174
 POM(pancr1)
... ...
@@ -230,7 +230,11 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
230 230
 					std::mt19937& ran_gen,
231 231
 					const bool& AND_DrvProbExit,
232 232
 					const std::vector<std::vector<int> >& fixation_l,
233
-					const double& fatalPopSize = 1e15) {
233
+					POM& pom,
234
+					const std::map<int, std::string>& intName,
235
+					const fitness_as_genes& genesInFitness,
236
+					const double& fatalPopSize = 1e15
237
+					) {
234 238
   // Fill out, but also compute totPopSize
235 239
   // and return sample summaries for popsize, drivers.
236 240
   
... ...
@@ -389,12 +393,17 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
389 393
     storeThis = true;
390 394
 
391 395
 
396
+  
397
+  
398
+  // Reuse some info for POM
399
+
392 400
   if( storeThis ) {
393 401
     lastStoredSample = currentTime;
394 402
     outNS_i++;
395 403
     int ndr_lp = 0;
396 404
     double l_pop_s = 0.0;
397
-    
405
+    int largest_clone = -99;
406
+
398 407
     time_out.push_back(currentTime);
399 408
     
400 409
     for(size_t i = 0; i < popParams.size(); ++i) {
... ...
@@ -405,12 +414,21 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
405 414
       if(popParams[i].popSize > l_pop_s) {
406 415
 	l_pop_s = popParams[i].popSize;
407 416
 	ndr_lp = getGenotypeDrivers(Genotypes[i], drv).size();
417
+	largest_clone = i;
408 418
       }
409 419
     }
410 420
     sampleTotPopSize.push_back(totPopSize);
411 421
     sampleLargestPopSize.push_back(l_pop_s);
412 422
     sampleMaxNDr.push_back(max_ndr);
413 423
     sampleNDrLargestPop.push_back(ndr_lp);
424
+
425
+    if(l_pop_s > 0) {
426
+      if (largest_clone < 0)
427
+    	throw std::logic_error("largest_clone < 0");
428
+      addToPOM(pom, Genotypes[largest_clone], intName, genesInFitness);
429
+    } else {
430
+      addToPOM(pom, "_EXTINCTION_");
431
+    }
414 432
   } 
415 433
   
416 434
   if( !std::isfinite(totPopSize) ) {
... ...
@@ -419,8 +437,24 @@ void nr_totPopSize_and_fill_out_crude_P(int& outNS_i,
419 437
   if( std::isnan(totPopSize) ) {
420 438
     throw std::range_error("totPopSize is NaN");
421 439
   }
422
-  
423
-
440
+  // For POM
441
+  if( !storeThis ) {
442
+    double l_pop_s = 0.0;
443
+    int largest_clone = -99;
444
+    for(size_t i = 0; i < popParams.size(); ++i) {
445
+      if(popParams[i].popSize > l_pop_s) {
446
+	l_pop_s = popParams[i].popSize;
447
+	largest_clone = i;
448
+      }
449
+    }
450
+    if(l_pop_s > 0) {
451
+      if (largest_clone < 0)
452
+	throw std::logic_error("largest_clone < 0");
453
+      addToPOM(pom, Genotypes[largest_clone], intName, genesInFitness);
454
+    } else {
455
+      addToPOM(pom, "_EXTINCTION_");
456
+    }
457
+  }
424 458
 }
425 459
 
426 460
 // FIXME: I might want to return the actual drivers in each period
... ...
@@ -598,7 +632,6 @@ std::string genotypeToNameString(const std::vector<int>& genotypeV,
598 632
 
599 633
   std::vector<int> order_int;
600 634
   std::vector<int> rest_int;
601
-
602 635
    
603 636
   for(auto const &g : genotypeV) {
604 637
     if( binary_search(fg.orderG.begin(), fg.orderG.end(), g)) {
... ...
@@ -612,6 +645,7 @@ std::string genotypeToNameString(const std::vector<int>& genotypeV,
612 645
   std::string order_part;
613 646
   std::string rest;
614 647
   std::string comma = "";
648
+  
615 649
   // FIXME: when sure no problems, remove at if needed for speed.
616 650
   for(auto const &g : order_int) {
617 651
     order_part += (comma + intName.at(g));
... ...
@@ -739,18 +773,85 @@ static void nr_sample_all_pop_P(std::vector<int>& sp_to_remove,
739 773
   }
740 774
 }
741 775
 
742
-
776
+// zz: add population size of parent, to get the true LOD
777
+// as in Szendro
743 778
 void addToPhylog(PhylogName& phylog,
744 779
 		 const Genotype& parent,
745 780
 		 const Genotype& child,
746
-		 double time,
781
+		 const double time,
747 782
 		 const std::map<int, std::string>& intName,
748
-		 const fitness_as_genes& fg) {
783
+		 const fitness_as_genes& fg,
784
+		 const double pop_size_child) {
749 785
   phylog.time.push_back(time);
750 786
   phylog.parent.push_back(genotypeToNameString(genotypeSingleVector(parent),
751 787
 					       fg, intName));
752 788
   phylog.child.push_back(genotypeToNameString(genotypeSingleVector(child),
753 789
 					      fg, intName));
790
+  phylog.pop_size_child.push_back(pop_size_child);
791
+}
792
+
793
+// // Only called when the child has pop size of 0
794
+// // so true LOD
795
+// void addToLOD(LOD& lod,
796
+// 	      const Genotype& parent,
797
+// 	      const Genotype& child,
798
+// 	      // const double time,
799
+// 	      const std::map<int, std::string>& intName,
800
+// 	      const fitness_as_genes& fg) {
801
+//   // lod.time.push_back(time);
802
+//   lod.parent.push_back(genotypeToNameString(genotypeSingleVector(parent),
803
+// 					       fg, intName));
804
+//   lod.child.push_back(genotypeToNameString(genotypeSingleVector(child),
805
+// 					      fg, intName));
806
+// }
807
+
808
+
809
+// Only called when the child has pop size of 0
810
+// so true LOD
811
+// Use a map for LOD, and overwrite the parent:
812
+// we only add when the size of the child is 0
813
+// The key of the map is the child.
814
+void addToLOD(std::map<std::string, std::string>& lod,
815
+	      const Genotype& parent,
816
+	      const Genotype& child,
817
+	      // const double time,
818
+	      const std::map<int, std::string>& intName,
819
+	      const fitness_as_genes& fg) {
820
+  std::string parent_str = genotypeToNameString(genotypeSingleVector(parent),
821
+					 fg, intName);
822
+  std::string child_str = genotypeToNameString(genotypeSingleVector(child),
823
+					       fg, intName);
824
+  lod[child_str] = parent_str;
825
+  // // lod.time.push_back(time);
826
+  // lod.parent.push_back(genotypeToNameString(genotypeSingleVector(parent),
827
+  // 					       fg, intName));
828
+  // lod.child.push_back(genotypeToNameString(genotypeSingleVector(child),
829
+  // 					      fg, intName));
830
+}
831
+
832
+
833
+void addToPOM(POM& pom,
834
+	      const Genotype& genotype,
835
+	      const std::map<int, std::string>& intName,
836
+	      const fitness_as_genes& fg) {
837
+
838
+  if (pom.genotypes.empty()) {
839
+    std::string g = genotypeToNameString(genotypeSingleVector(genotype),
840
+				       fg, intName);
841
+    pom.genotypesString.push_back(g);
842
+    pom.genotypes.push_back(genotype);
843
+  } else if ( !(pom.genotypes.back() == genotype) ) {
844
+    std::string g = genotypeToNameString(genotypeSingleVector(genotype),
845
+				       fg, intName);
846
+    pom.genotypesString.push_back(g);
847
+    pom.genotypes.push_back(genotype);
848
+  }
849
+}
850
+
851
+// to explicitly signal extinction
852
+void addToPOM(POM& pom,
853
+	      const std::string string) {
854
+  pom.genotypesString.push_back(string);
754 855
 }
755 856
 
756 857
 
... ...
@@ -811,7 +912,10 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
811 912
 			const bool& AND_DrvProbExit,
812 913
 			const std::vector< std::vector<int> >& fixation_l,
813 914
 			 int& ti_dbl_min,
814
-			 int& ti_e3) {
915
+			 int& ti_e3,
916
+			 std::map<std::string, std::string>& lod,
917
+			 // LOD& lod,
918
+			 POM& pom) {
815 919
   
816 920
   double nextCheckSizeP = checkSizePEvery;
817 921
   const int numGenes = fitnessEffects.genomeSize;
... ...
@@ -826,7 +930,7 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
826 930
   double dummyMutationRate = std::max(std::min(minmu/1.0e4, targetmindummy),
827 931
 				      mymindummy);
828 932
   // This should very rarely happen:
829
-  if(minmu <= 1e-9 ) {
933
+  if(minmu <= mymindummy) { // 1e-9 
830 934
     double newdd = minmu/100.0;
831 935
     Rcpp::Rcout << "WARNING: the smallest mutation rate is "
832 936
 		<< "<= " << mymindummy << ". That is a really small value"
... ...
@@ -842,6 +946,9 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
842 946
   genot_out.clear();
843 947
 
844 948
   phylog = PhylogName();
949
+  // lod = LOD(); 
950
+  lod.clear();
951
+  pom = POM();
845 952
   
846 953
   popSizes_out.clear();
847 954
   index_out.clear();
... ...
@@ -1089,8 +1196,10 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
1089 1196
     // we pass as the parent the tmpParam; it better initialize
1090 1197
     // everything right, or that will blow. Reset to init
1091 1198
     init_tmpP(tmpParam);
1199
+    addToPOM(pom, Genotypes[0], intName, genesInFitness);
1092 1200
   } else { //no initMutant
1093 1201
     popParams[0].numMutablePos = numGenes;
1202
+    
1094 1203
     // if(typeModel == TypeModel::beerenwinkel) {
1095 1204
     //   popParams[0].death = 1.0;
1096 1205
     //   // initialize to prevent birth/mutation warning with Beerenwinkel
... ...
@@ -1420,6 +1529,7 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
1420 1529
 			newMutations,
1421 1530
 			ran_gen,
1422 1531
 			mu);
1532
+	//DP2(newMutations);
1423 1533
 	// nr_change
1424 1534
 	// getMutatedPos_bitset(mutatedPos, numMutablePosParent, // r,
1425 1535
 	// 		     ran_gen,
... ...
@@ -1494,9 +1604,9 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
1494 1604
 	    // if(keepMutationTimes)
1495 1605
 	    //   update_mutation_freqs(newMutation, currentTime, mutation_freq_at);
1496 1606
 	    //FIXME: phylog
1497
-	    if(keepPhylog)
1498
-	      addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime,
1499
-			  intName, genesInFitness);
1607
+	    // if(keepPhylog)
1608
+	    //   addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime,
1609
+	    // 		  intName, genesInFitness);
1500 1610
 	    
1501 1611
 	    tmpParam.numMutablePos = numMutablePosParent - 1;
1502 1612
 	    tmpParam.mutation = mutationFromScratch(mu, tmpParam, newGenotype,
... ...
@@ -1531,7 +1641,18 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
1531 1641
 	  
1532 1642
 	    g_tmp1_nr = tmpParam.death/tmpParam.mutation;
1533 1643
 	    if(g_tmp1_nr < g_min_death_mut_ratio_nr) g_min_death_mut_ratio_nr = g_tmp1_nr;	
1534
-#endif	  
1644
+#endif
1645
+
1646
+	    // LOD:
1647
+	    // here first call to addToPhylog, with popSize popParams[sp].popSize
1648
+	    // and it is 0
1649
+	    if(keepPhylog)
1650
+	      addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime,
1651
+			  intName, genesInFitness, 0);
1652
+	    // LOD, as LOD sensu stricto, always done now
1653
+	    addToLOD(lod, Genotypes[nextMutant], newGenotype, // currentTime,
1654
+			intName, genesInFitness);
1655
+
1535 1656
 	  } else {// fitness is 0, so we do not add it
1536 1657
 	    --sp;
1537 1658
 	    --numSpecies;
... ...
@@ -1679,6 +1800,14 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
1679 1800
 	  // popParams[sp].timeLastUpdate = -99999.99999; // to catch errors
1680 1801
 #endif
1681 1802
 	  //popParams[sp].Flag = true;
1803
+
1804
+	    //zz: LOD:
1805
+	    // here one of the calls to addToPhylog, with popSize popParams[sp].popSize
1806
+	    if(keepPhylog)
1807
+	      addToPhylog(phylog, Genotypes[nextMutant], newGenotype, currentTime,
1808
+			  intName, genesInFitness, popParams[sp].popSize);
1809
+	    
1810
+
1682 1811
 	}
1683 1812
 	//   ***************  5.7 ***************
1684 1813
 	// u_2 irrelevant if to_update = 1;
... ...
@@ -1751,7 +1880,9 @@ static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
1751 1880
 					 nextCheckSizeP,
1752 1881
 					 ran_gen,
1753 1882
 					 AND_DrvProbExit,
1754
-					 fixation_l); //keepEvery is for thinning
1883
+					 fixation_l,
1884
+					 pom, intName,
1885
+					 genesInFitness); //keepEvery is for thinning
1755 1886
       if(verbosity >= 3) {
1756 1887
 	Rcpp::Rcout << "\n popParams.size() before sampling " << popParams.size() 
1757 1888
 		  << "\n totPopSize after sampling " << totPopSize << "\n";
... ...
@@ -1892,7 +2023,10 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1892 2023
   std::map<int, std::string> intName = mapGenesIntToNames(fitnessEffects);
1893 2024
   fitness_as_genes genesInFitness = fitnessAsGenes(fitnessEffects);
1894 2025
   PhylogName phylog;
1895
-
2026
+  // LOD lod;
2027
+  std::map<std::string, std::string> lod;
2028
+  POM pom;
2029
+  
1896 2030
   // Mutator effects
1897 2031
   fitnessEffectsAll muEF;
1898 2032
   if( (full2mutator.size() != 0) ) 
... ...
@@ -1903,7 +2037,6 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1903 2037
   if( (full2mutator.size() != 0) && (muEF.genomeSize == 0))
1904 2038
     throw std::logic_error("full2mutator > 0 with mutatorEffects.genomesize 0");
1905 2039
   if( (full2mutator.size() == 0) && (muEF.genomeSize != 0)) {
1906
-    DP2(muEF.genomeSize);
1907 2040
     throw std::logic_error("full2mutator 0 with mutatorEffects.genomesize != 0");
1908 2041
   }
1909 2042
 
... ...
@@ -2089,7 +2222,9 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
2089 2222
 		  AND_DrvProbExit,
2090 2223
 		  fixation_l,
2091 2224
 		  ti_dbl_min,
2092
-		  ti_e3);
2225
+		  ti_e3,
2226
+		  lod,
2227
+		  pom);
2093 2228
       ++numRuns;
2094 2229
       forceRerun = false;
2095 2230
       accum_ti_dbl_min += ti_dbl_min;
... ...
@@ -2185,12 +2320,29 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
2185 2320
   driverCounts(maxNumDrivers, totalPresentDrivers,
2186 2321
 	       countByDriver, presentDrivers,
2187 2322
 	       returnGenotypes, fitnessEffects.drv);
2188
-  
2323
+
2324
+ 
2189 2325
   std::vector<std::string> genotypesAsStrings =
2190 2326
     genotypesToNameString(uniqueGenotypes_vector_nr, genesInFitness, intName);
2191 2327
   std::string driversAsString =
2192 2328
     driversToNameString(presentDrivers, intName);
2193 2329
 
2330
+  // // // zz: debugging
2331
+  // // // Correct too
2332
+  // DP1("intName");
2333
+  // for(auto mmm: intName) {
2334
+  //   Rcpp::Rcout << mmm.first << " :" ;
2335
+  //   Rcpp::Rcout << mmm.second << std::endl;
2336
+  // }
2337
+
2338
+
2339
+  // // wrong
2340
+  // DP1("genotypesAsStrings");
2341
+  // for(auto gas: genotypesAsStrings) {
2342
+  //   Rcpp::Rcout << gas;
2343
+  //   Rcpp::Rcout << std::endl;
2344
+  // }
2345
+
2194 2346
   
2195 2347
   std::vector<double> sampleLargestPopProp(outNS_i + 1);
2196 2348
   if((outNS_i + 1) != static_cast<int>(sampleLargestPopSize.size()))
... ...
@@ -2203,6 +2355,13 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
2203 2355
   fill_SStats(perSampleStats, sampleTotPopSize, sampleLargestPopSize,
2204 2356
   	      sampleLargestPopProp, sampleMaxNDr, sampleNDrLargestPop);
2205 2357
 
2358
+  // create the lod return pieces. Move to a function later
2359
+  std::vector<std::string> lod_parent;
2360
+  std::vector<std::string> lod_child;
2361
+  for (const auto &l : lod) {
2362
+    lod_child.push_back(l.first);
2363
+    lod_parent.push_back(l.second);
2364
+  }
2206 2365
   
2207 2366
   return
2208 2367
     List::create(Named("pops.by.time") = outNS,
... ...
@@ -2243,12 +2402,19 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
2243 2402
 					       Named("PhylogDF") =  DataFrame::create(
2244 2403
 										      Named("parent") = phylog.parent,
2245 2404
 										      Named("child") = phylog.child,
2246
-										      Named("time") = phylog.time
2405
+										      Named("time") = phylog.time,
2406
+										      Named("pop_size_child") = phylog.pop_size_child
2247 2407
 										      ),
2248 2408
 					       Named("UnrecoverExcept") = false,
2249 2409
 					       Named("numRecoverExcept") = numRecoverExcept,
2250 2410
 					       Named("accum_ti_dbl_min") = accum_ti_dbl_min,
2251
-					       Named("accum_ti_e3") = accum_ti_e3)
2411
+					       Named("accum_ti_e3") = accum_ti_e3,
2412
+					       Named("LOD_DF") = DataFrame::create(
2413
+										   Named("parent") = lod_parent, // lod.parent,
2414
+										   Named("child") = lod_child //lod.child
2415
+										   ),
2416
+					       Named("POM") = Rcpp::wrap(pom.genotypesString)
2417
+					       )
2252 2418
 		 );
2253 2419
 }
2254 2420
 
... ...
@@ -186,5 +186,6 @@ void updateRatesBeeren(std::vector<spParamsP>& popParams,
186 186
 void detect_ti_duplicates(const std::multimap<double, int>& m,
187 187
 			  const double ti,
188 188
 			  const int spcies);
189
+
189 190
 #endif
190 191
 
... ...
@@ -3,7 +3,7 @@
3 3
 
4 4
 #include<Rcpp.h>
5 5
 
6
-//#define DEBUGZ
6
+// #define DEBUGZ
7 7
 // #define DEBUGV
8 8
 #define DEBUGW
9 9
 
... ...
@@ -22,6 +22,7 @@
22 22
 #include <iomanip> 
23 23
 #include <algorithm>
24 24
 #include <random>
25
+#include <string>
25 26
 
26 27
 
27