Browse code

version 2.7.2 Changes in version 2.7.2 (2017-09-27): - genot_to_adj_mat in C++. - fast_peaks (for no backmutation cases). - Better explanation and testing of peaks and valleys. - Clarified simOGraph transitive reduction. - Better handling of ti corner cases. - Magellan reading fuctions adapted to output of newer (as of 2017-07) version of Magellan. - sorting gene names in allGenotypes_to_matrix. - sampledGenotypes: genotype names with sorted gene names.

ramon diaz-uriarte (at Phelsuma) authored on 27/09/2017 10:23:50
Showing19 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.7.0
5
-Date: 2017-04-07
4
+Version: 2.7.2
5
+Date: 2017-09-27
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -1590,7 +1590,7 @@ get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) {
1590 1590
           
1591 1591
           if (length(candidate.time) == 0) {
1592 1592
               warning(paste("There is not a single sampled time",
1593
-                            "at which there are any mutants.",
1593
+                            "at which there are any mutants with drivers. ",
1594 1594
                             "Thus, no uniform sampling possible.",
1595 1595
                             "You will get NAs"))
1596 1596
               the.time <- -99
... ...
@@ -21,6 +21,19 @@ accessibleGenotypes <- function(y, f, numMut, th) {
21 21
     .Call('OncoSimulR_accessibleGenotypes', PACKAGE = 'OncoSimulR', y, f, numMut, th)
22 22
 }
23 23
 
24
+
25
+genot2AdjMat <- function(y, f, numMut) {
26
+    .Call('OncoSimulR_genot2AdjMat', PACKAGE = 'OncoSimulR', y, f, numMut)
27
+}
28
+
29
+peaksLandscape <- function(y, f, numMut, th) {
30
+    .Call('OncoSimulR_peaksLandscape', PACKAGE = 'OncoSimulR', y, f, numMut, th)
31
+}
32
+
33
+accessibleGenotypes_former <- function(y, f, numMut, th) {
34
+    .Call('OncoSimulR_accessibleGenotypes_former', PACKAGE = 'OncoSimulR', y, f, numMut, th)
35
+}
36
+
24 37
 ## readFitnessEffects <- function(rFE, echo) {
25 38
 ##     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
26 39
 ## }
... ...
@@ -18,6 +18,9 @@
18 18
 
19 19
 ## FIXME: show only accessible paths? 
20 20
 
21
+## FIXME: if using only_accessible, maybe we
22
+## can try to use fast_peaks, and use the slower
23
+## approach as fallback (if identical fitness)
21 24
 plotFitnessLandscape <- function(x, show_labels = TRUE,
22 25
                                  col = c("green4", "red", "yellow"),
23 26
                                  lty = c(1, 2, 3), use_ggrepel = FALSE,
... ...
@@ -224,13 +227,45 @@ filter_inaccessible <- function(x, accessible_th) {
224 227
 }
225 228
 
226 229
 
230
+## wrapper to the C++ code
231
+fast_peaks <- function(x, th) {
232
+    ## x is the fitness matrix, not adjacency matrix
233
+
234
+    ## Only works if no connected genotypes that form maxima. I.e., no
235
+    ## identical fitness. Do a sufficient check for it (too inclusive, though)
236
+    ## And only under no back mutation
237
+
238
+    original_pos <- 1:nrow(x)
239
+    numMut <- rowSums(x[, -ncol(x)])
240
+    o_numMut <- order(numMut)
241
+    x <- x[o_numMut, ]
242
+    numMut <- numMut[o_numMut]
243
+    f <- x[, ncol(x)]
244
+    ## Two assumptions
245
+    stopifnot(numMut[1] == 0)
246
+    ## make sure no repeated in those that could be maxima
247
+    if(any(duplicated(f[f >= f[1]])))
248
+        stop("There could be several connected maxima genotypes.",
249
+             " This function is not safe to use.")
250
+    
251
+    y <- x[, -ncol(x)]
252
+    storage.mode(y) <- "integer"
253
+    original_pos <- original_pos[o_numMut]
254
+    return(sort(original_pos[peaksLandscape(y, f,
255
+                          as.integer(numMut),
256
+                          th)]))
257
+}
258
+
259
+
227 260
 ## wrapper to the C++ code
228 261
 wrap_accessibleGenotypes <- function(x, th) {
229 262
     ## x is the fitness matrix, not adjacency matrix
263
+    original_pos <- 1:nrow(x)
230 264
     numMut <- rowSums(x[, -ncol(x)])
231 265
     o_numMut <- order(numMut)
232 266
     x <- x[o_numMut, ]
233 267
     numMut <- numMut[o_numMut]
268
+    original_pos <- original_pos[o_numMut]
234 269
     
235 270
     y <- x[, -ncol(x)]
236 271
     storage.mode(y) <- "integer"
... ...
@@ -238,7 +273,29 @@ wrap_accessibleGenotypes <- function(x, th) {
238 273
     acc <- accessibleGenotypes(y, x[, ncol(x)],
239 274
                                as.integer(numMut),
240 275
                                th)
241
-    return(acc[acc > 0])
276
+    ## return(acc[acc > 0])
277
+    return(sort(original_pos[acc[acc > 0]]))
278
+}
279
+
280
+
281
+## wrapper to the C++ code; the former one, only for testing. Remove
282
+## eventually FIXME
283
+wrap_accessibleGenotypes_former <- function(x, th) {
284
+    ## x is the fitness matrix, not adjacency matrix
285
+    original_pos <- 1:nrow(x)
286
+    numMut <- rowSums(x[, -ncol(x)])
287
+    o_numMut <- order(numMut)
288
+    x <- x[o_numMut, ]
289
+    numMut <- numMut[o_numMut]
290
+    original_pos <- original_pos[o_numMut]
291
+    
292
+    y <- x[, -ncol(x)]
293
+    storage.mode(y) <- "integer"
294
+
295
+    acc <- accessibleGenotypes_former(y, x[, ncol(x)],
296
+                                      as.integer(numMut),
297
+                                      th)
298
+    return(sort(original_pos[acc[acc > 0]]))
242 299
 }
243 300
 
244 301
 ## A transitional function
... ...
@@ -378,8 +435,10 @@ generate_matrix_genotypes <- function(g) {
378 435
 }
379 436
 
380 437
 
381
-
382
-genot_to_adj_mat <- function(x) {
438
+## The R version. See also the C++ one
439
+genot_to_adj_mat_R <- function(x) {
440
+    ## x is the fitness matrix
441
+    
383 442
     ## FIXME this can take about 23% of the time of the ggplot call.
384 443
     ## But them, we are quickly constructing a 2000*2000 matrix
385 444
     ## Given a genotype matrix, as given by allGenotypes_to_matrix, produce a
... ...
@@ -390,13 +449,16 @@ genot_to_adj_mat <- function(x) {
390 449
     ## FIXME: code is now in place to do all of this in C++
391 450
     
392 451
     ## Make sure sorted, so ancestors always before descendants
393
-    rs0 <- rowSums(x[, -ncol(x)])
394
-    x <- x[order(rs0), ]
395
-    rm(rs0)
452
+    original_pos <- 1:nrow(x)
453
+    numMut <- rowSums(x[, -ncol(x)])
454
+    o_numMut <- order(numMut)
455
+    x <- x[o_numMut, ]
456
+    original_pos <- original_pos[o_numMut]
457
+    rm(numMut)
396 458
     
397 459
     y <- x[, -ncol(x)]
398 460
     f <- x[, ncol(x)]
399
-    rs <- rowSums(y)
461
+    rs <- rowSums(y) ## redo for paranoia; could have ordered numMut
400 462
 
401 463
     ## Move this to C++?
402 464
     adm <- matrix(NA, nrow = length(rs), ncol = length(rs))
... ...
@@ -408,9 +470,51 @@ genot_to_adj_mat <- function(x) {
408 470
             if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
409 471
         }
410 472
     }
473
+    colnames(adm) <- rownames(adm) <- original_pos
411 474
     return(adm)
412 475
 }
413 476
 
477
+genot_to_adj_mat <- function(x) {
478
+    ## x is the fitness matrix
479
+
480
+    ## adding column and row names should rarely be necessary
481
+    ## as these are internal functions, etc. But just in case
482
+    original_pos <- 1:nrow(x)
483
+    numMut <- rowSums(x[, -ncol(x)])
484
+    o_numMut <- order(numMut)
485
+    x <- x[o_numMut, ]
486
+    numMut <- numMut[o_numMut]
487
+    original_pos <- original_pos[o_numMut]
488
+    
489
+    y <- x[, -ncol(x)]
490
+    storage.mode(y) <- "integer"
491
+
492
+    adm <- genot2AdjMat(y, x[, ncol(x)],
493
+                        as.integer(numMut))
494
+    colnames(adm) <- rownames(adm) <- original_pos
495
+    return(adm)
496
+}
497
+
498
+
499
+## ## to move above to C++ note that loop can be
500
+## for(i in 1:length(rs)) { ## i is the current genotype
501
+##     for(j in (i:length(rs))) {
502
+##         if(rs[j] > (rs[i] + 1)) break;
503
+##         else if(rs[j] == (rs[i] + 1)) {
504
+##             ## and use here my HammingDistance function
505
+##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
506
+##             ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
507
+##             if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]);
508
+##             }
509
+##     }
510
+## }
511
+
512
+## actually, all that is already in accessibleGenotypes except for the
513
+## filling up of adm.
514
+
515
+
516
+
517
+
414 518
 
415 519
 peak_valley <- function(x) {
416 520
     ## FIXME: when there are no identical entries, all this
... ...
@@ -466,7 +570,13 @@ peak_valley <- function(x) {
466 570
     }
467 571
     bad_fwd <- vector("integer", nrow(x))
468 572
     for(i in 1:nrow(x)) {
573
+        ## Eh, why any? All.
574
+        ## Nope, any: we want peaks in general, not just
575
+        ## under assumption of "no back mutation"
576
+        ## We get a different result when we restrict to accessible
577
+        ## because all < 0 in adjacency are turned to NAs.
469 578
         if( any(x[, i] < 0, na.rm = TRUE) || bad_fwd[i] ) {
579
+        ## if( all(x[, i] < 0, na.rm = TRUE) ) {
470 580
             ## this node is bad. Any descendant with fitness >= is bad
471 581
             bad_fwd[i] <- 1
472 582
             reach_f <- which(x[i, ] <= 0)
... ...
@@ -478,3 +588,36 @@ peak_valley <- function(x) {
478 588
     peak <- setdiff(candidate, bad)
479 589
     return(list(peak = peak, valley = valley))
480 590
 }
591
+
592
+
593
+
594
+## For the future
595
+## ## data.frame (two columns: genotype with "," and Fitness) -> fitness graph (DAG)
596
+## ## Return an adj matrix of the fitness graph from a fitness
597
+## ## landscape
598
+## ## Based on code in plotFitnessLandscape
599
+## flandscape_to_fgraph <- function(afe) {
600
+##     gfm <- OncoSimulR:::allGenotypes_to_matrix(afe)
601
+##     ## mutated <- rowSums(gfm[, -ncol(gfm)])
602
+##     gaj <- OncoSimulR:::genot_to_adj_mat(gfm)
603
+##     gaj2 <- OncoSimulR:::filter_inaccessible(gaj, 0)
604
+##     stopifnot(all(na.omit(as.vector(gaj == gaj2))))
605
+##     remaining <- as.numeric(colnames(gaj2))
606
+##     ## mutated <- mutated[remaining]
607
+##     afe <- afe[remaining, , drop = FALSE]
608
+##     ## vv <- which(!is.na(gaj2), arr.ind = TRUE)
609
+
610
+##     gaj2 <- gaj2
611
+##     gaj2[is.na(gaj2)] <- 0
612
+##     gaj2[gaj2 > 0] <- 1
613
+##     colnames(gaj2) <- rownames(gaj2) <- afe[, "Genotype"]
614
+##     return(gaj2)
615
+## }
616
+## ## This could be done easily in C++, taking care of row/colnames at end,
617
+## ## without moving around the full adjacency matrix.
618
+## ## Skeleton for C++
619
+## ## a call to accessibleGenotypesPeaksLandscape
620
+## ## (with another argument or changing the returnpeaks by a three value thing)
621
+## ## after done with first loop, 
622
+## ## return the matrix adm[accessible > 0, accessible >0]
623
+## ## only need care with row/colnames
... ...
@@ -23,7 +23,7 @@
23 23
 
24 24
 to_Magellan <- function(x, file,
25 25
                         max_num_genotypes = 2000) {
26
-    ## This is stupid if we already have, as entry, an object from
26
+    ## Go directly if you have, as entry, an object from
27 27
     ## rfitness!! to_Fitness_Matrix can be very slow.
28 28
     if(is.null(file)) {
29 29
         file <- tempfile()
... ...
@@ -44,7 +44,8 @@ to_Magellan <- function(x, file,
44 44
 to_Fitness_Matrix <- function(x, max_num_genotypes) {
45 45
     ## A general converter. Ready to be used by plotFitnessLandscape and
46 46
     ## Magellan exporter.
47
-    
47
+
48
+    ## FIXME: really, some of this is inefficient. Very. Fix it.
48 49
     if( (inherits(x, "genotype_fitness_matrix")) ||
49 50
         ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) {
50 51
         ## Why this? We go back and forth twice. We need both things. We
... ...
@@ -257,7 +258,7 @@ allGenotypes_to_matrix <- function(x) {
257 258
     splitted_genots <- lapply(x$Genotype,
258 259
                              function(z) nice.vector.eo(z, ","))
259 260
 
260
-    all_genes <- unique(unlist(splitted_genots))
261
+    all_genes <- sort(unique(unlist(splitted_genots)))
261 262
 
262 263
     m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes))
263 264
     the_match <- lapply(splitted_genots,
... ...
@@ -285,7 +286,8 @@ Magellan_stats <- function(x, max_num_genotypes = 2000,
285 286
                            verbose = FALSE,
286 287
                            use_log = TRUE,
287 288
                            short = TRUE,
288
-                           fl_statistics = "fl_statistics") { # nocov start
289
+                           fl_statistics = "fl_statistics",
290
+                           replace_missing = FALSE) { # nocov start
289 291
     ## I always use 
290 292
     ## if(!is.null(x) && is.null(file))
291 293
     ##     stop("one of object or file name")
... ...
@@ -306,23 +308,43 @@ Magellan_stats <- function(x, max_num_genotypes = 2000,
306 308
         shortarg <- NULL
307 309
     }
308 310
     
311
+    if(replace_missing) {
312
+        zarg <- "-z"
313
+    } else {
314
+        zarg <- NULL
315
+    }
316
+
309 317
     to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
310
-    call_M <- system2(fl_statistics, args = paste(fn, shortarg, logarg, "-o", fnret))
318
+    ## newer versions of Magellan provide some extra values to standard output
319
+    call_M <- system2(fl_statistics,
320
+                      args = paste(fn, shortarg, logarg, zarg, "-o", fnret),
321
+                      stdout = NULL)
311 322
     if(short) {
312
-        tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
323
+        ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
324
+        
325
+        tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
313 326
         ## Make names more explicit, but check we have what we think we have
314
-        stopifnot(length(tmp) == 23)
315
-        stopifnot(identical(names(tmp),
327
+        ## New versions of Magellan produce different output apparently of variable length
328
+        stopifnot(length(tmp) >= 23) ## 23) ## variable length
329
+        stopifnot(identical(names(tmp)[1:13], ## only some
316 330
                             c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s",
317 331
                               "nchains", "nsteps", "nori", "depth", "magn", "sign",
318
-                              "rsign", "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
319
-                              "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
320
-        names(tmp) <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
332
+                              "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
333
+        ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
334
+        if(length(tmp) >= 24) ## the new version
335
+            stopifnot(identical(names(tmp)[c(20, 24)],
336
+                                c("steps_m", "mProbOpt_0")))
337
+        ## steps_m: the mean number of steps over the entire landscape to
338
+        ## reach the global optimum
339
+        ## mProbOpt_0: The mean probability to
340
+        ## reach that optimum (again averaged over the entire landscape).
341
+        ## but if there are multiple optima, there are many of these
342
+        names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
321 343
                         "r/s","n_chains", "n_steps", "n_origins", "max_depth",
322
-                        "epist_magn", "epist_sign", "epist_rsign",
323
-                        "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
324
-                        "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
325
-                        "std_dev_pairs", "var_outdegree")
344
+                        "epist_magn", "epist_sign", "epist_rsign")## ,
345
+                        ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
346
+                        ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
347
+        ## "std_dev_pairs", "var_outdegree")
326 348
     } else {
327 349
         tmp <- readLines(fnret)
328 350
     }
... ...
@@ -1866,14 +1866,24 @@ sampledGenotypes <- function(y, genes = NULL) {
1866 1866
         cols <- which(colnames(y) %in% genes )
1867 1867
         y <- y[, cols]
1868 1868
     }
1869
-    nn <- colnames(y)
1869
+    ## nn <- colnames(y)
1870
+    genotlabel <- function(u, nn = colnames(y)) {
1871
+        if(any(is.na(u))) return(NA)
1872
+        else {
1873
+            return(paste(sort(nn[as.logical(u)]), collapse = ", "))
1874
+        }
1875
+    }
1876
+    ## df <- data.frame(table(
1877
+    ##     apply(y, 1, function(z) paste(sort(nn[as.logical(z)]), collapse = ", ") )
1878
+    ## ))
1870 1879
     df <- data.frame(table(
1871
-        apply(y, 1, function(z) paste(nn[as.logical(z)], collapse = ", ") )
1872
-    ))
1880
+        apply(y, 1, genotlabel),
1881
+        useNA = "ifany"))
1882
+
1873 1883
     gn <- as.character(df[, 1])
1874 1884
     gn[gn == ""] <- "WT"
1875 1885
     df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE)
1876
-    attributes(df)$ShannonI <- shannonI(df$Freq)
1886
+    attributes(df)$ShannonI <- shannonI(na.omit(df)$Freq)
1877 1887
     class(df) <- c("sampledGenotypes", class(df))
1878 1888
     return(df)
1879 1889
 }
... ...
@@ -1,3 +1,14 @@
1
+Changes in version 2.7.2 (2017-09-27):
2
+	- genot_to_adj_mat in C++.
3
+	- fast_peaks (for no backmutation cases).
4
+	- Better explanation and testing of peaks and valleys.
5
+	- Clarified simOGraph transitive reduction.
6
+	- Better handling of ti corner cases.
7
+	- Magellan reading fuctions adapted to output of newer (as of
8
+	  2017-07) version of Magellan.
9
+	- sorting gene names in allGenotypes_to_matrix.
10
+	- sampledGenotypes: genotype names with sorted gene names.
11
+
1 12
 Changes in version 2.6.0 (for BioC 3.5):
2 13
 	- Many additions to the vignette and documentation.
3 14
 	- LOD and POM (lines of descent, path of maximum, sensu Szendro et
... ...
@@ -34,8 +34,9 @@ s = 0.1, sh = -0.1, typeDep = "AND")
34 34
   \item{removeDirectIndirect}{
35 35
     Ensure that no two nodes are connected both directly (i.e., with an
36 36
   edge between them) and indirectly, through intermediate nodes. If
37
-  TRUE, the direct connections are removed from the graph starting from
38
-  the bottom.
37
+  TRUE, we return the transitive reduction of the DAG.
38
+  %% the final DAG returned is the transitive reduction of thethe direct
39
+  %% connections are removed from the graph starting from the bottom.
39 40
 }
40 41
 \item{rootName}{
41 42
   The name you want to give the "Root" node.
... ...
@@ -755,7 +755,7 @@ void addToPhylog(PhylogName& phylog,
755 755
 
756 756
 
757 757
 
758
-static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
758
+static void nr_innerBNB (const fitnessEffectsAll& fitnessEffects,
759 759
 			const double& initSize,
760 760
 			const double& K,
761 761
 			// const double& alpha,
... ...
@@ -809,7 +809,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
809 809
 			const double& PDBaseline,
810 810
 			const double& checkSizePEvery,
811 811
 			const bool& AND_DrvProbExit,
812
-			const std::vector< std::vector<int> >& fixation_l) {
812
+			const std::vector< std::vector<int> >& fixation_l,
813
+			 int& ti_dbl_min,
814
+			 int& ti_e3) {
813 815
   
814 816
   double nextCheckSizeP = checkSizePEvery;
815 817
   const int numGenes = fitnessEffects.genomeSize;
... ...
@@ -939,8 +941,10 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
939 941
   std::multimap<double, int> mapTimes;
940 942
   //std::multimap<double, int>::iterator m1pos;
941 943
 
942
-  int ti_dbl_min = 0;
943
-  int ti_e3 = 0;
944
+  // int ti_dbl_min = 0;
945
+  // int ti_e3 = 0;
946
+  ti_dbl_min = 0;
947
+  ti_e3 = 0;
944 948
 
945 949
 
946 950
       // // FIXME debug
... ...
@@ -1239,6 +1243,10 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1239 1243
 	mapTimes_updateP(mapTimes, popParams, u_1, tmpdouble1);
1240 1244
 	popParams[u_1].timeLastUpdate = currentTime;
1241 1245
 
1246
+#ifdef DEBUGV
1247
+	detect_ti_duplicates(mapTimes, tmpdouble1, u_1);  
1248
+#endif
1249
+	
1242 1250
 #ifdef DEBUGV
1243 1251
 	Rcpp::Rcout << "\n\n     ********* 5.2: call to ti_nextTime, update one ******\n For to_update = \n " 
1244 1252
 		  << "     tSample  = " << tSample
... ...
@@ -1269,6 +1277,13 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1269 1277
 	popParams[u_1].timeLastUpdate = currentTime;
1270 1278
 	popParams[u_2].timeLastUpdate = currentTime;
1271 1279
 
1280
+#ifdef DEBUGV
1281
+	detect_ti_duplicates(mapTimes, tmpdouble1, u_1);
1282
+	detect_ti_duplicates(mapTimes, tmpdouble2, u_2);
1283
+#endif	
1284
+
1285
+	
1286
+
1272 1287
 #ifdef DEBUGV
1273 1288
 	Rcpp::Rcout << "\n\n     ********* 5.2: call to ti_nextTime, update two ******\n " 
1274 1289
 		  << "     tSample  = " << tSample
... ...
@@ -1306,6 +1321,9 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1306 1321
 					     tSample, ti_dbl_min, ti_e3);
1307 1322
 	  mapTimes_updateP(mapTimes, popParams, i, tmpdouble1);
1308 1323
 	  popParams[i].timeLastUpdate = currentTime;
1324
+#ifdef DEBUGV
1325
+	  detect_ti_duplicates(mapTimes, tmpdouble1, i);
1326
+#endif
1309 1327
 	  
1310 1328
 #ifdef DEBUGV
1311 1329
 	  Rcpp::Rcout << "\n\n     ********* 5.2: call to ti_nextTime, update all ******\n " 
... ...
@@ -1335,6 +1353,7 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1335 1353
     
1336 1354
     // ******************** 5.3 and do we sample? *********** 
1337 1355
     // Find minimum to know if we need to sample the whole pop
1356
+    // We also obtain the nextMutant
1338 1357
     getMinNextMutationTime4(nextMutant, minNextMutationTime, 
1339 1358
 			    mapTimes);
1340 1359
     
... ...
@@ -1438,7 +1457,31 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1438 1457
 	    Rcpp::Rcout <<"\n     Creating new species   " << (numSpecies - 1)
1439 1458
 			<< "         from species "  <<   nextMutant;
1440 1459
 	  }
1441
-	
1460
+	  
1461
+#ifdef DEBUGW
1462
+	  if( (currentTime - popParams[nextMutant].timeLastUpdate) < 0.0) {
1463
+	    DP2(currentTime); //this is set to minNextMutationTime above
1464
+	    DP2(minNextMutationTime);
1465
+	    DP2(tSample);
1466
+	    DP2(popParams[nextMutant].timeLastUpdate);
1467
+	    DP2( (currentTime -  popParams[nextMutant].timeLastUpdate) );
1468
+	    DP2( (currentTime <  popParams[nextMutant].timeLastUpdate) );
1469
+	    DP2( (currentTime ==  popParams[nextMutant].timeLastUpdate) );
1470
+	    DP2(nextMutant);
1471
+	    DP2(u_1);
1472
+	    DP2(u_2);
1473
+	    DP2(tmpdouble1);
1474
+	    DP2(tmpdouble2);
1475
+	    DP2(popParams[nextMutant].timeLastUpdate);
1476
+	    DP2(popParams[u_1].timeLastUpdate);
1477
+	    DP2(popParams[u_2].timeLastUpdate);
1478
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) );
1479
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) );
1480
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) );
1481
+	    print_spP(popParams[nextMutant]);
1482
+	    throw std::out_of_range("new species: currentTime - timeLastUpdate[sp] out of range. ***###!!!Serious bug!!!###***");
1483
+	  }
1484
+#endif	  
1442 1485
 	  tmpParam.popSize = 1;
1443 1486
 
1444 1487
 	  nr_fitness(tmpParam, popParams[nextMutant],
... ...
@@ -1523,12 +1566,75 @@ static void nr_innerBNB(const fitnessEffectsAll& fitnessEffects,
1523 1566
 	  to_update = 2; 
1524 1567
 
1525 1568
 #ifdef DEBUGW
1526
-	  if( (currentTime - popParams[sp].timeLastUpdate) <= 0.0) {
1527
-	    DP2(currentTime);
1569
+	  if( (currentTime - popParams[sp].timeLastUpdate) < 0.0) {
1570
+	    // Yes, the difference could be 0 if two next mutation times are identical.
1571
+	    // You enable detect_ti_duplicates and use trigger-duplicated-ti.R
1572
+	    // to see it.
1573
+	    // Often the involved culprits (nextMutant and the other, say sp)
1574
+	    // were lastUpdated with tiny difference and they were, when updated
1575
+	    // given an identical ti, each in its own run.
1576
+	    // Key is not timeLastUpdate. This is a possible sequence of events:
1577
+	    //    - at time t0, species that will become nextMutant is updated and gets ti = tinm
1578
+	    //    - t1: species u1 gets ti = tinm
1579
+	    //    - t2: species u2 gets some ti > tinm
1580
+	    //    - tinm becomes minimal, so we mutate u1, and it mutates to u2
1581
+	    //    - (so now the timeLastUpdate of u1 = u2 = tinm)
1582
+	    //    - nextMutant is now mutated, and it mutates to u2, which becomes sp
1583
+	    //    - tinm = timeLastUpdate of u1 and u2.
1584
+	    //    - You will also see that number of mutations, or genotypes are such
1585
+	    //      that, in this case, u2 is the most mutated, etc.
1586
+	    //    - If you enable the detect_ti_duplicates, you would have seen duplicated ti
1587
+	    //      for nextMutant and u1
1588
+
1589
+	    //   Even simpler is if above, nextMutant will mutate to u1 (not u2) so u1 becomes sp.
1590
+	    DP2(currentTime); //this is set to minNextMutationTime above
1591
+	    DP2(minNextMutationTime);
1592
+	    DP2(tSample);
1593
+	    DP2(popParams[sp].timeLastUpdate);
1594
+	    DP2( (currentTime -  popParams[sp].timeLastUpdate) );
1595
+	    DP2( (currentTime <  popParams[sp].timeLastUpdate) );
1596
+	    DP2( (currentTime ==  popParams[sp].timeLastUpdate) );
1528 1597
 	    DP2(sp);
1598
+	    DP2(nextMutant);
1599
+	    DP2(u_1);
1600
+	    DP2(u_2);
1601
+	    DP2(tmpdouble1);
1602
+	    DP2(tmpdouble2);
1529 1603
 	    DP2(popParams[sp].timeLastUpdate);
1604
+	    DP2(popParams[nextMutant].timeLastUpdate);
1605
+	    DP2(popParams[u_1].timeLastUpdate);
1606
+	    DP2(popParams[u_2].timeLastUpdate);
1607
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) );
1608
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) );
1609
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) );
1530 1610
 	    print_spP(popParams[sp]);
1531
-	    throw std::out_of_range("currentTime - timeLastUpdate out of range. Serious bug!");
1611
+	    print_spP(popParams[nextMutant]);
1612
+	    throw std::out_of_range("currentTime - timeLastUpdate[sp] out of range.  ***###!!!Serious bug!!!###***");
1613
+	  }
1614
+	  if( (currentTime - popParams[nextMutant].timeLastUpdate) < 0.0) {
1615
+	    DP2(currentTime); //this is set to minNextMutationTime above
1616
+	    DP2(minNextMutationTime);
1617
+	    DP2(tSample);
1618
+	    DP2(popParams[nextMutant].timeLastUpdate);
1619
+	    DP2( (currentTime -  popParams[nextMutant].timeLastUpdate) );
1620
+	    DP2( (currentTime <  popParams[nextMutant].timeLastUpdate) );
1621
+	    DP2( (currentTime ==  popParams[nextMutant].timeLastUpdate) );
1622
+	    DP2(sp);
1623
+	    DP2(nextMutant);
1624
+	    DP2(u_1);
1625
+	    DP2(u_2);
1626
+	    DP2(tmpdouble1);
1627
+	    DP2(tmpdouble2);
1628
+	    DP2(popParams[sp].timeLastUpdate);
1629
+	    DP2(popParams[nextMutant].timeLastUpdate);
1630
+	    DP2(popParams[u_1].timeLastUpdate);
1631
+	    DP2(popParams[u_2].timeLastUpdate);
1632
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[u_2].timeLastUpdate) );
1633
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[nextMutant].timeLastUpdate) );
1634
+	    DP2( (popParams[u_1].timeLastUpdate - popParams[0].timeLastUpdate) );
1635
+	    print_spP(popParams[sp]);
1636
+	    print_spP(popParams[nextMutant]);
1637
+	    throw std::out_of_range("currentTime - timeLastUpdate[nextMutant] out of range. ***###!!!Serious bug!!!###***");
1532 1638
 	  }
1533 1639
 #endif
1534 1640
 	  // if(verbosity >= 2) {
... ...
@@ -1888,11 +1994,17 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1888 1994
   // 5.1 Initialize 
1889 1995
 
1890 1996
   int numRuns = 0;
1997
+  int numRecoverExcept = 0;
1891 1998
   bool forceRerun = false;
1892 1999
   
1893 2000
   double currentTime = 0;
1894 2001
   int iter = 0;
1895 2002
 
2003
+  int ti_dbl_min = 0;
2004
+  int ti_e3 = 0;
2005
+
2006
+  int accum_ti_dbl_min = 0;
2007
+  int accum_ti_e3 = 0;
1896 2008
   // bool AND_DrvProbExit = ( (cpDetect >= 0) &&
1897 2009
   // 			     (detectionDrivers < 1e9) &&
1898 2010
   // 			     (detectionSize < std::numeric_limits<double>::infinity()));
... ...
@@ -1975,13 +2087,21 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
1975 2087
 		  PDBaseline,
1976 2088
 		  checkSizePEvery,
1977 2089
 		  AND_DrvProbExit,
1978
-		  fixation_l);
2090
+		  fixation_l,
2091
+		  ti_dbl_min,
2092
+		  ti_e3);
1979 2093
       ++numRuns;
1980 2094
       forceRerun = false;
2095
+      accum_ti_dbl_min += ti_dbl_min;
2096
+      accum_ti_e3 += ti_e3;
1981 2097
     } catch (rerunExcept &e) {
1982 2098
       Rcpp::Rcout << "\n Recoverable exception " << e.what() 
1983 2099
 		  << ". Rerunning.";
1984 2100
       forceRerun = true;
2101
+      ++numRecoverExcept;
2102
+      ++numRuns; // exception should count here!
2103
+      accum_ti_dbl_min += ti_dbl_min;
2104
+      accum_ti_e3 += ti_e3;
1985 2105
     } catch (const std::exception &e) {
1986 2106
       Rcpp::Rcout << "\n Unrecoverable exception: " << e.what()
1987 2107
 		  << ". Aborting. \n";
... ...
@@ -2125,9 +2245,11 @@ Rcpp::List nr_BNB_Algo5(Rcpp::List rFE,
2125 2245
 										      Named("child") = phylog.child,
2126 2246
 										      Named("time") = phylog.time
2127 2247
 										      ),
2128
-					       Named("UnrecoverExcept") = false)
2248
+					       Named("UnrecoverExcept") = false,
2249
+					       Named("numRecoverExcept") = numRecoverExcept,
2250
+					       Named("accum_ti_dbl_min") = accum_ti_dbl_min,
2251
+					       Named("accum_ti_e3") = accum_ti_e3)
2129 2252
 		 );
2130
-
2131 2253
 }
2132 2254
 
2133 2255
 
... ...
@@ -12,6 +12,10 @@ SEXP OncoSimulR_evalRGenotypeAndMut(SEXP rGSEXP, SEXP rFESEXP, SEXP muEFSEXP, SE
12 12
 // SEXP OncoSimulR_readFitnessEffects(SEXP rFESEXP, SEXP echoSEXP);
13 13
 SEXP OncoSimulR_accessibleGenotypes(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP);
14 14
 
15
+SEXP OncoSimulR_genot2AdjMat(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP);
16
+SEXP OncoSimulR_peaksLandscape(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP);
17
+SEXP OncoSimulR_accessibleGenotypes_former(SEXP ySEXP, SEXP xSEXP, SEXP numMutSEXP, SEXP thSEXP);
18
+
15 19
 // The number is the number of arguments
16 20
 R_CallMethodDef callMethods[]  = {
17 21
   {"OncoSimulR_nr_BNB_Algo5", (DL_FUNC) &OncoSimulR_nr_BNB_Algo5, 37},
... ...
@@ -19,6 +23,9 @@ R_CallMethodDef callMethods[]  = {
19 23
   {"OncoSimulR_evalRGenotype", (DL_FUNC) &OncoSimulR_evalRGenotype, 5},
20 24
   {"OncoSimulR_evalRGenotypeAndMut", (DL_FUNC) &OncoSimulR_evalRGenotypeAndMut, 6},
21 25
   {"OncoSimulR_accessibleGenotypes", (DL_FUNC) &OncoSimulR_accessibleGenotypes, 4},
26
+  {"OncoSimulR_genot2AdjMat", (DL_FUNC) &OncoSimulR_genot2AdjMat, 3},
27
+  {"OncoSimulR_peaksLandscape", (DL_FUNC) &OncoSimulR_peaksLandscape, 4},
28
+  {"OncoSimulR_accessibleGenotypes_former", (DL_FUNC) &OncoSimulR_accessibleGenotypes_former, 4},  
22 29
   //  {"OncoSimulR_readFitnessEffects", (DL_FUNC) &OncoSimulR_readFitnessEffects, 2},
23 30
   {NULL, NULL, 0}
24 31
 };
... ...
@@ -141,6 +141,50 @@ BEGIN_RCPP
141 141
  END_RCPP
142 142
 }
143 143
 
144
+// genotype fitness matrix to adjacency matrix of genotypes
145
+Rcpp::NumericMatrix genot2AdjMat(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut);
146
+RcppExport SEXP OncoSimulR_genot2AdjMat(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP) {
147
+BEGIN_RCPP
148
+    Rcpp::RObject __result;
149
+// Rcpp::RNGScope __rngScope;
150
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP);
151
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP);
152
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP);
153
+ __result = Rcpp::wrap(genot2AdjMat(y, f, numMut));
154
+ return __result;
155
+ END_RCPP
156
+}
157
+
158
+
159
+// evalRGenotypeAndMut
160
+Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th);
161
+RcppExport SEXP OncoSimulR_peaksLandscape(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) {
162
+BEGIN_RCPP
163
+    Rcpp::RObject __result;
164
+// Rcpp::RNGScope __rngScope;
165
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP);
166
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP);
167
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP);
168
+ Rcpp::traits::input_parameter< double >::type th(thSEXP);
169
+ __result = Rcpp::wrap(peaksLandscape(y, f, numMut, th));
170
+ return __result;
171
+ END_RCPP
172
+}
173
+
174
+// just for testing. Eventually remove
175
+Rcpp::IntegerVector accessibleGenotypes_former(Rcpp::IntegerMatrix y, Rcpp::NumericVector f, Rcpp::IntegerVector numMut, double th);
176
+RcppExport SEXP OncoSimulR_accessibleGenotypes_former(SEXP ySEXP, SEXP fSEXP, SEXP numMutSEXP, SEXP thSEXP) {
177
+BEGIN_RCPP
178
+    Rcpp::RObject __result;
179
+// Rcpp::RNGScope __rngScope;
180
+ Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type y(ySEXP);
181
+ Rcpp::traits::input_parameter< Rcpp::NumericVector >::type f(fSEXP);
182
+ Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type numMut(numMutSEXP);
183
+ Rcpp::traits::input_parameter< double >::type th(thSEXP);
184
+ __result = Rcpp::wrap(accessibleGenotypes_former(y, f, numMut, th));
185
+ return __result;
186
+ END_RCPP
187
+}
144 188
 
145 189
 
146 190
 // // readFitnessEffects
... ...
@@ -22,12 +22,12 @@ inline int HammingDistance(const Rcpp::IntegerVector& x, const Rcpp::IntegerVect
22 22
 }
23 23
 
24 24
 
25
-
25
+// eventually remove this. Left now for testing
26 26
 // [[Rcpp::export]]
27
-Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
28
-					Rcpp::NumericVector f,
29
-					Rcpp::IntegerVector numMut, //
30
-					double th) {
27
+Rcpp::IntegerVector accessibleGenotypes_former(Rcpp::IntegerMatrix y,
28
+					       Rcpp::NumericVector f,
29
+					       Rcpp::IntegerVector numMut, //
30
+					       double th) {
31 31
   // Return just the indices. Could preserve fitness, but would need
32 32
   // another matrix.
33 33
   int ng = y.nrow(); //it counts the wt
... ...
@@ -106,6 +106,189 @@ Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
106 106
 
107 107
 
108 108
 
109
+
110
+
111
+// [[Rcpp::export]]
112
+Rcpp::NumericMatrix genot2AdjMat(Rcpp::IntegerMatrix y,
113
+				 Rcpp::NumericVector f,
114
+				 Rcpp::IntegerVector numMut) {
115
+  // Return just the indices. Could preserve fitness, but would need
116
+  // another matrix.
117
+  int ng = y.nrow(); //it counts the wt
118
+  Rcpp::NumericMatrix adm(ng, ng);
119
+  
120
+  // fill with NAs: https://stackoverflow.com/a/23753626
121
+  // Filling with NAs and in general having NAs might lead to performance
122
+  // penalties. But I use the NAs in a lot of the code for accessible
123
+  // genotypes, etc.
124
+  std::fill( adm.begin(), adm.end(), Rcpp::NumericVector::get_na() ) ;
125
+  int numMutdiff = 0;
126
+  // I would have thought this would be faster. It ain't.
127
+  // The last genotype never accesses anything.
128
+  // for(int i = 0; i < (ng - 1); ++i) {
129
+  //   // Candidate genotypes to be accessed from i are always of larger
130
+  //   // mutation by 1. And candidates can thus not have smaller index
131
+  //   for(int j = (i + 1); j < ng; ++j) {
132
+  //     if( (numMut(j) == (numMut(i) + 1)) &&
133
+  // 	  ( (f(j) - f(i)) >= th) &&
134
+  // 	  (HammingDistance(y(i, _), y(j, _)) == 1) ) {
135
+  // 	adm(i, j) = 1;
136
+  //     } else if( (numMut(j) > (numMut(i) + 1)) ) {
137
+  // 	break;
138
+  //     }
139
+  //   }
140
+  // }
141
+
142
+  // The last genotype never accesses anything.
143
+  for(int i = 0; i < (ng - 1); ++i) {
144
+    // Candidate genotypes to be accessed from i are always of larger
145
+    // mutation by 1. And candidates can thus not have smaller index
146
+    for(int j = (i + 1); j < ng; ++j) {
147
+      numMutdiff = numMut(j) - numMut(i);
148
+      if( numMutdiff > 1) { // no more to search
149
+  	break; 
150
+      } else if(numMutdiff == 1) {
151
+  	if( HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1) {
152
+  	  adm(i, j) =  (f(j) - f(i));
153
+  	}
154
+      }
155
+    }
156
+  }
157
+  return adm;
158
+}
159
+
160
+
161
+Rcpp::IntegerMatrix integerAdjMat(Rcpp::IntegerMatrix y,
162
+				  Rcpp::NumericVector f,
163
+				  Rcpp::IntegerVector numMut, //
164
+				  double th) {
165
+  // Return a genotype adjacency matrix with a 1 if genotype j is
166
+  // accessible (fitness >, within th) from i.
167
+  int ng = y.nrow(); //it counts the wt
168
+  Rcpp::IntegerMatrix adm(ng, ng);
169
+  int numMutdiff = 0;
170
+  // I would have thought this would be faster. It ain't.
171
+  // The last genotype never accesses anything.
172
+  // for(int i = 0; i < (ng - 1); ++i) {
173
+  //   // Candidate genotypes to be accessed from i are always of larger
174
+  //   // mutation by 1. And candidates can thus not have smaller index
175
+  //   for(int j = (i + 1); j < ng; ++j) {
176
+  //     if( (numMut(j) == (numMut(i) + 1)) &&
177
+  // 	  ( (f(j) - f(i)) >= th) &&
178
+  // 	  (HammingDistance(y(i, _), y(j, _)) == 1) ) {
179
+  // 	adm(i, j) = 1;
180
+  //     } else if( (numMut(j) > (numMut(i) + 1)) ) {
181
+  // 	break;
182
+  //     }
183
+  //   }
184
+  // }
185
+
186
+  // The last genotype never accesses anything.
187
+  for(int i = 0; i < (ng - 1); ++i) {
188
+    // Candidate genotypes to be accessed from i are always of larger
189
+    // mutation by 1. And candidates can thus not have smaller index
190
+    for(int j = (i + 1); j < ng; ++j) {
191
+      numMutdiff = numMut(j) - numMut(i);
192
+      if( numMutdiff > 1) { // no more to search
193
+  	break; 
194
+      } else if(numMutdiff == 1) {
195
+  	// f(j) - f(i) is faster than HammingDistance
196
+  	// but might lead to more evals?
197
+  	// or fewer, depending on landscape
198
+  	if( ( (f(j) - f(i)) >= th) &&
199
+	    (HammingDistance(y(i, Rcpp::_), y(j, Rcpp::_)) == 1)
200
+  	    ) {
201
+  	  adm(i, j) = 1;
202
+	  // Rcpp::Rcout << "i = " << i << " j = " << j << " adm " << adm(i,j) << "\n"; 
203
+  	}
204
+      }
205
+    }
206
+  }
207
+  return adm;
208
+}
209
+
210
+
211
+// used in both peaks and accessible genotypes
212
+Rcpp::IntegerVector accessibleGenotypesPeaksLandscape(Rcpp::IntegerMatrix y,
213
+						      Rcpp::NumericVector f,
214
+						      Rcpp::IntegerVector numMut, //
215
+						      double th,
216
+						      bool returnpeaks) {
217
+  // Return the indices. This is like accessibleGenotypes, but we do an
218
+  // extra loop
219
+  int ng = y.nrow(); //it counts the wt
220
+  Rcpp::IntegerMatrix adm(ng, ng);
221
+
222
+  adm = integerAdjMat(y, f, numMut, th);
223
+  
224
+  int numMutdiff = 0;
225
+
226
+  // Slightly different logic from R: Do not resize object; set the row to
227
+  // 0.
228
+  int colsum = 0;
229
+  // int indicator = 0; // indicator != 0 means we set one row to 0
230
+  // so we need to iterate at least once more.
231
+  
232
+  // accessible is the genotype number, not the column!  WT is 1,
233
+  // etc. This makes it easy to keep track of which are accessible.
234
+  Rcpp::IntegerVector accessible = Rcpp::seq_len(ng);
235
+  // This is doable in one pass
236
+  // while (true) {
237
+  //   indicator = 0;
238
+    for(int k = 1; k < ng; ++k) {
239
+      if(accessible(k) > 0) {
240
+	colsum = std::accumulate(adm(Rcpp::_, k).begin(),
241
+				 adm(Rcpp::_, k).end(), 0);
242
+	if(colsum == 0) { // This genotype ain't reachable
243
+	  // Nothing can be reached from this genotype; fill with 0.
244
+	  adm(k, Rcpp::_) = Rcpp::IntegerVector(ng);
245
+	  accessible(k) = -9;
246
+	  // indicator = 1;
247
+	}
248
+      }
249
+    }
250
+  //   if(indicator == 0) break;
251
+  // }
252
+    if(!returnpeaks) {
253
+      return accessible;
254
+    } else  {
255
+      // BEWARE: this will not work if several connected genotypes
256
+      // have the same fitness and are maxima
257
+      int rowsum = 0;
258
+      Rcpp::IntegerVector peaks;
259
+      for(int k = 0; k < ng; ++k) {
260
+	if(accessible(k) > 0) {
261
+	  rowsum = std::accumulate(adm(k, Rcpp::_).begin(),
262
+				   adm(k, Rcpp::_).end(), 0);
263
+	  if(rowsum == 0) { // This genotype doesn't have children
264
+	    peaks.push_back(k + 1); // k is index. But in R, WT is in pos 1
265
+	  }
266
+	}
267
+      }
268
+      return peaks;
269
+    }
270
+}
271
+
272
+
273
+
274
+// [[Rcpp::export]]
275
+Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
276
+					Rcpp::NumericVector f,
277
+					Rcpp::IntegerVector numMut, //
278
+					double th) {
279
+  return accessibleGenotypesPeaksLandscape(y, f, numMut, th, false);
280
+}
281
+
282
+// [[Rcpp::export]]
283
+Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y,
284
+				   Rcpp::NumericVector f,
285
+				   Rcpp::IntegerVector numMut, //
286
+				   double th) {
287
+  return accessibleGenotypesPeaksLandscape(y, f, numMut, th, true);
288
+}
289
+
290
+
291
+
109 292
 // // This would make it easier returning the actual accessible genotypes easily
110 293
 // // preserving the fitness if needed
111 294
 // // Not being used now
... ...
@@ -169,4 +352,107 @@ Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
169 352
 
170 353
 
171 354
 
355
+// // [[Rcpp::export]]
356
+// Rcpp::IntegerVector accessibleGenotypes(Rcpp::IntegerMatrix y,
357
+// 					Rcpp::NumericVector f,
358
+// 					Rcpp::IntegerVector numMut, //
359
+// 					double th) {
360
+  
361
+//   // Return just the indices. Could preserve fitness, but would need
362
+//   // another matrix.
363
+//   int ng = y.nrow(); //it counts the wt
364
+//   Rcpp::IntegerMatrix adm(ng, ng);
365
+
366
+//   adm = integerAdjMat(y, f, numMut, th);
367
+  
368
+//   int numMutdiff = 0;
172 369
 
370
+//   // Slightly different logic from R: Do not resize object; set the row to
371
+//   // 0.
372
+//   int colsum = 0;
373
+//   // int indicator = 0; // indicator != 0 means we set one row to 0
374
+//   // so we need to iterate at least once more.
375
+  
376
+//   // accessible is the genotype number, not the column!  WT is 1,
377
+//   // etc. This makes it easy to keep track of which are accessible.
378
+//   Rcpp::IntegerVector accessible = Rcpp::seq_len(ng);
379
+
380
+//   // This is doable in one pass
381
+//   // while (true) {
382
+//   //   indicator = 0;
383
+//     for(int k = 1; k < ng; ++k) {
384
+//       if(accessible(k) > 0) {
385
+// 	colsum = std::accumulate(adm(Rcpp::_, k).begin(),
386
+// 				 adm(Rcpp::_, k).end(), 0);
387
+// 	if(colsum == 0) { // This genotype ain't reachable
388
+// 	  // Nothing can be reached from this genotype; fill with 0.
389
+// 	  adm(k, Rcpp::_) = Rcpp::IntegerVector(ng);
390
+// 	  accessible(k) = -9;
391
+// 	  // indicator = 1;
392
+// 	}
393
+//       }
394
+//     }
395
+//   //   if(indicator == 0) break;
396
+//   // }
397
+//   return accessible;
398
+// }
399
+
400
+
401
+
402
+
403
+// // [[Rcpp::export]]
404
+// Rcpp::IntegerVector peaksLandscape(Rcpp::IntegerMatrix y,
405
+// 				   Rcpp::NumericVector f,
406
+// 				   Rcpp::IntegerVector numMut, //
407
+// 				   double th) {
408
+//   // Return the indices. This is like accessibleGenotypes, but we do an
409
+//   // extra loop
410
+//   int ng = y.nrow(); //it counts the wt
411
+//   Rcpp::IntegerMatrix adm(ng, ng);
412
+
413
+//   adm = integerAdjMat(y, f, numMut, th);
414
+  
415
+//   int numMutdiff = 0;
416
+
417
+//   // Slightly different logic from R: Do not resize object; set the row to
418
+//   // 0.
419
+//   int colsum = 0;
420
+//   // int indicator = 0; // indicator != 0 means we set one row to 0
421
+//   // so we need to iterate at least once more.
422
+  
423
+//   // accessible is the genotype number, not the column!  WT is 1,
424
+//   // etc. This makes it easy to keep track of which are accessible.
425
+//   Rcpp::IntegerVector accessible = Rcpp::seq_len(ng);
426
+//   // This is doable in one pass
427
+//   // while (true) {
428
+//   //   indicator = 0;
429
+//     for(int k = 1; k < ng; ++k) {
430
+//       if(accessible(k) > 0) {
431
+// 	colsum = std::accumulate(adm(Rcpp::_, k).begin(),
432
+// 				 adm(Rcpp::_, k).end(), 0);
433
+// 	if(colsum == 0) { // This genotype ain't reachable
434
+// 	  // Nothing can be reached from this genotype; fill with 0.
435
+// 	  adm(k, Rcpp::_) = Rcpp::IntegerVector(ng);
436
+// 	  accessible(k) = -9;
437
+// 	  // indicator = 1;
438
+// 	}
439
+//       }
440
+//     }
441
+//   //   if(indicator == 0) break;
442
+//   // }
443
+
444
+//     int rowsum = 0;
445
+//     Rcpp::IntegerVector peaks;
446
+//     for(int k = 1; k < ng; ++k) {
447
+//       if(accessible(k) > 0) {
448
+// 	rowsum = std::accumulate(adm(k, Rcpp::_).begin(),
449
+// 				 adm(k, Rcpp::_).end(), 0);
450
+// 	if(rowsum == 0) { // This genotype doesn't have children
451
+// 	  peaks.push_back(k);
452
+// 	}
453
+//       }
454
+//     }
455
+
456
+   
457
+//   return peaks;
458
+// }
... ...
@@ -238,11 +238,28 @@ double ti_nextTime_tmax_2_st(const spParamsP& spP,
238 238
 	print_spP(spP);
239 239
 	throw std::range_error("ti: ti not finite");
240 240
       }
241
-      if(ti == 0.0) {
241
+      if((ti == 0.0) || (ti <= DBL_MIN)) {
242
+// #ifdef DEBUGW
243
+//  // this is too verbose for routine use
244
+// 	std::string ti_dbl_comp;
245
+// 	if( ti == DBL_MIN) {
246
+// 	  ti_dbl_comp = "ti_equal_DBL_MIN";
247
+// 	  DP2(ti);
248
+// 	} else if (ti == 0.0) {
249
+// 	  ti_dbl_comp = "ti_equal_0.0";
250
+// 	} else if ( (ti < DBL_MIN) && (ti > 0.0) ) {
251
+// 	  ti_dbl_comp = "ti_gt_0.0_lt_DBL_MIN";
252
+// 	  DP2(ti);
253
+// 	}  else {
254
+// 	  ti_dbl_comp = "IMPOSSIBLE!";
255
+// 	}
256
+// 	DP2(ti_dbl_comp);
257
+// #endif
242 258
 #ifdef DEBUGV	
243 259
 	// FIXME: pass verbosity as argument, and return the warning
244 260
 	// if set to more than 0?
245
-	Rcpp::Rcout << "\n\n\n WARNING: ti == 0. Setting it to DBL_MIN \n"; 
261
+	Rcpp::Rcout << "\n\n\n WARNING: ti == 0. Setting it to DBL_MIN \n";
262
+
246 263
 	double eq12 = pow( (spP.R - spP.W + 2.0 * spP.death) / 
247 264
 			   (spP.R + spP.W - 2.0 * spP.birth) , spP.popSize);
248 265
 
... ...
@@ -279,10 +296,18 @@ double ti_nextTime_tmax_2_st(const spParamsP& spP,
279 296
 	// Rcpp::Rcout << "ti set to DBL_MIN\n";
280 297
 	// Yes, abort because o.w. we can repeat it many, manu times
281 298
 	// throw std::range_error("ti set to DBL_MIN");
299
+	// Even just touching DBL_MIN is enough to want a rerunExcept;
300
+	// no need for it to be 0.0.
282 301
 	throw rerunExcept("ti set to DBL_MIN");
283 302
       }
284
-      if(ti < 0.001) ++ti_e3;
303
+      if(ti < (2*DBL_MIN)) ++ti_e3; // Counting how often this happens.
304
+      // Can be smaller than the ti_dbl_min count
285 305
       ti += currentTime;
306
+      // But we can still have issues here if the difference is too small
307
+      if( (ti <= currentTime) ) {
308
+	// Rcpp::Rcout << "\n (ti <= currentTime): expect problems\n";
309
+	throw rerunExcept("ti <= currentTime");
310
+      }
286 311
     } 
287 312
   }
288 313
   return ti;
... ...
@@ -1106,3 +1131,26 @@ void fill_SStats(Rcpp::NumericMatrix& perSampleStats,
1106 1131
     perSampleStats(i, 4) = static_cast<double>(sampleNDrLargestPop[i]);
1107 1132
   }
1108 1133
 }
1134
+
1135
+// Do not use this routinely. Too expensive and not needed.
1136
+void detect_ti_duplicates(const std::multimap<double, int>& m,
1137
+			  const double ti,
1138
+			  const int species) {
1139
+
1140
+  double maxti = m.rbegin()->first;
1141
+  if((ti < maxti) && (m.count(ti) > 1)) {
1142
+    Rcpp::Rcout << "\n *** duplicated ti for species " << species << "\n";
1143
+
1144
+    std::multimap<double, int>::const_iterator it = m.lower_bound(ti);
1145
+    std::multimap<double, int>::const_iterator it2 = m.upper_bound(ti);
1146
+
1147
+    while(it != it2) {
1148
+      Rcpp::Rcout << "\tgenotype: " << (it->second) << "; time: " <<
1149
+	(it->first) << "\n";
1150
+      ++it;
1151
+    }
1152
+    Rcpp::Rcout << "\n\n\n";
1153
+  }
1154
+}
1155
+
1156
+
... ...
@@ -183,6 +183,8 @@ void updateRatesBeeren(std::vector<spParamsP>& popParams,
183 183
 			      const int& mutationPropGrowth,
184 184
 		       const double& mu);
185 185
 
186
-
186
+void detect_ti_duplicates(const std::multimap<double, int>& m,
187
+			  const double ti,
188
+			  const int spcies);
187 189
 #endif
188 190
 
... ...
@@ -1,18 +1,189 @@
1
-test_that("We obtain same accessible genotypes with different functions", {
1
+test_that("We obtain same accessible genotypes with different functions plus genot_to_adjm_mat", {
2 2
     ## More likely to catch bugs if many iters, rather than large matrices
3 3
     niter <- 100
4 4
     for(i in 1:niter) {
5 5
         ## cat("\n i   iteration fast accessible comp ", i)
6 6
         for(ng in 2:6) {
7 7
             rtmp <- rfitness(ng)
8
-            a1 <- OncoSimulR:::faster_accessible_genotypes_R(rtmp, 0)
8
+
9 9
             ajm <- OncoSimulR:::genot_to_adj_mat(rtmp)
10
+            ajmr <- OncoSimulR:::genot_to_adj_mat_R(rtmp)
11
+            stopifnot(all.equal(ajm, ajmr))
12
+            
13
+            a1 <- OncoSimulR:::faster_accessible_genotypes_R(rtmp, 0)
10 14
             a2 <- colnames(OncoSimulR:::filter_inaccessible(ajm, 0))
11 15
             a3 <- OncoSimulR:::wrap_accessibleGenotypes(rtmp, 0)
16
+            a4 <- OncoSimulR:::wrap_accessibleGenotypes_former(rtmp, 0)
17
+
12 18
             stopifnot(identical(as.integer(a1), a3))
13 19
             stopifnot(identical(as.integer(a2), a3))
14
-            stopifnot(all(a2 ==  a3))
15
-            stopifnot(identical(a2, as.character(a3)))
20
+            stopifnot(all(a3 ==  a4))
21
+
16 22
         }
17 23
     } 
18 24
 })
25
+
26
+
27
+test_that("The indices of accessible genotypes are correct", {
28
+    ## Make sure we do not assume matrix is ordered
29
+
30
+    mf1 <- rbind(c(0, 0, 1),
31
+                 c(1, 0, 4),
32
+                 c(0, 1, .2),
33
+                 c(1, 1, 3))
34
+    expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf1, 0),
35
+                 c(1, 2))
36
+    mf2 <- rbind(c(0, 0, 1),
37
+                 c(1, 0, 4),
38
+                 c(0, 1, .2),
39
+                 c(1, 1, 5))
40
+    expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0),
41
+                 c(1, 2, 4))
42
+
43
+
44
+    mf1 <- rbind(c(0, 0, 1),
45
+                 c(0, 1, .2),
46
+                 c(1, 0, 4),
47
+                 c(1, 1, 3))
48
+    expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf1, 0),
49
+                 c(1, 3))
50
+    
51
+    mf2 <- rbind(
52
+        c(0, 1, .2),
53
+        c(0, 0, 1),
54
+        c(1, 0, 4),
55
+        c(1, 1, 5)
56
+    )
57
+    expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0),
58
+                 c(2, 3, 4))
59
+
60
+    mf2 <- rbind(
61
+        c(0, 1, .2),
62
+        c(0, 0, 1),
63
+        c(1, 1, 5),
64
+        c(1, 0, 4)
65
+    )
66
+    expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0),
67
+                 c(2, 3, 4))
68
+
69
+    mf2 <- rbind(
70
+        c(0, 1, .2),
71
+        c(0, 0, 1),
72
+        c(1, 1, 5),
73
+        c(1, 0, .4)
74
+    )
75
+    expect_equal(OncoSimulR:::wrap_accessibleGenotypes(mf2, 0),
76
+                 c(2))
77
+    
78
+})
79
+
80
+
81
+test_that("The indices of adjancey matrices are correct", {
82
+    ## Make sure we do not assume matrix is ordered
83
+
84
+    trueam <- matrix(NA, nrow = 4, ncol = 4)
85
+    trueam[1, 2] <- 3
86
+    trueam[1, 3] <- -0.8
87
+    trueam[2, 4] <- -1
88
+    trueam[3, 4] <- 2.8    
89
+    colnames(trueam) <- rownames(trueam) <- 1:4
90
+
91
+    mf1 <- rbind(c(0, 0, 1),
92
+                 c(1, 0, 4),
93
+                 c(0, 1, .2),
94
+                 c(1, 1, 3))
95
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf1),
96
+                          OncoSimulR:::genot_to_adj_mat_R(mf1)))
97
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf1),
98
+                          trueam[1:4, 1:4]))
99
+
100
+    ## these two make use of the fact that we are forced to reorder some
101
+    mf2 <- mf1[c(2, 1, 4, 3), ]
102
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
103
+                          OncoSimulR:::genot_to_adj_mat_R(mf2)))
104
+    trueam2 <- trueam
105
+    colnames(trueam2) <- rownames(trueam2) <- colnames(trueam)[c(2, 1, 4, 3)]
106
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
107
+                          trueam2))
108
+
109
+    ## ## this is potentially confusing. See below for much cleaner
110
+    ## ## which compares matrices ordered by their names
111
+    ## mf2 <- mf1[c(3, 1, 4, 2), ]
112
+    ## expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
113
+    ##                       OncoSimulR:::genot_to_adj_mat_R(mf2)))
114
+    ## trueam2 <- trueam[c(1, 3, 2, 4), c(1, 3, 2, 4)]
115
+    ## colnames(trueam2) <- rownames(trueam2) <- c(2, 1, 4, 3)
116
+    ## expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
117
+    ##                       trueam2))
118
+
119
+
120
+    
121
+
122
+    ## the next are cleaner:  I compare the matrices ordered by the new names
123
+    ii <- c(2, 1, 4, 3)
124
+    mf2 <- mf1[ii, ]
125
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
126
+                          OncoSimulR:::genot_to_adj_mat_R(mf2)))
127
+    trueam2 <- trueam[ii, ii]
128
+    colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2)
129
+    ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2)
130
+    ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))]
131
+    expect_true(all.equal(ogammf2, trueam2))
132
+    expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4)
133
+    expect_false(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam)))
134
+    expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped
135
+
136
+    
137
+    
138
+
139
+    ii <- c(3, 1, 2, 4)
140
+    mf2 <- mf1[ii, ]
141
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
142
+                          OncoSimulR:::genot_to_adj_mat_R(mf2)))
143
+    trueam2 <- trueam[ii, ii]
144
+    colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2)
145
+    ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2)
146
+    ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))]
147
+    expect_true(all.equal(ogammf2, trueam2))
148
+    expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4)
149
+    expect_false(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam)))
150
+    expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped
151
+    
152
+  
153
+
154
+
155
+    ii <- c(1, 3, 2, 4)
156
+    mf2 <- mf1[ii, ]
157
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
158
+                          OncoSimulR:::genot_to_adj_mat_R(mf2)))
159
+    trueam2 <- trueam[ii, ii]
160
+    colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2)
161
+    ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2)
162
+    ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))]
163
+    expect_true(all.equal(ogammf2, trueam2))
164
+    expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4)
165
+    ## note this
166
+    expect_true(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam)))
167
+    expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped
168
+    
169
+
170
+
171
+    ii <- c(4, 3, 1, 2)
172
+    mf2 <- mf1[ii, ]
173
+    expect_true(all.equal(OncoSimulR:::genot_to_adj_mat(mf2),
174
+                          OncoSimulR:::genot_to_adj_mat_R(mf2)))
175
+    trueam2 <- trueam[ii, ii]
176
+    colnames(trueam2) <- rownames(trueam2) <- 1:nrow(trueam2)
177
+    ogammf2 <- OncoSimulR:::genot_to_adj_mat(mf2)
178
+    ogammf2 <- ogammf2[order(colnames(ogammf2)), order(colnames(ogammf2))]
179
+    expect_true(all.equal(ogammf2, trueam2))
180
+    expect_true(sum(ogammf2 == trueam2, na.rm = TRUE) == 4)
181
+    expect_false(all(colnames(OncoSimulR:::genot_to_adj_mat(mf2)) == colnames(trueam)))
182
+    expect_false(sum(ogammf2 == trueam, na.rm = TRUE) == 4) ## because 2 and 3 are flipped
183
+   
184
+})
185
+
186
+
187
+
188
+
189
+## For peaks, see code in test.plotFitnessLandscape.R
... ...
@@ -272,3 +272,451 @@ test_that("internal peak valley functions", {
272 272
 
273 273
     
274 274
 })
275
+
276
+
277
+## Beware that using peak_valley on only_accessible makes a difference
278
+test_that("internal peak valley functions w/wo inaccessible filter", {
279
+    ## A is accessible, a peak
280
+    ## AB is a peak if only forward. But there is no
281
+    ## reciprocal sign epistasis here!
282
+
283
+    ## We want peaks in general, not just
284
+    ## under assumption of "no back mutation"?
285
+
286
+    ## Well, no, that is not obvious with cancer progression models if we
287
+    ## do not allow back mutations.
288
+    
289
+    ## We get a different result when we restrict to accessible
290
+    ## because all < 0 in adjacency are turned to NAs.
291
+
292
+    ## Thinking in terms of adjacency matrix, AB is not a peak if it has a
293
+    ## positive and a negative entry in its column, because the negative
294
+    ## entry means there is an ancestor with larger fitness.
295
+    ## But see below for why plainly using the adjacency matrix can give bad results.
296
+    
297
+    ## The next matrices are all fitness matrix. Last column is fitness.
298
+    mf1 <- rbind(
299
+        c(0, 0, 1),
300
+        c(1, 0, 4),
301
+        c(0, 1, 2),
302
+        c(1, 1, 3)
303
+    )
304
+
305
+    plotFitnessLandscape(mf1)
306
+    
307
+    expect_equal(
308
+        OncoSimulR:::peak_valley(
309
+                                OncoSimulR:::genot_to_adj_mat(mf1))$peak, 2)
310
+    
311
+    expect_equal(
312
+            OncoSimulR:::peak_valley(
313
+                             OncoSimulR:::filter_inaccessible(
314
+                                              OncoSimulR:::genot_to_adj_mat(mf1), 0))$peak,
315
+        c(2, 4))
316
+
317
+    expect_equal(
318
+        OncoSimulR:::fast_peaks(mf1, 0),
319
+        c(2, 4))
320
+
321
+
322
+    ## reorder the rows of the matrix. Affects fast_peaks, as it should
323
+    mf1 <- rbind(
324
+        c(1, 0, 4),
325
+        c(0, 0, 1),
326
+        c(1, 1, 3),
327
+        c(0, 1, 2)
328
+    )
329
+    
330
+    plotFitnessLandscape(mf1)
331
+    ## this is not affected, since it uses, by construction, the ordered matrix
332
+    expect_equal(
333
+        OncoSimulR:::peak_valley(
334
+                                OncoSimulR:::genot_to_adj_mat(mf1))$peak, 2)
335
+    ## ditto
336
+    expect_equal(
337
+            OncoSimulR:::peak_valley(
338
+                             OncoSimulR:::filter_inaccessible(
339
+                                              OncoSimulR:::genot_to_adj_mat(mf1), 0))$peak,
340
+        c(2, 4))
341
+    expect_equal(
342
+        OncoSimulR:::fast_peaks(mf1, 0),
343
+        c(1, 3))
344
+
345
+
346
+    
347
+    ## filtering by inaccessible also likely gets rid of all
348
+    ## peaks in the non-accessible part of the fitness landscape.
349
+    ## But of course those cannot be peaks, since they are inaccessible
350
+
351
+    mf3 <- rbind(
352
+        c(0, 0, 0, 1),
353
+        c(1, 0, 0, 2),
354
+        c(0, 1, 0, 0.1),
355
+        c(0, 0, 1, 0.3),
356
+        c(1, 1, 0, 3),
357
+        c(1, 0, 1, 4),
358
+        c(0, 1, 1, 0.4),
359
+        c(1, 1, 1, 0.2)
360
+    )
361
+
362
+    ## plotFitnessLandscape(mf3)
363
+    ## BC is detected as a peak, the seventh entry
364
+    expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf3))$peak,
365
+                 c(5, 6, 7))
366
+
367
+    ## recall this gives the columns of the reduced matrix, which are the former
368
+    ## 5 and 6
369
+    expect_equal(OncoSimulR:::peak_valley(
370
+                                  OncoSimulR:::filter_inaccessible(
371
+                                                   OncoSimulR:::genot_to_adj_mat(mf3), 0))$peak,
372
+                 c(3, 4))
373
+
374
+    ## correct indices from original matrix
375
+    expect_equal(
376
+        OncoSimulR:::fast_peaks(mf3, 0),
377
+        c(5, 6))
378
+
379
+    ## works under reorder?
380
+    expect_equal(
381
+        OncoSimulR:::fast_peaks(mf3[c(5, 1, 2, 3, 7, 4, 6), ], 0),
382
+        c(1, 7))
383
+
384
+    
385
+  
386
+
387
+    mf4 <- rbind(
388
+        c(0, 0, 0, 1),
389
+        c(1, 0, 0, 2),
390
+        c(0, 1, 0, 0.1),
391
+        c(0, 0, 1, 0.3),
392
+        c(1, 1, 0, 3),
393
+        c(1, 0, 1, 4),
394
+        c(0, 1, 1, 0.4),
395
+        c(1, 1, 1, 1.2)
396
+    )
397
+
398
+    ## plotFitnessLandscape(mf4)
399
+    
400
+    ## ABC is not detected as a peak, because it is not.
401
+    ## Issue is not its accessibility, but that AC and AB have larger fitness
402
+    ## see example with mf5
403
+    expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf4))$peak,
404
+                 c(5, 6))
405
+
406
+    ## recall this gives the columns of the reduced matrix, which are the former
407
+    ## 5 and 6
408
+    expect_equal(OncoSimulR:::peak_valley(
409
+                                  OncoSimulR:::filter_inaccessible(
410
+                                                   OncoSimulR:::genot_to_adj_mat(mf4), 0))$peak,
411
+                 c(3, 4))
412
+
413
+    expect_equal(
414
+        OncoSimulR:::fast_peaks(mf4, 0),
415
+        c(5, 6))
416
+
417
+    
418
+    ## Now ABC is accessible
419
+      mf5 <- rbind(
420
+        c(0, 0, 0, 1),
421
+        c(1, 0, 0, 2),
422
+        c(0, 1, 0, 0.1),
423
+        c(0, 0, 1, 0.3),
424
+        c(1, 1, 0, 3),
425
+        c(1, 0, 1, 4),
426
+        c(0, 1, 1, 0.4),
427
+        c(1, 1, 1, 3.5)
428
+    )
429
+ 
430
+      ## plotFitnessLandscape(mf5)
431
+      ## plotFitnessLandscape(mf5, only_accessible = TRUE)
432
+      
433
+      ## But only AC is the peak, correctly
434
+      expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf5))$peak,
435
+                   c(6))
436
+
437
+      ## Now, both AC and ABC are peaks
438
+      ## columns 4 and 5 correspond to genotypes 6 and 8
439
+      expect_equal(OncoSimulR:::peak_valley(
440
+                                    OncoSimulR:::filter_inaccessible(
441
+                                                     OncoSimulR:::genot_to_adj_mat(mf5), 0))$peak,
442
+                   c(4, 5))
443
+
444
+      expect_equal(
445
+          OncoSimulR:::fast_peaks(mf5, 0),
446
+          c(6, 8))
447
+
448
+    ## AC and ABC same max fitness
449
+      mf6 <- rbind(
450
+        c(0, 0, 0, 1),
451
+        c(1, 0, 0, 2),
452
+        c(0, 1, 0, 0.1),
453
+        c(0, 0, 1, 0.3),
454
+        c(1, 1, 0, 3),
455
+        c(1, 0, 1, 4),
456
+        c(0, 1, 1, 0.4),
457
+        c(1, 1, 1, 4)
458
+    )
459
+ 
460
+      ## plotFitnessLandscape(mf6)
461
+      ## Both AC and ABC are peaks. Correctly
462
+      expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf6))$peak,
463
+                   c(6, 8))
464
+
465
+      ## fast peaks should refuse to run
466
+      expect_error(
467
+          OncoSimulR:::fast_peaks(mf6, 0),
468
+          "There could be several connected maxima",
469
+          fixed = TRUE)
470
+
471
+
472
+
473
+      ## A and AC
474
+      mf7 <- rbind(
475
+        c(0, 0, 0, 1),
476
+        c(1, 0, 0, 4),
477
+        c(0, 1, 0, 0.1),
478
+        c(0, 0, 1, 0.3),
479
+        c(1, 1, 0, 3),
480
+        c(1, 0, 1, 4),
481
+        c(0, 1, 1, 0.4),
482
+        c(1, 1, 1, 3.4)
483
+    )
484
+      ## plotFitnessLandscape(mf7)
485
+      ## Both A and AC are peaks. Correctly
486
+      expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf7))$peak,
487
+                   c(2, 6))
488
+
489
+      ## fast peaks should refuse to run
490
+      expect_error(
491
+          OncoSimulR:::fast_peaks(mf7, 0),
492
+          "There could be several connected maxima",
493
+          fixed = TRUE)
494
+      
495
+
496
+      
497
+      ## A, AC, ABC same max fitness
498
+      mf8 <- rbind(
499
+        c(0, 0, 0, 1),
500
+        c(1, 0, 0, 4),
501
+        c(0, 1, 0, 0.1),
502
+        c(0, 0, 1, 0.3),
503
+        c(1, 1, 0, 3),
504
+        c(1, 0, 1, 4),
505
+        c(0, 1, 1, 0.4),
506
+        c(1, 1, 1, 4)
507
+    )
508
+      ## plotFitnessLandscape(mf8)
509
+      ## Both A and AC are peaks. Correctly
510
+      expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf8))$peak,
511
+                   c(2, 6, 8))
512
+
513
+      
514
+      ## fast peaks should refuse to run
515
+      expect_error(
516
+          OncoSimulR:::fast_peaks(mf8, 0),
517
+          "There could be several connected maxima",
518
+          fixed = TRUE)
519
+      
520
+      ## A, AC, AB same max fitness
521
+      mf9 <- rbind(
522
+        c(0, 0, 0, 1),
523
+        c(1, 0, 0, 4),
524
+        c(0, 1, 0, 0.1),
525
+        c(0, 0, 1, 0.3),
526
+        c(1, 1, 0, 4),
527
+        c(1, 0, 1, 4),
528
+        c(0, 1, 1, 0.4),
529
+        c(1, 1, 1, 2.4)
530
+    )
531
+      ## plotFitnessLandscape(mf9, use_ggrepel = TRUE)
532
+      ## Both A and AC are peaks. Correctly
533
+      expect_equal(OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(mf9))$peak,
534
+                   c(2, 5, 6))
535
+
536
+      
537
+      
538
+      ## This illustrates that the "filter_inaccessible" is not just "do
539
+      ## not take into account inaccessible genotypes" but, properly, do
540
+      ## not take into account, do not allow any travelling through
541
+      ## inaccessible paths.
542
+
543
+      ## Thus, filter_inaccessible is the way to go if we want to exclude
544
+      ## backmutation. In no bakcmutation, it is not possible to go from
545
+      ## m+1 to m mutations.
546
+      
547
+      ## It also shows that naively looking at the adjacency matrix can
548
+      ## fail. Two reasons:
549
+
550
+      ## a) the last row will never have any entries and yet it need not
551
+      ## be a peak.
552
+
553
+      ## b) simply looking at adjacency matrix is not the correct
554
+      ## procedure when some fitnesses can be equal. That is what the
555
+      ## function peak_valley works hard to get right :-)
556
+      
557
+      
558
+    cp2 <- structure(c(0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
559
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
560
+1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
561
+0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
562
+1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
563
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 
564
+1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 
565
+1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 
566
+0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
567
+0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 
568
+0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
569
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 
570
+1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
571
+0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
572
+1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 
573
+1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 
574
+1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 
575
+1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 
576
+0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 
577
+0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 
578
+0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 
579
+0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 
580
+1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 
581
+1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 
582
+1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
583
+0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 
584
+0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 
585
+1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 
586
+1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 
587
+1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 
588
+1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 
589
+0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 
590
+1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 
591
+1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 
592
+0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 
593
+0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 
594
+1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
595
+0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 
596
+0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 
597
+1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 
598
+0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 
599
+1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 
600
+1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0.852873407703003, 
601
+1.51520969989942, 1.09934414414554, 1.08362391548151, 1.06352377058758, 
602
+0.875558455823467, 1.69351291065104, 2.92492684398312, 1.02057836095586, 
603
+0.994559647972076, 1.01807462848707, 0.782398502758159, 0.755318352952028, 
604
+1.81553780643735, 1.7427209966816, 1.00116069406198, 0.790243245268257, 
605
+3.38168029927183, 1.18573953796889, 1.24679706264807, 0.944183929293486, 
606
+1.04153712305771, 1.20232261789798, 1.0345783487807, 1.04678440594199, 
607
+0.993244793867836, 0.97914067773803, 0.79321495112376, 0.868101325153957, 
608
+0.866235177920767, 4.1155779007473, 3.163209721772, 4.34977195536485, 
609
+1.09932137400121, 1.08612305022998, 0.916953742980573, 0.850115441923501, 
610
+1.06277833622263, 0.865087563773651, 0.928169473201598, 0.904902930158639, 
611
+0.897493717866434, 0.71149600120298, 1.06538015204221, 1.07859259299858, 
612
+0.858803230350538, 2.25551012930227, 1.09241633274047, 0.870425423271033, 
613
+2.17687545546796, 0.84459090869647, 4.58149975106353, 3.85245245151455, 
614
+1.28342034151899, 1.08529050597462, 1.02256835452167, 1.04982916832593, 
615
+1.0457848642841, 0.90107628754529, 1.08969768294891, 1.05766476796899, 
616
+0.902394628842996, 0.888348932462492, 1.01037474862489, 0.954093541062801, 
617
+0.807820459139572, 2.74832174163312, 1.01318977068049, 0.854004033396404, 
618
+0.842034005421367, 0.800544915243185, 5.31108977064723, 5.31423066433053, 
619
+1.16539625099584, 0.983449927610599, 0.996320237843515, 0.9794158873742, 
620
+1.02038748073625, 0.808875731463122, 0.964868528161141, 0.966566509486774, 
621
+0.860373057266184, 0.81168825662344, 1.19978481918247, 0.98157798351476, 
622
+0.999463234369357, 0.98711106267367, 0.961995700808845, 4.79391503400402, 
623
+0.998909701750288, 0.996465768481649, 0.785688019266101, 0.778917380394268, 
624
+1.17230915723272, 1.19911647477422, 0.961939861987872, 0.981542927739855, 
625
+0.999822362533057, 1.15236749698624, 0.919688401637553, 0.876733220798505, 
626
+0.92069327916386, 0.958801043337062, 0.670589798279379, 0.84152795885645, 
627
+5.93895353544503, 0.723329951949942, 0.733188455582477, 1.07557023464861, 
628
+1.09180382079188, 0.923957719945906, 0.93313538716072, 0.896562810368268, 
629
+1.09769821865825, 1.10615389985864, 0.94426955155254, 0.898545873061366, 
630
+0.876269943340891, 1.11556411094416, 0.94930544641744, 1.02495854041569, 
631
+0.794907983845338, 0.847332095413669, 0.776896984008625, 0.928896557877041, 
632
+0.945135371172636, 0.892100531723894), .Dim = c(128L, 8L), .Dimnames = list(
633
+    NULL, c("CDKN2A", "KRAS", "MLL3", "PXDN", "SMAD4", "TGFBR2", 
634
+    "TP53", "")))
635
+
636
+    expect_equal(length(
637
+        OncoSimulR:::peak_valley(OncoSimulR:::genot_to_adj_mat(cp2))$peak), 4)
638
+
639
+    expect_equal(length(
640
+        OncoSimulR:::peak_valley(
641
+                         OncoSimulR:::filter_inaccessible(
642
+                                          OncoSimulR:::genot_to_adj_mat(cp2), 0))$peak), 6)
643
+
644
+
645
+    expect_equal(
646
+          OncoSimulR:::fast_peaks(cp2, 0),
647
+        c(51, 55, 68, 74, 90, 107))
648
+
649
+    ## Nope, since filter inaccessible removes genotypes
650
+    expect_false(all(
651
+        OncoSimulR:::peak_valley(
652
+                         OncoSimulR:::filter_inaccessible(
653
+                                          OncoSimulR:::genot_to_adj_mat(cp2), 0))$peak ==
654
+        OncoSimulR:::fast_peaks(cp2, 0)))
655
+    
656
+
657
+    ## compare with the probl
658
+    gnn <- OncoSimulR:::to_Fitness_Matrix(cp2, 1000)$afe[, "Genotype"]
659
+
660
+    plotFitnessLandscape(cp2, use_ggrepel = TRUE, only_accessible = TRUE)
661
+    
662
+    expect_equal(
663
+        gnn[OncoSimulR:::fast_peaks(cp2, 0)],
664
+        c("KRAS, PXDN, TP53",
665
+          "MLL3, PXDN, SMAD4",
666
+          "CDKN2A, KRAS, MLL3, TP53",
667
+          "CDKN2A, KRAS, TGFBR2, TP53",
668
+          "KRAS, MLL3, TGFBR2, TP53",
669
+          "CDKN2A, KRAS, PXDN, SMAD4, TP53"))
670
+
671
+    ## can also check by removing the inacessible genotypes so the indices are the same
672
+    agg <- OncoSimulR:::wrap_accessibleGenotypes(cp2, 0)
673
+    cp3 <- cp2[agg, ]
674
+
675
+    ## This is NOT correct: we have removed the inacessible,
676
+    ## but we allow backmutation
677
+    ## OncoSimulR:::peak_valley(
678
+    ##                  OncoSimulR:::genot_to_adj_mat(cp3))$peak
679
+    
680
+    expect_equal(OncoSimulR:::peak_valley(
681
+                     OncoSimulR:::filter_inaccessible(
682
+                                      OncoSimulR:::genot_to_adj_mat(cp3), 0))$peak,
683
+                 OncoSimulR:::fast_peaks(cp3, 0))
684
+
685
+    gnn3 <- gnn[agg]
686
+
687
+    expect_equal(
688
+        gnn3[OncoSimulR:::fast_peaks(cp3, 0)],
689
+        c("KRAS, PXDN, TP53",
690
+          "MLL3, PXDN, SMAD4",
691
+          "CDKN2A, KRAS, MLL3, TP53",
692
+          "CDKN2A, KRAS, TGFBR2, TP53",
693
+          "KRAS, MLL3, TGFBR2, TP53",
694
+          "CDKN2A, KRAS, PXDN, SMAD4, TP53"))
695
+
696
+    
697
+})
698
+
699
+
700
+
701
+test_that("Some random checks of the fast peaks function", {
702
+    niter <- 50
703
+    for(i in 1:niter) {
704
+        for(ng in 2:6) {
705
+            rtmp <- rfitness(ng)
706
+            p1 <- OncoSimulR:::peak_valley(
707
+                                   OncoSimulR:::filter_inaccessible(
708
+                                                    OncoSimulR:::genot_to_adj_mat(rtmp), 0))$peak
709
+            expect_equal(length(p1),
710
+                         length(OncoSimulR:::fast_peaks(rtmp, 0)))
711
+            agg <- OncoSimulR:::wrap_accessibleGenotypes(rtmp, 0)
712
+            if(length(agg) >= 2) {
713
+                ## cat(".")
714
+                p2 <- OncoSimulR:::peak_valley(
715
+                                       OncoSimulR:::filter_inaccessible(
716
+                                                        OncoSimulR:::genot_to_adj_mat(rtmp[agg, , drop = FALSE]), 0))$peak
717
+                expect_equal(p2, OncoSimulR:::fast_peaks(rtmp[agg, , drop = FALSE], 0))
718
+                expect_equal(length(p2), length(p1))
719
+            }
720
+        }
721
+    }
722
+})
... ...
@@ -480,6 +480,22 @@ test_that("exercising sampling code, propError", {
480 480
                    fixed = TRUE)
481 481
 })
482 482
 
483
+
484
+
485
+
486
+test_that("sampledGenotypes deals with NAs", {
487
+    sp <- structure(c(1, NA, 1, 0, 0, NA, 0, 0, 0, NA, 0, 0, 0, NA, 0, 
488
+                      0, 0, NA, 1, 0, 0, NA, 0, 0, 0, NA, 0, 0, 1, NA, 1, 0),
489
+                    .Dim = c(4L, 
490
+                             8L),
491
+                    .Dimnames = list(NULL, c("ZZZ", "APC", "EVC2",
492
+                                             "VVV", 
493
+                                             "KRAS", "PIK3CA", "TCF7L2", "TP53")))
494
+    sg <- sampledGenotypes(sp)
495
+    expect_true(is.na(sg[4, 1]))
496
+    expect_true(sg[1, 1] == "WT")
497
+})
498
+
483 499
 cat(paste("\n Ending samplePop tests", date(), "\n"))
484 500
 
485 501
 
... ...
@@ -3616,7 +3616,10 @@ plotFitnessLandscape(evalAllGenotypes(fem6))
3616 3616
 This way of giving a fitness specification to OncoSimulR might be
3617 3617
 ideal if you directly generate random mappings of genotypes to
3618 3618
 fitness (or random fitness landscapes), as we will do in section
3619
-\@ref(gener-fit-land).
3619
+\@ref(gener-fit-land). (Note for curious readers: actually, what we
3620
+do now is convert the table of fitness of genotypes to a fitness
3621
+specification with all possible epistatic interactions; look
3622
+at the `fem6` object).
3620 3623
 
3621 3624
 
3622 3625
 We will see an example of this way of passing fitness again in
... ...
@@ -3635,6 +3638,15 @@ We will see an example of this way of passing fitness again in
3635 3638
 <!-- % when $k$ is 10. -->
3636 3639
 
3637 3640
 
3641
+<!-- Not for self: actually, what I really do is mapping -->
3642
+<!-- the genotypes to epistasis effects. -->
3643
+
3644
+<!-- So we either use a succint description, or we map all -->
3645
+<!-- to epistasis. -->
3646
+
3647
+
3648
+
3649
+
3638 3650
 ### How to specify fitness effects with the lego system {#howfit}
3639 3651
 
3640 3652
 <!-- % A guiding design principle of OncoSimulR is to try to make the -->
... ...
@@ -8304,19 +8316,51 @@ proposed and named before; please let me know, best if with a reference.
8304 8316
 
8305 8317
 
8306 8318
 Should we remove direct connections if there are indirect? Or,
8307
-should we set `removeDirectIndirect = TRUE`? Except for
8308
-@Farahani2013 and @ramazzotti_capri_2015, none of the DAGs I've seen
8309
-in the context of CBNs, Oncogenetic trees, etc, include both direct
8310
-and indirect connections between nodes. If these exist, reasoning
8311
-about the model can be harder. For example, with CBN (AND or CMPN or
8312
-monotone relationships) adding a direct connection makes no
8313
-difference iff we assume that the relationships encoded in the DAG
8314
-are fully respected (e.g., all $s_h = -\infty$). But it can make a
8315
-difference if we allow for deviations from the monotonicity,
8316
-specially if we only check for the satisfaction of the presence of
8317
-the immediate ancestors. And things get even trickier if we combine
8318
-XOR with AND. The code for computing fitness, however, should deal
8319
-with all of this just fine.
8319
+should we set `removeDirectIndirect = TRUE`? Setting
8320
+`removeDirectIndirect = TRUE` is basically asking for
8321
+the
8322
+[transitive reduction](https://en.wikipedia.org/wiki/Transitive_reduction) of
8323
+the generated DAG. Except for @Farahani2013 and
8324
+@ramazzotti_capri_2015, none of the DAGs I've seen in the context of
8325
+CBNs, Oncogenetic trees, etc, include both direct and indirect
8326
+connections between nodes. If these exist, reasoning about the model
8327
+can be harder. For example, with CBN (AND or CMPN or monotone
8328
+relationships) adding a direct connection makes no difference iff we
8329
+assume that the relationships encoded in the DAG are fully respected
8330
+(e.g., all $s_h = -\infty$). But it can make a difference if we
8331
+allow for deviations from the monotonicity, specially if we only
8332
+check for the satisfaction of the presence of the immediate
8333
+ancestors. And things get even trickier if we combine XOR with
8334
+AND. <!-- The code for computing fitness, however, should deal with all -->
8335
+<!-- of this just fine. --> Thus, I strongly suggest you leave the
8336
+default `removeDirectIndirect = TRUE`. If you change it, you should
8337
+double check that the fitnesses of the possible genotypes are what
8338
+you expect. In fact, I would suggest that, to be sure you get what
8339
+you think you should get, you convert the fitness from the DAG to a
8340
+fitness table, and pass that to the simulations, and this requires
8341
+using non-exposed user functions; to give you an idea, this could
8342
+work (but you've been warned: this is dangerous!)
8343
+
8344
+
8345
+```{r simographindirect, eval=FALSE,echo=TRUE}
8346
+g2 <- simOGraph(4, out = "rT", removeDirectIndirect = FALSE)
8347
+
8348
+fe_from_d <- allFitnessEffects(g2)
8349
+fitness_d <- evalAllGenotypes(fe_from_d)
8350
+
8351
+fe_from_t <- allFitnessEffects(genotFitness =
8352
+                          OncoSimulR:::allGenotypes_to_matrix(fitness_d))
8353
+						  
8354
+## Compare
8355
+fitness_d
8356
+(fitness_t <- evalAllGenotypes(fe_from_t))
8357
+
8358
+identical(fitness_d, fitness_t)
8359
+						  
8360
+
8361
+## ... but to be safe use fe_from_t as the fitnessEffects object for simulations
8362
+
8363
+```
8320 8364
 
8321 8365
 
8322 8366
 
... ...
@@ -1,15 +1,15 @@
1 1
 \usepackage[%
2
-		shash={e91dd33},
3
-		lhash={e91dd330908da89bee6171f0b856e34171e689d2},
4
-		authname={ramon diaz-uriarte (at Phelsuma)},
5
-		authemail={rdiaz02@gmail.com},
6
-		authsdate={2017-02-18},
7
-		authidate={2017-02-18 21:20:59 +0100},
8
-		authudate={1487449259},
9
-		commname={ramon diaz-uriarte (at Phelsuma)},
10
-		commemail={rdiaz02@gmail.com},
11
-		commsdate={2017-02-18},
12
-		commidate={2017-02-18 21:20:59 +0100},
13
-		commudate={1487449259},
14
-		refnames={ (HEAD -> master, tag: 2.5.12, origin/master, origin/HEAD, oncosimul-ana/ana-intervention, ana-intervention)}
2
+		shash={1d0c18d},
3
+		lhash={1d0c18d601793401e4273f6bfb8fe6e9af3ab8dc},
4
+		authname={Ramon Diaz-Uriarte},
5
+		authemail={rdiaz02@users.noreply.github.com},
6
+		authsdate={2017-09-27},
7
+		authidate={2017-09-27 11:34:21 +0200},
8
+		authudate={1506504861},
9
+		commname={Ramon Diaz-Uriarte},
10
+		commemail={rdiaz02@users.noreply.github.com},
11
+		commsdate={2017-09-27},
12
+		commidate={2017-09-27 11:34:21 +0200},
13
+		commudate={1506504861},
14
+		refnames={ (HEAD, origin/master, origin/HEAD)}
15 15
 	]{gitsetinfo}
16 16
\ No newline at end of file