Browse code

v. 2.3.3.\n mutator, fitness landscapes, rfitness, and many other changes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@118909 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 23/06/2016 16:43:51
Showing 70 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progresion with Epistasis 
4
-Version: 2.3.2
5
-Date: 2016-04-14
4
+Version: 2.3.3
5
+Date: 2016-06-23
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -13,21 +13,23 @@ Description: Functions for forward population genetic simulation in
13 13
     asexual populations, with special focus on cancer progression.
14 14
     Fitness can be an arbitrary function of genetic interactions between
15 15
     multiple genes or modules of genes, including epistasis, order
16
-    restrictions in mutation accumulation, and order effects.  Simulations
16
+    restrictions in mutation accumulation, and order effects.  Mutation
17
+    rates can differ between genes, and we can include mutator/antimutator
18
+    genes (to model mutator phenotypes). Simulations
17 19
     use continuous-time models and can include driver and passenger genes
18 20
     and modules.  Also included are functions for simulating random DAGs
19 21
     of the type found in Oncogenetic Tress, Conjunctive Bayesian Networks,
20 22
     and other tumor progression models, and for plotting and sampling from
21 23
     single or multiple realizations of the simulations, including
22
-    single-cell sampling, as well as functions for plotting the true
23
-    phylogenetic relationships of the clones.
24
+    single-cell sampling, as well as functions for plotting the parent-child
25
+    relationships of the clones.
24 26
 biocViews: BiologicalQuestion, SomaticMutation
25 27
 License: GPL (>= 3)
26 28
 URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cancer.gov/gsr/packages/oncosimulr/
27 29
 BugReports: https://github.com/rdiaz02/OncoSimul/issues
28
-Depends: R (>= 3.1.0)
29
-Imports: Rcpp (>= 0.11.1), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices
30
-Suggests: BiocStyle, knitr, Oncotree, testthat
30
+Depends: R (>= 3.3.0)
31
+Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel
32
+Suggests: BiocStyle, knitr, Oncotree, testthat (>= 1.0.0)
31 33
 LinkingTo: Rcpp
32 34
 VignetteBuilder: knitr
33 35
 
... ...
@@ -2,8 +2,13 @@ useDynLib(OncoSimulR, .registration=TRUE)
2 2
 
3 3
 export("oncoSimulPop", "oncoSimulIndiv", "samplePop",
4 4
        "plotPoset", "oncoSimulSample", "allFitnessEffects",
5
-       "evalGenotype", "evalAllGenotypes", "simOGraph"
6
-     , "plotClonePhylog", "OncoSimulWide2Long"
5
+       "evalGenotype", "evalAllGenotypes", "simOGraph",
6
+       "plotClonePhylog", "OncoSimulWide2Long",
7
+       "allMutatorEffects", "evalAllGenotypesMut",
8
+       "evalGenotypeMut", "evalGenotypeFitAndMut",
9
+       "evalAllGenotypesFitAndMut",
10
+       "rfitness",
11
+       "plotFitnessLandscape"
7 12
        )
8 13
 
9 14
 S3method(plot, oncosimul)
... ...
@@ -13,8 +18,14 @@ S3method(plot, oncosimulpop)
13 18
 S3method(summary, oncosimulpop)
14 19
 S3method(print, oncosimulpop)
15 20
 S3method(plot, fitnessEffects)
21
+S3method(plot, genotype_fitness_matrix)
22
+S3method(plot, evalAllGenotypes)
23
+S3method(plot, evalAllGenotypesMut)
16 24
 
25
+import(ggplot2)
26
+importFrom("ggrepel", geom_text_repel, geom_label_repel)
17 27
 
28
+importFrom("stats", "rnorm")
18 29
 importFrom("data.table", rbindlist, .rbind.data.table) 
19 30
 importFrom(Rcpp, evalCpp)
20 31
 importFrom("igraph", igraph.to.graphNEL, graph.data.frame, V, E,
... ...
@@ -31,3 +42,11 @@ importFrom("stats", "na.omit", "runif", "smooth.spline")
31 42
 importFrom("utils", "type.convert")
32 43
 importFrom("RColorBrewer", "brewer.pal")
33 44
 importFrom("grDevices", "colorRampPalette", "hsv", "rainbow")
45
+importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate",
46
+           "filter")
47
+importFrom("smatr", "ma") ## for major axis regression in some tests
48
+importFrom("car", "linearHypothesis")
49
+
50
+
51
+
52
+
... ...
@@ -23,6 +23,7 @@ oncoSimulSample <- function(Nindiv,
23 23
                             model = "Exp",
24 24
                             numPassengers = 0,
25 25
                             mu = 1e-6,
26
+                            muEF = NULL,
26 27
                             detectionSize = round(runif(Nindiv, 1e5, 1e8)),
27 28
                             detectionDrivers = {
28 29
                                 if(inherits(fp, "fitnessEffects")) {
... ...
@@ -62,6 +63,7 @@ oncoSimulSample <- function(Nindiv,
62 63
                             thresholdWhole = 0.5,
63 64
                             initMutant = NULL,
64 65
                             verbosity  = 1,
66
+                            showProgress = TRUE,
65 67
                             seed = "auto"){
66 68
     ## No longer using mclapply, because of the way we use the limit on
67 69
     ## the number of tries.
... ...
@@ -164,6 +166,7 @@ oncoSimulSample <- function(Nindiv,
164 166
                                model = model,
165 167
                                numPassengers = numPassengers,
166 168
                                mu = mu,
169
+                               muEF = muEF,
167 170
                                detectionSize = params[indiv, "detectionSize"],
168 171
                                detectionDrivers = params[indiv, "detectionDrivers"],
169 172
                                sampleEvery = sampleEvery,
... ...
@@ -195,6 +198,14 @@ oncoSimulSample <- function(Nindiv,
195 198
         numToRun <- (numToRun - 1)
196 199
         attemptsUsed <- attemptsUsed + tmp$other$attemptsUsed
197 200
         attemptsLeft <- (max.num.tries.total - attemptsUsed)
201
+        if(showProgress) {
202
+            cat("......  Done individual ", indiv,
203
+                ". Used ", attemptsUsed, "attempts.",
204
+                ". Running for ", as.double(difftime(Sys.time(),
205
+                                                    startTime, units = "secs")),
206
+                " secs.\n"
207
+                )
208
+        }
198 209
         indiv <- indiv + 1
199 210
         
200 211
         ## We need to check in exactly this order. Attempts left only
... ...
@@ -243,17 +254,30 @@ oncoSimulSample <- function(Nindiv,
243 254
 }
244 255
 
245 256
 
246
-samplePop <- function(x, timeSample = "last", typeSample = "whole",
257
+samplePop <- function(x, timeSample = "last",
258
+                      typeSample = "whole",
247 259
                       thresholdWhole = 0.5,
248
-                      geneNames = NULL) {
260
+                      geneNames = NULL,
261
+                      popSizeSample = NULL) {
262
+    ## timeSample <- match.arg(timeSample)
249 263
     gN <- geneNames
264
+    
265
+    if(!is.null(popSizeSample) && (length(popSizeSample) > 1) &&
266
+       (length(popSizeSample) != length(x))) {
267
+        message("length popSizeSample != number of subjects")
268
+        popSizeSample <- rep(popSizeSample, length.out = length(x))
269
+        }
270
+    ## A hack to prevent Map from crashing with a NULL
271
+    if(is.null(popSizeSample)) popSizeSample <- -99
250 272
     if(inherits(x, "oncosimulpop")) {
251 273
         z <- do.call(rbind,
252
-                     lapply(x,
253
-                            get.mut.vector,
254
-                            timeSample = timeSample,
255
-                            typeSample = typeSample,
256
-                            thresholdWhole = thresholdWhole))
274
+                     Map(get.mut.vector,
275
+                         x = x,
276
+                         timeSample = timeSample,
277
+                         typeSample = typeSample,
278
+                         thresholdWhole = thresholdWhole,
279
+                         popSizeSample = popSizeSample
280
+                         ))
257 281
         ## We need to check if the object is coming from v.2., to avoid
258 282
         ## having to force passing a vector of names
259 283
         if(is.null(gN) && (!is.null(x[[1]]$geneNames)))
... ...
@@ -262,7 +286,8 @@ samplePop <- function(x, timeSample = "last", typeSample = "whole",
262 286
         z <- get.mut.vector(x,
263 287
                             timeSample = timeSample,
264 288
                             typeSample = typeSample,
265
-                            thresholdWhole = thresholdWhole)
289
+                            thresholdWhole = thresholdWhole,
290
+                            popSizeSample = popSizeSample)
266 291
         dim(z) <- c(1, length(z))
267 292
         if(is.null(gN) && (!is.null(x$geneNames)))
268 293
             gN <- geneNames
... ...
@@ -285,6 +310,7 @@ oncoSimulPop <- function(Nindiv,
285 310
                          model = "Exp",
286 311
                          numPassengers = 0,
287 312
                          mu = 1e-6,
313
+                         muEF = NULL,
288 314
                          detectionSize = 1e8,
289 315
                          detectionDrivers = 4,
290 316
                          sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
... ...
@@ -329,6 +355,7 @@ oncoSimulPop <- function(Nindiv,
329 355
                         model = model,
330 356
                         numPassengers = numPassengers,
331 357
                         mu = mu,
358
+                        muEF = muEF,
332 359
                         detectionSize = detectionSize,
333 360
                         detectionDrivers = detectionDrivers,
334 361
                         sampleEvery = sampleEvery,
... ...
@@ -365,6 +392,7 @@ oncoSimulIndiv <- function(fp,
365 392
                            model = "Exp",
366 393
                            numPassengers = 0,
367 394
                            mu = 1e-6,
395
+                           muEF = NULL,
368 396
                            detectionSize = 1e8,
369 397
                            detectionDrivers = 4,
370 398
                            sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
... ...
@@ -434,8 +462,11 @@ oncoSimulIndiv <- function(fp,
434 462
             stop("Unknown model")
435 463
     }
436 464
 
465
+    if( (length(mu) > 1) && !inherits(fp, "fitnessEffects"))
466
+        stop("Per-gene mutation rates cannot be used with the old poset format")
467
+
437 468
     if(any(mu < 0)) {
438
-        stop("mutation rate (mu) is negative")
469
+        stop("(at least one) mutation rate (mu) is negative")
439 470
     }
440 471
     ## We do not test for equality to 0. That might be a weird but
441 472
     ## legitimate case?
... ...
@@ -463,6 +494,8 @@ oncoSimulIndiv <- function(fp,
463 494
             stop(m)
464 495
            
465 496
         }
497
+        if(!is.null(muEF))
498
+            stop("Mutator effects cannot be especified with the old poset format")
466 499
         if(length(initMutant) > 1)
467 500
             stop("With the old poset, initMutant can only take a single value.")
468 501
         ## Seeding C++ is now much better in new version
... ...
@@ -507,7 +540,7 @@ oncoSimulIndiv <- function(fp,
507 540
                                      max.wall.time = max.wall.time,
508 541
                                      max.num.tries = max.num.tries,
509 542
                                      keepEvery = keepEvery,  
510
-                                     alpha = 0.0015,  
543
+                                     ## alpha = 0.0015,  
511 544
                                      sh = sh,
512 545
                                      K = K, 
513 546
                                      minDetectDrvCloneSz = minDetectDrvCloneSz,
... ...
@@ -558,7 +591,7 @@ oncoSimulIndiv <- function(fp,
558 591
                                         max.wall.time = max.wall.time,
559 592
                                         max.num.tries = max.num.tries,
560 593
                                         keepEvery = keepEvery,  
561
-                                        alpha = 0.0015,  
594
+                                        ## alpha = 0.0015,  
562 595
                                         K = K, 
563 596
                                         minDetectDrvCloneSz = minDetectDrvCloneSz,
564 597
                                         extraTime = extraTime,
... ...
@@ -566,7 +599,8 @@ oncoSimulIndiv <- function(fp,
566 599
                                         onlyCancer = onlyCancer,
567 600
                                         errorHitWallTime = errorHitWallTime,
568 601
                                         errorHitMaxTries = errorHitMaxTries,
569
-                                        keepPhylog = keepPhylog),
602
+                                        keepPhylog = keepPhylog,
603
+                                        MMUEF = muEF),
570 604
                   silent = !verbosity)
571 605
         objClass <- c("oncosimul", "oncosimul2")
572 606
     }
... ...
@@ -648,7 +682,7 @@ plot.oncosimulpop <- function(x, ask = TRUE,
648 682
                               show = "drivers", 
649 683
                               type = ifelse(show == "genotypes",
650 684
                                             "stacked", "line"),
651
-                              col = c(8, "orange", 6:1),
685
+                              col = "auto",
652 686
                               log = ifelse(type == "line", "y", ""),
653 687
                               ltyClone = 2:6,
654 688
                               lwdClone = 0.9,
... ...
@@ -1389,17 +1423,32 @@ plotClonePhylog <- function(x, N = 1, t = "last",
1389 1423
 
1390 1424
 ############# The rest are internal functions
1391 1425
 
1426
+closest_time <- function(x, size) {
1427
+    ## Find the first time when a given size is reached
1428
+    sizes <- rowSums(x$pops.by.time[, -1, drop = FALSE])
1429
+    candidates <- which(sizes >= size)
1430
+    if(length(candidates) == 0) {
1431
+        warning(paste("Pop size never >= requested size.",
1432
+                      "Thus, there is nothing to sample. You will get NAs"))
1433
+        return(-99)
1434
+    } else {
1435
+        return(candidates[1])
1436
+    }
1437
+}
1438
+
1392 1439
 
1393
-get.the.time.for.sample <- function(tmp, timeSample) {
1394
-    if(timeSample == "last") {
1440
+get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) {
1441
+    if( !is.null(popSizeSample) && (popSizeSample >= 0) )  {
1442
+        the.time <- closest_time(tmp, popSizeSample)
1443
+    } else if(timeSample == "last") {
1395 1444
         if(tmp$TotalPopSize == 0) {
1396 1445
             warning(paste("Final population size is 0.",
1397 1446
                           "Thus, there is nothing to sample with",
1398 1447
                           "sampling last. You will get NAs"))
1399 1448
             the.time <- -99
1400 1449
         } else {
1401
-              the.time <- nrow(tmp$pops.by.time)
1402
-          }
1450
+            the.time <- nrow(tmp$pops.by.time)
1451
+        }
1403 1452
     } else if (timeSample %in% c("uniform", "unif")) {
1404 1453
           candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
1405 1454
           
... ...
@@ -1416,7 +1465,7 @@ get.the.time.for.sample <- function(tmp, timeSample) {
1416 1465
             } else {
1417 1466
                   the.time <- sample(candidate.time, 1)
1418 1467
               }
1419
-      } else {
1468
+    } else {
1420 1469
             stop("Unknown timeSample option")
1421 1470
         }
1422 1471
     return(the.time)
... ...
@@ -1424,8 +1473,8 @@ get.the.time.for.sample <- function(tmp, timeSample) {
1424 1473
 
1425 1474
 
1426 1475
 get.mut.vector <- function(x, timeSample, typeSample,
1427
-                           thresholdWhole) {
1428
-    the.time <- get.the.time.for.sample(x, timeSample)
1476
+                           thresholdWhole, popSizeSample) {
1477
+    the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
1429 1478
     if(the.time < 0) { 
1430 1479
         return(rep(NA, nrow(x$Genotypes)))
1431 1480
     } 
... ...
@@ -1441,10 +1490,10 @@ get.mut.vector <- function(x, timeSample, typeSample,
1441 1490
                                        x$Genotypes)/popSize) >= thresholdWhole) )
1442 1491
     } else if (typeSample %in%  c("singleCell", "single")) {
1443 1492
 
1444
-          return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1445
-      } else {
1446
-            stop("Unknown typeSample option")
1447
-        }
1493
+        return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1494
+    } else {
1495
+        stop("Unknown typeSample option")
1496
+    }
1448 1497
 }
1449 1498
 
1450 1499
 
... ...
@@ -1507,9 +1556,15 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1507 1556
         restrict.table <- poset.to.restrictTable(poset)
1508 1557
     numDrivers <- nrow(restrict.table)
1509 1558
     numGenes <- (numDrivers + numPassengers)
1510
-    
1559
+    if(numGenes < 2)
1560
+        stop("There must be at least two genes (loci) in the fitness effects.",
1561
+             "If you only care about a degenerate case with just one,",
1562
+             "you can enter a second gene",
1563
+             "with fitness effect of zero.")
1511 1564
     if(numGenes > 64)
1512
-        stop("Largest possible number of genes is 64")
1565
+        stop("Largest possible number of genes (loci) is 64 for version 1.",
1566
+             "You are strongly encouraged to use the new specification",
1567
+             "as in version 2.")
1513 1568
 
1514 1569
     ## These can never be set by the user
1515 1570
     ## if(initSize_species < 10) {
... ...
@@ -1567,8 +1622,8 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1567 1622
         BNB_Algo5(restrictTable = rtC,
1568 1623
         numDrivers = numDrivers,
1569 1624
         numGenes = numGenes,
1570
-        typeCBN_ = typeCBN,
1571
-        birthRate = birth,
1625
+        typeCBN_= typeCBN,
1626
+        ## birthRate = birth, 
1572 1627
         s = s, 
1573 1628
         death = death,
1574 1629
         mu = mu,
... ...
@@ -1582,16 +1637,15 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1582 1637
         verbosity = verbosity,
1583 1638
         speciesFS = speciesFS,
1584 1639
         ratioForce = ratioForce,
1585
-        typeFitness_= typeFitness,
1640
+        typeFitness_ = typeFitness,
1586 1641
         maxram = max.memory,
1587 1642
         mutationPropGrowth = as.integer(mutationPropGrowth),
1588 1643
         initMutant = initMutant,
1589 1644
         maxWallTime = max.wall.time,
1590 1645
         keepEvery = keepEvery,
1591
-        alpha = alpha,
1646
+        # alpha = alpha,
1592 1647
         sh = sh,
1593 1648
         K = K,
1594
-        # end = TimeEvery# , 
1595 1649
         detectionDrivers = detectionDrivers,
1596 1650
         onlyCancer = onlyCancer,
1597 1651
         errorHitWallTime = errorHitWallTime,
... ...
@@ -1600,7 +1654,7 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1600 1654
         minDetectDrvCloneSz = minDetectDrvCloneSz,
1601 1655
         extraTime = extraTime
1602 1656
     ),
1603
-             NumDrivers = numDrivers
1657
+    NumDrivers = numDrivers
1604 1658
              ))
1605 1659
 
1606 1660
 }
... ...
@@ -1,19 +1,24 @@
1 1
 # This file was generated by Rcpp::compileAttributes
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-nr_BNB_Algo5 <- function(rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, alpha, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog) {
5
-    .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, alpha, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog)
4
+nr_BNB_Algo5 <- function(rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_) {
5
+    .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_)
6 6
 }
7 7
 
8
-BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, birthRate, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, alpha, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) {
9
-    .Call('OncoSimulR_BNB_Algo5', PACKAGE = 'OncoSimulR', restrictTable, numDrivers, numGenes, typeCBN_, birthRate, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, alpha, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime)
8
+BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) {
9
+    .Call('OncoSimulR_BNB_Algo5', PACKAGE = 'OncoSimulR', restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime)
10 10
 }
11 11
 
12
-## readFitnessEffects <- function(rFE, echo) {
13
-##     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
14
-## }
12
+readFitnessEffects <- function(rFE, echo) {
13
+     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
14
+}
15
+
16
+evalRGenotype <- function(rG, rFE, verbose, prodNeg, calledBy_) {
17
+    .Call('OncoSimulR_evalRGenotype', PACKAGE = 'OncoSimulR', rG, rFE, verbose, prodNeg, calledBy_)
18
+}
15 19
 
16
-evalRGenotype <- function(rG, rFE, verbose, prodNeg) {
17
-    .Call('OncoSimulR_evalRGenotype', PACKAGE = 'OncoSimulR', rG, rFE, verbose, prodNeg)
20
+evalRGenotypeAndMut <- function(rG, rFE, muEF, full2mutator_, verbose, prodNeg) {
21
+    .Call('OncoSimulR_evalRGenotypeAndMut', PACKAGE = 'OncoSimulR', rG, rFE, muEF, full2mutator_, verbose, prodNeg)
18 22
 }
19 23
 
24
+
20 25
new file mode 100644
... ...
@@ -0,0 +1,398 @@
1
+## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2
+
3
+## This program is free software: you can redistribute it and/or modify
4
+## it under the terms of the GNU General Public License as published by
5
+## the Free Software Foundation, either version 3 of the License, or
6
+## (at your option) any later version.
7
+
8
+## This program is distributed in the hope that it will be useful,
9
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+## GNU General Public License for more details.
12
+
13
+## You should have received a copy of the GNU General Public License
14
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+## plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
17
+##     plot.genotype_fitness_matrix <- plotFitnessLandscape 
18
+
19
+## FIXME: show only accessible paths? 
20
+
21
+plotFitnessLandscape <- function(x, show_labels = TRUE,
22
+                                 col = c("green4", "red", "yellow"),
23
+                                 lty = c(1, 2, 3), use_ggrepel = FALSE,
24
+                                 log = FALSE, max_num_genotypes = 2000,
25
+                                 ...) {
26
+
27
+    ## FIXME future:
28
+    
29
+   
30
+
31
+    ## - Allow passing order effects. Change "allGenotypes_to_matrix"
32
+    ##   below? Probably not, as we cannot put order effects as a
33
+    ##   matrix. Do something else, like allow only order effects if from
34
+    ##   and allFitnessEffects object.
35
+
36
+    ## - Allow selection only some genotypes or alleles
37
+
38
+    ## Allow selecting only paths that involve a particular genotype in
39
+    ## adjacency matrix of genotype i go at row i and column i.  Follow back
40
+    ## all entries in row i, and their previous, and forward all column i.
41
+
42
+
43
+    ## gfm: genotype fitness matrix
44
+    ## afe: all fitness effects
45
+
46
+    ## We seem to go back and forth, but we need to ensure the afe and the
47
+    ## gfm are coherent. Since there are many ways to make them fail
48
+    ## (e.g., a user passing the wrong order, or incomplete matrices, etc)
49
+    ## we do it the following way.
50
+
51
+    ## Yes, we set WT to 1 as we call from_genotype_fitness on
52
+    ## matrices and genotype_fitness_matrix objects.
53
+    ## We do that because we call evalAllGenotypes to
54
+    ## get the string representation, etc. And this is for use
55
+    ## with OncoSimul.
56
+
57
+    ## FIXME: passing fitness as a 2 column data frame
58
+    
59
+    if( (inherits(x, "genotype_fitness_matrix")) ||
60
+        ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) {
61
+        ## Why this? We go back and forth twice. We need both things. We
62
+        ## could construct the afe below by appropriately pasting the
63
+        ## columns names
64
+        afe <- evalAllGenotypes(allFitnessEffects(
65
+            epistasis = from_genotype_fitness(x)),
66
+            order = FALSE, addwt = TRUE, max = max_num_genotypes)
67
+        ## Might not be needed with the proper gfm object (so gmf <- x)
68
+        ## but is needed if arbitrary matrices.
69
+        gfm <- allGenotypes_to_matrix(afe) 
70
+    } else if(inherits(x, "fitnessEffects")) {
71
+        if(!is.null(x$orderEffects) )
72
+            stop("We cannot yet deal with order effects")
73
+        afe <- evalAllGenotypes(x,
74
+                                order = FALSE,
75
+                                addwt = TRUE, max = max_num_genotypes)
76
+        gfm <- allGenotypes_to_matrix(afe)
77
+    } else if( (inherits(x, "evalAllGenotypes")) ||
78
+               (inherits(x, "evalAllGenotypesMut"))) {
79
+        if(any(grepl(">", x[, 1], fixed = TRUE)))
80
+            stop("We cannot deal with order effects yet.")
81
+        x <- x[, c(1, 2)]
82
+        if(x[1, "Genotype"] != "WT") {
83
+            ## Yes, because we expect this present below
84
+            x <- rbind(data.frame(Genotype = "WT",
85
+                                  Fitness = 1,
86
+                                  stringsAsFactors = FALSE),
87
+                       x)
88
+        }
89
+        afe <- x
90
+        ## in case we pass an evalAllgenotypesfitandmut
91
+        gfm <- allGenotypes_to_matrix(afe)
92
+    } else if(is.data.frame(x)) {
93
+        ## Assume a two-column data frame of genotypes as character
94
+        ## vectors and fitness
95
+        if(colnames(x)[2] != "Fitness")
96
+            stop("We cannot guess what you are passing here") 
97
+        afe <- evalAllGenotypes(allFitnessEffects(genotFitness = x),
98
+                                order = FALSE, addwt = TRUE,
99
+                                max = max_num_genotypes)
100
+        gfm <- allGenotypes_to_matrix(afe)
101
+    } else {
102
+       stop("We cannot guess what you are passing here") 
103
+    }
104
+   
105
+
106
+    
107
+    ## if(inherits(x, "genotype_fitness_matrix")) {
108
+    ##     ## Why this? We go back and forth twice. We need both things. We
109
+    ##     ## could construct the afe below by appropriately pasting the
110
+    ##     ## columns names
111
+    ##     afe <- evalAllGenotypes(allFitnessEffects(
112
+    ##         epistasis = from_genotype_fitness(x)),
113
+    ##         order = FALSE, addwt = TRUE, max = 2000)
114
+    ##     gfm <- allGenotypes_to_matrix(afe) ## should not be needed? gfm <- x
115
+    ## } else if(inherits(x, "fitnessEffects")) {
116
+    ##     if(!is.null(x$orderEffects) )
117
+    ##         stop("We cannot yet deal with order effects")
118
+    ##     afe <- evalAllGenotypes(x,
119
+    ##                             order = FALSE,
120
+    ##                             addwt = TRUE, max = 2000)
121
+    ##     gfm <- allGenotypes_to_matrix(x)
122
+    ## } else {
123
+    ##     lc <- ncol(x)
124
+    ##     ## detect an appropriately formatted matrix
125
+    ##     if( (is.data.frame(x) || is.matrix(x)) ) {
126
+    ##         if(ncol(x) > 2) {
127
+    ##             ## We cannot be sure all genotypes are present
128
+    ##             ## but that is not our business?
129
+    ##             ## colnames(x)[lc] <- "Fitness"
130
+    ##             ## So this must be a matrix
131
+    ##             afe <- evalAllGenotypes(allFitnessEffects(
132
+    ##                 epistasis = from_genotype_fitness(x)),
133
+    ##                 order = FALSE, addwt = TRUE, max = 2000)
134
+    ##             gfm <- allGenotypes_to_matrix(afe)
135
+    ##         } else {
136
+    ##             ## This must be a data frame
137
+    ##             if(!is.data.frame(x))
138
+    ##                 stop("How are you passing a matrix here?")
139
+    ##             if(colnames(x)[lc] != "Fitness")
140
+    ##                 stop("We cannot guess what you are passing here")
141
+    ##             ## We are passed an allFitnessEffects output
142
+    ##             ## Will be simpler later as we will know immediately
143
+    ##             if(x[1, "Genotype"] != "WT") {
144
+    ##                 ## Yes, because we expect this present below
145
+    ##                 x <- rbind(data.frame(Genotype = "WT",
146
+    ##                                       Fitness = 1,
147
+    ##                                       stringsAsFactors = FALSE),
148
+    ##                            x)
149
+    ##             }
150
+    ##             x <- afe
151
+    ##             gfm <- allGenotypes_to_matrix(afe)
152
+    ##         }
153
+    ##     } else {
154
+    ##         stop("This is not allowed input")
155
+    ##     }
156
+    ## }
157
+
158
+    mutated <- rowSums(gfm[, -ncol(gfm)])
159
+    gaj <- genot_to_adj_mat(gfm)
160
+    vv <- which(!is.na(gaj), arr.ind = TRUE)
161
+    
162
+    ## plot(x = mutated, y = e1$Fitness, ylab = "Fitness",
163
+    ##      xlab = "", type = "n", axes = FALSE)
164
+    ## box()
165
+    ## axis(2)
166
+    ## text(x = mutated, y = x$Fitness, labels = x$Genotype)
167
+
168
+    ## The R CMD CHEKC notes about no visible binding for global variable
169
+
170
+    x_from <- y_from <- x_to <- y_to <- Change <- muts <-
171
+        label <- fitness <- Type <- NULL
172
+                
173
+                
174
+    dd <- data.frame(muts = mutated,
175
+                     fitness = afe$Fitness,
176
+                     label = afe$Genotype)
177
+    cl <- gaj[vv]
178
+    sg <- data.frame(x_from = mutated[vv[, 1]],
179
+                     y_from = afe$Fitness[vv[, 1]],
180
+                     x_to = mutated[vv[, 2]],
181
+                     y_to = afe$Fitness[vv[, 2]],
182
+                     Change = factor(ifelse(cl == 0, "Neutral",
183
+                                     ifelse(cl > 0, "Gain", "Loss"))))
184
+    ## From http://stackoverflow.com/a/17257422
185
+    number_ticks <- function(n) {function(limits) pretty(limits, n)}
186
+    
187
+    p <- ggplot() +
188
+        xlab("") + ylab("Fitness") + 
189
+        theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
190
+              panel.grid.minor.x = element_blank()) +
191
+        geom_segment(data = sg,
192
+                       aes(x = x_from, y = y_from,
193
+                           xend = x_to, yend = y_to,
194
+                           colour = Change,
195
+                           lty = Change)) + scale_color_manual("Change",
196
+                                                               values = col)
197
+    if(log) {
198
+        p <- p + scale_y_log10(breaks = number_ticks(5))
199
+    } else {
200
+        p <- p + scale_y_continuous(breaks = number_ticks(5))
201
+    }
202
+    peaks_valleys <- peak_valley(gaj)
203
+    maxF <- peaks_valleys$peak
204
+    minF <- peaks_valleys$valley
205
+
206
+    ddMM <- dd
207
+    ddMM$Type <- NA
208
+    ddMM$Type[minF] <- "Sink"
209
+    ddMM$Type[maxF] <- "Peak"
210
+    ddMM <- ddMM[ddMM$Type %in% c("Sink", "Peak"), ]
211
+    
212
+    if(show_labels && use_ggrepel) {
213
+        p <- p + geom_text_repel(data = dd[-c(minF, maxF), ],
214
+                                 aes(x = muts, y = fitness, label = label)) +
215
+            geom_label_repel(data = ddMM,
216
+                             aes(x = muts, y = fitness, label = label, fill = Type),
217
+                             color = "black",
218
+                             fontface = "bold")
219
+            ## geom_label_repel(data = dd[maxF, ], aes(x = muts, y = fitness, label = label),
220
+            ##                  color = "black",
221
+            ##                  fontface = "bold", fill = col[1]) +
222
+            ## geom_label_repel(data = dd[minF, ], aes(x = muts, y = fitness, label = label),
223
+            ##                  color = "black",
224
+            ##                  fontface = "bold", fill = col[2]) 
225
+        
226
+        }
227
+    else {
228
+        if(!show_labels) {
229
+            ## Use same code, but make label empty
230
+            ddMM$label <- ""
231
+        }
232
+            
233
+        p <- p + geom_text(data = dd[-c(minF, maxF), ],
234
+                           aes(x = muts, y = fitness, label = label),
235
+                           vjust = -0.2, hjust = "inward") +
236
+            geom_label(data = ddMM,
237
+                       aes(x = muts, y = fitness, label = label, fill = Type),
238
+                       vjust = -0.2, hjust = "inward", color = "black",
239
+                       fontface = "bold")
240
+            ## geom_label(data = dd[maxF, ], aes(x = muts, y = fitness, label = label),
241
+            ##            vjust = -0.2, hjust = "inward", color = "black",
242
+            ##            fontface = "bold", fill = col[1]) +
243
+            ## geom_label(data = dd[minF, ], aes(x = muts, y = fitness, label = label),
244
+            ##            vjust = -0.2, hjust = "inward", color = "black",
245
+            ##            fontface = "bold", fill = col[2])
246
+    }
247
+    p <- p + scale_fill_manual("Local\nmax/min",  values = col)
248
+    p
249
+}
250
+
251
+
252
+plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
253
+    plot.genotype_fitness_matrix <-
254
+        plot_fitness_landscape <-
255
+            plotFitnessLandscape
256
+
257
+
258
+######################################################################
259
+######################################################################
260
+######################################################################
261
+####
262
+####            Internal functions
263
+####
264
+######################################################################
265
+######################################################################
266
+######################################################################
267
+
268
+
269
+
270
+generate_matrix_genotypes <- function(g) {
271
+    ## FIXME future: do this for order too? Only if rfitness for order.
272
+    ## Given a number of genes, generate all possible genotypes.
273
+    
274
+    if(g > 20) stop("This would generate more than one million genotypes")
275
+    ## g: number of genes
276
+    f1 <- function(n) {
277
+        lapply(seq.int(n), function(x) combinations(n = n, r = x))
278
+    }
279
+    genotNums <- f1(g)
280
+    list.of.vectors <- function(y) {
281
+        ## there's got to be a simpler way
282
+        lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}),
283
+                      recursive = FALSE),
284
+               function(m) m[[1]])
285
+    }
286
+    genotNums <- list.of.vectors(genotNums)
287
+    
288
+    v <- rep(0, g)
289
+    mat <- matrix(unlist(lapply(genotNums,
290
+                                function(x) {
291
+                                    v[x] <- 1
292
+                                    return(v)
293
+                                }
294
+                                )), ncol = g, byrow = TRUE)
295
+    mat <- rbind(rep(0, g), mat)
296
+    colnames(mat) <- LETTERS[1:g]
297
+    return(mat)
298
+}
299
+
300
+
301
+
302
+genot_to_adj_mat <- function(x) {
303
+    ## FIXME this can take about 23% of the time of the ggplot call.
304
+    ## But them, we are quickly constructing a 2000*2000 matrix
305
+    ## Given a genotype matrix, as given by allGenotypes_to_matrix, produce a
306
+    ## directed adjacency matrix between genotypes connected (i.e., those
307
+    ## that differ by gaining one mutation) with value being the
308
+    ## difference in fitness between destination and origin
309
+
310
+    ## Make sure sorted, so ancestors always before descendants
311
+    rs0 <- rowSums(x[, -ncol(x)])
312
+    x <- x[order(rs0), ]
313
+    rm(rs0)
314
+    
315
+    y <- x[, -ncol(x)]
316
+    f <- x[, ncol(x)]
317
+    rs <- rowSums(y)
318
+
319
+    ## Move this to C++?
320
+    adm <- matrix(NA, nrow = length(rs), ncol = length(rs))
321
+    for(i in 1:length(rs)) { ## i is the current genotype
322
+        candidates <- which(rs == (rs[i] + 1))
323
+        for(j in candidates) {
324
+            sumdiff <- sum(abs(y[j, ] - y[i, ]))
325
+            ## if(sumdiff < 0) stop("eh?")
326
+            if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
327
+        }
328
+    }
329
+    return(adm)
330
+}
331
+
332
+
333
+peak_valley <- function(x) {
334
+    ## FIXME: when there are no identical entries, all this
335
+    ## can be simplified a lot. Actually, there could be a way
336
+    ## to only use the slow code on paths with 0.
337
+    ## But this does not seem to be the CPU hog.
338
+    ## x is an adjacency matrix where i,j is fitness_j - fitness_i.  Thus,
339
+    ## negative values means j has less fitness than the incoming.  And if
340
+    ## all not NA entries of a column are negative, then column j is is
341
+    ## sink candidate
342
+    ## Thus we locate local maxima and minima
343
+
344
+    ## Some of this complicated as we need to detect paths like -3, 0, 0,
345
+    ## 3.  The -3 and the two 0 are local minima with identical values.
346
+
347
+    ## We assume crucially that ancestors always have smaller indexes in
348
+    ## the matrix than descendants.
349
+
350
+    ## Valleys
351
+    bad_back <- vector("integer", nrow(x))
352
+    for(i in ncol(x):1) {
353
+        if(any(x[i, ] < 0, na.rm = TRUE) || bad_back[i]) {
354
+            ## this node is bad. Any ancestor with fitness >= is bad
355
+            bad_back[i] <- 1
356
+            reach_b <- which(x[, i] <= 0)
357
+            bad_back[reach_b] <- 1
358
+        }
359
+    }
360
+    bad_fwd <- vector("integer", nrow(x))
361
+    for(i in 1:nrow(x)) {
362
+        if( any(x[, i] > 0, na.rm = TRUE) || bad_fwd[i] ) {
363
+            ## this node is bad. Any descendant with fitness >= is bad
364
+            bad_fwd[i] <- 1
365
+            reach_f <- which(x[i, ] >= 0)
366
+            bad_fwd[reach_f] <- 1
367
+        }
368
+    }
369
+    bad <- union(which(bad_back == 1), which(bad_fwd == 1))
370
+    candidate <- which(apply(x, 2, function(z) all(z <= 0, na.rm = TRUE)))
371
+    valley <- setdiff(candidate, bad)
372
+
373
+    null <- suppressWarnings(try({rm(bad_back, bad_fwd, reach_b, reach_f, candidate)},
374
+        silent = TRUE))
375
+    ## Peaks
376
+    bad_back <- vector("integer", nrow(x))
377
+    for(i in ncol(x):1) {
378
+        if(any(x[i, ] > 0, na.rm = TRUE) || bad_back[i]) {
379
+            ## this node is bad. Any ancestor with fitness >= is bad
380
+            bad_back[i] <- 1
381
+            reach_b <- which(x[, i] >= 0)
382
+            bad_back[reach_b] <- 1
383
+        }
384
+    }
385
+    bad_fwd <- vector("integer", nrow(x))
386
+    for(i in 1:nrow(x)) {
387
+        if( any(x[, i] < 0, na.rm = TRUE) || bad_fwd[i] ) {
388
+            ## this node is bad. Any descendant with fitness >= is bad
389
+            bad_fwd[i] <- 1
390
+            reach_f <- which(x[i, ] <= 0)
391
+            bad_fwd[reach_f] <- 1
392
+        }
393
+    }
394
+    bad <- union(which(bad_back == 1), which(bad_fwd == 1))
395
+    candidate <- which(apply(x, 2, function(z) all(z >= 0, na.rm = TRUE)))
396
+    peak <- setdiff(candidate, bad)
397
+    return(list(peak = peak, valley = valley))
398
+}
0 399
new file mode 100644
... ...
@@ -0,0 +1,180 @@
1
+## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2
+
3
+## This program is free software: you can redistribute it and/or modify
4
+## it under the terms of the GNU General Public License as published by
5
+## the Free Software Foundation, either version 3 of the License, or
6
+## (at your option) any later version.
7
+
8
+## This program is distributed in the hope that it will be useful,
9
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+## GNU General Public License for more details.
12
+
13
+## You should have received a copy of the GNU General Public License
14
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+
17
+
18
+## Functions that allow passing a matrix or data frame of mappings
19
+## genotype -> fitness so this is taken as input in fitnessEffects.
20
+
21
+
22
+from_genotype_fitness <- function(x) {
23
+    ## Would break with output from allFitnessEffects and
24
+    ## output from allGenotypeAndMut
25
+    if(ncol(x) > 2) {
26
+        ## Of course, can ONLY work with epistastis, NOT order
27
+        return(genot_fitness_to_epistasis(x))
28
+    } else {
29
+        ## Make sure no factors
30
+        if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
31
+        omarker <- any(grepl(">", x[, 1], fixed = TRUE))
32
+        emarker <- any(grepl(",", x[, 1], fixed = TRUE))
33
+        nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
34
+        ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
35
+        if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
36
+        if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
37
+        ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
38
+        ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
39
+        if(omarker) {
40
+            ## do something. To be completed
41
+            stop("This code not yet ready")
42
+            ## You can pass to allFitnessEffects genotype -> fitness mappings that
43
+            ## involve epistasis and order. But they must have different
44
+            ## genes. Otherwise, it is not manageable.
45
+        }
46
+        if(emarker) {
47
+            x <- x[, c(1, 2)]
48
+            if(!all(colnames(x) == c("Genotype", "Fitness"))) {
49
+                message("Column names of object not Genotype and Fitness.",
50
+                        " Renaming them assuming that is what you wanted")
51
+                colnames(x) <- c("Genotype", "Fitness")
52
+            }
53
+            ## Yes, we need to do this to  scale the fitness and put the "-"
54
+            return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
55
+        }
56
+        
57
+    }
58
+}
59
+
60
+
61
+
62
+genot_fitness_to_epistasis <- function(x) {
63
+    ## FIXME future:
64
+
65
+    ## - Nope, order cannot be dealt with here. Not a matrix of 0 and 1.
66
+
67
+    ## - modify "fitnessEffects" so we can take a component that is
68
+    ## - "genot_fitness"; so this would never be exposed to the user
69
+
70
+
71
+    ## Why we should not combine this specification with other terms? If
72
+    ## you use this is because you say "this is the mapping genotype ->
73
+    ## fitness. Period." so we should not combine other terms (or other
74
+    ## terms that involve these genes)
75
+    
76
+    nr <- nrow(x)
77
+    if(nr < (2^(ncol(x) - 1)))
78
+        message("Number of genotypes less than 2^L.",
79
+                " Missing genotype will be set to 1")
80
+    ## This is specific if only epistasis, not order
81
+    if(nr > (2^(ncol(x) - 1)))
82
+        stop("Number of genotypes > 2^L. Repeated entries?")
83
+    f <- x[, ncol(x)]
84
+    ## Why should I stop?
85
+    if(any(f < 0))
86
+        message("Negative fitnesses. Watch out if you divide by the wildtype")
87
+    x <- x[, -ncol(x)]
88
+    wt <- which(rowSums(x) == 0)
89
+    fwt <- 1
90
+    if(length(wt) == 1)
91
+        fwt <- f[wt]
92
+    if(!isTRUE(all.equal(fwt, 1))) {
93
+        message("Fitness of wildtype != 1. ",
94
+                "Dividing all fitnesses by fitness of wildtype.")
95
+        f <- f/fwt
96
+    }
97
+    
98
+    if(is.null(colnames(x))) {
99
+        if(ncol(x) <= 26)
100
+            colnames(x) <- LETTERS[1:ncol(x)]
101
+        else
102
+            colnames(x) <- paste0("G", seq.int(ncol(x)))
103
+    }
104
+    cn <- colnames(x)
105
+    x2 <- matrix("", nrow = nrow(x), ncol = ncol(x))
106
+    x2[x == 0] <- "-"
107
+    epin <- apply(x2, 1, function(z) paste(paste0(z, cn), collapse = ":"))
108
+    if(anyDuplicated(epin))
109
+        stop("Non unique names")
110
+    s <- f - 1
111
+    names(s) <- epin
112
+    return(s)
113
+}
114
+
115
+
116
+
117
+
118
+allGenotypes_to_matrix <- function(x) { 
119
+    ## Makes no sense to allow passing order: the matrix would have
120
+    ## repeated rows. A > B and B > A both have exactly A and B
121
+    
122
+    ## Take output of evalAllGenotypes or identical data frame and return
123
+    ## a matrix with 0/1 in a column for each gene and a final column of
124
+    ## Fitness
125
+
126
+    ## A WT can be specified with string "WT"
127
+    anywt <- which(x[, 1] == "WT")
128
+    if(length(anywt) > 1) stop("More than 1 WT")
129
+    if(length(anywt) == 1) {
130
+        fwt <- x[anywt, 2]
131
+        x <- x[-anywt, ]
132
+    } else {
133
+        fwt <- 1
134
+    }
135
+    splitted_genots <- lapply(x$Genotype,
136
+                             function(z) nice.vector.eo(z, ","))
137
+
138
+    all_genes <- unique(unlist(splitted_genots))
139
+
140
+    m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes))
141
+    the_match <- lapply(splitted_genots,
142
+                        function(z) match(z, all_genes))
143
+    ## A lot simpler with a loop
144
+    for(i in 1:nrow(m)) {
145
+        m[i, the_match[[i]]] <- 1
146
+    }
147
+    m <- cbind(m, x[, 2])
148
+    colnames(m) <- c(all_genes, "Fitness")
149
+    m <- rbind(c(rep(0, length(all_genes)), fwt),
150
+               m)
151
+    ## Ensure sorted
152
+    m <- data.frame(m)
153
+    rs <- rowSums(m[, -ncol(m)])
154
+    m <- m[order(rs), ]
155
+    ## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ]
156
+    return(m)
157
+}
158
+
159
+
160
+##
161
+
162
+## Example of Bozic issues
163
+m1 <- cbind(c(0, 1), c(1, 0), c(2, 3))
164
+
165
+m2 <- cbind(c(1, 1), c(1, 0), c(2, 3))
166
+
167
+m3 <- data.frame(G = c("A, B", "A"), F = c(1, 2))
168
+
169
+m4 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2, 1, 4))
170
+
171
+m5 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2, 1, 0))
172
+
173
+m6 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2.5, 2, 0))
174
+
175
+
176
+
177
+## And no, it makes no sense to use any of this for mutator: in mutator I
178
+## directly have the multiplication factor of each gene. Which is likely
179
+## what people want anyway. Add it later if needed. by using a ratio
180
+## instead of a "-"
0 181
new file mode 100644
... ...
@@ -0,0 +1,67 @@
1
+## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
2
+
3
+## This program is free software: you can redistribute it and/or modify
4
+## it under the terms of the GNU General Public License as published by
5
+## the Free Software Foundation, either version 3 of the License, or
6
+## (at your option) any later version.
7
+
8
+## This program is distributed in the hope that it will be useful,
9
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+## GNU General Public License for more details.
12
+
13
+## You should have received a copy of the GNU General Public License
14
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+
17
+## Any mutator gene must have been in fitness, even if it has fitness
18
+## effect of 0. Yes, because otherwise the mapping of gene names to
19
+## numerical ids becomes a PITA.
20
+
21
+
22
+allMutatorEffects <- function(epistasis = NULL,
23
+                              noIntGenes = NULL,
24
+                              geneToModule = NULL,
25
+                              keepInput = TRUE) {
26
+    ## This is on purpose to prevent using a rT or orderEffects. Those are
27
+    ## not tested to work with mutator.
28
+    allFitnessORMutatorEffects(
29
+        rT = NULL,
30
+        epistasis = epistasis,
31
+        orderEffects = NULL,
32
+        noIntGenes = noIntGenes,
33
+        geneToModule = geneToModule,
34
+        drvNames = NULL,
35
+        keepInput = keepInput,
36
+        calledBy = "allMutatorEffects")
37
+}
38
+                              
39
+
40
+evalAllGenotypesMut <- function(mutatorEffects,
41
+                                max = 256,
42
+                                addwt = FALSE) {
43
+    evalAllGenotypesORMut(
44
+        fmEffects = mutatorEffects,
45
+        order = FALSE,
46
+        max = max,
47
+        model = "",
48
+        calledBy_= "evalGenotypeMut"
49
+    )
50
+}
51
+
52
+evalGenotypeMut <- function(genotype, mutatorEffects,
53
+                         verbose = FALSE,
54
+                         echo = FALSE) {
55
+    if(inherits(mutatorEffects, "fitnessEffects"))
56
+        stop("You are trying to get the mutator effects of a fitness specification. ",
57
+             "You did not pass an object of class mutatorEffects.")
58
+    evalGenotypeORMut(genotype = genotype,
59
+                       fmEffects = mutatorEffects,
60
+                       verbose = verbose,
61
+                       echo = echo,
62
+                       model  = "" ,
63
+                       calledBy_= "evalGenotypeMut"
64
+                       )
65
+
66
+}
67
+
... ...
@@ -403,13 +403,20 @@ getGeneIDNum <- function(geneModule, geneNoInt, drv, sort = TRUE) {
403 403
     )
404 404
 }
405 405
 
406
-allFitnessEffects <- function(rT = NULL,
407
-                              epistasis = NULL,
408
-                              orderEffects = NULL,
409
-                              noIntGenes = NULL,
410
-                              geneToModule = NULL,
411
-                              drvNames = NULL,
412
-                              keepInput = TRUE) {
406
+
407
+
408
+allFitnessORMutatorEffects <- function(rT = NULL,
409
+                                       epistasis = NULL,
410
+                                       orderEffects = NULL,
411
+                                       noIntGenes = NULL,
412
+                                       geneToModule = NULL,
413
+                                       drvNames = NULL,
414
+                                       keepInput = TRUE,
415
+                                       ## refFE = NULL,
416
+                                       calledBy = NULL) {
417
+    ## From allFitnessEffects. Generalized so we deal with Fitness
418
+    ## and mutator.
419
+    
413 420
     ## restrictions: the usual rt
414 421
 
415 422
     ## epistasis: as it says, with the ":"
... ...
@@ -428,6 +435,16 @@ allFitnessEffects <- function(rT = NULL,
428 435
     ## same in epistasis and order, but we would be splitting twice
429 436
     ## (whereas for rT extracting the names is very simple).
430 437
 
438
+    ## called appropriately?
439
+    if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
440
+        stop("How did you call this function?. Bug.")
441
+    
442
+    if(calledBy == "allMutatorEffects") {
443
+        ## very paranoid check
444
+        if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
445
+            stop("allMutatorEffects called with forbidden arguments.",
446
+                 "Is this an attempt to subvert the function?")
447
+    }
431 448
     
432 449
     rtNames <- NULL
433 450
     epiNames <- NULL
... ...
@@ -448,7 +465,6 @@ allFitnessEffects <- function(rT = NULL,
448 465
                           function(x) lapply(x$ids,
449 466
                                              function(z) strsplit(z, "^-"))))),
450 467
                             "")
451
-        
452 468
     } else {
453 469
         long.epistasis <- list()
454 470
     }
... ...
@@ -472,7 +488,7 @@ allFitnessEffects <- function(rT = NULL,
472 488
                        "or order effects not in geneToModule"))
473 489
     }
474 490
     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
475
-
491
+    
476 492
     idm <- unique(geneModule$ModuleNumID)
477 493
     names(idm) <- unique(geneModule$Module)
478 494
 
... ...
@@ -529,16 +545,20 @@ allFitnessEffects <- function(rT = NULL,
529 545
     } else {
530 546
         geneNoInt <- data.frame()
531 547
     }
548
+
532 549
     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
533 550
              nrow(geneNoInt)) == 0)
534 551
         stop("You have specified nothing!")
535 552
 
536
-    if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
537
-        graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
553
+    if(calledBy == "allFitnessEffects") {
554
+        if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
555
+            graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
556
+        } else {
557
+            graphE <- NULL
558
+        }
538 559
     } else {
539 560
         graphE <- NULL
540 561
     }
541
-
542 562
     if(!is.null(drvNames)) {
543 563
         drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
544 564
         ## drivers should never be in the geneNoInt; Why!!!???
... ...
@@ -553,8 +573,11 @@ allFitnessEffects <- function(rT = NULL,
553 573
         ## }
554 574
         ## drv <- getGeneIDNum(geneModule, NULL, drvNames)
555 575
     } else {
556
-        drv <- geneModule$GeneNumID[-1]
576
+        ## we used to have this default
577
+        ## drv <- geneModule$GeneNumID[-1]
578
+        drv <- vector(mode = "integer", length = 0L)
557 579
     }
580
+    
558 581
     if(!keepInput) {
559 582
         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
560 583
     }
... ...
@@ -572,11 +595,197 @@ allFitnessEffects <- function(rT = NULL,
572 595
                 orderEffects = orderEffects,
573 596
                 noIntGenes = noIntGenes                
574 597
                 )
575
-    class(out) <- c("fitnessEffects")
598
+    if(calledBy == "allFitnessEffects") {
599
+        class(out) <- c("fitnessEffects")
600
+    } else if(calledBy == "allMutatorEffects") {
601
+        class(out) <- c("mutatorEffects")
602
+    }
576 603
     return(out)
577 604
 }
578 605
 
579 606
 
607
+allFitnessEffects <- function(rT = NULL,
608
+                              epistasis = NULL,
609
+                              orderEffects = NULL,
610
+                              noIntGenes = NULL,
611
+                              geneToModule = NULL,
612
+                              drvNames = NULL,
613
+                              genotFitness = NULL,
614
+                              keepInput = TRUE) {
615
+
616
+    if(!is.null(genotFitness)) {
617
+        if(!is.null(rT) || !is.null(epistasis) ||
618
+           !is.null(orderEffects) || !is.null(noIntGenes) ||
619
+           !is.null(geneToModule)) {
620
+            stop("You have a non-null genotFitness.",
621
+                 " If you pass the complete genotype to fitness mapping",
622
+                 " you cannot pass any of rT, epistasis, orderEffects",
623
+                 " noIntGenes or geneToModule.")
624
+        }
625
+        epistasis <- from_genotype_fitness(genotFitness)
626
+    }
627
+    allFitnessORMutatorEffects(
628
+        rT = rT,
629
+        epistasis = epistasis,
630
+        orderEffects = orderEffects,
631
+        noIntGenes = noIntGenes,
632
+        geneToModule = geneToModule,
633
+        drvNames = drvNames,
634
+        keepInput = keepInput,
635
+        calledBy = "allFitnessEffects")
636
+}
637
+
638
+
639
+## allFitnessEffects <- function(rT = NULL,
640
+##                               epistasis = NULL,
641
+##                               orderEffects = NULL,
642
+##                               noIntGenes = NULL,
643
+##                               geneToModule = NULL,
644
+##                               drvNames = NULL,
645
+##                               keepInput = TRUE) {
646
+##     ## restrictions: the usual rt
647
+
648
+##     ## epistasis: as it says, with the ":"
649
+
650
+##     ## orderEffects: the ">"
651
+    
652
+##     ## All of the above can be genes or can be modules (if you pass a
653
+##     ## geneToModule)
654
+
655
+##     ## rest: rest of genes, with fitness
656
+
657
+
658
+##     ## For epistasis and order effects we create the output object but
659
+##     ## missing the numeric ids of genes. With rT we do it in one go, as we
660
+##     ## already know the mapping of genes to numeric ids. We could do the
661
+##     ## same in epistasis and order, but we would be splitting twice
662
+##     ## (whereas for rT extracting the names is very simple).
663
+
664
+    
665
+##     rtNames <- NULL
666
+##     epiNames <- NULL
667
+##     orNames <- NULL
668
+##     if(!is.null(rT)) {
669
+##         ## This is really ugly, but to prevent the stringsAsFactors I need it here:
670
+##         rT$parent <- as.character(rT$parent)
671
+##         rT$child <- as.character(rT$child)
672
+##         rT$typeDep <- as.character(rT$typeDep)
673
+##         rtNames <- unique(c(rT$parent, rT$child))
674
+##     }
675
+##     if(!is.null(epistasis)) {
676
+##         long.epistasis <- to.long.epist.order(epistasis, ":")
677
+##         ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
678
+##         ## deal with the possible negative signs
679
+##         epiNames <- setdiff(unique(
680
+##             unlist(lapply(long.epistasis,
681
+##                           function(x) lapply(x$ids,
682
+##                                              function(z) strsplit(z, "^-"))))),
683
+##                             "")
684
+        
685
+##     } else {
686
+##         long.epistasis <- list()
687
+##     }
688
+##     if(!is.null(orderEffects)) {
689
+##         long.orderEffects <- to.long.epist.order(orderEffects, ">")
690
+##         orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
691
+##     } else {
692
+##         long.orderEffects <- list()
693
+##     }
694
+##     allModuleNames <- unique(c(rtNames, epiNames, orNames))
695
+##     if(is.null(geneToModule)) {
696
+##         gMOneToOne <- TRUE
697
+##         geneToModule <- geneModuleNull(allModuleNames)
698
+##     } else {
699
+##         gMOneToOne <- FALSE
700
+##         if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
701
+##             stop(paste("Some values in geneToModule not present in any of",
702
+##                        " rT, epistasis, or order effects"))
703
+##         if(any(is.na(match(allModuleNames, names(geneToModule)))))
704
+##             stop(paste("Some values in rT, epistasis, ",
705
+##                        "or order effects not in geneToModule"))
706
+##     }
707
+##     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
708
+
709
+##     idm <- unique(geneModule$ModuleNumID)
710
+##     names(idm) <- unique(geneModule$Module)
711
+
712
+##     if(!is.null(rT)) {
713
+##         checkRT(rT)
714
+##         long.rt <- to.long.rt(rT, idm)
715
+##     } else {
716
+##         long.rt <- list() ## yes, we want an object of length 0
717
+##     }
718
+
719
+##     ## Append the numeric ids to epistasis and order
720
+##     if(!is.null(epistasis)) {
721
+##         long.epistasis <- lapply(long.epistasis,
722
+##                                  function(x)
723
+##                                      addIntID.epist.order(x, idm,
724
+##                                                           sort = TRUE,
725
+##                                                           sign = TRUE))
726
+##     }
727
+##     if(!is.null(orderEffects)) {
728
+##         long.orderEffects <- lapply(long.orderEffects,
729
+##                                     function(x)
730
+##                                         addIntID.epist.order(x, idm,
731
+##                                                              sort = FALSE,
732
+##                                                              sign = FALSE))
733
+##     }
734
+    
735
+##     if(!is.null(noIntGenes)) {
736
+##         mg <- max(geneModule[, "GeneNumID"])
737
+##         gnum <- seq_along(noIntGenes) + mg
738
+##         if(!is.null(names(noIntGenes))) {
739
+##             ng <- names(noIntGenes)
740
+##             if(any(ng %in% geneModule[, "Gene"] ))
741
+##                 stop("A gene in noIntGenes also present in the other terms")
742
+##         } else {
743
+##             ng <- gnum
744
+##         }
745
+##         geneNoInt <- data.frame(Gene = as.character(ng),
746
+##                                 GeneNumID = gnum,
747
+##                                 s = noIntGenes,
748
+##                                 stringsAsFactors = FALSE)
749
+##     } else {
750
+##         geneNoInt <- data.frame()
751
+##     }
752
+##     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
753
+##              nrow(geneNoInt)) == 0)
754
+##         stop("You have specified nothing!")
755
+
756
+##     if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
757
+##         graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
758
+##     } else {
759
+##         graphE <- NULL
760
+##     }
761
+
762
+##     if(!is.null(drvNames)) {
763
+##         drv <- getGeneIDNum(geneModule, geneNoInt, drvNames)
764
+##     } else {
765
+##         drv <- geneModule$GeneNumID[-1]
766
+##     }
767
+##     if(!keepInput) {
768
+##         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
769
+##     }
770
+##     out <- list(long.rt = long.rt,
771
+##                 long.epistasis = long.epistasis,
772
+##                 long.orderEffects = long.orderEffects,
773
+##                 long.geneNoInt = geneNoInt,
774
+##                 geneModule = geneModule,
775
+##                 gMOneToOne = gMOneToOne,
776
+##                 geneToModule = geneToModule,
777
+##                 graph = graphE,
778
+##                 drv = drv,
779
+##                 rT = rT,
780
+##                 epistasis = epistasis,
781
+##                 orderEffects = orderEffects,
782
+##                 noIntGenes = noIntGenes                
783
+##                 )
784
+##     class(out) <- c("fitnessEffects")
785
+##     return(out)
786
+## }
787
+
788
+
580 789
 ## No longer used
581 790
 ## rtAndGeneModule <- function(mdeps, gM = NULL) {
582 791
 ##     ## To show a table of restrictions when there are modules. Do not use
... ...
@@ -638,14 +847,94 @@ allFitnessEffects <- function(rT = NULL,
638 847
 ##     ##                    echo = TRUE)
639 848
 ## }
640 849
 
850
+
851
+
852
+evalGenotypeORMut <- function(genotype,
853
+                              fmEffects,
854
+                              verbose = FALSE,
855
+                              echo = FALSE,
856
+                              model = "",
857
+                              calledBy_= NULL) {