git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@118909 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
Package: OncoSimulR |
2 | 2 |
Type: Package |
3 | 3 |
Title: Forward Genetic Simulation of Cancer Progresion with Epistasis |
4 |
-Version: 2.3.2 |
|
5 |
-Date: 2016-04-14 |
|
4 |
+Version: 2.3.3 |
|
5 |
+Date: 2016-06-23 |
|
6 | 6 |
Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"), |
7 | 7 |
email = "rdiaz02@gmail.com"), |
8 | 8 |
person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com")) |
... | ... |
@@ -13,21 +13,23 @@ Description: Functions for forward population genetic simulation in |
13 | 13 |
asexual populations, with special focus on cancer progression. |
14 | 14 |
Fitness can be an arbitrary function of genetic interactions between |
15 | 15 |
multiple genes or modules of genes, including epistasis, order |
16 |
- restrictions in mutation accumulation, and order effects. Simulations |
|
16 |
+ restrictions in mutation accumulation, and order effects. Mutation |
|
17 |
+ rates can differ between genes, and we can include mutator/antimutator |
|
18 |
+ genes (to model mutator phenotypes). Simulations |
|
17 | 19 |
use continuous-time models and can include driver and passenger genes |
18 | 20 |
and modules. Also included are functions for simulating random DAGs |
19 | 21 |
of the type found in Oncogenetic Tress, Conjunctive Bayesian Networks, |
20 | 22 |
and other tumor progression models, and for plotting and sampling from |
21 | 23 |
single or multiple realizations of the simulations, including |
22 |
- single-cell sampling, as well as functions for plotting the true |
|
23 |
- phylogenetic relationships of the clones. |
|
24 |
+ single-cell sampling, as well as functions for plotting the parent-child |
|
25 |
+ relationships of the clones. |
|
24 | 26 |
biocViews: BiologicalQuestion, SomaticMutation |
25 | 27 |
License: GPL (>= 3) |
26 | 28 |
URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cancer.gov/gsr/packages/oncosimulr/ |
27 | 29 |
BugReports: https://github.com/rdiaz02/OncoSimul/issues |
28 |
-Depends: R (>= 3.1.0) |
|
29 |
-Imports: Rcpp (>= 0.11.1), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices |
|
30 |
-Suggests: BiocStyle, knitr, Oncotree, testthat |
|
30 |
+Depends: R (>= 3.3.0) |
|
31 |
+Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel |
|
32 |
+Suggests: BiocStyle, knitr, Oncotree, testthat (>= 1.0.0) |
|
31 | 33 |
LinkingTo: Rcpp |
32 | 34 |
VignetteBuilder: knitr |
33 | 35 |
|
... | ... |
@@ -2,8 +2,13 @@ useDynLib(OncoSimulR, .registration=TRUE) |
2 | 2 |
|
3 | 3 |
export("oncoSimulPop", "oncoSimulIndiv", "samplePop", |
4 | 4 |
"plotPoset", "oncoSimulSample", "allFitnessEffects", |
5 |
- "evalGenotype", "evalAllGenotypes", "simOGraph" |
|
6 |
- , "plotClonePhylog", "OncoSimulWide2Long" |
|
5 |
+ "evalGenotype", "evalAllGenotypes", "simOGraph", |
|
6 |
+ "plotClonePhylog", "OncoSimulWide2Long", |
|
7 |
+ "allMutatorEffects", "evalAllGenotypesMut", |
|
8 |
+ "evalGenotypeMut", "evalGenotypeFitAndMut", |
|
9 |
+ "evalAllGenotypesFitAndMut", |
|
10 |
+ "rfitness", |
|
11 |
+ "plotFitnessLandscape" |
|
7 | 12 |
) |
8 | 13 |
|
9 | 14 |
S3method(plot, oncosimul) |
... | ... |
@@ -13,8 +18,14 @@ S3method(plot, oncosimulpop) |
13 | 18 |
S3method(summary, oncosimulpop) |
14 | 19 |
S3method(print, oncosimulpop) |
15 | 20 |
S3method(plot, fitnessEffects) |
21 |
+S3method(plot, genotype_fitness_matrix) |
|
22 |
+S3method(plot, evalAllGenotypes) |
|
23 |
+S3method(plot, evalAllGenotypesMut) |
|
16 | 24 |
|
25 |
+import(ggplot2) |
|
26 |
+importFrom("ggrepel", geom_text_repel, geom_label_repel) |
|
17 | 27 |
|
28 |
+importFrom("stats", "rnorm") |
|
18 | 29 |
importFrom("data.table", rbindlist, .rbind.data.table) |
19 | 30 |
importFrom(Rcpp, evalCpp) |
20 | 31 |
importFrom("igraph", igraph.to.graphNEL, graph.data.frame, V, E, |
... | ... |
@@ -31,3 +42,11 @@ importFrom("stats", "na.omit", "runif", "smooth.spline") |
31 | 42 |
importFrom("utils", "type.convert") |
32 | 43 |
importFrom("RColorBrewer", "brewer.pal") |
33 | 44 |
importFrom("grDevices", "colorRampPalette", "hsv", "rainbow") |
45 |
+importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate", |
|
46 |
+ "filter") |
|
47 |
+importFrom("smatr", "ma") ## for major axis regression in some tests |
|
48 |
+importFrom("car", "linearHypothesis") |
|
49 |
+ |
|
50 |
+ |
|
51 |
+ |
|
52 |
+ |
... | ... |
@@ -23,6 +23,7 @@ oncoSimulSample <- function(Nindiv, |
23 | 23 |
model = "Exp", |
24 | 24 |
numPassengers = 0, |
25 | 25 |
mu = 1e-6, |
26 |
+ muEF = NULL, |
|
26 | 27 |
detectionSize = round(runif(Nindiv, 1e5, 1e8)), |
27 | 28 |
detectionDrivers = { |
28 | 29 |
if(inherits(fp, "fitnessEffects")) { |
... | ... |
@@ -62,6 +63,7 @@ oncoSimulSample <- function(Nindiv, |
62 | 63 |
thresholdWhole = 0.5, |
63 | 64 |
initMutant = NULL, |
64 | 65 |
verbosity = 1, |
66 |
+ showProgress = TRUE, |
|
65 | 67 |
seed = "auto"){ |
66 | 68 |
## No longer using mclapply, because of the way we use the limit on |
67 | 69 |
## the number of tries. |
... | ... |
@@ -164,6 +166,7 @@ oncoSimulSample <- function(Nindiv, |
164 | 166 |
model = model, |
165 | 167 |
numPassengers = numPassengers, |
166 | 168 |
mu = mu, |
169 |
+ muEF = muEF, |
|
167 | 170 |
detectionSize = params[indiv, "detectionSize"], |
168 | 171 |
detectionDrivers = params[indiv, "detectionDrivers"], |
169 | 172 |
sampleEvery = sampleEvery, |
... | ... |
@@ -195,6 +198,14 @@ oncoSimulSample <- function(Nindiv, |
195 | 198 |
numToRun <- (numToRun - 1) |
196 | 199 |
attemptsUsed <- attemptsUsed + tmp$other$attemptsUsed |
197 | 200 |
attemptsLeft <- (max.num.tries.total - attemptsUsed) |
201 |
+ if(showProgress) { |
|
202 |
+ cat("...... Done individual ", indiv, |
|
203 |
+ ". Used ", attemptsUsed, "attempts.", |
|
204 |
+ ". Running for ", as.double(difftime(Sys.time(), |
|
205 |
+ startTime, units = "secs")), |
|
206 |
+ " secs.\n" |
|
207 |
+ ) |
|
208 |
+ } |
|
198 | 209 |
indiv <- indiv + 1 |
199 | 210 |
|
200 | 211 |
## We need to check in exactly this order. Attempts left only |
... | ... |
@@ -243,17 +254,30 @@ oncoSimulSample <- function(Nindiv, |
243 | 254 |
} |
244 | 255 |
|
245 | 256 |
|
246 |
-samplePop <- function(x, timeSample = "last", typeSample = "whole", |
|
257 |
+samplePop <- function(x, timeSample = "last", |
|
258 |
+ typeSample = "whole", |
|
247 | 259 |
thresholdWhole = 0.5, |
248 |
- geneNames = NULL) { |
|
260 |
+ geneNames = NULL, |
|
261 |
+ popSizeSample = NULL) { |
|
262 |
+ ## timeSample <- match.arg(timeSample) |
|
249 | 263 |
gN <- geneNames |
264 |
+ |
|
265 |
+ if(!is.null(popSizeSample) && (length(popSizeSample) > 1) && |
|
266 |
+ (length(popSizeSample) != length(x))) { |
|
267 |
+ message("length popSizeSample != number of subjects") |
|
268 |
+ popSizeSample <- rep(popSizeSample, length.out = length(x)) |
|
269 |
+ } |
|
270 |
+ ## A hack to prevent Map from crashing with a NULL |
|
271 |
+ if(is.null(popSizeSample)) popSizeSample <- -99 |
|
250 | 272 |
if(inherits(x, "oncosimulpop")) { |
251 | 273 |
z <- do.call(rbind, |
252 |
- lapply(x, |
|
253 |
- get.mut.vector, |
|
254 |
- timeSample = timeSample, |
|
255 |
- typeSample = typeSample, |
|
256 |
- thresholdWhole = thresholdWhole)) |
|
274 |
+ Map(get.mut.vector, |
|
275 |
+ x = x, |
|
276 |
+ timeSample = timeSample, |
|
277 |
+ typeSample = typeSample, |
|
278 |
+ thresholdWhole = thresholdWhole, |
|
279 |
+ popSizeSample = popSizeSample |
|
280 |
+ )) |
|
257 | 281 |
## We need to check if the object is coming from v.2., to avoid |
258 | 282 |
## having to force passing a vector of names |
259 | 283 |
if(is.null(gN) && (!is.null(x[[1]]$geneNames))) |
... | ... |
@@ -262,7 +286,8 @@ samplePop <- function(x, timeSample = "last", typeSample = "whole", |
262 | 286 |
z <- get.mut.vector(x, |
263 | 287 |
timeSample = timeSample, |
264 | 288 |
typeSample = typeSample, |
265 |
- thresholdWhole = thresholdWhole) |
|
289 |
+ thresholdWhole = thresholdWhole, |
|
290 |
+ popSizeSample = popSizeSample) |
|
266 | 291 |
dim(z) <- c(1, length(z)) |
267 | 292 |
if(is.null(gN) && (!is.null(x$geneNames))) |
268 | 293 |
gN <- geneNames |
... | ... |
@@ -285,6 +310,7 @@ oncoSimulPop <- function(Nindiv, |
285 | 310 |
model = "Exp", |
286 | 311 |
numPassengers = 0, |
287 | 312 |
mu = 1e-6, |
313 |
+ muEF = NULL, |
|
288 | 314 |
detectionSize = 1e8, |
289 | 315 |
detectionDrivers = 4, |
290 | 316 |
sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1, |
... | ... |
@@ -329,6 +355,7 @@ oncoSimulPop <- function(Nindiv, |
329 | 355 |
model = model, |
330 | 356 |
numPassengers = numPassengers, |
331 | 357 |
mu = mu, |
358 |
+ muEF = muEF, |
|
332 | 359 |
detectionSize = detectionSize, |
333 | 360 |
detectionDrivers = detectionDrivers, |
334 | 361 |
sampleEvery = sampleEvery, |
... | ... |
@@ -365,6 +392,7 @@ oncoSimulIndiv <- function(fp, |
365 | 392 |
model = "Exp", |
366 | 393 |
numPassengers = 0, |
367 | 394 |
mu = 1e-6, |
395 |
+ muEF = NULL, |
|
368 | 396 |
detectionSize = 1e8, |
369 | 397 |
detectionDrivers = 4, |
370 | 398 |
sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1, |
... | ... |
@@ -434,8 +462,11 @@ oncoSimulIndiv <- function(fp, |
434 | 462 |
stop("Unknown model") |
435 | 463 |
} |
436 | 464 |
|
465 |
+ if( (length(mu) > 1) && !inherits(fp, "fitnessEffects")) |
|
466 |
+ stop("Per-gene mutation rates cannot be used with the old poset format") |
|
467 |
+ |
|
437 | 468 |
if(any(mu < 0)) { |
438 |
- stop("mutation rate (mu) is negative") |
|
469 |
+ stop("(at least one) mutation rate (mu) is negative") |
|
439 | 470 |
} |
440 | 471 |
## We do not test for equality to 0. That might be a weird but |
441 | 472 |
## legitimate case? |
... | ... |
@@ -463,6 +494,8 @@ oncoSimulIndiv <- function(fp, |
463 | 494 |
stop(m) |
464 | 495 |
|
465 | 496 |
} |
497 |
+ if(!is.null(muEF)) |
|
498 |
+ stop("Mutator effects cannot be especified with the old poset format") |
|
466 | 499 |
if(length(initMutant) > 1) |
467 | 500 |
stop("With the old poset, initMutant can only take a single value.") |
468 | 501 |
## Seeding C++ is now much better in new version |
... | ... |
@@ -507,7 +540,7 @@ oncoSimulIndiv <- function(fp, |
507 | 540 |
max.wall.time = max.wall.time, |
508 | 541 |
max.num.tries = max.num.tries, |
509 | 542 |
keepEvery = keepEvery, |
510 |
- alpha = 0.0015, |
|
543 |
+ ## alpha = 0.0015, |
|
511 | 544 |
sh = sh, |
512 | 545 |
K = K, |
513 | 546 |
minDetectDrvCloneSz = minDetectDrvCloneSz, |
... | ... |
@@ -558,7 +591,7 @@ oncoSimulIndiv <- function(fp, |
558 | 591 |
max.wall.time = max.wall.time, |
559 | 592 |
max.num.tries = max.num.tries, |
560 | 593 |
keepEvery = keepEvery, |
561 |
- alpha = 0.0015, |
|
594 |
+ ## alpha = 0.0015, |
|
562 | 595 |
K = K, |
563 | 596 |
minDetectDrvCloneSz = minDetectDrvCloneSz, |
564 | 597 |
extraTime = extraTime, |
... | ... |
@@ -566,7 +599,8 @@ oncoSimulIndiv <- function(fp, |
566 | 599 |
onlyCancer = onlyCancer, |
567 | 600 |
errorHitWallTime = errorHitWallTime, |
568 | 601 |
errorHitMaxTries = errorHitMaxTries, |
569 |
- keepPhylog = keepPhylog), |
|
602 |
+ keepPhylog = keepPhylog, |
|
603 |
+ MMUEF = muEF), |
|
570 | 604 |
silent = !verbosity) |
571 | 605 |
objClass <- c("oncosimul", "oncosimul2") |
572 | 606 |
} |
... | ... |
@@ -648,7 +682,7 @@ plot.oncosimulpop <- function(x, ask = TRUE, |
648 | 682 |
show = "drivers", |
649 | 683 |
type = ifelse(show == "genotypes", |
650 | 684 |
"stacked", "line"), |
651 |
- col = c(8, "orange", 6:1), |
|
685 |
+ col = "auto", |
|
652 | 686 |
log = ifelse(type == "line", "y", ""), |
653 | 687 |
ltyClone = 2:6, |
654 | 688 |
lwdClone = 0.9, |
... | ... |
@@ -1389,17 +1423,32 @@ plotClonePhylog <- function(x, N = 1, t = "last", |
1389 | 1423 |
|
1390 | 1424 |
############# The rest are internal functions |
1391 | 1425 |
|
1426 |
+closest_time <- function(x, size) { |
|
1427 |
+ ## Find the first time when a given size is reached |
|
1428 |
+ sizes <- rowSums(x$pops.by.time[, -1, drop = FALSE]) |
|
1429 |
+ candidates <- which(sizes >= size) |
|
1430 |
+ if(length(candidates) == 0) { |
|
1431 |
+ warning(paste("Pop size never >= requested size.", |
|
1432 |
+ "Thus, there is nothing to sample. You will get NAs")) |
|
1433 |
+ return(-99) |
|
1434 |
+ } else { |
|
1435 |
+ return(candidates[1]) |
|
1436 |
+ } |
|
1437 |
+} |
|
1438 |
+ |
|
1392 | 1439 |
|
1393 |
-get.the.time.for.sample <- function(tmp, timeSample) { |
|
1394 |
- if(timeSample == "last") { |
|
1440 |
+get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) { |
|
1441 |
+ if( !is.null(popSizeSample) && (popSizeSample >= 0) ) { |
|
1442 |
+ the.time <- closest_time(tmp, popSizeSample) |
|
1443 |
+ } else if(timeSample == "last") { |
|
1395 | 1444 |
if(tmp$TotalPopSize == 0) { |
1396 | 1445 |
warning(paste("Final population size is 0.", |
1397 | 1446 |
"Thus, there is nothing to sample with", |
1398 | 1447 |
"sampling last. You will get NAs")) |
1399 | 1448 |
the.time <- -99 |
1400 | 1449 |
} else { |
1401 |
- the.time <- nrow(tmp$pops.by.time) |
|
1402 |
- } |
|
1450 |
+ the.time <- nrow(tmp$pops.by.time) |
|
1451 |
+ } |
|
1403 | 1452 |
} else if (timeSample %in% c("uniform", "unif")) { |
1404 | 1453 |
candidate.time <- which(tmp$PerSampleStats[, 4] > 0) |
1405 | 1454 |
|
... | ... |
@@ -1416,7 +1465,7 @@ get.the.time.for.sample <- function(tmp, timeSample) { |
1416 | 1465 |
} else { |
1417 | 1466 |
the.time <- sample(candidate.time, 1) |
1418 | 1467 |
} |
1419 |
- } else { |
|
1468 |
+ } else { |
|
1420 | 1469 |
stop("Unknown timeSample option") |
1421 | 1470 |
} |
1422 | 1471 |
return(the.time) |
... | ... |
@@ -1424,8 +1473,8 @@ get.the.time.for.sample <- function(tmp, timeSample) { |
1424 | 1473 |
|
1425 | 1474 |
|
1426 | 1475 |
get.mut.vector <- function(x, timeSample, typeSample, |
1427 |
- thresholdWhole) { |
|
1428 |
- the.time <- get.the.time.for.sample(x, timeSample) |
|
1476 |
+ thresholdWhole, popSizeSample) { |
|
1477 |
+ the.time <- get.the.time.for.sample(x, timeSample, popSizeSample) |
|
1429 | 1478 |
if(the.time < 0) { |
1430 | 1479 |
return(rep(NA, nrow(x$Genotypes))) |
1431 | 1480 |
} |
... | ... |
@@ -1441,10 +1490,10 @@ get.mut.vector <- function(x, timeSample, typeSample, |
1441 | 1490 |
x$Genotypes)/popSize) >= thresholdWhole) ) |
1442 | 1491 |
} else if (typeSample %in% c("singleCell", "single")) { |
1443 | 1492 |
|
1444 |
- return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)]) |
|
1445 |
- } else { |
|
1446 |
- stop("Unknown typeSample option") |
|
1447 |
- } |
|
1493 |
+ return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)]) |
|
1494 |
+ } else { |
|
1495 |
+ stop("Unknown typeSample option") |
|
1496 |
+ } |
|
1448 | 1497 |
} |
1449 | 1498 |
|
1450 | 1499 |
|
... | ... |
@@ -1507,9 +1556,15 @@ oncoSimul.internal <- function(poset, ## restrict.table, |
1507 | 1556 |
restrict.table <- poset.to.restrictTable(poset) |
1508 | 1557 |
numDrivers <- nrow(restrict.table) |
1509 | 1558 |
numGenes <- (numDrivers + numPassengers) |
1510 |
- |
|
1559 |
+ if(numGenes < 2) |
|
1560 |
+ stop("There must be at least two genes (loci) in the fitness effects.", |
|
1561 |
+ "If you only care about a degenerate case with just one,", |
|
1562 |
+ "you can enter a second gene", |
|
1563 |
+ "with fitness effect of zero.") |
|
1511 | 1564 |
if(numGenes > 64) |
1512 |
- stop("Largest possible number of genes is 64") |
|
1565 |
+ stop("Largest possible number of genes (loci) is 64 for version 1.", |
|
1566 |
+ "You are strongly encouraged to use the new specification", |
|
1567 |
+ "as in version 2.") |
|
1513 | 1568 |
|
1514 | 1569 |
## These can never be set by the user |
1515 | 1570 |
## if(initSize_species < 10) { |
... | ... |
@@ -1567,8 +1622,8 @@ oncoSimul.internal <- function(poset, ## restrict.table, |
1567 | 1622 |
BNB_Algo5(restrictTable = rtC, |
1568 | 1623 |
numDrivers = numDrivers, |
1569 | 1624 |
numGenes = numGenes, |
1570 |
- typeCBN_ = typeCBN, |
|
1571 |
- birthRate = birth, |
|
1625 |
+ typeCBN_= typeCBN, |
|
1626 |
+ ## birthRate = birth, |
|
1572 | 1627 |
s = s, |
1573 | 1628 |
death = death, |
1574 | 1629 |
mu = mu, |
... | ... |
@@ -1582,16 +1637,15 @@ oncoSimul.internal <- function(poset, ## restrict.table, |
1582 | 1637 |
verbosity = verbosity, |
1583 | 1638 |
speciesFS = speciesFS, |
1584 | 1639 |
ratioForce = ratioForce, |
1585 |
- typeFitness_= typeFitness, |
|
1640 |
+ typeFitness_ = typeFitness, |
|
1586 | 1641 |
maxram = max.memory, |
1587 | 1642 |
mutationPropGrowth = as.integer(mutationPropGrowth), |
1588 | 1643 |
initMutant = initMutant, |
1589 | 1644 |
maxWallTime = max.wall.time, |
1590 | 1645 |
keepEvery = keepEvery, |
1591 |
- alpha = alpha, |
|
1646 |
+ # alpha = alpha, |
|
1592 | 1647 |
sh = sh, |
1593 | 1648 |
K = K, |
1594 |
- # end = TimeEvery# , |
|
1595 | 1649 |
detectionDrivers = detectionDrivers, |
1596 | 1650 |
onlyCancer = onlyCancer, |
1597 | 1651 |
errorHitWallTime = errorHitWallTime, |
... | ... |
@@ -1600,7 +1654,7 @@ oncoSimul.internal <- function(poset, ## restrict.table, |
1600 | 1654 |
minDetectDrvCloneSz = minDetectDrvCloneSz, |
1601 | 1655 |
extraTime = extraTime |
1602 | 1656 |
), |
1603 |
- NumDrivers = numDrivers |
|
1657 |
+ NumDrivers = numDrivers |
|
1604 | 1658 |
)) |
1605 | 1659 |
|
1606 | 1660 |
} |
... | ... |
@@ -1,19 +1,24 @@ |
1 | 1 |
# This file was generated by Rcpp::compileAttributes |
2 | 2 |
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
3 | 3 |
|
4 |
-nr_BNB_Algo5 <- function(rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, alpha, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog) { |
|
5 |
- .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, alpha, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog) |
|
4 |
+nr_BNB_Algo5 <- function(rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_) { |
|
5 |
+ .Call('OncoSimulR_nr_BNB_Algo5', PACKAGE = 'OncoSimulR', rFE, mu, death, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant_, maxWallTime, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, MMUEF, full2mutator_) |
|
6 | 6 |
} |
7 | 7 |
|
8 |
-BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, birthRate, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, alpha, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) { |
|
9 |
- .Call('OncoSimulR_BNB_Algo5', PACKAGE = 'OncoSimulR', restrictTable, numDrivers, numGenes, typeCBN_, birthRate, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, alpha, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) |
|
8 |
+BNB_Algo5 <- function(restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) { |
|
9 |
+ .Call('OncoSimulR_BNB_Algo5', PACKAGE = 'OncoSimulR', restrictTable, numDrivers, numGenes, typeCBN_, s, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSp, initIt, seed, verbosity, speciesFS, ratioForce, typeFitness_, maxram, mutationPropGrowth, initMutant, maxWallTime, keepEvery, sh, K, detectionDrivers, onlyCancer, errorHitWallTime, maxNumTries, errorHitMaxTries, minDetectDrvCloneSz, extraTime) |
|
10 | 10 |
} |
11 | 11 |
|
12 |
-## readFitnessEffects <- function(rFE, echo) { |
|
13 |
-## invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo)) |
|
14 |
-## } |
|
12 |
+readFitnessEffects <- function(rFE, echo) { |
|
13 |
+ invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo)) |
|
14 |
+} |
|
15 |
+ |
|
16 |
+evalRGenotype <- function(rG, rFE, verbose, prodNeg, calledBy_) { |
|
17 |
+ .Call('OncoSimulR_evalRGenotype', PACKAGE = 'OncoSimulR', rG, rFE, verbose, prodNeg, calledBy_) |
|
18 |
+} |
|
15 | 19 |
|
16 |
-evalRGenotype <- function(rG, rFE, verbose, prodNeg) { |
|
17 |
- .Call('OncoSimulR_evalRGenotype', PACKAGE = 'OncoSimulR', rG, rFE, verbose, prodNeg) |
|
20 |
+evalRGenotypeAndMut <- function(rG, rFE, muEF, full2mutator_, verbose, prodNeg) { |
|
21 |
+ .Call('OncoSimulR_evalRGenotypeAndMut', PACKAGE = 'OncoSimulR', rG, rFE, muEF, full2mutator_, verbose, prodNeg) |
|
18 | 22 |
} |
19 | 23 |
|
24 |
+ |
20 | 25 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,398 @@ |
1 |
+## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte |
|
2 |
+ |
|
3 |
+## This program is free software: you can redistribute it and/or modify |
|
4 |
+## it under the terms of the GNU General Public License as published by |
|
5 |
+## the Free Software Foundation, either version 3 of the License, or |
|
6 |
+## (at your option) any later version. |
|
7 |
+ |
|
8 |
+## This program is distributed in the hope that it will be useful, |
|
9 |
+## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+## GNU General Public License for more details. |
|
12 |
+ |
|
13 |
+## You should have received a copy of the GNU General Public License |
|
14 |
+## along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+ |
|
16 |
+## plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
|
17 |
+## plot.genotype_fitness_matrix <- plotFitnessLandscape |
|
18 |
+ |
|
19 |
+## FIXME: show only accessible paths? |
|
20 |
+ |
|
21 |
+plotFitnessLandscape <- function(x, show_labels = TRUE, |
|
22 |
+ col = c("green4", "red", "yellow"), |
|
23 |
+ lty = c(1, 2, 3), use_ggrepel = FALSE, |
|
24 |
+ log = FALSE, max_num_genotypes = 2000, |
|
25 |
+ ...) { |
|
26 |
+ |
|
27 |
+ ## FIXME future: |
|
28 |
+ |
|
29 |
+ |
|
30 |
+ |
|
31 |
+ ## - Allow passing order effects. Change "allGenotypes_to_matrix" |
|
32 |
+ ## below? Probably not, as we cannot put order effects as a |
|
33 |
+ ## matrix. Do something else, like allow only order effects if from |
|
34 |
+ ## and allFitnessEffects object. |
|
35 |
+ |
|
36 |
+ ## - Allow selection only some genotypes or alleles |
|
37 |
+ |
|
38 |
+ ## Allow selecting only paths that involve a particular genotype in |
|
39 |
+ ## adjacency matrix of genotype i go at row i and column i. Follow back |
|
40 |
+ ## all entries in row i, and their previous, and forward all column i. |
|
41 |
+ |
|
42 |
+ |
|
43 |
+ ## gfm: genotype fitness matrix |
|
44 |
+ ## afe: all fitness effects |
|
45 |
+ |
|
46 |
+ ## We seem to go back and forth, but we need to ensure the afe and the |
|
47 |
+ ## gfm are coherent. Since there are many ways to make them fail |
|
48 |
+ ## (e.g., a user passing the wrong order, or incomplete matrices, etc) |
|
49 |
+ ## we do it the following way. |
|
50 |
+ |
|
51 |
+ ## Yes, we set WT to 1 as we call from_genotype_fitness on |
|
52 |
+ ## matrices and genotype_fitness_matrix objects. |
|
53 |
+ ## We do that because we call evalAllGenotypes to |
|
54 |
+ ## get the string representation, etc. And this is for use |
|
55 |
+ ## with OncoSimul. |
|
56 |
+ |
|
57 |
+ ## FIXME: passing fitness as a 2 column data frame |
|
58 |
+ |
|
59 |
+ if( (inherits(x, "genotype_fitness_matrix")) || |
|
60 |
+ ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) { |
|
61 |
+ ## Why this? We go back and forth twice. We need both things. We |
|
62 |
+ ## could construct the afe below by appropriately pasting the |
|
63 |
+ ## columns names |
|
64 |
+ afe <- evalAllGenotypes(allFitnessEffects( |
|
65 |
+ epistasis = from_genotype_fitness(x)), |
|
66 |
+ order = FALSE, addwt = TRUE, max = max_num_genotypes) |
|
67 |
+ ## Might not be needed with the proper gfm object (so gmf <- x) |
|
68 |
+ ## but is needed if arbitrary matrices. |
|
69 |
+ gfm <- allGenotypes_to_matrix(afe) |
|
70 |
+ } else if(inherits(x, "fitnessEffects")) { |
|
71 |
+ if(!is.null(x$orderEffects) ) |
|
72 |
+ stop("We cannot yet deal with order effects") |
|
73 |
+ afe <- evalAllGenotypes(x, |
|
74 |
+ order = FALSE, |
|
75 |
+ addwt = TRUE, max = max_num_genotypes) |
|
76 |
+ gfm <- allGenotypes_to_matrix(afe) |
|
77 |
+ } else if( (inherits(x, "evalAllGenotypes")) || |
|
78 |
+ (inherits(x, "evalAllGenotypesMut"))) { |
|
79 |
+ if(any(grepl(">", x[, 1], fixed = TRUE))) |
|
80 |
+ stop("We cannot deal with order effects yet.") |
|
81 |
+ x <- x[, c(1, 2)] |
|
82 |
+ if(x[1, "Genotype"] != "WT") { |
|
83 |
+ ## Yes, because we expect this present below |
|
84 |
+ x <- rbind(data.frame(Genotype = "WT", |
|
85 |
+ Fitness = 1, |
|
86 |
+ stringsAsFactors = FALSE), |
|
87 |
+ x) |
|
88 |
+ } |
|
89 |
+ afe <- x |
|
90 |
+ ## in case we pass an evalAllgenotypesfitandmut |
|
91 |
+ gfm <- allGenotypes_to_matrix(afe) |
|
92 |
+ } else if(is.data.frame(x)) { |
|
93 |
+ ## Assume a two-column data frame of genotypes as character |
|
94 |
+ ## vectors and fitness |
|
95 |
+ if(colnames(x)[2] != "Fitness") |
|
96 |
+ stop("We cannot guess what you are passing here") |
|
97 |
+ afe <- evalAllGenotypes(allFitnessEffects(genotFitness = x), |
|
98 |
+ order = FALSE, addwt = TRUE, |
|
99 |
+ max = max_num_genotypes) |
|
100 |
+ gfm <- allGenotypes_to_matrix(afe) |
|
101 |
+ } else { |
|
102 |
+ stop("We cannot guess what you are passing here") |
|
103 |
+ } |
|
104 |
+ |
|
105 |
+ |
|
106 |
+ |
|
107 |
+ ## if(inherits(x, "genotype_fitness_matrix")) { |
|
108 |
+ ## ## Why this? We go back and forth twice. We need both things. We |
|
109 |
+ ## ## could construct the afe below by appropriately pasting the |
|
110 |
+ ## ## columns names |
|
111 |
+ ## afe <- evalAllGenotypes(allFitnessEffects( |
|
112 |
+ ## epistasis = from_genotype_fitness(x)), |
|
113 |
+ ## order = FALSE, addwt = TRUE, max = 2000) |
|
114 |
+ ## gfm <- allGenotypes_to_matrix(afe) ## should not be needed? gfm <- x |
|
115 |
+ ## } else if(inherits(x, "fitnessEffects")) { |
|
116 |
+ ## if(!is.null(x$orderEffects) ) |
|
117 |
+ ## stop("We cannot yet deal with order effects") |
|
118 |
+ ## afe <- evalAllGenotypes(x, |
|
119 |
+ ## order = FALSE, |
|
120 |
+ ## addwt = TRUE, max = 2000) |
|
121 |
+ ## gfm <- allGenotypes_to_matrix(x) |
|
122 |
+ ## } else { |
|
123 |
+ ## lc <- ncol(x) |
|
124 |
+ ## ## detect an appropriately formatted matrix |
|
125 |
+ ## if( (is.data.frame(x) || is.matrix(x)) ) { |
|
126 |
+ ## if(ncol(x) > 2) { |
|
127 |
+ ## ## We cannot be sure all genotypes are present |
|
128 |
+ ## ## but that is not our business? |
|
129 |
+ ## ## colnames(x)[lc] <- "Fitness" |
|
130 |
+ ## ## So this must be a matrix |
|
131 |
+ ## afe <- evalAllGenotypes(allFitnessEffects( |
|
132 |
+ ## epistasis = from_genotype_fitness(x)), |
|
133 |
+ ## order = FALSE, addwt = TRUE, max = 2000) |
|
134 |
+ ## gfm <- allGenotypes_to_matrix(afe) |
|
135 |
+ ## } else { |
|
136 |
+ ## ## This must be a data frame |
|
137 |
+ ## if(!is.data.frame(x)) |
|
138 |
+ ## stop("How are you passing a matrix here?") |
|
139 |
+ ## if(colnames(x)[lc] != "Fitness") |
|
140 |
+ ## stop("We cannot guess what you are passing here") |
|
141 |
+ ## ## We are passed an allFitnessEffects output |
|
142 |
+ ## ## Will be simpler later as we will know immediately |
|
143 |
+ ## if(x[1, "Genotype"] != "WT") { |
|
144 |
+ ## ## Yes, because we expect this present below |
|
145 |
+ ## x <- rbind(data.frame(Genotype = "WT", |
|
146 |
+ ## Fitness = 1, |
|
147 |
+ ## stringsAsFactors = FALSE), |
|
148 |
+ ## x) |
|
149 |
+ ## } |
|
150 |
+ ## x <- afe |
|
151 |
+ ## gfm <- allGenotypes_to_matrix(afe) |
|
152 |
+ ## } |
|
153 |
+ ## } else { |
|
154 |
+ ## stop("This is not allowed input") |
|
155 |
+ ## } |
|
156 |
+ ## } |
|
157 |
+ |
|
158 |
+ mutated <- rowSums(gfm[, -ncol(gfm)]) |
|
159 |
+ gaj <- genot_to_adj_mat(gfm) |
|
160 |
+ vv <- which(!is.na(gaj), arr.ind = TRUE) |
|
161 |
+ |
|
162 |
+ ## plot(x = mutated, y = e1$Fitness, ylab = "Fitness", |
|
163 |
+ ## xlab = "", type = "n", axes = FALSE) |
|
164 |
+ ## box() |
|
165 |
+ ## axis(2) |
|
166 |
+ ## text(x = mutated, y = x$Fitness, labels = x$Genotype) |
|
167 |
+ |
|
168 |
+ ## The R CMD CHEKC notes about no visible binding for global variable |
|
169 |
+ |
|
170 |
+ x_from <- y_from <- x_to <- y_to <- Change <- muts <- |
|
171 |
+ label <- fitness <- Type <- NULL |
|
172 |
+ |
|
173 |
+ |
|
174 |
+ dd <- data.frame(muts = mutated, |
|
175 |
+ fitness = afe$Fitness, |
|
176 |
+ label = afe$Genotype) |
|
177 |
+ cl <- gaj[vv] |
|
178 |
+ sg <- data.frame(x_from = mutated[vv[, 1]], |
|
179 |
+ y_from = afe$Fitness[vv[, 1]], |
|
180 |
+ x_to = mutated[vv[, 2]], |
|
181 |
+ y_to = afe$Fitness[vv[, 2]], |
|
182 |
+ Change = factor(ifelse(cl == 0, "Neutral", |
|
183 |
+ ifelse(cl > 0, "Gain", "Loss")))) |
|
184 |
+ ## From http://stackoverflow.com/a/17257422 |
|
185 |
+ number_ticks <- function(n) {function(limits) pretty(limits, n)} |
|
186 |
+ |
|
187 |
+ p <- ggplot() + |
|
188 |
+ xlab("") + ylab("Fitness") + |
|
189 |
+ theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), |
|
190 |
+ panel.grid.minor.x = element_blank()) + |
|
191 |
+ geom_segment(data = sg, |
|
192 |
+ aes(x = x_from, y = y_from, |
|
193 |
+ xend = x_to, yend = y_to, |
|
194 |
+ colour = Change, |
|
195 |
+ lty = Change)) + scale_color_manual("Change", |
|
196 |
+ values = col) |
|
197 |
+ if(log) { |
|
198 |
+ p <- p + scale_y_log10(breaks = number_ticks(5)) |
|
199 |
+ } else { |
|
200 |
+ p <- p + scale_y_continuous(breaks = number_ticks(5)) |
|
201 |
+ } |
|
202 |
+ peaks_valleys <- peak_valley(gaj) |
|
203 |
+ maxF <- peaks_valleys$peak |
|
204 |
+ minF <- peaks_valleys$valley |
|
205 |
+ |
|
206 |
+ ddMM <- dd |
|
207 |
+ ddMM$Type <- NA |
|
208 |
+ ddMM$Type[minF] <- "Sink" |
|
209 |
+ ddMM$Type[maxF] <- "Peak" |
|
210 |
+ ddMM <- ddMM[ddMM$Type %in% c("Sink", "Peak"), ] |
|
211 |
+ |
|
212 |
+ if(show_labels && use_ggrepel) { |
|
213 |
+ p <- p + geom_text_repel(data = dd[-c(minF, maxF), ], |
|
214 |
+ aes(x = muts, y = fitness, label = label)) + |
|
215 |
+ geom_label_repel(data = ddMM, |
|
216 |
+ aes(x = muts, y = fitness, label = label, fill = Type), |
|
217 |
+ color = "black", |
|
218 |
+ fontface = "bold") |
|
219 |
+ ## geom_label_repel(data = dd[maxF, ], aes(x = muts, y = fitness, label = label), |
|
220 |
+ ## color = "black", |
|
221 |
+ ## fontface = "bold", fill = col[1]) + |
|
222 |
+ ## geom_label_repel(data = dd[minF, ], aes(x = muts, y = fitness, label = label), |
|
223 |
+ ## color = "black", |
|
224 |
+ ## fontface = "bold", fill = col[2]) |
|
225 |
+ |
|
226 |
+ } |
|
227 |
+ else { |
|
228 |
+ if(!show_labels) { |
|
229 |
+ ## Use same code, but make label empty |
|
230 |
+ ddMM$label <- "" |
|
231 |
+ } |
|
232 |
+ |
|
233 |
+ p <- p + geom_text(data = dd[-c(minF, maxF), ], |
|
234 |
+ aes(x = muts, y = fitness, label = label), |
|
235 |
+ vjust = -0.2, hjust = "inward") + |
|
236 |
+ geom_label(data = ddMM, |
|
237 |
+ aes(x = muts, y = fitness, label = label, fill = Type), |
|
238 |
+ vjust = -0.2, hjust = "inward", color = "black", |
|
239 |
+ fontface = "bold") |
|
240 |
+ ## geom_label(data = dd[maxF, ], aes(x = muts, y = fitness, label = label), |
|
241 |
+ ## vjust = -0.2, hjust = "inward", color = "black", |
|
242 |
+ ## fontface = "bold", fill = col[1]) + |
|
243 |
+ ## geom_label(data = dd[minF, ], aes(x = muts, y = fitness, label = label), |
|
244 |
+ ## vjust = -0.2, hjust = "inward", color = "black", |
|
245 |
+ ## fontface = "bold", fill = col[2]) |
|
246 |
+ } |
|
247 |
+ p <- p + scale_fill_manual("Local\nmax/min", values = col) |
|
248 |
+ p |
|
249 |
+} |
|
250 |
+ |
|
251 |
+ |
|
252 |
+plot.evalAllGenotypes <- plot.evalAllGenotypesMut <- |
|
253 |
+ plot.genotype_fitness_matrix <- |
|
254 |
+ plot_fitness_landscape <- |
|
255 |
+ plotFitnessLandscape |
|
256 |
+ |
|
257 |
+ |
|
258 |
+###################################################################### |
|
259 |
+###################################################################### |
|
260 |
+###################################################################### |
|
261 |
+#### |
|
262 |
+#### Internal functions |
|
263 |
+#### |
|
264 |
+###################################################################### |
|
265 |
+###################################################################### |
|
266 |
+###################################################################### |
|
267 |
+ |
|
268 |
+ |
|
269 |
+ |
|
270 |
+generate_matrix_genotypes <- function(g) { |
|
271 |
+ ## FIXME future: do this for order too? Only if rfitness for order. |
|
272 |
+ ## Given a number of genes, generate all possible genotypes. |
|
273 |
+ |
|
274 |
+ if(g > 20) stop("This would generate more than one million genotypes") |
|
275 |
+ ## g: number of genes |
|
276 |
+ f1 <- function(n) { |
|
277 |
+ lapply(seq.int(n), function(x) combinations(n = n, r = x)) |
|
278 |
+ } |
|
279 |
+ genotNums <- f1(g) |
|
280 |
+ list.of.vectors <- function(y) { |
|
281 |
+ ## there's got to be a simpler way |
|
282 |
+ lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), |
|
283 |
+ recursive = FALSE), |
|
284 |
+ function(m) m[[1]]) |
|
285 |
+ } |
|
286 |
+ genotNums <- list.of.vectors(genotNums) |
|
287 |
+ |
|
288 |
+ v <- rep(0, g) |
|
289 |
+ mat <- matrix(unlist(lapply(genotNums, |
|
290 |
+ function(x) { |
|
291 |
+ v[x] <- 1 |
|
292 |
+ return(v) |
|
293 |
+ } |
|
294 |
+ )), ncol = g, byrow = TRUE) |
|
295 |
+ mat <- rbind(rep(0, g), mat) |
|
296 |
+ colnames(mat) <- LETTERS[1:g] |
|
297 |
+ return(mat) |
|
298 |
+} |
|
299 |
+ |
|
300 |
+ |
|
301 |
+ |
|
302 |
+genot_to_adj_mat <- function(x) { |
|
303 |
+ ## FIXME this can take about 23% of the time of the ggplot call. |
|
304 |
+ ## But them, we are quickly constructing a 2000*2000 matrix |
|
305 |
+ ## Given a genotype matrix, as given by allGenotypes_to_matrix, produce a |
|
306 |
+ ## directed adjacency matrix between genotypes connected (i.e., those |
|
307 |
+ ## that differ by gaining one mutation) with value being the |
|
308 |
+ ## difference in fitness between destination and origin |
|
309 |
+ |
|
310 |
+ ## Make sure sorted, so ancestors always before descendants |
|
311 |
+ rs0 <- rowSums(x[, -ncol(x)]) |
|
312 |
+ x <- x[order(rs0), ] |
|
313 |
+ rm(rs0) |
|
314 |
+ |
|
315 |
+ y <- x[, -ncol(x)] |
|
316 |
+ f <- x[, ncol(x)] |
|
317 |
+ rs <- rowSums(y) |
|
318 |
+ |
|
319 |
+ ## Move this to C++? |
|
320 |
+ adm <- matrix(NA, nrow = length(rs), ncol = length(rs)) |
|
321 |
+ for(i in 1:length(rs)) { ## i is the current genotype |
|
322 |
+ candidates <- which(rs == (rs[i] + 1)) |
|
323 |
+ for(j in candidates) { |
|
324 |
+ sumdiff <- sum(abs(y[j, ] - y[i, ])) |
|
325 |
+ ## if(sumdiff < 0) stop("eh?") |
|
326 |
+ if(sumdiff == 1) adm[i, j] <- (f[j] - f[i]) |
|
327 |
+ } |
|
328 |
+ } |
|
329 |
+ return(adm) |
|
330 |
+} |
|
331 |
+ |
|
332 |
+ |
|
333 |
+peak_valley <- function(x) { |
|
334 |
+ ## FIXME: when there are no identical entries, all this |
|
335 |
+ ## can be simplified a lot. Actually, there could be a way |
|
336 |
+ ## to only use the slow code on paths with 0. |
|
337 |
+ ## But this does not seem to be the CPU hog. |
|
338 |
+ ## x is an adjacency matrix where i,j is fitness_j - fitness_i. Thus, |
|
339 |
+ ## negative values means j has less fitness than the incoming. And if |
|
340 |
+ ## all not NA entries of a column are negative, then column j is is |
|
341 |
+ ## sink candidate |
|
342 |
+ ## Thus we locate local maxima and minima |
|
343 |
+ |
|
344 |
+ ## Some of this complicated as we need to detect paths like -3, 0, 0, |
|
345 |
+ ## 3. The -3 and the two 0 are local minima with identical values. |
|
346 |
+ |
|
347 |
+ ## We assume crucially that ancestors always have smaller indexes in |
|
348 |
+ ## the matrix than descendants. |
|
349 |
+ |
|
350 |
+ ## Valleys |
|
351 |
+ bad_back <- vector("integer", nrow(x)) |
|
352 |
+ for(i in ncol(x):1) { |
|
353 |
+ if(any(x[i, ] < 0, na.rm = TRUE) || bad_back[i]) { |
|
354 |
+ ## this node is bad. Any ancestor with fitness >= is bad |
|
355 |
+ bad_back[i] <- 1 |
|
356 |
+ reach_b <- which(x[, i] <= 0) |
|
357 |
+ bad_back[reach_b] <- 1 |
|
358 |
+ } |
|
359 |
+ } |
|
360 |
+ bad_fwd <- vector("integer", nrow(x)) |
|
361 |
+ for(i in 1:nrow(x)) { |
|
362 |
+ if( any(x[, i] > 0, na.rm = TRUE) || bad_fwd[i] ) { |
|
363 |
+ ## this node is bad. Any descendant with fitness >= is bad |
|
364 |
+ bad_fwd[i] <- 1 |
|
365 |
+ reach_f <- which(x[i, ] >= 0) |
|
366 |
+ bad_fwd[reach_f] <- 1 |
|
367 |
+ } |
|
368 |
+ } |
|
369 |
+ bad <- union(which(bad_back == 1), which(bad_fwd == 1)) |
|
370 |
+ candidate <- which(apply(x, 2, function(z) all(z <= 0, na.rm = TRUE))) |
|
371 |
+ valley <- setdiff(candidate, bad) |
|
372 |
+ |
|
373 |
+ null <- suppressWarnings(try({rm(bad_back, bad_fwd, reach_b, reach_f, candidate)}, |
|
374 |
+ silent = TRUE)) |
|
375 |
+ ## Peaks |
|
376 |
+ bad_back <- vector("integer", nrow(x)) |
|
377 |
+ for(i in ncol(x):1) { |
|
378 |
+ if(any(x[i, ] > 0, na.rm = TRUE) || bad_back[i]) { |
|
379 |
+ ## this node is bad. Any ancestor with fitness >= is bad |
|
380 |
+ bad_back[i] <- 1 |
|
381 |
+ reach_b <- which(x[, i] >= 0) |
|
382 |
+ bad_back[reach_b] <- 1 |
|
383 |
+ } |
|
384 |
+ } |
|
385 |
+ bad_fwd <- vector("integer", nrow(x)) |
|
386 |
+ for(i in 1:nrow(x)) { |
|
387 |
+ if( any(x[, i] < 0, na.rm = TRUE) || bad_fwd[i] ) { |
|
388 |
+ ## this node is bad. Any descendant with fitness >= is bad |
|
389 |
+ bad_fwd[i] <- 1 |
|
390 |
+ reach_f <- which(x[i, ] <= 0) |
|
391 |
+ bad_fwd[reach_f] <- 1 |
|
392 |
+ } |
|
393 |
+ } |
|
394 |
+ bad <- union(which(bad_back == 1), which(bad_fwd == 1)) |
|
395 |
+ candidate <- which(apply(x, 2, function(z) all(z >= 0, na.rm = TRUE))) |
|
396 |
+ peak <- setdiff(candidate, bad) |
|
397 |
+ return(list(peak = peak, valley = valley)) |
|
398 |
+} |
0 | 399 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,180 @@ |
1 |
+## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte |
|
2 |
+ |
|
3 |
+## This program is free software: you can redistribute it and/or modify |
|
4 |
+## it under the terms of the GNU General Public License as published by |
|
5 |
+## the Free Software Foundation, either version 3 of the License, or |
|
6 |
+## (at your option) any later version. |
|
7 |
+ |
|
8 |
+## This program is distributed in the hope that it will be useful, |
|
9 |
+## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+## GNU General Public License for more details. |
|
12 |
+ |
|
13 |
+## You should have received a copy of the GNU General Public License |
|
14 |
+## along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+ |
|
16 |
+ |
|
17 |
+ |
|
18 |
+## Functions that allow passing a matrix or data frame of mappings |
|
19 |
+## genotype -> fitness so this is taken as input in fitnessEffects. |
|
20 |
+ |
|
21 |
+ |
|
22 |
+from_genotype_fitness <- function(x) { |
|
23 |
+ ## Would break with output from allFitnessEffects and |
|
24 |
+ ## output from allGenotypeAndMut |
|
25 |
+ if(ncol(x) > 2) { |
|
26 |
+ ## Of course, can ONLY work with epistastis, NOT order |
|
27 |
+ return(genot_fitness_to_epistasis(x)) |
|
28 |
+ } else { |
|
29 |
+ ## Make sure no factors |
|
30 |
+ if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1]) |
|
31 |
+ omarker <- any(grepl(">", x[, 1], fixed = TRUE)) |
|
32 |
+ emarker <- any(grepl(",", x[, 1], fixed = TRUE)) |
|
33 |
+ nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE)) |
|
34 |
+ ## if(omarker && emarker) stop("Specify only epistasis or order, not both.") |
|
35 |
+ if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.") |
|
36 |
+ if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.") |
|
37 |
+ ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.") |
|
38 |
+ ## if(!omarker && !emarker) stop("You specified neither epistasis nor order") |
|
39 |
+ if(omarker) { |
|
40 |
+ ## do something. To be completed |
|
41 |
+ stop("This code not yet ready") |
|
42 |
+ ## You can pass to allFitnessEffects genotype -> fitness mappings that |
|
43 |
+ ## involve epistasis and order. But they must have different |
|
44 |
+ ## genes. Otherwise, it is not manageable. |
|
45 |
+ } |
|
46 |
+ if(emarker) { |
|
47 |
+ x <- x[, c(1, 2)] |
|
48 |
+ if(!all(colnames(x) == c("Genotype", "Fitness"))) { |
|
49 |
+ message("Column names of object not Genotype and Fitness.", |
|
50 |
+ " Renaming them assuming that is what you wanted") |
|
51 |
+ colnames(x) <- c("Genotype", "Fitness") |
|
52 |
+ } |
|
53 |
+ ## Yes, we need to do this to scale the fitness and put the "-" |
|
54 |
+ return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x))) |
|
55 |
+ } |
|
56 |
+ |
|
57 |
+ } |
|
58 |
+} |
|
59 |
+ |
|
60 |
+ |
|
61 |
+ |
|
62 |
+genot_fitness_to_epistasis <- function(x) { |
|
63 |
+ ## FIXME future: |
|
64 |
+ |
|
65 |
+ ## - Nope, order cannot be dealt with here. Not a matrix of 0 and 1. |
|
66 |
+ |
|
67 |
+ ## - modify "fitnessEffects" so we can take a component that is |
|
68 |
+ ## - "genot_fitness"; so this would never be exposed to the user |
|
69 |
+ |
|
70 |
+ |
|
71 |
+ ## Why we should not combine this specification with other terms? If |
|
72 |
+ ## you use this is because you say "this is the mapping genotype -> |
|
73 |
+ ## fitness. Period." so we should not combine other terms (or other |
|
74 |
+ ## terms that involve these genes) |
|
75 |
+ |
|
76 |
+ nr <- nrow(x) |
|
77 |
+ if(nr < (2^(ncol(x) - 1))) |
|
78 |
+ message("Number of genotypes less than 2^L.", |
|
79 |
+ " Missing genotype will be set to 1") |
|
80 |
+ ## This is specific if only epistasis, not order |
|
81 |
+ if(nr > (2^(ncol(x) - 1))) |
|
82 |
+ stop("Number of genotypes > 2^L. Repeated entries?") |
|
83 |
+ f <- x[, ncol(x)] |
|
84 |
+ ## Why should I stop? |
|
85 |
+ if(any(f < 0)) |
|
86 |
+ message("Negative fitnesses. Watch out if you divide by the wildtype") |
|
87 |
+ x <- x[, -ncol(x)] |
|
88 |
+ wt <- which(rowSums(x) == 0) |
|
89 |
+ fwt <- 1 |
|
90 |
+ if(length(wt) == 1) |
|
91 |
+ fwt <- f[wt] |
|
92 |
+ if(!isTRUE(all.equal(fwt, 1))) { |
|
93 |
+ message("Fitness of wildtype != 1. ", |
|
94 |
+ "Dividing all fitnesses by fitness of wildtype.") |
|
95 |
+ f <- f/fwt |
|
96 |
+ } |
|
97 |
+ |
|
98 |
+ if(is.null(colnames(x))) { |
|
99 |
+ if(ncol(x) <= 26) |
|
100 |
+ colnames(x) <- LETTERS[1:ncol(x)] |
|
101 |
+ else |
|
102 |
+ colnames(x) <- paste0("G", seq.int(ncol(x))) |
|
103 |
+ } |
|
104 |
+ cn <- colnames(x) |
|
105 |
+ x2 <- matrix("", nrow = nrow(x), ncol = ncol(x)) |
|
106 |
+ x2[x == 0] <- "-" |
|
107 |
+ epin <- apply(x2, 1, function(z) paste(paste0(z, cn), collapse = ":")) |
|
108 |
+ if(anyDuplicated(epin)) |
|
109 |
+ stop("Non unique names") |
|
110 |
+ s <- f - 1 |
|
111 |
+ names(s) <- epin |
|
112 |
+ return(s) |
|
113 |
+} |
|
114 |
+ |
|
115 |
+ |
|
116 |
+ |
|
117 |
+ |
|
118 |
+allGenotypes_to_matrix <- function(x) { |
|
119 |
+ ## Makes no sense to allow passing order: the matrix would have |
|
120 |
+ ## repeated rows. A > B and B > A both have exactly A and B |
|
121 |
+ |
|
122 |
+ ## Take output of evalAllGenotypes or identical data frame and return |
|
123 |
+ ## a matrix with 0/1 in a column for each gene and a final column of |
|
124 |
+ ## Fitness |
|
125 |
+ |
|
126 |
+ ## A WT can be specified with string "WT" |
|
127 |
+ anywt <- which(x[, 1] == "WT") |
|
128 |
+ if(length(anywt) > 1) stop("More than 1 WT") |
|
129 |
+ if(length(anywt) == 1) { |
|
130 |
+ fwt <- x[anywt, 2] |
|
131 |
+ x <- x[-anywt, ] |
|
132 |
+ } else { |
|
133 |
+ fwt <- 1 |
|
134 |
+ } |
|
135 |
+ splitted_genots <- lapply(x$Genotype, |
|
136 |
+ function(z) nice.vector.eo(z, ",")) |
|
137 |
+ |
|
138 |
+ all_genes <- unique(unlist(splitted_genots)) |
|
139 |
+ |
|
140 |
+ m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes)) |
|
141 |
+ the_match <- lapply(splitted_genots, |
|
142 |
+ function(z) match(z, all_genes)) |
|
143 |
+ ## A lot simpler with a loop |
|
144 |
+ for(i in 1:nrow(m)) { |
|
145 |
+ m[i, the_match[[i]]] <- 1 |
|
146 |
+ } |
|
147 |
+ m <- cbind(m, x[, 2]) |
|
148 |
+ colnames(m) <- c(all_genes, "Fitness") |
|
149 |
+ m <- rbind(c(rep(0, length(all_genes)), fwt), |
|
150 |
+ m) |
|
151 |
+ ## Ensure sorted |
|
152 |
+ m <- data.frame(m) |
|
153 |
+ rs <- rowSums(m[, -ncol(m)]) |
|
154 |
+ m <- m[order(rs), ] |
|
155 |
+ ## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ] |
|
156 |
+ return(m) |
|
157 |
+} |
|
158 |
+ |
|
159 |
+ |
|
160 |
+## |
|
161 |
+ |
|
162 |
+## Example of Bozic issues |
|
163 |
+m1 <- cbind(c(0, 1), c(1, 0), c(2, 3)) |
|
164 |
+ |
|
165 |
+m2 <- cbind(c(1, 1), c(1, 0), c(2, 3)) |
|
166 |
+ |
|
167 |
+m3 <- data.frame(G = c("A, B", "A"), F = c(1, 2)) |
|
168 |
+ |
|
169 |
+m4 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2, 1, 4)) |
|
170 |
+ |
|
171 |
+m5 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2, 1, 0)) |
|
172 |
+ |
|
173 |
+m6 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2.5, 2, 0)) |
|
174 |
+ |
|
175 |
+ |
|
176 |
+ |
|
177 |
+## And no, it makes no sense to use any of this for mutator: in mutator I |
|
178 |
+## directly have the multiplication factor of each gene. Which is likely |
|
179 |
+## what people want anyway. Add it later if needed. by using a ratio |
|
180 |
+## instead of a "-" |
0 | 181 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,67 @@ |
1 |
+## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte |
|
2 |
+ |
|
3 |
+## This program is free software: you can redistribute it and/or modify |
|
4 |
+## it under the terms of the GNU General Public License as published by |
|
5 |
+## the Free Software Foundation, either version 3 of the License, or |
|
6 |
+## (at your option) any later version. |
|
7 |
+ |
|
8 |
+## This program is distributed in the hope that it will be useful, |
|
9 |
+## but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
11 |
+## GNU General Public License for more details. |
|
12 |
+ |
|
13 |
+## You should have received a copy of the GNU General Public License |
|
14 |
+## along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
15 |
+ |
|
16 |
+ |
|
17 |
+## Any mutator gene must have been in fitness, even if it has fitness |
|
18 |
+## effect of 0. Yes, because otherwise the mapping of gene names to |
|
19 |
+## numerical ids becomes a PITA. |
|
20 |
+ |
|
21 |
+ |
|
22 |
+allMutatorEffects <- function(epistasis = NULL, |
|
23 |
+ noIntGenes = NULL, |
|
24 |
+ geneToModule = NULL, |
|
25 |
+ keepInput = TRUE) { |
|
26 |
+ ## This is on purpose to prevent using a rT or orderEffects. Those are |
|
27 |
+ ## not tested to work with mutator. |
|
28 |
+ allFitnessORMutatorEffects( |
|
29 |
+ rT = NULL, |
|
30 |
+ epistasis = epistasis, |
|
31 |
+ orderEffects = NULL, |
|
32 |
+ noIntGenes = noIntGenes, |
|
33 |
+ geneToModule = geneToModule, |
|
34 |
+ drvNames = NULL, |
|
35 |
+ keepInput = keepInput, |
|
36 |
+ calledBy = "allMutatorEffects") |
|
37 |
+} |
|
38 |
+ |
|
39 |
+ |
|
40 |
+evalAllGenotypesMut <- function(mutatorEffects, |
|
41 |
+ max = 256, |
|
42 |
+ addwt = FALSE) { |
|
43 |
+ evalAllGenotypesORMut( |
|
44 |
+ fmEffects = mutatorEffects, |
|
45 |
+ order = FALSE, |
|
46 |
+ max = max, |
|
47 |
+ model = "", |
|
48 |
+ calledBy_= "evalGenotypeMut" |
|
49 |
+ ) |
|
50 |
+} |
|
51 |
+ |
|
52 |
+evalGenotypeMut <- function(genotype, mutatorEffects, |
|
53 |
+ verbose = FALSE, |
|
54 |
+ echo = FALSE) { |
|
55 |
+ if(inherits(mutatorEffects, "fitnessEffects")) |
|
56 |
+ stop("You are trying to get the mutator effects of a fitness specification. ", |
|
57 |
+ "You did not pass an object of class mutatorEffects.") |
|
58 |
+ evalGenotypeORMut(genotype = genotype, |
|
59 |
+ fmEffects = mutatorEffects, |
|
60 |
+ verbose = verbose, |
|
61 |
+ echo = echo, |
|
62 |
+ model = "" , |
|
63 |
+ calledBy_= "evalGenotypeMut" |
|
64 |
+ ) |
|
65 |
+ |
|
66 |
+} |
|
67 |
+ |
... | ... |
@@ -403,13 +403,20 @@ getGeneIDNum <- function(geneModule, geneNoInt, drv, sort = TRUE) { |
403 | 403 |
) |
404 | 404 |
} |
405 | 405 |
|
406 |
-allFitnessEffects <- function(rT = NULL, |
|
407 |
- epistasis = NULL, |
|
408 |
- orderEffects = NULL, |
|
409 |
- noIntGenes = NULL, |
|
410 |
- geneToModule = NULL, |
|
411 |
- drvNames = NULL, |
|
412 |
- keepInput = TRUE) { |
|
406 |
+ |
|
407 |
+ |
|
408 |
+allFitnessORMutatorEffects <- function(rT = NULL, |
|
409 |
+ epistasis = NULL, |
|
410 |
+ orderEffects = NULL, |
|
411 |
+ noIntGenes = NULL, |
|
412 |
+ geneToModule = NULL, |
|
413 |
+ drvNames = NULL, |
|
414 |
+ keepInput = TRUE, |
|
415 |
+ ## refFE = NULL, |
|
416 |
+ calledBy = NULL) { |
|
417 |
+ ## From allFitnessEffects. Generalized so we deal with Fitness |
|
418 |
+ ## and mutator. |
|
419 |
+ |
|
413 | 420 |
## restrictions: the usual rt |
414 | 421 |
|
415 | 422 |
## epistasis: as it says, with the ":" |
... | ... |
@@ -428,6 +435,16 @@ allFitnessEffects <- function(rT = NULL, |
428 | 435 |
## same in epistasis and order, but we would be splitting twice |
429 | 436 |
## (whereas for rT extracting the names is very simple). |
430 | 437 |
|
438 |
+ ## called appropriately? |
|
439 |
+ if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") )) |
|
440 |
+ stop("How did you call this function?. Bug.") |
|
441 |
+ |
|
442 |
+ if(calledBy == "allMutatorEffects") { |
|
443 |
+ ## very paranoid check |
|
444 |
+ if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames)) |
|
445 |
+ stop("allMutatorEffects called with forbidden arguments.", |
|
446 |
+ "Is this an attempt to subvert the function?") |
|
447 |
+ } |
|
431 | 448 |
|
432 | 449 |
rtNames <- NULL |
433 | 450 |
epiNames <- NULL |
... | ... |
@@ -448,7 +465,6 @@ allFitnessEffects <- function(rT = NULL, |
448 | 465 |
function(x) lapply(x$ids, |
449 | 466 |
function(z) strsplit(z, "^-"))))), |
450 | 467 |
"") |
451 |
- |
|
452 | 468 |
} else { |
453 | 469 |
long.epistasis <- list() |
454 | 470 |
} |
... | ... |
@@ -472,7 +488,7 @@ allFitnessEffects <- function(rT = NULL, |
472 | 488 |
"or order effects not in geneToModule")) |
473 | 489 |
} |
474 | 490 |
geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne) |
475 |
- |
|
491 |
+ |
|
476 | 492 |
idm <- unique(geneModule$ModuleNumID) |
477 | 493 |
names(idm) <- unique(geneModule$Module) |
478 | 494 |
|
... | ... |
@@ -529,16 +545,20 @@ allFitnessEffects <- function(rT = NULL, |
529 | 545 |
} else { |
530 | 546 |
geneNoInt <- data.frame() |
531 | 547 |
} |
548 |
+ |
|
532 | 549 |
if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) + |
533 | 550 |
nrow(geneNoInt)) == 0) |
534 | 551 |
stop("You have specified nothing!") |
535 | 552 |
|
536 |
- if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) { |
|
537 |
- graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects) |
|
553 |
+ if(calledBy == "allFitnessEffects") { |
|
554 |
+ if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) { |
|
555 |
+ graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects) |
|
556 |
+ } else { |
|
557 |
+ graphE <- NULL |
|
558 |
+ } |
|
538 | 559 |
} else { |
539 | 560 |
graphE <- NULL |
540 | 561 |
} |
541 |
- |
|
542 | 562 |
if(!is.null(drvNames)) { |
543 | 563 |
drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames)) |
544 | 564 |
## drivers should never be in the geneNoInt; Why!!!??? |
... | ... |
@@ -553,8 +573,11 @@ allFitnessEffects <- function(rT = NULL, |
553 | 573 |
## } |
554 | 574 |
## drv <- getGeneIDNum(geneModule, NULL, drvNames) |
555 | 575 |
} else { |
556 |
- drv <- geneModule$GeneNumID[-1] |
|
576 |
+ ## we used to have this default |
|
577 |
+ ## drv <- geneModule$GeneNumID[-1] |
|
578 |
+ drv <- vector(mode = "integer", length = 0L) |
|
557 | 579 |
} |
580 |
+ |
|
558 | 581 |
if(!keepInput) { |
559 | 582 |
rT <- epistasis <- orderEffects <- noIntGenes <- NULL |
560 | 583 |
} |
... | ... |
@@ -572,11 +595,197 @@ allFitnessEffects <- function(rT = NULL, |
572 | 595 |
orderEffects = orderEffects, |
573 | 596 |
noIntGenes = noIntGenes |
574 | 597 |
) |
575 |
- class(out) <- c("fitnessEffects") |
|
598 |
+ if(calledBy == "allFitnessEffects") { |
|
599 |
+ class(out) <- c("fitnessEffects") |
|
600 |
+ } else if(calledBy == "allMutatorEffects") { |
|
601 |
+ class(out) <- c("mutatorEffects") |
|
602 |
+ } |
|
576 | 603 |
return(out) |
577 | 604 |
} |
578 | 605 |
|
579 | 606 |
|
607 |
+allFitnessEffects <- function(rT = NULL, |
|
608 |
+ epistasis = NULL, |
|
609 |
+ orderEffects = NULL, |
|
610 |
+ noIntGenes = NULL, |
|
611 |
+ geneToModule = NULL, |
|
612 |
+ drvNames = NULL, |
|
613 |
+ genotFitness = NULL, |
|
614 |
+ keepInput = TRUE) { |
|
615 |
+ |
|
616 |
+ if(!is.null(genotFitness)) { |
|
617 |
+ if(!is.null(rT) || !is.null(epistasis) || |
|
618 |
+ !is.null(orderEffects) || !is.null(noIntGenes) || |
|
619 |
+ !is.null(geneToModule)) { |
|
620 |
+ stop("You have a non-null genotFitness.", |
|
621 |
+ " If you pass the complete genotype to fitness mapping", |
|
622 |
+ " you cannot pass any of rT, epistasis, orderEffects", |
|
623 |
+ " noIntGenes or geneToModule.") |
|
624 |
+ } |
|
625 |
+ epistasis <- from_genotype_fitness(genotFitness) |
|
626 |
+ } |
|
627 |
+ allFitnessORMutatorEffects( |
|
628 |
+ rT = rT, |
|
629 |
+ epistasis = epistasis, |
|
630 |
+ orderEffects = orderEffects, |
|
631 |
+ noIntGenes = noIntGenes, |
|
632 |
+ geneToModule = geneToModule, |
|
633 |
+ drvNames = drvNames, |
|
634 |
+ keepInput = keepInput, |
|
635 |
+ calledBy = "allFitnessEffects") |
|
636 |
+} |
|
637 |
+ |
|
638 |
+ |
|
639 |
+## allFitnessEffects <- function(rT = NULL, |
|
640 |
+## epistasis = NULL, |
|
641 |
+## orderEffects = NULL, |
|
642 |
+## noIntGenes = NULL, |
|
643 |
+## geneToModule = NULL, |
|
644 |
+## drvNames = NULL, |
|
645 |
+## keepInput = TRUE) { |
|
646 |
+## ## restrictions: the usual rt |
|
647 |
+ |
|
648 |
+## ## epistasis: as it says, with the ":" |
|
649 |
+ |
|
650 |
+## ## orderEffects: the ">" |
|
651 |
+ |
|
652 |
+## ## All of the above can be genes or can be modules (if you pass a |
|
653 |
+## ## geneToModule) |
|
654 |
+ |
|
655 |
+## ## rest: rest of genes, with fitness |
|
656 |
+ |
|
657 |
+ |
|
658 |
+## ## For epistasis and order effects we create the output object but |
|
659 |
+## ## missing the numeric ids of genes. With rT we do it in one go, as we |
|
660 |
+## ## already know the mapping of genes to numeric ids. We could do the |
|
661 |
+## ## same in epistasis and order, but we would be splitting twice |
|
662 |
+## ## (whereas for rT extracting the names is very simple). |
|
663 |
+ |
|
664 |
+ |
|
665 |
+## rtNames <- NULL |
|
666 |
+## epiNames <- NULL |
|
667 |
+## orNames <- NULL |
|
668 |
+## if(!is.null(rT)) { |
|
669 |
+## ## This is really ugly, but to prevent the stringsAsFactors I need it here: |
|
670 |
+## rT$parent <- as.character(rT$parent) |
|
671 |
+## rT$child <- as.character(rT$child) |
|
672 |
+## rT$typeDep <- as.character(rT$typeDep) |
|
673 |
+## rtNames <- unique(c(rT$parent, rT$child)) |
|
674 |
+## } |
|
675 |
+## if(!is.null(epistasis)) { |
|
676 |
+## long.epistasis <- to.long.epist.order(epistasis, ":") |
|
677 |
+## ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids))) |
|
678 |
+## ## deal with the possible negative signs |
|
679 |
+## epiNames <- setdiff(unique( |
|
680 |
+## unlist(lapply(long.epistasis, |
|
681 |
+## function(x) lapply(x$ids, |
|
682 |
+## function(z) strsplit(z, "^-"))))), |
|
683 |
+## "") |
|
684 |
+ |
|
685 |
+## } else { |
|
686 |
+## long.epistasis <- list() |
|
687 |
+## } |
|
688 |
+## if(!is.null(orderEffects)) { |
|
689 |
+## long.orderEffects <- to.long.epist.order(orderEffects, ">") |
|
690 |
+## orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids))) |
|
691 |
+## } else { |
|
692 |
+## long.orderEffects <- list() |
|
693 |
+## } |
|
694 |
+## allModuleNames <- unique(c(rtNames, epiNames, orNames)) |
|
695 |
+## if(is.null(geneToModule)) { |
|
696 |
+## gMOneToOne <- TRUE |
|
697 |
+## geneToModule <- geneModuleNull(allModuleNames) |
|
698 |
+## } else { |
|
699 |
+## gMOneToOne <- FALSE |
|
700 |
+## if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames)))) |
|
701 |
+## stop(paste("Some values in geneToModule not present in any of", |
|
702 |
+## " rT, epistasis, or order effects")) |
|
703 |
+## if(any(is.na(match(allModuleNames, names(geneToModule))))) |
|
704 |
+## stop(paste("Some values in rT, epistasis, ", |
|
705 |
+## "or order effects not in geneToModule")) |
|
706 |
+## } |
|
707 |
+## geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne) |
|
708 |
+ |
|
709 |
+## idm <- unique(geneModule$ModuleNumID) |
|
710 |
+## names(idm) <- unique(geneModule$Module) |
|
711 |
+ |
|
712 |
+## if(!is.null(rT)) { |
|
713 |
+## checkRT(rT) |
|
714 |
+## long.rt <- to.long.rt(rT, idm) |
|
715 |
+## } else { |
|
716 |
+## long.rt <- list() ## yes, we want an object of length 0 |
|
717 |
+## } |
|
718 |
+ |
|
719 |
+## ## Append the numeric ids to epistasis and order |
|
720 |
+## if(!is.null(epistasis)) { |
|
721 |
+## long.epistasis <- lapply(long.epistasis, |
|
722 |
+## function(x) |
|
723 |
+## addIntID.epist.order(x, idm, |
|
724 |
+## sort = TRUE, |
|
725 |
+## sign = TRUE)) |
|
726 |
+## } |
|
727 |
+## if(!is.null(orderEffects)) { |
|
728 |
+## long.orderEffects <- lapply(long.orderEffects, |
|
729 |
+## function(x) |
|
730 |
+## addIntID.epist.order(x, idm, |
|
731 |
+## sort = FALSE, |
|
732 |
+## sign = FALSE)) |
|
733 |
+## } |
|
734 |
+ |
|
735 |
+## if(!is.null(noIntGenes)) { |
|
736 |
+## mg <- max(geneModule[, "GeneNumID"]) |
|
737 |
+## gnum <- seq_along(noIntGenes) + mg |
|
738 |
+## if(!is.null(names(noIntGenes))) { |
|
739 |
+## ng <- names(noIntGenes) |
|
740 |
+## if(any(ng %in% geneModule[, "Gene"] )) |
|
741 |
+## stop("A gene in noIntGenes also present in the other terms") |
|
742 |
+## } else { |
|
743 |
+## ng <- gnum |
|
744 |
+## } |
|
745 |
+## geneNoInt <- data.frame(Gene = as.character(ng), |
|
746 |
+## GeneNumID = gnum, |
|
747 |
+## s = noIntGenes, |
|
748 |
+## stringsAsFactors = FALSE) |
|
749 |
+## } else { |
|
750 |
+## geneNoInt <- data.frame() |
|
751 |
+## } |
|
752 |
+## if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) + |
|
753 |
+## nrow(geneNoInt)) == 0) |
|
754 |
+## stop("You have specified nothing!") |
|
755 |
+ |
|
756 |
+## if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) { |
|
757 |
+## graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects) |
|
758 |
+## } else { |
|
759 |
+## graphE <- NULL |
|
760 |
+## } |
|
761 |
+ |
|
762 |
+## if(!is.null(drvNames)) { |
|
763 |
+## drv <- getGeneIDNum(geneModule, geneNoInt, drvNames) |
|
764 |
+## } else { |
|
765 |
+## drv <- geneModule$GeneNumID[-1] |
|
766 |
+## } |
|
767 |
+## if(!keepInput) { |
|
768 |
+## rT <- epistasis <- orderEffects <- noIntGenes <- NULL |
|
769 |
+## } |
|
770 |
+## out <- list(long.rt = long.rt, |
|
771 |
+## long.epistasis = long.epistasis, |
|
772 |
+## long.orderEffects = long.orderEffects, |
|
773 |
+## long.geneNoInt = geneNoInt, |
|
774 |
+## geneModule = geneModule, |
|
775 |
+## gMOneToOne = gMOneToOne, |
|
776 |
+## geneToModule = geneToModule, |
|
777 |
+## graph = graphE, |
|
778 |
+## drv = drv, |
|
779 |
+## rT = rT, |
|
780 |
+## epistasis = epistasis, |
|
781 |
+## orderEffects = orderEffects, |
|
782 |
+## noIntGenes = noIntGenes |
|
783 |
+## ) |
|
784 |
+## class(out) <- c("fitnessEffects") |
|
785 |
+## return(out) |
|
786 |
+## } |
|
787 |
+ |
|
788 |
+ |
|
580 | 789 |
## No longer used |
581 | 790 |
## rtAndGeneModule <- function(mdeps, gM = NULL) { |
582 | 791 |
## ## To show a table of restrictions when there are modules. Do not use |
... | ... |
@@ -638,14 +847,94 @@ allFitnessEffects <- function(rT = NULL, |
638 | 847 |
## ## echo = TRUE) |
639 | 848 |
## } |
640 | 849 |
|
850 |
+ |
|
851 |
+ |
|
852 |
+evalGenotypeORMut <- function(genotype, |
|
853 |
+ fmEffects, |
|
854 |
+ verbose = FALSE, |
|
855 |
+ echo = FALSE, |
|
856 |
+ model = "", |
|
857 |
+ calledBy_= NULL) { |
|