c95df82d |
|
4068375e |
## Copyright 2013-2021 Ramon Diaz-Uriarte
|
e693f153 |
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
## Say which are drivers: populate the drv vector.
## // In R, the user says which are the drivers. If does not say anuthing,
## // the default (NULL) then drivers are all in poset, epist, restrict. The
## // user can pass a vector with the names of the genes (not modules). Allow
## // also for empty, so this is faster if not needed. And check that if we
## // use a stopping rule based on drivers that drv vectors is not empty.
## - Say that a user can use a "0" as a gene name, but that is BAD idea.
## - Modules and order effects can be kind of funny?
## Gene names can contain no spaces, commas, or ">"
check.gm <- function(gm) {
|
1b1e5e0c |
## Yes, Root: we want no ambiguities.
## Actually, that sucks. So we do not require it, but check for
## consistency.
if(any(gm == "Root") && (gm[1] != "Root") )
stop("If Root is in the module table, it must be the first")
if(any(names(gm) == "Root") && (names(gm)[1] != "Root") )
stop("If the name Root is in the module table, it must be the first")
if( (names(gm)[1] == "Root") && (gm[1] != "Root") )
stop("The name Root is in the module table, but is not of Root")
if( (gm[1] == "Root") && (names(gm)[1] != "Root") )
stop("Root is in the module table, but with a different name")
## if(gm[1] != "Root")
## stop("First value of a module table must be Root")
## if(names(gm)[1] != "Root")
## stop("First name of a module table must be Root")
|
e693f153 |
if(length(unique(names(gm))) != length(gm))
stop("Number of unique module names different from length of vector")
|
1b1e5e0c |
if(gm[1] != "Root")
gm <- c("Root" = "Root", gm)
return(gm)
|
e693f153 |
}
gtm2 <- function(x) {
|
e719a81d |
data.frame(cbind(nice.vector.eo(x, ","), x), stringsAsFactors = TRUE)
|
e693f153 |
}
## nice.vector.eo <- function(z, sep) {
## ## with epistasis, maybe we want sorted?
## setdiff(unlist(lapply(strsplit(z, " "),
## function(u) strsplit(u, sep))), "")
## }
nice.vector.eo <- function(z, sep, rm.sign = FALSE) {
## with epistasis, maybe we want sorted?
if(! rm.sign)
return(setdiff(unlist(lapply(strsplit(z, " "),
function(u) strsplit(u, sep))), ""))
else ## remove the " ", the -, and the sep
return(setdiff(unlist(lapply(strsplit(z, " "), function(u)
strsplit(unlist(strsplit(u, "-")), sep))), ""))
}
gm.to.geneModuleL <- function(gm, one.to.one) {
## the table will be sorted by gene name
|
1b1e5e0c |
gm <- check.gm(gm)
|
e693f153 |
## the named vector with the mapping into the long geneModule df
|
e719a81d |
geneMod <- as.data.frame(rbindlist(lapply(gm, gtm2)),
stringsAsFactors = TRUE) ## this stringsAsFactors is key
|
e693f153 |
geneMod$Module <- names(gm)[geneMod[, 2]] ## reverse lookup table
colnames(geneMod)[1] <- c("Gene")
geneMod <- geneMod[, -2]
geneMod$Gene <- as.character(geneMod$Gene)
## geneMod$Module <- as.character(geneMod$Module) ## already a char
geneMod <- geneMod[c(1, order(geneMod$Gene[-1]) + 1), ]
geneMod$GeneNumID <- 0:(nrow(geneMod) - 1)
## this assumes sorted! and they need not be
## rlem <- rle(geneMod$Module)
## geneMod$ModuleNumID <- rep( 0:(length(rlem$values) - 1), rlem$lengths)
if(one.to.one) {
geneMod$ModuleNumID <- geneMod$GeneNumID
if(!(identical(geneMod$Gene, geneMod$Module)))
stop("Impossible: we are supposed to be in one-to-one for Module-Gene.")
} else {
## Works, but does not give the most ordered module names. But
## keeps implicit order given by user.
idm <- seq.int(length(gm) - 1)
idm <- c("Root" = 0L, idm)
names(idm) <- names(gm)
## This sorts by the names but is not optimal either
## idm1 <- seq.int(length(gm) - 1)
## idm <- c(0L, idm1)
## names(idm) <- c("Root", sort(setdiff(names(gm), "Root")))
geneMod$ModuleNumID <- idm[geneMod[, "Module"]]
}
if(length(unique(geneMod$Gene)) != nrow(geneMod)) {
stop("Are there identical gene names in different modules?")
}
|
e6e95f54 |
## I think this is unreacheable now. Caught earlier.
|
e693f153 |
if(length(unique(geneMod$GeneNumID)) != nrow(geneMod)) {
stop("Are there identical gene names in different modules?")
}
rownames(geneMod) <- 1:nrow(geneMod)
geneMod
}
geneModuleNull <- function(namesM) {
v <- c("Root", setdiff(namesM, "Root"))
names(v) <- v
return(v)
}
list.of.deps <- function(x) {
## lookupTypeDep <- c("MN" = 1, "monotone" = 1,
## "SM" = 2, "semimonotone" = 2)
lookupTypeDep <- c("MN" = "monotone",
"AND" = "monotone",
"monotone" = "monotone",
"CMPN" = "monotone",
"OR" = "semimonotone",
"SM" = "semimonotone",
"semimonotone" = "semimonotone",
"DMPN" = "semimonotone",
"XOR" = "xmpn",
"xmpn" = "xmpn",
"XMPN" = "xmpn",
"--" = "--",
"-" = "--")
## FIXME: check values of typeDep
if(length(x) > 1) {
if(length(unique(x$s))!= 1)
stop("Not all s identical within a child")
if(length(unique(x$sh))!= 1)
stop("Not all sh identical within a child")
if(length(unique(x$typeDep))!= 1)
stop("Not all typeDep identical within a child")
if(length(unique(x$child))!= 1)
stop("child not unique")
}
typeDep <- lookupTypeDep[unique(x$typeDep)]
|
c95df82d |
if(any(is_null_na(typeDep)))
|
e693f153 |
stop("typeDep value incorrect. Check spelling.")
return(list(
child = unique(x$child),
s = unique(x$s),
sh = unique(x$sh),
typeDep = typeDep,
parents = unlist(x$parent)))
}
to.long.rt <- function(rt, idm) {
## We now do this inconditionally, so that we do not need to use the
## "stringsAsFactors = FALSE". This is now done before
## if(is.numeric(rt$parent))
## rt$parent <- as.character(rt$parent)
## if(is.numeric(rt$child))
## rt$child <- as.character(rt$child)
if(!("Root" %in% rt$parent))
stop("Root must be one parent node")
## rt$parent <- unlist(lapply(rt$parent, nice.string))
## rt$child <- unlist(lapply(rt$child, nice.string))
srt <- rt[order(rt$child), ]
## Not relevant if we allow non-numeric names
## all.child.genes <- as.integer(
## unlist(lapply(rt[, 2],
## function(x) strsplit(x, ","))))
## ## check all childs
## if(!identical(sort(unique(all.child.genes)),
## seq.int(max(all.child.genes))))
## stop("Not all children present")
long.rt <- lapply(split(srt, srt$child), list.of.deps)
## geneModule <- gene.to.module(srt)
## idm <- seq.int(length(names(long.rt)))
## names(idm) <- names(long.rt)
## idm <- c("0" = 0L, idm)
## geneModule$ModuleNumID <- idm[geneModule[, "Module"]]
## idm is just a look up table for the id of the module
## idm <- unique(geneModule$ModuleNumID)
## names(idm) <- unique(geneModule$Module)
## add integer IDs
addIntID <- function(z, idm) {
z$childNumID <- idm[z$child]
z$parentsNumID <- idm[z$parents]
if( any(is.na(z$parentsNumID)) ||
|
e6e95f54 |
any(is.na(z$childNumID)) ) {
## I think this can no longer be reached ever. Caught before.
|
e693f153 |
stop(paste("An ID is NA:",
"Is a gene part of two different modules?",
"(That includes being by itself and part",
"of a module.)"))
}
## These checks could be somewhere else instead of here.
if(length(unique(z$parentsNumID)) != length(z$parentsNumID))
stop("No node can have the same parent multiple times")
if(length(unique(z$parents)) != length(z$parents))
stop("No node can have the same parent multiple times")
if(length(z$child) > 1)
stop("Child nodes can have one, and only one, member")
## sort the parents, always.
o <- order(z$parentsNumID)
z$parentsNumID <- z$parentsNumID[o]
z$parents <- as.character(z$parents[o])
return(z)
}
long.rt <- lapply(long.rt, function(x) addIntID(x, idm = idm))
## if(verbosity >= 4) {
## message(paste("Number of drivers: ",
## length(unique(geneModule[, "Gene"]))))
## message(paste("Number of modules: ",
## length(unique(geneModule[, "Module"]))))
## }
return(long.rt)
## return(list(long.rt = long.rt, geneModule = geneModule))
}
epist.order.element <- function(x, y, sep, rm.sign = FALSE) {
list(ids = nice.vector.eo(x, sep = sep, rm.sign = rm.sign), s = y)
}
oe.to.df <- function(x) {
ma <- matrix(ncol = 2, nrow = 1 + length(x) - 2)
if(length(x) == 2) {
ma[1, 1] <- x[1]
ma[1, 2] <- x[2]
} else {
ma[, 1] <- x[-length(x)]
ma[, 2] <- x[-1]
}
return(data.frame(ma, stringsAsFactors = FALSE))
}
epist.order.to.pairs.modules <- function(x, sep, rm.sign = TRUE) {
## We discard, do not even accept, the coefficient
tmp <- epist.order.element(x, -99, sep = sep, rm.sign = rm.sign)$ids
if(length(tmp) > 1) {
## if a single gene, as when we specify all genotypes, we do not
## want this
if(sep == ":")
return(data.frame(combinations(n = length(tmp), r = 2, v = tmp),
stringsAsFactors = FALSE))
else if(sep == ">") {
return(oe.to.df(tmp))
}
}
}
to.pairs.modules <- function(x, sep, rm.sign = TRUE) {
df <- data.frame(rbindlist(
lapply(names(x),
|
e719a81d |
function(z) epist.order.to.pairs.modules(z, sep))),
stringsAsFactors = TRUE)
|
e693f153 |
if(nrow(df) == 0L) { ## if only single genes in epist, we get nothing here.
return(df)
} else {
colnames(df) <- c("parent", "child")
if(sep == ":")
df$typeDep <- "epistasis"
else if(sep == ">")
df$typeDep <- "orderEffect"
return(unique(df))
}
}
to.long.epist.order <- function(epor, sep, rm.sign = FALSE) {
## just vectors for now
long <- Map(function(x, y) epist.order.element(x, y, sep, rm.sign),
names(epor), epor)
## if(is.vector(epor))
## long <- Map(function(x, y) epist.order.element(x, y, sep, rm.sign),
## names(epor), epor)
## else if(is.data.frame(epor))
## long <- Map(function(x, y) epist.order.element(x, y, sep, rm.sign),
## as.character(epor$ids),
## epor$s)
names(long) <- NULL
return(long)
}
## addIntID.epist.order <- function(z, idm, sort) {
## z$NumID <- idm[z$ids]
## if(sort) {
## ## essential for epistasis, but never do for order effects
## o <- order(z$NumID)
## z$NumID <- z$NumID[o]
## z$ids <- z$ids[o]
## }
## return(z)
## }
addIntID.epist.order <- function(z, idm, sort, sign) {
if( sort && (!sign))
warning("sort is TRUE but sign is FALSE. You sure?")
if((!sort) && sign)
warning("sort is FALSE but sign is TRUE. You sure?")
## Adds the integer, but takes care of sign if sign = TRUE
if(!sign) {
z$NumID <- idm[z$ids]
signs <- grep("^-", z$ids)
if(length(signs))
stop("You have a negative sign and you said sign = FALSE")
} else {
unsigned <- setdiff(unlist(lapply(z$ids,
function(z) strsplit(z, "^-"))), "")
NumID <- idm[unsigned]
signs <- grep("^-", z$ids)
if(length(signs)) {
NumID[signs] <- -1 * NumID[signs]
}
z$NumID <- NumID
}
if(sort) {
## Essential for epistasis, but never do for order effects
o <- order(z$NumID)
z$NumID <- z$NumID[o]
z$ids <- z$ids[o]
}
if(length(unique(z$NumID)) != length(z$NumID))
stop("No node can have the same id multiple times")
if(length(unique(z$ids)) != length(z$ids))
stop("No node can have the same id multiple times")
return(z)
}
checkRT <- function(mdeps) {
if(ncol(mdeps) != 5)
stop("mdeps must be of exactly 5 columns")
if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep")))
stop(paste("Column names of mdeps not of appropriate format. ",
"Should be parent, child, s, sh, typeDep"))
}
getNamesID <- function(fp) {
## Return a lookup table for names based on numeric IDs
|
ab439943 |
idname <- c(fp$geneModule$GeneNumID,
fp$long.geneNoInt$GeneNumID,
fp$fitnessLandscape_gene_id$GeneNumID)
names(idname) <- c(fp$geneModule$Gene,
fp$long.geneNoInt$Gene,
fp$fitnessLandscape_gene_id$Gene)
|
e693f153 |
return(idname[-1]) ## remove Root!!
}
|
ab439943 |
getGeneIDNum <- function(geneModule, geneNoInt, fitnessLandscape_gene_id,
drv, sort = TRUE) {
|
1d9018c0 |
## It returns the genes, as NumID, in the given vector with names drv
## initMutant uses this, for simplicity, without sorting, but noInt
## are always sorted
## Also used for the drivers with sort = TRUE
## Yes, we must do it twice because we do not know before hand which
## is which. This makes sure no NA. Period.
|
ab439943 |
if(any(is.na( match(drv, c(geneModule$Gene, geneNoInt$Gene,
fitnessLandscape_gene_id$Gene))))) {
|
1d9018c0 |
stop(paste("For driver or initMutant you have passed genes",
"not in the fitness table."))
}
indicesM <- as.vector(na.omit(match( drv, geneModule$Gene)))
indicesI <- as.vector(na.omit(sort(match( drv, geneNoInt$Gene))))
|
ab439943 |
indicesF <- as.vector(na.omit(sort(match( drv, fitnessLandscape_gene_id$Gene))))
|
1d9018c0 |
if(sort) {
indicesM <- sort(indicesM)
}
|
e693f153 |
return(c(
geneModule$GeneNumID[indicesM],
|
ab439943 |
geneNoInt$GeneNumID[indicesI],
fitnessLandscape_gene_id$GeneNumID[indicesF])
|
e693f153 |
)
}
|
4068375e |
## Next two in crate_flvars_fitvars.
## ## generate fitnesLanscapeVariables for FDF
## ## fitness landscape as data frame with cols as genes,
## ## and last column is genotype
## create_flvars <- function(x, frequencyType) {
## x <- x[, -ncol(x), drop = FALSE]
## pasted <- apply(x, 1, function(z) paste(which(z == 1), collapse = "_"))
## npasted <- apply(x, 1, function(z) paste(colnames(x)[which(z == 1)], collapse = "_"))
## if(frequencyType == "abs") {
## tmp <- paste0("n_", pasted)
## } else {
## tmp <- paste0("f_", pasted)
## }
## names(tmp) <- npasted
## return(tmp)
## }
## ## fitness specification with letters -> fitness specification with numbers
## names_to_nums_fitness <- function(fitness, genotFitness) {
## from_subst_pattern <- colnames(genotFitness[, -ncol(genotFitness)])
## to_subst_pattern <- as.character(1:(ncol(genotFitness) - 1))
## names(to_subst_pattern) <- from_subst_pattern
## return(stringr::str_replace_all(fitness, to_subst_pattern))
## }
## genotFitnes and frequency type -> fitnesLanscapeVariables for FDF and
## fitness with numbers, not names
## Done in a single function since both operations make
## the same assumptions
create_flvars_fitvars <- function(genotFitness, frequencyType) {
x <- genotFitness[, -ncol(genotFitness), drop = FALSE]
pasted <- apply(x, 1, function(z) paste(sort(which(z == 1)), collapse = "_"))
npasted <- apply(x, 1, function(z) paste(sort(colnames(x)[which(z == 1)]), collapse = "_"))
if(frequencyType == "abs") {
prefix <- "n_"
prefixre <- "^n_"
} else {
prefix <- "f_"
prefixre <- "^f_"
}
flvars <- paste0(prefix, pasted)
names(flvars) <- npasted
## make sure we get f_1_2 and not f_2_1, etc
flvars2 <- flvars
names(flvars2) <- paste0(prefix, names(flvars))
rmwt <- which(flvars2 == prefix)
if(length(rmwt)) flvars2 <- flvars2[-rmwt] ## rm this.
## Need to rev the vector, to ensure larger patterns come first
## and to place "f_" as last.
rflvars2 <- rev(flvars2)
count_seps <- stringr::str_count(rflvars2, stringr::fixed("_"))
if(any(diff(count_seps) > 0)) {
warning("flvars not ordered?",
"Check the conversion of gene names to numbers in fitness spec")
rflvars2 <- rflvars2[order(count_seps, decreasing = TRUE)]
}
## Users can pass in many possible orderings. Get all.
full_rflvars <- all_orders_fv(rflvars2, prefix, prefixre)
Fitness_as_fvars <- stringr::str_replace_all(genotFitness$Fitness,
stringr::fixed(full_rflvars))
return(list(flvars = flvars,
Fitness_as_fvars = Fitness_as_fvars))
}
## named vector, prefixes -> all possible combinations of the name
all_orders_fv <- function(x, prefix, prefixre) {
f1 <- function(zz, prefix, prefixre) {
z <- gsub(prefixre, "", names(zz))
spl <- strsplit(z, "_")[[1]]
if(length(spl) <= 1) {
return(zz)
} else {
pp <- gtools::permutations(length(spl), length(spl), spl)
ppo <- apply(pp, 1, function(u) paste(u, collapse = "_"))
ppoo <- rep(zz, length(ppo))
names(ppoo) <- paste0(prefix, ppo)
return(ppoo)
}
}
## I cannot use lapply, as it strips the names
out <- vector(mode = "character", length = 0)
for(i in 1:length(x)) {
out <- c(out, f1(x[i], prefix, prefixre))
}
return(out)
}
## ## character vector, named replacement -> replaced vector
## ## named_replace: names are the (fixed) pattern, value the replacement
## ## stringr::str_replace_all seems too smart and does not respect order
## my_mgsub <- function(x, named_replace) {
## nn <- names(named_replace)
## xr <- x
## for(i in 1:length(named_replace)) {
## xr <- gsub(nn[i], named_replace[i], xr, fixed = TRUE)
## }
## return(xr)
## }
##New function
fVariablesN <- function (g, frequencyType) {
if (is.null(g) | g == 0)
stop("Number of genes must be integer > 0")
combinationsList <- list()
for (i in 0:g) {
combinationsList <- append(combinationsList,
combn(1:g, i, list, simplify = TRUE))
}
if (frequencyType == "abs"){
fsVector <-sapply(sapply(combinationsList,
function(x) paste0(x, collapse = "_")),
function(x) paste0("n_", x))
}else{
fsVector <-sapply(sapply(combinationsList,
function(x) paste0(x, collapse = "_")),
function(x) paste0("f_", x))
}
return (fsVector)
}
fVariablesL <- function (g, frequencyType) {
if (is.null(g) | g == 0)
stop("Number of genes must be integer > 0")
if(g > length(LETTERS))
stop(paste0("Number of genes must be < length(LETTERS).",
" Please specify variables with numbers"))
combinationsList <- list()
for (i in 0:g) {
combinationsList <- append(combinationsList,
combn(LETTERS[1:g], i, list, simplify = TRUE))
}
if (frequencyType == "abs"){
fsVector <-sapply(sapply(combinationsList,
function(x) paste0(x, collapse = "_")),
function(x) paste0("n_", x))
}else{
fsVector <-sapply(sapply(combinationsList,
function(x) paste0(x, collapse = "_")),
function(x) paste0("f_", x))
}
return (fsVector)
}
## ## Assuming we are using the full fitness landscapes (i.e., none of
## ## setting genotypes with 0 fitness are absent from the table)
## conversionTable <- function(g, frequencyType){
## df <- data.frame(let = fVariablesL(g, frequencyType)[-1],
## num = fVariablesN(g, frequencyType)[-1],
## stringsAsFactors = FALSE)
## return (df)
## }
## findAndReplace <- function(str, conversionTable_input){
## pattern <- rev(setNames(as.character(conversionTable_input$num),
## conversionTable_input$let))
## str <- stringr::str_replace_all(string = str,
## pattern = pattern)
## return(str)
## }
|
50a94207 |
allFitnessORMutatorEffects <- function(rT = NULL,
epistasis = NULL,
orderEffects = NULL,
noIntGenes = NULL,
geneToModule = NULL,
drvNames = NULL,
keepInput = TRUE,
|
ab439943 |
genotFitness = NULL,
|
50a94207 |
## refFE = NULL,
|
4068375e |
calledBy = NULL,
frequencyDependentFitness = FALSE,
frequencyType = "freq_dep_not_used"){
#spPopSizes = NULL) {
## From allFitnessEffects. Generalized so we deal with Fitness
## and mutator.
|
e693f153 |
|
4068375e |
## restrictions: the usual rt
|
e693f153 |
|
4068375e |
## epistasis: as it says, with the ":"
|
e693f153 |
|
4068375e |
## orderEffects: the ">"
|
e693f153 |
|
4068375e |
## All of the above can be genes or can be modules (if you pass a
## geneToModule)
|
e693f153 |
|
4068375e |
## rest: rest of genes, with fitness
|
e693f153 |
|
4068375e |
## For epistasis and order effects we create the output object but
## missing the numeric ids of genes. With rT we do it in one go, as we
## already know the mapping of genes to numeric ids. We could do the
## same in epistasis and order, but we would be splitting twice
## (whereas for rT extracting the names is very simple).
## called appropriately?
if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
stop("How did you call this function?. Bug.")
if(calledBy == "allMutatorEffects") {
## very paranoid check
if( !is.null(rT) || !is.null(orderEffects) ||
!is.null(drvNames) || !is.null(genotFitness))
stop("allMutatorEffects called with forbidden arguments.",
"Is this an attempt to subvert the function?")
}
if(!frequencyDependentFitness) {
|
e693f153 |
rtNames <- NULL
epiNames <- NULL
orNames <- NULL
if(!is.null(rT)) {
|
4068375e |
## This is really ugly, but to prevent the stringsAsFactors I need it here:
rT$parent <- as.character(rT$parent)
rT$child <- as.character(rT$child)
rT$typeDep <- as.character(rT$typeDep)
rtNames <- unique(c(rT$parent, rT$child))
|
e693f153 |
}
|
4068375e |
#if(!is.null(spPopSizes))
#warning("spPopSizes will be considered NULL if frequencyDependentFitness = FALSE")
|
e693f153 |
if(!is.null(epistasis)) {
|
4068375e |
long.epistasis <- to.long.epist.order(epistasis, ":")
## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
## deal with the possible negative signs
epiNames <- setdiff(unique(
unlist(lapply(long.epistasis,
function(x) lapply(x$ids,
function(z) strsplit(z, "^-"))))),
"")
|
e693f153 |
} else {
|
4068375e |
long.epistasis <- list()
|
e693f153 |
}
if(!is.null(orderEffects)) {
|
4068375e |
long.orderEffects <- to.long.epist.order(orderEffects, ">")
orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
|
e693f153 |
} else {
|
4068375e |
long.orderEffects <- list()
|
e693f153 |
}
allModuleNames <- unique(c(rtNames, epiNames, orNames))
if(is.null(geneToModule)) {
|
4068375e |
gMOneToOne <- TRUE
geneToModule <- geneModuleNull(allModuleNames)
|
e693f153 |
} else {
|
4068375e |
gMOneToOne <- FALSE
if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
stop(paste("Some values in geneToModule not present in any of",
" rT, epistasis, or order effects"))
if(any(is.na(match(allModuleNames, names(geneToModule)))))
stop(paste("Some values in rT, epistasis, ",
"or order effects not in geneToModule"))
|
e693f153 |
}
geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
|
4068375e |
|
e693f153 |
idm <- unique(geneModule$ModuleNumID)
names(idm) <- unique(geneModule$Module)
if(!is.null(rT)) {
|
4068375e |
checkRT(rT)
long.rt <- to.long.rt(rT, idm)
|
e693f153 |
} else {
|
4068375e |
long.rt <- list() ## yes, we want an object of length 0
|
e693f153 |
}
## Append the numeric ids to epistasis and order
if(!is.null(epistasis)) {
|
4068375e |
long.epistasis <- lapply(long.epistasis,
function(x)
addIntID.epist.order(x, idm,
sort = TRUE,
sign = TRUE))
|
e693f153 |
}
if(!is.null(orderEffects)) {
|
4068375e |
long.orderEffects <- lapply(long.orderEffects,
function(x)
addIntID.epist.order(x, idm,
sort = FALSE,
sign = FALSE))
|
e693f153 |
}
|
4068375e |
|
e693f153 |
if(!is.null(noIntGenes)) {
|
8d75a8a6 |
if(inherits(noIntGenes, "character")) {
wm <- paste("noIntGenes is a character vector.",
"This is probably not what you want, and will",
"likely result in an error downstream.",
"You can get messages like",
" 'not compatible with requested type', and others.",
"We are stopping.")
stop(wm)
}
|
e693f153 |
mg <- max(geneModule[, "GeneNumID"])
gnum <- seq_along(noIntGenes) + mg
if(!is.null(names(noIntGenes))) {
ng <- names(noIntGenes)
|
4912fe1a |
if( any(grepl(",", ng, fixed = TRUE)) ||
any(grepl(">", ng, fixed = TRUE)) ||
any(grepl(":", ng, fixed = TRUE)) )
|
8d75a8a6 |
stop("The name of some noIntGenes contain a ',' or a '>' or a ':'")
|
e693f153 |
if(any(ng %in% geneModule[, "Gene"] ))
stop("A gene in noIntGenes also present in the other terms")
|
4d70b942 |
if(any(duplicated(ng)))
stop("Duplicated gene names in geneNoInt")
if(any(is.na(ng)))
stop("In noIntGenes some genes have names, some don't.",
" Name all of them, or name none of them.")
|
e693f153 |
} else {
ng <- gnum
}
geneNoInt <- data.frame(Gene = as.character(ng),
GeneNumID = gnum,
s = noIntGenes,
stringsAsFactors = FALSE)
} else {
|
4068375e |
geneNoInt <- data.frame()
|
e693f153 |
}
|
4068375e |
|
ab439943 |
if(is.null(genotFitness)) {
|
4068375e |
genotFitness <- matrix(NA, nrow = 0, ncol = 1)
fitnessLandscape_df <- data.frame()
fitnessLandscape_gene_id <- data.frame()
|
ab439943 |
} else {
|
4068375e |
## Yes, I am duplicating stuff for now.
## This makes life simpler in C++:
## In the map, the key is the genotype name, as
## cnn <- colnames(genotFitness)[-ncol(genotFitness)]
cnn <- 1:(ncol(genotFitness) - 1)
gfn <- apply(genotFitness[, -ncol(genotFitness), drop = FALSE], 1,
function(x) paste(cnn[as.logical(x)],
collapse = ", "))
## rownames(genotFitness) <- gfn
fitnessLandscape_df <-
data.frame(Genotype = gfn,
Fitness = genotFitness[, ncol(genotFitness)],
stringsAsFactors = FALSE)
fitnessLandscape_gene_id <- data.frame(
Gene = colnames(genotFitness)[-ncol(genotFitness)],
GeneNumID = cnn,
stringsAsFactors = FALSE)
|
ab439943 |
}
|
4068375e |
|
e693f153 |
if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
|
4068375e |
nrow(geneNoInt) + nrow(genotFitness)) == 0)
stop("You have specified nothing!")
|
e693f153 |
|
50a94207 |
if(calledBy == "allFitnessEffects") {
|
4068375e |
if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
} else {
|
e693f153 |
graphE <- NULL
|
4068375e |
}
} else {
graphE <- NULL
|
e693f153 |
}
if(!is.null(drvNames)) {
|
4068375e |
drv <- unique(getGeneIDNum(geneModule, geneNoInt, fitnessLandscape_gene_id,
drvNames))
## drivers should never be in the geneNoInt; Why!!!???
## Catch the problem. This is an overkill,
## so since we catch the issue, we could leave the geneNoInt. But
## that should not be there in this call.
## if(any(drvNames %in% geneNoInt$Gene)) {
## stop(paste("At least one gene in drvNames is a geneNoInt gene.",
## "That is not allowed.",
## "If that gene is a driver, pass it as gene in the epistasis",
## "component."))
## }
## drv <- getGeneIDNum(geneModule, NULL, drvNames)
|
e693f153 |
} else {
|
4068375e |
## we used to have this default
## drv <- geneModule$GeneNumID[-1]
drv <- vector(mode = "integer", length = 0L)
|
e693f153 |
}
|
4068375e |
|
e693f153 |
if(!keepInput) {
|
4068375e |
rT <- epistasis <- orderEffects <- noIntGenes <- NULL
|
e693f153 |
}
|
ab439943 |
|
e693f153 |
out <- list(long.rt = long.rt,
long.epistasis = long.epistasis,
long.orderEffects = long.orderEffects,
long.geneNoInt = geneNoInt,
geneModule = geneModule,
gMOneToOne = gMOneToOne,
geneToModule = geneToModule,
graph = graphE,
drv = drv,
rT = rT,
epistasis = epistasis,
orderEffects = orderEffects,
|
ab439943 |
noIntGenes = noIntGenes,
fitnessLandscape = genotFitness,
fitnessLandscape_df = fitnessLandscape_df,
|
4068375e |
fitnessLandscape_gene_id = fitnessLandscape_gene_id,
fitnessLandscapeVariables = vector(mode = "character", length = 0L),
frequencyDependentFitness = frequencyDependentFitness,
frequencyType = frequencyType)
#spPopSizes = vector(mode = "integer", length = 0L)
|
50a94207 |
if(calledBy == "allFitnessEffects") {
|
4068375e |
class(out) <- c("fitnessEffects")
|
50a94207 |
} else if(calledBy == "allMutatorEffects") {
|
4068375e |
class(out) <- c("mutatorEffects")
|
50a94207 |
}
|
4068375e |
} else {
if(is.null(genotFitness)) {
#genotFitness <- matrix(NA, nrow = 0, ncol = 1)
#fitnessLandscape_df <- data.frame()
#fitnessLandscape_gene_id <- data.frame()
stop("You have a null genotFitness in a frequency dependent fitness situation.")
} else {
cnn <- 1:(ncol(genotFitness) - 1)
gfn <- apply(genotFitness[, -ncol(genotFitness), drop = FALSE], 1,
function(x) paste(cnn[as.logical(x)],
collapse = ", "))
## rownames(genotFitness) <- gfn
fitnessLandscape_df <-
data.frame(Genotype = gfn,
Fitness = genotFitness[, ncol(genotFitness)],
stringsAsFactors = FALSE)
attr(fitnessLandscape_df,'row.names') <-
as.integer(attr(fitnessLandscape_df,'row.names'))
fitnessLandscape_gene_id <- data.frame(
Gene = colnames(genotFitness)[-ncol(genotFitness)],
GeneNumID = cnn,
stringsAsFactors = FALSE)
if(frequencyType == "auto"){
ch <- paste(as.character(fitnessLandscape_df[, ncol(fitnessLandscape_df)]), collapse = "")
#print(ch)
if( grepl("f_", ch, fixed = TRUE) ){
frequencyType = "rel"
} else{
frequencyType = "abs"
}
} else { frequencyType = frequencyType }
## Wrong: assumes all genotypes in fitness landscape
## fitnessLandscapeVariables = fVariablesN(ncol(genotFitness) - 1, frequencyType)
stopifnot(identical(genotFitness$Fitness, fitnessLandscape_df$Fitness))
flvars_and_fitvars <- create_flvars_fitvars(genotFitness, frequencyType)
fitnessLandscapeVariables <- flvars_and_fitvars$flvars
Fitness_as_fvars <- flvars_and_fitvars$Fitness_as_fvars
}
if(!is.null(drvNames)) {
drv <- unique(getGeneIDNum(geneModule, geneNoInt, fitnessLandscape_gene_id,
drvNames))
} else {
drv <- vector(mode = "integer", length = 0L)
}
defaultGeneModuleDF <- data.frame(Gene = "Root",
Module = "Root",
GeneNumID = 0,
ModuleNumID = 0,
stringsAsFactors = FALSE)
## Create, for the user, a single data frame with everything.
## This is what C++ should consume
## This ought to allow to pass fitness spec as letters. Preserve original
Fitness_original_as_letters <- fitnessLandscape_df$Fitness
fitnessLandscape_df$Fitness <- Fitness_as_fvars
full_FDF_spec <- cbind(genotFitness[, -ncol(genotFitness)]
, Genotype_letters = genotype_letterslabel(genotFitness[, -ncol(genotFitness)])
, Genotype_fvars = fitnessLandscapeVariables ## used in C++
, Fitness_as_fvars = Fitness_as_fvars
, Fitness_as_letters = Fitness_original_as_letters
)
rownames(full_FDF_spec) <- 1:nrow(full_FDF_spec)
out <- list(long.rt = list(),
long.epistasis = list(),
long.orderEffects = list(),
long.geneNoInt = data.frame(),
geneModule = defaultGeneModuleDF, ##Trick to pass countGenesFe>2,
gMOneToOne = TRUE,
geneToModule = c(Root = "Root"),
graph = list(),
drv = drv,
rT = NULL,
epistasis = NULL,
orderEffects = NULL,
noIntGenes = NULL,
fitnessLandscape = genotFitness,
fitnessLandscape_df = fitnessLandscape_df,
fitnessLandscape_gene_id = fitnessLandscape_gene_id,
fitnessLandscapeVariables = fitnessLandscapeVariables,
frequencyDependentFitness = frequencyDependentFitness,
frequencyType = frequencyType,
full_FDF_spec = full_FDF_spec
#spPopSizes = spPopSizes
)
class(out) <- c("fitnessEffects")
}
return(out)
|
e693f153 |
}
|
ab439943 |
## Former version, with fitness landscape
## allFitnessORMutatorEffects <- function(rT = NULL,
## epistasis = NULL,
## orderEffects = NULL,
## noIntGenes = NULL,
## geneToModule = NULL,
## drvNames = NULL,
## keepInput = TRUE,
## ## refFE = NULL,
## calledBy = NULL) {
## ## From allFitnessEffects. Generalized so we deal with Fitness
## ## and mutator.
|
4068375e |
|
ab439943 |
## ## restrictions: the usual rt
## ## epistasis: as it says, with the ":"
## ## orderEffects: the ">"
|
4068375e |
|
ab439943 |
## ## All of the above can be genes or can be modules (if you pass a
## ## geneToModule)
## ## rest: rest of genes, with fitness
## ## For epistasis and order effects we create the output object but
## ## missing the numeric ids of genes. With rT we do it in one go, as we
## ## already know the mapping of genes to numeric ids. We could do the
## ## same in epistasis and order, but we would be splitting twice
## ## (whereas for rT extracting the names is very simple).
## ## called appropriately?
## if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
## stop("How did you call this function?. Bug.")
|
4068375e |
|
ab439943 |
## if(calledBy == "allMutatorEffects") {
## ## very paranoid check
## if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
## stop("allMutatorEffects called with forbidden arguments.",
## "Is this an attempt to subvert the function?")
## }
|
4068375e |
|
ab439943 |
## rtNames <- NULL
## epiNames <- NULL
## orNames <- NULL
## if(!is.null(rT)) {
## ## This is really ugly, but to prevent the stringsAsFactors I need it here:
## rT$parent <- as.character(rT$parent)
## rT$child <- as.character(rT$child)
## rT$typeDep <- as.character(rT$typeDep)
## rtNames <- unique(c(rT$parent, rT$child))
## }
## if(!is.null(epistasis)) {
## long.epistasis <- to.long.epist.order(epistasis, ":")
## ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
## ## deal with the possible negative signs
## epiNames <- setdiff(unique(
## unlist(lapply(long.epistasis,
## function(x) lapply(x$ids,
## function(z) strsplit(z, "^-"))))),
## "")
## } else {
## long.epistasis <- list()
## }
## if(!is.null(orderEffects)) {
## long.orderEffects <- to.long.epist.order(orderEffects, ">")
## orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
## } else {
## long.orderEffects <- list()
## }
## allModuleNames <- unique(c(rtNames, epiNames, orNames))
## if(is.null(geneToModule)) {
## gMOneToOne <- TRUE
## geneToModule <- geneModuleNull(allModuleNames)
## } else {
## gMOneToOne <- FALSE
## if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
## stop(paste("Some values in geneToModule not present in any of",
## " rT, epistasis, or order effects"))
## if(any(is.na(match(allModuleNames, names(geneToModule)))))
## stop(paste("Some values in rT, epistasis, ",
## "or order effects not in geneToModule"))
## }
## geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
|
4068375e |
|
ab439943 |
## idm <- unique(geneModule$ModuleNumID)
## names(idm) <- unique(geneModule$Module)
## if(!is.null(rT)) {
## checkRT(rT)
## long.rt <- to.long.rt(rT, idm)
## } else {
## long.rt <- list() ## yes, we want an object of length 0
## }
## ## Append the numeric ids to epistasis and order
## if(!is.null(epistasis)) {
## long.epistasis <- lapply(long.epistasis,
## function(x)
## addIntID.epist.order(x, idm,
## sort = TRUE,
## sign = TRUE))
## }
## if(!is.null(orderEffects)) {
## long.orderEffects <- lapply(long.orderEffects,
## function(x)
## addIntID.epist.order(x, idm,
## sort = FALSE,
## sign = FALSE))
## }
|
4068375e |
|
ab439943 |
## if(!is.null(noIntGenes)) {
## if(inherits(noIntGenes, "character")) {
## wm <- paste("noIntGenes is a character vector.",
## "This is probably not what you want, and will",
## "likely result in an error downstream.",
## "You can get messages like",
## " 'not compatible with requested type', and others.",
## "We are stopping.")
## stop(wm)
## }
|
4068375e |
|
ab439943 |
## mg <- max(geneModule[, "GeneNumID"])
## gnum <- seq_along(noIntGenes) + mg
## if(!is.null(names(noIntGenes))) {
## ng <- names(noIntGenes)
## if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE)
## || grepl(":", ng, fixed = TRUE))
## stop("The name of some noIntGenes contain a ',' or a '>' or a ':'")
## if(any(ng %in% geneModule[, "Gene"] ))
## stop("A gene in noIntGenes also present in the other terms")
## if(any(duplicated(ng)))
## stop("Duplicated gene names in geneNoInt")
## if(any(is.na(ng)))
## stop("In noIntGenes some genes have names, some don't.",
## " Name all of them, or name none of them.")
## } else {
## ng <- gnum
## }
## geneNoInt <- data.frame(Gene = as.character(ng),
## GeneNumID = gnum,
## s = noIntGenes,
## stringsAsFactors = FALSE)
## } else {
## geneNoInt <- data.frame()
## }
## if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
## nrow(geneNoInt)) == 0)
## stop("You have specified nothing!")
## if(calledBy == "allFitnessEffects") {
## if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
## graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
## } else {
## graphE <- NULL
## }
## } else {
## graphE <- NULL
## }
## if(!is.null(drvNames)) {
## drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
## ## drivers should never be in the geneNoInt; Why!!!???
## ## Catch the problem. This is an overkill,
## ## so since we catch the issue, we could leave the geneNoInt. But
## ## that should not be there in this call.
## ## if(any(drvNames %in% geneNoInt$Gene)) {
## ## stop(paste("At least one gene in drvNames is a geneNoInt gene.",
## ## "That is not allowed.",
## ## "If that gene is a driver, pass it as gene in the epistasis",
## ## "component."))
## ## }
## ## drv <- getGeneIDNum(geneModule, NULL, drvNames)
## } else {
## ## we used to have this default
## ## drv <- geneModule$GeneNumID[-1]
## drv <- vector(mode = "integer", length = 0L)
## }
|
4068375e |
|
ab439943 |
## if(!keepInput) {
## rT <- epistasis <- orderEffects <- noIntGenes <- NULL
## }
## out <- list(long.rt = long.rt,
## long.epistasis = long.epistasis,
## long.orderEffects = long.orderEffects,
## long.geneNoInt = geneNoInt,
## geneModule = geneModule,
## gMOneToOne = gMOneToOne,
## geneToModule = geneToModule,
## graph = graphE,
## drv = drv,
## rT = rT,
## epistasis = epistasis,
## orderEffects = orderEffects,
|
4068375e |
## noIntGenes = noIntGenes
|
ab439943 |
## )
## if(calledBy == "allFitnessEffects") {
## class(out) <- c("fitnessEffects")
## } else if(calledBy == "allMutatorEffects") {
## class(out) <- c("mutatorEffects")
## }
## return(out)
## }
|
50a94207 |
allFitnessEffects <- function(rT = NULL,
epistasis = NULL,
orderEffects = NULL,
noIntGenes = NULL,
geneToModule = NULL,
drvNames = NULL,
genotFitness = NULL,
|
4068375e |
keepInput = TRUE,
frequencyDependentFitness = FALSE,
frequencyType = NA) {
#spPopSizes = NULL) {
|
50a94207 |
|
4068375e |
if(!frequencyDependentFitness) {
if(!is.na(frequencyType)){
warning("frequencyType set to NA")
|
50a94207 |
}
|
4068375e |
## this is a kludge, but we must pass something not NA and not NULL
## to the C++ code
frequencyType = "freq_dep_not_used"
|
ab439943 |
|
4068375e |
if(!is.null(genotFitness)) {
if(!is.null(rT) || !is.null(epistasis) ||
!is.null(orderEffects) || !is.null(noIntGenes) ||
!is.null(geneToModule)) {
stop("You have a non-null genotFitness.",
" If you pass the complete genotype to fitness mapping",
" you cannot pass any of rT, epistasis, orderEffects",
" noIntGenes or geneToModule.")
}
genotFitness_std <- to_genotFitness_std(genotFitness,
frequencyDependentFitness = FALSE,
frequencyType = frequencyType,
simplify = TRUE)
## epistasis <- from_genotype_fitness(genotFitness)
|
ab439943 |
} else {
|
4068375e |
genotFitness_std <- NULL
|
50a94207 |
}
allFitnessORMutatorEffects(
|
4068375e |
rT = rT,
epistasis = epistasis,
orderEffects = orderEffects,
noIntGenes = noIntGenes,
geneToModule = geneToModule,
drvNames = drvNames,
keepInput = keepInput,
genotFitness = genotFitness_std,
calledBy = "allFitnessEffects",
frequencyDependentFitness = FALSE,
frequencyType = frequencyType)
#spPopSizes = spPopSizes)
} else {
if(!(frequencyType %in% c('abs', 'rel', 'auto'))){
#set frequencyType = "auto" in case you did not specify 'rel' or 'abs'
frequencyType = "auto"
message("frequencyType set to 'auto'")
}
if(is.null(genotFitness)) {
stop("You have a null genotFitness in a frequency dependent fitness situation.")
} else {
genotFitness_std <- to_genotFitness_std(genotFitness,
frequencyDependentFitness = TRUE,
frequencyType = frequencyType,
simplify = TRUE)
allFitnessORMutatorEffects(
|
50a94207 |
rT = rT,
epistasis = epistasis,
orderEffects = orderEffects,
noIntGenes = noIntGenes,
geneToModule = geneToModule,
drvNames = drvNames,
keepInput = keepInput,
|
ab439943 |
genotFitness = genotFitness_std,
|
4068375e |
calledBy = "allFitnessEffects",
frequencyDependentFitness = TRUE,
frequencyType = frequencyType)
#spPopSizes = spPopSizes)
}
}
|
50a94207 |
}
|
ab439943 |
## Former version
## allFitnessEffects <- function(rT = NULL,
## epistasis = NULL,
## orderEffects = NULL,
## noIntGenes = NULL,
## geneToModule = NULL,
## drvNames = NULL,
## genotFitness = NULL,
## keepInput = TRUE) {
## if(!is.null(genotFitness)) {
## if(!is.null(rT) || !is.null(epistasis) ||
## !is.null(orderEffects) || !is.null(noIntGenes) ||
## !is.null(geneToModule)) {
## stop("You have a non-null genotFitness.",
## " If you pass the complete genotype to fitness mapping",
## " you cannot pass any of rT, epistasis, orderEffects",
## " noIntGenes or geneToModule.")
## }
## epistasis <- from_genotype_fitness(genotFitness)
## }
## allFitnessORMutatorEffects(
## rT = rT,
## epistasis = epistasis,
## orderEffects = orderEffects,
## noIntGenes = noIntGenes,
## geneToModule = geneToModule,
## drvNames = drvNames,
## keepInput = keepInput,
## calledBy = "allFitnessEffects")
## }
|
50a94207 |
## allFitnessEffects <- function(rT = NULL,
## epistasis = NULL,
## orderEffects = NULL,
## noIntGenes = NULL,
## geneToModule = NULL,
## drvNames = NULL,
## keepInput = TRUE) {
## ## restrictions: the usual rt
## ## epistasis: as it says, with the ":"
## ## orderEffects: the ">"
|
4068375e |
|
50a94207 |
## ## All of the above can be genes or can be modules (if you pass a
## ## geneToModule)
## ## rest: rest of genes, with fitness
## ## For epistasis and order effects we create the output object but
## ## missing the numeric ids of genes. With rT we do it in one go, as we
## ## already know the mapping of genes to numeric ids. We could do the
## ## same in epistasis and order, but we would be splitting twice
## ## (whereas for rT extracting the names is very simple).
|
4068375e |
|
50a94207 |
## rtNames <- NULL
## epiNames <- NULL
## orNames <- NULL
## if(!is.null(rT)) {
## ## This is really ugly, but to prevent the stringsAsFactors I need it here:
## rT$parent <- as.character(rT$parent)
## rT$child <- as.character(rT$child)
## rT$typeDep <- as.character(rT$typeDep)
## rtNames <- unique(c(rT$parent, rT$child))
## }
## if(!is.null(epistasis)) {
## long.epistasis <- to.long.epist.order(epistasis, ":")
## ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
## ## deal with the possible negative signs
## epiNames <- setdiff(unique(
## unlist(lapply(long.epistasis,
## function(x) lapply(x$ids,
## function(z) strsplit(z, "^-"))))),
## "")
|
4068375e |
|
50a94207 |
## } else {
## long.epistasis <- list()
## }
## if(!is.null(orderEffects)) {
## long.orderEffects <- to.long.epist.order(orderEffects, ">")
## orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
## } else {
## long.orderEffects <- list()
## }
## allModuleNames <- unique(c(rtNames, epiNames, orNames))
## if(is.null(geneToModule)) {
## gMOneToOne <- TRUE
## geneToModule <- geneModuleNull(allModuleNames)
## } else {
## gMOneToOne <- FALSE
## if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
## stop(paste("Some values in geneToModule not present in any of",
## " rT, epistasis, or order effects"))
## if(any(is.na(match(allModuleNames, names(geneToModule)))))
## stop(paste("Some values in rT, epistasis, ",
## "or order effects not in geneToModule"))
## }
## geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
## idm <- unique(geneModule$ModuleNumID)
## names(idm) <- unique(geneModule$Module)
## if(!is.null(rT)) {
## checkRT(rT)
## long.rt <- to.long.rt(rT, idm)
## } else {
## long.rt <- list() ## yes, we want an object of length 0
## }
## ## Append the numeric ids to epistasis and order
## if(!is.null(epistasis)) {
## long.epistasis <- lapply(long.epistasis,
## function(x)
## addIntID.epist.order(x, idm,
## sort = TRUE,
## sign = TRUE))
## }
## if(!is.null(orderEffects)) {
## long.orderEffects <- lapply(long.orderEffects,
## function(x)
## addIntID.epist.order(x, idm,
## sort = FALSE,
## sign = FALSE))
## }
|
4068375e |
|
50a94207 |
## if(!is.null(noIntGenes)) {
## mg <- max(geneModule[, "GeneNumID"])
## gnum <- seq_along(noIntGenes) + mg
## if(!is.null(names(noIntGenes))) {
## ng <- names(noIntGenes)
## if(any(ng %in% geneModule[, "Gene"] ))
## stop("A gene in noIntGenes also present in the other terms")
## } else {
## ng <- gnum
## }
## geneNoInt <- data.frame(Gene = as.character(ng),
## GeneNumID = gnum,
## s = noIntGenes,
## stringsAsFactors = FALSE)
## } else {
## geneNoInt <- data.frame()
## }
## if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
## nrow(geneNoInt)) == 0)
## stop("You have specified nothing!")
## if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
## graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
## } else {
## graphE <- NULL
## }
## if(!is.null(drvNames)) {
## drv <- getGeneIDNum(geneModule, geneNoInt, drvNames)
## } else {
## drv <- geneModule$GeneNumID[-1]
## }
## if(!keepInput) {
## rT <- epistasis <- orderEffects <- noIntGenes <- NULL
## }
## out <- list(long.rt = long.rt,
## long.epistasis = long.epistasis,
## long.orderEffects = long.orderEffects,
## long.geneNoInt = geneNoInt,
## geneModule = geneModule,
## gMOneToOne = gMOneToOne,
## geneToModule = geneToModule,
## graph = graphE,
## drv = drv,
## rT = rT,
## epistasis = epistasis,
## orderEffects = orderEffects,
|
4068375e |
## noIntGenes = noIntGenes
|
50a94207 |
## )
## class(out) <- c("fitnessEffects")
## return(out)
## }
|
e6e95f54 |
## No longer used
## rtAndGeneModule <- function(mdeps, gM = NULL) {
## ## To show a table of restrictions when there are modules. Do not use
## ## for anything else. Maybe as intermediate to plotting.
|
4068375e |
|
e6e95f54 |
## ## Specify restriction table of modules and a mapping of modules to
## ## genes. gM is a named vector; names are modules, values are elements
## ## of each module.
## ## We do nothing important if gM is NULL except checks
## ## If there are modules, the table shows the individual genes.
## checkRT(mdeps)
## ## if(ncol(mdeps) != 5)
## ## stop("mdeps must be of exactly 5 columns")
## ## if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep")))
## ## stop(paste("Column names of mdeps not of appropriate format. ",
## ## "Should be parent, child, s, sh, typeDep"))
## if(!is.null(gM)) {
## if(any(is.na(match(mdeps[ , 1], names(gM)))))
## stop("Some values in parent not from a known module")
## if(any(is.na(match(mdeps[ , 2], names(gM)))))
## stop("Some values in child not from a known module")
## if(any(is.na(match(names(gM), c(mdeps[, 1], mdeps[, 2])))))
## stop("Some values in module in neither parent or child")
|
4068375e |
|
e6e95f54 |
## parent <- gM[mdeps[, 1]]
## child <- gM[mdeps[, 2]]
## df <- data.frame(parent = parent,
## child = child,
## s = mdeps$s,
## sh = mdeps$sh,
## typeDep = mdeps$typeDep,
## stringsAsFactors = FALSE)
## } else {
## df <- mdeps
## }
## rownames(df) <- seq.int(nrow(df))
## return(df)
## }
|
e693f153 |
## wrap.test.rt <- function(rt, gM = NULL) {
## ## FIXME add epistasis and orderEffects
## lrt <- allFitnessEffects(rt, geneToModule = gM)
## ## wrap_test_rt(lrt$long.rt)
## wrap_test_rt(lrt$long.rt, lrt$geneModule)
## }
|
e6e95f54 |
## No longer used
## wrap.readFitnessEffects <- function(rt, epi, oe, ni, gm, echo = TRUE) {
## tt <- allFitnessEffects(rt, epi, oe, ni, gm)
## readFitnessEffects(tt, echo = echo)
## ## readFitnessEffects(tt$long.rt,
## ## tt$long.epistasis,
## ## tt$long.orderEffects,
## ## tt$long.geneNoInt,
## ## tt$geneModule,
## ## tt$gMOneToOne,
## ## echo = TRUE)
## }
|
e693f153 |
|
50a94207 |
evalGenotypeORMut <- function(genotype,
fmEffects,
|
4068375e |
spPopSizes = spPopSizes,
|
50a94207 |
verbose = FALSE,
echo = FALSE,
model = "",
|
4068375e |
calledBy_= NULL,
currentTime = currentTime) {
|
50a94207 |
## genotype can be a vector of integers, that are the exact same in
## the table of fmEffects or a vector of strings, or a vector (a
## string) with genes separated by "," or ">"
if( !(calledBy_ %in% c("evalGenotype", "evalGenotypeMut") ))
stop("How did you call this function?. Bug.")
|
ab439943 |
## fmEffects could be a mutator effect
if(!exists("fitnessLandscape_gene_id", where = fmEffects)) {
fmEffects$fitnessLandscape_df <- data.frame()
fmEffects$fitnessLandscape_gene_id <- data.frame()
}
if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
(nrow(fmEffects$fitnessLandscape_df) > 0)) {
warning("Bozic model passing a fitness landscape will not work",
|
4068375e |
" for now.")
|
ab439943 |
}
|
4068375e |
## This will avoid errors is evalRGenotype where spPopSizes = NULL
if (!fmEffects$frequencyDependentFitness) {
spPopSizes = 0
} else {
spPopSizes <- match_spPopSizes(spPopSizes, fmEffects)
}
|
50a94207 |
if(echo)
cat(paste("Genotype: ", genotype))
|
4068375e |
if(is.character(genotype)) {
|
50a94207 |
if(length(grep(">", genotype))) {
genotype <- nice.vector.eo(genotype, ">")
} else if(length(grep(",", genotype))) {
genotype <- nice.vector.eo(genotype, ",")
}
all.g.nums <- c(fmEffects$geneModule$GeneNumID,
|
ab439943 |
fmEffects$long.geneNoInt$GeneNumID,
fmEffects$fitnessLandscape_gene_id$GeneNumID)
|
50a94207 |
all.g.names <- c(fmEffects$geneModule$Gene,
|
ab439943 |
fmEffects$long.geneNoInt$Gene,
fmEffects$fitnessLandscape_gene_id$Gene)
|
4068375e |
if((length(genotype) == 1) && (genotype %in% c("", "WT") )) {
genotype <- 0
} else {
genotype <- all.g.nums[match(genotype, all.g.names)]
}
} else {
all.g.nums <- c(fmEffects$geneModule$GeneNumID,
fmEffects$long.geneNoInt$GeneNumID,
fmEffects$fitnessLandscape_gene_id$GeneNumID)
if(!all(sapply(genotype, function(x) x %in% all.g.nums))){
stop("Genotype as vector of numbers contains genes not in fitnessEffects/mutatorEffects.")
}
|
50a94207 |
}
|
4068375e |
## FIXME: About the WT: in fmEffects$geneModule$Gene we have "Root". We might
## want to refactor that and turn all those to WT. It would avoid the need for
## the if above to catch the WT or ""
if(!fmEffects$frequencyDependentFitness){
if( any(is.na(genotype)) ){
stop("Genotype contains NAs or genes not in fitnessEffects/mutatorEffects")
}
if((!length(genotype))){
stop("Genotypes must have at least one mutated gene")
}
if(any(genotype < 0)) {
stop(paste("Genotypes cannot contain negative values.",
"If you see this message, you found a bug."))
}
if(length(genotype) == 1 && genotype == 0){
## We do not handle WT for now
stop("Genotype cannot be 0. We do not handle WT on its own in non-freq-dep: its fitness is 1 by decree.")
}
if(any(genotype == 0)){
stop("Genotype cannot contain any 0.")
}
} else {
if(length(genotype) == 1 && is.na(genotype)){
stop("Genotype contains NA or a gene not in fitnessEffects/mutatorEffects")
} else if(length(genotype) == 1 && genotype == 0) {
## for WT
genotype <- vector(mode = "integer", length = 0L)
} else if(length(genotype) > 1){
if( any(is.na(genotype)) ){
stop("Genotype contains NAs or genes not in fitnessEffects/mutatorEffects")
}
if(any(genotype == 0)){
stop("Genotype cannot contain any 0 if its length > 1")
}
}
|
50a94207 |
}
|
4068375e |
|
50a94207 |
if(model %in% c("Bozic", "bozic1", "bozic2") )
prodNeg <- TRUE
else
prodNeg <- FALSE
|
4068375e |
|
50a94207 |
ff <- evalRGenotype(rG = genotype,
rFE = fmEffects,
|
4068375e |
spPop = spPopSizes,
|
50a94207 |
verbose = verbose,
prodNeg = prodNeg,
|
4068375e |
calledBy_ = calledBy_,
currentTime = currentTime)
|
50a94207 |
if(echo) {
if(calledBy_ == "evalGenotype") {
if(!prodNeg)
cat(" Fitness: ", ff, "\n")
else
cat(" Death rate: ", ff, "\n")
} else if(calledBy_ == "evalGenotypeMut") {
cat(" Mutation rate product :", ff, "\n")
}
|
4068375e |
}
|
50a94207 |
return(ff)
}
|
4068375e |
evalGenotype <- function(genotype,
fitnessEffects,
spPopSizes = NULL,
|
e693f153 |
verbose = FALSE,
echo = FALSE,
|
4068375e |
model = "",
currentTime = 0
) {
|
50a94207 |
if(inherits(fitnessEffects, "mutatorEffects"))
stop("You are trying to get the fitness of a mutator specification. ",
"You did not pass an object of class fitnessEffects.")
|
4068375e |
if (fitnessEffects$frequencyDependentFitness) {
if (is.null(spPopSizes))
stop("You have a NULL spPopSizes")
if (!(length(spPopSizes) == nrow(fitnessEffects$fitnessLandscape)))
stop("spPopSizes must be as long as number of genotypes")
}
|
50a94207 |
evalGenotypeORMut(genotype = genotype,
|
4068375e |
fmEffects = fitnessEffects,
spPopSizes = spPopSizes,
verbose = verbose,
echo = echo,
model = model ,
calledBy_= "evalGenotype",
currentTime = currentTime)
|
50a94207 |
}
evalGenotypeFitAndMut <- function(genotype,
fitnessEffects,
mutatorEffects,
|
4068375e |
spPopSizes = NULL,
|
50a94207 |
verbose = FALSE,
echo = FALSE,
|
4068375e |
model = "",
currentTime = 0
) {
|
ab439943 |
## Must deal with objects from previous, pre flfast, modifications
if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
fitnessEffects$fitnessLandscape_df <- data.frame()
fitnessEffects$fitnessLandscape_gene_id <- data.frame()
}
if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
(nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
warning("Bozic model passing a fitness landscape will not work",
" for now.")
}
|
4068375e |
if(fitnessEffects$frequencyDependentFitness) {
if (is.null(spPopSizes))
stop("You have a NULL spPopSizes")
if (!(length(spPopSizes) == nrow(fitnessEffects$fitnessLandscape)))
stop("spPopSizes must be as long as number of genotypes")
spPopSizes <- match_spPopSizes(spPopSizes, fitnessEffects)
}
# This will avoid errors is evalRGenotype where spPopSizes = NULL
if (!fitnessEffects$frequencyDependentFitness) {
spPopSizes = 0
}
|
50a94207 |
prodNeg <- FALSE
## Next is from evalGenotypeAndMut
|
e693f153 |
if(echo)
cat(paste("Genotype: ", genotype))
|
4068375e |
if(is.character(genotype)) {
|
e693f153 |
if(length(grep(">", genotype))) {
genotype <- nice.vector.eo(genotype, ">")
} else if(length(grep(",", genotype))) {
genotype <- nice.vector.eo(genotype, ",")
}
all.g.nums <- c(fitnessEffects$geneModule$GeneNumID,
|
ab439943 |
fitnessEffects$long.geneNoInt$GeneNumID,
fitnessEffects$fitnessLandscape_gene_id$GeneNumID)
|
e693f153 |
all.g.names <- c(fitnessEffects$geneModule$Gene,
|
ab439943 |
fitnessEffects$long.geneNoInt$Gene,
fitnessEffects$fitnessLandscape_gene_id$Gene)
|
e693f153 |
genotype <- all.g.nums[match(genotype, all.g.names)]
|
4068375e |
} else {
all.g.nums <- c(fitnessEffects$geneModule$GeneNumID,
fitnessEffects$long.geneNoInt$GeneNumID,
fitnessEffects$fitnessLandscape_gene_id$GeneNumID)
if(!all(sapply(genotype, function(x)x %in% all.g.nums))){
stop("Genotype as vector of numbers contains genes not in fitnessEffects/mutatorEffects.")
}
}
if(!fitnessEffects$frequencyDependentFitness){
if( any(is.na(genotype)) ){
stop("Genotype contains NAs or genes not in fitnessEffects/mutatorEffects")
}
if((!length(genotype))){
stop("Genotypes must have at least one mutated gene")
|
e693f153 |
}
if(any(genotype < 0)) {
|
4068375e |
stop(paste("Genotypes cannot contain negative values.",
|
e693f153 |
"If you see this message, you found a bug."))
}
|
4068375e |
if(length(genotype) == 1 && genotype == 0){
stop("Genotype cannot be 0.")
}
if(any(genotype == 0)){
stop("Genotype cannot contain any 0.")
}
}else{
if(length(genotype) == 1 && is.na(genotype)){
stop("Genotype contains NA or a gene not in fitnessEffects/mutatorEffects")
}else if(length(genotype) == 1 && genotype == 0){
genotype <- vector(mode = "integer", length = 0L)
}else if(length(genotype) > 1){
if( any(is.na(genotype)) ){
stop("Genotype contains NAs or genes not in fitnessEffects/mutatorEffects")
}
if(any(genotype == 0)){
stop("Genotype cannot contain any 0 if its length > 1")
}
}
}
|
50a94207 |
full2mutator_ <- matchGeneIDs(mutatorEffects,
fitnessEffects)$Reduced
|
e693f153 |
if(model %in% c("Bozic", "bozic1", "bozic2") )
prodNeg <- TRUE
else
prodNeg <- FALSE
|
4068375e |
evalRGenotypeAndMut(genotype,
fitnessEffects,
mutatorEffects,
spPopSizes,
full2mutator_,
|
50a94207 |
verbose = verbose,
|
4068375e |
prodNeg = prodNeg,
currentTime = currentTime)
|
e693f153 |
}
|
50a94207 |
## evalGenotype <- function(genotype, fitnessEffects,
## verbose = FALSE,
## echo = FALSE,
## model = "") {
## ## genotype can be a vector of integers, that are the exact same in
## ## the table of fitnessEffects or a vector of strings, or a vector (a
## ## string) with genes separated by "," or ">"
## if(echo)
## cat(paste("Genotype: ", genotype))
## if(!is.integer(genotype)) {
## if(length(grep(">", genotype))) {
## genotype <- nice.vector.eo(genotype, ">")
## } else if(length(grep(",", genotype))) {
## genotype <- nice.vector.eo(genotype, ",")
## }
## all.g.nums <- c(fitnessEffects$geneModule$GeneNumID,
## fitnessEffects$long.geneNoInt$GeneNumID)
## all.g.names <- c(fitnessEffects$geneModule$Gene,
## fitnessEffects$long.geneNoInt$Gene)
## genotype <- all.g.nums[match(genotype, all.g.names)]
## }
## if(any(is.na(genotype)))
## stop("genotype contains NAs or genes not in fitnessEffects")
## if(!length(genotype))
## stop("genotypes must have at least one mutated gene")
## if(any(genotype < 0)) {
## stop(paste("genotypes cannot contain negative values.",
## "If you see this message, you found a bug."))
## }
## if(model %in% c("Bozic", "bozic1", "bozic2") )
## prodNeg <- TRUE
## else
## prodNeg <- FALSE
## ff <- evalRGenotype(genotype, fitnessEffects, verbose, prodNeg)
## if(echo) {
## if(!prodNeg)
## cat(" Fitness: ", ff, "\n")
## else
## cat(" Death rate: ", ff, "\n")
## } ## else {
## ## return(ff)
## ## }
## return(ff)
## }
|
e693f153 |
## For multiple genotypes, lapply the matching.
## Nope, I think unneeded
## internal.convert_genotypes <- function(genotypes, gm) {
## genotypes <- lapply(lg, function(x) gm$GeneNumID[match(x, gm$Gene)])
## }
|
50a94207 |
## I am here: simplify this
|
4068375e |
## fitnesEffects from FDF, spPopSizes -> reordered spPopSizes
## Verify if named. If not, issue a warning.
## If yes, check matches genotypes and reorder
match_spPopSizes <- function(sp, fe){
if(!fe$frequencyDependentFitness) return(sp)
if(is.null(names(sp))) {
warning("spPopSizes unnamed: cannot check genotype names.")
return(sp)
}
nns <- names(sp)
nns <- sort_genes_genots(nns)
names(sp) <- nns
nnf <- fe$full_FDF_spec$Genotype_letters
if( ( length(nns) != length(nnf) ) ||
(!(identical(sort(nnf), sort(nns))) ) )
stop("Genotype names in spPopSizes differ w.r.t. fitness landscape")
return(sp[nnf])
}
## genotype names -> genotype names with names sorted
sort_genes_genots <- function(x) {
return(
unlist(
lapply(
lapply(strsplit(x, ", "), sort),
function(x) paste(x, collapse = ", "))))
## Or this:
## lapply(names(nn3), function(x) paste(sort(strsplit(x, ", ")[[1]]),
## collapse = ", "))
}
|
50a94207 |
evalAllGenotypesORMut <- function(fmEffects,
|
4068375e |
order = FALSE,
max = 256,
addwt = FALSE,
model = "",
spPopSizes = spPopSizes,
calledBy_ = "",
currentTime = currentTime) {
## minimal = FALSE) {
|
50a94207 |
if( !(calledBy_ %in% c("evalGenotype", "evalGenotypeMut") ))
stop("How did you call this function?. Bug.")
|
4068375e |
|
50a94207 |
if( (calledBy_ == "evalGenotype" ) &&
(!inherits(fmEffects, "fitnessEffects")))
stop("You are trying to get the fitness of a mutator specification. ",
"You did not pass an object of class fitnessEffects.")
if( (calledBy_ == "evalGenotypeMut" ) &&
(!inherits(fmEffects, "mutatorEffects")))
stop("You are trying to get the mutator effects of a fitness specification. ",
"You did not pass an object of class mutatorEffects.")
|
4068375e |
if (fmEffects$frequencyDependentFitness) {
if (is.null(spPopSizes))
stop("You have a NULL spPopSizes")
if (!(length(spPopSizes) == nrow(fmEffects$fitnessLandscape)))
stop("spPopSizes must be as long as number of genotypes")
spPopSizes <- match_spPopSizes(spPopSizes, fmEffects)
}
# This will avoid errors is evalRGenotype where spPopSizes = NULL
if (!fmEffects$frequencyDependentFitness) {
spPopSizes = 0
}
|
50a94207 |
## if(!minimal)
|
4068375e |
|
50a94207 |
## else
## allg <- generateAllGenotypes_minimal(fitnessEffects = fmEffects,
## max = max)
## if(order)
## tot <- function(n) {sum(sapply(seq.int(n),
## function(x) choose(n, x) * factorial(x)))}
## else
## tot <- function(n) {2^n}
## nn <- nrow(fitnessEffects$geneModule) -1 + nrow(fitnessEffects$long.geneNoInt)
## tnn <- tot(nn)
## if(tnn > max) {
## m <- paste("There are", tnn, "genotypes.")
## m <- paste(m, "This is larger than max.")
## m <- paste(m, "Adjust max and rerun if you want")
## stop(m)
## }
## ## With mutator, the ids of genes need not go from 1:n
## vid <- allNamedGenes(fitnessEffects)$GeneNumID
## if(order) {
## f1 <- function(n) {
## lapply(seq.int(n), function(x) permutations(n = n, r = x, v = vid))
## }
## } else {
## f1 <- function(n) {
## lapply(seq.int(n), function(x) combinations(n = n, r = x, v = vid))}
|
4068375e |
|
50a94207 |
## }
## genotNums <- f1(nn)
## list.of.vectors <- function(y) {
## ## there's got to be a simpler way
## lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE),
## function(m) m[[1]])
## }
## genotNums <- list.of.vectors(genotNums)
## names <- c(fitnessEffects$geneModule$Gene[-1],
## fitnessEffects$long.geneNoInt$Gene)
## genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]),
## function(z)
## paste(z,
## collapse = if(order){" > "} else {", "} )))
## This ain't the best, as we repeatedly read and convert
## fitnessEffects. If this were slow, prepare C++ function that takes
## vectors of genotypes and uses same fitnessEffects.
if(model %in% c("Bozic", "bozic1", "bozic2") )
prodNeg <- TRUE
else
prodNeg <- FALSE
|
4068375e |
## For non-FDF we do not evaluate WT; we rbind it as a 1, by
## decree. For FDF WT is evaluated on its own with evalWT below, and
## rbinded to the rest.
allg <- generateAllGenotypes(fitnessEffects = fmEffects,
order = order,
max = max)
|
50a94207 |
allf <- vapply(allg$genotNums,
|
4068375e |
function(x) evalRGenotype(x,
fmEffects,
spPopSizes,
FALSE,
|
50a94207 |
prodNeg,
|
4068375e |
calledBy_,
currentTime),
|
50a94207 |
1.1)
|
4068375e |
if (fmEffects$frequencyDependentFitness){
evalWT <- evalRGenotype(vector(mode = "integer", length = 0L),
fmEffects, spPopSizes, FALSE, prodNeg, calledBy_, currentTime)
allf <- c(evalWT, allf)
genotypes <- c("WT", allg$genotNames)
} else {
genotypes <- allg$genotNames
}
df <- data.frame(Genotype = genotypes,
Fitness = allf,
|
50a94207 |
stringsAsFactors = FALSE)
## Why am I doing this? I am not computing the genotype. I test the
## evaluation of the empty genotype in the tests. But its evaluation
## generates warnings that are not needed. And by decree (even in the
## C++) this thing has a fitness of 1, a mutator effect of 1 since
## there are no terms.
|
4068375e |
if(addwt & !fmEffects$frequencyDependentFitness)
|
50a94207 |
df <- rbind(data.frame(Genotype = "WT", Fitness = 1,
stringsAsFactors = FALSE), df)
|
4068375e |
if(calledBy_ == "evalGenotype") {
|
50a94207 |
if(prodNeg)
colnames(df)[match("Fitness", colnames(df))] <- "Death_rate"
class(df) <- c("evalAllGenotypes", class(df))
} else if (calledBy_ == "evalGenotypeMut") {
colnames(df)[match("Fitness", colnames(df))] <- "MutatorFactor"
class(df) <- c("evalAllGenotypesMut", class(df))
}
|
4068375e |
|
50a94207 |
return(df)
}
|
e693f153 |
|
4068375e |
evalAllGenotypes <- function(fitnessEffects,
order = FALSE,
max = 256,
|
e693f153 |
addwt = FALSE,
|
4068375e |
model = "",
spPopSizes = NULL,
currentTime = 0) {
|
ab439943 |
## Must deal with objects from previous, pre flfast, modifications
if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
fitnessEffects$fitnessLandscape_df <- data.frame()
fitnessEffects$fitnessLandscape_gene_id <- data.frame()
}
if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
(nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
warning("Bozic model passing a fitness landscape will not work",
" for now.")
}
|
50a94207 |
evalAllGenotypesORMut(
fmEffects = fitnessEffects,
order = order,
max = max,
addwt = addwt,
model = model,
|
4068375e |
spPopSizes = spPopSizes,
calledBy_= "evalGenotype",
currentTime = currentTime)
|
50a94207 |
}
generateAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256) {
|
e693f153 |
if(order)
tot <- function(n) {sum(sapply(seq.int(n),
function(x) choose(n, x) * factorial(x)))}
else
tot <- function(n) {2^n}
|
4068375e |
|
ab439943 |
nn <- nrow(fitnessEffects$geneModule) -1 +
nrow(fitnessEffects$long.geneNoInt) +
nrow(fitnessEffects$fitnessLandscape_gene_id)
|
4068375e |
|
e693f153 |
tnn <- tot(nn)
if(tnn > max) {
|
e6e95f54 |
m <- paste("There are", tnn, "genotypes.")
m <- paste(m, "This is larger than max.")
|
e693f153 |
m <- paste(m, "Adjust max and rerun if you want")
stop(m)
}
|
50a94207 |
## With mutator, the ids of genes need not go from 1:n
vid <- allNamedGenes(fitnessEffects)$GeneNumID
|
e693f153 |
if(order) {
f1 <- function(n) {
|
50a94207 |
lapply(seq.int(n), function(x) permutations(n = n, r = x, v = vid))
|
e693f153 |
}
} else {
f1 <- function(n) {
|
50a94207 |
lapply(seq.int(n), function(x) combinations(n = n, r = x, v = vid))}
|
4068375e |
|
e693f153 |
}
genotNums <- f1(nn)
list.of.vectors <- function(y) {
## there's got to be a simpler way
lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE),
function(m) m[[1]])
}
genotNums <- list.of.vectors(genotNums)
names <- c(fitnessEffects$geneModule$Gene[-1],
|
ab439943 |
fitnessEffects$long.geneNoInt$Gene,
fitnessEffects$fitnessLandscape_gene_id$Gene)
|
4068375e |
|
e693f153 |
genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]),
|
50a94207 |
function(z)
paste(z,
collapse = if(order){" > "} else {", "} )))
|
e693f153 |
## This ain't the best, as we repeatedly read and convert
## fitnessEffects. If this were slow, prepare C++ function that takes
## vectors of genotypes and uses same fitnessEffects.
|
50a94207 |
return(list(genotNums = genotNums,
genotNames = genotNames
))
}
evalAllGenotypesFitAndMut <- function(fitnessEffects, mutatorEffects,
|
13b90b40 |
order = FALSE, max = 256,
|
50a94207 |
addwt = FALSE,
|
4068375e |
model = "",
spPopSizes = NULL,
currentTime = 0){
|
50a94207 |
## minimal = FALSE) {
|
4068375e |
|
50a94207 |
if(model %in% c("Bozic", "bozic1", "bozic2") ) {
|
e693f153 |
prodNeg <- TRUE
|
50a94207 |
} else {
|
e693f153 |
prodNeg <- FALSE
|
50a94207 |
}
|
ab439943 |
## Must deal with objects from previous, pre flfast, modifications
if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) {
fitnessEffects$fitnessLandscape_df <- data.frame()
fitnessEffects$fitnessLandscape_gene_id <- data.frame()
}
if( (model %in% c("Bozic", "bozic1", "bozic2")) &&
(nrow(fitnessEffects$fitnessLandscape_df) > 0)) {
warning("Bozic model passing a fitness landscape will not work",
" for now.")
|
4068375e |
}
if(fitnessEffects$frequencyDependentFitness) {
if (is.null(spPopSizes))
stop("You have a NULL spPopSizes")
if (!(length(spPopSizes) == nrow(fitnessEffects$fitnessLandscape)))
stop("spPopSizes must be as long as number of genotypes")
spPopSizes <- match_spPopSizes(spPopSizes, fitnessEffects)
}
# This will avoid errors is evalRGenotype where spPopSizes = NULL
if (!fitnessEffects$frequencyDependentFitness) {
spPopSizes = 0
}
## if(!minimal)
allg <- generateAllGenotypes(fitnessEffects = fitnessEffects,
order = order, max = max)
## else
## allg <- generateAllGenotypes_minimal(fitnessEffects = fitnessEffects,
## max = max)
|
50a94207 |
full2mutator_ <- matchGeneIDs(mutatorEffects,
fitnessEffects)$Reduced
allf <- t(vapply(allg$genotNums,
function(x) evalRGenotypeAndMut(x,
rFE = fitnessEffects,
muEF= mutatorEffects,
|
4068375e |
spPop = spPopSizes,
|
50a94207 |
full2mutator_ = full2mutator_,
verbose = FALSE,
|
4068375e |
prodNeg = prodNeg,
currentTime = currentTime),
|
50a94207 |
c(1.1, 2.2)))
|
4068375e |
if(fitnessEffects$frequencyDependentFitness){
evalWT <- evalRGenotypeAndMut(vector(mode = "integer", length = 0L),
rFE = fitnessEffects,
muEF = mutatorEffects,
spPop = spPopSizes,
full2mutator_ = full2mutator_,
verbose = FALSE,
prodNeg = prodNeg,
currentTime = currentTime)
allf <- rbind(evalWT, allf)
genotypes <- c("WT", allg$genotNames)
}else{
genotypes <- allg$genotNames
}
df <- data.frame(Genotype = genotypes,
Fitness = allf[, 1],
|
50a94207 |
MutatorFactor = allf[, 2],
|
e693f153 |
stringsAsFactors = FALSE)
|
4068375e |
if(fitnessEffects$frequencyDependentFitness == FALSE && addwt)
|
debcf5f5 |
df <- rbind(data.frame(Genotype = "WT", Fitness = 1,
|
50a94207 |
MutatorFactor = 1,
|
e693f153 |
stringsAsFactors = FALSE), df)
if(prodNeg)
colnames(df)[match("Fitness", colnames(df))] <- "Death_rate"
|
50a94207 |
class(df) <- c("evalAllGenotypesFitAndMut", class(df))
|
e693f153 |
return(df)
}
|
50a94207 |
## evalAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256,
## addwt = FALSE,
## model = "") {
|
4068375e |
|
50a94207 |
## if(order)
## tot <- function(n) {sum(sapply(seq.int(n),
## function(x) choose(n, x) * factorial(x)))}
## else
## tot <- function(n) {2^n}
## nn <- nrow(fitnessEffects$geneModule) -1 + nrow(fitnessEffects$long.geneNoInt)
## tnn <- tot(nn)
## if(tnn > max) {
## m <- paste("There are", tnn, "genotypes.")
## m <- paste(m, "This is larger than max.")
## m <- paste(m, "Adjust max and rerun if you want")
## stop(m)
## }
## if(order) {
## f1 <- function(n) {
## lapply(seq.int(n), function(x) permutations(n = n, r = x))
## }
## } else {
## f1 <- function(n) {
## lapply(seq.int(n), function(x) combinations(n = n, r = x))}
|
4068375e |
|
50a94207 |
## }
## genotNums <- f1(nn)
## list.of.vectors <- function(y) {
## ## there's got to be a simpler way
## lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE),
## function(m) m[[1]])
## }
## genotNums <- list.of.vectors(genotNums)
## names <- c(fitnessEffects$geneModule$Gene[-1],
## fitnessEffects$long.geneNoInt$Gene)
## genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]),
## function(z)
## paste(z,
## collapse = if(order){" > "} else {", "} )))
## ## This ain't the best, as we repeatedly read and convert
## ## fitnessEffects. If this were slow, prepare C++ function that takes
## ## vectors of genotypes and uses same fitnessEffects.
## if(model %in% c("Bozic", "bozic1", "bozic2") )
## prodNeg <- TRUE
## else
## prodNeg <- FALSE
## allf <- vapply(genotNums,
## function(x) evalRGenotype(x, fitnessEffects, FALSE, prodNeg),
## 1.1)
## df <- data.frame(Genotype = genotNames, Fitness = allf,
## stringsAsFactors = FALSE)
## if(addwt)
## df <- rbind(data.frame(Genotype = "WT", Fitness = 1,
## stringsAsFactors = FALSE), df)
## if(prodNeg)
## colnames(df)[match("Fitness", colnames(df))] <- "Death_rate"
## return(df)
## }
|
e693f153 |
fitnessEffectsToIgraph <- function(rT, epistasis, orderEffects) {
df0 <- df1 <- df2 <- data.frame()
|
4068375e |
|
e693f153 |
if(!is.null(rT)) {
df0 <- rT[, c("parent", "child", "typeDep")]
}
if(!is.null(epistasis)) {
df1 <- to.pairs.modules(epistasis, ":")
}
if(!is.null(orderEffects)) {
df2 <- to.pairs.modules(orderEffects, ">")
}
df <- rbind(df0, df1, df2)
|
1b1e5e0c |
## for special case of simple epi effects
if(nrow(df) == 0) return(NULL)
|
4068375e |
|
e693f153 |
g1 <- graph.data.frame(df)
|
4068375e |
|
e693f153 |
E(g1)$color <- "black"
E(g1)$color[E(g1)$typeDep == "SM"] <- "blue"
E(g1)$color[E(g1)$typeDep == "XMPN"] <- "red"
E(g1)$color[E(g1)$typeDep == "epistasis"] <- "orange"
E(g1)$color[E(g1)$typeDep == "orderEffect"] <- "orange"
E(g1)$lty <- 1
E(g1)$lty[E(g1)$typeDep == "epistasis"] <- 2
E(g1)$lty[E(g1)$typeDep == "orderEffect"] <- 3
E(g1)$arrow.mode <- 2
E(g1)$arrow.mode[E(g1)$typeDep == "epistasis"] <- 0
E(g1)$arrow.mode[E(g1)$typeDep == "orderEffect"] <- 2
return(g1)
}
plot.fitnessEffects <- function(x, type = "graphNEL",
|
f79cc8aa |
layout = NULL,
expandModules = FALSE,
autofit = FALSE,
|
5d9775a3 |
scale_char = ifelse(type == "graphNEL", 1/10, 5),
|
f79cc8aa |
return_g = FALSE,
|
debcf5f5 |
lwdf = 1, ## multiplier for lwd for graphNEL
|
f79cc8aa |
...) {
|
e693f153 |
## some other layouts I find OK
## layout.circle
## layout.reingold.tilford if really a tree
## o.w. it will use the default
g <- x$graph
|
ab439943 |
if(is.null(g))
stop("This fitnessEffects object can not be ploted this way.",
" It is probably one with fitness landscape specification, ",
" so you might want to plot the fitness landscape instead.")
|
e693f153 |
if(type == "igraph") {
if(expandModules && (!x$gMOneToOne)) {
## vlabels <- fe$geneToModule[vertex.attributes(g)$name]
vlabels <- x$geneToModule[V(g)$name]
V(g)$label <- vlabels
}
|
f79cc8aa |
if(autofit) {
plot(0, type = "n", axes = FALSE, ann = FALSE)
## ideas from http://stackoverflow.com/questions/14472079/match-vertex-size-to-label-size-in-igraph
## vsize <- (strwidth(V(g)$label) + strwidth("oo")) * 200
## but this is a kludge.
|
5d9775a3 |
vsize <- (nchar(V(g)$label) + 1) * scale_char
|
f79cc8aa |
plot.igraph(g, vertex.size = vsize, vertex.shape = "rectangle",
layout = layout)
} else {
plot.igraph(g, layout = layout)
}
if(return_g) return(g)
|
e693f153 |
}
else if (type == "graphNEL") {
g1 <- igraph.to.graphNEL(g)
c1 <- unlist(lapply(edgeData(g1), function(x) x$color))
names(c1) <- sub("|", "~", names(c1), fixed = TRUE)
s1 <- unlist(lapply(edgeData(g1), function(x) x$lty))
names(s1) <- names(c1)
a1 <- unlist(lapply(edgeData(g1), function(x) max(x$arrow.mode - 1, 0)))
names(a1) <- names(c1)
lwd <- s1
lwd[lwd == 2] <- 2 ## o.w. too thin
lwd[lwd == 3] <- 2 ## o.w. too thin
|
debcf5f5 |
lwd <- lwd * lwdf
|
e693f153 |
nAttrs <- list()
if(expandModules && (!x$gMOneToOne)) {
nnodes <- x$geneToModule[nodes(g1)]
names(nnodes) <- nodes(g1)
nAttrs$label <- nnodes
}
|
f79cc8aa |
if(autofit) {
|
5d9775a3 |
nAttrs$width <- (nchar(nAttrs$label) + 1) * scale_char
|
f79cc8aa |
names(nAttrs$width) <- names(nAttrs$label)
plot(g1, edgeAttrs = list(arrowsize = a1, lty = s1, lwd = lwd,
color = c1), attrs=list(node=list(shape = "rectangle")),
nodeAttrs = nAttrs)
|
4068375e |
|
f79cc8aa |
} else {
plot(g1, edgeAttrs = list(arrowsize = a1, lty = s1, lwd = lwd,
color = c1),
nodeAttrs = nAttrs)
}
if(return_g) return(g1)
|
e693f153 |
} else {
stop("plot type not recognized")
}
}
## plot.fitnessEffects <- plotFitnessEffects
## FIXME: in the help: say we cannot properly show 3- or higher order
## interactions.
|
4068375e |
nr_oncoSimul.internal <- function(rFE,
birth,
|
e693f153 |
death,
mu,
initSize,
sampleEvery,
detectionSize,
finalTime,
initSize_species,
initSize_iter,
seed,
verbosity,
speciesFS,
ratioForce,
typeFitness,
max.memory,
|
debcf5f5 |
mutationPropGrowth,
|
e693f153 |
initMutant,
max.wall.time,
keepEvery,
K,
detectionDrivers,
onlyCancer,
errorHitWallTime,
max.num.tries,
errorHitMaxTries,
minDetectDrvCloneSz,
|
5d9775a3 |
extraTime,
|
50a94207 |
keepPhylog,
|
4d70b942 |
detectionProb,
|
c95df82d |
AND_DrvProbExit,
MMUEF = NULL, ## avoid partial matching, and set default
fixation = NULL ## avoid partial matching
|
50a94207 |
) {
|
e719a81d |
default_min_successive_fixation <- 50 ## yes, set at this for now
|
4068375e |
|
e693f153 |
if(!inherits(rFE, "fitnessEffects"))
stop(paste("rFE must be an object of class fitnessEffects",
"as created, for instance, with function",
"allFitnessEffects"))
|
50a94207 |
if(countGenesFe(rFE) < 2) {
stop("There must be at least two genes (loci) in the fitness effects.",
"If you only care about a degenerate case with just one,",
"you can enter a second gene (locus)",
"with fitness effect of zero.")
}
|
4068375e |
|
50a94207 |
if( (length(mu) == 1) && !(is.null(names(mu)))) {
stop("A length 1 mutation, but named. ",
"This is ambiguous. ",
"If you want per-gene mutation rates, each gene",
"must have its entry in the mu vector. ",
"(And regardless of per-gene mutation rates ",
" there must be at least two gene/loci).")
}
|
ab439943 |
## Must deal with objects from previous, pre flfast, modifications
## Could move this way down to the bottom, right before
## .Call
if(!exists("fitnessLandscape_gene_id", where = rFE)) {
rFE$fitnessLandscape_df <- data.frame()
rFE$fitnessLandscape_gene_id <- data.frame()
}
|
4068375e |
|
50a94207 |
namedGenes <- allNamedGenes(rFE)
if( length(mu) > 1) {
if(is.null(names(mu)))
stop("When using per-gene mutation rates the ",
"mu vector must be named ",
"(and if you have noIntGenes, those must have names).")
if(length(mu) != countGenesFe(rFE))
stop("When using per-gene mutation rates, ",
"there must be the same number of genes in the ",
"mu vector and the fitness effects object.")
if(!identical(sort(namedGenes$Gene),
sort(names(mu))))
stop("When using per-gene mutation rates, ",
"names of genes must match those in the ",
"restriction table.")
mu <- mu[order(match(names(mu), namedGenes$Gene))]
## Hyperparanoid check. Should never, ever, happen.
if(!identical(names(mu), namedGenes$Gene))
stop("Names of re-ordered mu do not match names of genes")
minmu <- 1e-40
if(any(mu <= minmu))
stop(paste("At least one per-gene mutation rate is negative",
"or less than", minmu,". Remember that the per-base",
"mutation rate in the human genome is about 1e-11 to 1e-9."))
}
|
e693f153 |
if(!is.null(initMutant)) {
|
4068375e |
if(!all(unlist(lapply(initMutant, is.character))))
stop("initMutant must be a (list of) character string(s)") ## FIXME: remove in the future
|
ab439943 |
initMutantString <- initMutant
|
4068375e |
if(length(grep(">", initMutant))) {
initMutant <- lapply(initMutant, function(x) nice.vector.eo(x, ">"))
|
1d9018c0 |
} else if(length(grep(",", initMutant))) {
|
4068375e |
initMutant <- lapply(initMutant, function(x) nice.vector.eo(x, ","))
|
1d9018c0 |
}
|
4068375e |
initMutant <- lapply(initMutant,
function(x) {
if( (length(x) == 1) && ((x == "") || (x == "WT")))
return(vector(mode = "integer", length = 0))
else
return(getGeneIDNum(rFE$geneModule,
rFE$long.geneNoInt,
rFE$fitnessLandscape_gene_id,
x,
FALSE))
}
)
if(max(unlist(lapply(initMutant, length))) > countGenesFe(rFE)) {
## if(length(initMutant) > countGenesFe(rFE)) {
stop("For initMutant you passed more genes mutated ",
|
50a94207 |
"than the number of genes in the genotype (fitness effects).")
|
4068375e |
}
|
e693f153 |
} else {
initMutant <- vector(mode = "integer")
|
ab439943 |
initMutantString <- ""
|
e693f153 |
}
|
4068375e |
if(any(initSize <= 0)) stop("At least one initSize <= 0")
if(length(initMutant)) {
if(length(initSize) != length(initMutant))
stop("Lengths of initSize and initMutant differ")
}
|
e6e95f54 |
## these are never user options
## if(initSize_species < 10) {
## warning("initSize_species too small?")
## }
## if(initSize_iter < 100) {
## warning("initSize_iter too small?")
## }
|
e693f153 |
if(typeFitness %in% c("bozic1", "bozic2")) {
|
ab439943 |
if(nrow(rFE$fitnessLandscape_df) > 0)
|
8d0896d9 |
warning("Bozic model passing a fitness landscape most likely will not work",
|
ab439943 |
" for now.")
## FIXME: bozic and fitness landscape
## the issue is that in the C++ code we directly do
## s = birth rate - 1
## but we would need something different
## Can be done going through epistasis, etc.
|
e693f153 |
thesh <- unlist(lapply(rFE$long.rt, function(x) x$sh))
|
e6e95f54 |
## thes <- unlist(lapply(rFE$long.rt, function(x) x$s))
thes <- unlist(c(lapply(rFE$long.rt, function(x) x$s),
lapply(rFE$long.epistasis, function(x) x$s),
lapply(rFE$long.orderEffects, function(x) x$s),
rFE$long.geneNoInt$s))
|
4068375e |
|
e693f153 |
if(any(thes > 1 )) {
m <- paste("You are using a Bozic model with",
"the new restriction specification, and you have",
"at least one s > 1.",
"But that is the same as setting that to 1:",
"we obviously cannot allow negative death rates,",
"nor problems derived from multiplying odd or even",
|
e6e95f54 |
"numbers of negative numbers.",
"You can set those > 1 to 1, but then you will",
"eventually get the simulations to abort when the",
"death rate becomes 0.")
|
e693f153 |
stop(m)
}
|
e6e95f54 |
if(any( (thesh <= -1) & (thesh > -Inf))) {
|
e693f153 |
m <- paste("You are using a Bozic model with",
"the new restriction specification, and you have",
|
e6e95f54 |
"at least one sh <= -1. Maybe you mean -Inf?")
|
e693f153 |
warning(m)
}
if(any(thes == 1 )) {
m <- paste("You are using a Bozic model with",
"the new restriction specification, and you have",
"at least one s of 1. If that gene is mutated, ",
"this will lead to a death rate of 0",
"and the simulations will abort when you get a non finite",
"value.")
warning(m)
}
}
|
50a94207 |
if(!is.null(MMUEF)) {
if(!inherits(MMUEF, "mutatorEffects"))
stop("muEF must be a mutatorEffects object")
full2mutator_ <- matchGeneIDs(MMUEF, rFE)
if(any(is.na(full2mutator_$Full)))
stop("Genes in mutatorEffects not present in fitnessEffects")
if(any(is.na(full2mutator_)))
stop("full2mutator failed for unknown reasons")
full2mutator_ <- full2mutator_$Reduced
} else {
full2mutator_ <- vector(mode = "numeric", length = 0)
## muEF <- emptyFitnessEffects()
}
|
4d70b942 |
|
4068375e |
dpr <- detectionProbCheckParse(detectionProb, sum(initSize), verbosity)
|
4d70b942 |
## if( !is.null(cPDetect) && (sum(!is.null(p2), !is.null(n2)) >= 1 ))
## stop("Specify only cPDetect xor both of p2 and n2")
## if( (is.null(p2) + is.null(n2)) == 1 )
## stop("If you pass one of n2 or p2, you must also pass the other. ",
## "Otherwise, we would not know what to do.")
## stopifnot(PDBaseline >= 0)
## stopifnot(n2 > PDBaseline)
## stopifnot(p2 < 1)
## stopifnot(p2 > 0)
## if( is.null(cPDetect) ) cPDetect <- -9
## if( is.null(p2)) p2 <- 9
## if( is.null(n2)) n2 <- -9
|
e693f153 |
## call <- match.call()
|
c95df82d |
## Process the fixed list, if any
if(!is_null_na(fixation)) {
ng <- namedGenes
|
e719a81d |
## rownames(ng) <- namedGenes[, "Gene"]
## Proposed extension to have exact matching of genotypes
ng <- rbind(
data.frame(Gene = "_", GeneNumID = -9, stringsAsFactors = FALSE),
ng)
rownames(ng) <- ng[, "Gene"]
## FIXME
## Later, accept a last argument, called tolerance.
## If not present, set to 0
## and then, at at the head of fixation_list below
if(is.list(fixation)) {
if(
(is.null(fixation[["fixation_tolerance"]])) ||
(is.na(fixation[["fixation_tolerance"]]))) {
fixation_tolerance <- 0
} else {
fixation_tolerance <- as.numeric(fixation[["fixation_tolerance"]])
fixation <- fixation[-which(names(fixation) == "fixation_tolerance")]
}
if(
(is.null(fixation[["min_successive_fixation"]])) ||
(is.na(fixation[["min_successive_fixation"]]))) {
min_successive_fixation <- default_min_successive_fixation
} else {
min_successive_fixation <- as.integer(fixation[["min_successive_fixation"]])
fixation <- fixation[-which(names(fixation) == "min_successive_fixation")]
}
if(
(is.null(fixation[["fixation_min_size"]])) ||
(is.na(fixation[["fixation_min_size"]]))) {
fixation_min_size <- 0
} else {
fixation_min_size <- as.integer(fixation[["fixation_min_size"]])
fixation <- fixation[-which(names(fixation) == "fixation_min_size")]
}
|
4068375e |
|
e719a81d |
} else {
if(is_null_na(fixation["fixation_tolerance"])) {
fixation_tolerance <- 0
} else {
fixation_tolerance <- as.numeric(fixation["fixation_tolerance"])
fixation <- fixation[-which(names(fixation) == "fixation_tolerance")]
}
if(is_null_na(fixation["min_successive_fixation"])) {
min_successive_fixation <- default_min_successive_fixation
} else {
min_successive_fixation <- as.integer(fixation["min_successive_fixation"])
fixation <- fixation[-which(names(fixation) == "min_successive_fixation")]
}
if(is_null_na(fixation["fixation_min_size"])) {
fixation_min_size <- 0
} else {
fixation_min_size <- as.integer(fixation["fixation_min_size"])
fixation <- fixation[-which(names(fixation) == "fixation_min_size")]
}
}
if( (fixation_tolerance > 1) || (fixation_tolerance < 0) )
stop("Impossible range for fixation tolerance")
if( (min_successive_fixation < 0) )
stop("Impossible range for min_successive_fixation")
if( (fixation_min_size < 0) )
stop("Impossible range for fixation_min_size")
|
c95df82d |
## Usual genotype specification and might allow ordered vectors
## in the future
fixation_b <- lapply(fixation, nice.vector.eo, sep = ",")
ulf <- unlist(fixation_b)
if(any(ulf == ">"))
stop("Order effects not allowed (yet?) in fixation.")
ulfg <- ng[ulf, 1]
if(any(is.na(ulfg)))
stop(paste("The 'fixation' list contains genes that are not present",
" in the fitness effects."))
## Sorting here is crucial!!
fixation_list <- lapply(fixation_b, function(x) sort(ng[x, 2]))
|
e719a81d |
fixation_list <- list(fixation_list = fixation_list,
fixation_tolerance = fixation_tolerance,
min_successive_fixation = min_successive_fixation,
fixation_min_size = fixation_min_size)
|
c95df82d |
} else {
fixation_list <- list()
}
|
e693f153 |
return(c(
nr_BNB_Algo5(rFE = rFE,
|
4d70b942 |
mu_ = mu,
death = death,
|
4068375e |
initSize_ = initSize,
|
4d70b942 |
sampleEvery = sampleEvery,
detectionSize = detectionSize,
finalTime = finalTime,
initSp = initSize_species,
initIt = initSize_iter,
seed = seed,
verbosity = verbosity,
speciesFS = speciesFS,
ratioForce = ratioForce,
typeFitness_ = typeFitness,
maxram = max.memory,
mutationPropGrowth = as.integer(mutationPropGrowth),
|
4068375e |
initMutant_ = initMutant,
|
4d70b942 |
maxWallTime = max.wall.time,
keepEvery = keepEvery,
K = K,
detectionDrivers = detectionDrivers,
onlyCancer = onlyCancer,
errorHitWallTime = errorHitWallTime,
maxNumTries = max.num.tries,
errorHitMaxTries = errorHitMaxTries,
minDetectDrvCloneSz = minDetectDrvCloneSz,
extraTime = extraTime,
keepPhylog = keepPhylog,
MMUEF = MMUEF,
full2mutator_ = full2mutator_,
n2 = dpr["n2"],
p2 = dpr["p2"],
PDBaseline = dpr["PDBaseline"],
cPDetect_i= dpr["cPDetect"],
|
c95df82d |
checkSizePEvery = dpr["checkSizePEvery"],
AND_DrvProbExit = AND_DrvProbExit,
fixation_list = fixation_list),
|
e693f153 |
Drivers = list(rFE$drv), ## but when doing pops, these will be repeated
|
ab439943 |
geneNames = list(names(getNamesID(rFE))),
InitMutant = initMutantString
|
e693f153 |
))
}
|
50a94207 |
countGenesFe <- function(fe) {
## recall geneModule has Root always
|
ab439943 |
## We want to be able to use objects that did not have
## the fitness landscape component
if(exists("fitnessLandscape_gene_id", where = fe))
return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) +
nrow(fe$fitnessLandscape_gene_id) - 1)
else
return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) - 1)
|
50a94207 |
}
allNamedGenes <- function(fe){
## Returns a data frame with genes and their names and verifies all
## genes have names.
## Root should always be first, but just in case
## avoid assuming it
## This does is not a good idea as it assumes the user did not use
## "keepInput = FALSE".
## lni <- length(fe$noIntGenes)
## ## FIXME:test
## if((lni > 0) &&
## (is.null(names(fe$noIntGenes))))
## stop("When using per-gene mutation rates the ",
## "no interaction genes must be named ",
## "(i.e., the noIntGenes vector must have names).")
|
ab439943 |
## accommodate objects w/o fitnessLandscape
if(!is.null(fe$fitnessLandscape) && nrow(fe$fitnessLandscape)) {
gn <-
gtools::mixedsort(
colnames(fe$fitnessLandscape)[-ncol(fe$fitnessLandscape)])
v1 <- data.frame(Gene = gn, GeneNumID = seq.int(length(gn)),
stringsAsFactors = FALSE)
} else {
v1 <- fe$geneModule[, c("Gene", "GeneNumID")]
if(nrow(fe$long.geneNoInt)) {
v1 <- rbind(v1,
fe$long.geneNoInt[, c("Gene", "GeneNumID")])
}
if(any(v1[, "Gene"] == "Root"))
v1 <- v1[-which(v1[, "Gene"] == "Root"), ]
|
50a94207 |
}
rownames(v1) <- NULL
|
ab439943 |
if(any(v1$Gene == "WT")) {
warning("A gene is named WT. You can expect problems ",
"because we use WT to denote the wildtype ",
"genotype. You might want to change it.")
}
|
50a94207 |
return(v1)
}
get.gene.counts <- function(x) {
# , timeSample = "last",
# typeSample = "whole") {
## From get.mut.vector. Used for now just for testing
timeSample <- "last"
the.time <- get.the.time.for.sample(x, timeSample, NULL)
if(the.time < 0) {
counts <- rep(0, length(x$geneNames))
names(counts) <- x$geneNames
freq <- rep(NA, length(x$geneNames))
names(freq) <- x$geneNames
return(list(counts = counts,
freq = freq,
popSize = 0))
|
4068375e |
}
|
50a94207 |
pop <- x$pops.by.time[the.time, -1]
if(all(pop == 0)) {
stop("You found a bug: this should never happen")
}
## if(typeSample %in% c("wholeTumor", "whole")) {
popSize <- x$PerSampleStats[the.time, 1]
counts <- as.vector(tcrossprod(pop, x$Genotypes))
|
4068375e |
names(counts) <- x$geneNames
|
50a94207 |
return(list(counts = counts,
freq = counts/popSize,
popSize = popSize))
## return( (tcrossprod(pop,
## x$Genotypes)/popSize) )
## } else if (typeSample %in% c("singleCell", "single")) {
## return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
## } else {
## stop("Unknown typeSample option")
## }
}
|
e693f153 |
|
50a94207 |
geneCounts <- function(x) {
if(inherits(x, "oncosimulpop")) {
return( do.call(rbind,
lapply(x,
function(z)
get.gene.counts(z)$counts))
)
} else {
return( get.gene.counts(x)$counts)
}
}
|
e693f153 |
|
50a94207 |
## geneNumIDReset <- function(x, ref){
## ## Set GeneNumID of a fitnessEffect object, x, using ref as the
## ## reference fitness effect object.
## ## Check also if all in x are in ref.
|
e693f153 |
|
50a94207 |
## gg <- allNamedGenes(ref)
## gnid <- gg$GeneNumID
## names(gnid) <- gg$Gene
## ## FIXME: this later and conditional on what is in thee
## gnid <- c("Root" = 0, gnid)
|
4068375e |
|
50a94207 |
## if(!all(x$geneModule$Gene %in% names(gnid) ))
## stop("Some genes not in reference fitnessEffects (rebasing geneModule)")
## x$geneModule$GeneNumID <- gnid[geneModule$Gene]
|
4068375e |
## ## and then go over all the lists in the x object.
|
50a94207 |
## if(nrow(x$long.geneNoInt)) {
## ## now, mapping for the noInt if this is mutator
## if(!all(x$long.geneNoInt$Gene %in% names(gnid) ))
## stop("Some genes not in reference fitnessEffects (rebasing geneNoInt)")
## x$long.geneNoInt$GeneNumID <- gnid[long.geneNoInt$Gene]
## }
## }
matchGeneIDs <- function(x, refFE) {
n1 <- allNamedGenes(x)
n2 <- allNamedGenes(refFE)
colnames(n1)[2] <- "Reduced"
colnames(n2)[2] <- "Full"
## Non standard evaluation thing and a note being generated in check.
## See also http://stackoverflow.com/questions/33299845/when-writing-functions-for-an-r-package-should-i-avoid-non-standard-evaluation
## But does not work well with replace. So use the NULL trick
Reduced <- NULL
dplyr::full_join(n2, n1, by = "Gene") %>%
|
4d70b942 |
mutate(Reduced = replace(Reduced, is.na(Reduced), -9))
}
|
4068375e |
|
3bbfa52f |
detectionProbCheckParse <- function(x, initSize, verbosity) {
|
4d70b942 |
default_p2 <- 0.1
default_n2 <- 2 * initSize
|
3bbfa52f |
default_PDBaseline <- 1.2 * initSize
|
4d70b942 |
default_checkSizePEvery <- 20
## No default cPDetect. That is done from p2 and n2 in C++.
|
c95df82d |
if(is_null_na(x)) {
## if((length(x) == 1) && (is.na(x))) {
|
4d70b942 |
y <- vector()
y["cPDetect"] <- -9
y["p2"] <- 9
y["n2"] <- -9
y["PDBaseline"] <- -9
y["checkSizePEvery"] <- Inf
return(y)
} else if((length(x) == 1) && (x == "default")) {
## Populate with defaults
y <- vector()
y["p2"] <- default_p2
y["n2"] <- default_n2
y["PDBaseline"] <- default_PDBaseline
y["checkSizePEvery"] <- default_checkSizePEvery
x <- y
}
if(length(x) >= 1) {
if( !all(names(x) %in% c("cPDetect", "p2", "n2", "PDBaseline", "checkSizePEvery")))
stop("Names of some components of detectionProb are not recognized.",
" Check for possible typos.")
}
## This ain't conditional. If not given, always check
|
c95df82d |
if( !is_null_na(x["cPDetect"]) && (sum(!is_null_na(x["p2"]), !is_null_na(x["n2"])) >= 1 ))
|
4d70b942 |
stop("Specify only cPDetect xor both of p2 and n2")
|
c95df82d |
if( (is_null_na(x["p2"]) + is_null_na(x["n2"])) == 1 )
|
4d70b942 |
stop("If you pass one of n2 or p2, you must also pass the other. ",
"Otherwise, we would not know what to do.")
|
c95df82d |
if(is_null_na(x["PDBaseline"])) {
|
4d70b942 |
x["PDBaseline"] <- default_PDBaseline
|
3bbfa52f |
if(verbosity > -1)
message("\n PDBaseline set to default value of ", default_PDBaseline, "\n")
|
4d70b942 |
}
|
c95df82d |
if(is_null_na(x["checkSizePEvery"])) {
|
4d70b942 |
x["checkSizePEvery"] <- default_checkSizePEvery
|
3bbfa52f |
if(verbosity > -1)
message("\n checkSizePEvery set to default value of ",
default_checkSizePEvery, "\n")
|
4d70b942 |
}
## If we get here, we checked consistency of whether cPDetect or p2/n2.
## Fill up with defaults the missing values
|
c95df82d |
if(is_null_na(x["cPDetect"])) {
if(is_null_na(x["p2"])) {
if(!is_null_na(x["n2"])) stop("Eh? no p2 but n2? Bug")
|
4d70b942 |
x["n2"] <- default_n2
x["p2"] <- default_p2
|
3bbfa52f |
if(verbosity > -1)
message("\n n2, p2 set to default values of ",
default_n2, ", ", default_p2, "\n")
|
4d70b942 |
}
}
if(x["checkSizePEvery"] <= 0)
stop("checkSizePEvery <= 0")
|
e719a81d |
if(x["PDBaseline"] <= 0)
stop("PDBaseline <= 0")
|
4d70b942 |
|
c95df82d |
if(!is_null_na(x["n2"])) {
|
4d70b942 |
if(x["n2"] <= x["PDBaseline"])
stop("n2 <= PDBaseline")
if(x["p2"] >= 1) stop("p2 >= 1")
if(x["p2"] <= 0) stop("p2 <= 0")
x["cPDetect"] <- -9
} else { ## only if x["cPDetect"] is not NA
|
c95df82d |
if(is_null_na(x["cPDetect"])) stop("eh? you found a bug")## paranoia
|
4d70b942 |
x["n2"] <- -9
x["p2"] <- -9
|
3bbfa52f |
if(verbosity > -1)
message("\n Using user-specified cPDetect as n2, p2 not given.\n")
|
4d70b942 |
}
return(x)
|
50a94207 |
}
|
e693f153 |
|
4068375e |
|
c95df82d |
sampledGenotypes <- function(y, genes = NULL) {
## From a popSample object, or a matrix for that matter,
## show the sampled genotypes and their frequencies
if(!is.null(genes)) {
cols <- which(colnames(y) %in% genes )
y <- y[, cols]
}
|
f9a38e24 |
## nn <- colnames(y)
genotlabel <- function(u, nn = colnames(y)) {
if(any(is.na(u))) return(NA)
else {
return(paste(sort(nn[as.logical(u)]), collapse = ", "))
}
}
## df <- data.frame(table(
## apply(y, 1, function(z) paste(sort(nn[as.logical(z)]), collapse = ", ") )
## ))
|
c95df82d |
df <- data.frame(table(
|
f9a38e24 |
apply(y, 1, genotlabel),
|
e719a81d |
useNA = "ifany"),
stringsAsFactors = TRUE)
|
f9a38e24 |
|
c95df82d |
gn <- as.character(df[, 1])
gn[gn == ""] <- "WT"
df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE)
|
f9a38e24 |
attributes(df)$ShannonI <- shannonI(na.omit(df)$Freq)
|
fc38a875 |
class(df) <- c("sampledGenotypes", class(df))
|
c95df82d |
return(df)
}
|
fc38a875 |
print.sampledGenotypes <- function(x, ...) {
print.data.frame(x, ...)
cat("\n Shannon's diversity (entropy) of sampled genotypes: ")
cat(attributes(x)$ShannonI, "\n")
}
|
c95df82d |
list_g_matches_fixed <- function(x, y) {
## Internal function, for testing the fixation output.
## x and y are vectors
## x is the set of output genotypes, y the set of fixed
## genotypes/subset of genotypes.
## Yes, this function has tests too in test.fixation.R
## All genotypes in x satisfy that they are supersets of at least one
## in y? That is true if, for every element in x, at least one y in
## that x.
if(is.list(y)) y <- unlist(y)
y.nice <- lapply(y, nice.vector.eo, sep = ",")
x.nice <- lapply(x, nice.vector.eo, sep = ",")
fu <- function(u, y.nice)
any(unlist(lapply(y.nice, function(z) all(z %in% u))))
return(all(unlist(lapply(x.nice, fu, y.nice))))
}
|
4068375e |
## From genotlable above. But this is very useful
## fitnessLandscape (only genes) as columns with 0, 1 -> genotypes names as letters
genotype_letterslabel <- function(df) {
genotlabel <- function(u, nn = colnames(df)) {
if(any(is.na(u))) return(NA)
else {
return(paste(sort(nn[as.logical(u)]), collapse = ", "))
}
}
tmp <- apply(df, 1, genotlabel)
tmp[tmp == ""] <- "WT"
return(tmp)
}
|
e693f153 |
|
50a94207 |
## emptyFitnessEffects <- function() {
## list(long.rt = list(),
## long.epistasis = list(),
## long.orderEffects = list(),
## long.geneNoInt = list(),
## geneModule = list(),
## gMOneToOne = TRUE,
## geneToModule = list(),
## graph = NULL,
## drv = vector(mode = "integer", length = 0)
## )
## }
|
e693f153 |
|
50a94207 |
### Later, for all the effects, we will do some kind of dplyr match?
|
e693f153 |
|
50a94207 |
### a, b, c, in fitness, only a, c in mut.
### fitness table for a,b,c
### each row name transformed removing b (so leaving only present)
### each row transformed matched to row in mut table.
|
e693f153 |
|
50a94207 |
## t1 <- data.frame(v1 = c("a,b", "a,c", "b"), v2 = c("b", "c", "b"), v3 = 1:3, stringsAsFactors = FALSE)
## t2 <- data.frame(v2 = c("b", "c"), v4 = c(11, 12), stringsAsFactors = FALSE)
## full_join(t1, t2, by = "v2")
|
4d70b942 |
## FIXME
## new exit code
## check:
## baseline < n2
## p2 < 1
## baseline defaults to init size + .1
## use c < -9, n2 < -9, p2 < -9 for no values
## and check one of c or p2 and n2 are valid if using exit
|