## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation, either version 3 of the License, or ## (at your option) any later version. ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## You should have received a copy of the GNU General Public License ## along with this program. If not, see <http://www.gnu.org/licenses/>. ## Say which are drivers: populate the drv vector. ## // In R, the user says which are the drivers. If does not say anuthing, ## // the default (NULL) then drivers are all in poset, epist, restrict. The ## // user can pass a vector with the names of the genes (not modules). Allow ## // also for empty, so this is faster if not needed. And check that if we ## // use a stopping rule based on drivers that drv vectors is not empty. ## - Say that a user can use a "0" as a gene name, but that is BAD idea. ## - Modules and order effects can be kind of funny? ## Gene names can contain no spaces, commas, or ">" check.gm <- function(gm) { ## Yes, Root: we want no ambiguities. ## Actually, that sucks. So we do not require it, but check for ## consistency. if(any(gm == "Root") && (gm[1] != "Root") ) stop("If Root is in the module table, it must be the first") if(any(names(gm) == "Root") && (names(gm)[1] != "Root") ) stop("If the name Root is in the module table, it must be the first") if( (names(gm)[1] == "Root") && (gm[1] != "Root") ) stop("The name Root is in the module table, but is not of Root") if( (gm[1] == "Root") && (names(gm)[1] != "Root") ) stop("Root is in the module table, but with a different name") ## if(gm[1] != "Root") ## stop("First value of a module table must be Root") ## if(names(gm)[1] != "Root") ## stop("First name of a module table must be Root") if(length(unique(names(gm))) != length(gm)) stop("Number of unique module names different from length of vector") if(gm[1] != "Root") gm <- c("Root" = "Root", gm) return(gm) } gtm2 <- function(x) { data.frame(cbind(nice.vector.eo(x, ","), x), stringsAsFactors = TRUE) } ## nice.vector.eo <- function(z, sep) { ## ## with epistasis, maybe we want sorted? ## setdiff(unlist(lapply(strsplit(z, " "), ## function(u) strsplit(u, sep))), "") ## } nice.vector.eo <- function(z, sep, rm.sign = FALSE) { ## with epistasis, maybe we want sorted? if(! rm.sign) return(setdiff(unlist(lapply(strsplit(z, " "), function(u) strsplit(u, sep))), "")) else ## remove the " ", the -, and the sep return(setdiff(unlist(lapply(strsplit(z, " "), function(u) strsplit(unlist(strsplit(u, "-")), sep))), "")) } gm.to.geneModuleL <- function(gm, one.to.one) { ## the table will be sorted by gene name gm <- check.gm(gm) ## the named vector with the mapping into the long geneModule df geneMod <- as.data.frame(rbindlist(lapply(gm, gtm2)), stringsAsFactors = TRUE) ## this stringsAsFactors is key geneMod$Module <- names(gm)[geneMod[, 2]] ## reverse lookup table colnames(geneMod)[1] <- c("Gene") geneMod <- geneMod[, -2] geneMod$Gene <- as.character(geneMod$Gene) ## geneMod$Module <- as.character(geneMod$Module) ## already a char geneMod <- geneMod[c(1, order(geneMod$Gene[-1]) + 1), ] geneMod$GeneNumID <- 0:(nrow(geneMod) - 1) ## this assumes sorted! and they need not be ## rlem <- rle(geneMod$Module) ## geneMod$ModuleNumID <- rep( 0:(length(rlem$values) - 1), rlem$lengths) if(one.to.one) { geneMod$ModuleNumID <- geneMod$GeneNumID if(!(identical(geneMod$Gene, geneMod$Module))) stop("Impossible: we are supposed to be in one-to-one for Module-Gene.") } else { ## Works, but does not give the most ordered module names. But ## keeps implicit order given by user. idm <- seq.int(length(gm) - 1) idm <- c("Root" = 0L, idm) names(idm) <- names(gm) ## This sorts by the names but is not optimal either ## idm1 <- seq.int(length(gm) - 1) ## idm <- c(0L, idm1) ## names(idm) <- c("Root", sort(setdiff(names(gm), "Root"))) geneMod$ModuleNumID <- idm[geneMod[, "Module"]] } if(length(unique(geneMod$Gene)) != nrow(geneMod)) { stop("Are there identical gene names in different modules?") } ## I think this is unreacheable now. Caught earlier. if(length(unique(geneMod$GeneNumID)) != nrow(geneMod)) { stop("Are there identical gene names in different modules?") } rownames(geneMod) <- 1:nrow(geneMod) geneMod } geneModuleNull <- function(namesM) { v <- c("Root", setdiff(namesM, "Root")) names(v) <- v return(v) } list.of.deps <- function(x) { ## lookupTypeDep <- c("MN" = 1, "monotone" = 1, ## "SM" = 2, "semimonotone" = 2) lookupTypeDep <- c("MN" = "monotone", "AND" = "monotone", "monotone" = "monotone", "CMPN" = "monotone", "OR" = "semimonotone", "SM" = "semimonotone", "semimonotone" = "semimonotone", "DMPN" = "semimonotone", "XOR" = "xmpn", "xmpn" = "xmpn", "XMPN" = "xmpn", "--" = "--", "-" = "--") ## FIXME: check values of typeDep if(length(x) > 1) { if(length(unique(x$s))!= 1) stop("Not all s identical within a child") if(length(unique(x$sh))!= 1) stop("Not all sh identical within a child") if(length(unique(x$typeDep))!= 1) stop("Not all typeDep identical within a child") if(length(unique(x$child))!= 1) stop("child not unique") } typeDep <- lookupTypeDep[unique(x$typeDep)] if(any(is_null_na(typeDep))) stop("typeDep value incorrect. Check spelling.") return(list( child = unique(x$child), s = unique(x$s), sh = unique(x$sh), typeDep = typeDep, parents = unlist(x$parent))) } to.long.rt <- function(rt, idm) { ## We now do this inconditionally, so that we do not need to use the ## "stringsAsFactors = FALSE". This is now done before ## if(is.numeric(rt$parent)) ## rt$parent <- as.character(rt$parent) ## if(is.numeric(rt$child)) ## rt$child <- as.character(rt$child) if(!("Root" %in% rt$parent)) stop("Root must be one parent node") ## rt$parent <- unlist(lapply(rt$parent, nice.string)) ## rt$child <- unlist(lapply(rt$child, nice.string)) srt <- rt[order(rt$child), ] ## Not relevant if we allow non-numeric names ## all.child.genes <- as.integer( ## unlist(lapply(rt[, 2], ## function(x) strsplit(x, ",")))) ## ## check all childs ## if(!identical(sort(unique(all.child.genes)), ## seq.int(max(all.child.genes)))) ## stop("Not all children present") long.rt <- lapply(split(srt, srt$child), list.of.deps) ## geneModule <- gene.to.module(srt) ## idm <- seq.int(length(names(long.rt))) ## names(idm) <- names(long.rt) ## idm <- c("0" = 0L, idm) ## geneModule$ModuleNumID <- idm[geneModule[, "Module"]] ## idm is just a look up table for the id of the module ## idm <- unique(geneModule$ModuleNumID) ## names(idm) <- unique(geneModule$Module) ## add integer IDs addIntID <- function(z, idm) { z$childNumID <- idm[z$child] z$parentsNumID <- idm[z$parents] if( any(is.na(z$parentsNumID)) || any(is.na(z$childNumID)) ) { ## I think this can no longer be reached ever. Caught before. stop(paste("An ID is NA:", "Is a gene part of two different modules?", "(That includes being by itself and part", "of a module.)")) } ## These checks could be somewhere else instead of here. if(length(unique(z$parentsNumID)) != length(z$parentsNumID)) stop("No node can have the same parent multiple times") if(length(unique(z$parents)) != length(z$parents)) stop("No node can have the same parent multiple times") if(length(z$child) > 1) stop("Child nodes can have one, and only one, member") ## sort the parents, always. o <- order(z$parentsNumID) z$parentsNumID <- z$parentsNumID[o] z$parents <- as.character(z$parents[o]) return(z) } long.rt <- lapply(long.rt, function(x) addIntID(x, idm = idm)) ## if(verbosity >= 4) { ## message(paste("Number of drivers: ", ## length(unique(geneModule[, "Gene"])))) ## message(paste("Number of modules: ", ## length(unique(geneModule[, "Module"])))) ## } return(long.rt) ## return(list(long.rt = long.rt, geneModule = geneModule)) } epist.order.element <- function(x, y, sep, rm.sign = FALSE) { list(ids = nice.vector.eo(x, sep = sep, rm.sign = rm.sign), s = y) } oe.to.df <- function(x) { ma <- matrix(ncol = 2, nrow = 1 + length(x) - 2) if(length(x) == 2) { ma[1, 1] <- x[1] ma[1, 2] <- x[2] } else { ma[, 1] <- x[-length(x)] ma[, 2] <- x[-1] } return(data.frame(ma, stringsAsFactors = FALSE)) } epist.order.to.pairs.modules <- function(x, sep, rm.sign = TRUE) { ## We discard, do not even accept, the coefficient tmp <- epist.order.element(x, -99, sep = sep, rm.sign = rm.sign)$ids if(length(tmp) > 1) { ## if a single gene, as when we specify all genotypes, we do not ## want this if(sep == ":") return(data.frame(combinations(n = length(tmp), r = 2, v = tmp), stringsAsFactors = FALSE)) else if(sep == ">") { return(oe.to.df(tmp)) } } } to.pairs.modules <- function(x, sep, rm.sign = TRUE) { df <- data.frame(rbindlist( lapply(names(x), function(z) epist.order.to.pairs.modules(z, sep))), stringsAsFactors = TRUE) if(nrow(df) == 0L) { ## if only single genes in epist, we get nothing here. return(df) } else { colnames(df) <- c("parent", "child") if(sep == ":") df$typeDep <- "epistasis" else if(sep == ">") df$typeDep <- "orderEffect" return(unique(df)) } } to.long.epist.order <- function(epor, sep, rm.sign = FALSE) { ## just vectors for now long <- Map(function(x, y) epist.order.element(x, y, sep, rm.sign), names(epor), epor) ## if(is.vector(epor)) ## long <- Map(function(x, y) epist.order.element(x, y, sep, rm.sign), ## names(epor), epor) ## else if(is.data.frame(epor)) ## long <- Map(function(x, y) epist.order.element(x, y, sep, rm.sign), ## as.character(epor$ids), ## epor$s) names(long) <- NULL return(long) } ## addIntID.epist.order <- function(z, idm, sort) { ## z$NumID <- idm[z$ids] ## if(sort) { ## ## essential for epistasis, but never do for order effects ## o <- order(z$NumID) ## z$NumID <- z$NumID[o] ## z$ids <- z$ids[o] ## } ## return(z) ## } addIntID.epist.order <- function(z, idm, sort, sign) { if( sort && (!sign)) warning("sort is TRUE but sign is FALSE. You sure?") if((!sort) && sign) warning("sort is FALSE but sign is TRUE. You sure?") ## Adds the integer, but takes care of sign if sign = TRUE if(!sign) { z$NumID <- idm[z$ids] signs <- grep("^-", z$ids) if(length(signs)) stop("You have a negative sign and you said sign = FALSE") } else { unsigned <- setdiff(unlist(lapply(z$ids, function(z) strsplit(z, "^-"))), "") NumID <- idm[unsigned] signs <- grep("^-", z$ids) if(length(signs)) { NumID[signs] <- -1 * NumID[signs] } z$NumID <- NumID } if(sort) { ## Essential for epistasis, but never do for order effects o <- order(z$NumID) z$NumID <- z$NumID[o] z$ids <- z$ids[o] } if(length(unique(z$NumID)) != length(z$NumID)) stop("No node can have the same id multiple times") if(length(unique(z$ids)) != length(z$ids)) stop("No node can have the same id multiple times") return(z) } checkRT <- function(mdeps) { if(ncol(mdeps) != 5) stop("mdeps must be of exactly 5 columns") if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep"))) stop(paste("Column names of mdeps not of appropriate format. ", "Should be parent, child, s, sh, typeDep")) } getNamesID <- function(fp) { ## Return a lookup table for names based on numeric IDs idname <- c(fp$geneModule$GeneNumID, fp$long.geneNoInt$GeneNumID, fp$fitnessLandscape_gene_id$GeneNumID) names(idname) <- c(fp$geneModule$Gene, fp$long.geneNoInt$Gene, fp$fitnessLandscape_gene_id$Gene) return(idname[-1]) ## remove Root!! } getGeneIDNum <- function(geneModule, geneNoInt, fitnessLandscape_gene_id, drv, sort = TRUE) { ## It returns the genes, as NumID, in the given vector with names drv ## initMutant uses this, for simplicity, without sorting, but noInt ## are always sorted ## Also used for the drivers with sort = TRUE ## Yes, we must do it twice because we do not know before hand which ## is which. This makes sure no NA. Period. if(any(is.na( match(drv, c(geneModule$Gene, geneNoInt$Gene, fitnessLandscape_gene_id$Gene))))) { stop(paste("For driver or initMutant you have passed genes", "not in the fitness table.")) } indicesM <- as.vector(na.omit(match( drv, geneModule$Gene))) indicesI <- as.vector(na.omit(sort(match( drv, geneNoInt$Gene)))) indicesF <- as.vector(na.omit(sort(match( drv, fitnessLandscape_gene_id$Gene)))) if(sort) { indicesM <- sort(indicesM) } return(c( geneModule$GeneNumID[indicesM], geneNoInt$GeneNumID[indicesI], fitnessLandscape_gene_id$GeneNumID[indicesF]) ) } allFitnessORMutatorEffects <- function(rT = NULL, epistasis = NULL, orderEffects = NULL, noIntGenes = NULL, geneToModule = NULL, drvNames = NULL, keepInput = TRUE, genotFitness = NULL, ## refFE = NULL, calledBy = NULL) { ## From allFitnessEffects. Generalized so we deal with Fitness ## and mutator. ## restrictions: the usual rt ## epistasis: as it says, with the ":" ## orderEffects: the ">" ## All of the above can be genes or can be modules (if you pass a ## geneToModule) ## rest: rest of genes, with fitness ## For epistasis and order effects we create the output object but ## missing the numeric ids of genes. With rT we do it in one go, as we ## already know the mapping of genes to numeric ids. We could do the ## same in epistasis and order, but we would be splitting twice ## (whereas for rT extracting the names is very simple). ## called appropriately? if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") )) stop("How did you call this function?. Bug.") if(calledBy == "allMutatorEffects") { ## very paranoid check if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames) || !is.null(genotFitness)) stop("allMutatorEffects called with forbidden arguments.", "Is this an attempt to subvert the function?") } rtNames <- NULL epiNames <- NULL orNames <- NULL if(!is.null(rT)) { ## This is really ugly, but to prevent the stringsAsFactors I need it here: rT$parent <- as.character(rT$parent) rT$child <- as.character(rT$child) rT$typeDep <- as.character(rT$typeDep) rtNames <- unique(c(rT$parent, rT$child)) } if(!is.null(epistasis)) { long.epistasis <- to.long.epist.order(epistasis, ":") ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids))) ## deal with the possible negative signs epiNames <- setdiff(unique( unlist(lapply(long.epistasis, function(x) lapply(x$ids, function(z) strsplit(z, "^-"))))), "") } else { long.epistasis <- list() } if(!is.null(orderEffects)) { long.orderEffects <- to.long.epist.order(orderEffects, ">") orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids))) } else { long.orderEffects <- list() } allModuleNames <- unique(c(rtNames, epiNames, orNames)) if(is.null(geneToModule)) { gMOneToOne <- TRUE geneToModule <- geneModuleNull(allModuleNames) } else { gMOneToOne <- FALSE if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames)))) stop(paste("Some values in geneToModule not present in any of", " rT, epistasis, or order effects")) if(any(is.na(match(allModuleNames, names(geneToModule))))) stop(paste("Some values in rT, epistasis, ", "or order effects not in geneToModule")) } geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne) idm <- unique(geneModule$ModuleNumID) names(idm) <- unique(geneModule$Module) if(!is.null(rT)) { checkRT(rT) long.rt <- to.long.rt(rT, idm) } else { long.rt <- list() ## yes, we want an object of length 0 } ## Append the numeric ids to epistasis and order if(!is.null(epistasis)) { long.epistasis <- lapply(long.epistasis, function(x) addIntID.epist.order(x, idm, sort = TRUE, sign = TRUE)) } if(!is.null(orderEffects)) { long.orderEffects <- lapply(long.orderEffects, function(x) addIntID.epist.order(x, idm, sort = FALSE, sign = FALSE)) } if(!is.null(noIntGenes)) { if(inherits(noIntGenes, "character")) { wm <- paste("noIntGenes is a character vector.", "This is probably not what you want, and will", "likely result in an error downstream.", "You can get messages like", " 'not compatible with requested type', and others.", "We are stopping.") stop(wm) } mg <- max(geneModule[, "GeneNumID"]) gnum <- seq_along(noIntGenes) + mg if(!is.null(names(noIntGenes))) { ng <- names(noIntGenes) if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE) || grepl(":", ng, fixed = TRUE)) stop("The name of some noIntGenes contain a ',' or a '>' or a ':'") if(any(ng %in% geneModule[, "Gene"] )) stop("A gene in noIntGenes also present in the other terms") if(any(duplicated(ng))) stop("Duplicated gene names in geneNoInt") if(any(is.na(ng))) stop("In noIntGenes some genes have names, some don't.", " Name all of them, or name none of them.") } else { ng <- gnum } geneNoInt <- data.frame(Gene = as.character(ng), GeneNumID = gnum, s = noIntGenes, stringsAsFactors = FALSE) } else { geneNoInt <- data.frame() } if(is.null(genotFitness)) { genotFitness <- matrix(NA, nrow = 0, ncol = 1) fitnessLandscape_df <- data.frame() fitnessLandscape_gene_id <- data.frame() } else { ## Yes, I am duplicating stuff for now. ## This makes life simpler in C++: ## In the map, the key is the genotype name, as ## cnn <- colnames(genotFitness)[-ncol(genotFitness)] cnn <- 1:(ncol(genotFitness) - 1) gfn <- apply(genotFitness[, -ncol(genotFitness), drop = FALSE], 1, function(x) paste(cnn[as.logical(x)], collapse = ", ")) ## rownames(genotFitness) <- gfn fitnessLandscape_df <- data.frame(Genotype = gfn, Fitness = genotFitness[, ncol(genotFitness)], stringsAsFactors = FALSE) fitnessLandscape_gene_id <- data.frame( Gene = colnames(genotFitness)[-ncol(genotFitness)], GeneNumID = cnn, stringsAsFactors = FALSE) } if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) + nrow(geneNoInt) + nrow(genotFitness)) == 0) stop("You have specified nothing!") if(calledBy == "allFitnessEffects") { if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) { graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects) } else { graphE <- NULL } } else { graphE <- NULL } if(!is.null(drvNames)) { drv <- unique(getGeneIDNum(geneModule, geneNoInt, fitnessLandscape_gene_id, drvNames)) ## drivers should never be in the geneNoInt; Why!!!??? ## Catch the problem. This is an overkill, ## so since we catch the issue, we could leave the geneNoInt. But ## that should not be there in this call. ## if(any(drvNames %in% geneNoInt$Gene)) { ## stop(paste("At least one gene in drvNames is a geneNoInt gene.", ## "That is not allowed.", ## "If that gene is a driver, pass it as gene in the epistasis", ## "component.")) ## } ## drv <- getGeneIDNum(geneModule, NULL, drvNames) } else { ## we used to have this default ## drv <- geneModule$GeneNumID[-1] drv <- vector(mode = "integer", length = 0L) } if(!keepInput) { rT <- epistasis <- orderEffects <- noIntGenes <- NULL } out <- list(long.rt = long.rt, long.epistasis = long.epistasis, long.orderEffects = long.orderEffects, long.geneNoInt = geneNoInt, geneModule = geneModule, gMOneToOne = gMOneToOne, geneToModule = geneToModule, graph = graphE, drv = drv, rT = rT, epistasis = epistasis, orderEffects = orderEffects, noIntGenes = noIntGenes, fitnessLandscape = genotFitness, fitnessLandscape_df = fitnessLandscape_df, fitnessLandscape_gene_id = fitnessLandscape_gene_id ) if(calledBy == "allFitnessEffects") { class(out) <- c("fitnessEffects") } else if(calledBy == "allMutatorEffects") { class(out) <- c("mutatorEffects") } return(out) } ## Former version, with fitness landscape ## allFitnessORMutatorEffects <- function(rT = NULL, ## epistasis = NULL, ## orderEffects = NULL, ## noIntGenes = NULL, ## geneToModule = NULL, ## drvNames = NULL, ## keepInput = TRUE, ## ## refFE = NULL, ## calledBy = NULL) { ## ## From allFitnessEffects. Generalized so we deal with Fitness ## ## and mutator. ## ## restrictions: the usual rt ## ## epistasis: as it says, with the ":" ## ## orderEffects: the ">" ## ## All of the above can be genes or can be modules (if you pass a ## ## geneToModule) ## ## rest: rest of genes, with fitness ## ## For epistasis and order effects we create the output object but ## ## missing the numeric ids of genes. With rT we do it in one go, as we ## ## already know the mapping of genes to numeric ids. We could do the ## ## same in epistasis and order, but we would be splitting twice ## ## (whereas for rT extracting the names is very simple). ## ## called appropriately? ## if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") )) ## stop("How did you call this function?. Bug.") ## if(calledBy == "allMutatorEffects") { ## ## very paranoid check ## if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames)) ## stop("allMutatorEffects called with forbidden arguments.", ## "Is this an attempt to subvert the function?") ## } ## rtNames <- NULL ## epiNames <- NULL ## orNames <- NULL ## if(!is.null(rT)) { ## ## This is really ugly, but to prevent the stringsAsFactors I need it here: ## rT$parent <- as.character(rT$parent) ## rT$child <- as.character(rT$child) ## rT$typeDep <- as.character(rT$typeDep) ## rtNames <- unique(c(rT$parent, rT$child)) ## } ## if(!is.null(epistasis)) { ## long.epistasis <- to.long.epist.order(epistasis, ":") ## ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids))) ## ## deal with the possible negative signs ## epiNames <- setdiff(unique( ## unlist(lapply(long.epistasis, ## function(x) lapply(x$ids, ## function(z) strsplit(z, "^-"))))), ## "") ## } else { ## long.epistasis <- list() ## } ## if(!is.null(orderEffects)) { ## long.orderEffects <- to.long.epist.order(orderEffects, ">") ## orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids))) ## } else { ## long.orderEffects <- list() ## } ## allModuleNames <- unique(c(rtNames, epiNames, orNames)) ## if(is.null(geneToModule)) { ## gMOneToOne <- TRUE ## geneToModule <- geneModuleNull(allModuleNames) ## } else { ## gMOneToOne <- FALSE ## if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames)))) ## stop(paste("Some values in geneToModule not present in any of", ## " rT, epistasis, or order effects")) ## if(any(is.na(match(allModuleNames, names(geneToModule))))) ## stop(paste("Some values in rT, epistasis, ", ## "or order effects not in geneToModule")) ## } ## geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne) ## idm <- unique(geneModule$ModuleNumID) ## names(idm) <- unique(geneModule$Module) ## if(!is.null(rT)) { ## checkRT(rT) ## long.rt <- to.long.rt(rT, idm) ## } else { ## long.rt <- list() ## yes, we want an object of length 0 ## } ## ## Append the numeric ids to epistasis and order ## if(!is.null(epistasis)) { ## long.epistasis <- lapply(long.epistasis, ## function(x) ## addIntID.epist.order(x, idm, ## sort = TRUE, ## sign = TRUE)) ## } ## if(!is.null(orderEffects)) { ## long.orderEffects <- lapply(long.orderEffects, ## function(x) ## addIntID.epist.order(x, idm, ## sort = FALSE, ## sign = FALSE)) ## } ## if(!is.null(noIntGenes)) { ## if(inherits(noIntGenes, "character")) { ## wm <- paste("noIntGenes is a character vector.", ## "This is probably not what you want, and will", ## "likely result in an error downstream.", ## "You can get messages like", ## " 'not compatible with requested type', and others.", ## "We are stopping.") ## stop(wm) ## } ## mg <- max(geneModule[, "GeneNumID"]) ## gnum <- seq_along(noIntGenes) + mg ## if(!is.null(names(noIntGenes))) { ## ng <- names(noIntGenes) ## if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE) ## || grepl(":", ng, fixed = TRUE)) ## stop("The name of some noIntGenes contain a ',' or a '>' or a ':'") ## if(any(ng %in% geneModule[, "Gene"] )) ## stop("A gene in noIntGenes also present in the other terms") ## if(any(duplicated(ng))) ## stop("Duplicated gene names in geneNoInt") ## if(any(is.na(ng))) ## stop("In noIntGenes some genes have names, some don't.", ## " Name all of them, or name none of them.") ## } else { ## ng <- gnum ## } ## geneNoInt <- data.frame(Gene = as.character(ng), ## GeneNumID = gnum, ## s = noIntGenes, ## stringsAsFactors = FALSE) ## } else { ## geneNoInt <- data.frame() ## } ## if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) + ## nrow(geneNoInt)) == 0) ## stop("You have specified nothing!") ## if(calledBy == "allFitnessEffects") { ## if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) { ## graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects) ## } else { ## graphE <- NULL ## } ## } else { ## graphE <- NULL ## } ## if(!is.null(drvNames)) { ## drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames)) ## ## drivers should never be in the geneNoInt; Why!!!??? ## ## Catch the problem. This is an overkill, ## ## so since we catch the issue, we could leave the geneNoInt. But ## ## that should not be there in this call. ## ## if(any(drvNames %in% geneNoInt$Gene)) { ## ## stop(paste("At least one gene in drvNames is a geneNoInt gene.", ## ## "That is not allowed.", ## ## "If that gene is a driver, pass it as gene in the epistasis", ## ## "component.")) ## ## } ## ## drv <- getGeneIDNum(geneModule, NULL, drvNames) ## } else { ## ## we used to have this default ## ## drv <- geneModule$GeneNumID[-1] ## drv <- vector(mode = "integer", length = 0L) ## } ## if(!keepInput) { ## rT <- epistasis <- orderEffects <- noIntGenes <- NULL ## } ## out <- list(long.rt = long.rt, ## long.epistasis = long.epistasis, ## long.orderEffects = long.orderEffects, ## long.geneNoInt = geneNoInt, ## geneModule = geneModule, ## gMOneToOne = gMOneToOne, ## geneToModule = geneToModule, ## graph = graphE, ## drv = drv, ## rT = rT, ## epistasis = epistasis, ## orderEffects = orderEffects, ## noIntGenes = noIntGenes ## ) ## if(calledBy == "allFitnessEffects") { ## class(out) <- c("fitnessEffects") ## } else if(calledBy == "allMutatorEffects") { ## class(out) <- c("mutatorEffects") ## } ## return(out) ## } allFitnessEffects <- function(rT = NULL, epistasis = NULL, orderEffects = NULL, noIntGenes = NULL, geneToModule = NULL, drvNames = NULL, genotFitness = NULL, keepInput = TRUE) { if(!is.null(genotFitness)) { if(!is.null(rT) || !is.null(epistasis) || !is.null(orderEffects) || !is.null(noIntGenes) || !is.null(geneToModule)) { stop("You have a non-null genotFitness.", " If you pass the complete genotype to fitness mapping", " you cannot pass any of rT, epistasis, orderEffects", " noIntGenes or geneToModule.") } genotFitness_std <- to_genotFitness_std(genotFitness, simplify = TRUE) ## epistasis <- from_genotype_fitness(genotFitness) } else { genotFitness_std <- NULL } allFitnessORMutatorEffects( rT = rT, epistasis = epistasis, orderEffects = orderEffects, noIntGenes = noIntGenes, geneToModule = geneToModule, drvNames = drvNames, keepInput = keepInput, genotFitness = genotFitness_std, calledBy = "allFitnessEffects") } ## Former version ## allFitnessEffects <- function(rT = NULL, ## epistasis = NULL, ## orderEffects = NULL, ## noIntGenes = NULL, ## geneToModule = NULL, ## drvNames = NULL, ## genotFitness = NULL, ## keepInput = TRUE) { ## if(!is.null(genotFitness)) { ## if(!is.null(rT) || !is.null(epistasis) || ## !is.null(orderEffects) || !is.null(noIntGenes) || ## !is.null(geneToModule)) { ## stop("You have a non-null genotFitness.", ## " If you pass the complete genotype to fitness mapping", ## " you cannot pass any of rT, epistasis, orderEffects", ## " noIntGenes or geneToModule.") ## } ## epistasis <- from_genotype_fitness(genotFitness) ## } ## allFitnessORMutatorEffects( ## rT = rT, ## epistasis = epistasis, ## orderEffects = orderEffects, ## noIntGenes = noIntGenes, ## geneToModule = geneToModule, ## drvNames = drvNames, ## keepInput = keepInput, ## calledBy = "allFitnessEffects") ## } ## allFitnessEffects <- function(rT = NULL, ## epistasis = NULL, ## orderEffects = NULL, ## noIntGenes = NULL, ## geneToModule = NULL, ## drvNames = NULL, ## keepInput = TRUE) { ## ## restrictions: the usual rt ## ## epistasis: as it says, with the ":" ## ## orderEffects: the ">" ## ## All of the above can be genes or can be modules (if you pass a ## ## geneToModule) ## ## rest: rest of genes, with fitness ## ## For epistasis and order effects we create the output object but ## ## missing the numeric ids of genes. With rT we do it in one go, as we ## ## already know the mapping of genes to numeric ids. We could do the ## ## same in epistasis and order, but we would be splitting twice ## ## (whereas for rT extracting the names is very simple). ## rtNames <- NULL ## epiNames <- NULL ## orNames <- NULL ## if(!is.null(rT)) { ## ## This is really ugly, but to prevent the stringsAsFactors I need it here: ## rT$parent <- as.character(rT$parent) ## rT$child <- as.character(rT$child) ## rT$typeDep <- as.character(rT$typeDep) ## rtNames <- unique(c(rT$parent, rT$child)) ## } ## if(!is.null(epistasis)) { ## long.epistasis <- to.long.epist.order(epistasis, ":") ## ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids))) ## ## deal with the possible negative signs ## epiNames <- setdiff(unique( ## unlist(lapply(long.epistasis, ## function(x) lapply(x$ids, ## function(z) strsplit(z, "^-"))))), ## "") ## } else { ## long.epistasis <- list() ## } ## if(!is.null(orderEffects)) { ## long.orderEffects <- to.long.epist.order(orderEffects, ">") ## orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids))) ## } else { ## long.orderEffects <- list() ## } ## allModuleNames <- unique(c(rtNames, epiNames, orNames)) ## if(is.null(geneToModule)) { ## gMOneToOne <- TRUE ## geneToModule <- geneModuleNull(allModuleNames) ## } else { ## gMOneToOne <- FALSE ## if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames)))) ## stop(paste("Some values in geneToModule not present in any of", ## " rT, epistasis, or order effects")) ## if(any(is.na(match(allModuleNames, names(geneToModule))))) ## stop(paste("Some values in rT, epistasis, ", ## "or order effects not in geneToModule")) ## } ## geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne) ## idm <- unique(geneModule$ModuleNumID) ## names(idm) <- unique(geneModule$Module) ## if(!is.null(rT)) { ## checkRT(rT) ## long.rt <- to.long.rt(rT, idm) ## } else { ## long.rt <- list() ## yes, we want an object of length 0 ## } ## ## Append the numeric ids to epistasis and order ## if(!is.null(epistasis)) { ## long.epistasis <- lapply(long.epistasis, ## function(x) ## addIntID.epist.order(x, idm, ## sort = TRUE, ## sign = TRUE)) ## } ## if(!is.null(orderEffects)) { ## long.orderEffects <- lapply(long.orderEffects, ## function(x) ## addIntID.epist.order(x, idm, ## sort = FALSE, ## sign = FALSE)) ## } ## if(!is.null(noIntGenes)) { ## mg <- max(geneModule[, "GeneNumID"]) ## gnum <- seq_along(noIntGenes) + mg ## if(!is.null(names(noIntGenes))) { ## ng <- names(noIntGenes) ## if(any(ng %in% geneModule[, "Gene"] )) ## stop("A gene in noIntGenes also present in the other terms") ## } else { ## ng <- gnum ## } ## geneNoInt <- data.frame(Gene = as.character(ng), ## GeneNumID = gnum, ## s = noIntGenes, ## stringsAsFactors = FALSE) ## } else { ## geneNoInt <- data.frame() ## } ## if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) + ## nrow(geneNoInt)) == 0) ## stop("You have specified nothing!") ## if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) { ## graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects) ## } else { ## graphE <- NULL ## } ## if(!is.null(drvNames)) { ## drv <- getGeneIDNum(geneModule, geneNoInt, drvNames) ## } else { ## drv <- geneModule$GeneNumID[-1] ## } ## if(!keepInput) { ## rT <- epistasis <- orderEffects <- noIntGenes <- NULL ## } ## out <- list(long.rt = long.rt, ## long.epistasis = long.epistasis, ## long.orderEffects = long.orderEffects, ## long.geneNoInt = geneNoInt, ## geneModule = geneModule, ## gMOneToOne = gMOneToOne, ## geneToModule = geneToModule, ## graph = graphE, ## drv = drv, ## rT = rT, ## epistasis = epistasis, ## orderEffects = orderEffects, ## noIntGenes = noIntGenes ## ) ## class(out) <- c("fitnessEffects") ## return(out) ## } ## No longer used ## rtAndGeneModule <- function(mdeps, gM = NULL) { ## ## To show a table of restrictions when there are modules. Do not use ## ## for anything else. Maybe as intermediate to plotting. ## ## Specify restriction table of modules and a mapping of modules to ## ## genes. gM is a named vector; names are modules, values are elements ## ## of each module. ## ## We do nothing important if gM is NULL except checks ## ## If there are modules, the table shows the individual genes. ## checkRT(mdeps) ## ## if(ncol(mdeps) != 5) ## ## stop("mdeps must be of exactly 5 columns") ## ## if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep"))) ## ## stop(paste("Column names of mdeps not of appropriate format. ", ## ## "Should be parent, child, s, sh, typeDep")) ## if(!is.null(gM)) { ## if(any(is.na(match(mdeps[ , 1], names(gM))))) ## stop("Some values in parent not from a known module") ## if(any(is.na(match(mdeps[ , 2], names(gM))))) ## stop("Some values in child not from a known module") ## if(any(is.na(match(names(gM), c(mdeps[, 1], mdeps[, 2]))))) ## stop("Some values in module in neither parent or child") ## parent <- gM[mdeps[, 1]] ## child <- gM[mdeps[, 2]] ## df <- data.frame(parent = parent, ## child = child, ## s = mdeps$s, ## sh = mdeps$sh, ## typeDep = mdeps$typeDep, ## stringsAsFactors = FALSE) ## } else { ## df <- mdeps ## } ## rownames(df) <- seq.int(nrow(df)) ## return(df) ## } ## wrap.test.rt <- function(rt, gM = NULL) { ## ## FIXME add epistasis and orderEffects ## lrt <- allFitnessEffects(rt, geneToModule = gM) ## ## wrap_test_rt(lrt$long.rt) ## wrap_test_rt(lrt$long.rt, lrt$geneModule) ## } ## No longer used ## wrap.readFitnessEffects <- function(rt, epi, oe, ni, gm, echo = TRUE) { ## tt <- allFitnessEffects(rt, epi, oe, ni, gm) ## readFitnessEffects(tt, echo = echo) ## ## readFitnessEffects(tt$long.rt, ## ## tt$long.epistasis, ## ## tt$long.orderEffects, ## ## tt$long.geneNoInt, ## ## tt$geneModule, ## ## tt$gMOneToOne, ## ## echo = TRUE) ## } evalGenotypeORMut <- function(genotype, fmEffects, verbose = FALSE, echo = FALSE, model = "", calledBy_= NULL) { ## genotype can be a vector of integers, that are the exact same in ## the table of fmEffects or a vector of strings, or a vector (a ## string) with genes separated by "," or ">" if( !(calledBy_ %in% c("evalGenotype", "evalGenotypeMut") )) stop("How did you call this function?. Bug.") ## fmEffects could be a mutator effect if(!exists("fitnessLandscape_gene_id", where = fmEffects)) { fmEffects$fitnessLandscape_df <- data.frame() fmEffects$fitnessLandscape_gene_id <- data.frame() } if( (model %in% c("Bozic", "bozic1", "bozic2")) && (nrow(fmEffects$fitnessLandscape_df) > 0)) { warning("Bozic model passing a fitness landscape will not work", " for now.") } if(echo) cat(paste("Genotype: ", genotype)) if(!is.integer(genotype)) { if(length(grep(">", genotype))) { genotype <- nice.vector.eo(genotype, ">") } else if(length(grep(",", genotype))) { genotype <- nice.vector.eo(genotype, ",") } all.g.nums <- c(fmEffects$geneModule$GeneNumID, fmEffects$long.geneNoInt$GeneNumID, fmEffects$fitnessLandscape_gene_id$GeneNumID) all.g.names <- c(fmEffects$geneModule$Gene, fmEffects$long.geneNoInt$Gene, fmEffects$fitnessLandscape_gene_id$Gene) genotype <- all.g.nums[match(genotype, all.g.names)] } if(any(is.na(genotype))) stop("genotype contains NAs or genes not in fitnessEffects/mutatorEffects") if(!length(genotype)) stop("genotypes must have at least one mutated gene") if(any(genotype < 0)) { stop(paste("genotypes cannot contain negative values.", "If you see this message, you found a bug.")) } if(model %in% c("Bozic", "bozic1", "bozic2") ) prodNeg <- TRUE else prodNeg <- FALSE ff <- evalRGenotype(rG = genotype, rFE = fmEffects, verbose = verbose, prodNeg = prodNeg, calledBy_ = calledBy_) if(echo) { if(calledBy_ == "evalGenotype") { if(!prodNeg) cat(" Fitness: ", ff, "\n") else cat(" Death rate: ", ff, "\n") } else if(calledBy_ == "evalGenotypeMut") { cat(" Mutation rate product :", ff, "\n") } } return(ff) } evalGenotype <- function(genotype, fitnessEffects, verbose = FALSE, echo = FALSE, model = "") { if(inherits(fitnessEffects, "mutatorEffects")) stop("You are trying to get the fitness of a mutator specification. ", "You did not pass an object of class fitnessEffects.") evalGenotypeORMut(genotype = genotype, fmEffects = fitnessEffects, verbose = verbose, echo = echo, model = model , calledBy_= "evalGenotype" ) } evalGenotypeFitAndMut <- function(genotype, fitnessEffects, mutatorEffects, verbose = FALSE, echo = FALSE, model = "") { ## Must deal with objects from previous, pre flfast, modifications if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) { fitnessEffects$fitnessLandscape_df <- data.frame() fitnessEffects$fitnessLandscape_gene_id <- data.frame() } if( (model %in% c("Bozic", "bozic1", "bozic2")) && (nrow(fitnessEffects$fitnessLandscape_df) > 0)) { warning("Bozic model passing a fitness landscape will not work", " for now.") } prodNeg <- FALSE ## Next is from evalGenotypeAndMut if(echo) cat(paste("Genotype: ", genotype)) if(!is.integer(genotype)) { if(length(grep(">", genotype))) { genotype <- nice.vector.eo(genotype, ">") } else if(length(grep(",", genotype))) { genotype <- nice.vector.eo(genotype, ",") } all.g.nums <- c(fitnessEffects$geneModule$GeneNumID, fitnessEffects$long.geneNoInt$GeneNumID, fitnessEffects$fitnessLandscape_gene_id$GeneNumID) all.g.names <- c(fitnessEffects$geneModule$Gene, fitnessEffects$long.geneNoInt$Gene, fitnessEffects$fitnessLandscape_gene_id$Gene) genotype <- all.g.nums[match(genotype, all.g.names)] } if(any(is.na(genotype))) stop("genotype contains NAs or genes not in fitnessEffects") if(!length(genotype)) stop("genotypes must have at least one mutated gene") if(any(genotype < 0)) { stop(paste("genotypes cannot contain negative values.", "If you see this message, you found a bug.")) } full2mutator_ <- matchGeneIDs(mutatorEffects, fitnessEffects)$Reduced if(model %in% c("Bozic", "bozic1", "bozic2") ) prodNeg <- TRUE else prodNeg <- FALSE evalRGenotypeAndMut(genotype, fitnessEffects, mutatorEffects, full2mutator_, verbose = verbose, prodNeg = prodNeg) } ## evalGenotype <- function(genotype, fitnessEffects, ## verbose = FALSE, ## echo = FALSE, ## model = "") { ## ## genotype can be a vector of integers, that are the exact same in ## ## the table of fitnessEffects or a vector of strings, or a vector (a ## ## string) with genes separated by "," or ">" ## if(echo) ## cat(paste("Genotype: ", genotype)) ## if(!is.integer(genotype)) { ## if(length(grep(">", genotype))) { ## genotype <- nice.vector.eo(genotype, ">") ## } else if(length(grep(",", genotype))) { ## genotype <- nice.vector.eo(genotype, ",") ## } ## all.g.nums <- c(fitnessEffects$geneModule$GeneNumID, ## fitnessEffects$long.geneNoInt$GeneNumID) ## all.g.names <- c(fitnessEffects$geneModule$Gene, ## fitnessEffects$long.geneNoInt$Gene) ## genotype <- all.g.nums[match(genotype, all.g.names)] ## } ## if(any(is.na(genotype))) ## stop("genotype contains NAs or genes not in fitnessEffects") ## if(!length(genotype)) ## stop("genotypes must have at least one mutated gene") ## if(any(genotype < 0)) { ## stop(paste("genotypes cannot contain negative values.", ## "If you see this message, you found a bug.")) ## } ## if(model %in% c("Bozic", "bozic1", "bozic2") ) ## prodNeg <- TRUE ## else ## prodNeg <- FALSE ## ff <- evalRGenotype(genotype, fitnessEffects, verbose, prodNeg) ## if(echo) { ## if(!prodNeg) ## cat(" Fitness: ", ff, "\n") ## else ## cat(" Death rate: ", ff, "\n") ## } ## else { ## ## return(ff) ## ## } ## return(ff) ## } ## For multiple genotypes, lapply the matching. ## Nope, I think unneeded ## internal.convert_genotypes <- function(genotypes, gm) { ## genotypes <- lapply(lg, function(x) gm$GeneNumID[match(x, gm$Gene)]) ## } ## I am here: simplify this evalAllGenotypesORMut <- function(fmEffects, order = FALSE, max = 256, addwt = FALSE, model = "", calledBy_ = "") { ## minimal = FALSE) { if( !(calledBy_ %in% c("evalGenotype", "evalGenotypeMut") )) stop("How did you call this function?. Bug.") if( (calledBy_ == "evalGenotype" ) && (!inherits(fmEffects, "fitnessEffects"))) stop("You are trying to get the fitness of a mutator specification. ", "You did not pass an object of class fitnessEffects.") if( (calledBy_ == "evalGenotypeMut" ) && (!inherits(fmEffects, "mutatorEffects"))) stop("You are trying to get the mutator effects of a fitness specification. ", "You did not pass an object of class mutatorEffects.") ## if(!minimal) allg <- generateAllGenotypes(fitnessEffects = fmEffects, order = order, max = max) ## else ## allg <- generateAllGenotypes_minimal(fitnessEffects = fmEffects, ## max = max) ## if(order) ## tot <- function(n) {sum(sapply(seq.int(n), ## function(x) choose(n, x) * factorial(x)))} ## else ## tot <- function(n) {2^n} ## nn <- nrow(fitnessEffects$geneModule) -1 + nrow(fitnessEffects$long.geneNoInt) ## tnn <- tot(nn) ## if(tnn > max) { ## m <- paste("There are", tnn, "genotypes.") ## m <- paste(m, "This is larger than max.") ## m <- paste(m, "Adjust max and rerun if you want") ## stop(m) ## } ## ## With mutator, the ids of genes need not go from 1:n ## vid <- allNamedGenes(fitnessEffects)$GeneNumID ## if(order) { ## f1 <- function(n) { ## lapply(seq.int(n), function(x) permutations(n = n, r = x, v = vid)) ## } ## } else { ## f1 <- function(n) { ## lapply(seq.int(n), function(x) combinations(n = n, r = x, v = vid))} ## } ## genotNums <- f1(nn) ## list.of.vectors <- function(y) { ## ## there's got to be a simpler way ## lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE), ## function(m) m[[1]]) ## } ## genotNums <- list.of.vectors(genotNums) ## names <- c(fitnessEffects$geneModule$Gene[-1], ## fitnessEffects$long.geneNoInt$Gene) ## genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]), ## function(z) ## paste(z, ## collapse = if(order){" > "} else {", "} ))) ## This ain't the best, as we repeatedly read and convert ## fitnessEffects. If this were slow, prepare C++ function that takes ## vectors of genotypes and uses same fitnessEffects. if(model %in% c("Bozic", "bozic1", "bozic2") ) prodNeg <- TRUE else prodNeg <- FALSE allf <- vapply(allg$genotNums, function(x) evalRGenotype(x, fmEffects, FALSE, prodNeg, calledBy_), 1.1) df <- data.frame(Genotype = allg$genotNames, Fitness = allf, stringsAsFactors = FALSE) ## Why am I doing this? I am not computing the genotype. I test the ## evaluation of the empty genotype in the tests. But its evaluation ## generates warnings that are not needed. And by decree (even in the ## C++) this thing has a fitness of 1, a mutator effect of 1 since ## there are no terms. if(addwt) df <- rbind(data.frame(Genotype = "WT", Fitness = 1, stringsAsFactors = FALSE), df) if(calledBy_ == "evalGenotype") { if(prodNeg) colnames(df)[match("Fitness", colnames(df))] <- "Death_rate" class(df) <- c("evalAllGenotypes", class(df)) } else if (calledBy_ == "evalGenotypeMut") { colnames(df)[match("Fitness", colnames(df))] <- "MutatorFactor" class(df) <- c("evalAllGenotypesMut", class(df)) } return(df) } evalAllGenotypes <- function(fitnessEffects, order = FALSE, max = 256, addwt = FALSE, model = "") { ## Must deal with objects from previous, pre flfast, modifications if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) { fitnessEffects$fitnessLandscape_df <- data.frame() fitnessEffects$fitnessLandscape_gene_id <- data.frame() } if( (model %in% c("Bozic", "bozic1", "bozic2")) && (nrow(fitnessEffects$fitnessLandscape_df) > 0)) { warning("Bozic model passing a fitness landscape will not work", " for now.") } evalAllGenotypesORMut( fmEffects = fitnessEffects, order = order, max = max, addwt = addwt, model = model, calledBy_= "evalGenotype" ) } generateAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256) { if(order) tot <- function(n) {sum(sapply(seq.int(n), function(x) choose(n, x) * factorial(x)))} else tot <- function(n) {2^n} nn <- nrow(fitnessEffects$geneModule) -1 + nrow(fitnessEffects$long.geneNoInt) + nrow(fitnessEffects$fitnessLandscape_gene_id) tnn <- tot(nn) if(tnn > max) { m <- paste("There are", tnn, "genotypes.") m <- paste(m, "This is larger than max.") m <- paste(m, "Adjust max and rerun if you want") stop(m) } ## With mutator, the ids of genes need not go from 1:n vid <- allNamedGenes(fitnessEffects)$GeneNumID if(order) { f1 <- function(n) { lapply(seq.int(n), function(x) permutations(n = n, r = x, v = vid)) } } else { f1 <- function(n) { lapply(seq.int(n), function(x) combinations(n = n, r = x, v = vid))} } genotNums <- f1(nn) list.of.vectors <- function(y) { ## there's got to be a simpler way lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE), function(m) m[[1]]) } genotNums <- list.of.vectors(genotNums) names <- c(fitnessEffects$geneModule$Gene[-1], fitnessEffects$long.geneNoInt$Gene, fitnessEffects$fitnessLandscape_gene_id$Gene) genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]), function(z) paste(z, collapse = if(order){" > "} else {", "} ))) ## This ain't the best, as we repeatedly read and convert ## fitnessEffects. If this were slow, prepare C++ function that takes ## vectors of genotypes and uses same fitnessEffects. return(list(genotNums = genotNums, genotNames = genotNames )) } evalAllGenotypesFitAndMut <- function(fitnessEffects, mutatorEffects, order = FALSE, max = 256, addwt = FALSE, model = "" ){ ## minimal = FALSE) { ## if(!minimal) allg <- generateAllGenotypes(fitnessEffects = fitnessEffects, order = order, max = max) ## else ## allg <- generateAllGenotypes_minimal(fitnessEffects = fitnessEffects, ## max = max) if(model %in% c("Bozic", "bozic1", "bozic2") ) { prodNeg <- TRUE } else { prodNeg <- FALSE } ## Must deal with objects from previous, pre flfast, modifications if(!exists("fitnessLandscape_gene_id", where = fitnessEffects)) { fitnessEffects$fitnessLandscape_df <- data.frame() fitnessEffects$fitnessLandscape_gene_id <- data.frame() } if( (model %in% c("Bozic", "bozic1", "bozic2")) && (nrow(fitnessEffects$fitnessLandscape_df) > 0)) { warning("Bozic model passing a fitness landscape will not work", " for now.") } full2mutator_ <- matchGeneIDs(mutatorEffects, fitnessEffects)$Reduced allf <- t(vapply(allg$genotNums, function(x) evalRGenotypeAndMut(x, rFE = fitnessEffects, muEF= mutatorEffects, full2mutator_ = full2mutator_, verbose = FALSE, prodNeg = prodNeg), c(1.1, 2.2))) df <- data.frame(Genotype = allg$genotNames, Fitness = allf[, 1], MutatorFactor = allf[, 2], stringsAsFactors = FALSE) if(addwt) df <- rbind(data.frame(Genotype = "WT", Fitness = 1, MutatorFactor = 1, stringsAsFactors = FALSE), df) if(prodNeg) colnames(df)[match("Fitness", colnames(df))] <- "Death_rate" class(df) <- c("evalAllGenotypesFitAndMut", class(df)) return(df) } ## evalAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256, ## addwt = FALSE, ## model = "") { ## if(order) ## tot <- function(n) {sum(sapply(seq.int(n), ## function(x) choose(n, x) * factorial(x)))} ## else ## tot <- function(n) {2^n} ## nn <- nrow(fitnessEffects$geneModule) -1 + nrow(fitnessEffects$long.geneNoInt) ## tnn <- tot(nn) ## if(tnn > max) { ## m <- paste("There are", tnn, "genotypes.") ## m <- paste(m, "This is larger than max.") ## m <- paste(m, "Adjust max and rerun if you want") ## stop(m) ## } ## if(order) { ## f1 <- function(n) { ## lapply(seq.int(n), function(x) permutations(n = n, r = x)) ## } ## } else { ## f1 <- function(n) { ## lapply(seq.int(n), function(x) combinations(n = n, r = x))} ## } ## genotNums <- f1(nn) ## list.of.vectors <- function(y) { ## ## there's got to be a simpler way ## lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE), ## function(m) m[[1]]) ## } ## genotNums <- list.of.vectors(genotNums) ## names <- c(fitnessEffects$geneModule$Gene[-1], ## fitnessEffects$long.geneNoInt$Gene) ## genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]), ## function(z) ## paste(z, ## collapse = if(order){" > "} else {", "} ))) ## ## This ain't the best, as we repeatedly read and convert ## ## fitnessEffects. If this were slow, prepare C++ function that takes ## ## vectors of genotypes and uses same fitnessEffects. ## if(model %in% c("Bozic", "bozic1", "bozic2") ) ## prodNeg <- TRUE ## else ## prodNeg <- FALSE ## allf <- vapply(genotNums, ## function(x) evalRGenotype(x, fitnessEffects, FALSE, prodNeg), ## 1.1) ## df <- data.frame(Genotype = genotNames, Fitness = allf, ## stringsAsFactors = FALSE) ## if(addwt) ## df <- rbind(data.frame(Genotype = "WT", Fitness = 1, ## stringsAsFactors = FALSE), df) ## if(prodNeg) ## colnames(df)[match("Fitness", colnames(df))] <- "Death_rate" ## return(df) ## } fitnessEffectsToIgraph <- function(rT, epistasis, orderEffects) { df0 <- df1 <- df2 <- data.frame() if(!is.null(rT)) { df0 <- rT[, c("parent", "child", "typeDep")] } if(!is.null(epistasis)) { df1 <- to.pairs.modules(epistasis, ":") } if(!is.null(orderEffects)) { df2 <- to.pairs.modules(orderEffects, ">") } df <- rbind(df0, df1, df2) ## for special case of simple epi effects if(nrow(df) == 0) return(NULL) g1 <- graph.data.frame(df) E(g1)$color <- "black" E(g1)$color[E(g1)$typeDep == "SM"] <- "blue" E(g1)$color[E(g1)$typeDep == "XMPN"] <- "red" E(g1)$color[E(g1)$typeDep == "epistasis"] <- "orange" E(g1)$color[E(g1)$typeDep == "orderEffect"] <- "orange" E(g1)$lty <- 1 E(g1)$lty[E(g1)$typeDep == "epistasis"] <- 2 E(g1)$lty[E(g1)$typeDep == "orderEffect"] <- 3 E(g1)$arrow.mode <- 2 E(g1)$arrow.mode[E(g1)$typeDep == "epistasis"] <- 0 E(g1)$arrow.mode[E(g1)$typeDep == "orderEffect"] <- 2 return(g1) } plot.fitnessEffects <- function(x, type = "graphNEL", layout = NULL, expandModules = FALSE, autofit = FALSE, scale_char = ifelse(type == "graphNEL", 1/10, 5), return_g = FALSE, lwdf = 1, ## multiplier for lwd for graphNEL ...) { ## some other layouts I find OK ## layout.circle ## layout.reingold.tilford if really a tree ## o.w. it will use the default g <- x$graph if(is.null(g)) stop("This fitnessEffects object can not be ploted this way.", " It is probably one with fitness landscape specification, ", " so you might want to plot the fitness landscape instead.") if(type == "igraph") { if(expandModules && (!x$gMOneToOne)) { ## vlabels <- fe$geneToModule[vertex.attributes(g)$name] vlabels <- x$geneToModule[V(g)$name] V(g)$label <- vlabels } if(autofit) { plot(0, type = "n", axes = FALSE, ann = FALSE) ## ideas from http://stackoverflow.com/questions/14472079/match-vertex-size-to-label-size-in-igraph ## vsize <- (strwidth(V(g)$label) + strwidth("oo")) * 200 ## but this is a kludge. vsize <- (nchar(V(g)$label) + 1) * scale_char plot.igraph(g, vertex.size = vsize, vertex.shape = "rectangle", layout = layout) } else { plot.igraph(g, layout = layout) } if(return_g) return(g) } else if (type == "graphNEL") { g1 <- igraph.to.graphNEL(g) c1 <- unlist(lapply(edgeData(g1), function(x) x$color)) names(c1) <- sub("|", "~", names(c1), fixed = TRUE) s1 <- unlist(lapply(edgeData(g1), function(x) x$lty)) names(s1) <- names(c1) a1 <- unlist(lapply(edgeData(g1), function(x) max(x$arrow.mode - 1, 0))) names(a1) <- names(c1) lwd <- s1 lwd[lwd == 2] <- 2 ## o.w. too thin lwd[lwd == 3] <- 2 ## o.w. too thin lwd <- lwd * lwdf nAttrs <- list() if(expandModules && (!x$gMOneToOne)) { nnodes <- x$geneToModule[nodes(g1)] names(nnodes) <- nodes(g1) nAttrs$label <- nnodes } if(autofit) { nAttrs$width <- (nchar(nAttrs$label) + 1) * scale_char names(nAttrs$width) <- names(nAttrs$label) plot(g1, edgeAttrs = list(arrowsize = a1, lty = s1, lwd = lwd, color = c1), attrs=list(node=list(shape = "rectangle")), nodeAttrs = nAttrs) } else { plot(g1, edgeAttrs = list(arrowsize = a1, lty = s1, lwd = lwd, color = c1), nodeAttrs = nAttrs) } if(return_g) return(g1) } else { stop("plot type not recognized") } } ## plot.fitnessEffects <- plotFitnessEffects ## FIXME: in the help: say we cannot properly show 3- or higher order ## interactions. nr_oncoSimul.internal <- function(rFE, birth, death, mu, initSize, sampleEvery, detectionSize, finalTime, initSize_species, initSize_iter, seed, verbosity, speciesFS, ratioForce, typeFitness, max.memory, mutationPropGrowth, initMutant, max.wall.time, keepEvery, K, detectionDrivers, onlyCancer, errorHitWallTime, max.num.tries, errorHitMaxTries, minDetectDrvCloneSz, extraTime, keepPhylog, detectionProb, AND_DrvProbExit, MMUEF = NULL, ## avoid partial matching, and set default fixation = NULL ## avoid partial matching ) { default_min_successive_fixation <- 50 ## yes, set at this for now if(!inherits(rFE, "fitnessEffects")) stop(paste("rFE must be an object of class fitnessEffects", "as created, for instance, with function", "allFitnessEffects")) if(countGenesFe(rFE) < 2) { stop("There must be at least two genes (loci) in the fitness effects.", "If you only care about a degenerate case with just one,", "you can enter a second gene (locus)", "with fitness effect of zero.") } if( (length(mu) == 1) && !(is.null(names(mu)))) { stop("A length 1 mutation, but named. ", "This is ambiguous. ", "If you want per-gene mutation rates, each gene", "must have its entry in the mu vector. ", "(And regardless of per-gene mutation rates ", " there must be at least two gene/loci).") } ## Must deal with objects from previous, pre flfast, modifications ## Could move this way down to the bottom, right before ## .Call if(!exists("fitnessLandscape_gene_id", where = rFE)) { rFE$fitnessLandscape_df <- data.frame() rFE$fitnessLandscape_gene_id <- data.frame() } namedGenes <- allNamedGenes(rFE) if( length(mu) > 1) { if(is.null(names(mu))) stop("When using per-gene mutation rates the ", "mu vector must be named ", "(and if you have noIntGenes, those must have names).") if(length(mu) != countGenesFe(rFE)) stop("When using per-gene mutation rates, ", "there must be the same number of genes in the ", "mu vector and the fitness effects object.") if(!identical(sort(namedGenes$Gene), sort(names(mu)))) stop("When using per-gene mutation rates, ", "names of genes must match those in the ", "restriction table.") mu <- mu[order(match(names(mu), namedGenes$Gene))] ## Hyperparanoid check. Should never, ever, happen. if(!identical(names(mu), namedGenes$Gene)) stop("Names of re-ordered mu do not match names of genes") minmu <- 1e-40 if(any(mu <= minmu)) stop(paste("At least one per-gene mutation rate is negative", "or less than", minmu,". Remember that the per-base", "mutation rate in the human genome is about 1e-11 to 1e-9.")) } if(!is.null(initMutant)) { initMutantString <- initMutant if(length(grep(">", initMutant))) { initMutant <- nice.vector.eo(initMutant, ">") } else if(length(grep(",", initMutant))) { initMutant <- nice.vector.eo(initMutant, ",") } initMutant <- getGeneIDNum(rFE$geneModule, rFE$long.geneNoInt, rFE$fitnessLandscape_gene_id, initMutant, FALSE) if(length(initMutant) >= countGenesFe(rFE)) { stop("For initMutant you passed as many, or more genes, mutated ", "than the number of genes in the genotype (fitness effects).") } } else { initMutant <- vector(mode = "integer") initMutantString <- "" } ## these are never user options ## if(initSize_species < 10) { ## warning("initSize_species too small?") ## } ## if(initSize_iter < 100) { ## warning("initSize_iter too small?") ## } if(typeFitness %in% c("bozic1", "bozic2")) { if(nrow(rFE$fitnessLandscape_df) > 0) warning("Bozic model passing a fitness landscape will not work", " for now.") ## FIXME: bozic and fitness landscape ## the issue is that in the C++ code we directly do ## s = birth rate - 1 ## but we would need something different ## Can be done going through epistasis, etc. thesh <- unlist(lapply(rFE$long.rt, function(x) x$sh)) ## thes <- unlist(lapply(rFE$long.rt, function(x) x$s)) thes <- unlist(c(lapply(rFE$long.rt, function(x) x$s), lapply(rFE$long.epistasis, function(x) x$s), lapply(rFE$long.orderEffects, function(x) x$s), rFE$long.geneNoInt$s)) if(any(thes > 1 )) { m <- paste("You are using a Bozic model with", "the new restriction specification, and you have", "at least one s > 1.", "But that is the same as setting that to 1:", "we obviously cannot allow negative death rates,", "nor problems derived from multiplying odd or even", "numbers of negative numbers.", "You can set those > 1 to 1, but then you will", "eventually get the simulations to abort when the", "death rate becomes 0.") stop(m) } if(any( (thesh <= -1) & (thesh > -Inf))) { m <- paste("You are using a Bozic model with", "the new restriction specification, and you have", "at least one sh <= -1. Maybe you mean -Inf?") warning(m) } if(any(thes == 1 )) { m <- paste("You are using a Bozic model with", "the new restriction specification, and you have", "at least one s of 1. If that gene is mutated, ", "this will lead to a death rate of 0", "and the simulations will abort when you get a non finite", "value.") warning(m) } } if(!is.null(MMUEF)) { if(!inherits(MMUEF, "mutatorEffects")) stop("muEF must be a mutatorEffects object") full2mutator_ <- matchGeneIDs(MMUEF, rFE) if(any(is.na(full2mutator_$Full))) stop("Genes in mutatorEffects not present in fitnessEffects") if(any(is.na(full2mutator_))) stop("full2mutator failed for unknown reasons") full2mutator_ <- full2mutator_$Reduced } else { full2mutator_ <- vector(mode = "numeric", length = 0) ## muEF <- emptyFitnessEffects() } dpr <- detectionProbCheckParse(detectionProb, initSize, verbosity) ## if( !is.null(cPDetect) && (sum(!is.null(p2), !is.null(n2)) >= 1 )) ## stop("Specify only cPDetect xor both of p2 and n2") ## if( (is.null(p2) + is.null(n2)) == 1 ) ## stop("If you pass one of n2 or p2, you must also pass the other. ", ## "Otherwise, we would not know what to do.") ## stopifnot(PDBaseline >= 0) ## stopifnot(n2 > PDBaseline) ## stopifnot(p2 < 1) ## stopifnot(p2 > 0) ## if( is.null(cPDetect) ) cPDetect <- -9 ## if( is.null(p2)) p2 <- 9 ## if( is.null(n2)) n2 <- -9 ## call <- match.call() ## Process the fixed list, if any if(!is_null_na(fixation)) { ng <- namedGenes ## rownames(ng) <- namedGenes[, "Gene"] ## Proposed extension to have exact matching of genotypes ng <- rbind( data.frame(Gene = "_", GeneNumID = -9, stringsAsFactors = FALSE), ng) rownames(ng) <- ng[, "Gene"] ## FIXME ## Later, accept a last argument, called tolerance. ## If not present, set to 0 ## and then, at at the head of fixation_list below if(is.list(fixation)) { if( (is.null(fixation[["fixation_tolerance"]])) || (is.na(fixation[["fixation_tolerance"]]))) { fixation_tolerance <- 0 } else { fixation_tolerance <- as.numeric(fixation[["fixation_tolerance"]]) fixation <- fixation[-which(names(fixation) == "fixation_tolerance")] } if( (is.null(fixation[["min_successive_fixation"]])) || (is.na(fixation[["min_successive_fixation"]]))) { min_successive_fixation <- default_min_successive_fixation } else { min_successive_fixation <- as.integer(fixation[["min_successive_fixation"]]) fixation <- fixation[-which(names(fixation) == "min_successive_fixation")] } if( (is.null(fixation[["fixation_min_size"]])) || (is.na(fixation[["fixation_min_size"]]))) { fixation_min_size <- 0 } else { fixation_min_size <- as.integer(fixation[["fixation_min_size"]]) fixation <- fixation[-which(names(fixation) == "fixation_min_size")] } } else { if(is_null_na(fixation["fixation_tolerance"])) { fixation_tolerance <- 0 } else { fixation_tolerance <- as.numeric(fixation["fixation_tolerance"]) fixation <- fixation[-which(names(fixation) == "fixation_tolerance")] } if(is_null_na(fixation["min_successive_fixation"])) { min_successive_fixation <- default_min_successive_fixation } else { min_successive_fixation <- as.integer(fixation["min_successive_fixation"]) fixation <- fixation[-which(names(fixation) == "min_successive_fixation")] } if(is_null_na(fixation["fixation_min_size"])) { fixation_min_size <- 0 } else { fixation_min_size <- as.integer(fixation["fixation_min_size"]) fixation <- fixation[-which(names(fixation) == "fixation_min_size")] } } if( (fixation_tolerance > 1) || (fixation_tolerance < 0) ) stop("Impossible range for fixation tolerance") if( (min_successive_fixation < 0) ) stop("Impossible range for min_successive_fixation") if( (fixation_min_size < 0) ) stop("Impossible range for fixation_min_size") ## Usual genotype specification and might allow ordered vectors ## in the future fixation_b <- lapply(fixation, nice.vector.eo, sep = ",") ulf <- unlist(fixation_b) if(any(ulf == ">")) stop("Order effects not allowed (yet?) in fixation.") ulfg <- ng[ulf, 1] if(any(is.na(ulfg))) stop(paste("The 'fixation' list contains genes that are not present", " in the fitness effects.")) ## Sorting here is crucial!! fixation_list <- lapply(fixation_b, function(x) sort(ng[x, 2])) fixation_list <- list(fixation_list = fixation_list, fixation_tolerance = fixation_tolerance, min_successive_fixation = min_successive_fixation, fixation_min_size = fixation_min_size) } else { fixation_list <- list() } return(c( nr_BNB_Algo5(rFE = rFE, mu_ = mu, death = death, initSize = initSize, sampleEvery = sampleEvery, detectionSize = detectionSize, finalTime = finalTime, initSp = initSize_species, initIt = initSize_iter, seed = seed, verbosity = verbosity, speciesFS = speciesFS, ratioForce = ratioForce, typeFitness_ = typeFitness, maxram = max.memory, mutationPropGrowth = as.integer(mutationPropGrowth), initMutant_ = initMutant, maxWallTime = max.wall.time, keepEvery = keepEvery, K = K, detectionDrivers = detectionDrivers, onlyCancer = onlyCancer, errorHitWallTime = errorHitWallTime, maxNumTries = max.num.tries, errorHitMaxTries = errorHitMaxTries, minDetectDrvCloneSz = minDetectDrvCloneSz, extraTime = extraTime, keepPhylog = keepPhylog, MMUEF = MMUEF, full2mutator_ = full2mutator_, n2 = dpr["n2"], p2 = dpr["p2"], PDBaseline = dpr["PDBaseline"], cPDetect_i= dpr["cPDetect"], checkSizePEvery = dpr["checkSizePEvery"], AND_DrvProbExit = AND_DrvProbExit, fixation_list = fixation_list), Drivers = list(rFE$drv), ## but when doing pops, these will be repeated geneNames = list(names(getNamesID(rFE))), InitMutant = initMutantString )) } countGenesFe <- function(fe) { ## recall geneModule has Root always ## We want to be able to use objects that did not have ## the fitness landscape component if(exists("fitnessLandscape_gene_id", where = fe)) return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) + nrow(fe$fitnessLandscape_gene_id) - 1) else return(nrow(fe$geneModule) + nrow(fe$long.geneNoInt) - 1) } allNamedGenes <- function(fe){ ## Returns a data frame with genes and their names and verifies all ## genes have names. ## Root should always be first, but just in case ## avoid assuming it ## This does is not a good idea as it assumes the user did not use ## "keepInput = FALSE". ## lni <- length(fe$noIntGenes) ## ## FIXME:test ## if((lni > 0) && ## (is.null(names(fe$noIntGenes)))) ## stop("When using per-gene mutation rates the ", ## "no interaction genes must be named ", ## "(i.e., the noIntGenes vector must have names).") ## accommodate objects w/o fitnessLandscape if(!is.null(fe$fitnessLandscape) && nrow(fe$fitnessLandscape)) { gn <- gtools::mixedsort( colnames(fe$fitnessLandscape)[-ncol(fe$fitnessLandscape)]) v1 <- data.frame(Gene = gn, GeneNumID = seq.int(length(gn)), stringsAsFactors = FALSE) } else { v1 <- fe$geneModule[, c("Gene", "GeneNumID")] if(nrow(fe$long.geneNoInt)) { v1 <- rbind(v1, fe$long.geneNoInt[, c("Gene", "GeneNumID")]) } if(any(v1[, "Gene"] == "Root")) v1 <- v1[-which(v1[, "Gene"] == "Root"), ] } rownames(v1) <- NULL if(any(v1$Gene == "WT")) { warning("A gene is named WT. You can expect problems ", "because we use WT to denote the wildtype ", "genotype. You might want to change it.") } return(v1) } get.gene.counts <- function(x) { # , timeSample = "last", # typeSample = "whole") { ## From get.mut.vector. Used for now just for testing timeSample <- "last" the.time <- get.the.time.for.sample(x, timeSample, NULL) if(the.time < 0) { counts <- rep(0, length(x$geneNames)) names(counts) <- x$geneNames freq <- rep(NA, length(x$geneNames)) names(freq) <- x$geneNames return(list(counts = counts, freq = freq, popSize = 0)) } pop <- x$pops.by.time[the.time, -1] if(all(pop == 0)) { stop("You found a bug: this should never happen") } ## if(typeSample %in% c("wholeTumor", "whole")) { popSize <- x$PerSampleStats[the.time, 1] counts <- as.vector(tcrossprod(pop, x$Genotypes)) names(counts) <- x$geneNames return(list(counts = counts, freq = counts/popSize, popSize = popSize)) ## return( (tcrossprod(pop, ## x$Genotypes)/popSize) ) ## } else if (typeSample %in% c("singleCell", "single")) { ## return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)]) ## } else { ## stop("Unknown typeSample option") ## } } geneCounts <- function(x) { if(inherits(x, "oncosimulpop")) { return( do.call(rbind, lapply(x, function(z) get.gene.counts(z)$counts)) ) } else { return( get.gene.counts(x)$counts) } } ## geneNumIDReset <- function(x, ref){ ## ## Set GeneNumID of a fitnessEffect object, x, using ref as the ## ## reference fitness effect object. ## ## Check also if all in x are in ref. ## gg <- allNamedGenes(ref) ## gnid <- gg$GeneNumID ## names(gnid) <- gg$Gene ## ## FIXME: this later and conditional on what is in thee ## gnid <- c("Root" = 0, gnid) ## if(!all(x$geneModule$Gene %in% names(gnid) )) ## stop("Some genes not in reference fitnessEffects (rebasing geneModule)") ## x$geneModule$GeneNumID <- gnid[geneModule$Gene] ## ## and then go over all the lists in the x object. ## if(nrow(x$long.geneNoInt)) { ## ## now, mapping for the noInt if this is mutator ## if(!all(x$long.geneNoInt$Gene %in% names(gnid) )) ## stop("Some genes not in reference fitnessEffects (rebasing geneNoInt)") ## x$long.geneNoInt$GeneNumID <- gnid[long.geneNoInt$Gene] ## } ## } matchGeneIDs <- function(x, refFE) { n1 <- allNamedGenes(x) n2 <- allNamedGenes(refFE) colnames(n1)[2] <- "Reduced" colnames(n2)[2] <- "Full" ## Non standard evaluation thing and a note being generated in check. ## See also http://stackoverflow.com/questions/33299845/when-writing-functions-for-an-r-package-should-i-avoid-non-standard-evaluation ## But does not work well with replace. So use the NULL trick Reduced <- NULL dplyr::full_join(n2, n1, by = "Gene") %>% mutate(Reduced = replace(Reduced, is.na(Reduced), -9)) } detectionProbCheckParse <- function(x, initSize, verbosity) { default_p2 <- 0.1 default_n2 <- 2 * initSize default_PDBaseline <- 1.2 * initSize default_checkSizePEvery <- 20 ## No default cPDetect. That is done from p2 and n2 in C++. if(is_null_na(x)) { ## if((length(x) == 1) && (is.na(x))) { y <- vector() y["cPDetect"] <- -9 y["p2"] <- 9 y["n2"] <- -9 y["PDBaseline"] <- -9 y["checkSizePEvery"] <- Inf return(y) } else if((length(x) == 1) && (x == "default")) { ## Populate with defaults y <- vector() y["p2"] <- default_p2 y["n2"] <- default_n2 y["PDBaseline"] <- default_PDBaseline y["checkSizePEvery"] <- default_checkSizePEvery x <- y } if(length(x) >= 1) { if( !all(names(x) %in% c("cPDetect", "p2", "n2", "PDBaseline", "checkSizePEvery"))) stop("Names of some components of detectionProb are not recognized.", " Check for possible typos.") } ## This ain't conditional. If not given, always check if( !is_null_na(x["cPDetect"]) && (sum(!is_null_na(x["p2"]), !is_null_na(x["n2"])) >= 1 )) stop("Specify only cPDetect xor both of p2 and n2") if( (is_null_na(x["p2"]) + is_null_na(x["n2"])) == 1 ) stop("If you pass one of n2 or p2, you must also pass the other. ", "Otherwise, we would not know what to do.") if(is_null_na(x["PDBaseline"])) { x["PDBaseline"] <- default_PDBaseline if(verbosity > -1) message("\n PDBaseline set to default value of ", default_PDBaseline, "\n") } if(is_null_na(x["checkSizePEvery"])) { x["checkSizePEvery"] <- default_checkSizePEvery if(verbosity > -1) message("\n checkSizePEvery set to default value of ", default_checkSizePEvery, "\n") } ## If we get here, we checked consistency of whether cPDetect or p2/n2. ## Fill up with defaults the missing values if(is_null_na(x["cPDetect"])) { if(is_null_na(x["p2"])) { if(!is_null_na(x["n2"])) stop("Eh? no p2 but n2? Bug") x["n2"] <- default_n2 x["p2"] <- default_p2 if(verbosity > -1) message("\n n2, p2 set to default values of ", default_n2, ", ", default_p2, "\n") } } if(x["checkSizePEvery"] <= 0) stop("checkSizePEvery <= 0") if(x["PDBaseline"] <= 0) stop("PDBaseline <= 0") if(!is_null_na(x["n2"])) { if(x["n2"] <= x["PDBaseline"]) stop("n2 <= PDBaseline") if(x["p2"] >= 1) stop("p2 >= 1") if(x["p2"] <= 0) stop("p2 <= 0") x["cPDetect"] <- -9 } else { ## only if x["cPDetect"] is not NA if(is_null_na(x["cPDetect"])) stop("eh? you found a bug")## paranoia x["n2"] <- -9 x["p2"] <- -9 if(verbosity > -1) message("\n Using user-specified cPDetect as n2, p2 not given.\n") } return(x) } sampledGenotypes <- function(y, genes = NULL) { ## From a popSample object, or a matrix for that matter, ## show the sampled genotypes and their frequencies if(!is.null(genes)) { cols <- which(colnames(y) %in% genes ) y <- y[, cols] } ## nn <- colnames(y) genotlabel <- function(u, nn = colnames(y)) { if(any(is.na(u))) return(NA) else { return(paste(sort(nn[as.logical(u)]), collapse = ", ")) } } ## df <- data.frame(table( ## apply(y, 1, function(z) paste(sort(nn[as.logical(z)]), collapse = ", ") ) ## )) df <- data.frame(table( apply(y, 1, genotlabel), useNA = "ifany"), stringsAsFactors = TRUE) gn <- as.character(df[, 1]) gn[gn == ""] <- "WT" df <- data.frame(Genotype = gn, Freq = df[, 2], stringsAsFactors = FALSE) attributes(df)$ShannonI <- shannonI(na.omit(df)$Freq) class(df) <- c("sampledGenotypes", class(df)) return(df) } print.sampledGenotypes <- function(x, ...) { print.data.frame(x, ...) cat("\n Shannon's diversity (entropy) of sampled genotypes: ") cat(attributes(x)$ShannonI, "\n") } list_g_matches_fixed <- function(x, y) { ## Internal function, for testing the fixation output. ## x and y are vectors ## x is the set of output genotypes, y the set of fixed ## genotypes/subset of genotypes. ## Yes, this function has tests too in test.fixation.R ## All genotypes in x satisfy that they are supersets of at least one ## in y? That is true if, for every element in x, at least one y in ## that x. if(is.list(y)) y <- unlist(y) y.nice <- lapply(y, nice.vector.eo, sep = ",") x.nice <- lapply(x, nice.vector.eo, sep = ",") fu <- function(u, y.nice) any(unlist(lapply(y.nice, function(z) all(z %in% u)))) return(all(unlist(lapply(x.nice, fu, y.nice)))) } ## emptyFitnessEffects <- function() { ## list(long.rt = list(), ## long.epistasis = list(), ## long.orderEffects = list(), ## long.geneNoInt = list(), ## geneModule = list(), ## gMOneToOne = TRUE, ## geneToModule = list(), ## graph = NULL, ## drv = vector(mode = "integer", length = 0) ## ) ## } ### Later, for all the effects, we will do some kind of dplyr match? ### a, b, c, in fitness, only a, c in mut. ### fitness table for a,b,c ### each row name transformed removing b (so leaving only present) ### each row transformed matched to row in mut table. ## t1 <- data.frame(v1 = c("a,b", "a,c", "b"), v2 = c("b", "c", "b"), v3 = 1:3, stringsAsFactors = FALSE) ## t2 <- data.frame(v2 = c("b", "c"), v4 = c(11, 12), stringsAsFactors = FALSE) ## full_join(t1, t2, by = "v2") ## FIXME ## new exit code ## check: ## baseline < n2 ## p2 < 1 ## baseline defaults to init size + .1 ## use c < -9, n2 < -9, p2 < -9 for no values ## and check one of c or p2 and n2 are valid if using exit