R/from_genotype_utils.R
50a94207
 ## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
 
 ## This program is free software: you can redistribute it and/or modify
 ## it under the terms of the GNU General Public License as published by
 ## the Free Software Foundation, either version 3 of the License, or
 ## (at your option) any later version.
 
 ## This program is distributed in the hope that it will be useful,
 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 ## GNU General Public License for more details.
 
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 
 
 ## Functions that allow passing a matrix or data frame of mappings
 ## genotype -> fitness so this is taken as input in fitnessEffects.
65cb86f3
 ## and some related functions
 
 
 
 to_Magellan <- function(x, file,
                         max_num_genotypes = 2000) {
f9a38e24
     ## Go directly if you have, as entry, an object from
c95df82d
     ## rfitness!! to_Fitness_Matrix can be very slow.
edc593d3
     ## But often, when we use allFitnessEffects, we
     ## obtain the fitness landscape as genotype_fitness_matrix
     ## FIXME: we could use that fact here.
     ## or maybe in to_Fitness_Matrix
65cb86f3
     if(is.null(file)) {
         file <- tempfile()
         cat("\n Using file ", file, "\n")
     }
c95df82d
     if(inherits(x, "genotype_fitness_matrix")) {
         write(rep(2, ncol(x) - 1), file = file, ncolumns = ncol(x) - 1)
         write.table(x, file = file, append = TRUE,
                     row.names = FALSE, col.names = FALSE, sep = " ")
     } else {
         gfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)$gfm
         write(rep(2, ncol(gfm) - 1), file = file, ncolumns = ncol(gfm) - 1)
         write.table(gfm, file = file, append = TRUE,
                     row.names = FALSE, col.names = FALSE, sep = " ")
     }
65cb86f3
 }
 
e719a81d
 
 
 ## ## genotype_fitness_matrix -> fitness landscape as data frame
 ## fmatrix_to_afe <- function(x) {
 ##     stopifnot(inherits(x, "genotype_fitness_matrix"))
 ##     y <- x[, -ncol(x)]
 ##     nn <- apply(y, 1,
 ##                 function(u) paste(sort(colnames(y)[as.logical(u)]),
 ##                                   collapse = ", "))
 ##     nn[nn == ""] <- "WT"
 ##     return(data.frame(Genotype = nn, Fitness = x[, ncol(x)],
 ##            stringsAsFactors = FALSE))
 ## }
 
65cb86f3
 to_Fitness_Matrix <- function(x, max_num_genotypes) {
     ## A general converter. Ready to be used by plotFitnessLandscape and
     ## Magellan exporter.
f9a38e24
 
     ## FIXME: really, some of this is inefficient. Very. Fix it.
65cb86f3
     if( (inherits(x, "genotype_fitness_matrix")) ||
         ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) {
         ## Why this? We go back and forth twice. We need both things. We
         ## could construct the afe below by appropriately pasting the
         ## columns names
4d70b942
         ## if( (is.null(colnames(x))) || any(grepl("^$", colnames(x))))
         ##    stop("Matrix x must have column names")
ab439943
 
e719a81d
         ## This could use fmatrix_to_afe, above!!!
ab439943
         ## Major change as of flfast: no longer using from_genotype_fitness
65cb86f3
         afe <- evalAllGenotypes(allFitnessEffects(
ab439943
             genotFitness = x
             ##, epistasis = from_genotype_fitness(x)
         ),
65cb86f3
             order = FALSE, addwt = TRUE, max = max_num_genotypes)
ab439943
 
65cb86f3
         ## Might not be needed with the proper gfm object (so gmf <- x)
         ## but is needed if arbitrary matrices.
fd073de1
         gfm <- allGenotypes_to_matrix(afe)
65cb86f3
     } else if(inherits(x, "fitnessEffects")) {
         if(!is.null(x$orderEffects) )
             stop("We cannot yet deal with order effects")
         afe <- evalAllGenotypes(x,
                                 order = FALSE,
                                 addwt = TRUE, max = max_num_genotypes)
         gfm <- allGenotypes_to_matrix(afe)
     } else if( (inherits(x, "evalAllGenotypes")) ||
                (inherits(x, "evalAllGenotypesMut"))) {
         if(any(grepl(">", x[, 1], fixed = TRUE)))
             stop("We cannot deal with order effects yet.")
         x <- x[, c(1, 2)]
         if(x[1, "Genotype"] != "WT") {
             ## Yes, because we expect this present below
             x <- rbind(data.frame(Genotype = "WT",
                                   Fitness = 1,
                                   stringsAsFactors = FALSE),
                        x)
         }
         afe <- x
         ## in case we pass an evalAllgenotypesfitandmut
         gfm <- allGenotypes_to_matrix(afe)
     } else if(is.data.frame(x)) {
         ## Assume a two-column data frame of genotypes as character
         ## vectors and fitness
         if(colnames(x)[2] != "Fitness")
fd073de1
             stop("We cannot guess what you are passing here")
65cb86f3
         afe <- evalAllGenotypes(allFitnessEffects(genotFitness = x),
                                 order = FALSE, addwt = TRUE,
                                 max = max_num_genotypes)
         gfm <- allGenotypes_to_matrix(afe)
     } else {
fd073de1
         stop("We cannot guess what you are passing here")
65cb86f3
     }
     return(list(gfm = gfm, afe = afe))
fd073de1
 }
50a94207
 
ab439943
 ## Based on from_genotype_fitness
 ## but if we are passed a fitness landscapes as produced by
edc593d3
 ## rfitness, do nothing. Well, it actually does something.
 
50a94207
 
ab439943
 to_genotFitness_std <- function(x, simplify = TRUE,
                                 min_filter_fitness = 1e-9,
                                 sort_gene_names = TRUE) {
50a94207
     ## Would break with output from allFitnessEffects and
     ## output from allGenotypeAndMut
fd073de1
 
c06244a8
     ## For the very special and weird case of
     ## a matrix but only a single gene so with a 0 and 1
     ## No, this is a silly and meaningless case.
     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
fd073de1
 
     ## } else  blabla:
 
c06244a8
     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
         stop("Input must inherit from matrix or data.frame.")
fd073de1
 
c06244a8
     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
     ##     stop(paste0("Genotype fitness input either two-column data frame",
     ##          " or a numeric matrix with > 2 columns."))
     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
     ##     stop(paste0("It looks like you have a matrix for a single genotype",
     ##                 " of a single gene. For this degenerate cases use",
     ##                 " a data frame specification."))
fd073de1
 
50a94207
     if(ncol(x) > 2) {
c06244a8
         if(inherits(x, "matrix")) {
             if(!is.numeric(x))
                 stop("A genotype fitness matrix/data.frame must be numeric.")
         } else if(inherits(x, "data.frame")) {
             if(!all(unlist(lapply(x, is.numeric))))
                 stop("A genotype fitness matrix/data.frame must be numeric.")
         }
fd073de1
 
c06244a8
         ## We are expecting here a matrix of 0/1 where columns are genes
         ## except for the last column, that is Fitness
50a94207
         ## Of course, can ONLY work with epistastis, NOT order
ab439943
         ## return(genot_fitness_to_epistasis(x))
         if(any(duplicated(colnames(x))))
             stop("duplicated column names")
fd073de1
 
ab439943
         cnfl <- which(colnames(x)[-ncol(x)] == "")
         if(length(cnfl)) {
             freeletter <- setdiff(LETTERS, colnames(x))[1]
             if(length(freeletter) == 0) stop("Renaiming failed")
             warning("One column named ''. Renaming to ", freeletter)
             colnames(x)[cnfl] <- freeletter
         }
         if(!is.null(colnames(x)) && sort_gene_names) {
             ncx <- ncol(x)
             cnx <- colnames(x)[-ncx]
             ocnx <- gtools::mixedorder(cnx)
             if(!(identical(cnx[ocnx], cnx))) {
                 message("Sorting gene column names alphabetically")
                 x <- cbind(x[, ocnx, drop = FALSE], Fitness = x[, (ncx)])
             }
         }
fd073de1
 
ab439943
         if(is.null(colnames(x))) {
             ncx <- (ncol(x) - 1)
             message("No column names: assigning gene names from LETTERS")
             if(ncx > length(LETTERS))
                 stop("More genes than LETTERS; please give gene names",
                      " as you see fit.")
             colnames(x) <- c(LETTERS[1:ncx], "Fitness")
         }
fd073de1
 
ab439943
         if(!all(as.matrix(x[, -ncol(x)]) %in% c(0, 1) ))
             stop("First ncol - 1 entries not in {0, 1}.")
50a94207
     } else {
c06244a8
         if(!inherits(x, "data.frame"))
             stop("genotFitness: if two-column must be data frame")
50a94207
         ## Make sure no factors
ab439943
         if(is.factor(x[, 1])) {
             warning("First column of genotype fitness is a factor. ",
                     "Converting to character.")
             x[, 1] <- as.character(x[, 1])
             }
c06244a8
         ## Make sure no numbers
         if(any(is.numeric(x[, 1])))
             stop(paste0("genotFitness: first column of data frame is numeric.",
                         " Ambiguous and suggests possible error. If sure,",
                         " enter that column as character"))
fd073de1
 
50a94207
         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
         if(omarker) {
             ## do something. To be completed
             stop("This code not yet ready")
             ## You can pass to allFitnessEffects genotype -> fitness mappings that
             ## involve epistasis and order. But they must have different
             ## genes. Otherwise, it is not manageable.
         }
c06244a8
         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
             ## the second case above corresponds to passing just single letter genotypes
             ## as there is not a single marker
             x <- x[, c(1, 2), drop = FALSE]
50a94207
             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
                 message("Column names of object not Genotype and Fitness.",
                         " Renaming them assuming that is what you wanted")
                 colnames(x) <- c("Genotype", "Fitness")
             }
c06244a8
             if((!omarker) && (!emarker) && (!nogoodepi)) {
ab439943
                 message("All single-gene genotypes as input to to_genotFitness_std")
c06244a8
             }
50a94207
             ## Yes, we need to do this to  scale the fitness and put the "-"
ab439943
             x <- allGenotypes_to_matrix(x)
50a94207
         }
     }
ab439943
     ## And, yes, scale all fitnesses by that of the WT
     whichroot <- which(rowSums(x[, -ncol(x), drop = FALSE]) == 0)
     if(length(whichroot) == 0) {
         warning("No wildtype in the fitness landscape!!! Adding it with fitness 1.")
         x <- rbind(c(rep(0, ncol(x) - 1), 1), x)
     } else if(x[whichroot, ncol(x)] != 1) {
         warning("Fitness of wildtype != 1.",
                 " Dividing all fitnesses by fitness of wildtype.")
         vwt <- x[whichroot, ncol(x)]
         x[, ncol(x)] <- x[, ncol(x)]/vwt
     }
     if(any(is.na(x)))
         stop("NAs in fitness matrix")
edc593d3
     ## Make sure correct class
     if(is.data.frame(x)) x <- as.matrix(x)
     stopifnot(inherits(x, "matrix"))
     ## if(simplify) {
     ##     return(x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE])
     ## } else {
     ##     return(x)
     ## }
ab439943
     if(simplify) {
edc593d3
         x <- x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE]
ab439943
     }
edc593d3
     class(x) <- c("matrix", "genotype_fitness_matrix")
     return(x)
50a94207
 }
 
ab439943
 ## Deprecated after flfast
 ## to_genotFitness_std is faster and has better error checking
 ## and is very similar and does not use
 ## the genot_fitness_to_epistasis, which is not reasonable anymore.
 
 ## from_genotype_fitness <- function(x) {
 ##     ## Would break with output from allFitnessEffects and
 ##     ## output from allGenotypeAndMut
fd073de1
 
ab439943
 ##     ## For the very special and weird case of
 ##     ## a matrix but only a single gene so with a 0 and 1
 ##     ## No, this is a silly and meaningless case.
 ##     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
fd073de1
 
 ##     ## } else  blabla:
 
ab439943
 ##     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
 ##         stop("Input must inherit from matrix or data.frame.")
fd073de1
 
ab439943
 ##     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
 ##     ##     stop(paste0("Genotype fitness input either two-column data frame",
 ##     ##          " or a numeric matrix with > 2 columns."))
 ##     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
 ##     ##     stop(paste0("It looks like you have a matrix for a single genotype",
 ##     ##                 " of a single gene. For this degenerate cases use",
 ##     ##                 " a data frame specification."))
fd073de1
 
ab439943
 ##     if(ncol(x) > 2) {
 ##         if(inherits(x, "matrix")) {
 ##             if(!is.numeric(x))
 ##                 stop("A genotype fitness matrix/data.frame must be numeric.")
 ##         } else if(inherits(x, "data.frame")) {
 ##             if(!all(unlist(lapply(x, is.numeric))))
 ##                 stop("A genotype fitness matrix/data.frame must be numeric.")
 ##         }
fd073de1
 
ab439943
 ##         ## We are expecting here a matrix of 0/1 where columns are genes
 ##         ## except for the last column, that is Fitness
 ##         ## Of course, can ONLY work with epistastis, NOT order
 ##         return(genot_fitness_to_epistasis(x))
 ##     } else {
 ##         if(!inherits(x, "data.frame"))
 ##             stop("genotFitness: if two-column must be data frame")
 ##         ## Make sure no factors
 ##         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
 ##         ## Make sure no numbers
 ##         if(any(is.numeric(x[, 1])))
 ##             stop(paste0("genotFitness: first column of data frame is numeric.",
 ##                         " Ambiguous and suggests possible error. If sure,",
 ##                         " enter that column as character"))
fd073de1
 
ab439943
 ##         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
 ##         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
 ##         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
 ##         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
 ##         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
 ##         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
 ##         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
 ##         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
 ##         if(omarker) {
 ##             ## do something. To be completed
 ##             stop("This code not yet ready")
 ##             ## You can pass to allFitnessEffects genotype -> fitness mappings that
 ##             ## involve epistasis and order. But they must have different
 ##             ## genes. Otherwise, it is not manageable.
 ##         }
 ##         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
 ##             ## the second case above corresponds to passing just single letter genotypes
 ##             ## as there is not a single marker
 ##             x <- x[, c(1, 2), drop = FALSE]
 ##             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
 ##                 message("Column names of object not Genotype and Fitness.",
 ##                         " Renaming them assuming that is what you wanted")
 ##                 colnames(x) <- c("Genotype", "Fitness")
 ##             }
 ##             if((!omarker) && (!emarker) && (!nogoodepi)) {
 ##                 message("All single-gene genotypes as input to from_genotype_fitness")
 ##             }
 ##             ## Yes, we need to do this to  scale the fitness and put the "-"
 ##             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
 ##         }
 ##     }
 ## }
 
50a94207
 
 
ab439943
 
 
 ## No longer used for real
50a94207
 genot_fitness_to_epistasis <- function(x) {
     ## FIXME future:
 
     ## - Nope, order cannot be dealt with here. Not a matrix of 0 and 1.
 
     ## - modify "fitnessEffects" so we can take a component that is
     ## - "genot_fitness"; so this would never be exposed to the user
 
 
     ## Why we should not combine this specification with other terms? If
     ## you use this is because you say "this is the mapping genotype ->
     ## fitness. Period." so we should not combine other terms (or other
     ## terms that involve these genes)
fd073de1
 
50a94207
     nr <- nrow(x)
     if(nr < (2^(ncol(x) - 1)))
         message("Number of genotypes less than 2^L.",
                 " Missing genotype will be set to 1")
     ## This is specific if only epistasis, not order
     if(nr > (2^(ncol(x) - 1)))
         stop("Number of genotypes > 2^L. Repeated entries?")
     f <- x[, ncol(x)]
     ## Why should I stop?
     if(any(f < 0))
         message("Negative fitnesses. Watch out if you divide by the wildtype")
c06244a8
     x <- x[, -ncol(x), drop = FALSE]
50a94207
     wt <- which(rowSums(x) == 0)
     fwt <- 1
     if(length(wt) == 1)
         fwt <- f[wt]
ab439943
     ## No longer being used when we pass fitness landscapse: flfast
50a94207
     if(!isTRUE(all.equal(fwt, 1))) {
         message("Fitness of wildtype != 1. ",
                 "Dividing all fitnesses by fitness of wildtype.")
         f <- f/fwt
     }
fd073de1
 
4d70b942
     if(is.null(colnames(x)) || any(grepl("^$", colnames(x))) ) {
         message("Setting/resetting gene names because one or more are missing.",
                 " If this is not what you want, pass a matrix",
                 " with all columns named.")
50a94207
         if(ncol(x) <= 26)
             colnames(x) <- LETTERS[1:ncol(x)]
         else
             colnames(x) <- paste0("G", seq.int(ncol(x)))
     }
     cn <- colnames(x)
     x2 <- matrix("", nrow = nrow(x), ncol = ncol(x))
     x2[x == 0] <- "-"
     epin <- apply(x2, 1, function(z) paste(paste0(z, cn), collapse = ":"))
     if(anyDuplicated(epin))
         stop("Non unique names")
     s <- f - 1
     names(s) <- epin
     return(s)
 }
 
 
 
 
fd073de1
 allGenotypes_to_matrix <- function(x) {
50a94207
     ## Makes no sense to allow passing order: the matrix would have
     ## repeated rows. A > B and B > A both have exactly A and B
fd073de1
 
50a94207
     ## Take output of evalAllGenotypes or identical data frame and return
     ## a matrix with 0/1 in a column for each gene and a final column of
     ## Fitness
 
ab439943
     if(is.factor(x[, 1])) {
         warning("First column of genotype fitness is a factor. ",
                 "Converting to character.")
         x[, 1] <- as.character(x[, 1])
     }
50a94207
     ## A WT can be specified with string "WT"
     anywt <- which(x[, 1] == "WT")
     if(length(anywt) > 1) stop("More than 1 WT")
     if(length(anywt) == 1) {
         fwt <- x[anywt, 2]
         x <- x[-anywt, ]
c06244a8
         ## Trivial case of passing just a WT?
50a94207
     } else {
ab439943
         warning("No WT genotype. Setting its fitness to 1.")
50a94207
         fwt <- 1
     }
     splitted_genots <- lapply(x$Genotype,
                              function(z) nice.vector.eo(z, ","))
 
f9a38e24
     all_genes <- sort(unique(unlist(splitted_genots)))
50a94207
 
     m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes))
     the_match <- lapply(splitted_genots,
                         function(z) match(z, all_genes))
     ## A lot simpler with a loop
     for(i in 1:nrow(m)) {
         m[i, the_match[[i]]] <- 1
     }
     m <- cbind(m, x[, 2])
     colnames(m) <- c(all_genes, "Fitness")
     m <- rbind(c(rep(0, length(all_genes)), fwt),
                m)
     ## Ensure sorted
b137a4a3
     ## m <- data.frame(m)
c06244a8
     rs <- rowSums(m[, -ncol(m), drop = FALSE])
     m <- m[order(rs), , drop = FALSE]
50a94207
     ## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ]
     return(m)
 }
 
 
fd073de1
 
 Magellan_stats <- function(x, max_num_genotypes = 2000,
                            verbose = FALSE,
edc593d3
                            use_log = FALSE,
fd073de1
                            short = TRUE,
                            replace_missing = FALSE) {
     ## I always use
     ## if(!is.null(x) && is.null(file))
     ##     stop("one of object or file name")
     ## if(is.null(file))
     fn <- tempfile()
     fnret <- tempfile()
     if(verbose)
         cat("\n Using input file", fn, " and output file ", fnret, "\n")
 
     if(use_log) {
         logarg <- "-l"
     } else {
         logarg <- NULL
     }
     if(short) {
         shortarg <- "-s"
     } else {
         shortarg <- NULL
     }
 
     if(replace_missing) {
         zarg <- "-z"
     } else {
         zarg <- NULL
     }
 
     to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
     ## newer versions of Magellan provide some extra values to standard output
     call_M <- system2(fl_statistics_binary(),
                       args = paste(shortarg, logarg, zarg, "-o", fnret, fn),
                       stdout = NULL)
     if(short) {
         ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
 
edc593d3
         tmp <- unlist(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
fd073de1
         ## ## Make names more explicit, but check we have what we think we have
         ## ## New versions of Magellan produce different output apparently of variable length
         ## stopifnot(length(tmp) >= 23) ## 23) ## variable length
         ## stopifnot(identical(names(tmp)[1:13], ## only some
         ##                     c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s",
         ##                       "nchains", "nsteps", "nori", "depth", "magn", "sign",
         ##                       "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
         ## ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
         ## if(length(tmp) >= 24) ## the new version
         ##     stopifnot(identical(names(tmp)[c(20, 24)],
         ##                         c("steps_m", "mProbOpt_0")))
         ## ## steps_m: the mean number of steps over the entire landscape to
         ## ## reach the global optimum
         ## ## mProbOpt_0: The mean probability to
         ## ## reach that optimum (again averaged over the entire landscape).
         ## ## but if there are multiple optima, there are many of these
         ## names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
         ##                 "r/s","n_chains", "n_steps", "n_origins", "max_depth",
         ##                 "epist_magn", "epist_sign", "epist_rsign")## ,
         ##                 ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
         ##                 ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
         ## ## "std_dev_pairs", "var_outdegree")
     } else {
         message("Output file: ", fnret)
         tmp <- readLines(fnret)
     }
     return(tmp)
 }
 
 
 ## Former version, that always tries to give a vector
 ## and that uses an external executable.
df131087
 ## Magellan_stats and Magellan_draw cannot be tested
 ## routinely, as they depend on external software
fd073de1
 Magellan_stats_former <- function(x, max_num_genotypes = 2000,
65cb86f3
                            verbose = FALSE,
df131087
                            use_log = TRUE,
                            short = TRUE,
f9a38e24
                            fl_statistics = "fl_statistics",
                            replace_missing = FALSE) { # nocov start
fd073de1
     ## I always use
65cb86f3
     ## if(!is.null(x) && is.null(file))
     ##     stop("one of object or file name")
     ## if(is.null(file))
     fn <- tempfile()
     fnret <- tempfile()
     if(verbose)
         cat("\n Using input file", fn, " and output file ", fnret, "\n")
fd073de1
 
df131087
     if(use_log) {
         logarg <- "-l"
     } else {
         logarg <- NULL
     }
     if(short) {
         shortarg <- "-s"
     } else {
         shortarg <- NULL
     }
fd073de1
 
f9a38e24
     if(replace_missing) {
         zarg <- "-z"
     } else {
         zarg <- NULL
     }
 
65cb86f3
     to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
f9a38e24
     ## newer versions of Magellan provide some extra values to standard output
     call_M <- system2(fl_statistics,
                       args = paste(fn, shortarg, logarg, zarg, "-o", fnret),
                       stdout = NULL)
df131087
     if(short) {
f9a38e24
         ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
fd073de1
 
f9a38e24
         tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
df131087
         ## Make names more explicit, but check we have what we think we have
f9a38e24
         ## New versions of Magellan produce different output apparently of variable length
         stopifnot(length(tmp) >= 23) ## 23) ## variable length
         stopifnot(identical(names(tmp)[1:13], ## only some
df131087
                             c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s",
                               "nchains", "nsteps", "nori", "depth", "magn", "sign",
f9a38e24
                               "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
         ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
         if(length(tmp) >= 24) ## the new version
             stopifnot(identical(names(tmp)[c(20, 24)],
                                 c("steps_m", "mProbOpt_0")))
         ## steps_m: the mean number of steps over the entire landscape to
         ## reach the global optimum
         ## mProbOpt_0: The mean probability to
         ## reach that optimum (again averaged over the entire landscape).
         ## but if there are multiple optima, there are many of these
         names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
df131087
                         "r/s","n_chains", "n_steps", "n_origins", "max_depth",
f9a38e24
                         "epist_magn", "epist_sign", "epist_rsign")## ,
                         ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
                         ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
         ## "std_dev_pairs", "var_outdegree")
df131087
     } else {
         tmp <- readLines(fnret)
     }
     return(tmp)
 } # nocov end
 
 Magellan_draw <- function(x, max_num_genotypes = 2000,
                           verbose = FALSE,
                           args = "-f",
                           fl_draw = "fl_draw",
                           svg_open = "xdg-open",
                           file_name = NULL) { # nocov start
     ## It always works by appending the name so file_name is without the .svg
     if(is.null(file_name)) {
         fn <- tempfile()
     } else {
         fn <- file_name
     }
     fn_out <- paste0(fn, ".svg")
     if(verbose)
         cat("\n Using input file", fn, " and output file ", fn_out, "\n")
fd073de1
 
df131087
     to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
     call_M <- system2(fl_draw, args = paste(fn, args), wait = FALSE)
     call_view <- system2(svg_open, args = fn_out, wait = FALSE,
                          stdout = ifelse(verbose, "", FALSE),
                          stderr = ifelse(verbose, "", FALSE))
fd073de1
 
     invisible()
df131087
 } # nocov end
65cb86f3
 
 
50a94207
 
df131087
 ## ## Example of Bozic issues in conversions of fitnes
65cb86f3
 ## m1 <- cbind(c(0, 1), c(1, 0), c(2, 3))
 ## m2 <- cbind(c(1, 1), c(1, 0), c(2, 3))
 ## m3 <- data.frame(G = c("A, B", "A"), F = c(1, 2))
 ## m4 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2, 1, 4))
 ## m5 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2, 1, 0))
 ## m6 <- data.frame(G = c("A, B", "A", "WT", "B"), F = c(3, 2.5, 2, 0))
50a94207
 
 ## And no, it makes no sense to use any of this for mutator: in mutator I
 ## directly have the multiplication factor of each gene. Which is likely
df131087
 ## what people want anyway. Add it later if needed by using a ratio
50a94207
 ## instead of a "-"