// Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see .
// #include "randutils.h" //Nope, until we have gcc-4.8 in Win; full C++11
#include "debug_common.h"
#include "common_classes.h"
#include "new_restrict.h"
#include
#include
#include
#include
#include
using namespace Rcpp;
using std::vector;
using std::back_inserter;
std::string concatIntsString(const std::vector& ints,
const std::string sep = ", ") {
std::string strout;
std::string comma = "";
for(auto const &g : ints) {
strout += (comma + std::to_string(g));
comma = sep;
}
return strout;
}
double prodFitness(const std::vector& s) {
return accumulate(s.begin(), s.end(), 1.0,
[](double x, double y) {return (x * std::max(0.0, (1 + y)));});
}
// // This is a better idea? If yes, change code in nr_fitness so that
// // birth 0 is not extinction.
// double prodFitnessNegInf(std::vector s) {
// double f = 1.0;
// for(auto y : s) {
// if( y == -std::numeric_limits::infinity()) {
// return -std::numeric_limits::infinity();
// } else {
// f *= std::max(0.0, (1 + y));
// }
// }
// return f;
// }
double prodDeathFitness(const std::vector& s) {
return accumulate(s.begin(), s.end(), 1.0,
[](double x, double y) {return (x * std::max(0.0, (1 - y)));});
}
double prodMuts(const std::vector& s) {
// From prodFitness
// return accumulate(s.begin(), s.end(), 1.0,
// [](double x, double y) {return (x * y);});
return accumulate(s.begin(), s.end(), 1.0,
std::multiplies());
}
double set_cPDetect(const double n2, const double p2,
const double PDBaseline) {
return (-log(1.0 - p2)/(n2 - PDBaseline));
}
// Next two are identical, except for scaling the k. Use the simplest.
double probDetectSize(const double n, const double cPDetect,
const double PDBaseline) {
if(n <= PDBaseline) {
return 0;
} else {
return (1 - exp( -cPDetect * (n - PDBaseline)));
}
}
// double prob_exit_ratio(const double n, const double k, const double baseline) {
// if(n <= baseline) {
// return 0;
// } else {
// return (1 - exp( -c * ( (n - baseline)/baseline)));
// }
// }
bool detectedSizeP(const double n, const double cPDetect,
const double PDBaseline, std::mt19937& ran_gen) {
if(cPDetect < 0) {
// As we OR, return false if this condition does not apply
return false;
} else {
std::uniform_real_distribution runif(0.0, 1.0);
double prob = probDetectSize(n, cPDetect, PDBaseline);
if(prob <= 0.0) return false;
if(runif(ran_gen) <= prob) {
return true;
} else {
return false;
}
}
}
bool operator==(const Genotype& lhs, const Genotype& rhs) {
return (lhs.orderEff == rhs.orderEff) &&
(lhs.epistRtEff == rhs.epistRtEff) &&
(lhs.rest == rhs.rest) &&
(lhs.flGenes == rhs.flGenes);
}
// Added for completeness, but not used now
// bool operator<(const Genotype& lhs, const Genotype& rhs) {
// std::vector lh = genotypeSingleVector(lhs);
// std::vector rh = genotypeSingleVector(rhs);
// if( lh.size() < rh.size() ) return true;
// else if ( lh.size() > rh.size() ) return false;
// else {
// for(size_t i = 0; i != lh.size(); ++i) {
// if( lh[i] < rh[i] ) return true;
// }
// return false;
// }
// }
TypeModel stringToModel(const std::string& mod) {
if(mod == "exp")
return TypeModel::exp;
else if(mod == "bozic1")
return TypeModel::bozic1;
else if(mod == "mcfarlandlog")
return TypeModel::mcfarlandlog;
// else if(mod == "mcfarland")
// return TypeModel::mcfarland;
// else if(mod == "beerenwinkel")
// return TypeModel::beerenwinkel;
// else if(mod == "mcfarland0")
// return TypeModel::mcfarland0;
// else if(mod == "bozic2")
// return TypeModel::bozic2;
else
throw std::out_of_range("Not a valid TypeModel");
}
Dependency stringToDep(const std::string& dep) {
if(dep == "monotone") // AND, CBN, CMPN
return Dependency::monotone;
else if(dep == "semimonotone") // OR, SMN, DMPN
return Dependency::semimonotone;
else if(dep == "xmpn") // XOR, XMPN
return Dependency::xmpn;
else if(dep == "--") // for root, for example
return Dependency::single;
else
throw std::out_of_range("Not a valid typeDep");
// We never create the NA from entry data. NA is reserved for Root.
}
// std::string depToString(const Dependency dep) {
// switch(dep) {
// case Dependency::monotone:
// return "CMPN or monotone";
// case Dependency::semimonotone:
// return "DMPN or semimonotone";
// case Dependency::xmpn:
// return "XMPN (XOR)";
// case Dependency::single:
// return "--";
// default:
// throw std::out_of_range("Not a valid dependency");
// }
// }
void print_Genotype(const Genotype& ge) {
Rcpp::Rcout << "\n Printing Genotype";
Rcpp::Rcout << "\n\t\t order effects genes:";
for(auto const &oo : ge.orderEff) Rcpp::Rcout << " " << oo;
Rcpp::Rcout << "\n\t\t epistasis and restriction effects genes:";
for(auto const &oo : ge.epistRtEff) Rcpp::Rcout << " " << oo;
Rcpp::Rcout << "\n\t\t non interaction genes :";
for(auto const &oo : ge.rest) Rcpp::Rcout << " " << oo;
Rcpp::Rcout << "\n\t\t fitness landscape genes :";
for(auto const &oo : ge.flGenes) Rcpp::Rcout << " " << oo;
Rcpp::Rcout << std::endl;
}
vector genotypeSingleVector(const Genotype& ge) {
// orderEff in the order they occur. All others are sorted.
std::vector allgG;
allgG.insert(allgG.end(), ge.orderEff.begin(), ge.orderEff.end());
allgG.insert(allgG.end(), ge.epistRtEff.begin(), ge.epistRtEff.end());
allgG.insert(allgG.end(), ge.rest.begin(), ge.rest.end());
allgG.insert(allgG.end(), ge.flGenes.begin(), ge.flGenes.end());
// this should not be unique'd as it aint' sorted
return allgG;
}
vector allGenesinFitness(const fitnessEffectsAll& F) {
// Sorted
std::vector g0;
if(F.Gene_Module_tabl.size()) {
if( F.Gene_Module_tabl[0].GeneNumID != 0 )
throw std::logic_error("\n Gene module table's first element must be 0."
" This should have been caught in R.");
// for(vector::size_type i = 1;
// i != F.Gene_Module_tabl.size(); i++) {
for(decltype(F.Gene_Module_tabl.size()) i = 1;
i != F.Gene_Module_tabl.size(); i++) {
g0.push_back(F.Gene_Module_tabl[i].GeneNumID);
}
}
// for(auto const &a : F.Gene_Module_tabl) {
// if(a.GeneNumID != 0) g0.push_back(a.GeneNumID);
// }
for(auto const &b: F.genesNoInt.NumID) {
g0.push_back(b);
}
// sort(g0.begin(), g0.end());
for(auto const &b: F.fitnessLandscape.NumID) {
g0.push_back(b);
}
sort(g0.begin(), g0.end());
// Can we assume the fitness IDs go from 0 to n? Nope: because of
// muEF. But we assume in several places that there are no repeated
// elements in the output from this function.
// FIXME we verify there are no repeated elements. That is to strongly
// check our assumptions are right. Alternatively, return the "uniqued"
// vector and do not check anything.
std::vector g0_cp(g0);
g0.erase( unique( g0.begin(), g0.end() ), g0.end() );
if(g0.size() != g0_cp.size())
throw std::logic_error("\n allGenesinFitness: repeated genes. "
" This should have been caught in R.");
return g0;
}
vector allGenesinGenotype(const Genotype& ge){
// Like genotypeSingleVector, but sorted
std::vector allgG;
for(auto const &g1 : ge.orderEff)
allgG.push_back(g1);
for(auto const &g2 : ge.epistRtEff)
allgG.push_back(g2);
for(auto const &g3 : ge.rest)
allgG.push_back(g3);
for(auto const &g4 : ge.flGenes)
allgG.push_back(g4);
sort(allgG.begin(), allgG.end());
// Remove duplicates see speed comparisons here:
// http://stackoverflow.com/questions/1041620/whats-the-most-efficient-way-to-erase-duplicates-and-sort-a-vector
// We assume here there are no duplicates. Yes, a gene can have both
// fitness effects and order effects and be in the DAG. But it will be
// in only one of the buckets.
std::vector g0_cp(allgG);
allgG.erase( unique( allgG.begin(), allgG.end() ), allgG.end() );
if(allgG.size() != g0_cp.size())
throw std::logic_error("\n allGenesinGenotype: repeated genes."
" This should have been caught in R.");
return allgG;
}
// For users: if something depends on 0, that is it. No further deps.
// And do not touch the 0 in Gene_Module_table.
std::vector rTable_to_Poset(Rcpp::List rt) {
// The restriction table, or Poset, has a first element
// with nothing, so that all references by mutated gene
// are simply accessing the Poset[mutated gene] without
// having to remember to add 1, etc.
std::vector Poset;
Poset.resize(rt.size() + 1);
Poset[0].child = "0"; //should this be Root?? I don't think so.
Poset[0].childNumID = 0;
Poset[0].typeDep = Dependency::NA;
Poset[0].s = std::numeric_limits::quiet_NaN();
Poset[0].sh = std::numeric_limits::quiet_NaN();
Poset[0].parents.resize(0);
Poset[0].parentsNumID.resize(0);
Rcpp::List rt_element;
// std::string tmpname;
Rcpp::IntegerVector parentsid;
Rcpp::CharacterVector parents;
for(int i = 1; i != (rt.size() + 1); ++i) {
rt_element = rt[i - 1];
Poset[i].child = Rcpp::as(rt_element["child"]);
Poset[i].childNumID = as(rt_element["childNumID"]);
Poset[i].typeDep = stringToDep(as(rt_element["typeDep"]));
Poset[i].s = as(rt_element["s"]);
Poset[i].sh = as(rt_element["sh"]);
// if(i != Poset[i].childNumID) {
// Nope: this assumes we only deal with posets.
// // Rcpp::Rcout << "\n childNumID, original = " << as(rt_element["childNumID"]);
// // Rcpp::Rcout << "\n childNumID, Poset = " << Poset[i].childNumID;
// // Rcpp::Rcout << "\n i = " << i << std::endl;
// throw std::logic_error("childNumID != index");
// }
// Rcpp::IntegerVector parentsid = as(rt_element["parentsNumID"]);
// Rcpp::CharacterVector parents = as(rt_element["parents"]);
parentsid = as(rt_element["parentsNumID"]);
parents = as(rt_element["parents"]);
if( parentsid.size() != parents.size() ) {
throw std::logic_error("parents size != parentsNumID size. Bug in R code.");
}
for(int j = 0; j != parentsid.size(); ++j) {
Poset[i].parentsNumID.push_back(parentsid[j]);
Poset[i].parents.push_back( (Rcpp::as< std::string >(parents[j])) );
// tmpname = Rcpp::as< std::string >(parents[j]);
// Poset[i].parents.push_back(tmpname);
}
// Should not be needed if R always does what is should. Disable later?
if(! is_sorted(Poset[i].parentsNumID.begin(), Poset[i].parentsNumID.end()) )
throw std::logic_error("ParentsNumID not sorted. Bug in R code.");
if(std::isinf(Poset[i].s))
Rcpp::Rcout << "WARNING: at least one s is infinite"
<< std::endl;
if(std::isinf(Poset[i].sh) && (Poset[i].sh > 0))
Rcpp::Rcout << "WARNING: at least one sh is positive infinite"
<< std::endl;
}
return Poset;
}
std::vector R_GeneModuleToGeneModule(Rcpp::List rGM) {
std::vector geneModule;
Rcpp::IntegerVector GeneNumID = rGM["GeneNumID"];
Rcpp::IntegerVector ModuleNumID = rGM["ModuleNumID"];
Rcpp::CharacterVector GeneName = rGM["Gene"];
Rcpp::CharacterVector ModuleName = rGM["Module"];
// geneModule.resize(GeneNumID.size());
geneModule.resize(GeneNumID.size()); // remove later
for(size_t i = 0; i != geneModule.size(); ++i) {
if( static_cast(i) != GeneNumID[i])
throw std::logic_error(" i != GeneNumID. Bug in R code.");
// geneModule[i].GeneNumID = GeneNumID[i];
// geneModule[i].ModuleNumID = ModuleNumID[i];
// remove these later?
geneModule[i].GeneNumID = GeneNumID[i];
geneModule[i].ModuleNumID = ModuleNumID[i];
geneModule[i].GeneName = GeneName[i];
geneModule[i].ModuleName = ModuleName[i];
}
return geneModule;
}
std::vector GeneToModule(const std::vector& Drv,
const
std::vector& Gene_Module_tabl,
const bool sortout, const bool uniqueout) {
std::vector mutatedModules;
for(auto it = Drv.begin(); it != Drv.end(); ++it) {
mutatedModules.push_back(Gene_Module_tabl[(*it)].ModuleNumID);
}
// sortout and uniqueout returns a single element of each. uniqueout only removes
// successive duplicates. sortout without unique is just useful for knowing
// what happens for stats, etc. Neither sortout nor uniqueout for keeping
// track of order of module events.
if(sortout) {
sort( mutatedModules.begin(), mutatedModules.end() );
}
if(uniqueout) {
mutatedModules.erase( unique( mutatedModules.begin(),
mutatedModules.end() ),
mutatedModules.end() );
}
return mutatedModules;
}
// fitnessLandscape_struct convertFitnessLandscape(Rcpp::List flg,
// Rcpp::DataFrame fl_df) {
fitnessLandscape_struct convertFitnessLandscape(Rcpp::List flg,
Rcpp::List fl_df) {
fitnessLandscape_struct flS;
flS.names = Rcpp::as >(flg["Gene"]);
flS.NumID = Rcpp::as >(flg["GeneNumID"]);
std::vector genotNames =
Rcpp::as >(fl_df["Genotype"]);
// Rcpp::CharacterVector genotNames = fl_df["Genotype"];
Rcpp::NumericVector fitness = fl_df["Fitness"];
// Fill up the map genotypes(as string) to fitness
//for(size_t i = 0; i != fl_df.nrows(); ++i) {
for(size_t i = 0; i != genotNames.size(); ++i) {
flS.flmap.insert({genotNames[i], fitness[i]});
}
return flS;
}
genesWithoutInt convertNoInts(Rcpp::List nI) {
genesWithoutInt genesNoInt;
// FIXME: I think I want to force Gene in long.geneNoInt to be a char.vector
// to avoid this transformation.
// Rcpp::CharacterVector names = nI["Gene"];
// Rcpp::IntegerVector id = nI["GeneNumID"];
// Rcpp::NumericVector s1 = nI["s"];
// genesNoInt.names = Rcpp::as >(names);
genesNoInt.names = Rcpp::as >(nI["Gene"]);
genesNoInt.NumID = Rcpp::as >(nI["GeneNumID"]);
genesNoInt.s = Rcpp::as >(nI["s"]);
genesNoInt.shift = genesNoInt.NumID[0]; // FIXME: we assume mutations always
// indexed 1 to something. Not 0 to
// something.
return genesNoInt;
}
std::vector convertEpiOrderEff(Rcpp::List ep) {
std::vector Epistasis;
Rcpp::List element;
// For epistasis, the numID must be sorted, but never with order effects.
// Things come sorted (or not) from R.
Epistasis.resize(ep.size());
for(int i = 0; i != ep.size(); ++i) {
element = ep[i];
Epistasis[i].NumID = Rcpp::as >(element["NumID"]);
Epistasis[i].names = Rcpp::as >(element["ids"]);
Epistasis[i].s = as(element["s"]);
}
return Epistasis;
}
std::vector sortedAllOrder(const std::vector& E) {
std::vector allG;
for(auto const &ec : E) {
for(auto const &g : ec.NumID) {
allG.push_back(g);
}
}
sort(allG.begin(), allG.end());
allG.erase( unique( allG.begin(), allG.end()),
allG.end());
return allG;
}
std::vector sortedAllPoset(const std::vector& Poset) {
// Yes, this could be done inside rTable_to_Poset but this is cleaner
// and will only add very little time.
std::vector allG;
for(auto const &p : Poset) {
allG.push_back(p.childNumID);
}
sort(allG.begin(), allG.end());
allG.erase( unique( allG.begin(), allG.end()),
allG.end());
return allG;
}
fitnessEffectsAll convertFitnessEffects(Rcpp::List rFE) {
// Yes, some of the things below are data.frames in R, but for
// us that is used just as a list.
fitnessEffectsAll fe;
Rcpp::List rrt = rFE["long.rt"];
Rcpp::List re = rFE["long.epistasis"];
Rcpp::List ro = rFE["long.orderEffects"];
Rcpp::List rgi = rFE["long.geneNoInt"];
Rcpp::List rgm = rFE["geneModule"];
bool rone = as(rFE["gMOneToOne"]);
Rcpp::IntegerVector drv = rFE["drv"];
Rcpp::List flg = rFE["fitnessLandscape_gene_id"];
// clang does not like this
// Rcpp::DataFrame fl_df = rFE["fitnessLandscape_df"];
Rcpp::List fl_df = rFE["fitnessLandscape_df"];
// In the future, if we want noInt and fitnessLandscape, all
// we need is use the fitness landscape with an index smaller than those
// of noInt. So we can use noInt with shift being those in fitnessLandscape.
// BEWARE: will need to modify also createNewGenotype.
// if(fl_df.nrows()) {
if(fl_df.size()) {
fe.fitnessLandscape = convertFitnessLandscape(flg, fl_df);
}
if(rrt.size()) {
fe.Poset = rTable_to_Poset(rrt);
}
if(re.size()) {
fe.Epistasis = convertEpiOrderEff(re);
}
if(ro.size()) {
fe.orderE = convertEpiOrderEff(ro);
}
if(rgi.size()) {
fe.genesNoInt = convertNoInts(rgi);
} else {
fe.genesNoInt.shift = -9L;
}
// If this is null, use the nullFitnessEffects function; never
// end up here.
// if( (rrt.size() + re.size() + ro.size() + rgi.size() + fl_df.nrows()) == 0) {
if( (rrt.size() + re.size() + ro.size() + rgi.size() + fl_df.size()) == 0) {
throw std::logic_error("\n Nothing inside this fitnessEffects; why are you here?"
" Bug in R code.");
}
// At least for now, if fitness landscape nothing else allowed
// if(fl_df.nrows() && ((rrt.size() + re.size() + ro.size() + rgi.size()) > 0)) {
if(fl_df.size() && ((rrt.size() + re.size() + ro.size() + rgi.size()) > 0)) {
throw std::logic_error("\n Fitness landscape specification."
" There should be no other terms. "
" Bug in R code");
}
// This is silly
// if(fl_df.nrows() && (rgm.size() > 4) ) {
// throw std::logic_error("\n Fitness landscape specification."
// " Cannot use modules. "
// " Bug in R code");
// }
fe.Gene_Module_tabl = R_GeneModuleToGeneModule(rgm);
fe.allOrderG = sortedAllOrder(fe.orderE);
fe.allPosetG = sortedAllPoset(fe.Poset);
fe.gMOneToOne = rone;
fe.allGenes = allGenesinFitness(fe);
fe.genomeSize = fe.Gene_Module_tabl.size() - 1 + fe.genesNoInt.s.size() +
fe.fitnessLandscape.NumID.size();
fe.drv = as > (drv);
sort(fe.drv.begin(), fe.drv.end()); //should not be needed, but just in case
// cannot trust R gives it sorted
// check_disable_later
if(fe.genomeSize != static_cast(fe.allGenes.size())) {
throw std::logic_error("\n genomeSize != allGenes.size(). Bug in R code.");
}
// At least for now
if(fe.fitnessLandscape.NumID.size() > 0) {
if(fe.genomeSize != static_cast(fe.fitnessLandscape.NumID.size())) {
throw std::logic_error("\n genomeSize != genes in fitness landscape."
"Bug in R code.");
}
}
return fe;
}
// Before making allGenesinGenotype return a unique vector: we do a
// set_difference below. If we look at the help
// (http://en.cppreference.com/w/cpp/algorithm/set_difference) if we had
// more repetitions of an element in allGenes than in sortedparent we
// could have a problem. But if you look at function "allgenesinFitness",
// which is the one used to give the allgenes vector, you will see that
// that one returns only one entry per gene, as it parses the geneModule
// structure. So even if allGenesinGenotype returns multiple entries
// (which they don't), there will be no bugs as the maximum number of
// entries in the output of setdiff will be 0 or 1 as m is 1. But
// allGenesinGenotype cannot return more than one element as can be seen
// in createNewGenotype: an element only ends in the order component if it
// is not in the epistasis component. So there are no repeated elements
// in allGenes or in sortedparent below. Also, beware that does not break
// correct fitness evaluation of fitnessEffects where the same gene is in
// epistasis and order, as can be seen in the tests and because of how we
// evaluate fitness, where genes in a genotype in the orderEffects bucket
// are placed also in the epist for fitness eval. See evalGenotypeFitness
// and createNewGenotype.
void obtainMutations(const Genotype& parent,
const fitnessEffectsAll& fe,
int& numMutablePosParent,
std::vector& newMutations,
//randutils::mt19937_rng& ran_gen
std::mt19937& ran_gen,
std::vector mu) {
//Ugly: we return the mutations AND the numMutablePosParent This is
// almost ready to accept multiple mutations. And it returns a vector,
// newMutations.
std::vector sortedparent = allGenesinGenotype(parent);
std::vector nonmutated;
set_difference(fe.allGenes.begin(), fe.allGenes.end(),
sortedparent.begin(), sortedparent.end(),
back_inserter(nonmutated));
// numMutablePos is used not only for mutation but also to decide about
// the dummy or null mutation case.
numMutablePosParent = nonmutated.size();
if(nonmutated.size() < 1)
throw std::out_of_range("Trying to obtain a mutation when nonmutated.size is 0."
" Bug in R code; let us know.");
if(mu.size() == 1) { // common mutation rate
// FIXME:chromothr would not use this, or this is the limit case with a
// single mutant
std::uniform_int_distribution rpos(0, nonmutated.size() - 1);
newMutations.push_back(nonmutated[rpos(ran_gen)]);
} else { // per-gene mutation rate.
// Remember that mutations always indexed from 1, not from 0.
// We take an element from a discrete distribution, with probabilities
// proportional to the rates.
// FIXME:varmutrate give a warning if the only mu is for mu = 0.
std::vector mu_nm;
for(auto const &nm : nonmutated) mu_nm.push_back(mu[nm - 1]);
std::discrete_distribution rpos(mu_nm.begin(), mu_nm.end());
newMutations.push_back(nonmutated[rpos(ran_gen)]);
}
// randutils
// // Yes, the next will work, but pick is simpler!
// // size_t rpos = ran_gen.uniform(static_cast(0), nonmutated.size() - 1);
// // newMutations.push_back(nonmutated[rpos]);
// int posmutated = ran_gen.pick(nonmutated);
// newMutations.push_back(posmutated);
}
// std::vector genesInOrderModules(const fitnessEffectsAll& fe) {
// vector genes;
// if(fe.gMOneToOne)
// genes = fe.alOrderG;
// else {
// for(auto const &m : fe.allOrderG) {
// genes.push_back()
// }
// }
// return genes;
// }
fitness_as_genes fitnessAsGenes(const fitnessEffectsAll& fe) {
// Give the fitnessEffects in terms of genes, not modules.
// Extract the noInt. Then those in order effects by creating a multimap
// to go from map to genes. Then all remaining genes are those only in
// poset. By set_difference.
fitness_as_genes fg = zero_fitness_as_genes();
// fitness_as_genes fg;
fg.flGenes = fe.fitnessLandscape.NumID;
if(fg.flGenes.size()) {
return fg;
}
fg.noInt = fe.genesNoInt.NumID;
// //zz: debugging
// for(auto const &o : fe.genesNoInt.NumID) {
// DP2(o);
// }
// for(auto const &o : fe.genesNoInt.names) {
// DP2(o);
// }
// //
std::multimap MG;
for( auto const &mt : fe.Gene_Module_tabl) {
MG.insert({mt.ModuleNumID, mt.GeneNumID});
}
for (auto const &o : fe.allOrderG) {
for(auto pos = MG.lower_bound(o); pos != MG.upper_bound(o); ++pos)
fg.orderG.push_back(pos->second);
}
sort(fg.orderG.begin(), fg.orderG.end());
std::vector tmpv = fg.orderG;
tmpv.insert(tmpv.end(),fg.noInt.begin(), fg.noInt.end());
sort(tmpv.begin(), tmpv.end()); // should not be needed
set_difference(fe.allGenes.begin(), fe.allGenes.end(),
tmpv.begin(), tmpv.end(),
back_inserter(fg.posetEpistG));
// fg.posetEpistG.sort(fg.posetEpistG.begin(),
// fg.posetEpistG.end());
// // //zz: debugging
// DP1("order");
// for(auto const &o : fg.orderG) {
// DP2(o);
// }
// DP1("posetEpist");
// for(auto const &o : fg.posetEpistG) {
// DP2(o);
// }
// DP1("noint");
// for(auto const &o : fg.noInt) {
// DP2(o);
// }
// DP1("fl");
// for(auto const &o : fg.flGenes) {
// DP2(o);
// }
return fg;
}
std::map mapGenesIntToNames(const fitnessEffectsAll& fe) {
// This is a convenience, used in the creation of output.
// Sure, we could do this when reading the data in.
// The noInt in convertNoInts.
std::map gg;
for(auto const &mt : fe.Gene_Module_tabl) {
gg.insert({mt.GeneNumID, mt.GeneName});
}
// this is pedantic, as what is the size_type of NumID and of names?
// for(decltype(fe.genesNoInt.s.size()) i = 0;
// i != fe.genesNoInt.s.size(); ++i)
for(size_t i = 0;
i != fe.genesNoInt.NumID.size(); ++i){
gg.insert({fe.genesNoInt.NumID[i], fe.genesNoInt.names[i]});
}
// zz
for(size_t i = 0;
i != fe.fitnessLandscape.NumID.size(); ++i){
gg.insert({fe.fitnessLandscape.NumID[i], fe.fitnessLandscape.names[i]});
}
return gg;
}
// It is simple to write specialized functions for when
// there are no restrictions or no order effects , etc.
Genotype createNewGenotype(const Genotype& parent,
const std::vector& mutations,
const fitnessEffectsAll& fe,
std::mt19937& ran_gen,
//randutils::mt19937_rng& ran_gen
bool random = true) {
// random: if multiple mutations, randomly shuffle the ordered ones?
// This is the way to go if chromothripsis, but not if we give an
// initial mutant
Genotype newGenot = parent;
std::vector tempOrder; // holder for multiple muts if order.
bool sort_rest = false;
bool sort_epist = false;
bool sort_flgenes = false;
// FIXME: we need to create the mutations!
// Order of ifs: I suspect order effects rare. No idea about
// non-interaction genes, but if common the action is simple.
// A gene that is involved both in order effects and epistasis, only
// ends up in the orderEff container, for concision (even if a gene can
// be involved in both orderEff and epistasis and rT). But in the
// genotype evaluation, in evalGenotypeFitness, notice that we create
// the vector of genes to be checked against epistais and order effects
// using also those from orderEff:
// std::vector mutG (ge.epistRtEff);
// mutG.insert( mutG.end(), ge.orderEff.begin(), ge.orderEff.end());
for(auto const &g : mutations) {
// If we are dealing with a fitness landscape, that is as far as we go here
// at least for now. No other genes affect fitness.
// But this can be easily fixed in the future; like this?
// if(g <= (fe.fitnessLandscape.NumID.size() + 1)) {
// and restructure the else logic for the noInt
if(fe.fitnessLandscape.NumID.size()) {
newGenot.flGenes.push_back(g);
sort_flgenes = true;
} else {
if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int
// We can be dealing with modules
int m;
if(fe.gMOneToOne) {
m = g;
} else {
m = fe.Gene_Module_tabl[g].ModuleNumID;
}
if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) {
newGenot.epistRtEff.push_back(g);
sort_epist = true;
} else {
tempOrder.push_back(g);
}
} else {
// No interaction genes so no module stuff
newGenot.rest.push_back(g);
sort_rest = true;
}
}
}
// If there is order but multiple simultaneous mutations
// (chromothripsis), we randomly insert them
// FIXME: initMutant cannot use this!! we give the order!!!
if( (tempOrder.size() > 1) && random)
shuffle(tempOrder.begin(), tempOrder.end(), ran_gen);
// The new randutils engine:
// if(tempOrder.size() > 1)
// ran_gen.shuffle(tempOrder.begin(), tempOrder.end());
for(auto const &g : tempOrder)
newGenot.orderEff.push_back(g);
// Sorting done at end, in case multiple mutations
if(sort_rest)
sort(newGenot.rest.begin(), newGenot.rest.end());
if(sort_epist)
sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end());
if(sort_flgenes)
sort(newGenot.flGenes.begin(), newGenot.flGenes.end());
return newGenot;
}
// FIXME: Prepare specialized functions:
// Specialized functions:
// Never interactions: push into rest and sort. Identify by shift == 1.
// Never no interactions: remove the if. shift == -9.
// A paranoid check in case R code has bugs.
void breakingGeneDiff(const vector& genotype,
const vector& fitness) {
std::vector diffg;
set_difference(genotype.begin(), genotype.end(),
fitness.begin(), fitness.end(),
back_inserter(diffg));
if(diffg.size()) {
Rcpp::Rcout << "Offending genes :";
for(auto const &gx : diffg) {
Rcpp::Rcout << " " << gx;
}
Rcpp::Rcout << "\t Genotype: ";
for(auto const &g1 : genotype) Rcpp::Rcout << " " << g1;
Rcpp::Rcout << "\t Fitness: ";
for(auto const &g1 : fitness) Rcpp::Rcout << " " << g1;
Rcpp::Rcout << "\n ";
throw std::logic_error("\n At least one gene in the genotype not in fitness effects."
" Bug in R code.");
}
}
void checkNoNegZeroGene(const vector& ge) {
if( ge[0] == 0 )
throw std::logic_error("\n Genotype cannot contain 0. Bug in R code.");
else if(ge[0] < 0)
throw std::logic_error("\n Genotype cannot contain negative values. Bug in R code.");
}
void checkLegitGenotype(const Genotype& ge,
const fitnessEffectsAll& F) {
if((ge.orderEff.size() + ge.epistRtEff.size() + ge.rest.size()) == 0) {
// An empty genotype is always legitimate, even if silly
return;
}
// vector g0 = allGenesinFitness(F);
vector g0 = F.allGenes;
vector allgG = allGenesinGenotype(ge);
checkNoNegZeroGene(allgG);
breakingGeneDiff(allgG, g0);
}
void checkLegitGenotype(const vector& ge,
const fitnessEffectsAll& F) {
if (ge.size() == 0) {
// An empty genotype is always legitimate, even if silly
return;
}
// std::vector g0 = allGenesinFitness(F);
vector g0 = F.allGenes;
std::vector allgG (ge);
sort(allgG.begin(), allgG.end());
checkNoNegZeroGene(allgG);
breakingGeneDiff(allgG, g0);
}
Genotype convertGenotypeFromInts(const std::vector& gg,
const fitnessEffectsAll& fe) {
// A genotype is of one kind or another depending on what genes are of
// what type.
Genotype newGenot;
if(gg.size() != 0) {
// check_disable_later
checkLegitGenotype(gg, fe);
// Very similar to logic in createNewGenotype for placing each gene in
// its correct place, which needs to look at module mapping.
for(auto const &g : gg) {
if(fe.fitnessLandscape.NumID.size()) {
newGenot.flGenes.push_back(g);
} else {
if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int
// We can be dealing with modules
int m;
if(fe.gMOneToOne) {
m = g;
} else {
m = fe.Gene_Module_tabl[g].ModuleNumID;
}
if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) {
newGenot.epistRtEff.push_back(g);
} else {
newGenot.orderEff.push_back(g);
}
} else {
// No interaction genes so no module stuff
newGenot.rest.push_back(g);
}
}
}
sort(newGenot.flGenes.begin(), newGenot.flGenes.end());
sort(newGenot.rest.begin(), newGenot.rest.end());
sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end());
} else {
newGenot = wtGenotype(); // be explicit!!
}
return newGenot;
}
Genotype convertGenotypeFromR(Rcpp::IntegerVector rG,
const fitnessEffectsAll& fe) {
std::vector gg = Rcpp::as > (rG);
return convertGenotypeFromInts(gg, fe);
}
// Previous version
// Genotype convertGenotypeFromR(Rcpp::IntegerVector rG,
// const fitnessEffectsAll& fe) {
// // A genotype is of one kind or another depending on what genes are of
// // what type.
// std::vector gg = Rcpp::as > (rG);
// Genotype newGenot;
// // check_disable_later
// checkLegitGenotype(gg, fe);
// // Very similar to logic in createNewGenotype for placing each gene in
// // its correct place, which needs to look at module mapping.
// for(auto const &g : gg) {
// if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int
// // We can be dealing with modules
// int m;
// if(fe.gMOneToOne) {
// m = g;
// } else {
// m = fe.Gene_Module_tabl[g].ModuleNumID;
// }
// if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) {
// newGenot.epistRtEff.push_back(g);
// } else {
// newGenot.orderEff.push_back(g);
// }
// } else {
// // No interaction genes so no module stuff
// newGenot.rest.push_back(g);
// }
// }
// sort(newGenot.rest.begin(), newGenot.rest.end());
// sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end());
// return newGenot;
// }
// Genotype convertGenotypeFromR(Rcpp::List rGE) {
// Genotype g;
// Rcpp::IntegerVector oe = rGE["orderEffGenes"];
// Rcpp::IntegerVector ert = rGE["epistRTGenes"];
// Rcpp::IntegerVector rest = rGE["noInteractionGenes"];
// g.orderEff = Rcpp::as > (oe);
// g.epistRtEff = Rcpp::as > (ert);
// g.rest = Rcpp::as > (rest);
// sort(g.epistRtEff.begin(), g.epistRtEff.end());
// sort(g.rest.begin(), g.rest.end());
// return g;
// }
bool match_order_effects(const std::vector& O,
const std::vector& G) {
//As the name says: we check if the order effect is matched
if(G.size() < O.size()) return false;
std::vector::const_iterator p;
std::vector vdist;
auto itb = G.begin();
for(auto const &o : O) {
p = find(G.begin(), G.end(), o);
if( p == G.end() ) {
return false;
} else {
vdist.push_back(std::distance( itb, p ));
}
}
// Rcpp::Rcout << " ";
// for(auto vv : vdist ) {
// Rcpp::Rcout << vv << " ";
// }
// Rcpp::Rcout << std:: endl;
if( is_sorted(vdist.begin(), vdist.end()) ) {
return true;
} else {
return false;
}
}
std::vector evalOrderEffects(const std::vector& mutatedM,
const std::vector& OE) {
std::vector s;
for(auto const &o : OE) {
if(match_order_effects(o.NumID, mutatedM))
s.push_back(o.s);
}
return s;
}
bool match_negative_epist(const std::vector& E,
const std::vector& G) {
// When we have things like -1, 2 in epistasis. We need to check 2 is
// present and 1 is not present. E is the vector of epistatic coeffs,
// and G the sorted genotype.
if(G.size() < 1) return false;
for(auto const &e : E) {
if(e < 0) {
if(binary_search(G.begin(), G.end(), -e))
return false;
} else {
if(!binary_search(G.begin(), G.end(), e))
return false;
}
}
return true;
}
std::vector evalEpistasis(const std::vector& mutatedModules,
const std::vector& Epistasis) {
std::vector s;
// check_disable_later
if(! is_sorted(mutatedModules.begin(), mutatedModules.end()))
throw std::logic_error("mutatedModules not sorted in evalEpistasis."
" Bug in R code.");
for(auto const &p : Epistasis ) {
if(p.NumID[0] > 0 ) {
if(includes(mutatedModules.begin(), mutatedModules.end(),
p.NumID.begin(), p.NumID.end()))
s.push_back(p.s);
} else {
if(match_negative_epist(p.NumID, mutatedModules))
s.push_back(p.s);
}
}
// An alternative, but more confusing way
// for(auto const &p : Epistasis ) {
// if(p.NumID[0] > 0 ) {
// if(!includes(mutatedModules.begin(), mutatedModules.end(),
// p.NumID.begin(), p.NumID.end()))
// continue;
// } else {
// if(!match_negative_epist(p.NumID, mutatedModules))
// continue;
// }
// s.push_back(p.s);
// }
return s;
}
// FIXME: can we make it faster if we know each module a single gene?
// FIXME: if genotype is always kept sorted, with drivers first, can it be
// faster? As well, note that number of drivers is automatically known
// from this table of constraints.
// int getPosetByChild(const int child,
// const std::vector& Poset) {
// }
std::vector evalPosetConstraints(const std::vector& mutatedModules,
const std::vector& Poset,
const std::vector& allPosetG) {
// check_disable_later
if(! is_sorted(mutatedModules.begin(), mutatedModules.end()))
throw std::logic_error("mutatedModules not sorted in evalPosetConstraints."
" Bug in R code.");
size_t numDeps;
size_t sumDepsMet = 0;
Dependency deptype;
std::vector parent_matches;
std::vector s;
//This works reverted w.r.t. to evalOrderEffects and evalEpistasis:
//there, I examine if the effect is present in the genotype. Here, I
//examine if the genotype satisfies the constraints.
// Since the genotype can contain genes not in the poset, first find
// those that are mutated in the genotype AND are in the poset. Then
// check if the mutated have restrictions satisfied.
std::vector MPintersect;
std::set_intersection(allPosetG.begin(), allPosetG.end(),
mutatedModules.begin(), mutatedModules.end(),
std::back_inserter(MPintersect));
// We know MPintersect is sorted, so we can avoid an O(n*n) loop
size_t i = 0;
for(auto const &m : MPintersect) {
while ( Poset[i].childNumID != m) ++i;
// Not to catch the twisted case of an XOR with a 0 and something else
// as parents
if( (Poset[i].parentsNumID[0] == 0) &&
(Poset[i].parentsNumID.size() == 1)) {
s.push_back(Poset[i].s);
} else {
parent_matches.clear();
std::set_intersection(mutatedModules.begin(), mutatedModules.end(),
Poset[i].parentsNumID.begin(),
Poset[i].parentsNumID.end(),
back_inserter(parent_matches));
sumDepsMet = parent_matches.size();
numDeps = Poset[i].parentsNumID.size();
deptype = Poset[i].typeDep;
if( ((deptype == Dependency::semimonotone) &&
(sumDepsMet)) ||
((deptype == Dependency::monotone) &&
(sumDepsMet == numDeps)) ||
((deptype == Dependency::xmpn) &&
(sumDepsMet == 1)) ) {
s.push_back(Poset[i].s);
} else {
s.push_back(Poset[i].sh);
}
}
}
// for(auto const &parent : Poset[i].parentsNumID ) {
// parent_module_mutated = binary_search(mutatedModules.begin(),
// mutatedModules.end(),
// parent);
// if(parent_module_mutated) {
// ++sumDepsMet;
// if( deptype == Dependency::semimonotone ) {
// known = true;
// break;
// }
// if( (deptype == Dependency::xmpn && (sumDepsMet > 1))) {
// knwon = true;
// break;
// }
// }
// }
return s;
}
std::vector evalGenotypeFitness(const Genotype& ge,
const fitnessEffectsAll& F){
// check_disable_later
checkLegitGenotype(ge, F);
std::vector s;
if( (ge.orderEff.size() + ge.epistRtEff.size() +
ge.rest.size() + ge.flGenes.size()) == 0) {
Rcpp::warning("WARNING: you have evaluated fitness of a genotype of length zero.");
// s.push_back(1.0); //Eh??!! 1? or 0? FIXME It should be empty! and have prodFitness
// deal with it.
return s;
}
// If we are dealing with a fitness landscape, that is as far as we go here
// at least for now. No other genes affect fitness.
// But this can be easily fixed in the future; do not return
// s below, but keep adding, maybe the noIntGenes.
// Recall also prodFitness uses, well, the prod of 1 + s
// so we want an s s.t. 1 + s = birth rate passed,
// which is the value in the fitness landscape as interpreted now.
// i.e., s = birth rate - 1;
if(F.fitnessLandscape.NumID.size()) {
std::string gs = concatIntsString(ge.flGenes);
if(F.fitnessLandscape.flmap.find(gs) == F.fitnessLandscape.flmap.end()) {
s.push_back(-1.0);
} else {
s.push_back(F.fitnessLandscape.flmap.at(gs) - 1);
}
return s;
}
// Genes without any restriction or epistasis are just genes. No modules.
// So simple we do it here.
if(F.genesNoInt.shift > 0) {
int shift = F.genesNoInt.shift;
for(auto const & r : ge.rest ) {
s.push_back(F.genesNoInt.s[r - shift]);
}
}
// For the rest, there might be modules. Three different effects on
// fitness possible: as encoded in Poset, general epistasis, order effects.
// Epistatis and poset are checked against all mutations. Create single
// sorted vector with all mutations and map to modules, if needed. Then
// eval.
// Why not use a modified genotypeSingleVector without the no ints? We
// could, but not necessary. And you can place genes in any order you
// want, since this is not for order restrictions. That goes below.
// Why do I put the epist first? See previous answer.
// Why do I sort if one to one? binary searches. Not done below for order.
std::vector mutG (ge.epistRtEff);
// A gene can be involved in epistasis and order. This gene would only
// be in the orderEff vector, as seen in "createNewGenotype" or
// "convertGenotypeFromInts"
mutG.insert( mutG.end(), ge.orderEff.begin(), ge.orderEff.end());
std::vector mutatedModules;
if(F.gMOneToOne) {
sort(mutG.begin(), mutG.end());
mutatedModules = mutG;
} else {
mutatedModules = GeneToModule(mutG, F.Gene_Module_tabl, true, true);
}
std::vector srt =
evalPosetConstraints(mutatedModules, F.Poset, F.allPosetG);
std::vector se =
evalEpistasis(mutatedModules, F.Epistasis);
// For order effects we need a new vector of mutatedModules:
if(F.gMOneToOne) {
mutatedModules = ge.orderEff;
} else {
mutatedModules = GeneToModule(ge.orderEff, F.Gene_Module_tabl, false, true);
}
std::vector so =
evalOrderEffects(mutatedModules, F.orderE);
// I keep s, srt, se, so separate for now for debugging.
s.insert(s.end(), srt.begin(), srt.end());
s.insert(s.end(), se.begin(), se.end());
s.insert(s.end(), so.begin(), so.end());
return s;
}
vector getGenotypeDrivers(const Genotype& ge, const vector& drv) {
// Returns the actual mutated drivers in a genotype.
// drv comes from R, and it is the vector with the
// numbers of the genes, not modules.
vector presentDrv;
vector og = allGenesinGenotype(ge);
set_intersection(og.begin(), og.end(),
drv.begin(), drv.end(),
back_inserter(presentDrv));
return presentDrv;
}
double evalMutator(const Genotype& fullge,
const std::vector& full2mutator,
const fitnessEffectsAll& muEF,
bool verbose = false) {
// In contrast to nr_fitness, that sets birth and death, this simply
// returns the multiplication factor for the mutation rate. This is used
// by mutationFromParent and mutationFromScratch
// Remember that the fitnessEffectsAll struct for mutator does not use
// the same mapping from gene names to gene numerical IDs as for
// fitness. fitnessEffectsAll and its associated algorithms expects the
// present genes to be indexed as successive integers. That is not
// necessarily the case if only some of the genes are in the mutator
// fitnessEffectsAll. So we need to remap the gene numerical IDs. We
// could try remapping by gene inside the struct, but painful and
// error-prone. Much simpler at least for now to do:
// full genotype -> vector of ints (preserving order)
// -> convert the ints to the ints for the genotype of mutator
// -> genotype in terms of mutator
// the "genotype in terms of mutator" is never preserved. It is just a
// transient mapping.
// This will NOT work if we ever have order effects for mutator as we do
// not record order for those that matter for mutator if they do not matter for
// fitness.
vector g1 = genotypeSingleVector(fullge);
vector g2;
int tmp;
for (auto const & i : g1) {
tmp = full2mutator[i - 1]; //gives a -9 if no correspondence
if( tmp > 0 ) g2.push_back(tmp);
}
if(g2.size() == 0) {
return 1.0;
} else {
Genotype newg = convertGenotypeFromInts(g2, muEF);
vector s = evalGenotypeFitness(newg, muEF);
// just for checking
if(verbose) {
std::string sprod = "mutator product";
Rcpp::Rcout << "\n Individual " << sprod << " terms are :";
for(auto const &i : s) Rcpp::Rcout << " " << i;
Rcpp::Rcout << std::endl;
}
return prodMuts(s);
}
}
// [[Rcpp::export]]
double evalRGenotype(Rcpp::IntegerVector rG, Rcpp::List rFE,
bool verbose, bool prodNeg,
Rcpp::CharacterVector calledBy_) {
// Can evaluate both ONLY fitness or ONLY mutator. Not both at the same
// time. Use evalRGenotypeAndMut for that.
const std::string calledBy = Rcpp::as(calledBy_);
if(rG.size() == 0) {
// Why don't we evaluate it?
Rcpp::warning("WARNING: you have evaluated fitness/mutator status of a genotype of length zero.");
return 1;
}
//const Rcpp::List rF(rFE);
fitnessEffectsAll F = convertFitnessEffects(rFE);
Genotype g = convertGenotypeFromR(rG, F);
vector s = evalGenotypeFitness(g, F);
if(verbose) {
std::string sprod;
if(calledBy == "evalGenotype") {
sprod = "s";
} else { // if (calledBy == "evalGenotypeMut") {
sprod = "mutator product";
}
Rcpp::Rcout << "\n Individual " << sprod << " terms are :";
for(auto const &i : s) Rcpp::Rcout << " " << i;
Rcpp::Rcout << std::endl;
}
if(calledBy == "evalGenotype") {
if(!prodNeg)
return prodFitness(s);
else
return prodDeathFitness(s);
} else { //if (calledBy == "evalGenotypeMut") {
return prodMuts(s);
}
}
// [[Rcpp::export]]
Rcpp::NumericVector evalRGenotypeAndMut(Rcpp::IntegerVector rG,
Rcpp::List rFE,
Rcpp::List muEF,
Rcpp::IntegerVector full2mutator_,
bool verbose, bool prodNeg) {
// Basically to test evalMutator. We repeat the conversion to genotype,
// but that is unavoidable here.
NumericVector out(2);
// For fitness. Except for "evalGenotypeFromR", all is done as in the
// rest of the internal code for evaluating a genotype.
fitnessEffectsAll F = convertFitnessEffects(rFE);
fitnessEffectsAll muef = convertFitnessEffects(muEF);
Genotype g = convertGenotypeFromR(rG, F);
vector s = evalGenotypeFitness(g, F);
if(!prodNeg)
out[0] = prodFitness(s);
else
out[0] = prodDeathFitness(s);
if(verbose) {
std::string sprod = "s";
Rcpp::Rcout << "\n Individual " << sprod << " terms are :";
for(auto const &i : s) Rcpp::Rcout << " " << i;
Rcpp::Rcout << std::endl;
}
// out[0] = evalRGenotype(rG, rFE, verbose, prodNeg, "evalGenotype");
// Genotype fullge = convertGenotypeFromR(rG, F);
const std::vector full2mutator = Rcpp::as >(full2mutator_);
out[1] = evalMutator(g, full2mutator, muef, verbose);
return out;
}
double mutationFromScratch(const std::vector& mu,
const spParamsP& spP,
const Genotype& g,
const fitnessEffectsAll& fe,
const int mutationPropGrowth,
const std::vector full2mutator,
const fitnessEffectsAll& muEF) {
double mumult;
if(full2mutator.size() > 0) { // so there are mutator effects
mumult = evalMutator(g, full2mutator, muEF);
} else mumult = 1.0;
if(mu.size() == 1) {
if(mutationPropGrowth)
return(mumult * mu[0] * spP.numMutablePos * spP.birth);
else
return(mumult * mu[0] * spP.numMutablePos);
} else {
std::vector sortedG = allGenesinGenotype(g);
std::vector nonmutated;
set_difference(fe.allGenes.begin(), fe.allGenes.end(),
sortedG.begin(), sortedG.end(),
back_inserter(nonmutated));
// std::vector mutatedG = genotypeSingleVector(g);
// Not worth it using an accumulator?
// std::vector gg = genotypeSingleVector(g);
// accumulate(gg.begin(), gg.end(), 0.0,
// [](double x, int y) {return( x + mu[y - 1])});
double mutrate = 0.0;
for(auto const &nm : nonmutated) {
mutrate += mu[nm - 1];
}
if(mutationPropGrowth)
mutrate *= spP.birth;
return(mumult * mutrate);
}
}
std::vector < std::vector > list_to_vector_of_int_vectors(Rcpp::List vlist) {
// As it says. We check each vector is sorted!
std::vector < std::vector > vv(vlist.size());
for(int i = 0; i != vlist.size(); ++i) {
vv[i] = Rcpp::as >(vlist[i]);
if( ! is_sorted(vv[i].begin(), vv[i].end()) )
throw std::logic_error("Fixation genotypes not sorted. Bug in R code.");
}
return vv;
}
// // [[Rcpp::export]]
// void wrap_list_to_vector_of_int_vectors(Rcpp::List vlist) {
// std::vector < std::vector > vo(vlist.size());
// vo = list_to_vector_of_int_vectors(vlist);
// for(int ii = 0; ii != vo.size(); ++ii) {
// Rcpp::Rcout << "\n";
// Rcpp::Rcout << " list position " << ii + 1 << ": ";
// for(int jj = 0; jj != vo[ii].size(); ++jj ) {
// Rcpp::Rcout << vo[ii][jj] << " ";
// }
// }
// Rcpp::Rcout << "\n";
// }
// Wrong when/if there are mutator effects. For suppose there wre, and
// they affected the parent, but no new mutator gene affects the child.
// We will, however, multiply twice by the mutator effect. Therefore, we
// disable this for now. We could fix this, checking if there are new
// mutation effects, or mutliplying/subtracting only new, etc. But too
// much of a mess.
// double mutationFromParent(const std::vector& mu,
// const spParamsP& newP,
// const spParamsP& parentP,
// const std::vector& newMutations,
// // const std::vector& nonmutated,
// const int mutationPropGrowth,
// const Genotype& fullge,
// const std::vector full2mutator,
// const fitnessEffectsAll& muEF) {
// double mumult;
// if(full2mutator.size() > 0) { // so there are mutator effects
// mumult = evalMutator(fullge, full2mutator, muEF);
// } else mumult = 1.0;
// if(mu.size() == 1) {
// if(mutationPropGrowth)
// return(mumult * mu[0] * newP.numMutablePos * newP.birth);
// else
// return(mumult * mu[0] * newP.numMutablePos);
// } else {
// double mutrate = parentP.mutation;
// for(auto const mutated : newMutations) {
// mutrate -= mu[mutated - 1];
// }
// if(mutationPropGrowth)
// mutrate *= newP.birth;
// return(mumult * mutrate);
// }
// }
// About order of genes and their names, etc
// We first read the R gene module. The $geneModule. The function is
// R_GeneModuleToGeneModule
// We also read the no interaction. They have their own number-name
// correspondence, within the noInt genes part. See the struct
// genesWithoutInt. But that already comes order from R with numbers
// starting after the last gene with interaction. See the R function
// allFitnessEffects.