Browse code

3.99.8: unity build

ramon diaz-uriarte (at Phelsuma) authored on 15/09/2022 19:29:51
Showing 1 changed files
1 1
deleted file mode 100644
... ...
@@ -1,1933 +0,0 @@
1
-//     Copyright 2013-2021 Ramon Diaz-Uriarte
2
-
3
-//     This program is free software: you can redistribute it and/or modify
4
-//     it under the terms of the GNU General Public License as published by
5
-//     the Free Software Foundation, either version 3 of the License, or
6
-//     (at your option) any later version.
7
-
8
-//     This program is distributed in the hope that it will be useful,
9
-//     but WITHOUT ANY WARRANTY; without even the implied warranty of
10
-//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
-//     GNU General Public License for more details.
12
-
13
-//     You should have received a copy of the GNU General Public License
14
-//     along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
-
16
-
17
-// #include "randutils.h" //Nope, until we have gcc-4.8 in Win; full C++11
18
-#include "debug_common.h"
19
-#include "common_classes.h"
20
-#include "new_restrict.h"
21
-#include "exprtk.h"
22
-#include <Rcpp.h>
23
-#include <iomanip>
24
-#include <algorithm>
25
-#include <random>
26
-#include <string>
27
-#include <sstream>
28
-#include <limits>
29
-#include <regex>
30
-
31
-
32
-using namespace Rcpp;
33
-using std::vector;
34
-using std::back_inserter;
35
-
36
-std::string concatIntsString(const std::vector<int>& ints,
37
-			     const std::string sep = ", ") {
38
-  std::string strout;
39
-  std::string comma = "";
40
-  for(auto const &g : ints) {
41
-    strout += (comma + std::to_string(g));
42
-    comma = sep;
43
-  }
44
-  return strout;
45
-}
46
-
47
-// Cumulative product of (1 + s).
48
-//      If using fitness landscape, a single s is passed, and that already
49
-//      has a "-1" subtracted. So when a single s is passed, we just
50
-//      return s + 1, which is the fitness (birth rate) as specified in
51
-//      the fitness landscape. This applies to FDF too.
52
-//      See function evalGenotypeFitness.
53
-//      Whenever we call prodFitness (e.g., nr_fitness or
54
-//      initMutantInitialization) it is called on the output of
55
-//      evalGenotypeFitness
56
-double prodFitness(const std::vector<double>& s) {
57
-  return accumulate(s.begin(), s.end(), 1.0,
58
-		    [](double x, double y) {return (x * std::max(0.0, (1 + y)));});
59
-}
60
-
61
-
62
-// Plays same role as prodFitness, but for Bozic models
63
-double prodDeathFitness(const std::vector<double>& s) {
64
-  return accumulate(s.begin(), s.end(), 1.0,
65
-		    [](double x, double y) {return (x * std::max(0.0, (1 - y)));});
66
-}
67
-
68
-
69
-// To compute cumulative product of mutator effects
70
-double prodMuts(const std::vector<double>& s) {
71
-  return accumulate(s.begin(), s.end(), 1.0,
72
-		    std::multiplies<double>());
73
-}
74
-
75
-// Compute cPDetect for Detection/stopping mechanism
76
-double set_cPDetect(const double n2, const double p2,
77
-		    const double PDBaseline) {
78
-  return ( -log(1.0 - p2) *  (PDBaseline / (n2 - PDBaseline)) );
79
-}
80
-
81
-// Probability of detection given size
82
-double probDetectSize(const double n, const double cPDetect,
83
-		      const double PDBaseline) {
84
-  if(n <= PDBaseline) {
85
-    return 0;
86
-  } else {
87
-    return (1 - exp( -cPDetect * ( (n - PDBaseline)/PDBaseline ) ));
88
-  }
89
-}
90
-
91
-
92
-// return true if this is detected (as given by the probability of detection as a function of size)
93
-bool detectedSizeP(const double n, const double cPDetect,
94
-		   const double PDBaseline, std::mt19937& ran_gen) {
95
-  if(cPDetect < 0) {
96
-    // As we OR, return false if this condition does not apply
97
-    return false;
98
-  } else {
99
-    std::uniform_real_distribution<double> runif(0.0, 1.0);
100
-    double prob = probDetectSize(n, cPDetect, PDBaseline);
101
-    if(prob <= 0.0) return false;
102
-    if(runif(ran_gen) <= prob) {
103
-      return true;
104
-    } else {
105
-      return false;
106
-    }
107
-  }
108
-}
109
-
110
-
111
-
112
-bool operator==(const Genotype& lhs, const Genotype& rhs) {
113
-  return (lhs.orderEff == rhs.orderEff) &&
114
-    (lhs.epistRtEff == rhs.epistRtEff) &&
115
-    (lhs.rest == rhs.rest) &&
116
-    (lhs.flGenes == rhs.flGenes);
117
-}
118
-
119
-// Added for completeness, but not used now
120
-// bool operator<(const Genotype& lhs, const Genotype& rhs) {
121
-//   std::vector<int> lh = genotypeSingleVector(lhs);
122
-//   std::vector<int> rh = genotypeSingleVector(rhs);
123
-//   if( lh.size() < rh.size() ) return true;
124
-//   else if ( lh.size() > rh.size() ) return false;
125
-//   else {
126
-//     for(size_t i = 0; i != lh.size(); ++i) {
127
-//       if( lh[i] < rh[i] ) return true;
128
-//     }
129
-//     return false;
130
-//   }
131
-// }
132
-
133
-
134
-TypeModel stringToModel(const std::string& mod) {
135
-  if(mod == "exp")
136
-    return TypeModel::exp;
137
-  else if(mod == "bozic1")
138
-    return TypeModel::bozic1;
139
-  else if(mod == "mcfarlandlog")
140
-    return TypeModel::mcfarlandlog;
141
-  else if(mod == "mcfarlandlogd")
142
-    return TypeModel::mcfarlandlog_d;
143
-  else if (mod == "arbitrary")
144
-    return TypeModel::arbitrary;
145
-  else if (mod == "constant")
146
-    return TypeModel::constant;
147
-  else
148
-    throw std::out_of_range("Not a valid TypeModel");
149
-}
150
-
151
-
152
-Dependency stringToDep(const std::string& dep) {
153
-  if(dep == "monotone") // AND, CBN, CMPN
154
-    return Dependency::monotone;
155
-  else if(dep == "semimonotone") // OR, SMN, DMPN
156
-    return Dependency::semimonotone;
157
-  else if(dep == "xmpn") // XOR, XMPN
158
-    return Dependency::xmpn;
159
-  else if(dep == "--") // for root, for example
160
-    return Dependency::single;
161
-  else
162
-    throw std::out_of_range("Not a valid typeDep");
163
-  // We never create the NA from entry data. NA is reserved for Root.
164
-}
165
-
166
-
167
-
168
-// genotype struct -> genotype as a vector of ints
169
-vector<int> genotypeSingleVector(const Genotype& ge) {
170
-  // orderEff in the order they occur. All others are sorted.
171
-  std::vector<int> allgG;
172
-  allgG.insert(allgG.end(), ge.orderEff.begin(), ge.orderEff.end());
173
-  allgG.insert(allgG.end(), ge.epistRtEff.begin(), ge.epistRtEff.end());
174
-  allgG.insert(allgG.end(), ge.rest.begin(), ge.rest.end());
175
-  allgG.insert(allgG.end(), ge.flGenes.begin(), ge.flGenes.end());
176
-  // this should not be unique'd as it aint' sorted
177
-  return allgG;
178
-}
179
-
180
-
181
-vector<int> allGenesinFitness(const fitnessEffectsAll& F) {
182
-  // Sorted
183
-  std::vector<int> g0;
184
-
185
-  if(F.Gene_Module_tabl.size()) {
186
-    if( F.Gene_Module_tabl[0].GeneNumID != 0 )
187
-      throw std::logic_error("\n Gene module table's first element must be 0."
188
-			     " This should have been caught in R.");
189
-    for(decltype(F.Gene_Module_tabl.size()) i = 1;
190
-  	i != F.Gene_Module_tabl.size(); i++) {
191
-      g0.push_back(F.Gene_Module_tabl[i].GeneNumID);
192
-    }
193
-  }
194
-  for(auto const &b: F.genesNoInt.NumID) {
195
-    g0.push_back(b);
196
-  }
197
-  for(auto const &b: F.fitnessLandscape.NumID) {
198
-    g0.push_back(b);
199
-  }
200
-  sort(g0.begin(), g0.end());
201
-
202
-  // Can we assume the fitness IDs go from 0 to n? Nope: because of
203
-  // muEF. But we assume in several places that there are no repeated
204
-  // elements in the output from this function.
205
-
206
-  // We verify there are no repeated elements. That is to strongly
207
-  // check our assumptions are right. Alternatively, return the "uniqued"
208
-  // vector and do not check anything.
209
-  std::vector<int> g0_cp(g0);
210
-  g0.erase( unique( g0.begin(), g0.end() ), g0.end() );
211
-  if(g0.size() != g0_cp.size())
212
-    throw std::logic_error("\n allGenesinFitness: repeated genes. "
213
-			   " This should have been caught in R.");
214
-  return g0;
215
-}
216
-
217
-vector<int> allGenesinGenotype(const Genotype& ge){
218
-  // Like genotypeSingleVector, but sorted
219
-  std::vector<int> allgG;
220
-  for(auto const &g1 : ge.orderEff)
221
-    allgG.push_back(g1);
222
-  for(auto const &g2 : ge.epistRtEff)
223
-    allgG.push_back(g2);
224
-  for(auto const &g3 : ge.rest)
225
-    allgG.push_back(g3);
226
-  for(auto const &g4 : ge.flGenes)
227
-    allgG.push_back(g4);
228
-
229
-  sort(allgG.begin(), allgG.end());
230
-  // Remove duplicates see speed comparisons here:
231
-  // http://stackoverflow.com/questions/1041620/whats-the-most-efficient-way-to-erase-duplicates-and-sort-a-vector
232
-  // We assume here there are no duplicates. Yes, a gene can have both
233
-  // fitness effects and order effects and be in the DAG. But it will be
234
-  // in only one of the buckets.
235
-  std::vector<int> g0_cp(allgG);
236
-  allgG.erase( unique( allgG.begin(), allgG.end() ), allgG.end() );
237
-  if(allgG.size() != g0_cp.size())
238
-    throw std::logic_error("\n allGenesinGenotype: repeated genes."
239
-			   " This should have been caught in R.");
240
-  return allgG;
241
-}
242
-
243
-
244
-// For users: if something depends on 0, that is it. No further deps.
245
-// And do not touch the 0 in Gene_Module_table.
246
-std::vector<Poset_struct> rTable_to_Poset(Rcpp::List rt) {
247
-
248
-  // The restriction table, or Poset, has a first element
249
-  // with nothing, so that all references by mutated gene
250
-  // are simply accessing the Poset[mutated gene] without
251
-  // having to remember to add 1, etc.
252
-
253
-  std::vector<Poset_struct> Poset;
254
-
255
-  Poset.resize(rt.size() + 1);
256
-  Poset[0].child = "0"; //should this be Root?? I don't think so.
257
-  Poset[0].childNumID = 0;
258
-  Poset[0].typeDep = Dependency::NA;
259
-  Poset[0].s = std::numeric_limits<double>::quiet_NaN();
260
-  Poset[0].sh = std::numeric_limits<double>::quiet_NaN();
261
-  Poset[0].parents.resize(0);
262
-  Poset[0].parentsNumID.resize(0);
263
-
264
-  Rcpp::List rt_element;
265
-  // std::string tmpname;
266
-  Rcpp::IntegerVector parentsid;
267
-  Rcpp::CharacterVector parents;
268
-
269
-  for(int i = 1; i != (rt.size() + 1); ++i) {
270
-    rt_element = rt[i - 1];
271
-    Poset[i].child = Rcpp::as<std::string>(rt_element["child"]);
272
-    Poset[i].childNumID = as<int>(rt_element["childNumID"]);
273
-    Poset[i].typeDep = stringToDep(as<std::string>(rt_element["typeDep"]));
274
-    Poset[i].s = as<double>(rt_element["s"]);
275
-    Poset[i].sh = as<double>(rt_element["sh"]);
276
-
277
-    parentsid = as<Rcpp::IntegerVector>(rt_element["parentsNumID"]);
278
-    parents = as<Rcpp::CharacterVector>(rt_element["parents"]);
279
-
280
-    if( parentsid.size() != parents.size() ) {
281
-      throw std::logic_error("parents size != parentsNumID size. Bug in R code.");
282
-    }
283
-
284
-    for(int j = 0; j != parentsid.size(); ++j) {
285
-      Poset[i].parentsNumID.push_back(parentsid[j]);
286
-      Poset[i].parents.push_back( (Rcpp::as< std::string >(parents[j])) );
287
-    }
288
-
289
-    // Should not be needed if R always does what is should. Disable later?
290
-    if(! is_sorted(Poset[i].parentsNumID.begin(), Poset[i].parentsNumID.end()) )
291
-      throw std::logic_error("ParentsNumID not sorted. Bug in R code.");
292
-
293
-    if(std::isinf(Poset[i].s))
294
-      Rcpp::Rcout << "WARNING: at least one s is infinite"
295
-		  << std::endl;
296
-    if(std::isinf(Poset[i].sh) && (Poset[i].sh > 0))
297
-      Rcpp::Rcout << "WARNING: at least one sh is positive infinite"
298
-		  << std::endl;
299
-  }
300
-  return Poset;
301
-}
302
-
303
-
304
-std::vector<Gene_Module_struct> R_GeneModuleToGeneModule(Rcpp::List rGM) {
305
-
306
-  std::vector<Gene_Module_struct> geneModule;
307
-
308
-  Rcpp::IntegerVector GeneNumID = rGM["GeneNumID"];
309
-  Rcpp::IntegerVector ModuleNumID = rGM["ModuleNumID"];
310
-  Rcpp::CharacterVector GeneName = rGM["Gene"];
311
-  Rcpp::CharacterVector ModuleName = rGM["Module"];
312
-
313
-  geneModule.resize(GeneNumID.size()); // remove later?
314
-
315
-  for(size_t i = 0; i != geneModule.size(); ++i) {
316
-    if( static_cast<int>(i) != GeneNumID[i])
317
-      throw std::logic_error(" i != GeneNumID. Bug in R code.");
318
-    // remove the next two these later?
319
-    geneModule[i].GeneNumID = GeneNumID[i];
320
-    geneModule[i].ModuleNumID = ModuleNumID[i];
321
-    geneModule[i].GeneName = GeneName[i];
322
-    geneModule[i].ModuleName = ModuleName[i];
323
-  }
324
-  return geneModule;
325
-}
326
-
327
-
328
-std::vector<int> GeneToModule(const std::vector<int>& Drv,
329
-			     const
330
-			      std::vector<Gene_Module_struct>& Gene_Module_tabl,
331
-			      const bool sortout, const bool uniqueout) {
332
-
333
-  std::vector<int>  mutatedModules;
334
-
335
-  for(auto it = Drv.begin(); it != Drv.end(); ++it) {
336
-    mutatedModules.push_back(Gene_Module_tabl[(*it)].ModuleNumID);
337
-  }
338
-  // sortout and uniqueout returns a single element of each. uniqueout only removes
339
-  // successive duplicates. sortout without unique is just useful for knowing
340
-  // what happens for stats, etc. Neither sortout nor uniqueout for keeping
341
-  // track of order of module events.
342
-  if(sortout) {
343
-    sort( mutatedModules.begin(), mutatedModules.end() );
344
-  }
345
-  if(uniqueout) {
346
-    mutatedModules.erase( unique( mutatedModules.begin(),
347
-				  mutatedModules.end() ),
348
-			  mutatedModules.end() );
349
-  }
350
-  return mutatedModules;
351
-}
352
-
353
-
354
-fitnessLandscape_struct convertFitnessLandscape(Rcpp::List flg,
355
-                Rcpp::List fl_df, Rcpp::List full_FDF_spec,
356
-                bool fdb, bool fdd){
357
-
358
-
359
-  // We assume this:
360
-  // A symbol can only be a symbol if it is
361
-  // in the fitness landscape table. A genotype not in the fitness landscape table
362
-  // always has fitness zero, and noting can depend on it.
363
-  // A genotype of 0 genotype will never appear in an equation.
364
-
365
-
366
-  fitnessLandscape_struct flS;
367
-
368
-  flS.names = Rcpp::as<std::vector<std::string> >(flg["Gene"]);
369
-  flS.NumID = Rcpp::as<std::vector<int> >(flg["GeneNumID"]);
370
-
371
-  if (fdb) {
372
-
373
-    flS.flbmap.clear(); //Set to 0 flmap
374
-    std::vector<std::string> genotNames =
375
-      Rcpp::as<std::vector<std::string> >(full_FDF_spec["Genotype_as_numbers"]);
376
-
377
-    std::vector<std::string> fvarsbvect =
378
-    Rcpp::as<std::vector<std::string> > (full_FDF_spec["Genotype_as_fvarsb"]);
379
-
380
-    std::vector<std::string> birth =
381
-      Rcpp::as<std::vector<std::string> > (full_FDF_spec["Birth_as_fvars"]);
382
-
383
-    if(fvarsbvect.size() != genotNames.size() )
384
-      throw std::logic_error("fvarsbvect (fitnessLandscapeVariables) and "
385
-          "genotNames (fitnessLandscape_df$Genotypes) "
386
-          "are of different lenght. Should have been caught in R");
387
-    
388
-    // Fill up the map genotypes (as string of ints) to fitness (as string:
389
-    // the expression that exprTk will evaluate).
390
-    // Length given by number of genotypes in fitness landscape
391
-    for(size_t i = 0; i != genotNames.size(); ++i) {
392
-      flS.flFDBmap.insert({genotNames[i], birth[i]});
393
-    }
394
-
395
-    // Fill up the map genotypes (as string of ints) to fVars (as string)
396
-    // If a mismatch in $fitnessLandscape_df and
397
-    // $fitnessLandscapeVariables, this does silly things.
398
-    for(size_t i = 0; i != genotNames.size(); ++i) {
399
-      flS.flfVarsBmap.insert({genotNames[i], fvarsbvect[i]});
400
-    }
401
-  } else {
402
-
403
-    flS.flFDBmap.clear();//Set to 0 flFDBmap
404
-    flS.flfVarsBmap.clear();//Set to 0 flfVarsBmap
405
-
406
-    std::vector<std::string> genotNames =
407
-      Rcpp::as<std::vector<std::string> >(fl_df["Genotype"]);
408
-    Rcpp::NumericVector birth = fl_df["Birth"];
409
-
410
-    for(size_t i = 0; i != genotNames.size(); ++i) {
411
-      flS.flbmap.insert({genotNames[i], birth[i]});
412
-    }
413
-  }
414
-
415
-  // Check if death is specified
416
-  if (fl_df.containsElementNamed("Death")) {
417
-
418
-    if(fdd) {
419
-
420
-      flS.fldmap.clear(); //Set to 0 fldmap
421
-
422
-      std::vector<std::string> genotNames =
423
-      Rcpp::as<std::vector<std::string> >(full_FDF_spec["Genotype_as_numbers"]);
424
-
425
-      std::vector<std::string> fvarsdvect =
426
-      Rcpp::as<std::vector<std::string> > (full_FDF_spec["Genotype_as_fvarsd"]);
427
-
428
-      std::vector<std::string> death =
429
-        Rcpp::as<std::vector<std::string> > (full_FDF_spec["Death_as_fvars"]);
430
-
431
-
432
-      if(fvarsdvect.size() != genotNames.size() )
433
-        throw std::logic_error("fvarsdvect (fitnessLandscapeVariables) and "
434
-            "genotNames (fitnessLandscape_df$Genotypes) "
435
-            "are of different lenght. Should have been caught in R");
436
-      
437
-      // Fill up the map genotypes (as string of ints) to fitness (as string:
438
-      // the expression that exprTk will evaluate).
439
-      // Length given by number of genotypes in fitness landscape
440
-      for(size_t i = 0; i != genotNames.size(); ++i) {
441
-        flS.flFDDmap.insert({genotNames[i], death[i]});
442
-      }
443
-
444
-      // Fill up the map genotypes (as string of ints) to fVars (as string)
445
-      // If a mismatch in $fitnessLandscape_df and
446
-      // $fitnessLandscapeVariables, this does silly things.
447
-      for(size_t i = 0; i != genotNames.size(); ++i) {
448
-        flS.flfVarsDmap.insert({genotNames[i], fvarsdvect[i]});
449
-      }
450
-    } else {
451
-
452
-      flS.flFDDmap.clear();//Set to 0 flFDDmap
453
-      flS.flfVarsDmap.clear();//Set to 0 flfVarsDmap
454
-
455
-      std::vector<std::string> genotNames =
456
-        Rcpp::as<std::vector<std::string> >(fl_df["Genotype"]);
457
-
458
-      Rcpp::NumericVector death = fl_df["Death"];
459
-
460
-      for(size_t i = 0; i != genotNames.size(); ++i) {
461
-        flS.fldmap.insert({genotNames[i], death[i]});
462
-      }
463
-    }
464
-
465
-  } else {
466
-    flS.flFDDmap.clear();//Set to 0 flFDDmap
467
-    flS.flfVarsDmap.clear();//Set to 0 flfVarsDmap
468
-    flS.fldmap.clear(); //Set to 0 fldmap
469
-  }
470
-
471
-  return flS;
472
-}
473
-
474
-genesWithoutInt convertNoInts(Rcpp::List nI) {
475
-  genesWithoutInt genesNoInt;
476
-  genesNoInt.names = Rcpp::as<std::vector<std::string> >(nI["Gene"]);
477
-  genesNoInt.NumID = Rcpp::as<std::vector<int> >(nI["GeneNumID"]);
478
-  genesNoInt.s = Rcpp::as<std::vector<double> >(nI["s"]);
479
-  genesNoInt.shift = genesNoInt.NumID[0]; // we assume mutations always
480
-				      // indexed 1 to something. Not 0 to
481
-				      // something.
482
-  return genesNoInt;
483
-}
484
-
485
-
486
-std::vector<epistasis> convertEpiOrderEff(Rcpp::List ep) {
487
-
488
-  std::vector<epistasis> Epistasis;
489
-
490
-  Rcpp::List element;
491
-  // For epistasis, the numID must be sorted, but never with order effects.
492
-  // Things come sorted (or not) from R.
493
-  Epistasis.resize(ep.size());
494
-  for(int i = 0; i != ep.size(); ++i) {
495
-    element = ep[i];
496
-    Epistasis[i].NumID = Rcpp::as<std::vector<int> >(element["NumID"]);
497
-    Epistasis[i].names = Rcpp::as<std::vector<std::string> >(element["ids"]);
498
-    Epistasis[i].s = as<double>(element["s"]);
499
-  }
500
-  return Epistasis;
501
-}
502
-
503
-std::vector<int> sortedAllOrder(const std::vector<epistasis>& E) {
504
-
505
-  std::vector<int> allG;
506
-  for(auto const &ec : E) {
507
-    for(auto const &g : ec.NumID) {
508
-      allG.push_back(g);
509
-    }
510
-  }
511
-  sort(allG.begin(), allG.end());
512
-  allG.erase( unique( allG.begin(), allG.end()),
513
-		      allG.end());
514
-  return allG;
515
-}
516
-
517
-std::vector<int> sortedAllPoset(const std::vector<Poset_struct>& Poset) {
518
-  // Yes, this could be done inside rTable_to_Poset but this is cleaner
519
-  // and will only add very little time.
520
-  std::vector<int> allG;
521
-  for(auto const &p : Poset) {
522
-    allG.push_back(p.childNumID);
523
-  }
524
-  sort(allG.begin(), allG.end());
525
-  allG.erase( unique( allG.begin(), allG.end()),
526
-		      allG.end());
527
-  return allG;
528
-}
529
-
530
-fitnessEffectsAll convertFitnessEffects(Rcpp::List rFE) {
531
-  // Yes, some of the things below are data.frames in R, but for
532
-  // us that is used just as a list.
533
-
534
-  fitnessEffectsAll fe;
535
-
536
-  Rcpp::List rrt = rFE["long.rt"];
537
-  Rcpp::List re = rFE["long.epistasis"];
538
-  Rcpp::List ro = rFE["long.orderEffects"];
539
-  Rcpp::List rgi = rFE["long.geneNoInt"];
540
-  Rcpp::List rgm = rFE["geneModule"];
541
-  bool rone = as<bool>(rFE["gMOneToOne"]);
542
-  Rcpp::IntegerVector drv = rFE["drv"];
543
-  bool fdb = as<bool>(rFE["frequencyDependentBirth"]);
544
-  bool fdd = as<bool>(rFE["frequencyDependentDeath"]);
545
-  std::string fType = as<std::string>(rFE["frequencyType"]);
546
-  Rcpp::List flg = rFE["fitnessLandscape_gene_id"];
547
-  // clang does not like this: Rcpp::DataFrame fl_df = rFE["fitnessLandscape_df"];
548
-  Rcpp::List fl_df = rFE["fitnessLandscape_df"];
549
-
550
-  // In the future, if we want noInt and fitnessLandscape, all
551
-  // we need is use the fitness landscape with an index smaller than those
552
-  // of noInt. So we can use noInt with shift being those in fitnessLandscape.
553
-  // BEWARE: will need to modify also createNewGenotype.
554
-  //<std::vector<std::string> > fvariables = as<std::vector<std::string> > (fvars);
555
-  // if(fl_df.nrows()) {
556
-  if(fl_df.size()) {
557
-    
558
-    Rcpp::List full_FDF_spec;
559
-    if (fdb || fdd) {
560
-      // Should not be used in convertFitnessLandscape if not fdb and not fdd
561
-      full_FDF_spec = rFE["full_FDF_spec"];
562
-    }
563
-
564
-    fe.fitnessLandscape = convertFitnessLandscape(flg, fl_df, full_FDF_spec, fdb, fdd); 
565
-    if (fdb) {
566
-      fe.fVarsb = as<std::vector<std::string> > (full_FDF_spec["Genotype_as_fvarsb"]);
567
-      fe.frequencyType = fType;
568
-    }
569
-
570
-    if (fdd) {
571
-      fe.fVarsd = as<std::vector<std::string> > (full_FDF_spec["Genotype_as_fvarsd"]);
572
-      fe.frequencyType = fType;
573
-    }
574
-  }
575
-
576
-  if(rrt.size()) {
577
-    fe.Poset = rTable_to_Poset(rrt);
578
-  }
579
-  if(re.size()) {
580
-    fe.Epistasis = convertEpiOrderEff(re);
581
-  }
582
-  if(ro.size()) {
583
-    fe.orderE = convertEpiOrderEff(ro);
584
-  }
585
-  if(rgi.size()) {
586
-    fe.genesNoInt = convertNoInts(rgi);
587
-  } else {
588
-    fe.genesNoInt.shift = -9L;
589
-  }
590
-  // If this is null, use the nullFitnessEffects function; never
591
-  // end up here.
592
-  if( (rrt.size() + re.size() + ro.size() + rgi.size() + fl_df.size()) == 0) {
593
-      throw std::logic_error("\n Nothing inside this fitnessEffects; why are you here?"
594
-			     "  Bug in R code.");
595
-  }
596
-
597
-  // At least for now, if we use fitness landscape nothing else allowed
598
-  if(fl_df.size() && ((rrt.size() + re.size() + ro.size() + rgi.size()) > 0)) {
599
-    throw std::logic_error("\n Fitness landscape specification."
600
-			   " There should be no other terms. "
601
-			   " Bug in R code");
602
-  }
603
-
604
-  fe.Gene_Module_tabl = R_GeneModuleToGeneModule(rgm);
605
-  fe.allOrderG = sortedAllOrder(fe.orderE);
606
-  fe.allPosetG = sortedAllPoset(fe.Poset);
607
-  fe.gMOneToOne = rone;
608
-  fe.allGenes = allGenesinFitness(fe);
609
-  fe.genomeSize =  fe.Gene_Module_tabl.size() - 1 + fe.genesNoInt.s.size() +
610
-    fe.fitnessLandscape.NumID.size();
611
-  fe.drv = as<std::vector<int> > (drv);
612
-  sort(fe.drv.begin(), fe.drv.end()); //should not be needed, but just in case
613
-  // cannot trust R gives it sorted
614
-
615
-  fe.frequencyDependentBirth = fdb; //new line to insert frequencyDependentBirth
616
-  fe.frequencyDependentDeath = fdd; //new line to insert frequencyDependentDeath
617
-
618
-  if(fe.genomeSize != static_cast<int>(fe.allGenes.size())) {
619
-    throw std::logic_error("\n genomeSize != allGenes.size(). Bug in R code.");
620
-  }
621
-  // At least for now
622
-  if(fe.fitnessLandscape.NumID.size() > 0) {
623
-    if(fe.genomeSize != static_cast<int>(fe.fitnessLandscape.NumID.size())) {
624
-      throw std::logic_error("\n genomeSize != genes in fitness landscape."
625
-			     "Bug in R code.");
626
-    }
627
-  }
628
-  return fe;
629
-}
630
-
631
-// Before making allGenesinGenotype return a unique vector: we do a
632
-// set_difference below. If we look at the help
633
-// (http://en.cppreference.com/w/cpp/algorithm/set_difference) if we had
634
-// more repetitions of an element in allGenes than in sortedparent we
635
-// could have a problem. But if you look at function "allgenesinFitness",
636
-// which is the one used to give the allgenes vector, you will see that
637
-// that one returns only one entry per gene, as it parses the geneModule
638
-// structure. So even if allGenesinGenotype returns multiple entries
639
-// (which they don't), there will be no bugs as the maximum number of
640
-// entries in the output of setdiff will be 0 or 1 as m is 1. But
641
-// allGenesinGenotype cannot return more than one element as can be seen
642
-// in createNewGenotype: an element only ends in the order component if it
643
-// is not in the epistasis component.  So there are no repeated elements
644
-// in allGenes or in sortedparent below. Also, beware that does not break
645
-// correct fitness evaluation of fitnessEffects where the same gene is in
646
-// epistasis and order, as can be seen in the tests and because of how we
647
-// evaluate fitness, where genes in a genotype in the orderEffects bucket
648
-// are placed also in the epist for fitness eval. See evalGenotypeFitness
649
-// and createNewGenotype.
650
-void obtainMutations(const Genotype& parent,
651
-		     const fitnessEffectsAll& fe,
652
-		     int& numMutablePosParent,
653
-		     std::vector<int>& newMutations,
654
-		     //randutils::mt19937_rng& ran_gen
655
-		     std::mt19937& ran_gen,
656
-		     std::vector<double> mu) {
657
-  //Ugly: we return the mutations AND the numMutablePosParent This is
658
-  // almost ready to accept multiple mutations. And it returns a vector,
659
-  // newMutations.
660
-  std::vector<int> sortedparent = allGenesinGenotype(parent);
661
-  std::vector<int> nonmutated;
662
-  set_difference(fe.allGenes.begin(), fe.allGenes.end(),
663
-		 sortedparent.begin(), sortedparent.end(),
664
-		 back_inserter(nonmutated));
665
-  // numMutablePos is used not only for mutation but also to decide about
666
-  // the dummy or null mutation case.
667
-  numMutablePosParent = nonmutated.size();
668
-  if(nonmutated.size() < 1)
669
-    throw std::out_of_range("Trying to obtain a mutation when nonmutated.size is 0."
670
-			    " Bug in R code; let us know.");
671
-  if(mu.size() == 1) { // common mutation rate
672
-    //chromothripsis would not use this, or this is the limit case with a
673
-    // single mutant
674
-    std::uniform_int_distribution<int> rpos(0, nonmutated.size() - 1);
675
-    newMutations.push_back(nonmutated[rpos(ran_gen)]);
676
-  } else { // per-gene mutation rate.
677
-    // Remember that mutations always indexed from 1, not from 0.
678
-    // We take an element from a discrete distribution, with probabilities
679
-    // proportional to the rates.
680
-    // FIXMEmaybe:varmutrate give a warning if the only mu is for mu = 0?
681
-    std::vector<double> mu_nm;
682
-    for(auto const &nm : nonmutated) mu_nm.push_back(mu[nm - 1]);
683
-    std::discrete_distribution<int> rpos(mu_nm.begin(), mu_nm.end());
684
-    newMutations.push_back(nonmutated[rpos(ran_gen)]);
685
-  }
686
-  // randutils
687
-  // // Yes, the next will work, but pick is simpler!
688
-  // // size_t rpos = ran_gen.uniform(static_cast<size_t>(0), nonmutated.size() - 1);
689
-  // //  newMutations.push_back(nonmutated[rpos]);
690
-  // int posmutated = ran_gen.pick(nonmutated);
691
-  // newMutations.push_back(posmutated);
692
-}
693
-
694
-
695
-// std::vector<int> genesInOrderModules(const fitnessEffectsAll& fe) {
696
-//   vector<int> genes;
697
-//   if(fe.gMOneToOne)
698
-//     genes = fe.alOrderG;
699
-//   else {
700
-//     for(auto const &m : fe.allOrderG) {
701
-//       genes.push_back()
702
-//     }
703
-
704
-//   }
705
-//   return genes;
706
-// }
707
-
708
-
709
-fitness_as_genes fitnessAsGenes(const fitnessEffectsAll& fe) {
710
-  // Give the fitnessEffects in terms of genes, not modules.
711
-
712
-  // Extract the noInt. Then those in order effects by creating a multimap
713
-  // to go from map to genes. Then all remaining genes are those only in
714
-  // poset. By set_difference.
715
-  fitness_as_genes fg = zero_fitness_as_genes();
716
-
717
-  // fitness_as_genes fg;
718
-  fg.flGenes = fe.fitnessLandscape.NumID;
719
-  if(fg.flGenes.size()) {
720
-    return fg;
721
-  }
722
-
723
-  fg.noInt = fe.genesNoInt.NumID;
724
-  std::multimap<int, int> MG;
725
-  for( auto const &mt : fe.Gene_Module_tabl) {
726
-    MG.insert({mt.ModuleNumID, mt.GeneNumID});
727
-  }
728
-  for (auto const &o : fe.allOrderG) {
729
-    for(auto pos = MG.lower_bound(o); pos != MG.upper_bound(o); ++pos)
730
-      fg.orderG.push_back(pos->second);
731
-  }
732
-  sort(fg.orderG.begin(), fg.orderG.end());
733
-
734
-  std::vector<int> tmpv = fg.orderG;
735
-  tmpv.insert(tmpv.end(),fg.noInt.begin(), fg.noInt.end());
736
-  sort(tmpv.begin(), tmpv.end()); // should not be needed
737
-
738
-  set_difference(fe.allGenes.begin(), fe.allGenes.end(),
739
-		 tmpv.begin(), tmpv.end(),
740
-		 back_inserter(fg.posetEpistG));
741
-  return fg;
742
-}
743
-
744
-
745
-std::map<int, std::string> mapGenesIntToNames(const fitnessEffectsAll& fe) {
746
-  // This is a convenience, used in the creation of output.
747
-  // Sure, we could do this when reading the data in.
748
-  // The noInt in convertNoInts.
749
-  std::map<int, std::string> gg;
750
-
751
-  for(auto const &mt : fe.Gene_Module_tabl) {
752
-    gg.insert({mt.GeneNumID, mt.GeneName});
753
-  }
754
-  // this is pedantic, as what is the size_type of NumID and of names?
755
-  // for(decltype(fe.genesNoInt.s.size()) i = 0;
756
-  //     i != fe.genesNoInt.s.size(); ++i)
757
-
758
-  for(size_t i = 0;
759
-      i != fe.genesNoInt.NumID.size(); ++i){
760
-    gg.insert({fe.genesNoInt.NumID[i], fe.genesNoInt.names[i]});
761
-  }
762
-
763
-  for(size_t i = 0;
764
-      i != fe.fitnessLandscape.NumID.size(); ++i){
765
-    gg.insert({fe.fitnessLandscape.NumID[i], fe.fitnessLandscape.names[i]});
766
-  }
767
-
768
-
769
-  return gg;
770
-}
771
-
772
-// It is simple to write specialized functions for when
773
-// there are no restrictions or no order effects , etc.
774
-Genotype createNewGenotype(const Genotype& parent,
775
-			   const std::vector<int>& mutations,
776
-			   const fitnessEffectsAll& fe,
777
-			   std::mt19937& ran_gen,
778
-			   //randutils::mt19937_rng& ran_gen
779
-			   bool random = true) {
780
-  // random: if multiple mutations, randomly shuffle the ordered ones?
781
-  // This is the way to go if chromothripsis, but not if we give an
782
-  // initial mutant
783
-
784
-  Genotype newGenot = parent;
785
-  std::vector<int> tempOrder; // holder for multiple muts if order.
786
-  bool sort_rest = false;
787
-  bool sort_epist = false;
788
-  bool sort_flgenes = false;
789
-
790
-  // Order of ifs: I suspect order effects rare. No idea about
791
-  // non-interaction genes, but if common the action is simple.
792
-
793
-  // A gene that is involved both in order effects and epistasis, only
794
-  // ends up in the orderEff container, for concision (even if a gene can
795
-  // be involved in both orderEff and epistasis and rT). But in the
796
-  // genotype evaluation, in evalGenotypeFitness, notice that we create
797
-  // the vector of genes to be checked against epistais and order effects
798
-  // using also those from orderEff:
799
-  //   std::vector<int> mutG (ge.epistRtEff);
800
-  //   mutG.insert( mutG.end(), ge.orderEff.begin(), ge.orderEff.end());
801
-  for(auto const &g : mutations) {
802
-    // If we are dealing with a fitness landscape, that is as far as we go here
803
-    // at least for now. No other genes affect fitness.
804
-    // But this can be easily fixed in the future; like this?
805
-    // if(g <= (fe.fitnessLandscape.NumID.size() + 1)) {
806
-    // and restructure the else logic for the noInt
807
-    if(fe.fitnessLandscape.NumID.size()) {
808
-      newGenot.flGenes.push_back(g);
809
-      sort_flgenes = true;
810
-    } else {
811
-      if( (fe.genesNoInt.shift < 0) || (g < fe.genesNoInt.shift) ) { // Gene with int
812
-	// We can be dealing with modules
813
-	int m;
814
-	if(fe.gMOneToOne) {
815
-	  m = g;
816
-	} else {
817
-	  m = fe.Gene_Module_tabl[g].ModuleNumID;
818
-	}
819
-	if( !binary_search(fe.allOrderG.begin(), fe.allOrderG.end(), m) ) {
820
-	  newGenot.epistRtEff.push_back(g);
821
-	  sort_epist = true;
822
-	} else {
823
-	  tempOrder.push_back(g);
824
-	}
825
-      } else {
826
-	// No interaction genes so no module stuff
827
-	newGenot.rest.push_back(g);
828
-	sort_rest = true;
829
-      }
830
-    }
831
-  }
832
-
833
-  // If there is order but multiple simultaneous mutations
834
-  // (chromothripsis), we randomly insert them
835
-
836
-  // initMutant cannot use this: we give the order.
837
-  // That is why the call from initMutant uses random = false
838
-  if( (tempOrder.size() > 1) && random)
839
-    shuffle(tempOrder.begin(), tempOrder.end(), ran_gen);
840
-  // The new randutils engine:
841
-  // if(tempOrder.size() > 1)
842
-  //   ran_gen.shuffle(tempOrder.begin(), tempOrder.end());
843
-
844
-
845
-  for(auto const &g : tempOrder)
846
-    newGenot.orderEff.push_back(g);
847
-
848
-  // Sorting done at end, in case multiple mutations
849
-  if(sort_rest)
850
-    sort(newGenot.rest.begin(), newGenot.rest.end());
851
-  if(sort_epist)
852
-    sort(newGenot.epistRtEff.begin(), newGenot.epistRtEff.end());
853
-  if(sort_flgenes)
854
-    sort(newGenot.flGenes.begin(), newGenot.flGenes.end());
855
-  return newGenot;
856
-
857
-  // Prepare specialized functions:??
858
-  // Specialized functions:
859
-  // Never interactions: push into rest and sort. Identify by shift == 1.
860
-  // Never no interactions: remove the if. shift == -9.
861
-}
862
-
863
-
864
-
865
-// A paranoid check in case R code has bugs.
866
-void breakingGeneDiff(const vector<int>& genotype,
867
-		      const vector<int>& fitness) {
868
-  std::vector<int> diffg;
869
-
870
-  set_difference(genotype.begin(), genotype.end(),
871
-		 fitness.begin(), fitness.end(),
872
-		 back_inserter(diffg));
873
-  if(diffg.size()) {
874
-    Rcpp::Rcout << "Offending genes :";
875
-    for(auto const &gx : diffg) {
876
-      Rcpp::Rcout << " " << gx;
877
-    }
878
-    Rcpp::Rcout << "\t Genotype: ";
879
-    for(auto const &g1 : genotype) Rcpp::Rcout << " " << g1;
880
-    Rcpp::Rcout << "\t Fitness: ";
881
-    for(auto const &g1 : fitness) Rcpp::Rcout << " " << g1;
882
-
883
-    Rcpp::Rcout << "\n ";
884
-    throw std::logic_error("\n At least one gene in the genotype not in fitness effects."
885
-			   " Bug in R code.");
886
-
887
-  }
888
-}
889
-
890
-void checkNoNegZeroGene(const vector<int>& ge) {
891
-  if( ge[0] == 0 )
892
-    throw std::logic_error("\n Genotype cannot contain 0. Bug in R code.");
893
-  else if(ge[0] < 0)
894
-    throw std::logic_error("\n Genotype cannot contain negative values. Bug in R code.");
895