Browse code

v.2.5.12

- Several improvements to rfitness.
- simOGraph using transitive reduction properly.
- Miscell documentation improvements.
- Updated citation to Bioinformatics paper.



git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@126818 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 18/02/2017 20:20:42
Showing12 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.5.9
5
-Date: 2017-01-09
4
+Version: 2.5.12
5
+Date: 2017-02-08
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -29,7 +29,7 @@ License: GPL (>= 3)
29 29
 URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cancer.gov/gsr/packages/oncosimulr/
30 30
 BugReports: https://github.com/rdiaz02/OncoSimul/issues
31 31
 Depends: R (>= 3.3.0)
32
-Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel
32
+Imports: Rcpp (>= 0.12.4), parallel, data.table, graph, Rgraphviz, gtools, igraph, methods, RColorBrewer, grDevices, car, dplyr, smatr, ggplot2, ggrepel, nem
33 33
 Suggests: BiocStyle, knitr, Oncotree, testthat (>= 1.0.0), rmarkdown, bookdown, pander
34 34
 LinkingTo: Rcpp
35 35
 VignetteBuilder: knitr
... ...
@@ -63,6 +63,7 @@ importFrom("dplyr", "full_join", "left_join", "right_join", "%>%", "mutate",
63 63
            "filter")
64 64
 importFrom("smatr", "ma") ## for major axis regression in some tests
65 65
 importFrom("car", "linearHypothesis")
66
+importFrom("nem", "transitive.reduction")
66 67
 ## importFrom("slam", "simple_triplet_zero_matrix", ## "colapply_simple_triplet_matrix",
67 68
 ##            "col_sums")
68 69
 
... ...
@@ -1,4 +1,4 @@
1
-## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
1
+## Copyright 2013, 2014, 2015, 2016, 2017 Ramon Diaz-Uriarte
2 2
 
3 3
 ## This program is free software: you can redistribute it and/or modify
4 4
 ## it under the terms of the GNU General Public License as published by
... ...
@@ -271,7 +271,7 @@ allGenotypes_to_matrix <- function(x) {
271 271
     m <- rbind(c(rep(0, length(all_genes)), fwt),
272 272
                m)
273 273
     ## Ensure sorted
274
-    m <- data.frame(m)
274
+    ## m <- data.frame(m)
275 275
     rs <- rowSums(m[, -ncol(m), drop = FALSE])
276 276
     m <- m[order(rs), , drop = FALSE]
277 277
     ## m <- m[do.call(order, as.list(cbind(rs, m[, -ncol(m)]))), ]
... ...
@@ -81,8 +81,13 @@ simOGraph <- function(n, h = ifelse(n >= 4, 4, n),
81 81
    
82 82
 
83 83
     ## Prune to remove indirect connections
84
-    if(multilevelParent & removeDirectIndirect)
85
-        adjMat <- removeIndirectConnections(adjMat)
84
+    if(multilevelParent & removeDirectIndirect) {
85
+        ## adjMat <- transitiveReduction(adjMat)
86
+        trm <- nem::transitive.reduction(nem::transitive.closure(adjMat))
87
+        stopifnot(all(trm %in% c(0L, 1L) ))
88
+        storage.mode(trm) <- "integer"
89
+        adjMat <- trm
90
+    }
86 91
     if(out == "adjmat")
87 92
         return(adjMat)
88 93
     else {
... ...
@@ -153,36 +158,65 @@ connectIndiv <- function(parents, nparents, n) {
153 158
 ##     return(setdiff(allP, parents))
154 159
 ## }
155 160
 
156
-findAllParents <- function(x, adjMat) {
157
-    if(x == 0)
158
-        return(NULL)
159
-    else{
160
-        p <- which(adjMat[, x + 1] == 1) - 1
161
-        p1 <- unlist(lapply(p, function(x) findAllParents(x, adjMat)))
162
-        return(c(p, p1))
163
-    }
164
-}
161
+## findAllParents <- function(x, adjMat) {
162
+##     if(x == 0)
163
+##         return(NULL)
164
+##     else{
165
+##         p <- which(adjMat[, x + 1] == 1) - 1
166
+##         p1 <- unlist(lapply(p, function(x) findAllParents(x, adjMat)))
167
+##         return(c(p, p1))
168
+##     }
169
+## }
165 170
 
166
-repeatedParents <- function(x, adjMat) {
167
-    ap <- findAllParents(x, adjMat)
168
-    dups <- duplicated(ap)
169
-    dupP <- setdiff(ap[dups], 0)
170
-    dupP
171
-}
171
+## repeatedParents <- function(x, adjMat) {
172
+##     ap <- findAllParents(x, adjMat)
173
+##     dups <- duplicated(ap)
174
+##     dupP <- setdiff(ap[dups], 0)
175
+##     dupP
176
+## }
177
+
178
+
179
+## ## ## But this only works if a special order in the rows and columns
180
+## ## ## and will not work with the root row.
181
+## ## m1 <- matrix(0, ncol = 5, nrow = 5); colnames(m1) <- rownames(m1) <- LETTERS[1:5]
182
+## ## for(i in 1:4) m1[i, i+1] <- 1
183
+## ## library(ggm)
184
+## ## m1tc <- ggm::transClos(m1)
185
+## ## transitiveReduction(m1tc)
186
+
187
+
188
+## ## Other R and BioC packages that will do transitive reduction:
189
+## ## nem (BioC): works with adjacency matrices directly
190
+## ## rBiopaxParser (BioC): a wrapper to nem
191
+## ## rPref
192
+## ## relations
193
+## ## hasseDiagram
194
+
195
+## transitiveReduction <- function(adjMat) {
196
+##     ## Return the transitive reduction
197
+
198
+## But note my bug report to BioC,
199
+
200
+## https://support.bioconductor.org/p/91695/
201
+
202
+## See discussion and comments on
203
+## http://stackoverflow.com/a/6702198 
204
+## and comments on http://stackoverflow.com/a/2372202
205
+## So one need to do the transitive closure first.
206
+    
207
+##     ## We remove the direct connections. How? We search, for each node,
208
+##     ## for the set of all parents/grandparents/grandgrandparents/etc. If
209
+##     ## any of those ancestors is repeated, it means you go from that
210
+##     ## ancestor to the node in question through at least two different
211
+##     ## routes. Thus, ensure the direct is 0 (it might already be, no
212
+##     ## problem). Once you do that, you know there are not both indirect
213
+##     ## AND direct connections and thus you have the transitive reduction.
214
+##     for(i in ncol(adjMat):2) {
215
+##         dp <- repeatedParents( i - 1, adjMat)
216
+##         if(length(dp))
217
+##             adjMat[cbind(dp + 1, i)] <- 0L
218
+##     }
219
+##     return(adjMat)
220
+## }
172 221
 
173 222
 
174
-removeIndirectConnections <- function(adjMat) {
175
-    ## This is a bad name: we remove the direct connections. How? We
176
-    ## search, for each node, for the set of all
177
-    ## parents/grandparents/grandgranparents. If any of those ancestors is
178
-    ## repeated, it means you go from that ancestor to the node in
179
-    ## question through at least two different routes. Thus, ensure the
180
-    ## direct is 0 (it might already be, no problem). Once you do that,
181
-    ## you know there are not both indirect AND direct connections.
182
-    for(i in ncol(adjMat):2) {
183
-        dp <- repeatedParents( i - 1, adjMat)
184
-        if(length(dp))
185
-            adjMat[cbind(dp + 1, i)] <- 0L
186
-    }
187
-    return(adjMat)
188
-}
... ...
@@ -1,26 +1,50 @@
1
+## Copyright 2013, 2014, 2015, 2016, 2017 Ramon Diaz-Uriarte
2
+
3
+## This program is free software: you can redistribute it and/or modify
4
+## it under the terms of the GNU General Public License as published by
5
+## the Free Software Foundation, either version 3 of the License, or
6
+## (at your option) any later version.
7
+
8
+## This program is distributed in the hope that it will be useful,
9
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
+## GNU General Public License for more details.
12
+
13
+## You should have received a copy of the GNU General Public License
14
+## along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
+
16
+
17
+
1 18
 rfitness <- function(g, c= 0.5,
2 19
                      sd = 1,
20
+                     mu = 1,
3 21
                      reference = "random", ## "random", "max", or the vector,
4 22
                                      ## e.g., rep(1, g). If random, a
5 23
                                      ## random genotype is chosen as
6 24
                                      ## reference. If "max" this is rep(1, g)
7 25
                      scale = NULL, ## a two-element vector: min and max
8
-                     wt_is_1 = TRUE, ## wt has fitness 1
26
+                     wt_is_1 = c("subtract", "divide", "force", "no"),
27
+                     ## wt_is_1 = TRUE, ## wt has fitness 1
9 28
                      log = FALSE, ## log only makes sense if all values >
10 29
                                  ## 0. scale with min > 0, and/or set
11
-                                 ## wt_is_1 = TRUE
12
-                     min_accessible_genotypes = 0,
13
-                     accessible_th = 0) {
30
+                                 ## wt_is_1 = divide
31
+                     min_accessible_genotypes = NULL,
32
+                     accessible_th = 0,
33
+                     truncate_at_0 = TRUE) {
14 34
     ## Like Franke et al., 2011 and others of Krug. Very similar to Greene
15 35
     ## and Crona, 2014. And this allows moving from HoC to purely additive
16 36
     ## changing c and sd.
17 37
 
18 38
     ## FIXME future: do this with order too?
19 39
     ##    - do not generate a matrix of genotypes but a matrix of ordered genot.
40
+    wt_is_1 = match.arg(wt_is_1)
41
+    
20 42
     m <- generate_matrix_genotypes(g)
21 43
     done <- FALSE
44
+    ## attempts <- 0 ## for debugging/tracking purposes
22 45
     while(!done) {
23
-        f_r <- rnorm(nrow(m), mean = 0, sd = sd)
46
+        ## attempts <- attempts + 1
47
+        f_r <- rnorm(nrow(m), mean = mu, sd = sd)
24 48
         if(inherits(reference, "character") && length(reference) == 1) {
25 49
             if(reference == "random") {
26 50
                 referenceI <- m[sample(nrow(m), 1), ]
... ...
@@ -41,7 +65,7 @@ rfitness <- function(g, c= 0.5,
41 65
             fi <- (fi - min(fi))/(max(fi) - min(fi))
42 66
             fi <- scale[1] + fi * (scale[2] - scale[1])
43 67
         }
44
-        if(wt_is_1) {
68
+        if(wt_is_1 == "divide") {
45 69
             ## We need to shift to avoid ratios involving negative numbers and
46 70
             ## we need to avoid having any fitness at 0, specially the wt.  If
47 71
             ## any negative value, add the min, and shift by the magnitude of
... ...
@@ -54,16 +78,28 @@ rfitness <- function(g, c= 0.5,
54 78
             if(min_fi < 0)
55 79
                 fi <- fi + 2 * abs(min(fi))
56 80
             fi <- fi/fi[1]
81
+        } else if (wt_is_1 == "subtract") {
82
+            fi <- fi - fi[1] + 1.0
83
+        } else if(wt_is_1 == "force") {
84
+            fi[1] <- 1.0
85
+            if(!is.null(scale)) {
86
+                if( (1 > scale[2]) || (1 < scale[1]))
87
+                    warning("Using wt_is_1 = force and scale, but scale does ",
88
+                            "not include 1")
89
+            }
90
+        }
91
+        if(truncate_at_0) {
92
+            fi[fi <= 0] <- 1e-9
57 93
         }
58
-        
59 94
         if(log) {
60 95
             fi <- log(fi/fi[1]) + 1
61 96
         }
62 97
         m <- cbind(m, Fitness = fi)
63
-        if(min_accessible_genotypes > 0) {
98
+        if(!is_null_na(min_accessible_genotypes)) {
64 99
             ## num_accessible_genotypes <- count_accessible_g(m, accessible_th)
65 100
             ## Recall accessibleGenotypes includes the wt, if accessible.
66 101
             num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1
102
+            ## message("\n     num_accessible_genotypes = ", num_accessible_genotypes, "\n")
67 103
             if(num_accessible_genotypes >= min_accessible_genotypes) {
68 104
                 done <- TRUE
69 105
                 attributes(m) <- c(attributes(m),
... ...
@@ -77,6 +113,7 @@ rfitness <- function(g, c= 0.5,
77 113
             done <- TRUE
78 114
         }
79 115
     }
116
+    ## message("\n number of attempts = ", attempts, "\n")
80 117
     class(m) <- c(class(m), "genotype_fitness_matrix")
81 118
     return(m)
82 119
 }
... ...
@@ -1,22 +1,22 @@
1
-citHeader("If you use OncoSimulR, please cite the OncoSimulR bioRxiv paper.",
1
+citHeader("If you use OncoSimulR, please cite the OncoSimulR Bioinformatics paper.",
2 2
           " A former version of OncoSimulR has been used in a large",
3 3
           " comparative study of methods to infer restrictions,",
4 4
           " published in BMC Bioinformatics; you might want to cite that too,",
5 5
           " if appropriate, such as when referring to using evolutionary simulations",
6
-          " to assess oncogenetic tree methods performance.")
6
+          " to assess oncogenetic tree/cancer progression methods performance.")
7 7
 
8 8
 
9 9
 citEntry(entry="Article",
10 10
          author = "R Diaz-Uriarte",
11
-         title = "OncoSimulR: genetic simulation of cancer progression with arbitrary epistasis and mutator genes.",
12
-         journal = "bioRxiv",
13
-         year = "2016",
14
-         doi = "10.1101/069500",
15
-         publisher = "Cold Spring Harbor Labs Journals",
16
-         url = "http://biorxiv.org/content/early/2016/08/14/069500",
11
+         title = "OncoSimulR: genetic simulation with arbitrary epistasis and mutator genes in asexual populations.",
12
+         journal = "Bioinformatics",
13
+         year = "2017",
14
+         doi = "10.1093/bioinformatics/btx077",
15
+         publisher = "Oxford University Press",
16
+         url = " https://doi.org/10.1093/bioinformatics/btx077",
17 17
          textVersion = paste("R Diaz-Uriarte.",
18
-                             "OncoSimulR: genetic simulation of cancer progression with arbitrary epistasis and mutator genes.",
19
-                             " 2016. bioRxiv, http://dx.doi.org/10.1101/069500.")
18
+                             "OncoSimulR: genetic simulation with arbitrary epistasis and mutator genes in asexual populations.",
19
+                             " 2017. Bioinformatics, https://doi.org/10.1093/bioinformatics/btx077.")
20 20
          )
21 21
 
22 22
 ## citEntry(entry="Manual",
... ...
@@ -1,3 +1,15 @@
1
+Changes in version 2.5.12 (2017-02-18):
2
+	- rfitness: allow simple forcing of wt to 1, shifting by
3
+	  subtraction, and specifying mu of normal distribution.
4
+	- simOGraph: proper trm comparison.
5
+	- Citation now shows Bioinformatics reference.
6
+
7
+Changes in version 2.5.11 (2017-01-27):
8
+	- Transitive reduction: must call transitive.closure first.
9
+
10
+Changes in version 2.5.10 (2017-01-27):
11
+	- Transitive reduction: calling nem in simOGraph
12
+	
1 13
 Changes in version 2.5.9 (2017-01-09):
2 14
 	- Added code coverage comments to vignette.
3 15
 
... ...
@@ -10,9 +10,10 @@
10 10
 
11 11
 \usage{
12 12
 
13
-rfitness(g, c = 0.5, sd = 1, reference = "random", scale = NULL,
14
-         wt_is_1 = TRUE, log = FALSE, min_accessible_genotypes = 0,
15
-         accessible_th = 0)
13
+rfitness(g, c = 0.5, sd = 1, mu = 1, reference = "random", scale = NULL,
14
+         wt_is_1 = c("subtract", "divide", "force", "no"),
15
+         log = FALSE, min_accessible_genotypes = NULL,
16
+         accessible_th = 0, truncate_at_0 = TRUE)
16 17
 }
17 18
 
18 19
 
... ...
@@ -26,7 +27,11 @@ rfitness(g, c = 0.5, sd = 1, reference = "random", scale = NULL,
26 27
     in Hamming distance from the reference genotype (see \code{reference}).}
27 28
 
28 29
   \item{sd}{The standard deviation of the random component (a normal
29
-  distribution of mean 0 and standard deviation \code{sd}).}
30
+  distribution of mean \code{mu} and standard deviation \code{sd}).}
31
+
32
+\item{mu}{The mean of the random component (a normal distribution of
33
+mean \code{mu} and standard deviation \code{sd}).}
34
+
30 35
 
31 36
 \item{reference}{The reference genotype: for the deterministic, additive
32 37
   part, this is the genotype with maximal fitness, and all other
... ...
@@ -46,15 +51,36 @@ rfitness(g, c = 0.5, sd = 1, reference = "random", scale = NULL,
46 51
   two-element vector, fitness is re-scaled between \code{scale[1]} (the
47 52
   minimum) and \code{scale[2]} (the maximum).}
48 53
 
49
-\item{wt_is_1}{If TRUE, fitness will be scaled so that the wildtype (the
50
-  genotype with no mutations) has fitness of 1. This is applied after
51
-  \code{scale}, so if you specify both it is most likely that the final
52
-  fitness will not respect the limits in \code{scale}.}
54
+\item{wt_is_1}{If "divide" (the default) the fitness of all genotypes is
55
+  divided by the fitness of the wildtype (after possibly adding a value
56
+  to ensure no negative fitness) so that the wildtype (the genotype with
57
+  no mutations) has fitness 1. This is a case of scaling, and it is
58
+  applied after \code{scale}, so if you specify both
59
+  "wt_is_1 = 'divide'" and use an argument for \code{scale} it is most
60
+  likely that the final fitness will not respect the limits in
61
+  \code{scale}.
62
+
63
+  If "subtract" we shift all the fitness values (subtracting fitness of
64
+  the wildtype and adding 1) so that the wildtype ends up with a fitness
65
+  of 1. This is also applied after \code{scale}, so if you specify both
66
+  "wt_is_1 = 'subtract'" and use an argument for \code{scale} it is most
67
+  likely that the final fitness will not respect the limits in
68
+  \code{scale} (though the distorsion might be simpler to see as just a
69
+  shift up or down).
70
+  
71
+  If "force" we simply set the fitness of the wildtype to 1, without any
72
+  divisions. This means that the \code{scale} argument would work (but
73
+  it is up to you to make sure that the range of the scale argument
74
+  includes 1 or you might not get what you want). Note that using this
75
+  option can easily lead to landscapes with no accessible genotypes
76
+  (unless you also use \code{scale}).
77
+
78
+  If "none", the fitness of the wildtype is not touched.  }
53 79
 
54 80
 
55 81
 \item{log}{If TRUE, log-transform fitness.}
56 82
 
57
-\item{min_accessible_genotypes}{If larger than 0, the minimum number of
83
+\item{min_accessible_genotypes}{If not NULL, the minimum number of
58 84
   accessible genotypes in the fitness landscape. A genotype is
59 85
   considered accessible if you can reach if from the wildtype by going
60 86
   through at least one path where all changes in fitness are larger or
... ...
@@ -69,6 +95,10 @@ rfitness(g, c = 0.5, sd = 1, reference = "random", scale = NULL,
69 95
   If the condition is not satisfied, we continue generating random
70 96
   fitness landscapes with the specified parameters until the condition
71 97
   is satisfied.
98
+
99
+  (Why check against NULL and not against zero? Because this allows you
100
+  to count accessible genotypes even if you do not want to ensure a
101
+  minimum number of accessible genotypes.)
72 102
 }
73 103
 
74 104
 \item{accessible_th}{The threshold for the minimal change in fitness at
... ...
@@ -78,6 +108,12 @@ rfitness(g, c = 0.5, sd = 1, reference = "random", scale = NULL,
78 108
   allow small decreases in fitness in successive steps, use a small
79 109
   negative value for \code{accessible_th}.  }
80 110
 
111
+\item{truncate_at_0}{If TRUE (the default) any fitness <= 0 is
112
+  substituted by a small positive constant (1e-9). Why? Because
113
+  MAGELLAN and some plotting routines can have trouble (specially if you
114
+  log) with values <=0. Or we might have trouble if we want to log the
115
+  fitness.}
116
+
81 117
 } 
82 118
 
83 119
 
... ...
@@ -90,14 +126,25 @@ rfitness(g, c = 0.5, sd = 1, reference = "random", scale = NULL,
90 126
 
91 127
   where \eqn{d(i, j)} is the Hamming distance between genotypes \eqn{i}
92 128
   and \eqn{j} (the number of positions that differ) and \eqn{x_i} is a
93
-  random variable (in this case, a normal deviate of mean 0 and standard
94
-  deviation \code{sd}).
129
+  random variable (in this case, a normal deviate of mean \code{mu}
130
+  and standard deviation \code{sd}).
95 131
 
96 132
   Setting \eqn{c = 0} we obtain a House of Cards model. Setting \eqn{sd
97 133
     = 0} fitness is given by the distance from the reference and if the
98 134
     reference is the genotype with all positions mutated, then we have a
99 135
     fully additive model (fitness increases linearly with the number of
100 136
     positions mutated).
137
+
138
+  For OncoSimulR, we often want the wildtype to have a mean of
139
+  1. Reasonable settings are \code{mu = 1} and \code{wt_is_1 =
140
+  'subtract'} so that we simulate from a distribution centered in 1, and
141
+  we make sure afterwards (via a simple shift) that the wildtype is
142
+  actuall 1. The \code{sd} controls the standard deviation, with the
143
+  usual working and meaning as in a normal distribution, unless \code{c}
144
+  is different from zero. In this case, with \code{c} large, the range
145
+  of the data can be large, specially if \code{g} (the number of genes)
146
+  is large.
147
+  
101 148
 } 
102 149
 
103 150
 \value{
... ...
@@ -6,7 +6,7 @@ test_that("Expect output", {
6 6
     expect_output(print(rfitness(3, reference = c(1, 0, 1))), "Fitness",
7 7
                   fixed = TRUE)
8 8
     expect_output(print(rfitness(5, scale = c(1, 7))), "Fitness", fixed = TRUE)
9
-    expect_output(print(rfitness(5, scale = c(1, 4), wt_is_1 = FALSE,
9
+    expect_output(print(rfitness(5, scale = c(1, 4), wt_is_1 = "no",
10 10
                            log = TRUE)), "Fitness", fixed = TRUE)
11 11
     expect_output(print(rfitness(4, reference = "random2")), "Fitness",
12 12
                   fixed = TRUE)
... ...
@@ -40,3 +40,8 @@ test_that("Minimal tests of generate_matrix_genotypes", {
40 40
         rm(lucstmp)
41 41
     }
42 42
 })
43
+
44
+test_that("Warnings if scale out of scale", {
45
+    expect_warning(rfitness(4, wt_is_1 = "force", scale = c(0, 0.5)),
46
+                   "Using wt_is_1 = force", fixed = TRUE)
47
+})
... ...
@@ -1269,12 +1269,14 @@ In R, you can do
1269 1269
 ```{r}
1270 1270
 citation("OncoSimulR")
1271 1271
 ``` 
1272
-which will tell you how to cite the package.
1272
+which will tell you how to cite the package. Please, do cite the
1273
+Bionformatics paper if you use the package in publications.
1273 1274
 
1274 1275
 
1275
-This is the URL for the bioRxiv
1276
-paper:
1277
-[http://biorxiv.org/content/early/2016/08/14/069500](http://biorxiv.org/content/early/2016/08/14/069500). 
1276
+This is the URL for the Bioinformatics paper: [https://doi.org/10.1093/bioinformatics/btx077](https://doi.org/10.1093/bioinformatics/btx077)
1277
+(there is also an early preprint
1278
+at [bioRxiv](http://biorxiv.org/content/early/2016/08/14/069500),
1279
+but it should now point to the Bioinformatics paper). 
1278 1280
 
1279 1281
 <!-- This no longer helps that much with the many changes -->
1280 1282
 <!-- You -->
... ...
@@ -1,15 +1,15 @@
1 1
 \usepackage[%
2
-		shash={37dc184},
3
-		lhash={37dc184ae1ccb70eaf93c8791c8f029a990d203a},
4
-		authname={ramon diaz-uriarte (at Phelsuma)},
2
+		shash={57619a7},
3
+		lhash={57619a790a2604690e0cd2e73fbcd67d1e4d9fbf},
4
+		authname={Ramon Diaz-Uriarte (at Lacerta)},
5 5
 		authemail={rdiaz02@gmail.com},
6
-		authsdate={2017-01-08},
7
-		authidate={2017-01-08 23:51:49 +0100},
8
-		authudate={1483915909},
9
-		commname={ramon diaz-uriarte (at Phelsuma)},
6
+		authsdate={2017-02-09},
7
+		authidate={2017-02-09 19:35:58 +0100},
8
+		authudate={1486665358},
9
+		commname={Ramon Diaz-Uriarte (at Lacerta)},
10 10
 		commemail={rdiaz02@gmail.com},
11
-		commsdate={2017-01-08},
12
-		commidate={2017-01-08 23:51:49 +0100},
13
-		commudate={1483915909},
14
-		refnames={ (HEAD -> master)}
11
+		commsdate={2017-02-09},
12
+		commidate={2017-02-09 19:35:58 +0100},
13
+		commudate={1486665358},
14
+		refnames={ (HEAD, origin/master, origin/HEAD)}
15 15
 	]{gitsetinfo}
16 16
\ No newline at end of file