Browse code

2.17.1: rfitness, clarified log and truncate, and Magellan_stats, return vector and do not use log by default

ramon diaz-uriarte (at Phelsuma) authored on 28/11/2019 19:58:05
Showing9 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.17.0
5
-Date: 2019-08-14
4
+Version: 2.17.1
5
+Date: 2019-11-24
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -25,6 +25,10 @@ to_Magellan <- function(x, file,
25 25
                         max_num_genotypes = 2000) {
26 26
     ## Go directly if you have, as entry, an object from
27 27
     ## rfitness!! to_Fitness_Matrix can be very slow.
28
+    ## But often, when we use allFitnessEffects, we
29
+    ## obtain the fitness landscape as genotype_fitness_matrix
30
+    ## FIXME: we could use that fact here.
31
+    ## or maybe in to_Fitness_Matrix
28 32
     if(is.null(file)) {
29 33
         file <- tempfile()
30 34
         cat("\n Using file ", file, "\n")
... ...
@@ -118,7 +122,8 @@ to_Fitness_Matrix <- function(x, max_num_genotypes) {
118 122
 
119 123
 ## Based on from_genotype_fitness
120 124
 ## but if we are passed a fitness landscapes as produced by
121
-## rfitness, do nothing
125
+## rfitness, do nothing. Well, it actually does something.
126
+
122 127
 
123 128
 to_genotFitness_std <- function(x, simplify = TRUE,
124 129
                                 min_filter_fitness = 1e-9,
... ...
@@ -247,11 +252,19 @@ to_genotFitness_std <- function(x, simplify = TRUE,
247 252
     }
248 253
     if(any(is.na(x)))
249 254
         stop("NAs in fitness matrix")
255
+    ## Make sure correct class
256
+    if(is.data.frame(x)) x <- as.matrix(x)
257
+    stopifnot(inherits(x, "matrix"))
258
+    ## if(simplify) {
259
+    ##     return(x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE])
260
+    ## } else {
261
+    ##     return(x)
262
+    ## }
250 263
     if(simplify) {
251
-        return(x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE])
252
-    } else {
253
-        return(x)
264
+        x <- x[x[, ncol(x)] > min_filter_fitness, , drop = FALSE]
254 265
     }
266
+    class(x) <- c("matrix", "genotype_fitness_matrix")
267
+    return(x)
255 268
 }
256 269
 
257 270
 ## Deprecated after flfast
... ...
@@ -455,7 +468,7 @@ allGenotypes_to_matrix <- function(x) {
455 468
 
456 469
 Magellan_stats <- function(x, max_num_genotypes = 2000,
457 470
                            verbose = FALSE,
458
-                           use_log = TRUE,
471
+                           use_log = FALSE,
459 472
                            short = TRUE,
460 473
                            replace_missing = FALSE) {
461 474
     ## I always use
... ...
@@ -492,7 +505,7 @@ Magellan_stats <- function(x, max_num_genotypes = 2000,
492 505
     if(short) {
493 506
         ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
494 507
 
495
-        tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
508
+        tmp <- unlist(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
496 509
         ## ## Make names more explicit, but check we have what we think we have
497 510
         ## ## New versions of Magellan produce different output apparently of variable length
498 511
         ## stopifnot(length(tmp) >= 23) ## 23) ## variable length
... ...
@@ -181,11 +181,25 @@ rfitness <- function(g, c= 0.5,
181 181
                             "not include 1")
182 182
             }
183 183
         }
184
-        if(truncate_at_0) {
185
-            fi[fi <= 0] <- 1e-9
186
-        }
187 184
         if(log) {
188
-            fi <- log(fi/fi[1]) + 1
185
+            ## If you want logs, you certainly do not want
186
+            ## the log of a negative number
187
+            fi[fi < 0] <- 0
188
+            if(wt_is_1 == "no") {
189
+                fi <- log(fi)
190
+            } else {
191
+                ## by decree, fitness of wt is 1. So shift everything
192
+                fi <- log(fi) + 1
193
+            }
194
+            ## former expression, but it was more confusing
195
+            ## fi <- log(fi/fi[1]) + 1
196
+            
197
+        }
198
+        if(truncate_at_0) {
199
+            ## yes, truncate but add noise to prevent identical
200
+            fi[fi <= 0] <- runif(sum(fi <= 0),
201
+                                 min = 1e-10,
202
+                                 max = 1e-9)
189 203
         }
190 204
         m <- cbind(m, Fitness = fi)
191 205
         if(!is_null_na(min_accessible_genotypes)) {
... ...
@@ -1,3 +1,10 @@
1
+Changes in version 2.17.1 (2019-11-24):
2
+	- rfitness: clarified log=TRUE and truncate_at_0 after log.
3
+	- Magellan_stats: really return a vector.
4
+	
5
+Changes in version 2.17.0 (2019-10-25):
6
+	- Bumped version for BioC-3.11
7
+	
1 8
 Changes in version 2.15.2 (2019-08-14):
2 9
 	- Trying to prevent fscanf warning in FitnessLandscape/input.c
3 10
 
... ...
@@ -76,10 +76,14 @@ mean \code{mu} and standard deviation \code{sd}).}
76 76
   option can easily lead to landscapes with no accessible genotypes
77 77
   (even if you also use \code{scale}).
78 78
 
79
-  If "none", the fitness of the wildtype is not touched.  }
79
+  If "no", the fitness of the wildtype is not modified.  }
80 80
 
81 81
 
82
-\item{log}{If TRUE, log-transform fitness.}
82
+\item{log}{If TRUE, log-transform fitness. Actually, there are two
83
+  cases: if \code{wt_is_1 = "no"} we simply log the fitness values;
84
+  otherwise, we log the fitness values and add a 1, thus shifting all
85
+  fitness values, because by decree the fitness (birth rate) of the
86
+  wildtype must be 1.}
83 87
 
84 88
 \item{min_accessible_genotypes}{If not NULL, the minimum number of
85 89
   accessible genotypes in the fitness landscape. A genotype is
... ...
@@ -110,10 +114,12 @@ mean \code{mu} and standard deviation \code{sd}).}
110 114
   negative value for \code{accessible_th}.  }
111 115
 
112 116
 \item{truncate_at_0}{If TRUE (the default) any fitness <= 0 is
113
-  substituted by a small positive constant (1e-9). Why? Because
114
-  MAGELLAN and some plotting routines can have trouble (specially if you
115
-  log) with values <=0. Or we might have trouble if we want to log the
116
-  fitness.}
117
+  substituted by a small positive constant (a random uniform number
118
+  between 1e-10 and 1e-9). Why? Because MAGELLAN and some plotting
119
+  routines can have trouble (specially if you log) with values <=0. Or
120
+  we might have trouble if we want to log the fitness. This is done
121
+  after possibly taking logs. Noise is added to prevent creating several
122
+  identical minimal fitness values.}
117 123
 
118 124
 \item{K}{K for NK model; K is the number of loci with which each locus
119 125
   interacts, and the larger the K the larger the ruggedness of the
... ...
@@ -16,7 +16,7 @@ to_Magellan(x, file,
16 16
 
17 17
 Magellan_stats(x, max_num_genotypes = 2000,
18 18
                verbose = FALSE,
19
-               use_log = TRUE,
19
+               use_log = FALSE,
20 20
                short = TRUE,
21 21
                replace_missing = FALSE)
22 22
 }
... ...
@@ -62,7 +62,10 @@ Magellan_stats(x, max_num_genotypes = 2000,
62 62
   
63 63
   \item{verbose}{If TRUE provide additional information about names of
64 64
     intermediate files.}
65
-  \item{use_log}{Use log fitness when computing statistics.}
65
+  \item{use_log}{Use log fitness when computing statistics. Note that
66
+    the \code{\link{rfitness}} function outputs what should be
67
+    interpreted as log-fitness values, and thus we set this option by
68
+    default to \code{FALSE}.}
66 69
   \item{short}{Give short output when computing statistics.}
67 70
 
68 71
   \item{replace_missing}{From MAGELLAN's \code{fl_statistics}: replace
... ...
@@ -35,8 +35,9 @@ test_that("Equality of fitness including allFitnessEffects", {
35 35
     colnames(m3) <- c("A", "B", "C", "Fitness")
36 36
     m4 <- rbind(c(0, 0, 0, 1), c(1, 0, 0, 2.0), c(0, 1, 1, 0))
37 37
     colnames(m4) <- c("A", "B", "C", "Fitness")
38
-    expect_equal(OncoSimulR:::to_genotFitness_std(df3), m3)
39
-    expect_equal(OncoSimulR:::to_genotFitness_std(df3, simplify = FALSE), m4)
38
+    ## yes, m3 and m3 missing the attributes
39
+    expect_equivalent(OncoSimulR:::to_genotFitness_std(df3), m3)
40
+    expect_equivalent(OncoSimulR:::to_genotFitness_std(df3, simplify = FALSE), m4)
40 41
 
41 42
     ## now, scale by fitness of WT
42 43
     df3 <- data.frame(Genotype = c("WT", "A", "B, C"),
... ...
@@ -52,8 +53,8 @@ test_that("Equality of fitness including allFitnessEffects", {
52 53
     m4[, "Fitness"] <- m4[, "Fitness"]/1.3
53 54
 
54 55
     
55
-    expect_equal(OncoSimulR:::to_genotFitness_std(df3), m3)
56
-    expect_equal(OncoSimulR:::to_genotFitness_std(df3, simplify = FALSE), m4)
56
+    expect_equivalent(OncoSimulR:::to_genotFitness_std(df3), m3)
57
+    expect_equivalent(OncoSimulR:::to_genotFitness_std(df3, simplify = FALSE), m4)
57 58
 
58 59
 
59 60
     
... ...
@@ -6263,14 +6263,16 @@ We could alternatively have specified fitness either directly
6263 6263
 specifying the fitness of each genotype or specifying epistatic
6264 6264
 effects. Let us use the second approach:
6265 6265
 
6266
-```{r}
6266
+%% this was wrong
6267
+%% u <- 0.2; i <- -0.02; vi <- 0.6; ui <- uv <- -Inf
6267 6268
 
6268
-u <- 0.2; i <- -0.02; vi <- 0.6; ui <- uv <- -Inf
6269
+```{r}
6270
+u <- 0.1; i <- -0.05; vi <- (1.2/0.95) - 1; ui <- uv <- -Inf
6269 6271
 od2 <- allFitnessEffects(
6270 6272
     epistasis = c("u" = u,  "u:i" = ui,
6271 6273
                   "u:v" = uv, "i" = i,
6272 6274
                   "v:-i" = -Inf, "v:i" = vi))
6273
-evalAllGenotypes(od, addwt = TRUE)
6275
+evalAllGenotypes(od2, addwt = TRUE)
6274 6276
 
6275 6277
 ```
6276 6278
 
... ...
@@ -1,15 +1,15 @@
1 1
 \usepackage[%
2
-		shash={413d4d7},
3
-		lhash={413d4d7190be166d727a6b82e0c6280e0893c026},
4
-		authname={ramon diaz-uriarte (at Phelsuma)},
5
-		authemail={rdiaz02@gmail.com},
6
-		authsdate={2019-06-12},
7
-		authidate={2019-06-12 13:09:29 +0200},
8
-		authudate={1560337769},
9
-		commname={ramon diaz-uriarte (at Phelsuma)},
10
-		commemail={rdiaz02@gmail.com},
11
-		commsdate={2019-06-12},
12
-		commidate={2019-06-12 13:09:29 +0200},
13
-		commudate={1560337769},
14
-		refnames={ (HEAD -> master, origin/master, origin/HEAD)}
2
+		shash={a8b5dae},
3
+		lhash={a8b5dae811213fc49041804915277838c2c56f19},
4
+		authname={Ramon Diaz-Uriarte},
5
+		authemail={rdiaz02@users.noreply.github.com},
6
+		authsdate={2019-11-25},
7
+		authidate={2019-11-25 05:33:16 +0100},
8
+		authudate={1574656396},
9
+		commname={Ramon Diaz-Uriarte},
10
+		commemail={rdiaz02@users.noreply.github.com},
11
+		commsdate={2019-11-25},
12
+		commidate={2019-11-25 05:33:16 +0100},
13
+		commudate={1574656396},
14
+		refnames={ (HEAD, origin/master, origin/HEAD)}
15 15
 	]{gitsetinfo}
16 16
\ No newline at end of file