Browse code

3.99.1: interventions, death with fdf, user variables

ramon diaz-uriarte (at Phelsuma) authored on 25/06/2022 14:24:13
Showing 1 changed files
... ...
@@ -71,18 +71,21 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
71 71
     
72 72
   
73 73
     x_from <- y_from <- x_to <- y_to <- Change <- muts <-
74
-        label <- fitness <- Type <- NULL
75
-                
74
+        label <- birth <- Type <- NULL
75
+    
76
+    if ("Fitness" %in% colnames(tfm$afe)) {
77
+      colnames(tfm$afe)[which(colnames(tfm$afe) == "Fitness")] <- "Birth"
78
+    }
76 79
                 
77 80
     dd <- data.frame(muts = mutated,
78
-                     fitness = tfm$afe$Fitness,
81
+                     birth = tfm$afe$Birth,
79 82
                      label = tfm$afe$Genotype,
80 83
                      stringsAsFactors = TRUE)
81 84
     cl <- gaj[vv]
82 85
     sg <- data.frame(x_from = mutated[vv[, 1]],
83
-                     y_from = tfm$afe$Fitness[vv[, 1]],
86
+                     y_from = tfm$afe$Birth[vv[, 1]],
84 87
                      x_to = mutated[vv[, 2]],
85
-                     y_to = tfm$afe$Fitness[vv[, 2]],
88
+                     y_to = tfm$afe$Birth[vv[, 2]],
86 89
                      Change = factor(ifelse(cl == 0, "Neutral",
87 90
                                      ifelse(cl > 0, "Gain", "Loss")),
88 91
                                      levels = c("Gain", "Loss", "Neutral")),
... ...
@@ -91,7 +94,7 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
91 94
     number_ticks <- function(n) {function(limits) pretty(limits, n)}
92 95
     
93 96
     p <- ggplot() +
94
-        xlab("") + ylab("Fitness") + 
97
+        xlab("") + ylab("Birth") + 
95 98
         theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(),
96 99
               panel.grid.minor.x = element_blank()) +
97 100
         geom_segment(data = sg,
... ...
@@ -121,9 +124,9 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
121 124
     
122 125
     if(show_labels && use_ggrepel) {
123 126
         p <- p + geom_text_repel(data = dd[-c(minF, maxF), ],
124
-                                 aes(x = muts, y = fitness, label = label)) +
127
+                                 aes(x = muts, y = birth, label = label)) +
125 128
             geom_label_repel(data = ddMM,
126
-                             aes(x = muts, y = fitness, label = label, fill = Type),
129
+                             aes(x = muts, y = birth, label = label, fill = Type),
127 130
                              color = "black",
128 131
                              fontface = "bold")
129 132
             ## geom_label_repel(data = dd[maxF, ], aes(x = muts, y = fitness, label = label),
... ...
@@ -141,10 +144,10 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
141 144
         }
142 145
             
143 146
         p <- p + geom_text(data = dd[-c(minF, maxF), ],
144
-                           aes(x = muts, y = fitness, label = label),
147
+                           aes(x = muts, y = birth, label = label),
145 148
                            vjust = -0.2, hjust = "inward") +
146 149
             geom_label(data = ddMM,
147
-                       aes(x = muts, y = fitness, label = label, fill = Type),
150
+                       aes(x = muts, y = birth, label = label, fill = Type),
148 151
                        vjust = -0.2, hjust = "inward", color = "black",
149 152
                        fontface = "bold")
150 153
             ## geom_label(data = dd[maxF, ], aes(x = muts, y = fitness, label = label),
Browse code

3.1.1: XOR, AND, OR: plots of DAGs honor all possible values

ramon diaz-uriarte (at Phelsuma) authored on 06/10/2021 18:38:06
Showing 1 changed files
... ...
@@ -230,6 +230,9 @@ fast_peaks <- function(x, th) {
230 230
                           th)]))
231 231
 }
232 232
 
233
+## FIXME! Unclear if we need the complete fitness matrix or if it will work with
234
+## pieces (say, after removing those with fitness < WT, to try to make it faster
235
+## and for general use in other cases)
233 236
 
234 237
 ## wrapper to the C++ code
235 238
 wrap_accessibleGenotypes <- function(x, th) {
Browse code

v. 2.99.9: fixed one-gene cases, nem in Readme, typos; removin unused code and v.1 from long tests

ramon diaz-uriarte (at Phelsuma) authored on 22/04/2021 10:27:49
Showing 1 changed files
... ...
@@ -319,65 +319,6 @@ faster_accessible_genotypes_R <- function(x, th) {
319 319
 }
320 320
 
321 321
 
322
-## ## This uses slam, but that is actually slower because
323
-## ## of the assignment
324
-## faster_accessible_genots_slam <- function(x, th = 0) {
325
-
326
-##     ## Given a genotype matrix, return the genotypes that are accessible
327
-##     ## via creating a directed adjacency matrix between genotypes
328
-##     ## connected (i.e., those that differ by gaining one mutation). 0
329
-##     ## means not connected, 1 means connected.
330
-    
331
-##     ## There is a more general function in OncoSimulR that will give the
332
-##     ## fitness difference. But not doing the difference is faster than
333
-##     ## just setting a value, say 1, if all we want is to keep track of
334
-##     ## accessible ones. And by using only 0/1 we can store only an
335
-##     ## integer. And no na.omits, etc. Is too restricted? Yes. But for
336
-##     ## simulations and computing just accessible genotypes, probably a
337
-##     ## hell of a lot faster.
338
-
339
-##     ## Well, this is not incredibly fast either.
340
-    
341
-##     ## Make sure sorted, so ancestors always before descendants
342
-##     rs0 <- rowSums(x[, -ncol(x)])
343
-##     x <- x[order(rs0), ]
344
-##     rm(rs0)
345
-    
346
-##     y <- x[, -ncol(x)]
347
-##     f <- x[, ncol(x)]
348
-##     rs <- rowSums(y)
349
-
350
-##     ## If 0, not accessible
351
-##     adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
352
-##                                       mode = "integer")
353
-##     for(i in 1:length(rs)) { ## i is the current genotype
354
-##         candidates <- which(rs == (rs[i] + 1))
355
-##         for(j in candidates) {
356
-##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
357
-##             ## if(sumdiff == 1)
358
-##             if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
359
-##                 ( (f[j] - f[i]) >= th ) )
360
-##                 adm[i, j] <- 1L
361
-##         }
362
-##     }
363
-
364
-##     colnames(adm) <- rownames(adm) <- 1:ncol(adm)
365
-##     admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
366
-##     while(TRUE) {
367
-##         ## We remove inaccessible cols (genotypes) and the corresponding
368
-##         ## rows repeatedly until nothing left to remove; any column left
369
-##         ## is therefore accessible throw at least one path.
370
-
371
-##         ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
372
-##         inacc_col <- which(slam::col_sums(admtmp) == 0L)
373
-##         if(length(inacc_col) == 0) break;
374
-##         inacc_row <- inacc_col + 1 ## recall root row is left
375
-##         admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
376
-##     }
377
-##     return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
378
-## }
379
-
380
-
381 322
 generate_matrix_genotypes <- function(g) {
382 323
     ## FIXME future: do this for order too? Only if rfitness for order.
383 324
     ## Given a number of genes, generate all possible genotypes.
... ...
@@ -470,25 +411,6 @@ genot_to_adj_mat <- function(x) {
470 411
 }
471 412
 
472 413
 
473
-## ## to move above to C++ note that loop can be
474
-## for(i in 1:length(rs)) { ## i is the current genotype
475
-##     for(j in (i:length(rs))) {
476
-##         if(rs[j] > (rs[i] + 1)) break;
477
-##         else if(rs[j] == (rs[i] + 1)) {
478
-##             ## and use here my HammingDistance function
479
-##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
480
-##             ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
481
-##             if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]);
482
-##             }
483
-##     }
484
-## }
485
-
486
-## actually, all that is already in accessibleGenotypes except for the
487
-## filling up of adm.
488
-
489
-
490
-
491
-
492 414
 
493 415
 peak_valley <- function(x) {
494 416
     ## FIXME: when there are no identical entries, all this
Browse code

2.99.4

ramon diaz-uriarte (at Phelsuma) authored on 17/12/2020 15:07:07
Showing 1 changed files
... ...
@@ -16,12 +16,6 @@
16 16
 ## plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
17 17
 ##     plot.genotype_fitness_matrix <- plotFitnessLandscape 
18 18
 
19
-## FIXME: show only accessible paths? 
20
-## FIXME: when show_labels = FALSE we still show the boxes
21
-##        and some of the labels.!!!
22
-## FIXME: if using only_accessible, maybe we
23
-## can try to use fast_peaks, and use the slower
24
-## approach as fallback (if identical fitness)
25 19
 plotFitnessLandscape <- function(x, show_labels = TRUE,
26 20
                                  col = c("green4", "red", "yellow"),
27 21
                                  lty = c(1, 2, 3), use_ggrepel = FALSE,
... ...
@@ -31,7 +25,12 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
31 25
                                  ...) {
32 26
 
33 27
     ## FIXME future:
34
-
28
+    ## FIXME: when show_labels = FALSE we still show the boxes
29
+    ##        and some of the labels.!!!
30
+    ## FIXME: if using only_accessible, maybe we
31
+    ## can try to use fast_peaks, and use the slower
32
+    ## approach as fallback (if identical fitness)
33
+    
35 34
     ## - Allow passing order effects. Change "allGenotypes_to_matrix"
36 35
     ##   below? Probably not, as we cannot put order effects as a
37 36
     ##   matrix. Do something else, like allow only order effects if from
... ...
@@ -70,14 +69,7 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
70 69
     }
71 70
     vv <- which(!is.na(gaj), arr.ind = TRUE)
72 71
     
73
-    ## plot(x = mutated, y = e1$Fitness, ylab = "Fitness",
74
-    ##      xlab = "", type = "n", axes = FALSE)
75
-    ## box()
76
-    ## axis(2)
77
-    ## text(x = mutated, y = x$Fitness, labels = x$Genotype)
78
-
79
-    ## The R CMD CHEKC notes about no visible binding for global variable
80
-
72
+  
81 73
     x_from <- y_from <- x_to <- y_to <- Change <- muts <-
82 74
         label <- fitness <- Type <- NULL
83 75
                 
... ...
@@ -184,27 +176,6 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
184 176
 ######################################################################
185 177
 
186 178
 
187
-## wrap_filter_inaccessible <- function(x, max_num_genotypes, accessible_th) {
188
-##     ## wrap it, for my consumption
189
-##     tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)
190
-##     mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)])
191
-##     gaj <- genot_to_adj_mat(tfm$gfm)
192
-##     gaj <- filter_inaccessible(gaj, accessible_th)
193
-##     remaining <- as.numeric(colnames(gaj))
194
-##     mutated <- mutated[remaining]
195
-##     tfm$afe <- tfm$afe[remaining, , drop = FALSE]
196
-##     return(list(remaining = remaining,
197
-##                 mutated = mutated,
198
-##                 tfm = tfm))
199
-## }
200
-
201
-## No longer being used. Used to be in rfitness
202
-## count_accessible_g <- function(gfm, accessible_th) {
203
-##     gaj <- genot_to_adj_mat(gfm)
204
-##     gaj <- filter_inaccessible(gaj, accessible_th)
205
-##     return(ncol(gaj) - 1)
206
-## }
207
-
208 179
 
209 180
 ## There is now C++ code to get just the locations/positions of the
210 181
 ## accessible genotypes
Browse code

version 2.99.1: frequency-dependent fitness functionality

ramon diaz-uriarte (at Phelsuma) authored on 10/12/2020 11:41:53
Showing 1 changed files
... ...
@@ -1,4 +1,4 @@
1
-## Copyright 2013, 2014, 2015, 2016 Ramon Diaz-Uriarte
1
+## Copyright 2013-2021 Ramon Diaz-Uriarte
2 2
 
3 3
 ## This program is free software: you can redistribute it and/or modify
4 4
 ## it under the terms of the GNU General Public License as published by
Browse code

no longer refs to older installer and robustify one test

ramon diaz-uriarte (at Phelsuma) authored on 16/07/2018 20:27:32
Showing 1 changed files
... ...
@@ -17,7 +17,8 @@
17 17
 ##     plot.genotype_fitness_matrix <- plotFitnessLandscape 
18 18
 
19 19
 ## FIXME: show only accessible paths? 
20
-
20
+## FIXME: when show_labels = FALSE we still show the boxes
21
+##        and some of the labels.!!!
21 22
 ## FIXME: if using only_accessible, maybe we
22 23
 ## can try to use fast_peaks, and use the slower
23 24
 ## approach as fallback (if identical fitness)
ramon diaz-uriarte (at Phelsuma) authored on 17/04/2018 23:58:39
Showing 1 changed files
... ...
@@ -83,7 +83,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
83 83
                 
84 84
     dd <- data.frame(muts = mutated,
85 85
                      fitness = tfm$afe$Fitness,
86
-                     label = tfm$afe$Genotype)
86
+                     label = tfm$afe$Genotype,
87
+                     stringsAsFactors = TRUE)
87 88
     cl <- gaj[vv]
88 89
     sg <- data.frame(x_from = mutated[vv[, 1]],
89 90
                      y_from = tfm$afe$Fitness[vv[, 1]],
... ...
@@ -91,7 +92,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
91 92
                      y_to = tfm$afe$Fitness[vv[, 2]],
92 93
                      Change = factor(ifelse(cl == 0, "Neutral",
93 94
                                      ifelse(cl > 0, "Gain", "Loss")),
94
-                                     levels = c("Gain", "Loss", "Neutral")))
95
+                                     levels = c("Gain", "Loss", "Neutral")),
96
+                     stringsAsFactors = TRUE)
95 97
     ## From http://stackoverflow.com/a/17257422
96 98
     number_ticks <- function(n) {function(limits) pretty(limits, n)}
97 99
     
... ...
@@ -601,6 +603,7 @@ peak_valley <- function(x) {
601 603
 ##     ## mutated <- rowSums(gfm[, -ncol(gfm)])
602 604
 ##     gaj <- OncoSimulR:::genot_to_adj_mat(gfm)
603 605
 ##     gaj2 <- OncoSimulR:::filter_inaccessible(gaj, 0)
606
+##     Eh! that is assuming no genotype is inaccessible!
604 607
 ##     stopifnot(all(na.omit(as.vector(gaj == gaj2))))
605 608
 ##     remaining <- as.numeric(colnames(gaj2))
606 609
 ##     ## mutated <- mutated[remaining]
Browse code

version 2.7.2 Changes in version 2.7.2 (2017-09-27): - genot_to_adj_mat in C++. - fast_peaks (for no backmutation cases). - Better explanation and testing of peaks and valleys. - Clarified simOGraph transitive reduction. - Better handling of ti corner cases. - Magellan reading fuctions adapted to output of newer (as of 2017-07) version of Magellan. - sorting gene names in allGenotypes_to_matrix. - sampledGenotypes: genotype names with sorted gene names.

ramon diaz-uriarte (at Phelsuma) authored on 27/09/2017 10:23:50
Showing 1 changed files
... ...
@@ -18,6 +18,9 @@
18 18
 
19 19
 ## FIXME: show only accessible paths? 
20 20
 
21
+## FIXME: if using only_accessible, maybe we
22
+## can try to use fast_peaks, and use the slower
23
+## approach as fallback (if identical fitness)
21 24
 plotFitnessLandscape <- function(x, show_labels = TRUE,
22 25
                                  col = c("green4", "red", "yellow"),
23 26
                                  lty = c(1, 2, 3), use_ggrepel = FALSE,
... ...
@@ -224,13 +227,45 @@ filter_inaccessible <- function(x, accessible_th) {
224 227
 }
225 228
 
226 229
 
230
+## wrapper to the C++ code
231
+fast_peaks <- function(x, th) {
232
+    ## x is the fitness matrix, not adjacency matrix
233
+
234
+    ## Only works if no connected genotypes that form maxima. I.e., no
235
+    ## identical fitness. Do a sufficient check for it (too inclusive, though)
236
+    ## And only under no back mutation
237
+
238
+    original_pos <- 1:nrow(x)
239
+    numMut <- rowSums(x[, -ncol(x)])
240
+    o_numMut <- order(numMut)
241
+    x <- x[o_numMut, ]
242
+    numMut <- numMut[o_numMut]
243
+    f <- x[, ncol(x)]
244
+    ## Two assumptions
245
+    stopifnot(numMut[1] == 0)
246
+    ## make sure no repeated in those that could be maxima
247
+    if(any(duplicated(f[f >= f[1]])))
248
+        stop("There could be several connected maxima genotypes.",
249
+             " This function is not safe to use.")
250
+    
251
+    y <- x[, -ncol(x)]
252
+    storage.mode(y) <- "integer"
253
+    original_pos <- original_pos[o_numMut]
254
+    return(sort(original_pos[peaksLandscape(y, f,
255
+                          as.integer(numMut),
256
+                          th)]))
257
+}
258
+
259
+
227 260
 ## wrapper to the C++ code
228 261
 wrap_accessibleGenotypes <- function(x, th) {
229 262
     ## x is the fitness matrix, not adjacency matrix
263
+    original_pos <- 1:nrow(x)
230 264
     numMut <- rowSums(x[, -ncol(x)])
231 265
     o_numMut <- order(numMut)
232 266
     x <- x[o_numMut, ]
233 267
     numMut <- numMut[o_numMut]
268
+    original_pos <- original_pos[o_numMut]
234 269
     
235 270
     y <- x[, -ncol(x)]
236 271
     storage.mode(y) <- "integer"
... ...
@@ -238,7 +273,29 @@ wrap_accessibleGenotypes <- function(x, th) {
238 273
     acc <- accessibleGenotypes(y, x[, ncol(x)],
239 274
                                as.integer(numMut),
240 275
                                th)
241
-    return(acc[acc > 0])
276
+    ## return(acc[acc > 0])
277
+    return(sort(original_pos[acc[acc > 0]]))
278
+}
279
+
280
+
281
+## wrapper to the C++ code; the former one, only for testing. Remove
282
+## eventually FIXME
283
+wrap_accessibleGenotypes_former <- function(x, th) {
284
+    ## x is the fitness matrix, not adjacency matrix
285
+    original_pos <- 1:nrow(x)
286
+    numMut <- rowSums(x[, -ncol(x)])
287
+    o_numMut <- order(numMut)
288
+    x <- x[o_numMut, ]
289
+    numMut <- numMut[o_numMut]
290
+    original_pos <- original_pos[o_numMut]
291
+    
292
+    y <- x[, -ncol(x)]
293
+    storage.mode(y) <- "integer"
294
+
295
+    acc <- accessibleGenotypes_former(y, x[, ncol(x)],
296
+                                      as.integer(numMut),
297
+                                      th)
298
+    return(sort(original_pos[acc[acc > 0]]))
242 299
 }
243 300
 
244 301
 ## A transitional function
... ...
@@ -378,8 +435,10 @@ generate_matrix_genotypes <- function(g) {
378 435
 }
379 436
 
380 437
 
381
-
382
-genot_to_adj_mat <- function(x) {
438
+## The R version. See also the C++ one
439
+genot_to_adj_mat_R <- function(x) {
440
+    ## x is the fitness matrix
441
+    
383 442
     ## FIXME this can take about 23% of the time of the ggplot call.
384 443
     ## But them, we are quickly constructing a 2000*2000 matrix
385 444
     ## Given a genotype matrix, as given by allGenotypes_to_matrix, produce a
... ...
@@ -390,13 +449,16 @@ genot_to_adj_mat <- function(x) {
390 449
     ## FIXME: code is now in place to do all of this in C++
391 450
     
392 451
     ## Make sure sorted, so ancestors always before descendants
393
-    rs0 <- rowSums(x[, -ncol(x)])
394
-    x <- x[order(rs0), ]
395
-    rm(rs0)
452
+    original_pos <- 1:nrow(x)
453
+    numMut <- rowSums(x[, -ncol(x)])
454
+    o_numMut <- order(numMut)
455
+    x <- x[o_numMut, ]
456
+    original_pos <- original_pos[o_numMut]
457
+    rm(numMut)
396 458
     
397 459
     y <- x[, -ncol(x)]
398 460
     f <- x[, ncol(x)]
399
-    rs <- rowSums(y)
461
+    rs <- rowSums(y) ## redo for paranoia; could have ordered numMut
400 462
 
401 463
     ## Move this to C++?
402 464
     adm <- matrix(NA, nrow = length(rs), ncol = length(rs))
... ...
@@ -408,9 +470,51 @@ genot_to_adj_mat <- function(x) {
408 470
             if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
409 471
         }
410 472
     }
473
+    colnames(adm) <- rownames(adm) <- original_pos
411 474
     return(adm)
412 475
 }
413 476
 
477
+genot_to_adj_mat <- function(x) {
478
+    ## x is the fitness matrix
479
+
480
+    ## adding column and row names should rarely be necessary
481
+    ## as these are internal functions, etc. But just in case
482
+    original_pos <- 1:nrow(x)
483
+    numMut <- rowSums(x[, -ncol(x)])
484
+    o_numMut <- order(numMut)
485
+    x <- x[o_numMut, ]
486
+    numMut <- numMut[o_numMut]
487
+    original_pos <- original_pos[o_numMut]
488
+    
489
+    y <- x[, -ncol(x)]
490
+    storage.mode(y) <- "integer"
491
+
492
+    adm <- genot2AdjMat(y, x[, ncol(x)],
493
+                        as.integer(numMut))
494
+    colnames(adm) <- rownames(adm) <- original_pos
495
+    return(adm)
496
+}
497
+
498
+
499
+## ## to move above to C++ note that loop can be
500
+## for(i in 1:length(rs)) { ## i is the current genotype
501
+##     for(j in (i:length(rs))) {
502
+##         if(rs[j] > (rs[i] + 1)) break;
503
+##         else if(rs[j] == (rs[i] + 1)) {
504
+##             ## and use here my HammingDistance function
505
+##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
506
+##             ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
507
+##             if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]);
508
+##             }
509
+##     }
510
+## }
511
+
512
+## actually, all that is already in accessibleGenotypes except for the
513
+## filling up of adm.
514
+
515
+
516
+
517
+
414 518
 
415 519
 peak_valley <- function(x) {
416 520
     ## FIXME: when there are no identical entries, all this
... ...
@@ -466,7 +570,13 @@ peak_valley <- function(x) {
466 570
     }
467 571
     bad_fwd <- vector("integer", nrow(x))
468 572
     for(i in 1:nrow(x)) {
573
+        ## Eh, why any? All.
574
+        ## Nope, any: we want peaks in general, not just
575
+        ## under assumption of "no back mutation"
576
+        ## We get a different result when we restrict to accessible
577
+        ## because all < 0 in adjacency are turned to NAs.
469 578
         if( any(x[, i] < 0, na.rm = TRUE) || bad_fwd[i] ) {
579
+        ## if( all(x[, i] < 0, na.rm = TRUE) ) {
470 580
             ## this node is bad. Any descendant with fitness >= is bad
471 581
             bad_fwd[i] <- 1
472 582
             reach_f <- which(x[i, ] <= 0)
... ...
@@ -478,3 +588,36 @@ peak_valley <- function(x) {
478 588
     peak <- setdiff(candidate, bad)
479 589
     return(list(peak = peak, valley = valley))
480 590
 }
591
+
592
+
593
+
594
+## For the future
595
+## ## data.frame (two columns: genotype with "," and Fitness) -> fitness graph (DAG)
596
+## ## Return an adj matrix of the fitness graph from a fitness
597
+## ## landscape
598
+## ## Based on code in plotFitnessLandscape
599
+## flandscape_to_fgraph <- function(afe) {
600
+##     gfm <- OncoSimulR:::allGenotypes_to_matrix(afe)
601
+##     ## mutated <- rowSums(gfm[, -ncol(gfm)])
602
+##     gaj <- OncoSimulR:::genot_to_adj_mat(gfm)
603
+##     gaj2 <- OncoSimulR:::filter_inaccessible(gaj, 0)
604
+##     stopifnot(all(na.omit(as.vector(gaj == gaj2))))
605
+##     remaining <- as.numeric(colnames(gaj2))
606
+##     ## mutated <- mutated[remaining]
607
+##     afe <- afe[remaining, , drop = FALSE]
608
+##     ## vv <- which(!is.na(gaj2), arr.ind = TRUE)
609
+
610
+##     gaj2 <- gaj2
611
+##     gaj2[is.na(gaj2)] <- 0
612
+##     gaj2[gaj2 > 0] <- 1
613
+##     colnames(gaj2) <- rownames(gaj2) <- afe[, "Genotype"]
614
+##     return(gaj2)
615
+## }
616
+## ## This could be done easily in C++, taking care of row/colnames at end,
617
+## ## without moving around the full adjacency matrix.
618
+## ## Skeleton for C++
619
+## ## a call to accessibleGenotypesPeaksLandscape
620
+## ## (with another argument or changing the returnpeaks by a three value thing)
621
+## ## after done with first loop, 
622
+## ## return the matrix adm[accessible > 0, accessible >0]
623
+## ## only need care with row/colnames
Browse code

2.5.9; code coverage comments in vignette

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@125769 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 08/01/2017 23:16:08
Showing 1 changed files
... ...
@@ -223,9 +223,6 @@ filter_inaccessible <- function(x, accessible_th) {
223 223
     return(x)
224 224
 }
225 225
 
226
-f1 <- function() {}
227
-
228
-x <- 99
229 226
 
230 227
 ## wrapper to the C++ code
231 228
 wrap_accessibleGenotypes <- function(x, th) {
Browse code

v.2.5.1. \n - much faster accessible genots \n - AND of drivers and size \n - fixation \n - doc. improvements

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@123922 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 13/11/2016 09:46:27
Showing 1 changed files
... ...
@@ -192,16 +192,23 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
192 192
 ##                 tfm = tfm))
193 193
 ## }
194 194
 
195
-count_accessible_g <- function(gfm, accessible_th) {
196
-    gaj <- genot_to_adj_mat(gfm)
197
-    gaj <- filter_inaccessible(gaj, accessible_th)
198
-    return(ncol(gaj) - 1)
199
-}
195
+## No longer being used. Used to be in rfitness
196
+## count_accessible_g <- function(gfm, accessible_th) {
197
+##     gaj <- genot_to_adj_mat(gfm)
198
+##     gaj <- filter_inaccessible(gaj, accessible_th)
199
+##     return(ncol(gaj) - 1)
200
+## }
200 201
 
202
+
203
+## There is now C++ code to get just the locations/positions of the
204
+## accessible genotypes
201 205
 filter_inaccessible <- function(x, accessible_th) {
202 206
     ## Return an adjacency matrix with only accessible paths. x is the gaj
203 207
     ## matrix created in the plots. The difference between genotypes
204 208
     ## separated by a hamming distance of 1
209
+
210
+    ## FIXME: could do the x[, -1] before loop and not add the 1
211
+    ## inside while, and do that at end
205 212
     colnames(x) <- rownames(x) <- 1:ncol(x)
206 213
     while(TRUE) {
207 214
         ## remove first column
... ...
@@ -216,6 +223,132 @@ filter_inaccessible <- function(x, accessible_th) {
216 223
     return(x)
217 224
 }
218 225
 
226
+f1 <- function() {}
227
+
228
+x <- 99
229
+
230
+## wrapper to the C++ code
231
+wrap_accessibleGenotypes <- function(x, th) {
232
+    ## x is the fitness matrix, not adjacency matrix
233
+    numMut <- rowSums(x[, -ncol(x)])
234
+    o_numMut <- order(numMut)
235
+    x <- x[o_numMut, ]
236
+    numMut <- numMut[o_numMut]
237
+    
238
+    y <- x[, -ncol(x)]
239
+    storage.mode(y) <- "integer"
240
+
241
+    acc <- accessibleGenotypes(y, x[, ncol(x)],
242
+                               as.integer(numMut),
243
+                               th)
244
+    return(acc[acc > 0])
245
+}
246
+
247
+## A transitional function
248
+faster_accessible_genotypes_R <- function(x, th) {
249
+   rs0 <- rowSums(x[, -ncol(x)])
250
+    x <- x[order(rs0), ]
251
+    rm(rs0)
252
+    
253
+    y <- x[, -ncol(x)]
254
+    f <- x[, ncol(x)]
255
+    rs <- rowSums(y)
256
+
257
+   ## If 0, not accessible
258
+   ## adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
259
+   ##                                          mode = "integer")
260
+   
261
+   adm <- matrix(0, nrow = length(rs), ncol = length(rs))
262
+   storage.mode(adm) <- "integer"
263
+   
264
+   ## Most time is gone here
265
+    for(i in 1:length(rs)) { ## i is the current genotype
266
+        candidates <- which(rs == (rs[i] + 1))
267
+        for(j in candidates) {
268
+            if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
269
+                ( (f[j] - f[i]) >= th ) ) {
270
+                ## actually, this is the largest time sink using slam
271
+                adm[i, j] <- 1L
272
+                }
273
+        }
274
+    }
275
+
276
+    colnames(adm) <- rownames(adm) <- 1:ncol(adm)
277
+    admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
278
+    while(TRUE) {
279
+        ## We remove inaccessible cols (genotypes) and the corresponding
280
+        ## rows repeatedly until nothing left to remove; any column left
281
+        ## is therefore accessible throw at least one path.
282
+
283
+        ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
284
+        inacc_col <- which(colSums(admtmp) == 0L)
285
+        if(length(inacc_col) == 0) break;
286
+        inacc_row <- inacc_col + 1 ## recall root row is left
287
+        admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
288
+    }
289
+    return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
290
+
291
+}
292
+
293
+
294
+## ## This uses slam, but that is actually slower because
295
+## ## of the assignment
296
+## faster_accessible_genots_slam <- function(x, th = 0) {
297
+
298
+##     ## Given a genotype matrix, return the genotypes that are accessible
299
+##     ## via creating a directed adjacency matrix between genotypes
300
+##     ## connected (i.e., those that differ by gaining one mutation). 0
301
+##     ## means not connected, 1 means connected.
302
+    
303
+##     ## There is a more general function in OncoSimulR that will give the
304
+##     ## fitness difference. But not doing the difference is faster than
305
+##     ## just setting a value, say 1, if all we want is to keep track of
306
+##     ## accessible ones. And by using only 0/1 we can store only an
307
+##     ## integer. And no na.omits, etc. Is too restricted? Yes. But for
308
+##     ## simulations and computing just accessible genotypes, probably a
309
+##     ## hell of a lot faster.
310
+
311
+##     ## Well, this is not incredibly fast either.
312
+    
313
+##     ## Make sure sorted, so ancestors always before descendants
314
+##     rs0 <- rowSums(x[, -ncol(x)])
315
+##     x <- x[order(rs0), ]
316
+##     rm(rs0)
317
+    
318
+##     y <- x[, -ncol(x)]
319
+##     f <- x[, ncol(x)]
320
+##     rs <- rowSums(y)
321
+
322
+##     ## If 0, not accessible
323
+##     adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
324
+##                                       mode = "integer")
325
+##     for(i in 1:length(rs)) { ## i is the current genotype
326
+##         candidates <- which(rs == (rs[i] + 1))
327
+##         for(j in candidates) {
328
+##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
329
+##             ## if(sumdiff == 1)
330
+##             if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
331
+##                 ( (f[j] - f[i]) >= th ) )
332
+##                 adm[i, j] <- 1L
333
+##         }
334
+##     }
335
+
336
+##     colnames(adm) <- rownames(adm) <- 1:ncol(adm)
337
+##     admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
338
+##     while(TRUE) {
339
+##         ## We remove inaccessible cols (genotypes) and the corresponding
340
+##         ## rows repeatedly until nothing left to remove; any column left
341
+##         ## is therefore accessible throw at least one path.
342
+
343
+##         ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
344
+##         inacc_col <- which(slam::col_sums(admtmp) == 0L)
345
+##         if(length(inacc_col) == 0) break;
346
+##         inacc_row <- inacc_col + 1 ## recall root row is left
347
+##         admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
348
+##     }
349
+##     return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
350
+## }
351
+
219 352
 
220 353
 generate_matrix_genotypes <- function(g) {
221 354
     ## FIXME future: do this for order too? Only if rfitness for order.
... ...
@@ -257,6 +390,8 @@ genot_to_adj_mat <- function(x) {
257 390
     ## that differ by gaining one mutation) with value being the
258 391
     ## difference in fitness between destination and origin
259 392
 
393
+    ## FIXME: code is now in place to do all of this in C++
394
+    
260 395
     ## Make sure sorted, so ancestors always before descendants
261 396
     rs0 <- rowSums(x[, -ncol(x)])
262 397
     x <- x[order(rs0), ]
Browse code

v.2.3.9. accessible genotypes

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@119231 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 09/07/2016 13:43:10
Showing 1 changed files
... ...
@@ -22,6 +22,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
22 22
                                  col = c("green4", "red", "yellow"),
23 23
                                  lty = c(1, 2, 3), use_ggrepel = FALSE,
24 24
                                  log = FALSE, max_num_genotypes = 2000,
25
+                                 only_accessible = FALSE,
26
+                                 accessible_th = 0,
25 27
                                  ...) {
26 28
 
27 29
     ## FIXME future:
... ...
@@ -37,7 +39,6 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
37 39
     ## adjacency matrix of genotype i go at row i and column i.  Follow back
38 40
     ## all entries in row i, and their previous, and forward all column i.
39 41
 
40
-
41 42
     ## gfm: genotype fitness matrix
42 43
     ## afe: all fitness effects
43 44
 
... ...
@@ -52,11 +53,17 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
52 53
     ## get the string representation, etc. And this is for use
53 54
     ## with OncoSimul.
54 55
 
56
+  
55 57
     tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)
56 58
 
57
-
58 59
     mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)])
59 60
     gaj <- genot_to_adj_mat(tfm$gfm)
61
+    if(only_accessible) {
62
+        gaj <- filter_inaccessible(gaj, accessible_th)
63
+        remaining <- as.numeric(colnames(gaj))
64
+        mutated <- mutated[remaining]
65
+        tfm$afe <- tfm$afe[remaining, , drop = FALSE]
66
+    }
60 67
     vv <- which(!is.na(gaj), arr.ind = TRUE)
61 68
     
62 69
     ## plot(x = mutated, y = e1$Fitness, ylab = "Fitness",
... ...
@@ -171,6 +178,44 @@ plot.evalAllGenotypes <- plot.evalAllGenotypesMut <-
171 178
 ######################################################################
172 179
 
173 180
 
181
+## wrap_filter_inaccessible <- function(x, max_num_genotypes, accessible_th) {
182
+##     ## wrap it, for my consumption
183
+##     tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)
184
+##     mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)])
185
+##     gaj <- genot_to_adj_mat(tfm$gfm)
186
+##     gaj <- filter_inaccessible(gaj, accessible_th)
187
+##     remaining <- as.numeric(colnames(gaj))
188
+##     mutated <- mutated[remaining]
189
+##     tfm$afe <- tfm$afe[remaining, , drop = FALSE]
190
+##     return(list(remaining = remaining,
191
+##                 mutated = mutated,
192
+##                 tfm = tfm))
193
+## }
194
+
195
+count_accessible_g <- function(gfm, accessible_th) {
196
+    gaj <- genot_to_adj_mat(gfm)
197
+    gaj <- filter_inaccessible(gaj, accessible_th)
198
+    return(ncol(gaj) - 1)
199
+}
200
+
201
+filter_inaccessible <- function(x, accessible_th) {
202
+    ## Return an adjacency matrix with only accessible paths. x is the gaj
203
+    ## matrix created in the plots. The difference between genotypes
204
+    ## separated by a hamming distance of 1
205
+    colnames(x) <- rownames(x) <- 1:ncol(x)
206
+    while(TRUE) {
207
+        ## remove first column
208
+        ## We use fact that all(na.omit(x) < u) is true if all are NA
209
+        ## so inaccessible rows are removed and thus destination columns
210
+        wrm <- which(apply(x[, -1, drop = FALSE], 2,
211
+                           function(y) {all(na.omit(y) < accessible_th)})) + 1
212
+        if(length(wrm) == 0) break;
213
+        x <- x[-wrm, -wrm, drop = FALSE]
214
+    }
215
+    x[x < 0] <- NA
216
+    return(x)
217
+}
218
+
174 219
 
175 220
 generate_matrix_genotypes <- function(g) {
176 221
     ## FIXME future: do this for order too? Only if rfitness for order.
Browse code

v.2.3.7; detectionProb mechanism;\n documentation enhancements;\n more tests

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@119150 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 05/07/2016 15:59:56
Showing 1 changed files
... ...
@@ -25,8 +25,6 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
25 25
                                  ...) {
26 26
 
27 27
     ## FIXME future:
28
-    
29
-   
30 28
 
31 29
     ## - Allow passing order effects. Change "allGenotypes_to_matrix"
32 30
     ##   below? Probably not, as we cannot put order effects as a
... ...
@@ -54,107 +52,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
54 52
     ## get the string representation, etc. And this is for use
55 53
     ## with OncoSimul.
56 54
 
57
-
58 55
     tfm <- to_Fitness_Matrix(x, max_num_genotypes = max_num_genotypes)
59 56
 
60
-    ## if( (inherits(x, "genotype_fitness_matrix")) ||
61
-    ##     ( (is.matrix(x) || is.data.frame(x)) && (ncol(x) > 2) ) ) {
62
-    ##     ## Why this? We go back and forth twice. We need both things. We
63
-    ##     ## could construct the afe below by appropriately pasting the
64
-    ##     ## columns names
65
-    ##     afe <- evalAllGenotypes(allFitnessEffects(
66
-    ##         epistasis = from_genotype_fitness(x)),
67
-    ##         order = FALSE, addwt = TRUE, max = max_num_genotypes)
68
-    ##     ## Might not be needed with the proper gfm object (so gmf <- x)
69
-    ##     ## but is needed if arbitrary matrices.
70
-    ##     gfm <- allGenotypes_to_matrix(afe) 
71
-    ## } else if(inherits(x, "fitnessEffects")) {
72
-    ##     if(!is.null(x$orderEffects) )
73
-    ##         stop("We cannot yet deal with order effects")
74
-    ##     afe <- evalAllGenotypes(x,
75
-    ##                             order = FALSE,
76
-    ##                             addwt = TRUE, max = max_num_genotypes)
77
-    ##     gfm <- allGenotypes_to_matrix(afe)
78
-    ## } else if( (inherits(x, "evalAllGenotypes")) ||
79
-    ##            (inherits(x, "evalAllGenotypesMut"))) {
80
-    ##     if(any(grepl(">", x[, 1], fixed = TRUE)))
81
-    ##         stop("We cannot deal with order effects yet.")
82
-    ##     x <- x[, c(1, 2)]
83
-    ##     if(x[1, "Genotype"] != "WT") {
84
-    ##         ## Yes, because we expect this present below
85
-    ##         x <- rbind(data.frame(Genotype = "WT",
86
-    ##                               Fitness = 1,
87
-    ##                               stringsAsFactors = FALSE),
88
-    ##                    x)
89
-    ##     }
90
-    ##     afe <- x
91
-    ##     ## in case we pass an evalAllgenotypesfitandmut
92
-    ##     gfm <- allGenotypes_to_matrix(afe)
93
-    ## } else if(is.data.frame(x)) {
94
-    ##     ## Assume a two-column data frame of genotypes as character
95
-    ##     ## vectors and fitness
96
-    ##     if(colnames(x)[2] != "Fitness")
97
-    ##         stop("We cannot guess what you are passing here") 
98
-    ##     afe <- evalAllGenotypes(allFitnessEffects(genotFitness = x),
99
-    ##                             order = FALSE, addwt = TRUE,
100
-    ##                             max = max_num_genotypes)
101
-    ##     gfm <- allGenotypes_to_matrix(afe)
102
-    ## } else {
103
-    ##    stop("We cannot guess what you are passing here") 
104
-    ## }
105
-   
106
-
107
-    
108
-    ## if(inherits(x, "genotype_fitness_matrix")) {
109
-    ##     ## Why this? We go back and forth twice. We need both things. We
110
-    ##     ## could construct the afe below by appropriately pasting the
111
-    ##     ## columns names
112
-    ##     afe <- evalAllGenotypes(allFitnessEffects(
113
-    ##         epistasis = from_genotype_fitness(x)),
114
-    ##         order = FALSE, addwt = TRUE, max = 2000)
115
-    ##     gfm <- allGenotypes_to_matrix(afe) ## should not be needed? gfm <- x
116
-    ## } else if(inherits(x, "fitnessEffects")) {
117
-    ##     if(!is.null(x$orderEffects) )
118
-    ##         stop("We cannot yet deal with order effects")
119
-    ##     afe <- evalAllGenotypes(x,
120
-    ##                             order = FALSE,
121
-    ##                             addwt = TRUE, max = 2000)
122
-    ##     gfm <- allGenotypes_to_matrix(x)
123
-    ## } else {
124
-    ##     lc <- ncol(x)
125
-    ##     ## detect an appropriately formatted matrix
126
-    ##     if( (is.data.frame(x) || is.matrix(x)) ) {
127
-    ##         if(ncol(x) > 2) {
128
-    ##             ## We cannot be sure all genotypes are present
129
-    ##             ## but that is not our business?
130
-    ##             ## colnames(x)[lc] <- "Fitness"
131
-    ##             ## So this must be a matrix
132
-    ##             afe <- evalAllGenotypes(allFitnessEffects(
133
-    ##                 epistasis = from_genotype_fitness(x)),
134
-    ##                 order = FALSE, addwt = TRUE, max = 2000)
135
-    ##             gfm <- allGenotypes_to_matrix(afe)
136
-    ##         } else {
137
-    ##             ## This must be a data frame
138
-    ##             if(!is.data.frame(x))
139
-    ##                 stop("How are you passing a matrix here?")
140
-    ##             if(colnames(x)[lc] != "Fitness")
141
-    ##                 stop("We cannot guess what you are passing here")
142
-    ##             ## We are passed an allFitnessEffects output
143
-    ##             ## Will be simpler later as we will know immediately
144
-    ##             if(x[1, "Genotype"] != "WT") {
145
-    ##                 ## Yes, because we expect this present below
146
-    ##                 x <- rbind(data.frame(Genotype = "WT",
147
-    ##                                       Fitness = 1,
148
-    ##                                       stringsAsFactors = FALSE),
149
-    ##                            x)
150
-    ##             }
151
-    ##             x <- afe
152
-    ##             gfm <- allGenotypes_to_matrix(afe)
153
-    ##         }
154
-    ##     } else {
155
-    ##         stop("This is not allowed input")
156
-    ##     }
157
-    ## }
158 57
 
159 58
     mutated <- rowSums(tfm$gfm[, -ncol(tfm$gfm)])
160 59
     gaj <- genot_to_adj_mat(tfm$gfm)
... ...
@@ -181,7 +80,8 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
181 80
                      x_to = mutated[vv[, 2]],
182 81
                      y_to = tfm$afe$Fitness[vv[, 2]],
183 82
                      Change = factor(ifelse(cl == 0, "Neutral",
184
-                                     ifelse(cl > 0, "Gain", "Loss"))))
83
+                                     ifelse(cl > 0, "Gain", "Loss")),
84
+                                     levels = c("Gain", "Loss", "Neutral")))
185 85
     ## From http://stackoverflow.com/a/17257422
186 86
     number_ticks <- function(n) {function(limits) pretty(limits, n)}
187 87
     
... ...
@@ -194,7 +94,11 @@ plotFitnessLandscape <- function(x, show_labels = TRUE,
194 94
                            xend = x_to, yend = y_to,
195