Browse code

v. 2.9.6

Changes in version 2.9.6 (2017-12-27):
- Updated citation.
- An example (in miscell-files) about using and stopping with
modules.
- Prototype for sampling the single larges pop at last period
(function largest_last_pop, commented out for now).

Changes in version 2.9.5 (2017-12-7):
- samplePop: new option "single-nowt"

Changes in version 2.9.4 (2017-11-30):
- Deal with the very rare NULL simulations in summary.

ramon diaz-uriarte (at Phelsuma) authored on 27/12/2017 13:28:03
Showing7 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.9.3
5
-Date: 2017-11-27
4
+Version: 2.9.6
5
+Date: 2017-12-27
6 6
 Authors@R: c(person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),
7 7
 		     email = "rdiaz02@gmail.com"),
8 8
 	      person("Mark", "Taylor", role = "ctb", email = "ningkiling@gmail.com"))
... ...
@@ -327,6 +327,28 @@ samplePop <- function(x, timeSample = "last",
327 327
 }
328 328
 
329 329
 
330
+
331
+## single_largest_last_pop <- function(x) {
332
+##     last <- nrow(x$pops.by.time)
333
+##     largest <- which.max(x$pops.by.time[last, , drop = FALSE]) - 1
334
+##     genot <- x[["GenotypesLabels"]][largest]
335
+##     strsplit(genot, split = ", ")[[1]]
336
+## }
337
+
338
+
339
+## ## like samplePop(x, timeSample = "last") but return the single most abundant genotype
340
+## Just a prototype. Not well tested.
341
+## largest_last_pop <- function(x) {
342
+##     y <- lapply(x, single_largest_last_pop)
343
+##     allg <- sort(unique(unlist(y)))
344
+##     m <- t(vapply(y, function(z) {as.integer(allg %in%  z) },
345
+##                   integer(length(allg)) ))
346
+##     colnames(m) <- allg
347
+##     return(m)
348
+## }
349
+
350
+
351
+
330 352
 oncoSimulPop <- function(Nindiv,
331 353
                          fp,
332 354
                          model = "Exp",
... ...
@@ -767,8 +789,39 @@ print.oncosimul <- function(x, ...) {
767 789
 ## }
768 790
 
769 791
 summary.oncosimulpop <- function(object, ...) {
792
+    ## some simulations could have failed seriously returning a NULL
793
+    ## FIXME: how, why? Out of memory?
794
+
795
+    rm1 <- which(unlist(lapply(object, is.null)))
796
+    if(length(rm1) > 0) {
797
+        if(length(rm1) < length(object)) {
798
+            warning("Some simulations returned NULL. They will be removed",
799
+                    " from the summary. The NULL runs are ",
800
+                    paste(rm1, collapse = ", "),
801
+                    ".")
802
+        } else {
803
+            warning("All simulations returned NULL.")
804
+            return(NA)
805
+        }
806
+    }
807
+
808
+    ## I do not want to rm above for two reasons:
809
+    ##   - avoid re-asigning to the object
810
+    ##   - changing the numbering of the indices below if NULLs
811
+    ## So I need something more involved
812
+    ## Figure out exactly what the summary of a NULL is
813
+    sumnull <- summary(NULL)
814
+    
770 815
     tmp <- lapply(object, summary)
771
-    rm <- which(unlist(lapply(tmp, function(x) (length(x) == 1) && (is.na(x)))))
816
+
817
+    ## rm <- which(unlist(lapply(tmp,
818
+    ##                           function(x) (length(x) == 1) &&
819
+    ##                                       (is.na(x) || is.null(x)))))
820
+
821
+    rm <- which(unlist(lapply(tmp,
822
+                              function(x) ((length(x) == 1) &&
823
+                                           (is.na(x) || is.null(x))) ||
824
+                                          (identical(x, sumnull)))))
772 825
     if(length(rm) > 0)
773 826
         if(length(rm) < length(object)) {
774 827
         warning("Some simulations seem to have failed and will be removed",
... ...
@@ -1631,14 +1684,58 @@ get.mut.vector <- function(x, timeSample, typeSample,
1631 1684
         return( as.numeric((tcrossprod(pop,
1632 1685
                                        x$Genotypes)/popSize) >= thresholdWhole) )
1633 1686
     } else if (typeSample %in%  c("singleCell", "single")) {
1634
-
1635 1687
         return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1688
+    } else if (typeSample %in% c("singleCell-noWT", "single-noWT",
1689
+                                 "singleCell-nowt", "single-nowt")) {
1690
+        genots <- x$Genotypes
1691
+        whichwt <- which(x$GenotypesLabels == "")
1692
+        if(length(whichwt)) {
1693
+            genots <- genots[, -whichwt, drop = FALSE]
1694
+            pop <- pop[-whichwt]
1695
+        }
1696
+        if(all(pop == 0)) {
1697
+            warning("No non-WT clone with required popSize or at required time")
1698
+            return(rep(NA, nrow(x$Genotypes)))
1699
+        } else {
1700
+            return(genots[, sample(seq_along(pop), 1, prob = pop)])
1701
+        }
1636 1702
     } else {
1637 1703
         stop("Unknown typeSample option")
1638 1704
     }
1639 1705
 }
1640 1706
 
1641 1707
 
1708
+## get.mut.vector <- function(x, timeSample, typeSample,
1709
+##                            thresholdWhole, popSizeSample) {
1710
+##     if(is.null(x$TotalPopSize)) {
1711
+##         warning(paste("It looks like this simulation never completed.",
1712
+##                       " Maybe it reached maximum allowed limits.",
1713
+##                       " You will get NAs"))
1714
+##         return(rep(NA, length(x$geneNames)))
1715
+##     }
1716
+##     the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
1717
+##     if(the.time < 0) { 
1718
+##         return(rep(NA, nrow(x$Genotypes)))
1719
+##     } 
1720
+##     pop <- x$pops.by.time[the.time, -1]
1721
+    
1722
+##     if(all(pop == 0)) {
1723
+##         stop("You found a bug: this should never happen")
1724
+##     }
1725
+    
1726
+##     if(typeSample %in% c("wholeTumor", "whole")) {
1727
+##         popSize <- x$PerSampleStats[the.time, 1]
1728
+##         return( as.numeric((tcrossprod(pop,
1729
+##                                        x$Genotypes)/popSize) >= thresholdWhole) )
1730
+##     } else if (typeSample %in%  c("singleCell", "single")) {
1731
+
1732
+##         return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1733
+##     } else {
1734
+##         stop("Unknown typeSample option")
1735
+##     }
1736
+## }
1737
+
1738
+
1642 1739
 
1643 1740
 
1644 1741
 
... ...
@@ -1,7 +1,8 @@
1 1
 citHeader("If you use OncoSimulR, please cite the OncoSimulR Bioinformatics paper.",
2
-          " A former version of OncoSimulR has been used in a large",
3
-          " comparative study of methods to infer restrictions,",
4
-          " published in BMC Bioinformatics; you might want to cite that too,",
2
+          " OncoSimulR has been used in two large",
3
+          " comparative studies of methods to infer restrictions in",
4
+          " the order of accumulation of mutations (cancer progression models)",
5
+          " published in Bioinformatics and BMC Bioinformatics; you might want to cite those too,",
5 6
           " if appropriate, such as when referring to using evolutionary simulations",
6 7
           " to assess oncogenetic tree/cancer progression methods performance.")
7 8
 
... ...
@@ -11,26 +12,38 @@ citEntry(entry="Article",
11 12
          title = "OncoSimulR: genetic simulation with arbitrary epistasis and mutator genes in asexual populations.",
12 13
          journal = "Bioinformatics",
13 14
          year = "2017",
15
+         volume = "33",
16
+         number = "12",
17
+         pages = "1898--1899",
14 18
          doi = "10.1093/bioinformatics/btx077",
15 19
          publisher = "Oxford University Press",
16 20
          url = " https://doi.org/10.1093/bioinformatics/btx077",
17 21
          textVersion = paste("R Diaz-Uriarte.",
18 22
                              "OncoSimulR: genetic simulation with arbitrary epistasis and mutator genes in asexual populations.",
19
-                             " 2017. Bioinformatics, https://doi.org/10.1093/bioinformatics/btx077.")
23
+                             " 2017. Bioinformatics, 33, 1898--1899. https://doi.org/10.1093/bioinformatics/btx077.")
20 24
          )
21 25
 
22
-## citEntry(entry="Manual",
23
-##          author = "R Diaz-Uriarte",
24
-##          title = "OncoSimulR: Forward Genetic Simulation of Cancer Progresion with Epistasis.",
25
-##          year = "2015",
26
-##          note = "R package version 2.0.0",
27
-##          textVersion = paste("R Diaz-Uriarte.",
28
-##              "OncoSimulR: Forward Genetic Simulation of Cancer Progresion with Epistasis. 2015. BioConductor package version 2.0.0 ")
29
-## )
30 26
 
31 27
 
32 28
 ## citHeader("A former version of OncoSimulR has been used in this paper:")
33 29
 
30
+
31
+citEntry(entry="Article",
32
+         author = "R Diaz-Uriarte",
33
+         title = "Cancer progression models and fitness landscapes: a many-to-many relationship",
34
+         journal = "Bioinformatics",
35
+         year = "2017",
36
+         doi = "10.1093/bioinformatics/btx663",
37
+         url = "https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btx663/",
38
+##         volume = "zz",
39
+##         number = "zz",
40
+         textVersion = paste("R Diaz-Uriarte.",
41
+                             "Cancer progression models and fitness landscapes: a many-to-many relationship",
42
+                             "2017", "Bioinformatics.",
43
+                             "https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btx663/")
44
+)
45
+
46
+
34 47
 citEntry(entry="Article",
35 48
          author = "R Diaz-Uriarte",
36 49
          title = "Identifying restrictions in the order of accumulation of mutations during tumor progression: effects of passengers, evolutionary models, and sampling",
... ...
@@ -1,3 +1,16 @@
1
+Changes in version 2.9.6 (2017-12-27):
2
+	- Updated citation.
3
+	- An example (in miscell-files) about using and stopping with
4
+	modules.
5
+	- Prototype for sampling the single larges pop at last period
6
+	(function largest_last_pop, commented out for now).
7
+
8
+Changes in version 2.9.5 (2017-12-7):
9
+	- samplePop: new option "single-nowt"
10
+
11
+Changes in version 2.9.4 (2017-11-30):
12
+	- Deal with the very rare NULL simulations in summary.
13
+
1 14
 Changes in version 2.9.3 (2017-11-27):
2 15
 	- Make clang happy (do not use flandscape as DataFrame)
3 16
 	
... ...
@@ -57,6 +57,8 @@ sampledGenotypes(y, genes = NULL)
57 57
     probability of sampling a cell (a clone) is directly proportional to
58 58
     its population size.  "wholeTumor" (or "whole") for whole tumor
59 59
     sampling (i.e., this is similar to a biopsy being the entire tumor).
60
+    "singleCell-noWT" or "single-nowt" is single cell sampling, but
61
+    excluding the wild type.
60 62
   }
61 63
   
62 64
   \item{thresholdWhole}{
... ...
@@ -252,7 +252,13 @@ test_that("exercising the sampling code, v2 objects", {
252 252
               expect_message(samplePop(o4, timeSample = "last",
253 253
                                        typeSample = "whole"),
254 254
                              "Subjects by Genes matrix of 2 subjects and 10 genes")
255
-          })
255
+              expect_message(samplePop(o4, typeSample = "single-nowt",
256
+                                       timeSample = "last"),
257
+                            "Subjects by Genes matrix of 2 subjects and 10 genes")
258
+              expect_message(samplePop(o4, typeSample = "single-nowt",
259
+                                       timeSample = "uniform"),
260
+                             "Subjects by Genes matrix of 2 subjects and 10 genes")
261
+})
256 262
 
257 263
 test_that("exercising the sampling code, v2 objects, more", {
258 264
               cs <-  data.frame(parent = c(rep("Root", 4), "a", "b", "d", "e", "c"),
... ...
@@ -300,7 +306,13 @@ test_that("exercising the sampling code, v2 objects, more", {
300 306
               expect_message(samplePop(o4, timeSample = "last",
301 307
                                        typeSample = "whole"),
302 308
                              "Subjects by Genes matrix of 4 subjects and 6 genes")
303
-          })
309
+              expect_message(samplePop(o4, typeSample = "single-nowt",
310
+                                       timeSample = "last"),
311
+                            "Subjects by Genes matrix of 4 subjects and 6 genes")
312
+              expect_message(samplePop(o4, typeSample = "single-nowt",
313
+                                       timeSample = "uniform"),
314
+                             "Subjects by Genes matrix of 4 subjects and 6 genes")
315
+})
304 316
 
305 317
 
306 318
 
... ...
@@ -382,7 +394,37 @@ test_that("exercising sampling code, customSize", {
382 394
     expect_message(samplePop(o4, typeSample = "single",
383 395
                              popSizeSample = c(6100, 0, 5000, 9000)),
384 396
                    "Subjects by Genes matrix of 4 subjects and 6 genes")
385
-   
397
+
398
+    expect_message(samplePop(o4, typeSample = "single-nowt",
399
+                             popSizeSample = c(9000, 9000, 8500, 9000)),
400
+                   "Subjects by Genes matrix of 4 subjects and 6 genes")
401
+
402
+    ## these are specific for the nowt code to exercise the dealing with
403
+    ## borderline cases and the second to deal with a clearly non-borderline
404
+    o41 <- oncoSimulPop(4,
405
+                        cbn1,
406
+                        initSize = 2e3,
407
+                       detectionSize = 1e3,
408
+                       onlyCancer = TRUE,
409
+                       mc.cores = 2,
410
+                       max.num.tries = 5000,
411
+                       sampleEvery = 0.03, keepEvery = 1)
412
+    expect_message(samplePop(o41, typeSample = "single-nowt",
413
+                             popSizeSample = c(900, 800, 850, 900)),
414
+                   "Subjects by Genes matrix of 4 subjects and 6 genes")
415
+    expect_warning(samplePop(o41, typeSample = "single-nowt",
416
+                             popSizeSample = c(900, 800, 850, 900)),
417
+                   "No non-WT clone with required popSize or at required time")
418
+    o91 <- oncoSimulPop(4,
419
+                       cbn1, 
420
+                       detectionSize = 1e5,
421
+                       onlyCancer = TRUE,
422
+                       mc.cores = 2,
423
+                       max.num.tries = 5000,
424
+                       sampleEvery = 0.03, keepEvery = 1)
425
+     expect_message(samplePop(o91, typeSample = "single-nowt",
426
+                             popSizeSample = c(9000, 9000, 8500, 9000)),
427
+                   "Subjects by Genes matrix of 4 subjects and 6 genes")
386 428
 })
387 429
 
388 430
 
... ...
@@ -1,15 +1,15 @@
1 1
 \usepackage[%
2
-		shash={8f168d4},
3
-		lhash={8f168d4bffbf847d6faff5e0485705fe32878aa0},
4
-		authname={Ramon Diaz-Uriarte (at Coleonyx)},
2
+		shash={7c279ec},
3
+		lhash={7c279ec4db10924e57799b5d15e9d89217b7c785},
4
+		authname={Ramon Diaz-Uriarte (at Draco)},
5 5
 		authemail={rdiaz02@gmail.com},
6
-		authsdate={2017-11-14},
7
-		authidate={2017-11-14 16:12:04 +0100},
8
-		authudate={1510672324},
9
-		commname={Ramon Diaz-Uriarte (at Coleonyx)},
6
+		authsdate={2017-12-01},
7
+		authidate={2017-12-01 14:30:15 +0100},
8
+		authudate={1512135015},
9
+		commname={Ramon Diaz-Uriarte (at Draco)},
10 10
 		commemail={rdiaz02@gmail.com},
11
-		commsdate={2017-11-14},
12
-		commidate={2017-11-14 16:12:04 +0100},
13
-		commudate={1510672324},
11
+		commsdate={2017-12-01},
12
+		commidate={2017-12-01 14:30:15 +0100},
13
+		commudate={1512135015},
14 14
 		refnames={ (HEAD -> master, origin/master, origin/HEAD)}
15 15
 	]{gitsetinfo}
16 16
\ No newline at end of file