Browse code

more tests; handling of corner cases; v.1.99.6

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/OncoSimulR@108861 bc3139a8-67e5-0310-9ffc-ced21a209358

Ramon Diaz-Uriarte authored on 26/09/2015 13:47:28
Showing 18 changed files

... ...
@@ -1,9 +1,9 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progresion with Epistasis 
4
-Version: 1.99.5
5
-Date: 2015-06-25
6
-Author: Ramon Diaz-Uriarte. Melissa O'Neill for randutils.
4
+Version: 1.99.6
5
+Date: 2015-09-26
6
+Author: Ramon Diaz-Uriarte. 
7 7
 Maintainer: Ramon Diaz-Uriarte <rdiaz02@gmail.com>
8 8
 Description: Functions for forward population genetic simulation in
9 9
     asexual populations, with special focus on cancer progression.
... ...
@@ -19,7 +19,8 @@ Description: Functions for forward population genetic simulation in
19 19
     phylogenetic relationships of the clones.
20 20
 biocViews: BiologicalQuestion, SomaticMutation
21 21
 License: GPL (>= 3)
22
-URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cancer.gov/gsr/packages/oncosimulr/, http://ligarto.org/rdiaz
22
+URL: https://github.com/rdiaz02/OncoSimul, https://popmodels.cancercontrol.cancer.gov/gsr/packages/oncosimulr/
23
+BugReports: https://github.com/rdiaz02/OncoSimul/issues
23 24
 Depends: R (>= 3.1.0)
24 25
 Imports: Rcpp (>= 0.11.1), parallel, data.table, graph, Rgraphviz, gtools, igraph
25 26
 Suggests: BiocStyle, knitr, Oncotree, testthat
... ...
@@ -34,6 +34,8 @@ oncoSimulSample <- function(Nindiv,
34 34
                                 } else {
35 35
                                     nd <- (2 : round(0.75 * max(fp)))
36 36
                                 }
37
+                                if(length(nd) == 1) ## for sample
38
+                                       nd <- c(nd, nd)
37 39
                                 sample(nd, Nindiv,
38 40
                                        replace = TRUE)
39 41
                             },
... ...
@@ -67,7 +69,7 @@ oncoSimulSample <- function(Nindiv,
67 69
     if(keepPhylog)
68 70
         warning(paste("oncoSimulSample does not return the phylogeny",
69 71
                       "for now, so there is little point in storing it."))
70
-    
72
+
71 73
     if(max.num.tries.total < Nindiv)
72 74
         stop(paste("You have requested something impossible: ",
73 75
                    "max.num.tries.total < Nindiv"))
... ...
@@ -84,13 +86,16 @@ oncoSimulSample <- function(Nindiv,
84 86
                          detectionSize = detectionSize,
85 87
                          detectionDrivers = detectionDrivers)[, -1, drop = FALSE]
86 88
 
89
+    ## FIXME: we are not triggering an error, just a message. This is on
90
+    ## purpose, since some of these conditions DO provide useful
91
+    ## output. Do we want these to be errors?
87 92
     f.out.attempts <- function() {
88 93
         message("Run out of attempts")
89 94
         return(list(
90 95
             popSummary = NA,
91 96
             popSample = NA,
92 97
             HittedMaxTries = TRUE,
93
-            hittedWallTime = FALSE,
98
+            HittedWallTime = FALSE,
94 99
             UnrecoverExcept = FALSE
95 100
         ))    
96 101
     }    
... ...
@@ -112,7 +117,7 @@ oncoSimulSample <- function(Nindiv,
112 117
             popSummary = NA,
113 118
             popSample = NA,
114 119
             HittedMaxTries = TRUE,
115
-            hittedWallTime = FALSE,
120
+            HittedWallTime = FALSE,
116 121
             UnrecoverExcept = FALSE
117 122
         ))    
118 123
     }    
... ...
@@ -197,7 +202,7 @@ oncoSimulSample <- function(Nindiv,
197 202
             return(f.out.time.cpp())
198 203
         } else if( indiv > Nindiv ) {
199 204
             if(verbosity > 0)
200
-                message(paste("Successfully sampled ", Nindiv, " individuals"))
205
+                message(paste0("Successfully sampled ", Nindiv, " individuals"))
201 206
             class(pop) <- "oncosimulpop"
202 207
             if(inherits(fp, "fitnessEffects")) {
203 208
                 geneNames <- names(getNamesID(fp))
... ...
@@ -215,6 +220,7 @@ oncoSimulSample <- function(Nindiv,
215 220
                 UnrecoverExcept = FALSE
216 221
             ))
217 222
         } else if( attemptsLeft <= 0 ) {
223
+              ## it is very unlikely this will ever happen. 
218 224
             return(f.out.attempts())
219 225
         } else  if( as.double(difftime(Sys.time(), startTime, units = "secs"))
220 226
                    > max.wall.time.total ) {
... ...
@@ -400,6 +406,8 @@ oncoSimulIndiv <- function(fp,
400 406
     if(mu < 0) {
401 407
         stop("mutation rate (mu) is negative")
402 408
     }
409
+    ## We do not test for equality to 0. That might be a weird but
410
+    ## legitimate case?
403 411
 
404 412
     if( (keepEvery > 0) & (keepEvery < sampleEvery)) {
405 413
         keepEvery <- sampleEvery
... ...
@@ -542,9 +550,12 @@ oncoSimulIndiv <- function(fp,
542 550
 }
543 551
 
544 552
 summary.oncosimul <- function(object, ...) {
545
-
546
-    if(object$HittedWallTime || object$HittedMaxTries ||
547
-       object$other$UnrecoverExcept)
553
+    
554
+    if(object$other$UnrecoverExcept) ## yes, when bailing out from
555
+                                     ## except. can have just minimal
556
+                                     ## content
557
+        return(NA)
558
+    else if (object$HittedWallTime || object$HittedMaxTries)
548 559
         return(NA)
549 560
     else {
550 561
         tmp <- object[c("NumClones", "TotalPopSize", "LargestClone",
... ...
@@ -588,8 +599,8 @@ summary.oncosimulpop <- function(object, ...) {
588 599
 }
589 600
 
590 601
 print.oncosimulpop <- function(x, ...) {
591
-    cat("\nPopulation of OncoSimul trajectories of ",
592
-        length(x), " individuals. Call :\n")
602
+    cat("\nPopulation of OncoSimul trajectories of",
603
+        length(x), "individuals. Call :\n")
593 604
     print(attributes(x)$call)
594 605
     cat("\n")
595 606
     print(summary(x))
... ...
@@ -737,9 +748,10 @@ plotPoset <- function(x, names = NULL, addroot = FALSE,
737 748
         box()
738 749
 }
739 750
 
740
-plotAdjMat <- function(adjmat) {
741
-    plot(as(adjmat, "graphNEL"))
742
-}
751
+## this function seems to never be used
752
+## plotAdjMat <- function(adjmat) {
753
+##     plot(as(adjmat, "graphNEL"))
754
+## }
743 755
 
744 756
 
745 757
 
... ...
@@ -827,38 +839,6 @@ plotClonePhylog <- function(x, N = 1, t = "last",
827 839
 
828 840
 
829 841
 
830
-## plotClonePhylog <- function(x, timeEvent = FALSE,
831
-##                             showEvents = TRUE,
832
-##                             fixOverlap = TRUE) {
833
-##     if(!inherits(x, "oncosimul2"))
834
-##         stop("Phylogenetic information is only stored with v >=2")
835
-##     if(nrow(x$other$PhylogDF) == 0)
836
-##         stop("It seems you run the simulation with keepPhylog= FALSE")
837
-##     ## requireNamespace("igraph")
838
-##     df <- x$other$PhylogDF
839
-##     if(!showEvents) {
840
-##         df <- df[!duplicated(df[, c(1, 2)]), ]
841
-##     }
842
-##     g <- igraph::graph.data.frame(df)
843
-##     l0 <- igraph::layout.reingold.tilford(g)
844
-##     if(!timeEvent) {
845
-##         plot(g, layout = l0)
846
-##     } else {
847
-##         l1 <- l0
848
-##         indexAppear <- match(V(g)$name, as.character(df[, 2]))
849
-##         firstAppear <- df$time[indexAppear]
850
-##         firstAppear[1] <- 0
851
-##         l1[, 2] <- (max(firstAppear) - firstAppear)
852
-##         if(fixOverlap) {
853
-##             dx <- which(duplicated(l1[, 1]))
854
-##             if(length(dx)) {
855
-##                 ra <- range(l1[, 1])
856
-##                 l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
857
-##             }
858
-##         }
859
-##         plot(g, layout = l1)         
860
-##     }
861
-## }
862 842
 
863 843
 
864 844
 
... ...
@@ -866,58 +846,73 @@ plotClonePhylog <- function(x, N = 1, t = "last",
866 846
 
867 847
 ############# The rest are internal functions
868 848
 
869
-get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
870
-    ## Obtain, from  results from a simulation run, the vector
871
-    ## of 0/1 corresponding to each gene.
872
-    
873
-    ## threshold is the min. proportion for a mutation to be detected
874
-    ## We are doing whole tumor sampling here, as in Sprouffske
875 849
 
876
-    ## timeSample: do we sample at end, or at a time point, chosen
877
-    ## randomly, from all those with at least one driver?
878
-    
879
-    
850
+get.the.time.for.sample <- function(tmp, timeSample) {
880 851
     if(timeSample == "last") {
881
-        return(as.numeric(
882
-            (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
883
-                        tmp$Genotypes)/tmp$TotalPopSize) > threshold))
852
+        if(tmp$TotalPopSize == 0) {
853
+            warning(paste("Final population size is 0.",
854
+                          "Thus, there is nothing to sample with",
855
+                          "sampling last. You will get NAs"))
856
+            the.time <- -99
857
+        } else {
858
+              the.time <- nrow(tmp$pops.by.time)
859
+          }
884 860
     } else if (timeSample %in% c("uniform", "unif")) {
885
-        the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
886
-        pop <- tmp$pops.by.time[the.time, -1]
887
-        popSize <- tmp$PerSampleStats[the.time, 1]
888
-        return( as.numeric((tcrossprod(pop,
889
-                                       tmp$Genotypes)/popSize) > threshold) )
890
-    }
861
+          candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
862
+          
863
+          if (length(candidate.time) == 0) {
864
+              warning(paste("There is not a single sampled time",
865
+                            "at which there are any mutants.",
866
+                            "Thus, no uniform sampling possible.",
867
+                            "You will get NAs"))
868
+              the.time <- -99
869
+              ## return(rep(NA, nrow(tmp$Genotypes)))
870
+          } else if (length(candidate.time) == 1) {
871
+                message("Only one sampled time period with mutants.")
872
+                the.time <- candidate.time
873
+            } else {
874
+                  the.time <- sample(candidate.time, 1)
875
+              }
876
+      } else {
877
+            stop("Unknown timeSample option")
878
+        }
879
+    return(the.time)
891 880
 }
892 881
 
893 882
 
894
-get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
895
-    ## No threshold, as single cell.
896
-
897
-    ## timeSample: do we sample at end, or at a time point, chosen
898
-    ## randomly, from all those with at least one driver?
883
+get.mut.vector <- function(x, timeSample, typeSample,
884
+                           thresholdWhole) {
885
+    the.time <- get.the.time.for.sample(x, timeSample)
886
+    if(the.time < 0) { 
887
+        return(rep(NA, nrow(x$Genotypes)))
888
+    } 
889
+    pop <- x$pops.by.time[the.time, -1]
899 890
     
900
-    if(timeSample == "last") {
901
-        the.time <- nrow(tmp$pops.by.time)
902
-    } else if (timeSample %in% c("uniform", "unif")) {
903
-        the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
891
+    if(all(pop == 0)) {
892
+        stop("You found a bug: this should never happen")
904 893
     }
905
-    pop <- tmp$pops.by.time[the.time, -1]
906
-    ##       popSize <- tmp$PerSampleStats[the.time, 1]
907
-    ## genot <- sample(seq_along(pop), 1, prob = pop)
908
-    return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
894
+    
895
+    if(typeSample %in% c("wholeTumor", "whole")) {
896
+        popSize <- x$PerSampleStats[the.time, 1]
897
+        return( as.numeric((tcrossprod(pop,
898
+                                       x$Genotypes)/popSize) > thresholdWhole) )
899
+    } else if (typeSample %in%  c("singleCell", "single")) {
900
+
901
+          return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
902
+      } else {
903
+            stop("Unknown typeSample option")
904
+        }
909 905
 }
910 906
 
911 907
 
912
-get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
913
-                           thresholdWhole = 0.5) {
914
-    if(typeSample %in% c("wholeTumor", "whole")) {
915
-        get.mut.vector.whole(x, timeSample = timeSample,
916
-                             threshold = thresholdWhole)
917
-    } else if(typeSample %in%  c("singleCell", "single")) {
918
-        get.mut.vector.singlecell(x, timeSample = timeSample)
919
-    }
920
-}
908
+
909
+
910
+
911
+
912
+
913
+
914
+
915
+
921 916
 
922 917
 
923 918
 oncoSimul.internal <- function(poset, ## restrict.table,
... ...
@@ -973,13 +968,13 @@ oncoSimul.internal <- function(poset, ## restrict.table,
973 968
     if(numGenes > 64)
974 969
         stop("Largest possible number of genes is 64")
975 970
 
976
-    
977
-    if(initSize_species < 10) {
978
-        warning("initSize_species too small?")
979
-    }
980
-    if(initSize_iter < 100) {
981
-        warning("initSize_iter too small?")
982
-    }
971
+    ## These can never be set by the user
972
+    ## if(initSize_species < 10) {
973
+    ##     warning("initSize_species too small?")
974
+    ## }
975
+    ## if(initSize_iter < 100) {
976
+    ##     warning("initSize_iter too small?")
977
+    ## }
983 978
 
984 979
     ## numDrivers <- nrow(restrict.table)
985 980
     if(length(unique(restrict.table[, 1])) != numDrivers)
... ...
@@ -1067,25 +1062,32 @@ oncoSimul.internal <- function(poset, ## restrict.table,
1067 1062
 
1068 1063
 }
1069 1064
 
1065
+eFinalMf <- function(initSize, s, j) {
1066
+    ## Expected final sizes for McF, when K is set to the default.
1067
+    # j is number of drivers
1068
+    ## as it says, with no passengers
1069
+    ## Set B(d) = D(N)
1070
+    K <- initSize/(exp(1) - 1)
1071
+    return(K * (exp( (1 + s)^j) - 1))
1072
+}
1070 1073
 
1071 1074
 
1072 1075
 
1073
-
1074
-
1075
-create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
1076
-    if(tmp$NumClones > 1) {
1077
-        NumMutations <- apply(tmp$Genotypes, 2, sum)
1078
-        muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
1079
-                              t(apply(tmp$pops.by.time[, -c(1),
1080
-                                                       drop = FALSE], 1,
1081
-                                      function(x) tapply(x,
1082
-                                                         NumMutations, sum))))
1083
-        colnames(muts.by.time)[c(1)] <- "Time"
1084
-    } else {
1085
-        muts.by.time <- tmp$pops.by.time
1086
-    }
1087
-    return(muts.by.time)
1088
-} 
1076
+## We are not using this anymore
1077
+## create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
1078
+##     if(tmp$NumClones > 1) {
1079
+##         NumMutations <- apply(tmp$Genotypes, 2, sum)
1080
+##         muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
1081
+##                               t(apply(tmp$pops.by.time[, -c(1),
1082
+##                                                        drop = FALSE], 1,
1083
+##                                       function(x) tapply(x,
1084
+##                                                          NumMutations, sum))))
1085
+##         colnames(muts.by.time)[c(1)] <- "Time"
1086
+##     } else {
1087
+##         muts.by.time <- tmp$pops.by.time
1088
+##     }
1089
+##     return(muts.by.time)
1090
+## } 
1089 1091
 
1090 1092
 
1091 1093
 create.drivers.by.time <- function(tmp, ndr) {
... ...
@@ -1203,8 +1205,8 @@ plotClones <- function(z, ndr = NULL, na.subs = TRUE,
1203 1205
 
1204 1206
 plotDrivers0 <- function(x,
1205 1207
                          ndr,
1206
-                         timescale = 4,
1207
-                         trim.no.drivers = TRUE,
1208
+                         timescale = 1,
1209
+                         trim.no.drivers = FALSE,
1208 1210
                          addtot = TRUE,
1209 1211
                          addtotlwd = 2,
1210 1212
                          na.subs = TRUE, log = "y", type = "l",
... ...
@@ -1213,6 +1215,8 @@ plotDrivers0 <- function(x,
1213 1215
                          ...) {
1214 1216
     ## z <- create.drivers.by.time(x, numDrivers)
1215 1217
     z <- create.drivers.by.time(x, ndr)
1218
+    ## we can now never enter here because trim.no.drivers is always FALSE
1219
+    ## in call.
1216 1220
     if(trim.no.drivers && x$MaxDriversLast) {
1217 1221
         fi <- which(apply(z[, -c(1, 2), drop = FALSE], 1,
1218 1222
                           function(x) sum(x) > 0))[1]
... ...
@@ -1222,11 +1226,16 @@ plotDrivers0 <- function(x,
1222 1226
     if(na.subs){
1223 1227
         y[y == 0] <- NA
1224 1228
     }
1225
-    if(timescale != 1) {
1226
-        time <- timescale * z[, 1]
1227
-    } else {
1228
-        time <- z[, 1]
1229
-    }
1229
+
1230
+    ## Likewise, we can never enter here now as timescale fixed at 1. And
1231
+    ## this is silly.
1232
+    ## if(timescale != 1) {
1233
+    ##     time <- timescale * z[, 1]
1234
+    ## } else {
1235
+    ##     time <- z[, 1]
1236
+    ## }
1237
+    time <- timescale * z[, 1]
1238
+    
1230 1239
     if(nrow(y) <= 2) type <- "b"
1231 1240
     matplot(x = time,
1232 1241
             y = y,
... ...
@@ -1243,61 +1252,193 @@ plotDrivers0 <- function(x,
1243 1252
 }
1244 1253
 
1245 1254
 
1246
-rtNoDep <- function(numdrivers) {
1247
-    ## create a restriction table with no dependencies
1248
-    x <- matrix(nrow = numdrivers, ncol = 3)
1249
-    x[, 1] <- 1:numdrivers
1250
-    x[, 2] <- 0
1251
-    x[, 3] <- -9
1252
-    return(x)
1253
-}
1254 1255
 
1255 1256
 
1256
-## simulate from generative model. This might not be fully correct!!!
1257
+## No longer used
1258
+## rtNoDep <- function(numdrivers) {
1259
+##     ## create a restriction table with no dependencies
1260
+##     x <- matrix(nrow = numdrivers, ncol = 3)
1261
+##     x[, 1] <- 1:numdrivers
1262
+##     x[, 2] <- 0
1263
+##     x[, 3] <- -9
1264
+##     return(x)
1265
+## }
1257 1266
 
1258
-simposet <- function(poset, p) {
1259
-    ## if (length(parent.nodes) != length (child.nodes)){
1260
-    ##     print("An Error Occurred")
1261
-    ## }
1262
-    ##    else {
1263
-    num.genes <- max(poset) - 1 ## as root is not a gene
1264
-    genotype <-t(c(1, rep(NA, num.genes)))
1265
-    colnames(genotype) <- as.character(0:num.genes)
1267
+
1268
+## Simulate from generative model. This is a quick function, and is most
1269
+## likely wrong! Never used for anything.
1270
+
1271
+## simposet <- function(poset, p) {
1272
+##     ## if (length(parent.nodes) != length (child.nodes)){
1273
+##     ##     print("An Error Occurred")
1274
+##     ## }
1275
+##     ##    else {
1276
+##     num.genes <- max(poset) - 1 ## as root is not a gene
1277
+##     genotype <-t(c(1, rep(NA, num.genes)))
1278
+##     colnames(genotype) <- as.character(0:num.genes)
1266 1279
     
1267 1280
     
1268
-    poset$runif <- runif(nrow(poset))
1269
-    ## this.relation.prob.OK could be done outside, but having it inside
1270
-    ## the loop would allow to use different thresholds for different
1271
-    ## relationships
1272
-    for (i in (1:nrow(poset))) {
1273
-        child <- poset[i, 2]
1274
-        this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p)
1275
-        the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. 
1276
-        if (is.na(genotype[child])){
1277
-            genotype[child] <- this.relation.prob.OK * the.parent  
1278
-        }
1279
-        else
1280
-            genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent)
1281
-    }
1282
-    ##    }
1281
+##     poset$runif <- runif(nrow(poset))
1282
+##     ## this.relation.prob.OK could be done outside, but having it inside
1283
+##     ## the loop would allow to use different thresholds for different
1284
+##     ## relationships
1285
+##     for (i in (1:nrow(poset))) {
1286
+##         child <- poset[i, 2]
1287
+##         this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p)
1288
+##         the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. 
1289
+##         if (is.na(genotype[child])){
1290
+##             genotype[child] <- this.relation.prob.OK * the.parent  
1291
+##         }
1292
+##         else
1293
+##             genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent)
1294
+##     }
1295
+##     ##    }
1283 1296
     
1284
-    return(genotype)
1285
-}
1286
-
1287
-
1288
-
1289
-
1290
-eFinalMf <- function(initSize, s, j) {
1291
-    ## Expected final sizes for McF, when K is set to the default.
1292
-    # j is number of drivers
1293
-    ## as it says, with no passengers
1294
-    ## Set B(d) = D(N)
1295
-    K <- initSize/(exp(1) - 1)
1296
-    return(K * (exp( (1 + s)^j) - 1))
1297
-}
1297
+##     return(genotype)
1298
+## }
1298 1299
 
1299 1300
 
1300 1301
 ## to plot and adjacency matrix in this context can do
1301 1302
 ## plotPoset(intAdjMatToPoset(adjMat))
1302 1303
 ## where intAdjMatToPoset is from best oncotree code: generate-random-trees.
1303 1304
 ## No! the above is simpler
1305
+
1306
+
1307
+
1308
+
1309
+## get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
1310
+##     ## Obtain, from  results from a simulation run, the vector
1311
+##     ## of 0/1 corresponding to each gene.
1312
+    
1313
+##     ## threshold is the min. proportion for a mutation to be detected
1314
+##     ## We are doing whole tumor sampling here, as in Sprouffske
1315
+
1316
+##     ## timeSample: do we sample at end, or at a time point, chosen
1317
+##     ## randomly, from all those with at least one driver?
1318
+    
1319
+##     if(timeSample == "last") {
1320
+##         if(tmp$TotalPopSize == 0)
1321
+##             warning(paste("Final population size is 0.",
1322
+##                           "Thus, there is nothing to sample with ",
1323
+##                           "sampling last. You will get NAs"))
1324
+##         return(as.numeric(
1325
+##             (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
1326
+##                         tmp$Genotypes)/tmp$TotalPopSize) > threshold))
1327
+##     } else if (timeSample %in% c("uniform", "unif")) {
1328
+##           candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
1329
+          
1330
+##           if (length(candidate.time) == 0) {
1331
+##               warning(paste("There is not a single sampled time",
1332
+##                             "at which there are any mutants.",
1333
+##                             "Thus, no uniform sampling possible.",
1334
+##                             "You will get NAs"))
1335
+##               return(rep(NA, nrow(tmp$Genotypes)))
1336
+##           } else if (length(candidate.time) == 1) {
1337
+##                 the.time <- candidate.time
1338
+##             } else {
1339
+##                   the.time <- sample(candidate.time, 1)
1340
+##               }
1341
+##           pop <- tmp$pops.by.time[the.time, -1]
1342
+##           popSize <- tmp$PerSampleStats[the.time, 1]
1343
+##           ## if(popSize == 0)
1344
+##           ##     warning(paste("Population size at this time is 0.",
1345
+##           ##                   "Thus, there is nothing to sample at this time point.",
1346
+##           ##                   "You will get NAs"))
1347
+##           return( as.numeric((tcrossprod(pop,
1348
+##                                        tmp$Genotypes)/popSize) > threshold) )
1349
+##       }
1350
+## }
1351
+
1352
+
1353
+
1354
+##           the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
1355
+##           if(length(the.time) == 0) {
1356
+##               warning(paste("There are no clones with drivers at any time point.",
1357
+##                             "No uniform sampling possible.",
1358
+##                             "You will get a vector of NAs."))
1359
+##             return(rep(NA, nrow(tmp$Genotypes)))  
1360
+##           }
1361
+## get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
1362
+##     ## No threshold, as single cell.
1363
+
1364
+##     ## timeSample: do we sample at end, or at a time point, chosen
1365
+##     ## randomly, from all those with at least one driver?
1366
+    
1367
+##     if(timeSample == "last") {
1368
+##         the.time <- nrow(tmp$pops.by.time)
1369
+##     } else if (timeSample %in% c("uniform", "unif")) {
1370
+##          candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
1371
+         
1372
+##          if (length(candidate.time) == 0) {
1373
+##              warning(paste("There is not a single sampled time",
1374
+##                            "at which there are any mutants.",
1375
+##                            "Thus, no uniform sampling possible.",
1376
+##                            "You will get NAs"))
1377
+##              return(rep(NA, nrow(tmp$Genotypes)))
1378
+##          } else if (length(candidate.time) == 1) {
1379
+##                the.time <- candidate.time
1380
+##            } else {
1381
+##                  the.time <- sample(candidate.time, 1)
1382
+##              }
1383
+
1384
+##      }
1385
+##     pop <- tmp$pops.by.time[the.time, -1]
1386
+##     ##       popSize <- tmp$PerSampleStats[the.time, 1]
1387
+##     ## genot <- sample(seq_along(pop), 1, prob = pop)
1388
+##     if(all(pop == 0)) {
1389
+##         warning(paste("All clones have a population size of 0",
1390
+##                       "at the chosen time. Nothing to sample.",
1391
+##                       "You will get NAs"))
1392
+##         return(rep(NA, nrow(tmp$Genotypes)))
1393
+##     } else {
1394
+##           return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1395
+##       }
1396
+## }
1397
+
1398
+
1399
+## get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
1400
+##                            thresholdWhole = 0.5) {
1401
+##     if(typeSample %in% c("wholeTumor", "whole")) {
1402
+##         get.mut.vector.whole(x, timeSample = timeSample,
1403
+##                              threshold = thresholdWhole)
1404
+##     } else if(typeSample %in%  c("singleCell", "single")) {
1405
+##         get.mut.vector.singlecell(x, timeSample = timeSample)
1406
+##     }
1407
+## }
1408
+
1409
+
1410
+
1411
+
1412
+
1413
+## plotClonePhylog <- function(x, timeEvent = FALSE,
1414
+##                             showEvents = TRUE,
1415
+##                             fixOverlap = TRUE) {
1416
+##     if(!inherits(x, "oncosimul2"))
1417
+##         stop("Phylogenetic information is only stored with v >=2")
1418
+##     if(nrow(x$other$PhylogDF) == 0)
1419
+##         stop("It seems you run the simulation with keepPhylog= FALSE")
1420
+##     ## requireNamespace("igraph")
1421
+##     df <- x$other$PhylogDF
1422
+##     if(!showEvents) {
1423
+##         df <- df[!duplicated(df[, c(1, 2)]), ]
1424
+##     }
1425
+##     g <- igraph::graph.data.frame(df)
1426
+##     l0 <- igraph::layout.reingold.tilford(g)
1427
+##     if(!timeEvent) {
1428
+##         plot(g, layout = l0)
1429
+##     } else {
1430
+##         l1 <- l0
1431
+##         indexAppear <- match(V(g)$name, as.character(df[, 2]))
1432
+##         firstAppear <- df$time[indexAppear]
1433
+##         firstAppear[1] <- 0
1434
+##         l1[, 2] <- (max(firstAppear) - firstAppear)
1435
+##         if(fixOverlap) {
1436
+##             dx <- which(duplicated(l1[, 1]))
1437
+##             if(length(dx)) {
1438
+##                 ra <- range(l1[, 1])
1439
+##                 l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
1440
+##             }
1441
+##         }
1442
+##         plot(g, layout = l1)         
1443
+##     }
1444
+## }
... ...
@@ -252,22 +252,24 @@ OTtoPoset <- function(x) {
252 252
 ##     return(am[cn, cn])
253 253
 ## }
254 254
 
255
-sortAdjMat <- function(am) {
256
-    ## If column names, except Root, are integers, sort as integers. O.w.,
257
-    ## general lexicog. sort.
258
-    cn <- colnames(am)
259
-    rootpos <- grep("^Root$", cn) 
260
-    if(length(rootpos) != 1)
261
-        stop("No root in adj mat, or multiple Roots")
262
-    cn0 <- colnames(am)[-rootpos]
263
-    namesInts <- type.convert(cn0, as.is = TRUE)
264
-    if(is.integer(namesInts)) {
265
-        cn <- c("Root", sort(namesInts))
266
-    } else {
267
-        cn <- c("Root", sort(cn0))
268
-    }
269
-    return(am[cn, cn])
270
-}
255
+
256
+## No longer used
257
+## sortAdjMat <- function(am) {
258
+##     ## If column names, except Root, are integers, sort as integers. O.w.,
259
+##     ## general lexicog. sort.
260
+##     cn <- colnames(am)
261
+##     rootpos <- grep("^Root$", cn) 
262
+##     if(length(rootpos) != 1)
263
+##         stop("No root in adj mat, or multiple Roots")
264
+##     cn0 <- colnames(am)[-rootpos]
265
+##     namesInts <- type.convert(cn0, as.is = TRUE)
266
+##     if(is.integer(namesInts)) {
267
+##         cn <- c("Root", sort(namesInts))
268
+##     } else {
269
+##         cn <- c("Root", sort(cn0))
270
+##     }
271
+##     return(am[cn, cn])
272
+## }
271 273
 
272 274
 
273 275
 
... ...
@@ -98,6 +98,7 @@ gm.to.geneModuleL <- function(gm, one.to.one) {
98 98
     if(length(unique(geneMod$Gene)) != nrow(geneMod)) {
99 99
         stop("Are there identical gene names in different modules?")
100 100
     }
101
+    ## I think this is unreacheable now. Caught earlier.
101 102
     if(length(unique(geneMod$GeneNumID)) != nrow(geneMod)) {
102 103
         stop("Are there identical gene names in different modules?")
103 104
     }
... ...
@@ -193,7 +194,8 @@ to.long.rt <- function(rt, idm) {
193 194
         z$childNumID <- idm[z$child]
194 195
         z$parentsNumID <- idm[z$parents]
195 196
         if( any(is.na(z$parentsNumID)) ||
196
-           any(is.na(z$childNumID)) ) {
197
+            any(is.na(z$childNumID)) ) {
198
+            ## I think this can no longer be reached ever. Caught before.
197 199
             stop(paste("An ID is NA:",
198 200
                        "Is a gene part of two different modules?",
199 201
                        "(That includes being by itself and part",
... ...
@@ -531,46 +533,46 @@ allFitnessEffects <- function(rT = NULL,
531 533
 }
532 534
 
533 535
 
534
-
535
-rtAndGeneModule <- function(mdeps, gM = NULL) {
536
-    ## To show a table of restrictions when there are modules. Do not use
537
-    ## for anything else. Maybe as intermediate to plotting.
536
+## No longer used
537
+## rtAndGeneModule <- function(mdeps, gM = NULL) {
538
+##     ## To show a table of restrictions when there are modules. Do not use
539
+##     ## for anything else. Maybe as intermediate to plotting.
538 540
     
539
-    ## Specify restriction table of modules and a mapping of modules to
540
-    ## genes. gM is a named vector; names are modules, values are elements
541
-    ## of each module.
542
-
543
-    ## We do nothing important if gM is NULL except checks
544
-
545
-    ## If there are modules, the table shows the individual genes.
546
-    checkRT(mdeps)
547
-    ## if(ncol(mdeps) != 5)
548
-    ##     stop("mdeps must be of exactly 5 columns")
549
-    ## if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep")))
550
-    ##     stop(paste("Column names of mdeps not of appropriate format. ",
551
-    ##                "Should be parent, child, s, sh, typeDep"))
552
-    if(!is.null(gM)) {
553
-        if(any(is.na(match(mdeps[ , 1], names(gM)))))
554
-            stop("Some values in parent not from a known module")
555
-        if(any(is.na(match(mdeps[ , 2], names(gM)))))
556
-            stop("Some values in child not from a known module")
557
-        if(any(is.na(match(names(gM), c(mdeps[, 1], mdeps[, 2])))))
558
-            stop("Some values in module in neither parent or child")
541
+##     ## Specify restriction table of modules and a mapping of modules to
542
+##     ## genes. gM is a named vector; names are modules, values are elements
543
+##     ## of each module.
544
+
545
+##     ## We do nothing important if gM is NULL except checks
546
+
547
+##     ## If there are modules, the table shows the individual genes.
548
+##     checkRT(mdeps)
549
+##     ## if(ncol(mdeps) != 5)
550
+##     ##     stop("mdeps must be of exactly 5 columns")
551
+##     ## if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep")))
552
+##     ##     stop(paste("Column names of mdeps not of appropriate format. ",
553
+##     ##                "Should be parent, child, s, sh, typeDep"))
554
+##     if(!is.null(gM)) {
555
+##         if(any(is.na(match(mdeps[ , 1], names(gM)))))
556
+##             stop("Some values in parent not from a known module")
557
+##         if(any(is.na(match(mdeps[ , 2], names(gM)))))
558
+##             stop("Some values in child not from a known module")
559
+##         if(any(is.na(match(names(gM), c(mdeps[, 1], mdeps[, 2])))))
560
+##             stop("Some values in module in neither parent or child")
559 561
         
560
-        parent <- gM[mdeps[, 1]]
561
-        child <- gM[mdeps[, 2]]
562
-        df <- data.frame(parent = parent,
563
-                         child = child,
564
-                         s = mdeps$s,
565
-                         sh = mdeps$sh,
566
-                         typeDep = mdeps$typeDep,
567
-                         stringsAsFactors = FALSE)
568
-    } else {
569
-        df <- mdeps
570
-    }
571
-    rownames(df) <- seq.int(nrow(df))
572
-    return(df)
573
-}
562
+##         parent <- gM[mdeps[, 1]]
563
+##         child <- gM[mdeps[, 2]]
564
+##         df <- data.frame(parent = parent,
565
+##                          child = child,
566
+##                          s = mdeps$s,
567
+##                          sh = mdeps$sh,
568
+##                          typeDep = mdeps$typeDep,
569
+##                          stringsAsFactors = FALSE)
570
+##     } else {
571
+##         df <- mdeps
572
+##     }
573
+##     rownames(df) <- seq.int(nrow(df))
574
+##     return(df)
575
+## }
574 576
 
575 577
 ## wrap.test.rt <- function(rt, gM = NULL) {
576 578
 ##     ## FIXME add epistasis and orderEffects
... ...
@@ -579,18 +581,18 @@ rtAndGeneModule <- function(mdeps, gM = NULL) {
579 581
 ##     wrap_test_rt(lrt$long.rt, lrt$geneModule)
580 582
 ## }
581 583
 
582
-
583
-wrap.readFitnessEffects <- function(rt, epi, oe, ni, gm, echo = TRUE) {
584
-    tt <- allFitnessEffects(rt, epi, oe, ni, gm)
585
-    readFitnessEffects(tt, echo = echo)
586
-    ## readFitnessEffects(tt$long.rt,
587
-    ##                    tt$long.epistasis,
588
-    ##                    tt$long.orderEffects,
589
-    ##                    tt$long.geneNoInt,
590
-    ##                    tt$geneModule,
591
-    ##                    tt$gMOneToOne,
592
-    ##                    echo = TRUE)
593
-}
584
+## No longer used
585
+## wrap.readFitnessEffects <- function(rt, epi, oe, ni, gm, echo = TRUE) {
586
+##     tt <- allFitnessEffects(rt, epi, oe, ni, gm)
587
+##     readFitnessEffects(tt, echo = echo)
588
+##     ## readFitnessEffects(tt$long.rt,
589
+##     ##                    tt$long.epistasis,
590
+##     ##                    tt$long.orderEffects,
591
+##     ##                    tt$long.geneNoInt,
592
+##     ##                    tt$geneModule,
593
+##     ##                    tt$gMOneToOne,
594
+##     ##                    echo = TRUE)
595
+## }
594 596
 
595 597
 evalGenotype <- function(genotype, fitnessEffects,
596 598
                          verbose = FALSE,
... ...
@@ -659,8 +661,8 @@ evalAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256,
659 661
     nn <- nrow(fitnessEffects$geneModule) -1  + nrow(fitnessEffects$long.geneNoInt)
660 662
     tnn <- tot(nn)
661 663
     if(tnn > max) {
662
-        m <- paste("There are ", tnn, "genotypes.")
663
-        m <- paste(m, " This is larger than max.")
664
+        m <- paste("There are", tnn, "genotypes.")
665
+        m <- paste(m, "This is larger than max.")
664 666
         m <- paste(m, "Adjust max and rerun if you want")
665 667
         stop(m)
666 668
     }
... ...
@@ -859,16 +861,22 @@ nr_oncoSimul.internal <- function(rFE,
859 861
     } else {
860 862
         initMutant <- vector(mode = "integer")
861 863
     }
862
-    if(initSize_species < 10) {
863
-        warning("initSize_species too small?")
864
-    }
865
-    if(initSize_iter < 100) {
866
-        warning("initSize_iter too small?")
867
-    }
864
+    ## these are never user options
865
+    ## if(initSize_species < 10) {
866
+    ##     warning("initSize_species too small?")
867
+    ## }
868
+    ## if(initSize_iter < 100) {
869
+    ##     warning("initSize_iter too small?")
870
+    ## }
868 871
 
869 872
     if(typeFitness %in% c("bozic1", "bozic2")) {
870 873
         thesh <- unlist(lapply(rFE$long.rt, function(x) x$sh))
871
-        thes <- unlist(lapply(rFE$long.rt, function(x) x$s))
874
+        ## thes <- unlist(lapply(rFE$long.rt, function(x) x$s))
875
+        thes <- unlist(c(lapply(rFE$long.rt, function(x) x$s),
876
+                         lapply(rFE$long.epistasis, function(x) x$s),
877
+                         lapply(rFE$long.orderEffects, function(x) x$s),
878
+                         rFE$long.geneNoInt$s))
879
+        
872 880
         if(any(thes > 1 )) {
873 881
             m <- paste("You are using a Bozic model with",
874 882
                        "the new restriction specification, and you have",
... ...
@@ -876,13 +884,16 @@ nr_oncoSimul.internal <- function(rFE,
876 884
                        "But that is the same as setting that to 1:",
877 885
                        "we obviously cannot allow negative death rates,",
878 886
                        "nor problems derived from multiplying odd or even",
879
-                       "numbers of negative numbers.")
887
+                       "numbers of negative numbers.",
888
+                       "You can set those > 1 to 1, but then you will",
889
+                       "eventually get the simulations to abort when the",
890
+                       "death rate becomes 0.")
880 891
             stop(m)
881 892
         }
882
-        if(any(thesh == -1)) {
893
+        if(any( (thesh <= -1) & (thesh > -Inf))) {
883 894
             m <- paste("You are using a Bozic model with",
884 895
                        "the new restriction specification, and you have",
885
-                       "at least one sh of -1. Maybe you mean -Inf?")
896
+                       "at least one sh <= -1. Maybe you mean -Inf?")
886 897
             warning(m)
887 898
         }
888 899
         if(any(thes == 1 )) {
889 900
Binary files a/data/examplesFitnessEffects.RData and b/data/examplesFitnessEffects.RData differ
... ...
@@ -1,3 +1,8 @@
1
+Changes in version 1.99.6 (2015-09-26):
2
+	- Improved test coverage and removed stringsAsfactors from tests
3
+	- Consistent handling of corner cases in Bozic
4
+	- Miscell minor documentation improvements
5
+
1 6
 Changes in version 1.99.5 (2015-06-25):
2 7
 	- Fixed bug in initMutant, added tests, and vignette section.
3 8
 
... ...
@@ -80,14 +80,16 @@ allFitnessEffects(rT = NULL, epistasis = NULL, orderEffects = NULL,
80 80
   modules to contain more than one gene. If you specify a geneToModule
81 81
   argument, it must necessarily contain "Root".  
82 82
 }
83
+
83 84
 \item{drvNames}{The names of genes that are considered drivers. This is
84 85
   only used for: a) deciding when to stop the simulations, in case you
85 86
   use number of drivers as a simulation stopping criterion (see
86 87
   \code{\link{oncoSimulIndiv}}); b) for summarization purposes (e.g.,
87 88
   how many drivers are mutated); c) in figures. But you need not
88 89
   specifiy anything if you do not want to, and you can pass an empty
89
-  vector. The default is to assume that all genes that are not in the
90
-  \code{noIntGenes} are drivers.}
90
+  vector (as \code{character(0)}). The default is to assume that all
91
+  genes that are not in the \code{noIntGenes} are drivers.}
92
+
91 93
 \item{keepInput}{
92 94
   If TRUE, whether to keep the original input. This is only useful for
93 95
   human consumption of the output. It is useful because it is easier to
... ...
@@ -72,6 +72,8 @@ oncoSimulSample(Nindiv,
72 72
                                 } else {
73 73
                                     nd <- (2 : round(0.75 * max(fp)))
74 74
                                 }
75
+                                if (length(nd) == 1) 
76
+                                    nd <- c(nd, nd)
75 77
                                 sample(nd, Nindiv,
76 78
                                        replace = TRUE)
77 79
                             },
... ...
@@ -64,7 +64,23 @@ samplePop(x, timeSample = "last", typeSample = "whole",
64 64
 
65 65
   Please see \code{\link{oncoSimulSample}} for a much more efficient way
66 66
   of sampling when you are sure what you want to sample.
67
-}
67
+
68
+  Note that if you have set \code{onlyCancer = FALSE} in the call to
69
+  \code{\link{oncoSimulSample}}, you can end up trying to sample from
70
+  simulations where the population size is 0. In this case, you will get
71
+  a vector/matrix of NAs and a warning.
72
+
73
+  Similarly, when using \code{timeSample = "last"} you might end up with
74
+  a vector of 0 (not NAs) because you are sampling from a population
75
+  that contains no clones with mutated genes. This event (sampling from
76
+  a population that contains no clones with mutated genes), by
77
+  construction, cannot happen when \code{timeSample = "unif"} as
78
+  "uniform" sampling is taken here to mean sampling at a time choosen
79
+  uniformly from all the times recorded in the simulation between the
80
+  time when the first driver appeared and the final time
81
+  period. However, you might still get a vector of 0, with uniform
82
+  sampling, if you sample from a population that contains only a few
83
+  cells with any mutated genes, and most cells with no mutated genes.}
68 84
 
69 85
 \value{
70 86
 
... ...
@@ -85,9 +101,11 @@ samplePop(x, timeSample = "last", typeSample = "whole",
85 101
 }
86 102
 
87 103
 \references{
88
-  Diaz-Uriarte, R. (2014). Inferring restrictions in the temporal order
89
-  of mutations during tumor progression: effects of passenger mutations,
90
-  evolutionary models, and sampling. \url{http://dx.doi.org/10.1101/005587}
104
+  Diaz-Uriarte, R. (2015). Identifying restrictions in the order of
105
+  accumulation of mutations during tumor progression: effects of
106
+  passengers, evolutionary models, and sampling
107
+  \url{http://www.biomedcentral.com/1471-2105/16/41/abstract}
108
+
91 109
 }
92 110
 
93 111
 \author{
... ...
@@ -8,14 +8,15 @@ test_that("Bauer example: correct number of fitness classes", {
8 8
                         child = c("p", paste0("s", 1:5)),
9 9
                         s = c(sd, rep(sdp, 5)),
10 10
                         sh = c(0, rep(sp, 5)),
11
-                        typeDep = "MN",
12
-                        stringsAsFactors = FALSE)
11
+                        typeDep = "MN")
13 12
     b1 <- evalAllGenotypes(allFitnessEffects(bauer), order = FALSE)
14 13
     b2 <- evalAllGenotypes(allFitnessEffects(bauer), order = TRUE, max = 2000)
15 14
     expect_equal(length(unique(b1$Fitness)), 11)
16 15
     expect_equal(length(unique(b2$Fitness)), 11)
17 16
 } )
18 17
 
18
+
19
+
19 20
 test_that("Bauer example: identical values fitness classes, unorder and ord", {
20 21
     sd <- 0.1
21 22
     sdp <- 0.15
... ...
@@ -24,8 +25,7 @@ test_that("Bauer example: identical values fitness classes, unorder and ord", {
24 25
                         child = c("p", paste0("s", 1:5)),
25 26
                         s = c(sd, rep(sdp, 5)),
26 27
                         sh = c(0, rep(sp, 5)),
27
-                        typeDep = "MN",
28
-                        stringsAsFactors = FALSE)
28
+                        typeDep = "MN")
29 29
     b1 <- evalAllGenotypes(allFitnessEffects(bauer), order = FALSE)
30 30
     b2 <- evalAllGenotypes(allFitnessEffects(bauer), order = TRUE, max = 2000)
31 31
     expect_equal(unique(b1$Fitness), unique(b2$Fitness))
... ...
@@ -40,15 +40,13 @@ test_that("Bauer example: identical values fitness classes, rename", {
40 40
                         child = c("p", paste0("s", 1:5)),
41 41
                         s = c(sd, rep(sdp, 5)),
42 42
                         sh = c(0, rep(sp, 5)),
43
-                        typeDep = "MN",
44
-                        stringsAsFactors = FALSE)
43
+                        typeDep = "MN")
45 44
     b1 <- evalAllGenotypes(allFitnessEffects(bauer), order = FALSE)
46 45
     bauer3 <- data.frame(parent = c("Root", rep("u", 5)),
47 46
                          child = c("u", paste0("s", 1:5)),
48 47
                          s = c(sd, rep(sdp, 5)),
49 48
                          sh = c(0, rep(sp, 5)),
50
-                         typeDep = "MN",
51
-                         stringsAsFactors = FALSE)
49
+                         typeDep = "MN")
52 50
     b3 <- evalAllGenotypes(allFitnessEffects(bauer), order = TRUE, max = 2000)
53 51
     expect_equal(unique(b1$Fitness), unique(b3$Fitness))
54 52
 } )
... ...
@@ -62,15 +60,13 @@ test_that("Bauer example: identical values fitness classes, diff. order", {
62 60
                         child = c("p", paste0("s", 1:5)),
63 61
                         s = c(sd, rep(sdp, 5)),
64 62
                         sh = c(0, rep(sp, 5)),
65
-                        typeDep = "MN",
66
-                        stringsAsFactors = FALSE)
63
+                        typeDep = "MN")
67 64
     b1 <- evalAllGenotypes(allFitnessEffects(bauer), order = FALSE)
68 65
     bauer3 <- data.frame(parent = c(rep("u", 5), "Root"),
69 66
                          child = c(paste0("s", 1:5), "u"),
70 67
                          s = c(sd, rep(sdp, 5)),
71 68
                          sh = c(0, rep(sp, 5)),
72
-                         typeDep = "MN",
73
-                         stringsAsFactors = FALSE)
69
+                         typeDep = "MN")
74 70
     b3 <- evalAllGenotypes(allFitnessEffects(bauer), order = TRUE, max = 2000)
75 71
     expect_equal(unique(b1$Fitness), unique(b3$Fitness))
76 72
 } )
... ...
@@ -513,8 +509,7 @@ test_that("Error if not same sh within child", {
513 509
                      child = c("a", "b", "d", "e", "c", "c", rep("g", 3)),
514 510
                      s = c(0.01, 0.02, 0.03, 0.04, 0.1, 0.1, rep(0.2, 3)),
515 511
                      sh = c(rep(0, 4), c(-.1, -.2), c(-.05, -.06, -.07)),
516
-                     typeDep = "MN",
517
-                     stringsAsFactors = FALSE)
512
+                     typeDep = "MN")
518 513
     expect_error(allFitnessEffects(c1))
519 514
 })
520 515
 
... ...
@@ -862,21 +857,248 @@ test_that("long example OK", {
862 857
 
863 858
 
864 859
 
865
-## how is table geneModule with no ints? are they there?
866 860
 
867
-## check it breaks if same ID
868 861
 
869
-## check breaks if in restriction and no interactions
862
+test_that("Bauer example: exercising drvNames", {
863
+    sd <- 0.1
864
+    sdp <- 0.15
865
+    sp <- 0.05
866
+    bauer <- data.frame(parent = c("Root", rep("p", 5)),
867
+                        child = c("p", paste0("s", 1:5)),
868
+                        s = c(sd, rep(sdp, 5)),
869
+                        sh = c(0, rep(sp, 5)),
870
+                        typeDep = "MN")
871
+    b1 <- evalAllGenotypes(allFitnessEffects(bauer, drvNames = c("s1", "s5")),
872
+                           order = FALSE)
873
+    b2 <- evalAllGenotypes(allFitnessEffects(bauer,
874
+                                             drvNames = c("s2", "s3", "s4")),
875
+                           order = TRUE, max = 2000)
876
+    expect_equal(length(unique(b1$Fitness)), 11)
877
+    expect_equal(length(unique(b2$Fitness)), 11)
878
+} )
879
+
880
+
881
+test_that("non distinct gene names per module caught", {
882
+    expect_error(ofe1 <-
883
+                     allFitnessEffects(
884
+                         orderEffects = c("F > D" = -0.3, "D > F" = 0.4),
885
+                         geneToModule =
886
+                             c("Root" = "Root",
887
+                               "F" = "f1, f2",
888
+                               "D" = "d1, d2, f1") ),
889
+                 "Are there identical gene names in different modules?")
890
+    s <- 0.2
891
+    expect_error(
892
+        m1 <- allFitnessEffects(data.frame(
893
+            parent = c("Root", "A"),
894
+            child = c("A", "B"),
895
+            s = s,
896
+            sh = -1,
897
+            typeDep = "OR"),
898
+            geneToModule = c("Root" = "Root",
899
+                             "A" = "a1, b1, a2",
900
+                             "B" = "b1")),
901
+        "Are there identical gene names in different modules?")
902
+})
870 903
 
871 904
 
905
+test_that("gene by itself and in module", {
906
+    expect_error(ofe1 <- allFitnessEffects(data.frame(parent = c("Root", "Root", "A"),
907
+                                                      child = c("A", "e", "B"),
908
+                                 s = .1,
909
+                                 sh = .1,
910
+                                 typeDep = "OR"),
911
+                      geneToModule =
912
+                          c("Root" = "Root",
913
+                            "e" = "e",
914
+                            "A" = "a1, e, a2",
915
+                            "B" = "b1"))
916
+                ,
917
+                 "Are there identical gene names in different modules?")
918
+    ## I think the "Is a gene part of two ... cannot be reached anymore"
919
+})
920
+
921
+
922
+
923
+test_that("a silly epistasis example", {
924
+    ## make sure we exercise the nrow(df) == 0L
925
+    expect_output(sv <- allFitnessEffects(
926
+                     orderEffects = c("A > B" = 0.1),
927
+                     epistasis = c("A" = 1)), "")
928
+    expect_output(evalAllGenotypes(sv), "")
929
+})
930
+
931
+test_that("can run without keeping input", {
932
+    cs <-  data.frame(parent = c(rep("Root", 4), "a", "b", "d", "e", "c"),
933
+                      child = c("a", "b", "d", "e", "c", "c", rep("g", 3)),
934
+                 s = 0.1,
935
+                 sh = -0.9,
936
+                 typeDep = "MN")
937
+    expect_output(cbn1 <- allFitnessEffects(cs, keepInput = FALSE), "")
938
+})
872 939
 
873 940
 
874 941
 
942
+test_that("we are exercising evalGenotype with a comma, echo, and proNeg", {
943
+    ## we do this in the vignette already
944
+    ofe2 <- allFitnessEffects(orderEffects = c("F > D" = -0.3, "D > F" = 0.4),
945
+                              geneToModule =
946
+                                  c("Root" = "Root",
947
+                                    "F" = "f1, f2, f3",
948
+                                    "D" = "d1, d2") )
949
+    expect_equal(evalGenotype("d1 , d2, f3", ofe2, verbose = TRUE, echo = TRUE),
950
+                 1.4)
951
+    expect_equal(evalGenotype("f3 , d1 , d2", ofe2, verbose = TRUE, echo = TRUE),
952
+                 0.7)
953
+    expect_equal(evalGenotype("f3 , d1 , d2", ofe2, verbose = TRUE,
954
+                              echo = TRUE, model = "Bozic"),
955
+                 1.3)
956
+})
957
+
875 958
 
959
+test_that("We limit number of genotypes in eval", {
960
+    expect_error(evalAllGenotypes(
961
+        allFitnessEffects(
962
+            noIntGenes = runif(10)), order = FALSE),
963
+        "There are 1024 genotypes. This is larger than max.",
964
+        fixed = TRUE)
965
+})
876 966
 
967
+## how is table geneModule with no ints? are they there?
877 968
 
969
+## check it breaks if same ID
878 970
 
971
+## check breaks if in restriction and no interactions
879 972
 
880 973
 
881 974
 
882 975
 
976
+## Nope, this is not inconsistent
977
+
978
+test_that("Bozic limit cases handled consistently", {
979
+    sv <- allFitnessEffects(data.frame(
980
+        parent = c("Root", "Root", "a1", "a2"),
981
+        child = c("a1", "a2", "b", "b"),
982
+        s = 1.2,
983
+        sh = 0.1,
984
+        typeDep = "OR"),
985
+        noIntGenes = c("E" = 0.85, "F" = 1))
986
+    expect_output(evalAllGenotypes(sv, order = FALSE, addwt = TRUE,
987
+                                   model = "Bozic"), ## this works
988
+                  "Death_rate", fixed = TRUE, all = FALSE)
989
+    expect_error(oncoSimulIndiv(sv, model = "Bozic"),
990
+                 "You are using a Bozic model with the new restriction specification, and you have at least one s > 1."
991
+                 ) 
992
+    sv2 <- allFitnessEffects(epistasis = c("-A : B" = 1.5,
993
+                                           "A : -B" = 1.5,
994
+                                           "A:B" = 2.5))
995
+    expect_output(evalAllGenotypes(sv2, order = FALSE, addwt = TRUE,
996
+                                   model = "Bozic"), ## this works
997
+                  "Death_rate", fixed = TRUE, all = FALSE)
998
+    expect_error(oncoSimulIndiv(sv2, model = "Bozic"),
999
+                 "You are using a Bozic model with the new restriction specification, and you have at least one s > 1."
1000
+                 ) 
1001
+    sv3 <- allFitnessEffects(epistasis = c("-A : B" = 0.1,
1002
+                                           "A : -B" = 1,
1003
+                                           "A:B" = 0.1))
1004
+    expect_output(evalAllGenotypes(sv3, order = FALSE, addwt = TRUE,
1005
+                                   model = "Bozic"), ## this works
1006
+                  "Death_rate", fixed = TRUE, all = FALSE)
1007
+    expect_warning(oncoSimulIndiv(sv3, model = "Bozic"),
1008
+                   "You are using a Bozic model with the new restriction specification, and you have at least one s of 1."
1009
+                   ) 
1010
+    sv4 <- allFitnessEffects(epistasis = c("-A : B" = 0.99,
1011
+                                           "A : -B" = 0.95,
1012
+                                           "A:B" = 0.5),
1013
+                             orderEffects = c("G > H" = 1.1),
1014
+                             noIntGenes = c("E" = 0.85, "F" = 1.35))
1015
+    expect_output(evalAllGenotypes(sv4, order = FALSE, addwt = TRUE,
1016
+                                   model = "Bozic"), ## this works
1017
+                  "Death_rate", fixed = TRUE, all = FALSE)
1018
+    expect_error(oncoSimulIndiv(sv4, model = "Bozic"),
1019
+                 "You are using a Bozic model with the new restriction specification, and you have at least one s > 1."
1020
+                 ) 
1021
+    sv5 <- allFitnessEffects(epistasis = c("-A : B" = 0.99,
1022
+                                           "A : -B" = 0.95,
1023
+                                           "A:B" = 1),
1024
+                             orderEffects = c("G > H" = 1),
1025
+                             noIntGenes = c("E" = 0.85, "F" = 1))
1026
+    expect_output(evalAllGenotypes(sv5, order = FALSE, addwt = TRUE,
1027
+                                   model = "Bozic"), ## this works
1028
+                  "Death_rate", fixed = TRUE, all = FALSE)
1029
+    expect_warning(oncoSimulIndiv(sv5, model = "Bozic"),
1030
+                   "You are using a Bozic model with the new restriction specification, and you have at least one s of 1."
1031
+                   ) 
1032
+    sv4c <- allFitnessEffects(epistasis = c("-A : B" = 0.99,
1033
+                                           "A : -B" = 1.5,
1034
+                                           "A:B" = 0.5),
1035
+                             orderEffects = c("G > H" = 1),
1036
+                             noIntGenes = c("E" = 0.85, "F" = 1.35))
1037
+    expect_output(evalAllGenotypes(sv4c, order = FALSE, addwt = TRUE,
1038
+                                   model = "Bozic"), ## this works
1039
+                  "Death_rate", fixed = TRUE, all = FALSE)
1040
+    expect_error(oncoSimulIndiv(sv4c, model = "Bozic"),
1041
+                 "You are using a Bozic model with the new restriction specification, and you have at least one s > 1."
1042
+                 ) 
1043
+    sv4d <- allFitnessEffects(epistasis = c("-A : B" = 0.99,
1044
+                                           "A : -B" = 0.95,
1045
+                                           "A:B" = 0.5),
1046
+                             orderEffects = c("G > H" = 1.1),
1047
+                             noIntGenes = c("E" = 0.85, "F" = 0.35))
1048
+    expect_output(evalAllGenotypes(sv4d, order = FALSE, addwt = TRUE,
1049
+                                   model = "Bozic"), ## this works
1050
+                  "Death_rate", fixed = TRUE, all = FALSE)
1051
+    expect_error(oncoSimulIndiv(sv4d, model = "Bozic"),
1052
+                 "You are using a Bozic model with the new restriction specification, and you have at least one s > 1."
1053
+                 ) 
1054
+    sv4d <- allFitnessEffects(epistasis = c("-A : B" = 0.99,
1055
+                                           "A : -B" = 0.95,
1056
+                                           "A:B" = 0.5),
1057
+                             orderEffects = c("G > H" = 0.1),
1058
+                             noIntGenes = c("E" = 0.85, "F" = 1.3))
1059
+    expect_output(evalAllGenotypes(sv4d, order = FALSE, addwt = TRUE,
1060
+                                   model = "Bozic"), ## this works
1061
+                  "Death_rate", fixed = TRUE, all = FALSE)
1062
+    expect_error(oncoSimulIndiv(sv4d, model = "Bozic"),
1063
+                 "You are using a Bozic model with the new restriction specification, and you have at least one s > 1."
1064
+                 )
1065
+    svff <- allFitnessEffects(data.frame(
1066
+        parent = c("Root", "Root", "a1", "a2"),
1067
+        child = c("a1", "a2", "b", "b"),
1068
+        s = 0.2,
1069
+        sh = -1,
1070
+        typeDep = "OR"),
1071
+        noIntGenes = c("E" = 0.85, "F" = .1))
1072
+    expect_output(evalAllGenotypes(svff, order = FALSE, addwt = TRUE,
1073
+                                   model = "Bozic"), ## this works
1074
+                  "Death_rate", fixed = TRUE, all = FALSE)
1075
+    expect_warning(oncoSimulIndiv(svff, model = "Bozic"),
1076
+                 "You are using a Bozic model with the new restriction specification, and you have at least one sh <= -1."
1077
+                 ) 
1078
+    svff2 <- allFitnessEffects(data.frame(
1079
+        parent = c("Root", "Root", "a1", "a2"),
1080
+        child = c("a1", "a2", "b", "b"),
1081
+        s = 0.2,
1082
+        sh = -1.5,
1083
+        typeDep = "OR"),
1084
+        noIntGenes = c("E" = 0.85, "F" = .1))
1085
+    expect_output(evalAllGenotypes(svff2, order = FALSE, addwt = TRUE,
1086
+                                   model = "Bozic"), ## this works
1087
+                  "Death_rate", fixed = TRUE, all = FALSE)
1088
+    expect_warning(oncoSimulIndiv(svff2, model = "Bozic"),
1089
+                 "You are using a Bozic model with the new restriction specification, and you have at least one sh <= -1."
1090
+                 ) 
1091
+    svff3 <- allFitnessEffects(data.frame(
1092
+        parent = c("Root", "Root", "a1", "a2"),
1093
+        child = c("a1", "a2", "b", "b"),
1094
+        s = 0.2,
1095
+        sh = -Inf,
1096
+        typeDep = "OR"),
1097
+        noIntGenes = c("E" = 0.85, "F" = .1))
1098
+    expect_output(evalAllGenotypes(svff3, order = FALSE, addwt = TRUE,
1099
+                                   model = "Bozic"), ## this works
1100
+                  "Death_rate", fixed = TRUE, all = FALSE)
1101
+    expect_output(oncoSimulIndiv(svff3, model = "Bozic"),
1102
+                 "Individual OncoSimul trajectory with call"
1103
+                 ) 
1104
+})
883 1105
new file mode 100644
... ...
@@ -0,0 +1,130 @@
1
+## These are all used in the vignette and the help functions but we add
2
+## them here because we want to make sure we exercise the code even if we
3
+## just run the test routines.
4
+
5
+## BEWARE: this do not test that the plotting is correct! It just calls it.
6
+
7
+test_that("exercising the oncosimul plotting code", {
8
+              data(examplePosets)
9
+              p701 <- examplePosets[["p701"]]
10
+              b1 <- oncoSimulIndiv(p701)
11
+              plot(b1, addtot = TRUE, plotDiversity = TRUE)
12
+              p1 <- oncoSimulPop(2, p701, mc.cores = 2)
13
+              plot(p1, ask = FALSE)
14
+              oi <- allFitnessEffects(orderEffects =
15
+                                          c("F > D" = -0.3, "D > F" = 0.4),
16
+                                      noIntGenes = rexp(5, 10),
17
+                                      geneToModule =
18
+                                          c("Root" = "Root",
19
+                                            "F" = "f1, f2, f3",
20
+                                            "D" = "d1, d2") )
21
+              out <- oncoSimulPop(4,
22
+                                  oi, 
23
+                                  detectionSize = 1e4,
24
+                                  onlyCancer = FALSE)
25
+              plot(out)
26
+          })
27
+
28
+test_that("exercising the oncosimul plotting code, thinning", {
29
+              data(examplePosets)
30
+              p701 <- examplePosets[["p701"]]
31
+              b1 <- oncoSimulIndiv(p701)
32
+              plot(b1, addtot = TRUE, plotDiversity = TRUE)
33
+              p1 <- oncoSimulPop(2, p701, mc.cores = 2)
34
+              plot(p1, ask = FALSE)
35
+              oi <- allFitnessEffects(orderEffects =
36
+                                          c("F > D" = -0.3, "D > F" = 0.4),
37
+                                      noIntGenes = rexp(5, 10),
38
+                                      geneToModule =
39
+                                          c("Root" = "Root",
40
+                                            "F" = "f1, f2, f3",
41
+                                            "D" = "d1, d2") )
42
+              out <- oncoSimulPop(4,
43
+                                  oi, 
44
+                                  detectionSize = 1e4,
45
+                                  onlyCancer = FALSE)
46
+              plot(out, thinData = TRUE)
47
+          })
48
+
49
+
50
+test_that("exercising the poset plotting code", {
51
+              data(examplePosets)
52
+              plotPoset(examplePosets[["p1101"]])
53
+              poset701 <- examplePosets[["p701"]]
54
+              plotPoset(poset701, addroot = TRUE)
55
+              plotPoset(poset701, addroot = TRUE,
56
+                        names = c("Root", "KRAS", "SMAD4", "CDNK2A", "TP53",
57
+                            "MLL3","PXDN", "TGFBR2"))
58
+          })
59
+
60
+
61
+
62
+test_that("exercising plotClonePhylog", {
63
+              data(examplesFitnessEffects)
64
+              tmp <-  oncoSimulIndiv(examplesFitnessEffects[["o3"]],
65
+                                     model = "McFL", 
66
+                                     mu = 5e-5,
67
+                                     detectionSize = 1e8, 
68
+                                     detectionDrivers = 3,
69
+                                     sampleEvery = 0.025,
70
+                                     max.num.tries = 10,
71
+                                     keepEvery = 5,
72
+                                     initSize = 2000,
73
+                                     finalTime = 3000,
74
+                                     onlyCancer = FALSE,
75
+                                     keepPhylog = TRUE)
76
+              ## Show only those with N > 10 at end
77
+              plotClonePhylog(tmp, N = 10)
78
+              ## Show only those with N > 1 between times 5 and 1000
79
+              plotClonePhylog(tmp, N = 1, t = c(5, 1000))
80
+              ## Show everything, even if teminal nodes are extinct
81
+              plotClonePhylog(tmp, N = 0)
82
+              ## Show time when first appeared
83
+              plotClonePhylog(tmp, N = 10, timeEvents = TRUE)
84
+              ## This can take a few seconds
85
+              plotClonePhylog(tmp, N = 10, keepEvents = TRUE)
86
+          })
87
+
88
+test_that("exercising the fitnessEffects plotting code", {
89
+              data(examplesFitnessEffects)
90
+              plot(examplesFitnessEffects[["cbn1"]])
91
+              cs <-  data.frame(parent = c(rep("Root", 4), "a", "b", "d", "e", "c"),
92
+                 child = c("a", "b", "d", "e", "c", "c", rep("g", 3)),
93
+                 s = 0.1,
94
+                                sh = -0.9,
95
+                                typeDep = "MN")
96
+              cbn1 <- allFitnessEffects(cs)
97
+              plot(cbn1)
98
+              plot(cbn1, "igraph")
99
+              p4 <- data.frame(parent = c(rep("Root", 4), "A", "B", "D", "E", "C", "F"),
100
+                  child = c("A", "B", "D", "E", "C", "C", "F", "F", "G", "G"),
101
+                  s = c(0.01, 0.02, 0.03, 0.04, 0.1, 0.1, 0.2, 0.2, 0.3, 0.3),
102
+                  sh = c(rep(0, 4), c(-.9, -.9), c(-.95, -.95), c(-.99, -.99)),
103
+                  typeDep = c(rep("--", 4), 
104
+                      "XMPN", "XMPN", "MN", "MN", "SM", "SM"))