Browse code

v. 2.99.9: fixed one-gene cases, nem in Readme, typos; removin unused code and v.1 from long tests

ramon diaz-uriarte (at Phelsuma) authored on 22/04/2021 10:27:49
Showing 20 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.99.7
5
-Date: 2020-12-30
4
+Version: 2.99.9
5
+Date: 2021-04-22
6 6
 Authors@R: c(
7 7
 	      person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),	
8 8
  	   		     email = "rdiaz02@gmail.com"),
... ...
@@ -57,9 +57,6 @@ oncoSimulSample <- function(Nindiv,
57 57
                             max.memory = 2000,
58 58
                             max.wall.time.total = 600,
59 59
                             max.num.tries.total = 500 * Nindiv,
60
-                            ## well, obviously they are errors
61
-                            ## errorHitWallTime = TRUE,
62
-                            ## errorHitMaxTries = TRUE,
63 60
                             typeSample = "whole",
64 61
                             thresholdWhole = 0.5,
65 62
                             initMutant = NULL,
... ...
@@ -458,9 +455,6 @@ oncoSimulIndiv <- function(fp,
458 455
                            keepEvery = sampleEvery,
459 456
                            minDetectDrvCloneSz = "auto",
460 457
                            extraTime = 0,
461
-                           ## used to be this
462
-                           ## ifelse(model \%in\% c("Bozic", "Exp"), -9,
463
-                           ##                     5 * sampleEvery),
464 458
                            finalTime = 0.25 * 25 * 365,
465 459
                            onlyCancer = TRUE,
466 460
                            keepPhylog = FALSE,
... ...
@@ -585,84 +579,6 @@ oncoSimulIndiv <- function(fp,
585 579
     if(is_null_na(finalTime)) finalTime <- Inf
586 580
 
587 581
     if(is_null_na(sampleEvery)) stop("sampleEvery cannot be NULL or NA")
588
-    
589
-    ## if(!inherits(fp, "fitnessEffects")) {
590
-        ## if(any(unlist(lapply(list(fp, 
591
-        ##                           numPassengers,
592
-        ##                           s, sh), is.null)))) {
593
-        ##     m <- paste("You are using the old poset format.",
594
-        ##                "You must specify all of poset, numPassengers",
595
-        ##                "s, and sh.")
596
-        ##     stop(m)
597
-           
598
-        ## }
599
-        ## if(AND_DrvProbExit) {
600
-        ##     stop("The AND_DrvProbExit = TRUE setting is invalid",
601
-        ##          " with the old poset format.")
602
-        ## }
603
-        ## if(!is.null(muEF))
604
-        ##     stop("Mutator effects cannot be specified with the old poset format.")
605
-        ## if( length(initMutant) > 0)  
606
-        ##     warning("With the old poset format you can no longer use initMutant.",
607
-        ##             " The initMutant you passed will be ignored.")
608
-        ## ## stop("With the old poset, initMutant can only take a single value.")
609
-        ## if(!is_null_na(fixation))
610
-        ##     stop("'fixation' cannot be specified with the old poset format.")
611
-        ## ## Seeding C++ is now much better in new version
612
-        ## if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed
613
-        ##     ## name is a legacy. This is really the seed for the C++ generator.
614
-        ##     ## Nope, we cannot use 2^32, because as.integer will fail.
615
-        ##     seed <- as.integer(round(runif(1, min = 0, max = 2^16)))
616
-        ## }
617
-        ## if(verbosity >= 2)
618
-        ##     cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
619
-
620
-        ## if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
621
-        ## ## if(message.v1)
622
-        ## ##     message("You are using the old poset format. Consider using the new one.")
623
-   
624
-    
625
-        ## ## A simulation stops if cancer or finalTime appear, the first
626
-        ## ## one. But if we set onlyCnacer = FALSE, we also accept simuls
627
-        ## ## without cancer (or without anything)
628
-        
629
-        ## op <- try(oncoSimul.internal(poset = fp, ## restrict.table = rt,
630
-        ##                              ## numGenes = numGenes,
631
-        ##                              numPassengers = numPassengers,
632
-        ##                              typeCBN = "CBN",
633
-        ##                              birth = birth,
634
-        ##                              s = s,
635
-        ##                              death = death,  
636
-        ##                              mu =  mu,  
637
-        ##                              initSize =  initSize, 
638
-        ##                              sampleEvery =  sampleEvery,  
639
-        ##                              detectionSize =  detectionSize, 
640
-        ##                              finalTime = finalTime, 
641
-        ##                              initSize_species = 2000, 
642
-        ##                              initSize_iter = 500, 
643
-        ##                              seed = seed, 
644
-        ##                              verbosity = verbosity, 
645
-        ##                              speciesFS = 10000,  
646
-        ##                              ratioForce = 2,
647
-        ##                              typeFitness = typeFitness,
648
-        ##                              max.memory = max.memory,
649
-        ##                              mutationPropGrowth = mutationPropGrowth,                                   
650
-        ##                              initMutant = -1, 
651
-        ##                              max.wall.time = max.wall.time,
652
-        ##                              max.num.tries = max.num.tries,
653
-        ##                              keepEvery = keepEvery,  
654
-        ##                              ## alpha = 0.0015,  
655
-        ##                              sh = sh,
656
-        ##                              K = K, 
657
-        ##                              minDetectDrvCloneSz = minDetectDrvCloneSz,
658
-        ##                              extraTime = extraTime,
659
-        ##                              detectionDrivers = detectionDrivers,
660
-        ##                              onlyCancer = onlyCancer,
661
-        ##                              errorHitWallTime = errorHitWallTime,
662
-        ##                              errorHitMaxTries = errorHitMaxTries),
663
-        ##           silent = !verbosity)
664
-        ## objClass <- "oncosimul"
665
-    ## } else {
666 582
         s <- sh <- NULL ## force it.
667 583
         if(numPassengers != 0)
668 584
             warning(paste("Specifying numPassengers has no effect",
... ...
@@ -934,88 +850,6 @@ plot.oncosimulpop <- function(x, ask = TRUE,
934 850
 }
935 851
 
936 852
 
937
-## plot.oncosimul <- function(x, col = c(8, "orange", 6:1),
938
-##                            log = "y",
939
-##                            ltyClone = 2:6,
940
-##                            lwdClone = 0.9,
941
-##                            ltyDrivers = 1,
942
-##                            lwdDrivers = 3,
943
-##                            xlab = "Time units",
944
-##                            ylab = "Number of cells",
945
-##                            plotClones = TRUE,
946
-##                            plotDrivers = TRUE,
947
-##                            addtot = FALSE,
948
-##                            addtotlwd = 0.5,
949
-##                            yl = NULL,
950
-##                            thinData = FALSE,
951
-##                            thinData.keep = 0.1,
952
-##                            thinData.min = 2,
953
-##                            plotDiversity = FALSE,
954
-##                            ...
955
-##                            ) {
956
-
957
-##     if(thinData)
958
-##         x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
959
-
960
-##     ## uvx
961
-##     if(!inherits(x, "oncosimul2"))
962
-##         ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
963
-##     else {
964
-##         ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
965
-##     }
966
-    
967
-##     if(is.null(yl)) {
968
-##         if(log %in% c("y", "xy", "yx") )
969
-##             yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
970
-##         else
971
-##             yl <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
972
-##     }
973
-##     if(plotDiversity) {
974
-##         par(fig = c(0, 1, 0.8, 1))
975
-##         m1 <- par()$mar
976
-##         m <- m1
977
-##         m[c(1, 3)] <- c(0, 0.7)
978
-##         op <- par(mar = m )
979
-##         plotShannon(x)
980
-##         par(op)
981
-##         m1[c(3)] <- 0.2
982
-##         op <- par(mar = m1)
983
-##         par(fig = c(0, 1, 0, 0.8), new = TRUE)  
984
-##     }
985
-##     if(plotClones) {
986
-##         plotClones(x,
987
-##                    ndr = ndr, 
988
-##                    xlab = xlab,
989
-##                    ylab = ylab,
990
-##                    lty = ltyClone,
991
-##                    col = col, 
992
-##                    ylim = yl,
993
-##                    lwd = lwdClone,
994
-##                    axes = FALSE,
995
-##                    log = log,
996
-##                    ...)
997
-##     }
998
-
999
-##     if(plotClones && plotDrivers)
1000
-##         par(new = TRUE)
1001
-    
1002
-##     if(plotDrivers){
1003
-##         plotDrivers0(x,
1004
-##                      ndr,
1005
-##                      timescale = 1,
1006
-##                      trim.no.drivers = FALSE,
1007
-##                      xlab = "", ylab = "",
1008
-##                      lwd = lwdDrivers,
1009
-##                      lty = ltyDrivers,
1010
-##                      col = col, 
1011
-##                      addtot = addtot,
1012
-##                      addtotlwd = addtotlwd,
1013
-##                      log = log, ylim = yl,
1014
-##                      ...)
1015
-##     }
1016
-    
1017
-## }
1018
-
1019 853
 
1020 854
 plot.oncosimul <- function(x,
1021 855
                            show = "drivers", 
... ...
@@ -1511,10 +1345,6 @@ plotPoset <- function(x, names = NULL, addroot = FALSE,
1511 1345
         box()
1512 1346
 }
1513 1347
 
1514
-## this function seems to never be used
1515
-## plotAdjMat <- function(adjmat) {
1516
-##     plot(as(adjmat, "graphNEL"))
1517
-## }
1518 1348
 
1519 1349
 
1520 1350
 
... ...
@@ -1591,12 +1421,6 @@ plotClonePhylog <- function(x, N = 1, t = "last",
1591 1421
              "very fast, before any clones beyond the initial were ",
1592 1422
              "generated.")
1593 1423
     pc <- phylogClone(x, N, t, keepEvents)
1594
-    ## if(is.na(pc)) {
1595
-    ##     ## This should not be reachable, as caught before
1596
-    ##     ## where we check for nrow of PhylogDF   
1597
-    ##     warning("No clone phylogeny available. Exiting without plotting.")
1598
-    ##     return(NULL)
1599
-    ## }
1600 1424
         
1601 1425
     l0 <- igraph::layout.reingold.tilford(pc$g)
1602 1426
     if(!timeEvents) {
... ...
@@ -1724,198 +1548,6 @@ get.mut.vector <- function(x, timeSample, typeSample,
1724 1548
 }
1725 1549
 
1726 1550
 
1727
-## get.mut.vector <- function(x, timeSample, typeSample,
1728
-##                            thresholdWhole, popSizeSample) {
1729
-##     if(is.null(x$TotalPopSize)) {
1730
-##         warning(paste("It looks like this simulation never completed.",
1731
-##                       " Maybe it reached maximum allowed limits.",
1732
-##                       " You will get NAs"))
1733
-##         return(rep(NA, length(x$geneNames)))
1734
-##     }
1735
-##     the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
1736
-##     if(the.time < 0) { 
1737
-##         return(rep(NA, nrow(x$Genotypes)))
1738
-##     } 
1739
-##     pop <- x$pops.by.time[the.time, -1]
1740
-    
1741
-##     if(all(pop == 0)) {
1742
-##         stop("You found a bug: this should never happen")
1743
-##     }
1744
-    
1745
-##     if(typeSample %in% c("wholeTumor", "whole")) {
1746
-##         popSize <- x$PerSampleStats[the.time, 1]
1747
-##         return( as.numeric((tcrossprod(pop,
1748
-##                                        x$Genotypes)/popSize) >= thresholdWhole) )
1749
-##     } else if (typeSample %in%  c("singleCell", "single")) {
1750
-
1751
-##         return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1752
-##     } else {
1753
-##         stop("Unknown typeSample option")
1754
-##     }
1755
-## }
1756
-
1757
-
1758
-
1759
-
1760
-
1761
-
1762
-
1763
-
1764
-
1765
-
1766
-
1767
-
1768
-## oncoSimul.internal <- function(poset, ## restrict.table,
1769
-##                                numPassengers, 
1770
-##                                ## numGenes,
1771
-##                                typeCBN,
1772
-##                                birth, 
1773
-##                                s,
1774
-##                                death,
1775
-##                                mu,
1776
-##                                initSize,
1777
-##                                sampleEvery,
1778
-##                                detectionSize,
1779
-##                                finalTime,
1780
-##                                initSize_species,
1781
-##                                initSize_iter,
1782
-##                                seed,
1783
-##                                verbosity,
1784
-##                                speciesFS,
1785
-##                                ratioForce,
1786
-##                                typeFitness,
1787
-##                                max.memory,
1788
-##                                mutationPropGrowth, ## make it explicit
1789
-##                                initMutant,
1790
-##                                max.wall.time,
1791
-##                                keepEvery,
1792
-##                                alpha,
1793
-##                                sh,                               
1794
-##                                K,
1795
-##                                ## endTimeEvery,
1796
-##                                detectionDrivers,
1797
-##                                onlyCancer,
1798
-##                                errorHitWallTime,
1799
-##                                max.num.tries,
1800
-##                                errorHitMaxTries,
1801
-##                                minDetectDrvCloneSz,
1802
-##                                extraTime) {
1803
-
1804
-##     ## the value of 20000, in megabytes, for max.memory sets a limit of ~ 20 GB
1805
-  
1806
-
1807
-##     ## if(keepEvery < sampleEvery)
1808
-##     ##     warning("setting keepEvery to sampleEvery")
1809
-
1810
-##     ## a backdoor to allow passing restrictionTables directly
1811
-##     if(inherits(poset, "restrictionTable"))
1812
-##         restrict.table <- poset
1813
-##     else
1814
-##         restrict.table <- poset.to.restrictTable(poset)
1815
-##     numDrivers <- nrow(restrict.table)
1816
-##     numGenes <- (numDrivers + numPassengers)
1817
-##     if(numGenes < 2)
1818
-##         stop("There must be at least two genes (loci) in the fitness effects.",
1819
-##              "If you only care about a degenerate case with just one,",
1820
-##              "you can enter a second gene",
1821
-##              "with fitness effect of zero.")
1822
-##     if(numGenes > 64)
1823
-##         stop("Largest possible number of genes (loci) is 64 for version 1.",
1824
-##              "You are strongly encouraged to use the new specification",
1825
-##              "as in version 2.")
1826
-
1827
-##     ## These can never be set by the user
1828
-##     ## if(initSize_species < 10) {
1829
-##     ##     warning("initSize_species too small?")
1830
-##     ## }
1831
-##     ## if(initSize_iter < 100) {
1832
-##     ##     warning("initSize_iter too small?")
1833
-##     ## }
1834
-
1835
-##     ## numDrivers <- nrow(restrict.table)
1836
-##     if(length(unique(restrict.table[, 1])) != numDrivers)
1837
-##         stop("BAIL OUT NOW: length(unique(restrict.table[, 1])) != numDrivers)")
1838
-##     ddr <- restrict.table[, 1]
1839
-##     if(any(diff(ddr) != 1))
1840
-##         stop("BAIL OUT NOW:  any(diff(ddr) != 1")
1841
-##     ## sanity checks
1842
-##     if(max(restrict.table[, 1]) != numDrivers)
1843
-##         stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers")
1844
-##     if(numDrivers > numGenes)
1845
-##         stop("BAIL OUT NOW: numDrivers > numGenes")
1846
-    
1847
-##     non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1]
1848
-
1849
-
1850
-
1851
-
1852
-##     ## if( (is.null(endTimeEvery) || (endTimeEvery > 0)) &&
1853
-##     ##    (typeFitness %in% c("bozic1", "exp") )) {
1854
-##     ##     warning(paste("endTimeEvery will take a positive value. ",
1855
-##     ##                   "This will make simulations not stop until the next ",
1856
-##     ##                   "endTimeEvery has been reached. Thus, in simulations ",
1857
-##     ##                   "with very fast growth, simulations can take a long ",
1858
-##     ##                   "time to finish, or can hit the wall time limit. "))
1859
-##     ## }
1860
-##     ## if(is.null(endTimeEvery))
1861
-##     ##     endTimeEvery <- keepEvery
1862
-##     ## if( (endTimeEvery > 0) && (endTimeEvery %% keepEvery) )
1863
-##     ##     warning("!(endTimeEvery %% keepEvery)")
1864
-##     ## a sanity check in restricTable, so no neg. indices for the positive deps
1865
-##     neg.deps <- function(x) {
1866
-##         ## checks a row of restrict.table
1867
-##         numdeps <- x[2]
1868
-##         if(numdeps) {
1869
-##             deps <- x[3:(3+numdeps - 1)]
1870
-##             return(any(deps < 0))
1871
-##         } else FALSE
1872
-##     }
1873
-##     if(any(apply(restrict.table, 1, neg.deps)))
1874
-##         stop("BAIL OUT NOW: Negative dependencies in restriction table")
1875
-
1876
-##     ## transpose the table
1877
-##     rtC <- convertRestrictTable(restrict.table)
1878
-
1879
-    
1880
-##     return(c(
1881
-##         BNB_Algo5(restrictTable = rtC,
1882
-##         numDrivers = numDrivers,
1883
-##         numGenes = numGenes,
1884
-##         typeCBN_= typeCBN,
1885
-##         s = s, 
1886
-##         death = death,
1887
-##         mu = mu,
1888
-##         initSize = initSize,
1889
-##         sampleEvery = sampleEvery,
1890
-##         detectionSize = detectionSize,
1891
-##         finalTime = finalTime,
1892
-##         initSp = initSize_species,
1893
-##         initIt = initSize_iter,
1894
-##         seed = seed,
1895
-##         verbosity = verbosity,
1896
-##         speciesFS = speciesFS,
1897
-##         ratioForce = ratioForce,
1898
-##         typeFitness_ = typeFitness,
1899
-##         maxram = max.memory,
1900
-##         mutationPropGrowth = as.integer(mutationPropGrowth),
1901
-##         initMutant = initMutant,
1902
-##         maxWallTime = max.wall.time,
1903
-##         keepEvery = keepEvery,
1904
-##         sh = sh,
1905
-##         K = K,
1906
-##         detectionDrivers = detectionDrivers,
1907
-##         onlyCancer = onlyCancer,
1908
-##         errorHitWallTime = errorHitWallTime,
1909
-##         maxNumTries = max.num.tries,
1910
-##         errorHitMaxTries = errorHitMaxTries,
1911
-##         minDetectDrvCloneSz = minDetectDrvCloneSz,
1912
-##         extraTime = extraTime
1913
-##     ),
1914
-##     NumDrivers = numDrivers
1915
-##              ))
1916
-
1917
-## }
1918
-
1919 1551
 OncoSimulWide2Long <- function(x) {
1920 1552
     ## Put data in long format, for ggplot et al
1921 1553
     
... ...
@@ -1947,23 +1579,6 @@ OncoSimulWide2Long <- function(x) {
1947 1579
 
1948 1580
 
1949 1581
 
1950
-## We are not using this anymore
1951
-## create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
1952
-##     if(tmp$NumClones > 1) {
1953
-##         NumMutations <- apply(tmp$Genotypes, 2, sum)
1954
-##         muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
1955
-##                               t(apply(tmp$pops.by.time[, -c(1),
1956
-##                                                        drop = FALSE], 1,
1957
-##                                       function(x) tapply(x,
1958
-##                                                          NumMutations, sum))))
1959
-##         colnames(muts.by.time)[c(1)] <- "Time"
1960
-##     } else {
1961
-##         muts.by.time <- tmp$pops.by.time
1962
-##     }
1963
-##     return(muts.by.time)
1964
-## } 
1965
-
1966
-
1967 1582
 create.drivers.by.time <- function(tmp, ndr) {
1968 1583
     ## CountNumDrivers <- apply(tmp$Genotypes[1:numDrivers, ,drop = FALSE], 2, sum)
1969 1584
     CountNumDrivers <- ndr
... ...
@@ -2084,248 +1699,9 @@ is_null_na <- function(x) {
2084 1699
 
2085 1700
 
2086 1701
 
2087
-
2088
-
2089
-
2090
-
2091
-
2092
-
2093
-
2094
-
2095
-
2096 1702
 ## simpsonI <- function(x) {
2097 1703
 ##     sx <- sum(x)
2098 1704
 ##     p <- x/sx
2099 1705
 ##     p <- p[p > 0]
2100 1706
 ##     return(sum(p^2)))
2101 1707
 ## }
2102
-
2103
-## plotSimpson <- function(z) {
2104
-    
2105
-##     h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE],
2106
-##                1, shannonI)
2107
-##     plot(x = z$pops.by.time[, 1],
2108
-##          y = h, lty = "l", xlab = "", ylab = "H")
2109
-## }
2110
-
2111
-
2112
-## plotClones <- function(z, ndr = NULL, na.subs = TRUE,
2113
-##                        log = "y", type = "l",
2114
-##                        lty = 1:8, col = 1:9, ...) {
2115
-
2116
-##     ## if given ndr, we order columns based on ndr, so clones with more
2117
-##     ## drivers are plotted last
2118
-
2119
-##     y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
2120
-    
2121
-##     if(na.subs){
2122
-##         y[y == 0] <- NA
2123
-##     }
2124
-##     if(!is.null(ndr)) {
2125
-##         ## could be done above, to avoid creating
2126
-##         ## more copies
2127
-##         oo <- order(ndr)
2128
-##         y <- y[, oo, drop = FALSE]
2129
-##         ndr <- ndr[oo]
2130
-##         col <- col[ndr + 1]
2131
-##     }
2132
-##     matplot(x = z$pops.by.time[, 1],
2133
-##             y = y,
2134
-##             log = log, type = type,
2135
-##             col = col, lty = lty,
2136
-##             ...)
2137
-##     box()
2138
-## }
2139
-
2140
-
2141
-
2142
-
2143
-
2144
-## No longer used
2145
-## rtNoDep <- function(numdrivers) {
2146
-##     ## create a restriction table with no dependencies
2147
-##     x <- matrix(nrow = numdrivers, ncol = 3)
2148
-##     x[, 1] <- 1:numdrivers
2149
-##     x[, 2] <- 0
2150
-##     x[, 3] <- -9
2151
-##     return(x)
2152
-## }
2153
-
2154
-
2155
-## Simulate from generative model. This is a quick function, and is most
2156
-## likely wrong! Never used for anything.
2157
-
2158
-## simposet <- function(poset, p) {
2159
-##     ## if (length(parent.nodes) != length (child.nodes)){
2160
-##     ##     print("An Error Occurred")
2161
-##     ## }
2162
-##     ##    else {
2163
-##     num.genes <- max(poset) - 1 ## as root is not a gene
2164
-##     genotype <-t(c(1, rep(NA, num.genes)))
2165
-##     colnames(genotype) <- as.character(0:num.genes)
2166
-    
2167
-    
2168
-##     poset$runif <- runif(nrow(poset))
2169
-##     ## this.relation.prob.OK could be done outside, but having it inside
2170
-##     ## the loop would allow to use different thresholds for different
2171
-##     ## relationships
2172
-##     for (i in (1:nrow(poset))) {
2173
-##         child <- poset[i, 2]
2174
-##         this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p)
2175
-##         the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. 
2176
-##         if (is.na(genotype[child])){
2177
-##             genotype[child] <- this.relation.prob.OK * the.parent  
2178
-##         }
2179
-##         else
2180
-##             genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent)
2181
-##     }
2182
-##     ##    }
2183
-    
2184
-##     return(genotype)
2185
-## }
2186
-
2187
-
2188
-## to plot and adjacency matrix in this context can do
2189
-## plotPoset(intAdjMatToPoset(adjMat))
2190
-## where intAdjMatToPoset is from best oncotree code: generate-random-trees.
2191
-## No! the above is simpler
2192
-
2193
-
2194
-
2195
-
2196
-## get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
2197
-##     ## Obtain, from  results from a simulation run, the vector
2198
-##     ## of 0/1 corresponding to each gene.
2199
-    
2200
-##     ## threshold is the min. proportion for a mutation to be detected
2201
-##     ## We are doing whole tumor sampling here, as in Sprouffske
2202
-
2203
-##     ## timeSample: do we sample at end, or at a time point, chosen
2204
-##     ## randomly, from all those with at least one driver?
2205
-    
2206
-##     if(timeSample == "last") {
2207
-##         if(tmp$TotalPopSize == 0)
2208
-##             warning(paste("Final population size is 0.",
2209
-##                           "Thus, there is nothing to sample with ",
2210
-##                           "sampling last. You will get NAs"))
2211
-##         return(as.numeric(
2212
-##             (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
2213
-##                         tmp$Genotypes)/tmp$TotalPopSize) > threshold))
2214
-##     } else if (timeSample %in% c("uniform", "unif")) {
2215
-##           candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
2216
-          
2217
-##           if (length(candidate.time) == 0) {
2218
-##               warning(paste("There is not a single sampled time",
2219
-##                             "at which there are any mutants.",
2220
-##                             "Thus, no uniform sampling possible.",
2221
-##                             "You will get NAs"))
2222
-##               return(rep(NA, nrow(tmp$Genotypes)))
2223
-##           } else if (length(candidate.time) == 1) {
2224
-##                 the.time <- candidate.time
2225
-##             } else {
2226
-##                   the.time <- sample(candidate.time, 1)
2227
-##               }
2228
-##           pop <- tmp$pops.by.time[the.time, -1]
2229
-##           popSize <- tmp$PerSampleStats[the.time, 1]
2230
-##           ## if(popSize == 0)
2231
-##           ##     warning(paste("Population size at this time is 0.",
2232
-##           ##                   "Thus, there is nothing to sample at this time point.",
2233
-##           ##                   "You will get NAs"))
2234
-##           return( as.numeric((tcrossprod(pop,
2235
-##                                        tmp$Genotypes)/popSize) > threshold) )
2236
-##       }
2237
-## }
2238
-
2239
-
2240
-
2241
-##           the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
2242
-##           if(length(the.time) == 0) {
2243
-##               warning(paste("There are no clones with drivers at any time point.",
2244
-##                             "No uniform sampling possible.",
2245
-##                             "You will get a vector of NAs."))
2246
-##             return(rep(NA, nrow(tmp$Genotypes)))  
2247
-##           }
2248
-## get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
2249
-##     ## No threshold, as single cell.
2250
-
2251
-##     ## timeSample: do we sample at end, or at a time point, chosen
2252
-##     ## randomly, from all those with at least one driver?
2253
-    
2254
-##     if(timeSample == "last") {
2255
-##         the.time <- nrow(tmp$pops.by.time)
2256
-##     } else if (timeSample %in% c("uniform", "unif")) {
2257
-##          candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
2258
-         
2259
-##          if (length(candidate.time) == 0) {
2260
-##              warning(paste("There is not a single sampled time",
2261
-##                            "at which there are any mutants.",
2262
-##                            "Thus, no uniform sampling possible.",
2263
-##                            "You will get NAs"))
2264
-##              return(rep(NA, nrow(tmp$Genotypes)))
2265
-##          } else if (length(candidate.time) == 1) {
2266
-##                the.time <- candidate.time
2267
-##            } else {
2268
-##                  the.time <- sample(candidate.time, 1)
2269
-##              }
2270
-
2271
-##      }
2272
-##     pop <- tmp$pops.by.time[the.time, -1]
2273
-##     ##       popSize <- tmp$PerSampleStats[the.time, 1]
2274
-##     ## genot <- sample(seq_along(pop), 1, prob = pop)
2275
-##     if(all(pop == 0)) {
2276
-##         warning(paste("All clones have a population size of 0",
2277
-##                       "at the chosen time. Nothing to sample.",
2278
-##                       "You will get NAs"))
2279
-##         return(rep(NA, nrow(tmp$Genotypes)))
2280
-##     } else {
2281
-##           return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
2282
-##       }
2283
-## }
2284
-
2285
-
2286
-## get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
2287
-##                            thresholdWhole = 0.5) {
2288
-##     if(typeSample %in% c("wholeTumor", "whole")) {
2289
-##         get.mut.vector.whole(x, timeSample = timeSample,
2290
-##                              threshold = thresholdWhole)
2291
-##     } else if(typeSample %in%  c("singleCell", "single")) {
2292
-##         get.mut.vector.singlecell(x, timeSample = timeSample)
2293
-##     }
2294
-## }
2295
-
2296
-
2297
-
2298
-
2299
-
2300
-## plotClonePhylog <- function(x, timeEvent = FALSE,
2301
-##                             showEvents = TRUE,
2302
-##                             fixOverlap = TRUE) {
2303
-##     if(!inherits(x, "oncosimul2"))
2304
-##         stop("Phylogenetic information is only stored with v >=2")
2305
-##     if(nrow(x$other$PhylogDF) == 0)
2306
-##         stop("It seems you run the simulation with keepPhylog= FALSE")
2307
-##     ## requireNamespace("igraph")
2308
-##     df <- x$other$PhylogDF
2309
-##     if(!showEvents) {
2310
-##         df <- df[!duplicated(df[, c(1, 2)]), ]
2311
-##     }
2312
-##     g <- igraph::graph.data.frame(df)
2313
-##     l0 <- igraph::layout.reingold.tilford(g)
2314
-##     if(!timeEvent) {
2315
-##         plot(g, layout = l0)
2316
-##     } else {
2317
-##         l1 <- l0
2318
-##         indexAppear <- match(V(g)$name, as.character(df[, 2]))
2319
-##         firstAppear <- df$time[indexAppear]
2320
-##         firstAppear[1] <- 0
2321
-##         l1[, 2] <- (max(firstAppear) - firstAppear)
2322
-##         if(fixOverlap) {
2323
-##             dx <- which(duplicated(l1[, 1]))
2324
-##             if(length(dx)) {
2325
-##                 ra <- range(l1[, 1])
2326
-##                 l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
2327
-##             }
2328
-##         }
2329
-##         plot(g, layout = l1)         
2330
-##     }
2331
-## }
... ...
@@ -30,6 +30,3 @@ accessibleGenotypes_former <- function(y, f, numMut, th) {
30 30
     .Call('OncoSimulR_accessibleGenotypes_former', PACKAGE = 'OncoSimulR', y, f, numMut, th)
31 31
 }
32 32
 
33
-## readFitnessEffects <- function(rFE, echo) {
34
-##     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
35
-## }
... ...
@@ -319,65 +319,6 @@ faster_accessible_genotypes_R <- function(x, th) {
319 319
 }
320 320
 
321 321
 
322
-## ## This uses slam, but that is actually slower because
323
-## ## of the assignment
324
-## faster_accessible_genots_slam <- function(x, th = 0) {
325
-
326
-##     ## Given a genotype matrix, return the genotypes that are accessible
327
-##     ## via creating a directed adjacency matrix between genotypes
328
-##     ## connected (i.e., those that differ by gaining one mutation). 0
329
-##     ## means not connected, 1 means connected.
330
-    
331
-##     ## There is a more general function in OncoSimulR that will give the
332
-##     ## fitness difference. But not doing the difference is faster than
333
-##     ## just setting a value, say 1, if all we want is to keep track of
334
-##     ## accessible ones. And by using only 0/1 we can store only an
335
-##     ## integer. And no na.omits, etc. Is too restricted? Yes. But for
336
-##     ## simulations and computing just accessible genotypes, probably a
337
-##     ## hell of a lot faster.
338
-
339
-##     ## Well, this is not incredibly fast either.
340
-    
341
-##     ## Make sure sorted, so ancestors always before descendants
342
-##     rs0 <- rowSums(x[, -ncol(x)])
343
-##     x <- x[order(rs0), ]
344
-##     rm(rs0)
345
-    
346
-##     y <- x[, -ncol(x)]
347
-##     f <- x[, ncol(x)]
348
-##     rs <- rowSums(y)
349
-
350
-##     ## If 0, not accessible
351
-##     adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
352
-##                                       mode = "integer")
353
-##     for(i in 1:length(rs)) { ## i is the current genotype
354
-##         candidates <- which(rs == (rs[i] + 1))
355
-##         for(j in candidates) {
356
-##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
357
-##             ## if(sumdiff == 1)
358
-##             if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
359
-##                 ( (f[j] - f[i]) >= th ) )
360
-##                 adm[i, j] <- 1L
361
-##         }
362
-##     }
363
-
364
-##     colnames(adm) <- rownames(adm) <- 1:ncol(adm)
365
-##     admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
366
-##     while(TRUE) {
367
-##         ## We remove inaccessible cols (genotypes) and the corresponding
368
-##         ## rows repeatedly until nothing left to remove; any column left
369
-##         ## is therefore accessible throw at least one path.
370
-
371
-##         ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
372
-##         inacc_col <- which(slam::col_sums(admtmp) == 0L)
373
-##         if(length(inacc_col) == 0) break;
374
-##         inacc_row <- inacc_col + 1 ## recall root row is left
375
-##         admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
376
-##     }
377
-##     return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
378
-## }
379
-
380
-
381 322
 generate_matrix_genotypes <- function(g) {
382 323
     ## FIXME future: do this for order too? Only if rfitness for order.
383 324
     ## Given a number of genes, generate all possible genotypes.
... ...
@@ -470,25 +411,6 @@ genot_to_adj_mat <- function(x) {
470 411
 }
471 412
 
472 413
 
473
-## ## to move above to C++ note that loop can be
474
-## for(i in 1:length(rs)) { ## i is the current genotype
475
-##     for(j in (i:length(rs))) {
476
-##         if(rs[j] > (rs[i] + 1)) break;
477
-##         else if(rs[j] == (rs[i] + 1)) {
478
-##             ## and use here my HammingDistance function
479
-##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
480
-##             ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
481
-##             if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]);
482
-##             }
483
-##     }
484
-## }
485
-
486
-## actually, all that is already in accessibleGenotypes except for the
487
-## filling up of adm.
488
-
489
-
490
-
491
-
492 414
 
493 415
 peak_valley <- function(x) {
494 416
     ## FIXME: when there are no identical entries, all this
... ...
@@ -307,90 +307,6 @@ to_genotFitness_std <- function(x,
307 307
     return(x)
308 308
 }
309 309
 
310
-## Deprecated after flfast
311
-## to_genotFitness_std is faster and has better error checking
312
-## and is very similar and does not use
313
-## the genot_fitness_to_epistasis, which is not reasonable anymore.
314
-
315
-## from_genotype_fitness <- function(x) {
316
-##     ## Would break with output from allFitnessEffects and
317
-##     ## output from allGenotypeAndMut
318
-
319
-##     ## For the very special and weird case of
320
-##     ## a matrix but only a single gene so with a 0 and 1
321
-##     ## No, this is a silly and meaningless case.
322
-##     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
323
-
324
-##     ## } else  blabla:
325
-
326
-##     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
327
-##         stop("Input must inherit from matrix or data.frame.")
328
-
329
-##     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
330
-##     ##     stop(paste0("Genotype fitness input either two-column data frame",
331
-##     ##          " or a numeric matrix with > 2 columns."))
332
-##     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
333
-##     ##     stop(paste0("It looks like you have a matrix for a single genotype",
334
-##     ##                 " of a single gene. For this degenerate cases use",
335
-##     ##                 " a data frame specification."))
336
-
337
-##     if(ncol(x) > 2) {
338
-##         if(inherits(x, "matrix")) {
339
-##             if(!is.numeric(x))
340
-##                 stop("A genotype fitness matrix/data.frame must be numeric.")
341
-##         } else if(inherits(x, "data.frame")) {
342
-##             if(!all(unlist(lapply(x, is.numeric))))
343
-##                 stop("A genotype fitness matrix/data.frame must be numeric.")
344
-##         }
345
-
346
-##         ## We are expecting here a matrix of 0/1 where columns are genes
347
-##         ## except for the last column, that is Fitness
348
-##         ## Of course, can ONLY work with epistastis, NOT order
349
-##         return(genot_fitness_to_epistasis(x))
350
-##     } else {
351
-##         if(!inherits(x, "data.frame"))
352
-##             stop("genotFitness: if two-column must be data frame")
353
-##         ## Make sure no factors
354
-##         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
355
-##         ## Make sure no numbers
356
-##         if(any(is.numeric(x[, 1])))
357
-##             stop(paste0("genotFitness: first column of data frame is numeric.",
358
-##                         " Ambiguous and suggests possible error. If sure,",
359
-##                         " enter that column as character"))
360
-
361
-##         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
362
-##         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
363
-##         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
364
-##         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
365
-##         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
366
-##         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
367
-##         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
368
-##         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
369
-##         if(omarker) {
370
-##             ## do something. To be completed
371
-##             stop("This code not yet ready")
372
-##             ## You can pass to allFitnessEffects genotype -> fitness mappings that
373
-##             ## involve epistasis and order. But they must have different
374
-##             ## genes. Otherwise, it is not manageable.
375
-##         }
376
-##         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
377
-##             ## the second case above corresponds to passing just single letter genotypes
378
-##             ## as there is not a single marker
379
-##             x <- x[, c(1, 2), drop = FALSE]
380
-##             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
381
-##                 message("Column names of object not Genotype and Fitness.",
382
-##                         " Renaming them assuming that is what you wanted")
383
-##                 colnames(x) <- c("Genotype", "Fitness")
384
-##             }
385
-##             if((!omarker) && (!emarker) && (!nogoodepi)) {
386
-##                 message("All single-gene genotypes as input to from_genotype_fitness")
387
-##             }
388
-##             ## Yes, we need to do this to  scale the fitness and put the "-"
389
-##             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
390
-##         }
391
-##     }
392
-## }
393
-
394 310
 
395 311
 
396 312
 
... ...
@@ -504,7 +420,13 @@ allGenotypes_to_matrix <- function(x,
504 420
     )
505 421
 
506 422
     all_genes <- sort(unique(unlist(splitted_genots)))
507
-
423
+    if(length(all_genes) < 2) stop(paste("There must be at least two genes (loci)",
424
+                                         "in the fitness effects.",
425
+                                         "If you only care about a case with",
426
+                                         "a single one (or none) enter gene(s)",
427
+                                         "with a fitness effect of zero.",
428
+                                         "For freq.dep.fitness, create another ",
429
+                                         "genotype that always has fitness zero."))
508 430
     m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes))
509 431
     the_match <- lapply(
510 432
         splitted_genots,
... ...
@@ -522,7 +444,7 @@ allGenotypes_to_matrix <- function(x,
522 444
     if (frequencyDependentFitness) {
523 445
         m <- as.data.frame(m)
524 446
         m[, 1:length(all_genes)] <- apply(
525
-            m[, 1:length(all_genes)],
447
+            m[, 1:length(all_genes), drop = FALSE],
526 448
             2,
527 449
             as.numeric
528 450
         )
... ...
@@ -607,78 +529,6 @@ Magellan_stats <- function(x, max_num_genotypes = 2000,
607 529
 }
608 530
 
609 531
 
610
-## Former version, that always tries to give a vector
611
-## and that uses an external executable.
612
-## Magellan_stats and Magellan_draw cannot be tested
613
-## routinely, as they depend on external software
614
-Magellan_stats_former <- function(x, max_num_genotypes = 2000,
615
-                           verbose = FALSE,
616
-                           use_log = TRUE,
617
-                           short = TRUE,
618
-                           fl_statistics = "fl_statistics",
619
-                           replace_missing = FALSE) { # nocov start
620
-    ## I always use
621
-    ## if(!is.null(x) && is.null(file))
622
-    ##     stop("one of object or file name")
623
-    ## if(is.null(file))
624
-    fn <- tempfile()
625
-    fnret <- tempfile()
626
-    if(verbose)
627
-        cat("\n Using input file", fn, " and output file ", fnret, "\n")
628
-
629
-    if(use_log) {
630
-        logarg <- "-l"
631
-    } else {
632
-        logarg <- NULL
633
-    }
634
-    if(short) {
635
-        shortarg <- "-s"
636
-    } else {
637
-        shortarg <- NULL
638
-    }
639
-
640
-    if(replace_missing) {
641
-        zarg <- "-z"
642
-    } else {
643
-        zarg <- NULL
644
-    }
645
-
646
-    to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
647
-    ## newer versions of Magellan provide some extra values to standard output
648
-    call_M <- system2(fl_statistics,
649
-                      args = paste(fn, shortarg, logarg, zarg, "-o", fnret),
650
-                      stdout = NULL)
651
-    if(short) {
652
-        ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
653
-
654
-        tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
655
-        ## Make names more explicit, but check we have what we think we have
656
-        ## New versions of Magellan produce different output apparently of variable length
657
-        stopifnot(length(tmp) >= 23) ## 23) ## variable length
658
-        stopifnot(identical(names(tmp)[1:13], ## only some
659
-                            c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s",
660
-                              "nchains", "nsteps", "nori", "depth", "magn", "sign",
661
-                              "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
662
-        ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
663
-        if(length(tmp) >= 24) ## the new version
664
-            stopifnot(identical(names(tmp)[c(20, 24)],
665
-                                c("steps_m", "mProbOpt_0")))
666
-        ## steps_m: the mean number of steps over the entire landscape to
667
-        ## reach the global optimum
668
-        ## mProbOpt_0: The mean probability to
669
-        ## reach that optimum (again averaged over the entire landscape).
670
-        ## but if there are multiple optima, there are many of these
671
-        names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
672
-                        "r/s","n_chains", "n_steps", "n_origins", "max_depth",
673
-                        "epist_magn", "epist_sign", "epist_rsign")## ,
674
-                        ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
675
-                        ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
676
-        ## "std_dev_pairs", "var_outdegree")
677
-    } else {
678
-        tmp <- readLines(fnret)
679
-    }
680
-    return(tmp)
681
-} # nocov end
682 532
 
683 533
 Magellan_draw <- function(x, max_num_genotypes = 2000,
684 534
                           verbose = FALSE,
... ...
@@ -82,20 +82,11 @@ simOGraph <- function(n, h = ifelse(n >= 4, 4, n),
82 82
 
83 83
     ## Prune to remove indirect connections
84 84
     if(multilevelParent & removeDirectIndirect) {
85
-        ## adjMat <- transitiveReduction(adjMat)
86
-        ## calling transitive.closure not necessary, as that
87
-        ## is called inside transitive.reduction
88
-        ## trm <- nem::transitive.reduction(nem::transitive.closure(adjMat))
89
-
90 85
         ## could use relations package as
91
-        ## r1 <- relations::transitive_reduction(
92
-        ##       relations::transitive_closure(relations::as.relation(adjMat2)))
93
-        ## trm <- relation_incidence(r1)
94 86
         ## but storage mode is double, etc, etc.
95 87
         ## And would need to double check it is working as I think it is.
96 88
         ## For now, trying to use nem's code directly
97
-        
98
-        trm <- nem_transitive.reduction(adjMat)
89
+         trm <- nem_transitive.reduction(adjMat)
99 90
         stopifnot(all(trm %in% c(0L, 1L) ))
100 91
         storage.mode(trm) <- "integer"
101 92
         adjMat <- trm
... ...
@@ -164,72 +155,15 @@ connectIndiv <- function(parents, nparents, n) {
164 155
     return(c(0L,v)) ## added root
165 156
 }
166 157
 
167
-## Not used
168
-## findSuperParents <- function(x, adjMat) {
169
-##     parents <- which(adjMat[, x + 1]  == 1) - 1
170
-##     allP <- findAllParents(x, adjMat)
171
-##     return(setdiff(allP, parents))
172
-## }
173
-
174
-## findAllParents <- function(x, adjMat) {
175
-##     if(x == 0)
176
-##         return(NULL)
177
-##     else{
178
-##         p <- which(adjMat[, x + 1] == 1) - 1
179
-##         p1 <- unlist(lapply(p, function(x) findAllParents(x, adjMat)))
180
-##         return(c(p, p1))
181
-##     }
182
-## }
183
-
184
-## repeatedParents <- function(x, adjMat) {
185
-##     ap <- findAllParents(x, adjMat)
186
-##     dups <- duplicated(ap)
187
-##     dupP <- setdiff(ap[dups], 0)
188
-##     dupP
189
-## }
190
-
191
-
192
-## ## ## But this only works if a special order in the rows and columns
193
-## ## ## and will not work with the root row.
194
-## ## m1 <- matrix(0, ncol = 5, nrow = 5); colnames(m1) <- rownames(m1) <- LETTERS[1:5]
195
-## ## for(i in 1:4) m1[i, i+1] <- 1
196
-## ## library(ggm)
197
-## ## m1tc <- ggm::transClos(m1)
198
-## ## transitiveReduction(m1tc)
199
-
200
-
201 158
 ## ## Other R and BioC packages that will do transitive reduction:
202 159
 ## ## nem (BioC): works with adjacency matrices directly
203 160
 ## ## rBiopaxParser (BioC): a wrapper to nem
204 161
 ## ## rPref
205 162
 ## ## relations
206 163
 ## ## hasseDiagram
207
-
208
-## transitiveReduction <- function(adjMat) {
209
-##     ## Return the transitive reduction
210
-
211 164
 ## But note my bug report to BioC,
212
-
213 165
 ## https://support.bioconductor.org/p/91695/
214
-
215 166
 ## See discussion and comments on
216 167
 ## http://stackoverflow.com/a/6702198 
217 168
 ## and comments on http://stackoverflow.com/a/2372202
218 169
 ## So one need to do the transitive closure first.
219
-    
220
-##     ## We remove the direct connections. How? We search, for each node,
221
-##     ## for the set of all parents/grandparents/grandgrandparents/etc. If
222
-##     ## any of those ancestors is repeated, it means you go from that
223
-##     ## ancestor to the node in question through at least two different
224
-##     ## routes. Thus, ensure the direct is 0 (it might already be, no
225
-##     ## problem). Once you do that, you know there are not both indirect
226
-##     ## AND direct connections and thus you have the transitive reduction.
227
-##     for(i in ncol(adjMat):2) {
228
-##         dp <- repeatedParents( i - 1, adjMat)
229
-##         if(length(dp))
230
-##             adjMat[cbind(dp + 1, i)] <- 0L
231
-##     }
232
-##     return(adjMat)
233
-## }
234
-
235
-
... ...
@@ -1644,7 +1644,7 @@ nr_oncoSimul.internal <- function(rFE,
1644 1644
                    "allFitnessEffects"))
1645 1645
 
1646 1646
     if(countGenesFe(rFE) < 2) {
1647
-        stop("There must be at least two genes (loci) in the fitness effects.",
1647
+        stop("There must be at least two genes (loci) in the fitness effects. ",
1648 1648
              "If you only care about a degenerate case with just one,",
1649 1649
              "you can enter a second gene (locus)",
1650 1650
              "with fitness effect of zero.")
... ...
@@ -2249,702 +2249,6 @@ genotype_letterslabel <- function(df) {
2249 2249
 
2250 2250
 
2251 2251
 
2252
-## emptyFitnessEffects <- function() {
2253
-##     list(long.rt = list(),
2254
-##          long.epistasis = list(),
2255
-##          long.orderEffects = list(),
2256
-##          long.geneNoInt = list(),
2257
-##          geneModule = list(),
2258
-##          gMOneToOne = TRUE,
2259
-##          geneToModule = list(),
2260
-##          graph = NULL,
2261
-##          drv = vector(mode = "integer", length = 0)
2262
-##          )
2263
-## }
2264
-
2265
-
2266
-
2267
-### Later, for all the effects, we will do some kind of dplyr match?
2268
-
2269
-### a, b, c, in fitness, only a, c in mut.
2270
-### fitness table for a,b,c
2271
-### each row name transformed removing b (so leaving only present)
2272
-### each row transformed matched to row in mut table.
2273
-
2274
-## t1 <- data.frame(v1 = c("a,b", "a,c", "b"), v2 = c("b", "c", "b"), v3 = 1:3, stringsAsFactors = FALSE)
2275
-## t2 <- data.frame(v2 = c("b", "c"), v4 = c(11, 12), stringsAsFactors = FALSE)
2276
-## full_join(t1, t2, by = "v2")
2277
-
2278
-
2279
-
2280
-## FIXME
2281
-
2282
-## new exit code
2283
-## check:
2284
-## baseline < n2
2285
-## p2 < 1
2286
-
2287
-## baseline defaults to init size + .1
2288
-## use c < -9, n2 < -9, p2 < -9 for no values
2289
-## and check one of c or p2 and n2 are valid if using exit
2290
-
2291
-
2292
-
2293
-
2294
-
2295
-
2296
-
2297
-
2298
-
2299
-
2300
-## fVariablesL <- function (g, frequencyType) {
2301
-
2302
-##   if (is.null(g) | g == 0)
2303
-##     stop("Number of genes must be integer > 0")
2304
-
2305
-##   if(g > length(LETTERS))
2306
-##     stop(paste0("Number of genes must be < length(LETTERS).",
2307
-##                 " Please specify variables with numbers"))
2308
-
2309
-##   combinationsList <- list()
2310
-##   for (i in 0:g) {
2311
-##     combinationsList <- append(combinationsList,
2312
-##                                combn(LETTERS[1:g], i, list, simplify = TRUE))
2313
-##   }
2314
-
2315
-##   if (frequencyType == "abs"){
2316
-##     fsVector <-sapply(sapply(combinationsList,
2317
-##                              function(x) paste0(x, collapse = "_")),
2318
-##                       function(x) paste0("n_", x))
2319
-##   }else{
2320
-##     fsVector <-sapply(sapply(combinationsList,
2321
-##                              function(x) paste0(x, collapse = "_")),
2322
-##                       function(x) paste0("f_", x))
2323
-##   }
2324
-
2325
-##   return (fsVector)
2326
-## }
2327
-
2328
-
2329
-## ## Assuming we are using the full fitness landscapes (i.e., none of
2330
-## ## setting genotypes with 0 fitness are absent from the table)
2331
-## conversionTable <- function(g, frequencyType){
2332
-##   df <- data.frame(let = fVariablesL(g, frequencyType)[-1],
2333
-##                    num = fVariablesN(g, frequencyType)[-1],
2334
-##                    stringsAsFactors = FALSE)
2335
-##   return (df)
2336
-## }
2337
-
2338
-## findAndReplace <- function(str, conversionTable_input){
2339
-
2340
-##   pattern <- rev(setNames(as.character(conversionTable_input$num),
2341
-##                       conversionTable_input$let))
2342
-
2343
-##   str <- stringr::str_replace_all(string = str,
2344
-##                                   pattern = pattern)
2345
-##   return(str)
2346
-## }
2347
-
2348
-
2349
-
2350
-## Former version, with fitness landscape
2351
-## allFitnessORMutatorEffects <- function(rT = NULL,
2352
-##                                        epistasis = NULL,
2353
-##                                        orderEffects = NULL,
2354
-##                                        noIntGenes = NULL,
2355
-##                                        geneToModule = NULL,
2356
-##                                        drvNames = NULL,
2357
-##                                        keepInput = TRUE,
2358
-##                                        ## refFE = NULL,
2359
-##                                        calledBy = NULL) {
2360
-##     ## From allFitnessEffects. Generalized so we deal with Fitness
2361
-##     ## and mutator.
2362
-
2363
-##     ## restrictions: the usual rt
2364
-
2365
-##     ## epistasis: as it says, with the ":"
2366
-
2367
-##     ## orderEffects: the ">"
2368
-
2369
-##     ## All of the above can be genes or can be modules (if you pass a
2370
-##     ## geneToModule)
2371
-
2372
-##     ## rest: rest of genes, with fitness
2373
-
2374
-
2375
-##     ## For epistasis and order effects we create the output object but
2376
-##     ## missing the numeric ids of genes. With rT we do it in one go, as we
2377
-##     ## already know the mapping of genes to numeric ids. We could do the
2378
-##     ## same in epistasis and order, but we would be splitting twice
2379
-##     ## (whereas for rT extracting the names is very simple).
2380
-
2381
-##     ## called appropriately?
2382
-##     if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
2383
-##         stop("How did you call this function?. Bug.")
2384
-
2385
-##     if(calledBy == "allMutatorEffects") {
2386
-##         ## very paranoid check
2387
-##         if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
2388
-##             stop("allMutatorEffects called with forbidden arguments.",
2389
-##                  "Is this an attempt to subvert the function?")
2390
-##     }
2391
-
2392
-##     rtNames <- NULL
2393
-##     epiNames <- NULL
2394
-##     orNames <- NULL
2395
-##     if(!is.null(rT)) {
2396
-##         ## This is really ugly, but to prevent the stringsAsFactors I need it here:
2397
-##         rT$parent <- as.character(rT$parent)
2398
-##         rT$child <- as.character(rT$child)
2399
-##         rT$typeDep <- as.character(rT$typeDep)
2400
-##         rtNames <- unique(c(rT$parent, rT$child))
2401
-##     }
2402
-##     if(!is.null(epistasis)) {
2403
-##         long.epistasis <- to.long.epist.order(epistasis, ":")
2404
-##         ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
2405
-##         ## deal with the possible negative signs
2406
-##         epiNames <- setdiff(unique(
2407
-##             unlist(lapply(long.epistasis,
2408
-##                           function(x) lapply(x$ids,
2409
-##                                              function(z) strsplit(z, "^-"))))),
2410
-##                             "")
2411
-##     } else {
2412
-##         long.epistasis <- list()
2413
-##     }
2414
-##     if(!is.null(orderEffects)) {
2415
-##         long.orderEffects <- to.long.epist.order(orderEffects, ">")
2416
-##         orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
2417
-##     } else {
2418
-##         long.orderEffects <- list()
2419
-##     }
2420
-##     allModuleNames <- unique(c(rtNames, epiNames, orNames))
2421
-##     if(is.null(geneToModule)) {
2422
-##         gMOneToOne <- TRUE
2423
-##         geneToModule <- geneModuleNull(allModuleNames)
2424
-##     } else {
2425
-##         gMOneToOne <- FALSE
2426
-##         if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
2427
-##             stop(paste("Some values in geneToModule not present in any of",
2428
-##                        " rT, epistasis, or order effects"))
2429
-##         if(any(is.na(match(allModuleNames, names(geneToModule)))))
2430
-##             stop(paste("Some values in rT, epistasis, ",
2431
-##                        "or order effects not in geneToModule"))
2432
-##     }
2433
-##     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
2434
-
2435
-##     idm <- unique(geneModule$ModuleNumID)
2436
-##     names(idm) <- unique(geneModule$Module)
2437
-
2438
-##     if(!is.null(rT)) {
2439
-##         checkRT(rT)
2440
-##         long.rt <- to.long.rt(rT, idm)
2441
-##     } else {
2442
-##         long.rt <- list() ## yes, we want an object of length 0
2443
-##     }
2444
-
2445
-##     ## Append the numeric ids to epistasis and order
2446
-##     if(!is.null(epistasis)) {
2447
-##         long.epistasis <- lapply(long.epistasis,
2448
-##                                  function(x)
2449
-##                                      addIntID.epist.order(x, idm,
2450
-##                                                           sort = TRUE,
2451
-##                                                           sign = TRUE))
2452
-##     }
2453
-##     if(!is.null(orderEffects)) {
2454
-##         long.orderEffects <- lapply(long.orderEffects,
2455
-##                                     function(x)
2456
-##                                         addIntID.epist.order(x, idm,
2457
-##                                                              sort = FALSE,
2458
-##                                                              sign = FALSE))
2459
-##     }
2460
-
2461
-##     if(!is.null(noIntGenes)) {
2462
-##         if(inherits(noIntGenes, "character")) {
2463
-##             wm <- paste("noIntGenes is a character vector.",
2464
-##                         "This is probably not what you want, and will",
2465
-##                         "likely result in an error downstream.",
2466
-##                         "You can get messages like",
2467
-##                         " 'not compatible with requested type', and others.",
2468
-##                         "We are stopping.")
2469
-##             stop(wm)
2470
-##         }
2471
-
2472
-##         mg <- max(geneModule[, "GeneNumID"])
2473
-##         gnum <- seq_along(noIntGenes) + mg
2474
-##         if(!is.null(names(noIntGenes))) {
2475
-##             ng <- names(noIntGenes)
2476
-##             if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE)
2477
-##                 || grepl(":", ng, fixed = TRUE))
2478
-##                 stop("The name of some noIntGenes contain a ',' or a '>' or a ':'")
2479
-##             if(any(ng %in% geneModule[, "Gene"] ))
2480
-##                 stop("A gene in noIntGenes also present in the other terms")
2481
-##             if(any(duplicated(ng)))
2482
-##                 stop("Duplicated gene names in geneNoInt")
2483
-##             if(any(is.na(ng)))
2484
-##                 stop("In noIntGenes some genes have names, some don't.",
2485
-##                      " Name all of them, or name none of them.")
2486
-##         } else {
2487
-##             ng <- gnum
2488
-##         }
2489
-##         geneNoInt <- data.frame(Gene = as.character(ng),
2490
-##                                 GeneNumID = gnum,
2491
-##                                 s = noIntGenes,
2492
-##                                 stringsAsFactors = FALSE)
2493
-##     } else {
2494
-##         geneNoInt <- data.frame()
2495
-##     }
2496
-
2497
-##     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
2498
-##              nrow(geneNoInt)) == 0)
2499
-##         stop("You have specified nothing!")
2500
-
2501
-##     if(calledBy == "allFitnessEffects") {
2502
-##         if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
2503
-##             graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
2504
-##         } else {
2505
-##             graphE <- NULL
2506
-##         }
2507
-##     } else {
2508
-##         graphE <- NULL
2509
-##     }
2510
-##     if(!is.null(drvNames)) {
2511
-##         drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
2512
-##         ## drivers should never be in the geneNoInt; Why!!!???
2513
-##         ## Catch the problem. This is an overkill,
2514
-##         ## so since we catch the issue, we could leave the geneNoInt. But
2515
-##         ## that should not be there in this call.
2516
-##         ## if(any(drvNames %in% geneNoInt$Gene)) {
2517
-##         ##     stop(paste("At least one gene in drvNames is a geneNoInt gene.",
2518
-##         ##                "That is not allowed.",
2519
-##         ##                "If that gene is a driver, pass it as gene in the epistasis",
2520
-##         ##                "component."))
2521
-##         ## }
2522
-##         ## drv <- getGeneIDNum(geneModule, NULL, drvNames)
2523
-##     } else {
2524
-##         ## we used to have this default
2525
-##         ## drv <- geneModule$GeneNumID[-1]
2526
-##         drv <- vector(mode = "integer", length = 0L)
2527
-##     }
2528
-
2529
-##     if(!keepInput) {
2530
-##         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
2531
-##     }
2532
-##     out <- list(long.rt = long.rt,
2533
-##                 long.epistasis = long.epistasis,
2534
-##                 long.orderEffects = long.orderEffects,
2535