Browse code

v. 2.99.9: fixed one-gene cases, nem in Readme, typos; removin unused code and v.1 from long tests

ramon diaz-uriarte (at Phelsuma) authored on 22/04/2021 10:27:49
Showing20 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 Package: OncoSimulR
2 2
 Type: Package
3 3
 Title: Forward Genetic Simulation of Cancer Progression with Epistasis 
4
-Version: 2.99.7
5
-Date: 2020-12-30
4
+Version: 2.99.9
5
+Date: 2021-04-22
6 6
 Authors@R: c(
7 7
 	      person("Ramon", "Diaz-Uriarte", role = c("aut", "cre"),	
8 8
  	   		     email = "rdiaz02@gmail.com"),
... ...
@@ -57,9 +57,6 @@ oncoSimulSample <- function(Nindiv,
57 57
                             max.memory = 2000,
58 58
                             max.wall.time.total = 600,
59 59
                             max.num.tries.total = 500 * Nindiv,
60
-                            ## well, obviously they are errors
61
-                            ## errorHitWallTime = TRUE,
62
-                            ## errorHitMaxTries = TRUE,
63 60
                             typeSample = "whole",
64 61
                             thresholdWhole = 0.5,
65 62
                             initMutant = NULL,
... ...
@@ -458,9 +455,6 @@ oncoSimulIndiv <- function(fp,
458 455
                            keepEvery = sampleEvery,
459 456
                            minDetectDrvCloneSz = "auto",
460 457
                            extraTime = 0,
461
-                           ## used to be this
462
-                           ## ifelse(model \%in\% c("Bozic", "Exp"), -9,
463
-                           ##                     5 * sampleEvery),
464 458
                            finalTime = 0.25 * 25 * 365,
465 459
                            onlyCancer = TRUE,
466 460
                            keepPhylog = FALSE,
... ...
@@ -585,84 +579,6 @@ oncoSimulIndiv <- function(fp,
585 579
     if(is_null_na(finalTime)) finalTime <- Inf
586 580
 
587 581
     if(is_null_na(sampleEvery)) stop("sampleEvery cannot be NULL or NA")
588
-    
589
-    ## if(!inherits(fp, "fitnessEffects")) {
590
-        ## if(any(unlist(lapply(list(fp, 
591
-        ##                           numPassengers,
592
-        ##                           s, sh), is.null)))) {
593
-        ##     m <- paste("You are using the old poset format.",
594
-        ##                "You must specify all of poset, numPassengers",
595
-        ##                "s, and sh.")
596
-        ##     stop(m)
597
-           
598
-        ## }
599
-        ## if(AND_DrvProbExit) {
600
-        ##     stop("The AND_DrvProbExit = TRUE setting is invalid",
601
-        ##          " with the old poset format.")
602
-        ## }
603
-        ## if(!is.null(muEF))
604
-        ##     stop("Mutator effects cannot be specified with the old poset format.")
605
-        ## if( length(initMutant) > 0)  
606
-        ##     warning("With the old poset format you can no longer use initMutant.",
607
-        ##             " The initMutant you passed will be ignored.")
608
-        ## ## stop("With the old poset, initMutant can only take a single value.")
609
-        ## if(!is_null_na(fixation))
610
-        ##     stop("'fixation' cannot be specified with the old poset format.")
611
-        ## ## Seeding C++ is now much better in new version
612
-        ## if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed
613
-        ##     ## name is a legacy. This is really the seed for the C++ generator.
614
-        ##     ## Nope, we cannot use 2^32, because as.integer will fail.
615
-        ##     seed <- as.integer(round(runif(1, min = 0, max = 2^16)))
616
-        ## }
617
-        ## if(verbosity >= 2)
618
-        ##     cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
619
-
620
-        ## if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
621
-        ## ## if(message.v1)
622
-        ## ##     message("You are using the old poset format. Consider using the new one.")
623
-   
624
-    
625
-        ## ## A simulation stops if cancer or finalTime appear, the first
626
-        ## ## one. But if we set onlyCnacer = FALSE, we also accept simuls
627
-        ## ## without cancer (or without anything)
628
-        
629
-        ## op <- try(oncoSimul.internal(poset = fp, ## restrict.table = rt,
630
-        ##                              ## numGenes = numGenes,
631
-        ##                              numPassengers = numPassengers,
632
-        ##                              typeCBN = "CBN",
633
-        ##                              birth = birth,
634
-        ##                              s = s,
635
-        ##                              death = death,  
636
-        ##                              mu =  mu,  
637
-        ##                              initSize =  initSize, 
638
-        ##                              sampleEvery =  sampleEvery,  
639
-        ##                              detectionSize =  detectionSize, 
640
-        ##                              finalTime = finalTime, 
641
-        ##                              initSize_species = 2000, 
642
-        ##                              initSize_iter = 500, 
643
-        ##                              seed = seed, 
644
-        ##                              verbosity = verbosity, 
645
-        ##                              speciesFS = 10000,  
646
-        ##                              ratioForce = 2,
647
-        ##                              typeFitness = typeFitness,
648
-        ##                              max.memory = max.memory,
649
-        ##                              mutationPropGrowth = mutationPropGrowth,                                   
650
-        ##                              initMutant = -1, 
651
-        ##                              max.wall.time = max.wall.time,
652
-        ##                              max.num.tries = max.num.tries,
653
-        ##                              keepEvery = keepEvery,  
654
-        ##                              ## alpha = 0.0015,  
655
-        ##                              sh = sh,
656
-        ##                              K = K, 
657
-        ##                              minDetectDrvCloneSz = minDetectDrvCloneSz,
658
-        ##                              extraTime = extraTime,
659
-        ##                              detectionDrivers = detectionDrivers,
660
-        ##                              onlyCancer = onlyCancer,
661
-        ##                              errorHitWallTime = errorHitWallTime,
662
-        ##                              errorHitMaxTries = errorHitMaxTries),
663
-        ##           silent = !verbosity)
664
-        ## objClass <- "oncosimul"
665
-    ## } else {
666 582
         s <- sh <- NULL ## force it.
667 583
         if(numPassengers != 0)
668 584
             warning(paste("Specifying numPassengers has no effect",
... ...
@@ -934,88 +850,6 @@ plot.oncosimulpop <- function(x, ask = TRUE,
934 850
 }
935 851
 
936 852
 
937
-## plot.oncosimul <- function(x, col = c(8, "orange", 6:1),
938
-##                            log = "y",
939
-##                            ltyClone = 2:6,
940
-##                            lwdClone = 0.9,
941
-##                            ltyDrivers = 1,
942
-##                            lwdDrivers = 3,
943
-##                            xlab = "Time units",
944
-##                            ylab = "Number of cells",
945
-##                            plotClones = TRUE,
946
-##                            plotDrivers = TRUE,
947
-##                            addtot = FALSE,
948
-##                            addtotlwd = 0.5,
949
-##                            yl = NULL,
950
-##                            thinData = FALSE,
951
-##                            thinData.keep = 0.1,
952
-##                            thinData.min = 2,
953
-##                            plotDiversity = FALSE,
954
-##                            ...
955
-##                            ) {
956
-
957
-##     if(thinData)
958
-##         x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
959
-
960
-##     ## uvx
961
-##     if(!inherits(x, "oncosimul2"))
962
-##         ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
963
-##     else {
964
-##         ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
965
-##     }
966
-    
967
-##     if(is.null(yl)) {
968
-##         if(log %in% c("y", "xy", "yx") )
969
-##             yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
970
-##         else
971
-##             yl <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
972
-##     }
973
-##     if(plotDiversity) {
974
-##         par(fig = c(0, 1, 0.8, 1))
975
-##         m1 <- par()$mar
976
-##         m <- m1
977
-##         m[c(1, 3)] <- c(0, 0.7)
978
-##         op <- par(mar = m )
979
-##         plotShannon(x)
980
-##         par(op)
981
-##         m1[c(3)] <- 0.2
982
-##         op <- par(mar = m1)
983
-##         par(fig = c(0, 1, 0, 0.8), new = TRUE)  
984
-##     }
985
-##     if(plotClones) {
986
-##         plotClones(x,
987
-##                    ndr = ndr, 
988
-##                    xlab = xlab,
989
-##                    ylab = ylab,
990
-##                    lty = ltyClone,
991
-##                    col = col, 
992
-##                    ylim = yl,
993
-##                    lwd = lwdClone,
994
-##                    axes = FALSE,
995
-##                    log = log,
996
-##                    ...)
997
-##     }
998
-
999
-##     if(plotClones && plotDrivers)
1000
-##         par(new = TRUE)
1001
-    
1002
-##     if(plotDrivers){
1003
-##         plotDrivers0(x,
1004
-##                      ndr,
1005
-##                      timescale = 1,
1006
-##                      trim.no.drivers = FALSE,
1007
-##                      xlab = "", ylab = "",
1008
-##                      lwd = lwdDrivers,
1009
-##                      lty = ltyDrivers,
1010
-##                      col = col, 
1011
-##                      addtot = addtot,
1012
-##                      addtotlwd = addtotlwd,
1013
-##                      log = log, ylim = yl,
1014
-##                      ...)
1015
-##     }
1016
-    
1017
-## }
1018
-
1019 853
 
1020 854
 plot.oncosimul <- function(x,
1021 855
                            show = "drivers", 
... ...
@@ -1511,10 +1345,6 @@ plotPoset <- function(x, names = NULL, addroot = FALSE,
1511 1345
         box()
1512 1346
 }
1513 1347
 
1514
-## this function seems to never be used
1515
-## plotAdjMat <- function(adjmat) {
1516
-##     plot(as(adjmat, "graphNEL"))
1517
-## }
1518 1348
 
1519 1349
 
1520 1350
 
... ...
@@ -1591,12 +1421,6 @@ plotClonePhylog <- function(x, N = 1, t = "last",
1591 1421
              "very fast, before any clones beyond the initial were ",
1592 1422
              "generated.")
1593 1423
     pc <- phylogClone(x, N, t, keepEvents)
1594
-    ## if(is.na(pc)) {
1595
-    ##     ## This should not be reachable, as caught before
1596
-    ##     ## where we check for nrow of PhylogDF   
1597
-    ##     warning("No clone phylogeny available. Exiting without plotting.")
1598
-    ##     return(NULL)
1599
-    ## }
1600 1424
         
1601 1425
     l0 <- igraph::layout.reingold.tilford(pc$g)
1602 1426
     if(!timeEvents) {
... ...
@@ -1724,198 +1548,6 @@ get.mut.vector <- function(x, timeSample, typeSample,
1724 1548
 }
1725 1549
 
1726 1550
 
1727
-## get.mut.vector <- function(x, timeSample, typeSample,
1728
-##                            thresholdWhole, popSizeSample) {
1729
-##     if(is.null(x$TotalPopSize)) {
1730
-##         warning(paste("It looks like this simulation never completed.",
1731
-##                       " Maybe it reached maximum allowed limits.",
1732
-##                       " You will get NAs"))
1733
-##         return(rep(NA, length(x$geneNames)))
1734
-##     }
1735
-##     the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
1736
-##     if(the.time < 0) { 
1737
-##         return(rep(NA, nrow(x$Genotypes)))
1738
-##     } 
1739
-##     pop <- x$pops.by.time[the.time, -1]
1740
-    
1741
-##     if(all(pop == 0)) {
1742
-##         stop("You found a bug: this should never happen")
1743
-##     }
1744
-    
1745
-##     if(typeSample %in% c("wholeTumor", "whole")) {
1746
-##         popSize <- x$PerSampleStats[the.time, 1]
1747
-##         return( as.numeric((tcrossprod(pop,
1748
-##                                        x$Genotypes)/popSize) >= thresholdWhole) )
1749
-##     } else if (typeSample %in%  c("singleCell", "single")) {
1750
-
1751
-##         return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
1752
-##     } else {
1753
-##         stop("Unknown typeSample option")
1754
-##     }
1755
-## }
1756
-
1757
-
1758
-
1759
-
1760
-
1761
-
1762
-
1763
-
1764
-
1765
-
1766
-
1767
-
1768
-## oncoSimul.internal <- function(poset, ## restrict.table,
1769
-##                                numPassengers, 
1770
-##                                ## numGenes,
1771
-##                                typeCBN,
1772
-##                                birth, 
1773
-##                                s,
1774
-##                                death,
1775
-##                                mu,
1776
-##                                initSize,
1777
-##                                sampleEvery,
1778
-##                                detectionSize,
1779
-##                                finalTime,
1780
-##                                initSize_species,
1781
-##                                initSize_iter,
1782
-##                                seed,
1783
-##                                verbosity,
1784
-##                                speciesFS,
1785
-##                                ratioForce,
1786
-##                                typeFitness,
1787
-##                                max.memory,
1788
-##                                mutationPropGrowth, ## make it explicit
1789
-##                                initMutant,
1790
-##                                max.wall.time,
1791
-##                                keepEvery,
1792
-##                                alpha,
1793
-##                                sh,                               
1794
-##                                K,
1795
-##                                ## endTimeEvery,
1796
-##                                detectionDrivers,
1797
-##                                onlyCancer,
1798
-##                                errorHitWallTime,
1799
-##                                max.num.tries,
1800
-##                                errorHitMaxTries,
1801
-##                                minDetectDrvCloneSz,
1802
-##                                extraTime) {
1803
-
1804
-##     ## the value of 20000, in megabytes, for max.memory sets a limit of ~ 20 GB
1805
-  
1806
-
1807
-##     ## if(keepEvery < sampleEvery)
1808
-##     ##     warning("setting keepEvery to sampleEvery")
1809
-
1810
-##     ## a backdoor to allow passing restrictionTables directly
1811
-##     if(inherits(poset, "restrictionTable"))
1812
-##         restrict.table <- poset
1813
-##     else
1814
-##         restrict.table <- poset.to.restrictTable(poset)
1815
-##     numDrivers <- nrow(restrict.table)
1816
-##     numGenes <- (numDrivers + numPassengers)
1817
-##     if(numGenes < 2)
1818
-##         stop("There must be at least two genes (loci) in the fitness effects.",
1819
-##              "If you only care about a degenerate case with just one,",
1820
-##              "you can enter a second gene",
1821
-##              "with fitness effect of zero.")
1822
-##     if(numGenes > 64)
1823
-##         stop("Largest possible number of genes (loci) is 64 for version 1.",
1824
-##              "You are strongly encouraged to use the new specification",
1825
-##              "as in version 2.")
1826
-
1827
-##     ## These can never be set by the user
1828
-##     ## if(initSize_species < 10) {
1829
-##     ##     warning("initSize_species too small?")
1830
-##     ## }
1831
-##     ## if(initSize_iter < 100) {
1832
-##     ##     warning("initSize_iter too small?")
1833
-##     ## }
1834
-
1835
-##     ## numDrivers <- nrow(restrict.table)
1836
-##     if(length(unique(restrict.table[, 1])) != numDrivers)
1837
-##         stop("BAIL OUT NOW: length(unique(restrict.table[, 1])) != numDrivers)")
1838
-##     ddr <- restrict.table[, 1]
1839
-##     if(any(diff(ddr) != 1))
1840
-##         stop("BAIL OUT NOW:  any(diff(ddr) != 1")
1841
-##     ## sanity checks
1842
-##     if(max(restrict.table[, 1]) != numDrivers)
1843
-##         stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers")
1844
-##     if(numDrivers > numGenes)
1845
-##         stop("BAIL OUT NOW: numDrivers > numGenes")
1846
-    
1847
-##     non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1]
1848
-
1849
-
1850
-
1851
-
1852
-##     ## if( (is.null(endTimeEvery) || (endTimeEvery > 0)) &&
1853
-##     ##    (typeFitness %in% c("bozic1", "exp") )) {
1854
-##     ##     warning(paste("endTimeEvery will take a positive value. ",
1855
-##     ##                   "This will make simulations not stop until the next ",
1856
-##     ##                   "endTimeEvery has been reached. Thus, in simulations ",
1857
-##     ##                   "with very fast growth, simulations can take a long ",
1858
-##     ##                   "time to finish, or can hit the wall time limit. "))
1859
-##     ## }
1860
-##     ## if(is.null(endTimeEvery))
1861
-##     ##     endTimeEvery <- keepEvery
1862
-##     ## if( (endTimeEvery > 0) && (endTimeEvery %% keepEvery) )
1863
-##     ##     warning("!(endTimeEvery %% keepEvery)")
1864
-##     ## a sanity check in restricTable, so no neg. indices for the positive deps
1865
-##     neg.deps <- function(x) {
1866
-##         ## checks a row of restrict.table
1867
-##         numdeps <- x[2]
1868
-##         if(numdeps) {
1869
-##             deps <- x[3:(3+numdeps - 1)]
1870
-##             return(any(deps < 0))
1871
-##         } else FALSE
1872
-##     }
1873
-##     if(any(apply(restrict.table, 1, neg.deps)))
1874
-##         stop("BAIL OUT NOW: Negative dependencies in restriction table")
1875
-
1876
-##     ## transpose the table
1877
-##     rtC <- convertRestrictTable(restrict.table)
1878
-
1879
-    
1880
-##     return(c(
1881
-##         BNB_Algo5(restrictTable = rtC,
1882
-##         numDrivers = numDrivers,
1883
-##         numGenes = numGenes,
1884
-##         typeCBN_= typeCBN,
1885
-##         s = s, 
1886
-##         death = death,
1887
-##         mu = mu,
1888
-##         initSize = initSize,
1889
-##         sampleEvery = sampleEvery,
1890
-##         detectionSize = detectionSize,
1891
-##         finalTime = finalTime,
1892
-##         initSp = initSize_species,
1893
-##         initIt = initSize_iter,
1894
-##         seed = seed,
1895
-##         verbosity = verbosity,
1896
-##         speciesFS = speciesFS,
1897
-##         ratioForce = ratioForce,
1898
-##         typeFitness_ = typeFitness,
1899
-##         maxram = max.memory,
1900
-##         mutationPropGrowth = as.integer(mutationPropGrowth),
1901
-##         initMutant = initMutant,
1902
-##         maxWallTime = max.wall.time,
1903
-##         keepEvery = keepEvery,
1904
-##         sh = sh,
1905
-##         K = K,
1906
-##         detectionDrivers = detectionDrivers,
1907
-##         onlyCancer = onlyCancer,
1908
-##         errorHitWallTime = errorHitWallTime,
1909
-##         maxNumTries = max.num.tries,
1910
-##         errorHitMaxTries = errorHitMaxTries,
1911
-##         minDetectDrvCloneSz = minDetectDrvCloneSz,
1912
-##         extraTime = extraTime
1913
-##     ),
1914
-##     NumDrivers = numDrivers
1915
-##              ))
1916
-
1917
-## }
1918
-
1919 1551
 OncoSimulWide2Long <- function(x) {
1920 1552
     ## Put data in long format, for ggplot et al
1921 1553
     
... ...
@@ -1947,23 +1579,6 @@ OncoSimulWide2Long <- function(x) {
1947 1579
 
1948 1580
 
1949 1581
 
1950
-## We are not using this anymore
1951
-## create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
1952
-##     if(tmp$NumClones > 1) {
1953
-##         NumMutations <- apply(tmp$Genotypes, 2, sum)
1954
-##         muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
1955
-##                               t(apply(tmp$pops.by.time[, -c(1),
1956
-##                                                        drop = FALSE], 1,
1957
-##                                       function(x) tapply(x,
1958
-##                                                          NumMutations, sum))))
1959
-##         colnames(muts.by.time)[c(1)] <- "Time"
1960
-##     } else {
1961
-##         muts.by.time <- tmp$pops.by.time
1962
-##     }
1963
-##     return(muts.by.time)
1964
-## } 
1965
-
1966
-
1967 1582
 create.drivers.by.time <- function(tmp, ndr) {
1968 1583
     ## CountNumDrivers <- apply(tmp$Genotypes[1:numDrivers, ,drop = FALSE], 2, sum)
1969 1584
     CountNumDrivers <- ndr
... ...
@@ -2084,248 +1699,9 @@ is_null_na <- function(x) {
2084 1699
 
2085 1700
 
2086 1701
 
2087
-
2088
-
2089
-
2090
-
2091
-
2092
-
2093
-
2094
-
2095
-
2096 1702
 ## simpsonI <- function(x) {
2097 1703
 ##     sx <- sum(x)
2098 1704
 ##     p <- x/sx
2099 1705
 ##     p <- p[p > 0]
2100 1706
 ##     return(sum(p^2)))
2101 1707
 ## }
2102
-
2103
-## plotSimpson <- function(z) {
2104
-    
2105
-##     h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE],
2106
-##                1, shannonI)
2107
-##     plot(x = z$pops.by.time[, 1],
2108
-##          y = h, lty = "l", xlab = "", ylab = "H")
2109
-## }
2110
-
2111
-
2112
-## plotClones <- function(z, ndr = NULL, na.subs = TRUE,
2113
-##                        log = "y", type = "l",
2114
-##                        lty = 1:8, col = 1:9, ...) {
2115
-
2116
-##     ## if given ndr, we order columns based on ndr, so clones with more
2117
-##     ## drivers are plotted last
2118
-
2119
-##     y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
2120
-    
2121
-##     if(na.subs){
2122
-##         y[y == 0] <- NA
2123
-##     }
2124
-##     if(!is.null(ndr)) {
2125
-##         ## could be done above, to avoid creating
2126
-##         ## more copies
2127
-##         oo <- order(ndr)
2128
-##         y <- y[, oo, drop = FALSE]
2129
-##         ndr <- ndr[oo]
2130
-##         col <- col[ndr + 1]
2131
-##     }
2132
-##     matplot(x = z$pops.by.time[, 1],
2133
-##             y = y,
2134
-##             log = log, type = type,
2135
-##             col = col, lty = lty,
2136
-##             ...)
2137
-##     box()
2138
-## }
2139
-
2140
-
2141
-
2142
-
2143
-
2144
-## No longer used
2145
-## rtNoDep <- function(numdrivers) {
2146
-##     ## create a restriction table with no dependencies
2147
-##     x <- matrix(nrow = numdrivers, ncol = 3)
2148
-##     x[, 1] <- 1:numdrivers
2149
-##     x[, 2] <- 0
2150
-##     x[, 3] <- -9
2151
-##     return(x)
2152
-## }
2153
-
2154
-
2155
-## Simulate from generative model. This is a quick function, and is most
2156
-## likely wrong! Never used for anything.
2157
-
2158
-## simposet <- function(poset, p) {
2159
-##     ## if (length(parent.nodes) != length (child.nodes)){
2160
-##     ##     print("An Error Occurred")
2161
-##     ## }
2162
-##     ##    else {
2163
-##     num.genes <- max(poset) - 1 ## as root is not a gene
2164
-##     genotype <-t(c(1, rep(NA, num.genes)))
2165
-##     colnames(genotype) <- as.character(0:num.genes)
2166
-    
2167
-    
2168
-##     poset$runif <- runif(nrow(poset))
2169
-##     ## this.relation.prob.OK could be done outside, but having it inside
2170
-##     ## the loop would allow to use different thresholds for different
2171
-##     ## relationships
2172
-##     for (i in (1:nrow(poset))) {
2173
-##         child <- poset[i, 2]
2174
-##         this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p)
2175
-##         the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype. 
2176
-##         if (is.na(genotype[child])){
2177
-##             genotype[child] <- this.relation.prob.OK * the.parent  
2178
-##         }
2179
-##         else
2180
-##             genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent)
2181
-##     }
2182
-##     ##    }
2183
-    
2184
-##     return(genotype)
2185
-## }
2186
-
2187
-
2188
-## to plot and adjacency matrix in this context can do
2189
-## plotPoset(intAdjMatToPoset(adjMat))
2190
-## where intAdjMatToPoset is from best oncotree code: generate-random-trees.
2191
-## No! the above is simpler
2192
-
2193
-
2194
-
2195
-
2196
-## get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
2197
-##     ## Obtain, from  results from a simulation run, the vector
2198
-##     ## of 0/1 corresponding to each gene.
2199
-    
2200
-##     ## threshold is the min. proportion for a mutation to be detected
2201
-##     ## We are doing whole tumor sampling here, as in Sprouffske
2202
-
2203
-##     ## timeSample: do we sample at end, or at a time point, chosen
2204
-##     ## randomly, from all those with at least one driver?
2205
-    
2206
-##     if(timeSample == "last") {
2207
-##         if(tmp$TotalPopSize == 0)
2208
-##             warning(paste("Final population size is 0.",
2209
-##                           "Thus, there is nothing to sample with ",
2210
-##                           "sampling last. You will get NAs"))
2211
-##         return(as.numeric(
2212
-##             (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
2213
-##                         tmp$Genotypes)/tmp$TotalPopSize) > threshold))
2214
-##     } else if (timeSample %in% c("uniform", "unif")) {
2215
-##           candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
2216
-          
2217
-##           if (length(candidate.time) == 0) {
2218
-##               warning(paste("There is not a single sampled time",
2219
-##                             "at which there are any mutants.",
2220
-##                             "Thus, no uniform sampling possible.",
2221
-##                             "You will get NAs"))
2222
-##               return(rep(NA, nrow(tmp$Genotypes)))
2223
-##           } else if (length(candidate.time) == 1) {
2224
-##                 the.time <- candidate.time
2225
-##             } else {
2226
-##                   the.time <- sample(candidate.time, 1)
2227
-##               }
2228
-##           pop <- tmp$pops.by.time[the.time, -1]
2229
-##           popSize <- tmp$PerSampleStats[the.time, 1]
2230
-##           ## if(popSize == 0)
2231
-##           ##     warning(paste("Population size at this time is 0.",
2232
-##           ##                   "Thus, there is nothing to sample at this time point.",
2233
-##           ##                   "You will get NAs"))
2234
-##           return( as.numeric((tcrossprod(pop,
2235
-##                                        tmp$Genotypes)/popSize) > threshold) )
2236
-##       }
2237
-## }
2238
-
2239
-
2240
-
2241
-##           the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
2242
-##           if(length(the.time) == 0) {
2243
-##               warning(paste("There are no clones with drivers at any time point.",
2244
-##                             "No uniform sampling possible.",
2245
-##                             "You will get a vector of NAs."))
2246
-##             return(rep(NA, nrow(tmp$Genotypes)))  
2247
-##           }
2248
-## get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
2249
-##     ## No threshold, as single cell.
2250
-
2251
-##     ## timeSample: do we sample at end, or at a time point, chosen
2252
-##     ## randomly, from all those with at least one driver?
2253
-    
2254
-##     if(timeSample == "last") {
2255
-##         the.time <- nrow(tmp$pops.by.time)
2256
-##     } else if (timeSample %in% c("uniform", "unif")) {
2257
-##          candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
2258
-         
2259
-##          if (length(candidate.time) == 0) {
2260
-##              warning(paste("There is not a single sampled time",
2261
-##                            "at which there are any mutants.",
2262
-##                            "Thus, no uniform sampling possible.",
2263
-##                            "You will get NAs"))
2264
-##              return(rep(NA, nrow(tmp$Genotypes)))
2265
-##          } else if (length(candidate.time) == 1) {
2266
-##                the.time <- candidate.time
2267
-##            } else {
2268
-##                  the.time <- sample(candidate.time, 1)
2269
-##              }
2270
-
2271
-##      }
2272
-##     pop <- tmp$pops.by.time[the.time, -1]
2273
-##     ##       popSize <- tmp$PerSampleStats[the.time, 1]
2274
-##     ## genot <- sample(seq_along(pop), 1, prob = pop)
2275
-##     if(all(pop == 0)) {
2276
-##         warning(paste("All clones have a population size of 0",
2277
-##                       "at the chosen time. Nothing to sample.",
2278
-##                       "You will get NAs"))
2279
-##         return(rep(NA, nrow(tmp$Genotypes)))
2280
-##     } else {
2281
-##           return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
2282
-##       }
2283
-## }
2284
-
2285
-
2286
-## get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
2287
-##                            thresholdWhole = 0.5) {
2288
-##     if(typeSample %in% c("wholeTumor", "whole")) {
2289
-##         get.mut.vector.whole(x, timeSample = timeSample,
2290
-##                              threshold = thresholdWhole)
2291
-##     } else if(typeSample %in%  c("singleCell", "single")) {
2292
-##         get.mut.vector.singlecell(x, timeSample = timeSample)
2293
-##     }
2294
-## }
2295
-
2296
-
2297
-
2298
-
2299
-
2300
-## plotClonePhylog <- function(x, timeEvent = FALSE,
2301
-##                             showEvents = TRUE,
2302
-##                             fixOverlap = TRUE) {
2303
-##     if(!inherits(x, "oncosimul2"))
2304
-##         stop("Phylogenetic information is only stored with v >=2")
2305
-##     if(nrow(x$other$PhylogDF) == 0)
2306
-##         stop("It seems you run the simulation with keepPhylog= FALSE")
2307
-##     ## requireNamespace("igraph")
2308
-##     df <- x$other$PhylogDF
2309
-##     if(!showEvents) {
2310
-##         df <- df[!duplicated(df[, c(1, 2)]), ]
2311
-##     }
2312
-##     g <- igraph::graph.data.frame(df)
2313
-##     l0 <- igraph::layout.reingold.tilford(g)
2314
-##     if(!timeEvent) {
2315
-##         plot(g, layout = l0)
2316
-##     } else {
2317
-##         l1 <- l0
2318
-##         indexAppear <- match(V(g)$name, as.character(df[, 2]))
2319
-##         firstAppear <- df$time[indexAppear]
2320
-##         firstAppear[1] <- 0
2321
-##         l1[, 2] <- (max(firstAppear) - firstAppear)
2322
-##         if(fixOverlap) {
2323
-##             dx <- which(duplicated(l1[, 1]))
2324
-##             if(length(dx)) {
2325
-##                 ra <- range(l1[, 1])
2326
-##                 l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
2327
-##             }
2328
-##         }
2329
-##         plot(g, layout = l1)         
2330
-##     }
2331
-## }
... ...
@@ -30,6 +30,3 @@ accessibleGenotypes_former <- function(y, f, numMut, th) {
30 30
     .Call('OncoSimulR_accessibleGenotypes_former', PACKAGE = 'OncoSimulR', y, f, numMut, th)
31 31
 }
32 32
 
33
-## readFitnessEffects <- function(rFE, echo) {
34
-##     invisible(.Call('OncoSimulR_readFitnessEffects', PACKAGE = 'OncoSimulR', rFE, echo))
35
-## }
... ...
@@ -319,65 +319,6 @@ faster_accessible_genotypes_R <- function(x, th) {
319 319
 }
320 320
 
321 321
 
322
-## ## This uses slam, but that is actually slower because
323
-## ## of the assignment
324
-## faster_accessible_genots_slam <- function(x, th = 0) {
325
-
326
-##     ## Given a genotype matrix, return the genotypes that are accessible
327
-##     ## via creating a directed adjacency matrix between genotypes
328
-##     ## connected (i.e., those that differ by gaining one mutation). 0
329
-##     ## means not connected, 1 means connected.
330
-    
331
-##     ## There is a more general function in OncoSimulR that will give the
332
-##     ## fitness difference. But not doing the difference is faster than
333
-##     ## just setting a value, say 1, if all we want is to keep track of
334
-##     ## accessible ones. And by using only 0/1 we can store only an
335
-##     ## integer. And no na.omits, etc. Is too restricted? Yes. But for
336
-##     ## simulations and computing just accessible genotypes, probably a
337
-##     ## hell of a lot faster.
338
-
339
-##     ## Well, this is not incredibly fast either.
340
-    
341
-##     ## Make sure sorted, so ancestors always before descendants
342
-##     rs0 <- rowSums(x[, -ncol(x)])
343
-##     x <- x[order(rs0), ]
344
-##     rm(rs0)
345
-    
346
-##     y <- x[, -ncol(x)]
347
-##     f <- x[, ncol(x)]
348
-##     rs <- rowSums(y)
349
-
350
-##     ## If 0, not accessible
351
-##     adm <- slam::simple_triplet_zero_matrix(nrow = length(rs), ncol = length(rs),
352
-##                                       mode = "integer")
353
-##     for(i in 1:length(rs)) { ## i is the current genotype
354
-##         candidates <- which(rs == (rs[i] + 1))
355
-##         for(j in candidates) {
356
-##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
357
-##             ## if(sumdiff == 1)
358
-##             if( (sum(abs(y[j, ] - y[i, ])) == 1) &&
359
-##                 ( (f[j] - f[i]) >= th ) )
360
-##                 adm[i, j] <- 1L
361
-##         }
362
-##     }
363
-
364
-##     colnames(adm) <- rownames(adm) <- 1:ncol(adm)
365
-##     admtmp <- adm[, -1, drop = FALSE] ## we do not want the root column.
366
-##     while(TRUE) {
367
-##         ## We remove inaccessible cols (genotypes) and the corresponding
368
-##         ## rows repeatedly until nothing left to remove; any column left
369
-##         ## is therefore accessible throw at least one path.
370
-
371
-##         ## inacc_col <- which(slam::colapply_simple_triplet_matrix(admtmp, FUN = sum) == 0L)
372
-##         inacc_col <- which(slam::col_sums(admtmp) == 0L)
373
-##         if(length(inacc_col) == 0) break;
374
-##         inacc_row <- inacc_col + 1 ## recall root row is left
375
-##         admtmp <- admtmp[-inacc_row, -inacc_col, drop = FALSE]
376
-##     }
377
-##     return(as.numeric(c(colnames(adm)[1], colnames(admtmp))))
378
-## }
379
-
380
-
381 322
 generate_matrix_genotypes <- function(g) {
382 323
     ## FIXME future: do this for order too? Only if rfitness for order.
383 324
     ## Given a number of genes, generate all possible genotypes.
... ...
@@ -470,25 +411,6 @@ genot_to_adj_mat <- function(x) {
470 411
 }
471 412
 
472 413
 
473
-## ## to move above to C++ note that loop can be
474
-## for(i in 1:length(rs)) { ## i is the current genotype
475
-##     for(j in (i:length(rs))) {
476
-##         if(rs[j] > (rs[i] + 1)) break;
477
-##         else if(rs[j] == (rs[i] + 1)) {
478
-##             ## and use here my HammingDistance function
479
-##             ## sumdiff <- sum(abs(y[j, ] - y[i, ]))
480
-##             ## if(sumdiff == 1) adm[i, j] <- (f[j] - f[i])
481
-##             if(HammingDistance(y[j, ], y[i, ]) == 1) adm[i, j] = (f[j] - f[i]);
482
-##             }
483
-##     }
484
-## }
485
-
486
-## actually, all that is already in accessibleGenotypes except for the
487
-## filling up of adm.
488
-
489
-
490
-
491
-
492 414
 
493 415
 peak_valley <- function(x) {
494 416
     ## FIXME: when there are no identical entries, all this
... ...
@@ -307,90 +307,6 @@ to_genotFitness_std <- function(x,
307 307
     return(x)
308 308
 }
309 309
 
310
-## Deprecated after flfast
311
-## to_genotFitness_std is faster and has better error checking
312
-## and is very similar and does not use
313
-## the genot_fitness_to_epistasis, which is not reasonable anymore.
314
-
315
-## from_genotype_fitness <- function(x) {
316
-##     ## Would break with output from allFitnessEffects and
317
-##     ## output from allGenotypeAndMut
318
-
319
-##     ## For the very special and weird case of
320
-##     ## a matrix but only a single gene so with a 0 and 1
321
-##     ## No, this is a silly and meaningless case.
322
-##     ## if( ( ncol(x) == 2 ) && (nrow(x) == 1) && (x[1, 1] == 1) ) {
323
-
324
-##     ## } else  blabla:
325
-
326
-##     if(! (inherits(x, "matrix") || inherits(x, "data.frame")) )
327
-##         stop("Input must inherit from matrix or data.frame.")
328
-
329
-##     ## if((ncol(x) > 2) && !(inherits(x, "matrix"))
330
-##     ##     stop(paste0("Genotype fitness input either two-column data frame",
331
-##     ##          " or a numeric matrix with > 2 columns."))
332
-##     ## if( (ncol(x) > 2) && (nrow(x) == 1) )
333
-##     ##     stop(paste0("It looks like you have a matrix for a single genotype",
334
-##     ##                 " of a single gene. For this degenerate cases use",
335
-##     ##                 " a data frame specification."))
336
-
337
-##     if(ncol(x) > 2) {
338
-##         if(inherits(x, "matrix")) {
339
-##             if(!is.numeric(x))
340
-##                 stop("A genotype fitness matrix/data.frame must be numeric.")
341
-##         } else if(inherits(x, "data.frame")) {
342
-##             if(!all(unlist(lapply(x, is.numeric))))
343
-##                 stop("A genotype fitness matrix/data.frame must be numeric.")
344
-##         }
345
-
346
-##         ## We are expecting here a matrix of 0/1 where columns are genes
347
-##         ## except for the last column, that is Fitness
348
-##         ## Of course, can ONLY work with epistastis, NOT order
349
-##         return(genot_fitness_to_epistasis(x))
350
-##     } else {
351
-##         if(!inherits(x, "data.frame"))
352
-##             stop("genotFitness: if two-column must be data frame")
353
-##         ## Make sure no factors
354
-##         if(is.factor(x[, 1])) x[, 1] <- as.character(x[, 1])
355
-##         ## Make sure no numbers
356
-##         if(any(is.numeric(x[, 1])))
357
-##             stop(paste0("genotFitness: first column of data frame is numeric.",
358
-##                         " Ambiguous and suggests possible error. If sure,",
359
-##                         " enter that column as character"))
360
-
361
-##         omarker <- any(grepl(">", x[, 1], fixed = TRUE))
362
-##         emarker <- any(grepl(",", x[, 1], fixed = TRUE))
363
-##         nogoodepi <- any(grepl(":", x[, 1], fixed = TRUE))
364
-##         ## if(omarker && emarker) stop("Specify only epistasis or order, not both.")
365
-##         if(nogoodepi && emarker) stop("Specify the genotypes separated by a ',', not ':'.")
366
-##         if(nogoodepi && !emarker) stop("Specify the genotypes separated by a ',', not ':'.")
367
-##         ## if(nogoodepi && omarker) stop("If you want order, use '>' and if epistasis ','.")
368
-##         ## if(!omarker && !emarker) stop("You specified neither epistasis nor order")
369
-##         if(omarker) {
370
-##             ## do something. To be completed
371
-##             stop("This code not yet ready")
372
-##             ## You can pass to allFitnessEffects genotype -> fitness mappings that
373
-##             ## involve epistasis and order. But they must have different
374
-##             ## genes. Otherwise, it is not manageable.
375
-##         }
376
-##         if( emarker || ( (!omarker) && (!emarker) && (!nogoodepi)) ) {
377
-##             ## the second case above corresponds to passing just single letter genotypes
378
-##             ## as there is not a single marker
379
-##             x <- x[, c(1, 2), drop = FALSE]
380
-##             if(!all(colnames(x) == c("Genotype", "Fitness"))) {
381
-##                 message("Column names of object not Genotype and Fitness.",
382
-##                         " Renaming them assuming that is what you wanted")
383
-##                 colnames(x) <- c("Genotype", "Fitness")
384
-##             }
385
-##             if((!omarker) && (!emarker) && (!nogoodepi)) {
386
-##                 message("All single-gene genotypes as input to from_genotype_fitness")
387
-##             }
388
-##             ## Yes, we need to do this to  scale the fitness and put the "-"
389
-##             return(genot_fitness_to_epistasis(allGenotypes_to_matrix(x)))
390
-##         }
391
-##     }
392
-## }
393
-
394 310
 
395 311
 
396 312
 
... ...
@@ -504,7 +420,13 @@ allGenotypes_to_matrix <- function(x,
504 420
     )
505 421
 
506 422
     all_genes <- sort(unique(unlist(splitted_genots)))
507
-
423
+    if(length(all_genes) < 2) stop(paste("There must be at least two genes (loci)",
424
+                                         "in the fitness effects.",
425
+                                         "If you only care about a case with",
426
+                                         "a single one (or none) enter gene(s)",
427
+                                         "with a fitness effect of zero.",
428
+                                         "For freq.dep.fitness, create another ",
429
+                                         "genotype that always has fitness zero."))
508 430
     m <- matrix(0, nrow = length(splitted_genots), ncol = length(all_genes))
509 431
     the_match <- lapply(
510 432
         splitted_genots,
... ...
@@ -522,7 +444,7 @@ allGenotypes_to_matrix <- function(x,
522 444
     if (frequencyDependentFitness) {
523 445
         m <- as.data.frame(m)
524 446
         m[, 1:length(all_genes)] <- apply(
525
-            m[, 1:length(all_genes)],
447
+            m[, 1:length(all_genes), drop = FALSE],
526 448
             2,
527 449
             as.numeric
528 450
         )
... ...
@@ -607,78 +529,6 @@ Magellan_stats <- function(x, max_num_genotypes = 2000,
607 529
 }
608 530
 
609 531
 
610
-## Former version, that always tries to give a vector
611
-## and that uses an external executable.
612
-## Magellan_stats and Magellan_draw cannot be tested
613
-## routinely, as they depend on external software
614
-Magellan_stats_former <- function(x, max_num_genotypes = 2000,
615
-                           verbose = FALSE,
616
-                           use_log = TRUE,
617
-                           short = TRUE,
618
-                           fl_statistics = "fl_statistics",
619
-                           replace_missing = FALSE) { # nocov start
620
-    ## I always use
621
-    ## if(!is.null(x) && is.null(file))
622
-    ##     stop("one of object or file name")
623
-    ## if(is.null(file))
624
-    fn <- tempfile()
625
-    fnret <- tempfile()
626
-    if(verbose)
627
-        cat("\n Using input file", fn, " and output file ", fnret, "\n")
628
-
629
-    if(use_log) {
630
-        logarg <- "-l"
631
-    } else {
632
-        logarg <- NULL
633
-    }
634
-    if(short) {
635
-        shortarg <- "-s"
636
-    } else {
637
-        shortarg <- NULL
638
-    }
639
-
640
-    if(replace_missing) {
641
-        zarg <- "-z"
642
-    } else {
643
-        zarg <- NULL
644
-    }
645
-
646
-    to_Magellan(x, fn, max_num_genotypes = max_num_genotypes)
647
-    ## newer versions of Magellan provide some extra values to standard output
648
-    call_M <- system2(fl_statistics,
649
-                      args = paste(fn, shortarg, logarg, zarg, "-o", fnret),
650
-                      stdout = NULL)
651
-    if(short) {
652
-        ## tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[-1])
653
-
654
-        tmp <- as.vector(read.table(fnret, skip = 1, header = TRUE)[c(-1)])
655
-        ## Make names more explicit, but check we have what we think we have
656
-        ## New versions of Magellan produce different output apparently of variable length
657
-        stopifnot(length(tmp) >= 23) ## 23) ## variable length
658
-        stopifnot(identical(names(tmp)[1:13], ## only some
659
-                            c("ngeno", "npeaks", "nsinks", "gamma", "gamma.", "r.s",
660
-                              "nchains", "nsteps", "nori", "depth", "magn", "sign",
661
-                              "rsign"))) ## , "w.1.", "w.2.", "w.3..", "mode_w", "s.1.",
662
-        ## "s.2.", "s.3..", "mode_s", "pairs_s", "outD_v")))
663
-        if(length(tmp) >= 24) ## the new version
664
-            stopifnot(identical(names(tmp)[c(20, 24)],
665
-                                c("steps_m", "mProbOpt_0")))
666
-        ## steps_m: the mean number of steps over the entire landscape to
667
-        ## reach the global optimum
668
-        ## mProbOpt_0: The mean probability to
669
-        ## reach that optimum (again averaged over the entire landscape).
670
-        ## but if there are multiple optima, there are many of these
671
-        names(tmp)[1:13] <- c("n_genotypes", "n_peaks", "n_sinks", "gamma", "gamma_star",
672
-                        "r/s","n_chains", "n_steps", "n_origins", "max_depth",
673
-                        "epist_magn", "epist_sign", "epist_rsign")## ,
674
-                        ## "walsh_coef_1", "walsh_coef_2", "walsh_coef_3", "mode_walsh",
675
-                        ## "synerg_coef_1", "synerg_coef_2", "synerg_coef_3", "mode_synerg",
676
-        ## "std_dev_pairs", "var_outdegree")
677
-    } else {
678
-        tmp <- readLines(fnret)
679
-    }
680
-    return(tmp)
681
-} # nocov end
682 532
 
683 533
 Magellan_draw <- function(x, max_num_genotypes = 2000,
684 534
                           verbose = FALSE,
... ...
@@ -82,20 +82,11 @@ simOGraph <- function(n, h = ifelse(n >= 4, 4, n),
82 82
 
83 83
     ## Prune to remove indirect connections
84 84
     if(multilevelParent & removeDirectIndirect) {
85
-        ## adjMat <- transitiveReduction(adjMat)
86
-        ## calling transitive.closure not necessary, as that
87
-        ## is called inside transitive.reduction
88
-        ## trm <- nem::transitive.reduction(nem::transitive.closure(adjMat))
89
-
90 85
         ## could use relations package as
91
-        ## r1 <- relations::transitive_reduction(
92
-        ##       relations::transitive_closure(relations::as.relation(adjMat2)))
93
-        ## trm <- relation_incidence(r1)
94 86
         ## but storage mode is double, etc, etc.
95 87
         ## And would need to double check it is working as I think it is.
96 88
         ## For now, trying to use nem's code directly
97
-        
98
-        trm <- nem_transitive.reduction(adjMat)
89
+         trm <- nem_transitive.reduction(adjMat)
99 90
         stopifnot(all(trm %in% c(0L, 1L) ))
100 91
         storage.mode(trm) <- "integer"
101 92
         adjMat <- trm
... ...
@@ -164,72 +155,15 @@ connectIndiv <- function(parents, nparents, n) {
164 155
     return(c(0L,v)) ## added root
165 156
 }
166 157
 
167
-## Not used
168
-## findSuperParents <- function(x, adjMat) {
169
-##     parents <- which(adjMat[, x + 1]  == 1) - 1
170
-##     allP <- findAllParents(x, adjMat)
171
-##     return(setdiff(allP, parents))
172
-## }
173
-
174
-## findAllParents <- function(x, adjMat) {
175
-##     if(x == 0)
176
-##         return(NULL)
177
-##     else{
178
-##         p <- which(adjMat[, x + 1] == 1) - 1
179
-##         p1 <- unlist(lapply(p, function(x) findAllParents(x, adjMat)))
180
-##         return(c(p, p1))
181
-##     }
182
-## }
183
-
184
-## repeatedParents <- function(x, adjMat) {
185
-##     ap <- findAllParents(x, adjMat)
186
-##     dups <- duplicated(ap)
187
-##     dupP <- setdiff(ap[dups], 0)
188
-##     dupP
189
-## }
190
-
191
-
192
-## ## ## But this only works if a special order in the rows and columns
193
-## ## ## and will not work with the root row.
194
-## ## m1 <- matrix(0, ncol = 5, nrow = 5); colnames(m1) <- rownames(m1) <- LETTERS[1:5]
195
-## ## for(i in 1:4) m1[i, i+1] <- 1
196
-## ## library(ggm)
197
-## ## m1tc <- ggm::transClos(m1)
198
-## ## transitiveReduction(m1tc)
199
-
200
-
201 158
 ## ## Other R and BioC packages that will do transitive reduction:
202 159
 ## ## nem (BioC): works with adjacency matrices directly
203 160
 ## ## rBiopaxParser (BioC): a wrapper to nem
204 161
 ## ## rPref
205 162
 ## ## relations
206 163
 ## ## hasseDiagram
207
-
208
-## transitiveReduction <- function(adjMat) {
209
-##     ## Return the transitive reduction
210
-
211 164
 ## But note my bug report to BioC,
212
-
213 165
 ## https://support.bioconductor.org/p/91695/
214
-
215 166
 ## See discussion and comments on
216 167
 ## http://stackoverflow.com/a/6702198 
217 168
 ## and comments on http://stackoverflow.com/a/2372202
218 169
 ## So one need to do the transitive closure first.
219
-    
220
-##     ## We remove the direct connections. How? We search, for each node,
221
-##     ## for the set of all parents/grandparents/grandgrandparents/etc. If
222
-##     ## any of those ancestors is repeated, it means you go from that
223
-##     ## ancestor to the node in question through at least two different
224
-##     ## routes. Thus, ensure the direct is 0 (it might already be, no
225
-##     ## problem). Once you do that, you know there are not both indirect
226
-##     ## AND direct connections and thus you have the transitive reduction.
227
-##     for(i in ncol(adjMat):2) {
228
-##         dp <- repeatedParents( i - 1, adjMat)
229
-##         if(length(dp))
230
-##             adjMat[cbind(dp + 1, i)] <- 0L
231
-##     }
232
-##     return(adjMat)
233
-## }
234
-
235
-
... ...
@@ -1644,7 +1644,7 @@ nr_oncoSimul.internal <- function(rFE,
1644 1644
                    "allFitnessEffects"))
1645 1645
 
1646 1646
     if(countGenesFe(rFE) < 2) {
1647
-        stop("There must be at least two genes (loci) in the fitness effects.",
1647
+        stop("There must be at least two genes (loci) in the fitness effects. ",
1648 1648
              "If you only care about a degenerate case with just one,",
1649 1649
              "you can enter a second gene (locus)",
1650 1650
              "with fitness effect of zero.")
... ...
@@ -2249,702 +2249,6 @@ genotype_letterslabel <- function(df) {
2249 2249
 
2250 2250
 
2251 2251
 
2252
-## emptyFitnessEffects <- function() {
2253
-##     list(long.rt = list(),
2254
-##          long.epistasis = list(),
2255
-##          long.orderEffects = list(),
2256
-##          long.geneNoInt = list(),
2257
-##          geneModule = list(),
2258
-##          gMOneToOne = TRUE,
2259
-##          geneToModule = list(),
2260
-##          graph = NULL,
2261
-##          drv = vector(mode = "integer", length = 0)
2262
-##          )
2263
-## }
2264
-
2265
-
2266
-
2267
-### Later, for all the effects, we will do some kind of dplyr match?
2268
-
2269
-### a, b, c, in fitness, only a, c in mut.
2270
-### fitness table for a,b,c
2271
-### each row name transformed removing b (so leaving only present)
2272
-### each row transformed matched to row in mut table.
2273
-
2274
-## t1 <- data.frame(v1 = c("a,b", "a,c", "b"), v2 = c("b", "c", "b"), v3 = 1:3, stringsAsFactors = FALSE)
2275
-## t2 <- data.frame(v2 = c("b", "c"), v4 = c(11, 12), stringsAsFactors = FALSE)
2276
-## full_join(t1, t2, by = "v2")
2277
-
2278
-
2279
-
2280
-## FIXME
2281
-
2282
-## new exit code
2283
-## check:
2284
-## baseline < n2
2285
-## p2 < 1
2286
-
2287
-## baseline defaults to init size + .1
2288
-## use c < -9, n2 < -9, p2 < -9 for no values
2289
-## and check one of c or p2 and n2 are valid if using exit
2290
-
2291
-
2292
-
2293
-
2294
-
2295
-
2296
-
2297
-
2298
-
2299
-
2300
-## fVariablesL <- function (g, frequencyType) {
2301
-
2302
-##   if (is.null(g) | g == 0)
2303
-##     stop("Number of genes must be integer > 0")
2304
-
2305
-##   if(g > length(LETTERS))
2306
-##     stop(paste0("Number of genes must be < length(LETTERS).",
2307
-##                 " Please specify variables with numbers"))
2308
-
2309
-##   combinationsList <- list()
2310
-##   for (i in 0:g) {
2311
-##     combinationsList <- append(combinationsList,
2312
-##                                combn(LETTERS[1:g], i, list, simplify = TRUE))
2313
-##   }
2314
-
2315
-##   if (frequencyType == "abs"){
2316
-##     fsVector <-sapply(sapply(combinationsList,
2317
-##                              function(x) paste0(x, collapse = "_")),
2318
-##                       function(x) paste0("n_", x))
2319
-##   }else{
2320
-##     fsVector <-sapply(sapply(combinationsList,
2321
-##                              function(x) paste0(x, collapse = "_")),
2322
-##                       function(x) paste0("f_", x))
2323
-##   }
2324
-
2325
-##   return (fsVector)
2326
-## }
2327
-
2328
-
2329
-## ## Assuming we are using the full fitness landscapes (i.e., none of
2330
-## ## setting genotypes with 0 fitness are absent from the table)
2331
-## conversionTable <- function(g, frequencyType){
2332
-##   df <- data.frame(let = fVariablesL(g, frequencyType)[-1],
2333
-##                    num = fVariablesN(g, frequencyType)[-1],
2334
-##                    stringsAsFactors = FALSE)
2335
-##   return (df)
2336
-## }
2337
-
2338
-## findAndReplace <- function(str, conversionTable_input){
2339
-
2340
-##   pattern <- rev(setNames(as.character(conversionTable_input$num),
2341
-##                       conversionTable_input$let))
2342
-
2343
-##   str <- stringr::str_replace_all(string = str,
2344
-##                                   pattern = pattern)
2345
-##   return(str)
2346
-## }
2347
-
2348
-
2349
-
2350
-## Former version, with fitness landscape
2351
-## allFitnessORMutatorEffects <- function(rT = NULL,
2352
-##                                        epistasis = NULL,
2353
-##                                        orderEffects = NULL,
2354
-##                                        noIntGenes = NULL,
2355
-##                                        geneToModule = NULL,
2356
-##                                        drvNames = NULL,
2357
-##                                        keepInput = TRUE,
2358
-##                                        ## refFE = NULL,
2359
-##                                        calledBy = NULL) {
2360
-##     ## From allFitnessEffects. Generalized so we deal with Fitness
2361
-##     ## and mutator.
2362
-
2363
-##     ## restrictions: the usual rt
2364
-
2365
-##     ## epistasis: as it says, with the ":"
2366
-
2367
-##     ## orderEffects: the ">"
2368
-
2369
-##     ## All of the above can be genes or can be modules (if you pass a
2370
-##     ## geneToModule)
2371
-
2372
-##     ## rest: rest of genes, with fitness
2373
-
2374
-
2375
-##     ## For epistasis and order effects we create the output object but
2376
-##     ## missing the numeric ids of genes. With rT we do it in one go, as we
2377
-##     ## already know the mapping of genes to numeric ids. We could do the
2378
-##     ## same in epistasis and order, but we would be splitting twice
2379
-##     ## (whereas for rT extracting the names is very simple).
2380
-
2381
-##     ## called appropriately?
2382
-##     if( !(calledBy %in% c("allFitnessEffects", "allMutatorEffects") ))
2383
-##         stop("How did you call this function?. Bug.")
2384
-
2385
-##     if(calledBy == "allMutatorEffects") {
2386
-##         ## very paranoid check
2387
-##         if( !is.null(rT) || !is.null(orderEffects) || !is.null(drvNames))
2388
-##             stop("allMutatorEffects called with forbidden arguments.",
2389
-##                  "Is this an attempt to subvert the function?")
2390
-##     }
2391
-
2392
-##     rtNames <- NULL
2393
-##     epiNames <- NULL
2394
-##     orNames <- NULL
2395
-##     if(!is.null(rT)) {
2396
-##         ## This is really ugly, but to prevent the stringsAsFactors I need it here:
2397
-##         rT$parent <- as.character(rT$parent)
2398
-##         rT$child <- as.character(rT$child)
2399
-##         rT$typeDep <- as.character(rT$typeDep)
2400
-##         rtNames <- unique(c(rT$parent, rT$child))
2401
-##     }
2402
-##     if(!is.null(epistasis)) {
2403
-##         long.epistasis <- to.long.epist.order(epistasis, ":")
2404
-##         ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
2405
-##         ## deal with the possible negative signs
2406
-##         epiNames <- setdiff(unique(
2407
-##             unlist(lapply(long.epistasis,
2408
-##                           function(x) lapply(x$ids,
2409
-##                                              function(z) strsplit(z, "^-"))))),
2410
-##                             "")
2411
-##     } else {
2412
-##         long.epistasis <- list()
2413
-##     }
2414
-##     if(!is.null(orderEffects)) {
2415
-##         long.orderEffects <- to.long.epist.order(orderEffects, ">")
2416
-##         orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
2417
-##     } else {
2418
-##         long.orderEffects <- list()
2419
-##     }
2420
-##     allModuleNames <- unique(c(rtNames, epiNames, orNames))
2421
-##     if(is.null(geneToModule)) {
2422
-##         gMOneToOne <- TRUE
2423
-##         geneToModule <- geneModuleNull(allModuleNames)
2424
-##     } else {
2425
-##         gMOneToOne <- FALSE
2426
-##         if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
2427
-##             stop(paste("Some values in geneToModule not present in any of",
2428
-##                        " rT, epistasis, or order effects"))
2429
-##         if(any(is.na(match(allModuleNames, names(geneToModule)))))
2430
-##             stop(paste("Some values in rT, epistasis, ",
2431
-##                        "or order effects not in geneToModule"))
2432
-##     }
2433
-##     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
2434
-
2435
-##     idm <- unique(geneModule$ModuleNumID)
2436
-##     names(idm) <- unique(geneModule$Module)
2437
-
2438
-##     if(!is.null(rT)) {
2439
-##         checkRT(rT)
2440
-##         long.rt <- to.long.rt(rT, idm)
2441
-##     } else {
2442
-##         long.rt <- list() ## yes, we want an object of length 0
2443
-##     }
2444
-
2445
-##     ## Append the numeric ids to epistasis and order
2446
-##     if(!is.null(epistasis)) {
2447
-##         long.epistasis <- lapply(long.epistasis,
2448
-##                                  function(x)
2449
-##                                      addIntID.epist.order(x, idm,
2450
-##                                                           sort = TRUE,
2451
-##                                                           sign = TRUE))
2452
-##     }
2453
-##     if(!is.null(orderEffects)) {
2454
-##         long.orderEffects <- lapply(long.orderEffects,
2455
-##                                     function(x)
2456
-##                                         addIntID.epist.order(x, idm,
2457
-##                                                              sort = FALSE,
2458
-##                                                              sign = FALSE))
2459
-##     }
2460
-
2461
-##     if(!is.null(noIntGenes)) {
2462
-##         if(inherits(noIntGenes, "character")) {
2463
-##             wm <- paste("noIntGenes is a character vector.",
2464
-##                         "This is probably not what you want, and will",
2465
-##                         "likely result in an error downstream.",
2466
-##                         "You can get messages like",
2467
-##                         " 'not compatible with requested type', and others.",
2468
-##                         "We are stopping.")
2469
-##             stop(wm)
2470
-##         }
2471
-
2472
-##         mg <- max(geneModule[, "GeneNumID"])
2473
-##         gnum <- seq_along(noIntGenes) + mg
2474
-##         if(!is.null(names(noIntGenes))) {
2475
-##             ng <- names(noIntGenes)
2476
-##             if( grepl(",", ng, fixed = TRUE) || grepl(">", ng, fixed = TRUE)
2477
-##                 || grepl(":", ng, fixed = TRUE))
2478
-##                 stop("The name of some noIntGenes contain a ',' or a '>' or a ':'")
2479
-##             if(any(ng %in% geneModule[, "Gene"] ))
2480
-##                 stop("A gene in noIntGenes also present in the other terms")
2481
-##             if(any(duplicated(ng)))
2482
-##                 stop("Duplicated gene names in geneNoInt")
2483
-##             if(any(is.na(ng)))
2484
-##                 stop("In noIntGenes some genes have names, some don't.",
2485
-##                      " Name all of them, or name none of them.")
2486
-##         } else {
2487
-##             ng <- gnum
2488
-##         }
2489
-##         geneNoInt <- data.frame(Gene = as.character(ng),
2490
-##                                 GeneNumID = gnum,
2491
-##                                 s = noIntGenes,
2492
-##                                 stringsAsFactors = FALSE)
2493
-##     } else {
2494
-##         geneNoInt <- data.frame()
2495
-##     }
2496
-
2497
-##     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
2498
-##              nrow(geneNoInt)) == 0)
2499
-##         stop("You have specified nothing!")
2500
-
2501
-##     if(calledBy == "allFitnessEffects") {
2502
-##         if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
2503
-##             graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
2504
-##         } else {
2505
-##             graphE <- NULL
2506
-##         }
2507
-##     } else {
2508
-##         graphE <- NULL
2509
-##     }
2510
-##     if(!is.null(drvNames)) {
2511
-##         drv <- unique(getGeneIDNum(geneModule, geneNoInt, drvNames))
2512
-##         ## drivers should never be in the geneNoInt; Why!!!???
2513
-##         ## Catch the problem. This is an overkill,
2514
-##         ## so since we catch the issue, we could leave the geneNoInt. But
2515
-##         ## that should not be there in this call.
2516
-##         ## if(any(drvNames %in% geneNoInt$Gene)) {
2517
-##         ##     stop(paste("At least one gene in drvNames is a geneNoInt gene.",
2518
-##         ##                "That is not allowed.",
2519
-##         ##                "If that gene is a driver, pass it as gene in the epistasis",
2520
-##         ##                "component."))
2521
-##         ## }
2522
-##         ## drv <- getGeneIDNum(geneModule, NULL, drvNames)
2523
-##     } else {
2524
-##         ## we used to have this default
2525
-##         ## drv <- geneModule$GeneNumID[-1]
2526
-##         drv <- vector(mode = "integer", length = 0L)
2527
-##     }
2528
-
2529
-##     if(!keepInput) {
2530
-##         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
2531
-##     }
2532
-##     out <- list(long.rt = long.rt,
2533
-##                 long.epistasis = long.epistasis,
2534
-##                 long.orderEffects = long.orderEffects,
2535
-##                 long.geneNoInt = geneNoInt,
2536
-##                 geneModule = geneModule,
2537
-##                 gMOneToOne = gMOneToOne,
2538
-##                 geneToModule = geneToModule,
2539
-##                 graph = graphE,
2540
-##                 drv = drv,
2541
-##                 rT = rT,
2542
-##                 epistasis = epistasis,
2543
-##                 orderEffects = orderEffects,
2544
-##                 noIntGenes = noIntGenes
2545
-##                 )
2546
-##     if(calledBy == "allFitnessEffects") {
2547
-##         class(out) <- c("fitnessEffects")
2548
-##     } else if(calledBy == "allMutatorEffects") {
2549
-##         class(out) <- c("mutatorEffects")
2550
-##     }
2551
-##     return(out)
2552
-## }
2553
-
2554
-
2555
-
2556
-## Former version
2557
-## allFitnessEffects <- function(rT = NULL,
2558
-##                               epistasis = NULL,
2559
-##                               orderEffects = NULL,
2560
-##                               noIntGenes = NULL,
2561
-##                               geneToModule = NULL,
2562
-##                               drvNames = NULL,
2563
-##                               genotFitness = NULL,
2564
-##                               keepInput = TRUE) {
2565
-
2566
-##     if(!is.null(genotFitness)) {
2567
-##         if(!is.null(rT) || !is.null(epistasis) ||
2568
-##            !is.null(orderEffects) || !is.null(noIntGenes) ||
2569
-##            !is.null(geneToModule)) {
2570
-##             stop("You have a non-null genotFitness.",
2571
-##                  " If you pass the complete genotype to fitness mapping",
2572
-##                  " you cannot pass any of rT, epistasis, orderEffects",
2573
-##                  " noIntGenes or geneToModule.")
2574
-##         }
2575
-##         epistasis <- from_genotype_fitness(genotFitness)
2576
-##     }
2577
-##     allFitnessORMutatorEffects(
2578
-##         rT = rT,
2579
-##         epistasis = epistasis,
2580
-##         orderEffects = orderEffects,
2581
-##         noIntGenes = noIntGenes,
2582
-##         geneToModule = geneToModule,
2583
-##         drvNames = drvNames,
2584
-##         keepInput = keepInput,
2585
-##         calledBy = "allFitnessEffects")
2586
-## }
2587
-
2588
-## allFitnessEffects <- function(rT = NULL,
2589
-##                               epistasis = NULL,
2590
-##                               orderEffects = NULL,
2591
-##                               noIntGenes = NULL,
2592
-##                               geneToModule = NULL,
2593
-##                               drvNames = NULL,
2594
-##                               keepInput = TRUE) {
2595
-##     ## restrictions: the usual rt
2596
-
2597
-##     ## epistasis: as it says, with the ":"
2598
-
2599
-##     ## orderEffects: the ">"
2600
-
2601
-##     ## All of the above can be genes or can be modules (if you pass a
2602
-##     ## geneToModule)
2603
-
2604
-##     ## rest: rest of genes, with fitness
2605
-
2606
-
2607
-##     ## For epistasis and order effects we create the output object but
2608
-##     ## missing the numeric ids of genes. With rT we do it in one go, as we
2609
-##     ## already know the mapping of genes to numeric ids. We could do the
2610
-##     ## same in epistasis and order, but we would be splitting twice
2611
-##     ## (whereas for rT extracting the names is very simple).
2612
-
2613
-
2614
-##     rtNames <- NULL
2615
-##     epiNames <- NULL
2616
-##     orNames <- NULL
2617
-##     if(!is.null(rT)) {
2618
-##         ## This is really ugly, but to prevent the stringsAsFactors I need it here:
2619
-##         rT$parent <- as.character(rT$parent)
2620
-##         rT$child <- as.character(rT$child)
2621
-##         rT$typeDep <- as.character(rT$typeDep)
2622
-##         rtNames <- unique(c(rT$parent, rT$child))
2623
-##     }
2624
-##     if(!is.null(epistasis)) {
2625
-##         long.epistasis <- to.long.epist.order(epistasis, ":")
2626
-##         ## epiNames <- unique(unlist(lapply(long.epistasis, function(x) x$ids)))
2627
-##         ## deal with the possible negative signs
2628
-##         epiNames <- setdiff(unique(
2629
-##             unlist(lapply(long.epistasis,
2630
-##                           function(x) lapply(x$ids,
2631
-##                                              function(z) strsplit(z, "^-"))))),
2632
-##                             "")
2633
-
2634
-##     } else {
2635
-##         long.epistasis <- list()
2636
-##     }
2637
-##     if(!is.null(orderEffects)) {
2638
-##         long.orderEffects <- to.long.epist.order(orderEffects, ">")
2639
-##         orNames <- unique(unlist(lapply(long.orderEffects, function(x) x$ids)))
2640
-##     } else {
2641
-##         long.orderEffects <- list()
2642
-##     }
2643
-##     allModuleNames <- unique(c(rtNames, epiNames, orNames))
2644
-##     if(is.null(geneToModule)) {
2645
-##         gMOneToOne <- TRUE
2646
-##         geneToModule <- geneModuleNull(allModuleNames)
2647
-##     } else {
2648
-##         gMOneToOne <- FALSE
2649
-##         if(any(is.na(match(setdiff(names(geneToModule), "Root"), allModuleNames))))
2650
-##             stop(paste("Some values in geneToModule not present in any of",
2651
-##                        " rT, epistasis, or order effects"))
2652
-##         if(any(is.na(match(allModuleNames, names(geneToModule)))))
2653
-##             stop(paste("Some values in rT, epistasis, ",
2654
-##                        "or order effects not in geneToModule"))
2655
-##     }
2656
-##     geneModule <- gm.to.geneModuleL(geneToModule, one.to.one = gMOneToOne)
2657
-
2658
-##     idm <- unique(geneModule$ModuleNumID)
2659
-##     names(idm) <- unique(geneModule$Module)
2660
-
2661
-##     if(!is.null(rT)) {
2662
-##         checkRT(rT)
2663
-##         long.rt <- to.long.rt(rT, idm)
2664
-##     } else {
2665
-##         long.rt <- list() ## yes, we want an object of length 0
2666
-##     }
2667
-
2668
-##     ## Append the numeric ids to epistasis and order
2669
-##     if(!is.null(epistasis)) {
2670
-##         long.epistasis <- lapply(long.epistasis,
2671
-##                                  function(x)
2672
-##                                      addIntID.epist.order(x, idm,
2673
-##                                                           sort = TRUE,
2674
-##                                                           sign = TRUE))
2675
-##     }
2676
-##     if(!is.null(orderEffects)) {
2677
-##         long.orderEffects <- lapply(long.orderEffects,
2678
-##                                     function(x)
2679
-##                                         addIntID.epist.order(x, idm,
2680
-##                                                              sort = FALSE,
2681
-##                                                              sign = FALSE))
2682
-##     }
2683
-
2684
-##     if(!is.null(noIntGenes)) {
2685
-##         mg <- max(geneModule[, "GeneNumID"])
2686
-##         gnum <- seq_along(noIntGenes) + mg
2687
-##         if(!is.null(names(noIntGenes))) {
2688
-##             ng <- names(noIntGenes)
2689
-##             if(any(ng %in% geneModule[, "Gene"] ))
2690
-##                 stop("A gene in noIntGenes also present in the other terms")
2691
-##         } else {
2692
-##             ng <- gnum
2693
-##         }
2694
-##         geneNoInt <- data.frame(Gene = as.character(ng),
2695
-##                                 GeneNumID = gnum,
2696
-##                                 s = noIntGenes,
2697
-##                                 stringsAsFactors = FALSE)
2698
-##     } else {
2699
-##         geneNoInt <- data.frame()
2700
-##     }
2701
-##     if( (length(long.rt) + length(long.epistasis) + length(long.orderEffects) +
2702
-##              nrow(geneNoInt)) == 0)
2703
-##         stop("You have specified nothing!")
2704
-
2705
-##     if((length(long.rt) + length(long.epistasis) + length(long.orderEffects)) > 1) {
2706
-##         graphE <- fitnessEffectsToIgraph(rT, epistasis, orderEffects)
2707
-##     } else {
2708
-##         graphE <- NULL
2709
-##     }
2710
-
2711
-##     if(!is.null(drvNames)) {
2712
-##         drv <- getGeneIDNum(geneModule, geneNoInt, drvNames)
2713
-##     } else {
2714
-##         drv <- geneModule$GeneNumID[-1]
2715
-##     }
2716
-##     if(!keepInput) {
2717
-##         rT <- epistasis <- orderEffects <- noIntGenes <- NULL
2718
-##     }
2719
-##     out <- list(long.rt = long.rt,
2720
-##                 long.epistasis = long.epistasis,
2721
-##                 long.orderEffects = long.orderEffects,
2722
-##                 long.geneNoInt = geneNoInt,
2723
-##                 geneModule = geneModule,
2724
-##                 gMOneToOne = gMOneToOne,
2725
-##                 geneToModule = geneToModule,
2726
-##                 graph = graphE,
2727
-##                 drv = drv,
2728
-##                 rT = rT,
2729
-##                 epistasis = epistasis,
2730
-##                 orderEffects = orderEffects,
2731
-##                 noIntGenes = noIntGenes
2732
-##                 )
2733
-##     class(out) <- c("fitnessEffects")
2734
-##     return(out)
2735
-## }
2736
-
2737
-
2738
-## No longer used
2739
-## rtAndGeneModule <- function(mdeps, gM = NULL) {
2740
-##     ## To show a table of restrictions when there are modules. Do not use
2741
-##     ## for anything else. Maybe as intermediate to plotting.
2742
-
2743
-##     ## Specify restriction table of modules and a mapping of modules to
2744
-##     ## genes. gM is a named vector; names are modules, values are elements
2745
-##     ## of each module.
2746
-
2747
-##     ## We do nothing important if gM is NULL except checks
2748
-
2749
-##     ## If there are modules, the table shows the individual genes.
2750
-##     checkRT(mdeps)
2751
-##     ## if(ncol(mdeps) != 5)
2752
-##     ##     stop("mdeps must be of exactly 5 columns")
2753
-##     ## if(!identical(colnames(mdeps), c("parent", "child", "s", "sh", "typeDep")))
2754
-##     ##     stop(paste("Column names of mdeps not of appropriate format. ",
2755
-##     ##                "Should be parent, child, s, sh, typeDep"))
2756
-##     if(!is.null(gM)) {
2757
-##         if(any(is.na(match(mdeps[ , 1], names(gM)))))
2758
-##             stop("Some values in parent not from a known module")
2759
-##         if(any(is.na(match(mdeps[ , 2], names(gM)))))
2760
-##             stop("Some values in child not from a known module")
2761
-##         if(any(is.na(match(names(gM), c(mdeps[, 1], mdeps[, 2])))))
2762
-##             stop("Some values in module in neither parent or child")
2763
-
2764
-##         parent <- gM[mdeps[, 1]]
2765
-##         child <- gM[mdeps[, 2]]
2766
-##         df <- data.frame(parent = parent,
2767
-##                          child = child,
2768
-##                          s = mdeps$s,
2769
-##                          sh = mdeps$sh,
2770
-##                          typeDep = mdeps$typeDep,
2771
-##                          stringsAsFactors = FALSE)
2772
-##     } else {
2773
-##         df <- mdeps
2774
-##     }
2775
-##     rownames(df) <- seq.int(nrow(df))
2776
-##     return(df)
2777
-## }
2778
-
2779
-## wrap.test.rt <- function(rt, gM = NULL) {
2780
-##     ## FIXME add epistasis and orderEffects
2781
-##     lrt <- allFitnessEffects(rt, geneToModule = gM)
2782
-##     ## wrap_test_rt(lrt$long.rt)
2783
-##     wrap_test_rt(lrt$long.rt, lrt$geneModule)
2784
-## }
2785
-
2786
-## No longer used
2787
-## wrap.readFitnessEffects <- function(rt, epi, oe, ni, gm, echo = TRUE) {
2788
-##     tt <- allFitnessEffects(rt, epi, oe, ni, gm)
2789
-##     readFitnessEffects(tt, echo = echo)
2790
-##     ## readFitnessEffects(tt$long.rt,
2791
-##     ##                    tt$long.epistasis,
2792
-##     ##                    tt$long.orderEffects,
2793
-##     ##                    tt$long.geneNoInt,
2794
-##     ##                    tt$geneModule,
2795
-##     ##                    tt$gMOneToOne,
2796
-##     ##                    echo = TRUE)
2797
-## }
2798
-
2799
-
2800
-
2801
-## evalGenotype <- function(genotype, fitnessEffects,
2802
-##                          verbose = FALSE,
2803
-##                          echo = FALSE,
2804
-##                          model = "") {
2805
-##     ## genotype can be a vector of integers, that are the exact same in
2806
-##     ## the table of fitnessEffects or a vector of strings, or a vector (a
2807
-##     ## string) with genes separated by "," or ">"
2808
-    
2809
-##     if(echo)
2810
-##         cat(paste("Genotype: ", genotype))
2811
-##     if(!is.integer(genotype)) {
2812
-##         if(length(grep(">", genotype))) {
2813
-##             genotype <- nice.vector.eo(genotype, ">")
2814
-##         } else if(length(grep(",", genotype))) {
2815
-##             genotype <- nice.vector.eo(genotype, ",")
2816
-##         }
2817
-##         all.g.nums <- c(fitnessEffects$geneModule$GeneNumID,
2818
-##                         fitnessEffects$long.geneNoInt$GeneNumID)
2819
-##         all.g.names <- c(fitnessEffects$geneModule$Gene,
2820
-##                          fitnessEffects$long.geneNoInt$Gene)
2821
-##         genotype <- all.g.nums[match(genotype, all.g.names)]
2822
-##     }
2823
-##     if(any(is.na(genotype)))
2824
-##         stop("genotype contains NAs or genes not in fitnessEffects")
2825
-##     if(!length(genotype))
2826
-##         stop("genotypes must have at least one mutated gene")
2827
-##     if(any(genotype < 0)) {
2828
-##         stop(paste("genotypes cannot contain negative values.",
2829
-##                    "If you see this message, you found a bug."))
2830
-##     }
2831
-##     if(model %in% c("Bozic", "bozic1", "bozic2") )
2832
-##         prodNeg <- TRUE
2833
-##     else
2834
-##         prodNeg <- FALSE
2835
-##     ff <- evalRGenotype(genotype, fitnessEffects, verbose, prodNeg)
2836
-
2837
-
2838
-##     if(echo) {
2839
-##         if(!prodNeg)
2840
-##             cat(" Fitness: ", ff, "\n")
2841
-##         else
2842
-##             cat(" Death rate: ", ff, "\n")
2843
-##     } ## else {
2844
-##     ##     return(ff)
2845
-##     ## }
2846
-##     return(ff)
2847
-## }
2848
-
2849
-## For multiple genotypes, lapply the matching.
2850
-## Nope, I think unneeded
2851
-## internal.convert_genotypes <- function(genotypes, gm) {
2852
-##     genotypes <- lapply(lg, function(x) gm$GeneNumID[match(x, gm$Gene)])
2853
-## }
2854
-
2855
-
2856
-
2857
-
2858
-
2859
-## evalAllGenotypes <- function(fitnessEffects, order = TRUE, max = 256,
2860
-##                              addwt = FALSE,
2861
-##                              model = "") {
2862
-
2863
-##     if(order)
2864
-##         tot <- function(n) {sum(sapply(seq.int(n),
2865
-##                                        function(x) choose(n, x) * factorial(x)))}
2866
-##     else
2867
-##         tot <- function(n) {2^n}
2868
-##     nn <- nrow(fitnessEffects$geneModule) -1  + nrow(fitnessEffects$long.geneNoInt)
2869
-##     tnn <- tot(nn)
2870
-##     if(tnn > max) {
2871
-##         m <- paste("There are", tnn, "genotypes.")
2872
-##         m <- paste(m, "This is larger than max.")
2873
-##         m <- paste(m, "Adjust max and rerun if you want")
2874
-##         stop(m)
2875
-##     }
2876
-##     if(order) {
2877
-##         f1 <- function(n) {
2878
-##             lapply(seq.int(n), function(x) permutations(n = n, r = x))
2879
-##         }
2880
-##     } else {
2881
-##         f1 <- function(n) {
2882
-##             lapply(seq.int(n), function(x) combinations(n = n, r = x))}
2883
-
2884
-##     }
2885
-##     genotNums <- f1(nn)
2886
-##     list.of.vectors <- function(y) {
2887
-##         ## there's got to be a simpler way
2888
-##         lapply(unlist(lapply(y, function(x) {apply(x, 1, list)}), recursive = FALSE),
2889
-##                function(m) m[[1]])
2890
-##     }
2891
-##     genotNums <- list.of.vectors(genotNums)
2892
-##     names <- c(fitnessEffects$geneModule$Gene[-1],
2893
-##                fitnessEffects$long.geneNoInt$Gene)
2894
-
2895
-##     genotNames <- unlist(lapply(lapply(genotNums, function(x) names[x]),
2896
-##                          function(z)
2897
-##                              paste(z,
2898
-##                                    collapse = if(order){" > "} else {", "} )))
2899
-##     ## This ain't the best, as we repeatedly read and convert
2900
-##     ## fitnessEffects.  If this were slow, prepare C++ function that takes
2901
-##     ## vectors of genotypes and uses same fitnessEffects.
2902
-##     if(model %in% c("Bozic", "bozic1", "bozic2") )
2903
-##         prodNeg <- TRUE
2904
-##     else
2905
-##         prodNeg <- FALSE
2906
-##     allf <- vapply(genotNums,
2907
-##                    function(x) evalRGenotype(x, fitnessEffects, FALSE, prodNeg),
2908
-##                    1.1)
2909
-##     df <- data.frame(Genotype = genotNames, Fitness = allf,
2910
-##                      stringsAsFactors = FALSE)
2911
-##     if(addwt)
2912
-##         df <- rbind(data.frame(Genotype = "WT", Fitness = 1,
2913
-##                                stringsAsFactors = FALSE), df)
2914
-##     if(prodNeg)
2915
-##         colnames(df)[match("Fitness", colnames(df))] <- "Death_rate"
2916
-##     return(df)
2917
-## }
2918
-
2919
-
2920
-
2921
-
2922
-## geneNumIDReset <- function(x, ref){
2923
-##     ## Set GeneNumID of a fitnessEffect object, x, using ref as the
2924
-##     ## reference fitness effect object.
2925
-##     ## Check also if all in x are in ref.
2926
-
2927
-##     gg <- allNamedGenes(ref)
2928
-##     gnid <- gg$GeneNumID
2929
-##     names(gnid) <- gg$Gene
2930
-##     ## FIXME: this later and conditional on what is in thee
2931
-##     gnid <- c("Root" = 0, gnid)
2932
-
2933
-##     if(!all(x$geneModule$Gene %in% names(gnid) ))
2934
-##         stop("Some genes not in reference fitnessEffects (rebasing geneModule)")
2935
-##     x$geneModule$GeneNumID <- gnid[geneModule$Gene]
2936
-
2937
-##     ## and then go over all the lists in the x object.
2938
-
2939
-##     if(nrow(x$long.geneNoInt)) {
2940
-##         ## now, mapping for the noInt if this is mutator
2941
-##         if(!all(x$long.geneNoInt$Gene %in% names(gnid) ))
2942
-##             stop("Some genes not in reference fitnessEffects (rebasing geneNoInt)")
2943
-##         x$long.geneNoInt$GeneNumID <- gnid[long.geneNoInt$Gene]
2944
-##     }
2945
-## }
2946
-
2947
-
2948 2252
 ## character vector, named replacement -> replaced vector
2949 2253
 ## named_replace: names are the (fixed) pattern, value the replacement
2950 2254
 ## stringr::str_replace_all probably better
... ...
@@ -279,110 +279,3 @@ rfitness <- function(g, c= 0.5,
279 279
 
280 280
 
281 281
 
282
-
283
-
284
-
285
-
286
-## rfitness <- function(g, c= 0.5,
287
-##                      sd = 1,
288
-##                      mu = 1,
289
-##                      reference = "random", ## "random", "max", or the vector,
290
-##                                      ## e.g., rep(1, g). If random, a
291
-##                                      ## random genotype is chosen as
292
-##                                      ## reference. If "max" this is rep(1, g)
293
-##                      scale = NULL, ## a two-element vector: min and max
294
-##                      wt_is_1 = c("subtract", "divide", "force", "no"),
295
-##                      ## wt_is_1 = TRUE, ## wt has fitness 1
296
-##                      log = FALSE, ## log only makes sense if all values >
297
-##                                  ## 0. scale with min > 0, and/or set
298
-##                                  ## wt_is_1 = divide
299
-##                      min_accessible_genotypes = NULL,
300
-##                      accessible_th = 0,
301
-##                      truncate_at_0 = TRUE) {
302
-##     ## Like Franke et al., 2011 and others of Krug. Very similar to Greene
303
-##     ## and Crona, 2014. And this allows moving from HoC to purely additive
304
-##     ## changing c and sd.
305
-
306
-##     ## FIXME future: do this with order too?
307
-##     ##    - do not generate a matrix of genotypes but a matrix of ordered genot.
308
-##     wt_is_1 = match.arg(wt_is_1)
309
-##     if(is_null_na(g)) stop("Number of genes argument (g) is null or NA")
310
-##     m <- generate_matrix_genotypes(g)
311
-##     done <- FALSE
312
-##     ## attempts <- 0 ## for debugging/tracking purposes
313
-##     while(!done) {
314
-##         ## attempts <- attempts + 1
315
-##         f_r <- rnorm(nrow(m), mean = mu, sd = sd)
316
-##         if(inherits(reference, "character") && length(reference) == 1) {
317
-##             if(reference == "random") {
318
-##                 referenceI <- m[sample(nrow(m), 1), ]
319
-##             } else if(reference == "max") {
320
-##                 referenceI <- rep(1, g)
321
-##             } else if(reference == "random2") {
322
-##                 referenceI <- create_eq_ref(g)
323
-##             }
324
-##         } else {
325
-##             referenceI <- reference
326
-##             }
327
-##         d_reference <- apply(m, 1, function(x) sum(abs(x - referenceI)))
328
-##         f_det <- -c * d_reference
329
-##         ## f_det <- rowSums(m) * slope/nrow(m) ## this is Greene and Krona
330
-##         fi <- f_r + f_det
331
-
332
-##         if(!is.null(scale)) {
333
-##             fi <- (fi - min(fi))/(max(fi) - min(fi))
334
-##             fi <- scale[1] + fi * (scale[2] - scale[1])
335
-##         }
336
-##         if(wt_is_1 == "divide") {
337
-##             ## We need to shift to avoid ratios involving negative numbers and
338
-##             ## we need to avoid having any fitness at 0, specially the wt.  If
339
-##             ## any negative value, add the min, and shift by the magnitude of
340
-##             ## the min to avoid any at 0.
341
-
342
-##             ## If you use scale and wt_is_1, this will move the scale. It is
343
-##             ## not possible to obtain a linear transformation that will keep
344
-##             ## the min and max of the scale, and wt at 1.
345
-##             min_fi <- min(fi)
346
-##             if(min_fi < 0)
347
-##                 fi <- fi + 2 * abs(min(fi))
348
-##             fi <- fi/fi[1]
349
-##         } else if (wt_is_1 == "subtract") {
350
-##             fi <- fi - fi[1] + 1.0
351
-##         } else if(wt_is_1 == "force") {
352
-##             fi[1] <- 1.0
353
-##             if(!is.null(scale)) {
354
-##                 if( (1 > scale[2]) || (1 < scale[1]))
355
-##                     warning("Using wt_is_1 = force and scale, but scale does ",
356
-##                             "not include 1")
357
-##             }
358
-##         }
359
-##         if(truncate_at_0) {
360
-##             fi[fi <= 0] <- 1e-9
361
-##         }
362
-##         if(log) {
363
-##             fi <- log(fi/fi[1]) + 1
364
-##         }
365
-##         m <- cbind(m, Fitness = fi)
366
-##         if(!is_null_na(min_accessible_genotypes)) {
367
-##             ## num_accessible_genotypes <- count_accessible_g(m, accessible_th)
368
-##             ## Recall accessibleGenotypes includes the wt, if accessible.
369
-##             num_accessible_genotypes <- length(wrap_accessibleGenotypes(m, accessible_th)) - 1
370
-##             ## message("\n     num_accessible_genotypes = ", num_accessible_genotypes, "\n")
371
-##             if(num_accessible_genotypes >= min_accessible_genotypes) {
372
-##                 done <- TRUE
373
-##                 attributes(m) <- c(attributes(m),
374
-##                                    accessible_genotypes = num_accessible_genotypes,
375
-##                                    accessible_th = accessible_th)
376
-##             } else {
377
-##                 ## Cannot start again with a fitness column
378
-##                 m <- m[, -ncol(m), drop = FALSE]
379
-##             }
380
-##         } else {
381
-##             done <- TRUE
382
-##         }
383
-##     }
384
-##     ## message("\n number of attempts = ", attempts, "\n")
385
-##     class(m) <- c(class(m), "genotype_fitness_matrix")
386
-##     return(m)
387
-## }
388
-
... ...
@@ -24,7 +24,6 @@ citEntry(entry="Article",
24 24
          )
25 25
 
26 26
 
27
-
28 27
 citEntry(entry = "Article",
29 28
 	author = "R Diaz-Uriarte and C Vasallo",
30 29
 	title = "Every which way? On predicting tumor evolution using cancer progression models",
... ...
@@ -42,12 +41,11 @@ citEntry(entry = "Article",
42 41
 )
43 42
 
44 43
 
45
-
46 44
 citEntry(entry="Article",
47 45
          author = "R Diaz-Uriarte",
48 46
          title = "Cancer progression models and fitness landscapes: a many-to-many relationship",
49 47
          journal = "Bioinformatics",
50
-         year = "2017",
48
+         year = "2018",
51 49
          doi = "10.1093/bioinformatics/btx663",
52 50
          url = "https://academic.oup.com/bioinformatics/article/34/5/836/4557185",
53 51
          volume = "34",
... ...
@@ -1,3 +1,12 @@
1
+Changes in version 2.99.9 (2021-04-22):
2
+	- Fixed date typo in one citation.
3
+	- We were inconsistent, allowing some examples of one gene.
4
+	- Readme: nem.
5
+	
6
+Changes in version 2.99.8 (2021-01-01):
7
+	- Removing unused code.
8
+	- Long tests: no longer using v.1.