## Copyright 2016-2021 Ramon Diaz-Uriarte

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <http://www.gnu.org/licenses/>.


## Note that, in contrast to POM, LOD is not well defined if the population
## becomes extinct.

## Functions to obtain LOD and POM similar to Szendro et al., 2014, PNAS.
genot_max <- function(x) {
    x$GenotypesLabels[which.max(x$pops.by.time[nrow(x$pops.by.time), -1])]
}



## Filter the PhylogDF so we obtain LOD, sensu stricto.

## No longer necessary with LOD from C++ removing duplicates
## ## Now this is coming from LOD_DF, which already only has
## ## implicitly pop_size_child == 0
## Only used in one function use for testing
filter_phylog_df_LOD <- function(y) {
    keep <- !rev(duplicated(rev(y$child)))
    return(y[keep, ])
}



## ## Filter the PhylogDF so we obtain LOD, sensu stricto.
## ## For the old version
## filter_phylog_df_LOD_with_n <- function(y) {
##     y <- y[y$pop_size_child == 0, , drop = FALSE]
##     keep <- !rev(duplicated(rev(y$child)))
##     return(y[keep, ])
## }


## from phylogClone, key parts for the LOD strict structure
phcl_from_lod <- function(df, x) {
    ## no longer necessary
    ## ## the !keepEvents. Which I move here to speed things up.
    ## df <- df[!duplicated(df[, c(1, 2)]), , drop = FALSE]
    
    tG <- unique(c(as.character(df[, 1]), as.character(df[, 2])))
    ## ## Do as in phylogClone. So that we have the same nodes
    ## ## in LOD all and not LOD all?
    ## z <- which_N_at_T(x, N = 1, t = "last")
    ## tG <- x$GenotypesLabels[z]
    
    ## ## FIXME: aren't these two warnings redundant or aliased?
    ## ## yes, partlt
    ##  I think this can never happen now
    ## if ((length(tG) == 1) && (tG == "")) {
    ##     warning("There never was a descendant of WT")
    ## }
    if (nrow(df) == 0) {
        warning("LOD structure has 0 rows: no descendants of initMutant ever appeared. ")
        return(NA)
    }
    g <- igraph::graph.data.frame(df[, c(1, 2)])
    nodesInP <- unique(unlist(igraph::neighborhood(g, order = 1e+09, 
                                                   nodes = tG, mode = "in")))
    allLabels <- unique(as.character(unlist(df[, c(1, 2)])))
    nodesRm <- setdiff(allLabels, V(g)$name[nodesInP])
    g <- igraph::delete.vertices(g, nodesRm)
    tmp <- list(graph = g, df = df)
    class(tmp) <- c(class(tmp), "phylogClone")
    return(tmp)
}

LOD.internal <- function(x) {
    if(is.null(x$pops.by.time)) {
        warning("Missing needed components. This might be a failed simulation.",
                " Returning NA.")
        return(list(all_paths = NA, lod_single = NA))
    }
    if (!inherits(x, "oncosimul2")) 
        stop("LOD information is only stored with v >= 2")
    ## y <- filter_phylog_df_LOD(x$other$LOD_DF)
    y <- x$other$LOD_DF
    pc <- phcl_from_lod(y)
    ## need eval for oncoSimulPop calls and for LOD_as_path
    initMutant <- x$InitMutant

    ## No descendants means that: never a descendant.
    ## Note the same that the init mutant be the final state.
    if((length(pc) == 1) && (is.na(pc))) {
        lod <- "No_descendants"
        ## bail out here. We do not need the rest.
        if(initMutant != "")
            attributes(lod)$initMutant <- initMutant
        return(lod)
    }
    
    pcg <- pc$graph
    end <- genot_max(x)
    
    if(end == initMutant) {
        if(initMutant == "") {
            stinitm <- "WT"
        } else {
            stinitm <- paste0("initMutant(", initMutant, ")")
        }
        lod <- paste0(stinitm, "_is_end")
    } else {
        all_paths <- igraph::all_simple_paths(pcg, from = initMutant, to = end,
                                              mode = "out")
        if(length(all_paths) > 1)
            stop("length(all_paths) > 1???")
        lod <- igraph::as_ids(all_paths[[1]])
    }
    if(initMutant != "")
        attributes(lod)$initMutant <- initMutant
    return(lod)
}


POM.internal <- function(x) {
    if(is.null(x$pops.by.time)) {
        warning("Missing needed components. This might be a failed simulation.",
                " Returning NA.")
        return(NA)
    } else {
        x$other$POM
    }
}



diversityLOD <- function(llod) {
    ## nn <- names(llod[[1]])
    nn <- llod[[1]]
    if( is_null_na(nn) ||
        !(is.list(llod)))
        stop("Object must be a list of LODs")
    pathstr <- unlist(lapply(llod, function(x) paste(x,
                                                     collapse = "_")))
    shannonI(table(pathstr))
}

## diversityLOD <- function(llod) {
##     nn <- names(llod[[1]])
##     if( is_null_na(nn) ||
##         !(is.list(llod)))
##         stop("Object must be a list of LODs")
##     pathstr <- unlist(lapply(llod, function(x) paste(names(x),
##                                                      collapse = "_")))
##     shannonI(table(pathstr))
## }

LOD_as_path <- function(llod) {
    path_l <- function(u) {
        if(length(u) == 1) {
            if(is.null(attributes(u)$initMutant))
                initMutant <- ""
            else
                initMutant <- attributes(u)$initMutant
            if(initMutant == "") initMutant <- "WT"
            if(grepl("_is_end", u))
                return(initMutant)
            if(u == "No_descendants")
                return(initMutant)
        } else {
            ## Deal with "" meaning WT
            ## the_names <- names(u)
            the_names <- u
            the_names_wt <- which(the_names == "")
            
            if(length(the_names_wt)) {
                if(length(the_names_wt) > 1) stop("more than 1 WT?!")
                if(the_names_wt > 1) stop("WT in position not 1?!")
                the_names[the_names_wt] <- "WT"
            }
            return(paste(the_names, collapse = " -> ")) 
        }
    }
    if(!is.list(llod))
        pathstr <- path_l(llod)
    else
        pathstr <- unlist(lapply(llod, path_l))
    return(pathstr)
}
## We would just need a LOD_as_DAG

diversityPOM <- function(lpom) {
    if(!inherits(lpom, "list"))
        stop("Object must be a list of POMs")
    pomstr <- unlist(lapply(lpom, function(x) paste(x, collapse = "_")))
    shannonI(table(pomstr))
}

## a legacy
diversity_POM <- diversityPOM
diversity_LOD <- diversityLOD


POM <- function(x) {
    UseMethod("POM", x)
}


LOD <- function(x) {
    UseMethod("LOD", x)
}

POM.oncosimul2 <- function(x) return(POM.internal(x))
LOD.oncosimul2 <- function(x) return(LOD.internal(x))

POM.oncosimulpop <- function(x) return(lapply(x, POM.internal))
LOD.oncosimulpop <- function(x) return(lapply(x, LOD.internal))




## POM.oncosimul2 <- function(x) {
##     out <- POM.internal(x)
##     class(out) <- c(class(out), "oncosimul_pom")
##     return(out)
## }

## LOD.oncosimul2 <- function(x) {
##     out <- LOD.internal(x)
##     class(out) <- c(class(out), "oncosimul_lod")
##     return(out)
## }


## POM.oncosimulpop <- function(x) {
##     out <- lapply(x, POM.internal)
##     class(out) <- c(class(out), "oncosimul_pom_list")
##     return(out)
## }

## LOD.oncosimulpop <- function(x) {
##     out <- lapply(x, LOD.internal)
##     class(out) <- c(class(out), "oncosimul_lod_list")
##     return(out)
## }


## summary.oncosimul_lod_list <- function(x) {
##     cat("List of ", length(x), " simulations\n.")
##     cat("Shannon's diversity (entropy) = ", diversityLOD(x), "\n")
## }

## summary.oncosimul_pom_list <- function(x) {
##     cat("List of ", length(x), " simulations\n.")
##     cat("Shannon's diversity (entropy) = ", diversityPOM(x), "\n")
## }



POM_pre_2.9.2 <- function(x) {
    if(is.null(x$pops.by.time)) {
        warning("Missing needed components. This might be a failed simulation.",
                " Returning NA.")
        return(NA)
    }
    x$GenotypesLabels[rle(apply(x$pops.by.time[, -1, drop = FALSE],
                                1, which.max))$values]
}



LOD.internal_pre_2.9.2 <- function(x, strict) {
    ## Not identical to LOD of Szendro because:
    
    ##  a) I think we want all paths, not just their single LOD, which I
    ##  think they might use out of convenience.

    ##  b) For me it is a mess, a complicated mess, to use their LOD as
    ##  they define it and there are many ambiguities in how to define it
    ##  in continuous time.

    ## This also means that single simulation might yield multiple LODs

    ## keepEvents is FALSE to make this object as small as possible.
    if(is.null(x$pops.by.time)) {
        warning("Missing needed components. This might be a failed simulation.",
                " Returning NA.")
        return(list(all_paths = NA, lod_single = NA))
    }
    if(strict) {
        if (!inherits(x, "oncosimul2")) 
            stop("LOD information is only stored with v >= 2")
        y <- filter_phylog_df_LOD(x$other$LOD_DF)
        pc <- phcl_from_lod(y)
    } else {
        pc <- phylogClone(x, keepEvents = FALSE)
    }
    ## need eval for oncoSimulPop calls and for LOD_as_path
    initMutant <- x$InitMutant
    
    if((length(pc) == 1) && (is.na(pc))) {
        lodlist <- list(all_paths = NA,
                        lod_single = "No_descendants")
        ## bail out here. We do not need the rest.
        if(initMutant != "")
            attributes(lodlist)$initMutant <- initMutant
        return(lodlist)
    }
    
    pcg <- pc$graph
    end <- genot_max(x)
    
    ## if(!is.null(eval(attributes(x)$call$initMutant))) {
    ##     initMutant <- eval(attributes(s7)$call$initMutant)
    ## } else {
    ##     initMutant <- ""
    ## }
    ## browser()
    if(end == initMutant) {
        if(initMutant == "") {
            stinitm <- "WT"
        } else {
            stinitm <- paste0("initMutant(", initMutant, ")")
        }
        lod_single <- paste0(stinitm, "_is_end")
        all_paths <- list(lod_single)
    } else {
        all_paths <- igraph::all_simple_paths(pcg, from = initMutant, to = end,
                                              mode = "out")
       
        if(!strict) {
            ## the next is partially redundant
            ## graph_to_end <- igraph::make_ego_graph(pcg, order = 1e9, nodes = end,
            ##                                        mode = "in")
            ## if(length(graph_to_end) != 1) stop("length(graph_to_end) > 1")
            ## I am not sure if I should keep the last one. Redundant
            
            ## This gives a single path and it is the first entry into each
            ## destination. But we do not check there is no extinction afterwards.
            ## The closest to the single Szendro LOD
            singlep <- pc$df
            singlep[, 1] <- as.character(singlep[, 1])
            singlep[, 2] <- as.character(singlep[, 2])
            singlep <- singlep[ do.call(order, singlep[, c(2, 3)]), ]
            singlep <- singlep[!duplicated(singlep[, 2]), ]
            gsingle <- igraph::graph_from_data_frame(singlep)
            lod_single <- igraph::all_simple_paths(gsingle, from = initMutant,
                                                   to = end, mode = "out")
            if(length(lod_single) != 1) stop("lod_single != 1")
        }
    }
    if(strict) {
        if(length(all_paths) > 1)
            stop("length(all_paths) > 1???")
        lodlist <- list(all_paths = NA,
                    lod_single = all_paths[[1]])
    } else {
        lodlist <- list(all_paths = all_paths,
                    lod_single = lod_single[[1]])
    }
    if(initMutant != "")
        attributes(lodlist)$initMutant <- initMutant
    return(lodlist)
}



## LOD_as_path_pre_2.9.2 <- function(llod) {
##     path_l <- function(u) {
##         if(length(u$lod_single) == 1) {
##             if(is.null(attributes(u)$initMutant))
##                 initMutant <- ""
##             else
##                 initMutant <- attributes(u)$initMutant
##             if(initMutant == "") initMutant <- "WT"
##             if(grepl("_is_end", u$lod_single))
##                 return(initMutant)
##             if(u$lod_single == "No_descendants")
##                 return(initMutant)
##         } else {
##             ## Deal with "" meaning WT
##             the_names <- names(u$lod_single)
##             the_names_wt <- which(the_names == "")
            
##             if(length(the_names_wt)) {
##                 if(length(the_names_wt) > 1) stop("more than 1 WT?!")
##                 if(the_names_wt > 1) stop("WT in position not 1?!")
##                 the_names[the_names_wt] <- "WT"
##             }
##             return(paste(the_names, collapse = " -> ")) 
##             ## return(paste0("WT", paste(names(u$lod_single),
##             ##                           collapse = " -> ")) )
##         }
##     }
##     if(identical(names(llod), c("all_paths", "lod_single")))
##         pathstr <- path_l(llod)
##     else {
##         ## should be a list
##         pathstr <- unlist(lapply(llod, path_l))
##     }
##     return(pathstr)
##     ## pathstr <- unlist(lapply(llod, function(x) paste(names(x$lod_single),
##     ##                                                  collapse = " -> ")))
##     ## return(paste0("WT", pathstr))
## }



LOD.oncosimul2_pre_2.9.2 <- function(x, strict = TRUE)
    return(LOD.internal_pre_2.9.2(x, strict))

## LOD.oncosimulpop_pre_2.9.2 <- function(x, strict = TRUE)
##     return(lapply(x, LOD.internal_pre_2.9.2, strict))






## Note for self: we could get all the LODs per simulation in the strict
## sense of those never becoming extinct if we subset the phylogClone
## object to children in which if we arrive at the children at any two
## times t and t+k, we retain only rows where any time > t is such that
## the popsize is > 0. But this is not worth it now.