#' GWCoGAPS
#'
#' @details calls the C++ MCMC code and performs Bayesian
#' matrix factorization returning the two matrices that reconstruct
#' the data matrix for whole genome data;
#' @param simulationName name of this simulation
#' @param nFactor number of patterns (basis vectors, metagenes), which must be
#' greater than or equal to the number of rows of FP
#' @param nCores number of cores for parallelization. If left to the default NA, nCores = nSets.
#' @param cut number of branches at which to cut dendrogram used in patternMatch4Parallel
#' @param minNS minimum of individual set contributions a cluster must contain
#' @param manualMatch logical indicating whether or not to stop after initial phase for manual pattern matching
#' @param consensusPatterns fixed pattern matrix to be used to ensure reciprocity of A weights accross sets 
#' @param ... additional parameters to be fed into \code{gapsRun} and \code{gapsMapRun}
#' @return list of A and P estimates
#' @seealso \code{\link{gapsRun}}, \code{\link{patternMatch4Parallel}}, and \code{\link{gapsMapRun}}
#' @examples
#' data(SimpSim)
#' sim_name <- "example"
#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
#' result <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
#' @export
GWCoGAPS <- function(simulationName, nFactor, nCores=NA, cut=NA, minNS=NA, manualMatch=FALSE, consensusPatterns=NULL, ...)
{
    if (!is.null(list(...)$checkpointFile))
    {
        stop("checkpoint file name automatically set in GWCoGAPS - don't pass this parameter")
    }

    if (is.null(consensusPatterns))
    {
        allDataSets <- preInitialPhase(simulationName, nCores)
        initialResult <- runInitialPhase(simulationName, allDataSets, nFactor, ...)
        if (manualMatch)
        {
            saveRDS(initialResult,file=paste(simulationName,"_initial.rds", sep=""))
            stop("Please provide consensus patterns upon restarting.")
        }
        matchedPatternSets <- postInitialPhase(initialResult, length(allDataSets), cut, minNS)
        save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData", sep=""))
        consensusPatterns <- matchedPatternSets[[1]]
    } 
    finalResult <- runFinalPhase(simulationName, allDataSets, consensusPatterns, nCores, ...)
    return(postFinalPhase(finalResult, consensusPatterns))
}

#' Restart a GWCoGaps Run from Checkpoint
#'
#' @inheritParams GWCoGAPS
#' @return list of A and P estimates
#' @importFrom utils file_test
#' @examples
#' data(SimpSim)
#' sim_name <- "example"
#' createGWCoGAPSSets(SimpSim.D, SimpSim.S, nSets=2, sim_name)
#' trash <- GWCoGAPS(sim_name, nFactor=3, nEquil=200, nSample=200)
#' result <- GWCoGapsFromCheckpoint(sim_name, 2)
#' @export
GWCoGapsFromCheckpoint <- function(simulationName, nCores, cut=NA, minNS=NA, ...)
{
    # find data files
    allDataSets <- preInitialPhase(simulationName, nCores)

    # figure out phase from file signature
    initialCpts <- list.files(full.names=TRUE, pattern=paste(simulationName,
        "_initial_cpt_[0-9]+.out", sep=""))
    finalCpts <- list.files(full.names=TRUE, pattern=paste(simulationName,
        "_final_cpt_[0-9]+.out", sep=""))

    if (length(finalCpts))
    {
        finalResult <- foreach(i=1:length(allDataSets)) %dopar%
        {
            # load data set and shift values so gene minimum is zero
            load(allDataSets[[i]])
            sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x)
                pmin(0,min(x))))
    
            # run CoGAPS with fixed patterns
            cptFileName <- paste(simulationName, "_final_cpt_", i, ".out",
                sep="")
            CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
        }
        load(paste(simulationName, "_matched_ps.RData", sep=""))
    }
    else if (file_test("-f", paste(simulationName, "_matched_ps.RData", sep="")))
    {
        load(paste(simulationName, "_matched_ps.RData", sep=""))
        consensusPatterns<-matchedPatternSets[[1]]
        finalResult <- runFinalPhase(simulationName, allDataSets,
            consensusPatterns, nCores, ...)
    }
    else if (length(initialCpts))
    {
        # initial phase - always needs to be run to get matchedPatternSets
        initialResult <- foreach(i=1:length(allDataSets)) %dopar%
        {
            # load data set and shift values so gene minimum is zero
            load(allDataSets[[i]])
            sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x)
                pmin(0,min(x))))

            # run CoGAPS from checkpoint
            cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out",
                sep="")
            CoGapsFromCheckpoint(sampleD, sampleS, cptFileName)
        }
        matchedPatternSets <- postInitialPhase(initialResult,
            length(allDataSets), cut, minNS)
        save(matchedPatternSets, file=paste(simulationName, "_matched_ps.RData",
            sep=""))
        consensusPatterns<-matchedPatternSets[[1]]
        finalResult <- runFinalPhase(simulationName, allDataSets,
            consensusPatterns, nCores, ...)
    }
    else
    {
        stop("no checkpoint files found")
    }
    return(postFinalPhase(finalResult, consensusPatterns))
}

preInitialPhase <- function(simulationName, nCores)
{
    # find data files
    fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
    allDataSets <- list.files(full.names=TRUE, pattern=fileSig)

    # establish the number of cores that we are able to use
    if (is.na(nCores))
    {
        nCores <- length(allDataSets)
    }
    registerDoParallel(cores=nCores)
    return(allDataSets)
}

runInitialPhase <- function(simulationName, allDataSets, nFactor, ...)
{
    #generate seeds for parallelization
    nut <- generateSeeds(chains=length(allDataSets), seed=-1)

    # run CoGAPS for each set
    initialResult <- foreach(i=1:length(allDataSets)) %dopar%
    {
        # load data set and shift values so gene minimum is zero
        load(allDataSets[[i]])
        sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x))))

        # run CoGAPS without any fixed patterns
        cptFileName <- paste(simulationName, "_initial_cpt_", i, ".out", sep="")
        CoGAPS(sampleD, sampleS, nFactor=nFactor, seed=nut[i],
            checkpointFile=cptFileName, ...)
    }
    return(initialResult)
}

postInitialPhase <- function(initialResult, nSets, cut, minNS)
{
    nFactor <- ncol(initialResult[[1]]$Amean)
    BySet <- reOrderBySet(AP=initialResult, nFactor=nFactor, nSets=nSets)

    #run postpattern match function
    if (is.na(cut))
    {
        cut <- nFactor
    }

    return(patternMatch4Parallel(Ptot=BySet$P, nP=nFactor, nSets=nSets, cnt=cut,
        minNS=minNS, bySet=TRUE))
}

runFinalPhase <- function(simulationName, allDataSets, consensusPatterns, nCores, ...)
{    
    if (length(dim(consensusPatterns)) != 2)
    {
        stop("consensusPatterns must be a matrix")
    }

    # find data files if providing consensus patterns
    fileSig <- paste(simulationName, "_partition_[0-9]+.RData", sep="")
    allDataSets <- list.files(full.names=TRUE, pattern=fileSig)

    if (is.na(nCores))
    {
        nCores <- length(allDataSets)
    }
    registerDoParallel(cores=nCores)

    # generate seeds for parallelization
    nut <- generateSeeds(chains=length(allDataSets), seed=-1)

    # final number of factors
    nFactorFinal <- nrow(consensusPatterns)

    # run fixed CoGAPS
    finalResult <- foreach(i=1:length(allDataSets)) %dopar%
    {
        # load data set and shift values so gene minimum is zero
        load(allDataSets[[i]])
        sampleD <- sweep(sampleD, 1, apply(sampleD, 1, function(x) pmin(0,min(x))))

        # run CoGAPS with fixed patterns
        cptFileName <- paste(simulationName, "_final_cpt_", i, ".out", sep="")
        CoGAPS(sampleD, sampleS, fixedPatterns=consensusPatterns,
            nFactor=nFactorFinal, seed=nut[i], checkpointFile=cptFileName,
            whichMatrixFixed='P', ...)

    }
    return(finalResult)
}

postFinalPhase <- function(finalResult, consensusPatterns)
{
    Aresult <- postFixed4Parallel(finalResult, consensusPatterns)
    finalResult <- list("Amean"=Aresult$A, "Asd"=Aresult$Asd,"Pmean"=consensusPatterns)
    class(finalResult) <- append(class(finalResult), "CoGAPS")
    return(finalResult)
}