#' Create Gene Sets for scCoGAPS
#' @export
#'
#' @description partitions the cells (columns) of a data matrix into randomly
#' sampled sets, log2-transforms each set and saves it to disk for indexing
#' @details For every set an \code{.RData} file named
#' \code{<path><simulationName>_partition_<set>.RData} is written. It holds
#' two objects: \code{sampleD}, equal to \code{log2(D[, cells] + 1)}, and
#' \code{sampleS}, equal to \code{pmax(0.1 * sampleD, 0.1)}.
#'
#' When \code{samplingRatio} is \code{NULL} the columns are split without
#' replacement into \code{nSets} disjoint sets of
#' \code{floor(ncol(D) / nSets)} cells; the last set receives every cell that
#' is still unassigned. Otherwise each set is drawn independently, with
#' replacement, taking the requested number of cells from each cell type.
#' @param D data matrix with one column per cell
#' @param nSets number of sets to partition the data into
#' @param simulationName name used to identify files created by this simulation
#' @param samplingRatio vector with the number of cells to draw (with
#' replacement) from each celltype for every set. If named, entries are matched
#' to celltypes by name; otherwise they are matched by position to
#' \code{unique(anotionObj)}. Celltypes without an entry are not sampled
#' @param anotionObj vector of same length as number of columns of D giving the
#' celltype of each cell; required when \code{samplingRatio} is given
#' @param path character string indicating where to save resulting data
#' objects. It is prepended verbatim to the file name, so include a trailing
#' separator. default is current working dir
#' @return simulationName used to identify saved files
#' @examples
#' data(SimpSim)
#' createscCoGAPSSets(SimpSim.D, nSets=2, simulationName="example")
createscCoGAPSSets <- function(D, nSets, simulationName, samplingRatio=NULL,
path="", anotionObj=NULL)
{
    # check cell identifiers (column names of D)
    if (anyDuplicated(colnames(D)) > 0)
    {
        warning("Cell identifiers not unique!")
    }

    # validate the stratified sampling inputs once, outside of the loop
    cellTypes <- NULL
    if (!is.null(samplingRatio))
    {
        if (length(anotionObj) != ncol(D))
        {
            stop("anotionObj must have one entry per column of D when ",
                "samplingRatio is given")
        }
        cellTypes <- unique(anotionObj)
        if (length(cellTypes) != length(samplingRatio))
        {
            warning("Not all celltypes will be sampled from.")
        }
    }

    # number of cells to draw from the i-th celltype (0 if none was requested)
    nDraws <- function(i)
    {
        if (!is.null(names(samplingRatio)))
        {
            key <- as.character(cellTypes[i])
            if (!(key %in% names(samplingRatio)))
            {
                return(0)
            }
            return(samplingRatio[[key]])
        }
        if (i > length(samplingRatio))
        {
            return(0)
        }
        samplingRatio[[i]]
    }

    # partition data by sampling random sets of cells
    remaining <- seq_len(ncol(D))
    setSize <- floor(length(remaining) / nSets)
    for (set in seq_len(nSets))
    {
        if (is.null(samplingRatio))
        {
            # the last set takes every cell that is still unassigned
            sampleSize <- if (set == nSets) length(remaining) else setSize
            # sample positions rather than values: sample(x, n) treats a
            # length-one numeric x as 1:x and would return the wrong cells
            cellset <- remaining[sample.int(length(remaining), sampleSize)]
            remaining <- setdiff(remaining, cellset)
        }
        else
        {
            # draw column indices per celltype and flatten them into a single
            # index vector usable for subsetting D
            cellset <- unlist(lapply(seq_along(cellTypes), function(i)
            {
                typeCols <- which(anotionObj == cellTypes[i])
                n <- nDraws(i)
                if (length(typeCols) == 0 || is.na(n) || n <= 0)
                {
                    return(integer(0))
                }
                typeCols[sample.int(length(typeCols), n, replace=TRUE)]
            }))
        }

        # partition data; drop=FALSE keeps a matrix even for a single cell
        sampleD <- D[, cellset, drop=FALSE]
        # log transform
        sampleD <- log2(sampleD + 1)
        # generate S
        sampleS <- pmax(.1 * sampleD, .1)
        save(sampleD, sampleS, file=paste0(path, simulationName, "_partition_",
            set, ".RData"))
    }
    return(simulationName)
}