R/CoGAPS.R
e5201463
 # regenerate `SimpSim.result` with correct row/col names
 # need to pass whole GSets list
 # not one element
 # smooth patterns nbd
 # bluered not quotes - it's a function
 # make sure import heatmap.2 from gplots
 # make sure GWCoGAPS vignette is included
 # look into conference poster/abstract submission
 # write first draft of abstract
 
ad6cc202
 #' CoGAPS Matrix Factorization Algorithm
 #' 
 #' @details calls the C++ MCMC code and performs Bayesian
f62e7b27
 #' matrix factorization returning the two matrices that reconstruct
 #' the data matrix
ad6cc202
 #' @param D data matrix
 #' @param S uncertainty matrix (std devs for chi-squared of Log Likelihood)
08b41bdc
 #' @param nFactor number of patterns (basis vectors, metagenes), which must be
f62e7b27
 #' greater than or equal to the number of rows of FP
08b41bdc
 #' @param nEquil number of iterations for burn-in
 #' @param nSample number of iterations for sampling
 #' @param nOutputs how often to print status into R by iterations
 #' @param alphaA sparsity parameter for A domain
 #' @param alphaP sparsity parameter for P domain
 #' @param maxGibbmassA limit truncated normal to max size
 #' @param maxGibbmassP limit truncated normal to max size
 #' @param seed a positive seed is used as-is, while any negative seed tells
f62e7b27
 #' the algorithm to pick a seed based on the current time
08b41bdc
 #' @param messages display progress messages
 #' @param singleCellRNASeq indicates if the data is single cell RNA-seq data
 #' @param whichMatrixFixed character to indicate whether A or P matric contains
f62e7b27
 #' the fixed patterns
08b41bdc
 #' @param fixedPatterns matrix of fixed values in either A or P matrix
 #' @param checkpointInterval time (in seconds) between creating a checkpoint
d9e7e41c
 #' @param checkpointFile name of the checkpoint file
080fd044
 #' @param nCores number of cpu cores to run in parallel over
cfc26de7
 #' @param ... keeps backwards compatibility with arguments from older versions
ad6cc202
 #' @return list with A and P matrix estimates
536217d8
 #' @importFrom methods new
f62e7b27
 #' @examples
 #' data(SimpSim)
 #' result <- CoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nOutputs=250)
ad6cc202
 #' @export
8269c03c
 CoGAPS <- function(D, S, nFactor=7, nEquil=250, nSample=250, nOutputs=1000,
dd3a9441
 alphaA=0.01, alphaP=0.01, maxGibbmassA=100, maxGibbmassP=100,
d9e7e41c
 seed=-1, messages=TRUE, singleCellRNASeq=FALSE, whichMatrixFixed='N',
6981b959
 fixedPatterns=matrix(0), checkpointInterval=0, 
b9cc8323
 checkpointFile="gaps_checkpoint.out", nCores=1, ...)
ad6cc202
 {
1de64512
     # get v2 arguments
     oldArgs <- list(...)
     if (!is.null(oldArgs$nOutR))
         nOutputs <- oldArgs$nOutR
     if (!is.null(oldArgs$max_gibbmass_paraA))
         maxGibbmassA <- oldArgs$max_gibbmass_paraA
     if (!is.null(oldArgs$max_gibbmass_paraP))
         maxGibbmassP <- oldArgs$max_gibbmass_paraP
     if (!is.null(oldArgs$sampleSnapshots) & is.null(oldArgs$numSnapshots))
         nSnapshots <- 100
     if (!is.null(oldArgs$sampleSnapshots) & !is.null(oldArgs$numSnapshots))
         nSnapshots <- oldArgs$numSnapshots
     if (missing(D) & !is.null(oldArgs$data))
         D <- oldArgs$data
     if (missing(S) & !is.null(oldArgs$unc))
         S <- oldArgs$unc
cb4957f4
 
10e5acf7
     # get pump arguments - hidden for now from user
     pumpThreshold <- "unique"
     nPumpSamples <- 0
     if (!is.null(list(...)$pumpThreshold))
         pumpThreshold <- list(...)$pumpThreshold
     if (!is.null(list(...)$nPumpSamples))
         pumpThreshold <- list(...)$nPumpSamples
 
1de64512
     # check arguments
     if (class(D) != "matrix" | class(S) != "matrix")
         stop('D and S must be matrices')
536217d8
     if (any(D < 0) | any(S < 0))
ad6cc202
         stop('D and S matrix must be non-negative')
d9e7e41c
     if (nrow(D) != nrow(S) | ncol(D) != ncol(S))
         stop('D and S matrix have different dimensions')
     if (whichMatrixFixed == 'A' & nrow(fixedPatterns) != nrow(D))
         stop('invalid number of rows for fixedPatterns')
     if (whichMatrixFixed == 'A' & ncol(fixedPatterns) > nFactor)
         stop('invalid number of columns for fixedPatterns')
     if (whichMatrixFixed == 'P' & nrow(fixedPatterns) > nFactor)
         stop('invalid number of rows for fixedPatterns')
     if (whichMatrixFixed == 'P' & ncol(fixedPatterns) != ncol(D))
         stop('invalid number of columns for fixedPatterns')
6981b959
     thresholdEnum <- c("unique", "cut")
24ba0679
     
     # get seed
     if (seed < 0)
     {
         # TODO get time in milliseconds
         seed <- 0
     }
cb4957f4
 
536217d8
     # run algorithm with call to C++ code
cfc26de7
     result <- cogaps_cpp(D, S, nFactor, nEquil, nEquil/10, nSample, nOutputs,
dd3a9441
         alphaA, alphaP, maxGibbmassA, maxGibbmassP, seed, messages,
d9e7e41c
         singleCellRNASeq, whichMatrixFixed, fixedPatterns, checkpointInterval,
b9cc8323
         checkpointFile, which(thresholdEnum==pumpThreshold), nPumpSamples,
         nCores)
f62e7b27
     
d9e7e41c
     # label matrices and return list
f62e7b27
     patternNames <- paste('Patt', 1:nFactor, sep='')
     rownames(result$Amean) <- rownames(result$Asd) <- rownames(D)
     colnames(result$Amean) <- colnames(result$Asd) <- patternNames
     rownames(result$Pmean) <- rownames(result$Psd) <- patternNames
     colnames(result$Pmean) <- colnames(result$Psd) <- colnames(D)
     return(v2CoGAPS(result, ...)) # backwards compatible with v2
ad6cc202
 }
cb4957f4
 
ad6cc202
 #' Restart CoGAPS from Checkpoint File
8269c03c
 #' @export
ad6cc202
 #'
 #' @details loads the state of a previous CoGAPS run from a file and
 #'  continues the run from that point
 #' @param D data matrix
 #' @param S uncertainty matrix
dd3a9441
 #' @param nFactor number of patterns
 #' @param nIter number of iterations
 #' @param checkpointFile path to checkpoint file
ad6cc202
 #' @param path path to checkpoint file
 #' @return list with A and P matrix estimates
8269c03c
 #' @examples
 #' data(SimpSim)
 #' result <- CoGAPS(SimpSim.D, SimpSim.S, nFactor=3, nOutputs=250)
dd3a9441
 CoGapsFromCheckpoint <- function(D, S, nFactor, nIter, checkpointFile)
ad6cc202
 {
dd3a9441
     cogapsFromCheckpoint_cpp(D, S, nFactor, nIter, nIter, checkpointFile)
ad6cc202
 }
cb4957f4
 
1a024fff
 #' CoGAPS with file input for matrix
080fd044
 #'
 #' @param D file path for data matrix
 #' @return list with A and P matrix estimates
 #' @examples
 #'  file <- system.file("extdata/GIST.mtx", package="CoGAPS")
 #'  CoGAPSFromFile(file)
1a024fff
 CoGAPSFromFile <- function(D)
 {
     cogapsFromFile_cpp(D)
 }
 
536217d8
 #' Display Information About Package Compilation
08b41bdc
 #'
 #' @details displays information about how the package was compiled, i.e. which
 #'  compiler/version was used, which compile time options were enabled, etc...
f62e7b27
 #' @return display builds information
 #' @examples
54fc4f75
 #'  CoGAPS::buildReport()
536217d8
 #' @export
0f279c94
 buildReport <- function()
536217d8
 {
20bce466
     getBuildReport_cpp()
536217d8
 }
 
ad6cc202
 #' Backwards Compatibility with v2
 #'
 #' @param D data matrix
 #' @param S uncertainty matrix
cfc26de7
 #' @param ABins unused
 #' @param PBins unused
 #' @param simulation_id unused
 #' @param nOutR number of output messages
 #' @param output_atomic unused
 #' @param fixedBinProbs unused
 #' @param fixedDomain unused
 #' @param sampleSnapshots indicates if snapshots should be made
 #' @param numSnapshots how many snapshots to take
 #' @param nMaxA unused
 #' @param nMaxP unused
 #' @param max_gibbmass_paraA limit truncated normal to max size
 #' @param max_gibbmass_paraP limit truncated normal to max size
ad6cc202
 #' @return list with A and P matrix estimates
536217d8
 #' @importFrom methods new
cfc26de7
 #' @inheritParams CoGAPS
ae8a824e
 #' @examples
 #' data(SimpSim)
 #' result <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3)
ad6cc202
 #' @export
1de64512
 gapsRun <- function(D, S, ABins=data.frame(), PBins=data.frame(), nFactor=7,
 simulation_id="simulation", nEquil=1000, nSample=1000, nOutR=1000,
 output_atomic=FALSE, fixedBinProbs=FALSE, fixedDomain="N", sampleSnapshots=TRUE,
 numSnapshots=100, alphaA=0.01, nMaxA=100000, max_gibbmass_paraA=100.0,
 alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0, seed=-1, messages=TRUE)
ad6cc202
 {
10e5acf7
     warning('gapsRun is deprecated with v3.0, use CoGAPS')
f62e7b27
     CoGAPS(D, S, nFactor=nFactor, nEquil=nEquil, nSample=nSample,
         nOutputs=nOutR, nSnapshots=ifelse(sampleSnapshots,numSnapshots,0),
         alphaA=alphaA, alphaP=alphaP, maxGibbmassA=max_gibbmass_paraA,
         messages=messages, maxGibbmassP=max_gibbmass_paraP, seed=seed)
ad6cc202
 }
cb4957f4
 
ad6cc202
 #' Backwards Compatibility with v2
 #'
 #' @param D data matrix
 #' @param S uncertainty matrix
 #' @param FP data.frame with rows giving fixed patterns for P
cfc26de7
 #' @param fixedMatrix unused
ad6cc202
 #' @param ... v2 style parameters
 #' @return list with A and P matrix estimates
536217d8
 #' @importFrom methods new
cfc26de7
 #' @inheritParams gapsRun
ae8a824e
 #' @examples
 #' data(SimpSim)
 #' nC <- ncol(SimpSim.D)
afdd7d63
 #' patterns <- matrix(1:nC/nC, nrow=1, ncol=nC)
ae8a824e
 #' result <- gapsMapRun(SimpSim.D, SimpSim.S, FP=patterns, nFactor=3)
ad6cc202
 #' @export
1de64512
 gapsMapRun <- function(D, S, FP, ABins=data.frame(), PBins=data.frame(),
 nFactor=5, simulation_id="simulation", nEquil=1000, nSample=1000, nOutR=1000,
 output_atomic=FALSE, fixedMatrix="P", fixedBinProbs=FALSE, fixedDomain="N",
 sampleSnapshots=TRUE, numSnapshots=100, alphaA=0.01, nMaxA=100000,
 max_gibbmass_paraA=100.0, alphaP=0.01, nMaxP=100000, max_gibbmass_paraP=100.0,
 seed=-1, messages=TRUE)
ad6cc202
 {
10e5acf7
     warning('gapsMapRun is deprecated with v3.0, use CoGaps')
f62e7b27
     CoGAPS(D, S, nFactor=nFactor, nEquil=nEquil, nSample=nSample,
         nOutputs=nOutR, nSnapshots=ifelse(sampleSnapshots,numSnapshots,0),
         alphaA=alphaA, alphaP=alphaP, maxGibbmassA=max_gibbmass_paraA,
         messages=messages, maxGibbmassP=max_gibbmass_paraP, seed=seed,
         whichMatrixFixed='P', fixedPatterns=as.matrix(FP))
ad6cc202
 }
cb4957f4
 
f62e7b27
 # helper function for backwards compatibility
1de64512
 v2CoGAPS <- function(result, ...)
ad6cc202
 {
1de64512
     if (!is.null(list(...)$GStoGenes))
ad6cc202
     {
10e5acf7
         warning('GStoGenes is deprecated with v3.0, see CoGAPS documentation')
f62e7b27
         if (is.null(list(...)$plot) | list(...)$plot)
         {
             plotGAPS(result$Amean, result$Pmean)
         }
         if (is.null(list(...)$nPerm))
         {
             nPerm <- 500
         }
         else
         {
             nPerm <- list(...)$nPerm
         }
         GSP <- calcCoGAPSStat(result$Amean, result$Asd, list(...)$GStoGenes,
             nPerm)
         result <- list(meanChi2=result$meanChi2, Amean=result$Amean,
             Asd=result$Asd, Pmean=result$Pmean, Psd=result$Psd,
             GSUpreg=GSP$GSUpreg, GSDownreg=GSP$GSDownreg, GSActEst=GSP$GSActEst)
1de64512
     }
     return(result)
74377719
 }