Browse code

clean up output

Tom Sherman authored on 02/08/2018 16:52:31
Showing46 changed files

... ...
@@ -26,6 +26,7 @@
26 26
 ^src/file_parser/TsvParser.o
27 27
 ^src/file_parser/MtxParser.o
28 28
 ^src/math/Algorithms.o
29
+^src/math/Math.o
29 30
 ^src/math/Random.o
30 31
 ^src/cpp_tests/testAlgorithms.o
31 32
 ^src/cpp_tests/testAtomicDomain.o
... ...
@@ -22,10 +22,12 @@ Imports:
22 22
     methods,
23 23
     gplots,
24 24
     graphics,
25
+    RColorBrewer,
25 26
     Rcpp,
26 27
     S4Vectors,
27 28
     SingleCellExperiment,
28
-    SummarizedExperiment
29
+    SummarizedExperiment,
30
+    utils
29 31
 Suggests:
30 32
     testthat,
31 33
     knitr,
... ...
@@ -25,6 +25,7 @@ importClassesFrom(S4Vectors,character_OR_NULL)
25 25
 importClassesFrom(SingleCellExperiment,LinearEmbeddingMatrix)
26 26
 importFrom(BiocParallel,MulticoreParam)
27 27
 importFrom(BiocParallel,bplapply)
28
+importFrom(RColorBrewer,brewer.pal)
28 29
 importFrom(Rcpp,evalCpp)
29 30
 importFrom(SummarizedExperiment,assay)
30 31
 importFrom(cluster,agnes)
... ...
@@ -53,4 +54,5 @@ importFrom(stats,hclust)
53 54
 importFrom(stats,runif)
54 55
 importFrom(stats,weighted.mean)
55 56
 importFrom(tools,file_ext)
57
+importFrom(utils,packageVersion)
56 58
 useDynLib(CoGAPS)
... ...
@@ -36,27 +36,29 @@ supported <- function(file)
36 36
 #' @param transposeData T/F for transposing data while reading it in - useful
37 37
 #' for data that is stored as samples x genes since CoGAPS requires data to be
38 38
 #' genes x samples
39
+#' @param BPPARAM BiocParallel backend 
39 40
 #' @param ... allows for overwriting parameters in params
40 41
 #' @return CogapsResult object
41 42
 #' @examples
42 43
 #' # Running from R object
43 44
 #' data(GIST)
44
-#' resultA <- CoGAPS(GIST.D)
45
+#' resultA <- CoGAPS(GIST.data_frame)
45 46
 #'
46 47
 #' # Running from file name
47 48
 #' gist_path <- system.file("extdata/GIST.mtx", package="CoGAPS")
48 49
 #' resultB <- CoGAPS(gist_path)
49 50
 #'
50
-#' Setting Parameters
51
+#' # Setting Parameters
51 52
 #' params <- new("CogapsParams")
52 53
 #' params <- setParam(params, "nPatterns", 5)
53
-#' resultC <- CoGAPS(GIST.D, params)
54
+#' resultC <- CoGAPS(GIST.data_frame, params)
54 55
 #' @importFrom methods new is
55 56
 #' @importFrom SummarizedExperiment assay
57
+#' @importFrom utils packageVersion
56 58
 CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
57 59
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
58 60
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
59
-checkpointInFile=NULL, transposeData=FALSE, ...)
61
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
60 62
 {
61 63
     # store all parameters in a list and parse parameters from ...
62 64
     allParams <- list("gaps"=params,
... ...
@@ -67,10 +69,14 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
67 69
         "checkpointInterval"=checkpointInterval,
68 70
         "checkpointInFile"=checkpointInFile,
69 71
         "transposeData"=transposeData,
72
+        "bpBackend"=BPPARAM,
70 73
         "whichMatrixFixed"=NULL # internal parameter
71 74
     )
72 75
     allParams <- parseExtraParams(allParams, list(...))
73 76
 
77
+    # display start up message for the user
78
+    startupMessage(data, allParams$transposeData, allParams$gaps@distributed)
79
+
74 80
     # check file extension
75 81
     if (is(data, "character") & !supported(data))
76 82
         stop("unsupported file extension for data")
... ...
@@ -90,6 +96,9 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
90 96
         if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell)
91 97
             warning("running single-cell CoGAPS with singleCell=FALSE")
92 98
 
99
+    if (!is.null(allParams$gaps@distributed) & allParams$nThreads > 1)
100
+        stop("can't run multi-threaded and distributed CoGAPS at the same time")
101
+
93 102
     # convert data to matrix
94 103
     if (is(data, "data.frame"))
95 104
         data <- data.matrix(data)
... ...
@@ -126,7 +135,10 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
126 135
         Psd         = gapsReturnList$Psd,
127 136
         seed        = gapsReturnList$seed,
128 137
         meanChiSq   = gapsReturnList$meanChiSq,
129
-        diagnostics = list("diag"=gapsReturnList$diagnostics, "params"=params)
138
+        geneNames   = getGeneNames(data, allParams),
139
+        sampleNames = getSampleNames(data, allParams),
140
+        diagnostics = list("diag"=gapsReturnList$diagnostics, "params"=params,
141
+                            "version"=utils::packageVersion("CoGAPS"))
130 142
     ))
131 143
 }
132 144
 
... ...
@@ -140,13 +152,13 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
140 152
 scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
141 153
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
142 154
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
143
-checkpointInFile=NULL, transposeData=FALSE, ...)
155
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
144 156
 {
145 157
     params@distributed <- "single-cell"
146 158
     params@singleCell <- TRUE
147 159
     CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty,
148 160
         checkpointOutFile, checkpointInterval, checkpointInFile, transposeData,
149
-        ...)
161
+        BPPARAM, ...)
150 162
 }
151 163
 
152 164
 #' Genome Wide CoGAPS
... ...
@@ -159,14 +171,58 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
159 171
 GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
160 172
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
161 173
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
162
-checkpointInFile=NULL, transposeData=FALSE, ...)
174
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
163 175
 {
164 176
     params@distributed <- "genome-wide"
165 177
     CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty,
166 178
         checkpointOutFile, checkpointInterval, checkpointInFile, transposeData,
167
-        ...)
179
+        BPPARAM, ...)
168 180
 }   
169 181
 
182
+#' write start up message
183
+#'
184
+#' @param data data set
185
+#' @param transpose if we are transposing the data set
186
+#' @param distributed if we are running distributed CoGAPS
187
+#' @return message displayed to screen
188
+startupMessage <- function(data, transpose, distributed)
189
+{
190
+    nGenes <- ifelse(transpose, ncol_helper(data), nrow_helper(data))
191
+    nSamples <- ifelse(transpose, nrow_helper(data), ncol_helper(data))
192
+
193
+    dist_message <- "Standard"
194
+    if (!is.null(distributed))
195
+        dist_message <- distributed
196
+    message(paste("Running", dist_message, "CoGAPS on", nGenes, "genes and",
197
+        nSamples, "samples"))
198
+}
199
+
200
+#' find the gene names in the data
201
+#'
202
+#' @param data data set
203
+#' @param allParams list of all cogaps parameters
204
+#' @return vector of gene names
205
+getGeneNames <- function(data, allParams)
206
+{
207
+    if (is(data, "matrix") & !allParams$transposeData)
208
+        return(rownames(data))
209
+    if (is(data, "matrix") & allParams$transposeData)
210
+        return(colnames(data))
211
+}
212
+
213
+#' find the sample names in the data
214
+#'
215
+#' @param data data set
216
+#' @param allParams list of all cogaps parameters
217
+#' @return vector of sample names
218
+getSampleNames <- function(data, allParams)
219
+{
220
+    if (is(data, "matrix") & !allParams$transposeData)
221
+        return(colnames(data))
222
+    if (is(data, "matrix") & allParams$transposeData)
223
+        return(rownames(data))
224
+}
225
+
170 226
 #' parse parameters passed through the ... variable
171 227
 #'
172 228
 #' @param allParams list of all parameters
... ...
@@ -8,27 +8,32 @@
8 8
 #' @param uncertainty uncertainty matrix (same supported types as data)
9 9
 #' @return list
10 10
 #' @importFrom BiocParallel bplapply MulticoreParam
11
-distributedCogaps <- function(data, allParams, uncertainty, BPPARAM=NULL)
11
+distributedCogaps <- function(data, allParams, uncertainty)
12 12
 {
13
-    FUN <- function(set, data, allParams, uncertainty, fixedMatrix=NULL)
13
+    FUN <- function(index, sets, data, allParams, uncertainty, fixedMatrix=NULL)
14 14
     {
15 15
         internal <- ifelse(is(data, "character"), cogaps_cpp_from_file, cogaps_cpp)
16
-        raw <- internal(data, allParams, uncertainty, set, fixedMatrix)
16
+        raw <- internal(data, allParams, uncertainty, sets[[index]],
17
+            fixedMatrix, index == 1)
17 18
         new("CogapsResult", Amean=raw$Amean, Asd=raw$Asd, Pmean=raw$Pmean,
18
-            Psd=raw$Psd, seed=raw$seed, meanChiSq=raw$meanChiSq)    
19
+            Psd=raw$Psd, seed=raw$seed, meanChiSq=raw$meanChiSq)
19 20
     }
20 21
 
21 22
     # randomly sample either rows or columns into subsets to break the data up
22 23
     set.seed(allParams$gaps@seed)
23 24
     sets <- createSets(data, allParams)
24
-    if (is.null(BPPARAM))
25
-        BPPARAM <- BiocParallel::MulticoreParam(workers=length(sets))
25
+    if (is.null(allParams$bpBackend))
26
+        allParams$bpBackend <- BiocParallel::MulticoreParam(workers=length(sets))
26 27
     
27 28
     # run Cogaps normally on each subset of the data
28
-    initialResult <- bplapply(sets, FUN, BPPARAM=BPPARAM, data=data,
29
-        allParams=allParams, uncertainty=uncertainty)
29
+    if (allParams$messages)
30
+        message("Running Across Subsets...")
31
+    initialResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend,
32
+        sets=sets, data=data, allParams=allParams, uncertainty=uncertainty)
30 33
 
31 34
     # match patterns in either A or P matrix
35
+    if (allParams$messages)
36
+        message("Matching Patterns Across Subsets...")
32 37
     consensusMatrix <- findConsensusMatrix(initialResult, allParams)
33 38
     allParams$gaps@nPatterns <- ncol(consensusMatrix)
34 39
 
... ...
@@ -36,9 +41,12 @@ distributedCogaps <- function(data, allParams, uncertainty, BPPARAM=NULL)
36 41
     allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
37 42
         == "genome-wide", "P", "A")
38 43
 
39
-    # ru final phase with fixed matrix
40
-    finalResult <- bplapply(sets, FUN, BPPARAM=BPPARAM, data=data, 
41
-        allParams=allParams, uncertainty=uncertainty, fixedMatrix=consensusMatrix)
44
+    # run final phase with fixed matrix
45
+    if (allParams$messages)
46
+        message("Running Final Stage...")
47
+    finalResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend,
48
+        sets=sets, data=data, allParams=allParams, uncertainty=uncertainty,
49
+        fixedMatrix=consensusMatrix)
42 50
 
43 51
     # get result 
44 52
     return(stitchTogether(finalResult, allParams))
... ...
@@ -121,16 +129,9 @@ findConsensusMatrix <- function(result, allParams)
121 129
 }
122 130
 
123 131
 #' Match Patterns Across Multiple Runs
124
-#' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
125
-#' @param nSets number of parallel sets used to generate \code{Atot}
126
-#' @param cnt  number of branches at which to cut dendrogram
127
-#' @param minNS minimum of individual set contributions a cluster must contain
128
-#' @param maxNS maximum of individual set contributions a cluster must contain
129
-#' @param ignore.NA logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.
130
-#' @param bySet logical indicating whether to return list of matched set solutions from \code{Atot}
131
-#' @param ... additional parameters for \code{agnes}
132
-#' @return a matrix of consensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
133
-#' consensus pattern is also returned.
132
+#' @param allPatterns matrix of patterns stored in the columns
133
+#' @param allParams list of all CoGAPS parameters
134
+#' @return a matrix of consensus patterns
134 135
 #' @importFrom stats weighted.mean
135 136
 patternMatch <- function(allPatterns, allParams)
136 137
 {
... ...
@@ -1,12 +1,12 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps_cpp_from_file <- function(data, allParams, uncertainty = NULL, indices = NULL, fixedMatrix = NULL) {
5
-    .Call('_CoGAPS_cogaps_cpp_from_file', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
4
+cogaps_cpp_from_file <- function(data, allParams, uncertainty = NULL, indices = NULL, fixedMatrix = NULL, isMaster = TRUE) {
5
+    .Call('_CoGAPS_cogaps_cpp_from_file', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix, isMaster)
6 6
 }
7 7
 
8
-cogaps_cpp <- function(data, allParams, uncertainty = NULL, indices = NULL, fixedMatrix = NULL) {
9
-    .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
8
+cogaps_cpp <- function(data, allParams, uncertainty = NULL, indices = NULL, fixedMatrix = NULL, isMaster = TRUE) {
9
+    .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix, isMaster)
10 10
 }
11 11
 
12 12
 getBuildReport_cpp <- function() {
... ...
@@ -38,6 +38,9 @@ setClass("CogapsParams", slots = c(
38 38
 ))
39 39
 
40 40
 #' constructor for CogapsParams
41
+#' @param .Object CogapsParams object
42
+#' @param ... initial values for slots
43
+#' @return initialized CogapsParams object
41 44
 #' @importFrom methods callNextMethod
42 45
 setMethod("initialize", "CogapsParams",
43 46
     function(.Object, ...)
... ...
@@ -114,7 +117,7 @@ setGeneric("setParam", function(object, whichParam, value)
114 117
 #' @return the modified params object
115 118
 #' @examples
116 119
 #'  params <- new("CogapsParams")
117
-#'  params <- setDistributedParams(3, 2, 4)
120
+#'  params <- setDistributedParams(params, 5)
118 121
 setGeneric("setDistributedParams", function(object, nSets, cut=NULL,
119 122
 minNS=NULL, maxNS=NULL)
120 123
     {standardGeneric("setDistributedParams")})
... ...
@@ -1,6 +1,8 @@
1 1
 #' CogapsResult
2 2
 #' @export
3 3
 #'
4
+#' @slot sampleStdDev std dev of the sampled P matrices
5
+#' @slot featureStdDev std dev of the sampled A matrices
4 6
 #' @description Contains all output from Cogaps run
5 7
 #' @importClassesFrom S4Vectors Annotated
6 8
 #' @importClassesFrom SingleCellExperiment LinearEmbeddingMatrix
... ...
@@ -10,20 +12,30 @@ setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c(
10 12
 ))
11 13
 
12 14
 #' Constructor for CogapsResult
15
+#' @param .Object CogapsResult object
16
+#' @param Amean mean of sampled A matrices
17
+#' @param Pmean mean of sampled P matrices
18
+#' @param Asd std dev of sampled A matrices
19
+#' @param Psd std dev of sampled P matrices
20
+#' @param seed random seed used for the run
21
+#' @param meanChiSq mean value of ChiSq statistic
22
+#' @param geneNames names of genes in original data set
23
+#' @param sampleNames names of samples in original data set
24
+#' @param diagnostics assorted diagnostic reports from the run
25
+#' @param ... initial values for slots
13 26
 #' @return initialized CogapsResult object
14 27
 #' @importFrom methods callNextMethod
15 28
 setMethod("initialize", "CogapsResult",
16
-function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics=list(),
17
-...)
29
+function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, geneNames=NULL,
30
+sampleNames=NULL, diagnostics=NULL, ...)
18 31
 {
19 32
     .Object@featureLoadings <- Amean
20 33
     .Object@sampleFactors <- Pmean
34
+    .Object@featureStdDev <- Asd
35
+    .Object@sampleStdDev <- Psd
21 36
 
22
-    if (!is.null(Asd))
23
-        .Object@featureStdDev <- Asd
24
-    if (!is.null(Psd))
25
-        .Object@sampleStdDev <- Psd
26
-
37
+    .Object@metadata[["geneNames"]] <- geneNames
38
+    .Object@metadata[["sampleNames"]] <- sampleNames
27 39
     .Object@metadata[["seed"]] <- seed
28 40
     .Object@metadata[["meanChiSq"]] <- meanChiSq
29 41
     .Object@metadata[["diagnostics"]] <- diagnostics
... ...
@@ -52,7 +64,7 @@ setValidity("CogapsResult",
52 64
 #' @param object an object of type CogapsResult
53 65
 #' @return chi-sq error
54 66
 #' data(SimpSim)
55
-#' result <- CoGAPS(SimpSim.D)
67
+#' result <- CoGAPS(SimpSim.data)
56 68
 #' meanChiSq(result)
57 69
 setGeneric("getMeanChiSq", function(object)
58 70
     {standardGeneric("getMeanChiSq")})
... ...
@@ -70,7 +82,7 @@ setGeneric("getMeanChiSq", function(object)
70 82
 #' @return matrix of z-scores
71 83
 #' @examples
72 84
 #' data(SimpSim)
73
-#' result <- CoGAPS(SimpSim.D)
85
+#' result <- CoGAPS(SimpSim.data)
74 86
 #' feature_zscore <- calcZ(result)
75 87
 setGeneric("calcZ", function(object, which="feature")
76 88
     {standardGeneric("calcZ")})
... ...
@@ -85,7 +97,7 @@ setGeneric("calcZ", function(object, which="feature")
85 97
 #' @return the D' estimate of a gene or set of genes
86 98
 #' @examples
87 99
 #' data(SimpSim)
88
-#' result <- CoGAPS(SimpSim.D)
100
+#' result <- CoGAPS(SimpSim.data)
89 101
 #' D_estimate <- reconstructGene(result)
90 102
 setGeneric("reconstructGene", function(object, genes=NULL)
91 103
     {standardGeneric("reconstructGene")})
... ...
@@ -104,7 +116,7 @@ setGeneric("reconstructGene", function(object, genes=NULL)
104 116
 #' @return plots a heatmap of the A Matrix
105 117
 #' @examples
106 118
 #' data(SimpSim)
107
-#' result <- CoGAPS(SimpSim.D)
119
+#' result <- CoGAPS(SimpSim.data)
108 120
 #' binMatrix <- binaryA(result, threshold=3)
109 121
 setGeneric("binaryA", function(object, threshold=3)
110 122
     {standardGeneric("binaryA")})
... ...
@@ -121,8 +133,8 @@ setGeneric("binaryA", function(object, threshold=3)
121 133
 #' @return creates a residual plot
122 134
 #' @examples
123 135
 #' data(SimpSim)
124
-#' result <- CoGAPS(SimpSim.D)
125
-#' plotResiduals(result, SimpSim.D)
136
+#' result <- CoGAPS(SimpSim.data)
137
+#' plotResiduals(result, SimpSim.data)
126 138
 setGeneric("plotResiduals", function(object, data, uncertainty=NULL)
127 139
     {standardGeneric("plotResiduals")})
128 140
 
... ...
@@ -138,10 +150,6 @@ setGeneric("plotResiduals", function(object, data, uncertainty=NULL)
138 150
 #' @param GStoGenes data.frame or list with gene sets
139 151
 #' @param numPerm number of permutations for null
140 152
 #' @return gene set statistics for each column of A
141
-#' @examples
142
-#' data('SimpSim')
143
-#' result <- CoGAPS(SimpSim.D)
144
-#' calcCoGAPSStat(result, GStoGenes=GSets, numPerm=500)
145 153
 setGeneric("calcCoGAPSStat", function(object, GStoGenes, numPerm=500)
146 154
     {standardGeneric("calcCoGAPSStat")})
147 155
 
... ...
@@ -159,10 +167,6 @@ setGeneric("calcCoGAPSStat", function(object, GStoGenes, numPerm=500)
159 167
 #' @param Pw weight on genes
160 168
 #' @param nullGenes logical indicating gene adjustment
161 169
 #' @return gene similiarity statistic
162
-#' @examples
163
-#' data(SimpSim)
164
-#' result <- CoGAPS(SimpSim.D)
165
-#' calcGeneGSStat(result, GSGenes=GSets[[1]], numPerm=500)
166 170
 setGeneric("calcGeneGSStat", function(object, GStoGenes, numPerm,
167 171
 Pw=rep(1,ncol(object@featureLoadings)), nullGenes=FALSE)
168 172
     {standardGeneric("calcGeneGSStat")})
... ...
@@ -184,10 +188,6 @@ Pw=rep(1,ncol(object@featureLoadings)), nullGenes=FALSE)
184 188
 #' @param PwNull - logical indicating gene adjustment
185 189
 #' @return A vector of length GSGenes containing the p-values of set membership
186 190
 #' for each gene containined in the set specified in GSGenes.
187
-#' @examples
188
-#' data(SimpSim)
189
-#' result <- CoGAPS(SimpSim.D)
190
-#' computeGeneGSProb(result, GSGenes=GSets[[1]], numPerm=500)
191 191
 setGeneric("computeGeneGSProb", function(object, GStoGenes, numPerm=500,
192 192
 Pw=rep(1,ncol(object@featureLoadings)), PwNull=FALSE)
193 193
     {standardGeneric("computeGeneGSProb")})
... ...
@@ -207,6 +207,6 @@ Pw=rep(1,ncol(object@featureLoadings)), PwNull=FALSE)
207 207
 #' @return By default a non-overlapping list of genes associated with each
208 208
 #' \code{lp}. If \code{full=TRUE} a data.frame of genes rankings with a column
209 209
 #' for each \code{lp} will also be returned.
210
-setGeneric("patternMarkers", function(object, threshold="all", lp=NA, full=FALSE)
210
+setGeneric("patternMarkers", function(object, threshold="all", lp=NA)
211 211
     {standardGeneric("patternMarkers")})
212 212
 
... ...
@@ -21,7 +21,8 @@ plot.CogapsResult <- function(x, ...)
21 21
     xlimits <- c(0, nSamples + 1)
22 22
     ylimits <- c(0, max(x@sampleFactors) * 1.1)
23 23
 
24
-    plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude")
24
+    plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude",
25
+        xlab="Samples")
25 26
 
26 27
     for (i in 1:nFactors)
27 28
     {
... ...
@@ -77,6 +78,7 @@ function(object, genes)
77 78
 #' @aliases binaryA
78 79
 #' @importFrom gplots heatmap.2
79 80
 #' @importFrom graphics mtext
81
+#' @importFrom RColorBrewer brewer.pal
80 82
 setMethod("binaryA", signature(object="CogapsResult"),
81 83
 function(object, threshold)
82 84
 {
... ...
@@ -95,6 +97,7 @@ function(object, threshold)
95 97
 #' @aliases plotResiduals
96 98
 #' @importFrom gplots heatmap.2
97 99
 #' @importFrom grDevices colorRampPalette
100
+#' @importFrom RColorBrewer brewer.pal
98 101
 setMethod("plotResiduals", signature(object="CogapsResult"),
99 102
 function(object, data, uncertainty)
100 103
 {
... ...
@@ -142,9 +145,9 @@ function(object, threshold, lp)
142 145
         ssranks <- matrix(NA, nrow=nGenes, ncol=nPatterns,
143 146
             dimnames=dimnames(object@featureLoadings))
144 147
         ssgenes <- matrix(NA, nrow=nGenes, ncol=nPatterns, dimnames=NULL)
145
-        for (i in 1:nP)
148
+        for (i in 1:nPatterns)
146 149
         {
147
-            lp <- rep(0,dim(Amatrix)[2])
150
+            lp <- rep(0,nPatterns)
148 151
             lp[i] <- 1
149 152
             sstat[,i] <- unlist(apply(Arowmax, 1, function(x) sqrt(t(x-lp)%*%(x-lp))))
150 153
             ssranks[,i]<-rank(sstat[,i])
... ...
@@ -152,8 +155,8 @@ function(object, threshold, lp)
152 155
         }
153 156
         if (threshold=="cut")
154 157
         {
155
-            geneThresh <- sapply(1:nP,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
156
-            ssgenes.th <- sapply(1:nP,function(x) ssgenes[1:geneThresh[x],x])
158
+            geneThresh <- sapply(1:nPatterns,function(x) min(which(ssranks[ssgenes[,x],x] > apply(ssranks[ssgenes[,x],],1,min))))
159
+            ssgenes.th <- sapply(1:nPatterns,function(x) ssgenes[1:geneThresh[x],x])
157 160
         }
158 161
         else if (threshold=="all")
159 162
         {
... ...
@@ -451,7 +451,7 @@ configure:6353: gcc -o conftest -UNDEBUG -Wall -pedantic -g -O0   conftest.c  >&
451 451
 configure:6353: $? = 0
452 452
 configure:6353: ./conftest
453 453
 configure:6353: $? = 0
454
-configure:6363: result: 806ea:7100800:7ffafbff:bfebfbff
454
+configure:6363: result: 806ea:6100800:7ffafbff:bfebfbff
455 455
 configure:6390: checking for x86 cpuid 0x00000007 output
456 456
 configure:6420: gcc -o conftest -UNDEBUG -Wall -pedantic -g -O0   conftest.c  >&5
457 457
 configure:6420: $? = 0
... ...
@@ -757,7 +757,7 @@ ax_cv_cxx_openmp=-fopenmp
757 757
 ax_cv_gcc_x86_avx_xgetbv_0x00000000=1f:0
758 758
 ax_cv_gcc_x86_avx_xgetbv_=unknown
759 759
 ax_cv_gcc_x86_cpuid_0x00000000=16:756e6547:6c65746e:49656e69
760
-ax_cv_gcc_x86_cpuid_0x00000001=806ea:7100800:7ffafbff:bfebfbff
760
+ax_cv_gcc_x86_cpuid_0x00000001=806ea:6100800:7ffafbff:bfebfbff
761 761
 ax_cv_gcc_x86_cpuid_0x00000007=0:29c6fbf:0:0
762 762
 ax_cv_gcc_x86_cpuid_0x80000000=80000008:0:0:0
763 763
 ax_cv_gcc_x86_cpuid_0x80000001=0:0:121:2c100800
... ...
@@ -7,7 +7,8 @@
7 7
 CoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  ...)
11 12
 }
12 13
 \arguments{
13 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -36,6 +37,8 @@ contained in this file}
36 37
 for data that is stored as samples x genes since CoGAPS requires data to be
37 38
 genes x samples}
38 39
 
40
+\item{BPPARAM}{BiocParallel backend}
41
+
39 42
 \item{...}{allows for overwriting parameters in params}
40 43
 }
41 44
 \value{
... ...
@@ -53,14 +56,14 @@ SingleCellExperiment. The supported file types are csv, tsv, and mtx.
53 56
 \examples{
54 57
 # Running from R object
55 58
 data(GIST)
56
-resultA <- CoGAPS(GIST.D)
59
+resultA <- CoGAPS(GIST.data_frame)
57 60
 
58 61
 # Running from file name
59 62
 gist_path <- system.file("extdata/GIST.mtx", package="CoGAPS")
60 63
 resultB <- CoGAPS(gist_path)
61 64
 
62
-Setting Parameters
65
+# Setting Parameters
63 66
 params <- new("CogapsParams")
64 67
 params <- setParam(params, "nPatterns", 5)
65
-resultC <- CoGAPS(GIST.D, params)
68
+resultC <- CoGAPS(GIST.data_frame, params)
66 69
 }
... ...
@@ -7,3 +7,11 @@
7 7
 \description{
8 8
 Contains all output from Cogaps run
9 9
 }
10
+\section{Slots}{
11
+
12
+\describe{
13
+\item{\code{sampleStdDev}}{std dev of the sampled P matrices}
14
+
15
+\item{\code{featureStdDev}}{std dev of the sampled A matrices}
16
+}}
17
+
... ...
@@ -4,10 +4,11 @@
4 4
 \alias{GWCoGAPS}
5 5
 \title{Genome Wide CoGAPS}
6 6
 \usage{
7
-GWCoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
7
+GWCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  ...)
11 12
 }
12 13
 \arguments{
13 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -36,6 +37,8 @@ contained in this file}
36 37
 for data that is stored as samples x genes since CoGAPS requires data to be
37 38
 genes x samples}
38 39
 
40
+\item{BPPARAM}{BiocParallel backend}
41
+
39 42
 \item{...}{allows for overwriting parameters in params}
40 43
 }
41 44
 \value{
... ...
@@ -26,6 +26,6 @@ threshold * Asd and 0 otherwise
26 26
 }
27 27
 \examples{
28 28
 data(SimpSim)
29
-result <- CoGAPS(SimpSim.D)
29
+result <- CoGAPS(SimpSim.data)
30 30
 binMatrix <- binaryA(result, threshold=3)
31 31
 }
... ...
@@ -25,8 +25,3 @@ calculates the gene set statistics for each
25 25
 column of A using a Z-score from the elements of the A matrix,
26 26
 the input gene set, and permutation tests
27 27
 }
28
-\examples{
29
-data('SimpSim')
30
-result <- CoGAPS(SimpSim.D)
31
-calcCoGAPSStat(result, GStoGenes=GSets, numPerm=500)
32
-}
... ...
@@ -31,8 +31,3 @@ calculates the probability that a gene
31 31
 listed in a gene set behaves like other genes in the set within
32 32
 the given data set
33 33
 }
34
-\examples{
35
-data(SimpSim)
36
-result <- CoGAPS(SimpSim.D)
37
-calcGeneGSStat(result, GSGenes=GSets[[1]], numPerm=500)
38
-}
... ...
@@ -25,6 +25,6 @@ and standard deviation matrices
25 25
 }
26 26
 \examples{
27 27
 data(SimpSim)
28
-result <- CoGAPS(SimpSim.D)
28
+result <- CoGAPS(SimpSim.data)
29 29
 feature_zscore <- calcZ(result)
30 30
 }
... ...
@@ -35,8 +35,3 @@ membership for each candidate gene in a set specified in \code{GSGenes} by
35 35
 comparing the inferred activity of that gene to the average activity of the
36 36
 set.
37 37
 }
38
-\examples{
39
-data(SimpSim)
40
-result <- CoGAPS(SimpSim.D)
41
-computeGeneGSProb(result, GSGenes=GSets[[1]], numPerm=500)
42
-}
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{distributedCogaps}
5 5
 \title{CoGAPS Distributed Matrix Factorization Algorithm}
6 6
 \usage{
7
-distributedCogaps(data, allParams, uncertainty, BPPARAM = NULL)
7
+distributedCogaps(data, allParams, uncertainty)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{File name or R object (see details for supported types)}
11 11
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{getGeneNames}
4
+\alias{getGeneNames}
5
+\title{find the gene names in the data}
6
+\usage{
7
+getGeneNames(data, allParams)
8
+}
9
+\arguments{
10
+\item{data}{data set}
11
+
12
+\item{allParams}{list of all cogaps parameters}
13
+}
14
+\value{
15
+vector of gene names
16
+}
17
+\description{
18
+find the gene names in the data
19
+}
... ...
@@ -16,7 +16,7 @@ getMeanChiSq(object)
16 16
 \value{
17 17
 chi-sq error
18 18
 data(SimpSim)
19
-result <- CoGAPS(SimpSim.D)
19
+result <- CoGAPS(SimpSim.data)
20 20
 meanChiSq(result)
21 21
 }
22 22
 \description{
23 23
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{getSampleNames}
4
+\alias{getSampleNames}
5
+\title{find the sample names in the data}
6
+\usage{
7
+getSampleNames(data, allParams)
8
+}
9
+\arguments{
10
+\item{data}{data set}
11
+
12
+\item{allParams}{list of all cogaps parameters}
13
+}
14
+\value{
15
+vector of sample names
16
+}
17
+\description{
18
+find the sample names in the data
19
+}
... ...
@@ -7,6 +7,14 @@
7 7
 \usage{
8 8
 \S4method{initialize}{CogapsParams}(.Object, ...)
9 9
 }
10
+\arguments{
11
+\item{.Object}{CogapsParams object}
12
+
13
+\item{...}{initial values for slots}
14
+}
15
+\value{
16
+initialized CogapsParams object
17
+}
10 18
 \description{
11 19
 constructor for CogapsParams
12 20
 }
... ...
@@ -6,7 +6,31 @@
6 6
 \title{Constructor for CogapsResult}
7 7
 \usage{
8 8
 \S4method{initialize}{CogapsResult}(.Object, Amean, Pmean, Asd, Psd, seed,
9
-  meanChiSq, diagnostics = list(), ...)
9
+  meanChiSq, geneNames = NULL, sampleNames = NULL,
10
+  diagnostics = NULL, ...)
11
+}
12
+\arguments{
13
+\item{.Object}{CogapsResult object}
14
+
15
+\item{Amean}{mean of sampled A matrices}
16
+
17
+\item{Pmean}{mean of sampled P matrices}
18
+
19
+\item{Asd}{std dev of sampled A matrices}
20
+
21
+\item{Psd}{std dev of sampled P matrices}
22
+
23
+\item{seed}{random seed used for the run}
24
+
25
+\item{meanChiSq}{mean value of ChiSq statistic}
26
+
27
+\item{geneNames}{names of genes in original data set}
28
+
29
+\item{sampleNames}{names of samples in original data set}
30
+
31
+\item{diagnostics}{assorted diagnostic reports from the run}
32
+
33
+\item{...}{initial values for slots}
10 34
 }
11 35
 \value{
12 36
 initialized CogapsResult object
... ...
@@ -6,9 +6,10 @@
6 6
 \alias{patternMarkers,CogapsResult-method}
7 7
 \title{compute pattern markers statistic}
8 8
 \usage{
9
-patternMarkers(object, threshold = "all", lp = NA, full = FALSE)
9
+patternMarkers(object, threshold = "all", lp = NA)
10 10
 
11
-\S4method{patternMarkers}{CogapsResult}(object, threshold, lp)
11
+\S4method{patternMarkers}{CogapsResult}(object, threshold = "all",
12
+  lp = NA)
12 13
 }
13 14
 \arguments{
14 15
 \item{object}{an object of type CogapsResult}
... ...
@@ -7,25 +7,12 @@
7 7
 patternMatch(allPatterns, allParams)
8 8
 }
9 9
 \arguments{
10
-\item{Atot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
10
+\item{allPatterns}{matrix of patterns stored in the columns}
11 11
 
12
-\item{nSets}{number of parallel sets used to generate \code{Atot}}
13
-
14
-\item{cnt}{number of branches at which to cut dendrogram}
15
-
16
-\item{minNS}{minimum of individual set contributions a cluster must contain}
17
-
18
-\item{maxNS}{maximum of individual set contributions a cluster must contain}
19
-
20
-\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
21
-
22
-\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}}
23
-
24
-\item{...}{additional parameters for \code{agnes}}
12
+\item{allParams}{list of all CoGAPS parameters}
25 13
 }
26 14
 \value{
27
-a matrix of consensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
28
-consensus pattern is also returned.
15
+a matrix of consensus patterns
29 16
 }
30 17
 \description{
31 18
 Match Patterns Across Multiple Runs
... ...
@@ -25,6 +25,6 @@ calculate residuals and produce heatmap
25 25
 }
26 26
 \examples{
27 27
 data(SimpSim)
28
-result <- CoGAPS(SimpSim.D)
29
-plotResiduals(result, SimpSim.D)
28
+result <- CoGAPS(SimpSim.data)
29
+plotResiduals(result, SimpSim.data)
30 30
 }
... ...
@@ -23,6 +23,6 @@ reconstruct gene
23 23
 }
24 24
 \examples{
25 25
 data(SimpSim)
26
-result <- CoGAPS(SimpSim.D)
26
+result <- CoGAPS(SimpSim.data)
27 27
 D_estimate <- reconstructGene(result)
28 28
 }
... ...
@@ -4,10 +4,11 @@
4 4
 \alias{scCoGAPS}
5 5
 \title{Single Cell CoGAPS}
6 6
 \usage{
7
-scCoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
7
+scCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  ...)
11 12
 }
12 13
 \arguments{
13 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -36,6 +37,8 @@ contained in this file}
36 37
 for data that is stored as samples x genes since CoGAPS requires data to be
37 38
 genes x samples}
38 39
 
40
+\item{BPPARAM}{BiocParallel backend}
41
+
39 42
 \item{...}{allows for overwriting parameters in params}
40 43
 }
41 44
 \value{
... ...
@@ -32,5 +32,5 @@ these parameters  are interrelated so they must be set together
32 32
 }
33 33
 \examples{
34 34
  params <- new("CogapsParams")
35
- params <- setDistributedParams(3, 2, 4)
35
+ params <- setDistributedParams(params, 5)
36 36
 }
37 37
new file mode 100644
... ...
@@ -0,0 +1,21 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{startupMessage}
4
+\alias{startupMessage}
5
+\title{write start up message}
6
+\usage{
7
+startupMessage(data, transpose, distributed)
8
+}
9
+\arguments{
10
+\item{data}{data set}
11
+
12
+\item{transpose}{if we are transposing the data set}
13
+
14
+\item{distributed}{if we are running distributed CoGAPS}
15
+}
16
+\value{
17
+message displayed to screen
18
+}
19
+\description{
20
+write start up message
21
+}
... ...
@@ -76,16 +76,17 @@ std::vector<unsigned> getSubsetIndices(const Rcpp::Nullable<Rcpp::IntegerVector>
76 76
     return std::vector<unsigned>(1); // interpreted as null, i.e. will be ignored
77 77
 }
78 78
 
79
-bool processDistributedParameters(const Rcpp::List &allParams)
79
+// return if running distributed, and if so, are we partitioning rows/cols
80
+std::pair<bool, bool> processDistributedParameters(const Rcpp::List &allParams)
80 81
 {
81 82
     const Rcpp::S4 &gapsParams(allParams["gaps"]);
82 83
     if (!Rf_isNull(gapsParams.slot("distributed")))
83 84
     {
84 85
         std::string d = Rcpp::as<std::string>(gapsParams.slot("distributed"));
85 86
         GAPS_ASSERT(d == "genome-wide" || d == "single-cell");
86
-        return d == "genome-wide";
87
+        return std::pair<bool, bool>(true, d == "genome-wide");
87 88
     }
88
-    return false; // will be ignored anyways
89
+    return std::pair<bool, bool>(false, false);
89 90
 }
90 91
 
91 92
 // this is the main function that creates a GapsRunner and runs CoGAPS
... ...
@@ -93,11 +94,12 @@ bool processDistributedParameters(const Rcpp::List &allParams)
93 94
 template <class DataType>
94 95
 static Rcpp::List cogapsRun(const DataType &data, const Rcpp::List &allParams,
95 96
 const DataType &uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector> &indices,
96
-const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix)
97
+const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
97 98
 {
98 99
     // calculate essential parameters needed for constructing GapsRunner
99 100
     unsigned nPatterns = getNumPatterns(allParams);
100
-    bool partitionRows = processDistributedParameters(allParams);
101
+    bool printThreads = !processDistributedParameters(allParams).first;
102
+    bool partitionRows = processDistributedParameters(allParams).second;
101 103
     std::vector<unsigned> cIndices(getSubsetIndices(indices));
102 104
 
103 105
     // construct GapsRunner
... ...
@@ -143,13 +145,13 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix)
143 145
 
144 146
     // set parameters that aren't saved in the checkpoint
145 147
     runner.setMaxThreads(allParams["nThreads"]);
146
-    runner.setPrintMessages(allParams["messages"]);
148
+    runner.setPrintMessages(allParams["messages"] && isMaster);
147 149
     runner.setOutputFrequency(allParams["outputFrequency"]);
148 150
     runner.setCheckpointOutFile(allParams["checkpointOutFile"]);
149 151
     runner.setCheckpointInterval(allParams["checkpointInterval"]);
150 152
 
151 153
     // run cogaps and return the GapsResult in an R list
152
-    GapsResult result(runner.run());
154
+    GapsResult result(runner.run(printThreads));
153 155
     return Rcpp::List::create(
154 156
         Rcpp::Named("Amean") = createRMatrix(result.Amean),
155 157
         Rcpp::Named("Pmean") = createRMatrix(result.Pmean, true),
... ...
@@ -168,14 +170,16 @@ Rcpp::List cogaps_cpp_from_file(const Rcpp::CharacterVector &data,
168 170
 const Rcpp::List &allParams,
169 171
 const Rcpp::Nullable<Rcpp::CharacterVector> &uncertainty=R_NilValue,
170 172
 const Rcpp::Nullable<Rcpp::IntegerVector> &indices=R_NilValue,
171
-const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
173
+const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue,
174
+bool isMaster=true)
172 175
 {
173 176
     std::string unc = ""; // interpreted as null, i.e. will be ignored
174 177
     if (uncertainty.isNotNull())
175 178
     {
176 179
         unc = Rcpp::as<std::string>(Rcpp::CharacterVector(uncertainty));
177 180
     }
178
-    return cogapsRun(Rcpp::as<std::string>(data), allParams, unc, indices, fixedMatrix);
181
+    return cogapsRun(Rcpp::as<std::string>(data), allParams, unc, indices,
182
+        fixedMatrix, isMaster);
179 183
 }
180 184
 
181 185
 // [[Rcpp::export]]
... ...
@@ -183,14 +187,16 @@ Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix &data,
183 187
 const Rcpp::List &allParams,
184 188
 const Rcpp::Nullable<Rcpp::NumericMatrix> &uncertainty=R_NilValue,
185 189
 const Rcpp::Nullable<Rcpp::IntegerVector> &indices=R_NilValue,
186
-const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
190
+const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue,
191
+bool isMaster=true)
187 192
 {
188 193
     Matrix unc(1,1); // interpreted as null, i.e. will be ignored
189 194
     if (uncertainty.isNotNull())
190 195
     {
191 196
         unc = convertRMatrix(Rcpp::NumericMatrix(uncertainty));
192 197
     }
193
-    return cogapsRun(convertRMatrix(data), allParams, unc, indices, fixedMatrix);
198
+    return cogapsRun(convertRMatrix(data), allParams, unc, indices,
199
+        fixedMatrix, isMaster);
194 200
 }
195 201
 
196 202
 // [[Rcpp::export]]
... ...
@@ -77,16 +77,16 @@ void GapsRunner::setCheckpointInterval(unsigned interval)
77 77
     mCheckpointInterval = interval;
78 78
 }
79 79
 
80
-GapsResult GapsRunner::run()
80
+GapsResult GapsRunner::run(bool printThreads)
81 81
 {
82 82
     mStartTime = bpt_now();
83 83
 
84 84
     // calculate appropiate number of threads if compiled with openmp
85 85
     #ifdef __GAPS_OPENMP__
86
-    if (mPrintMessages)
86
+    if (mPrintMessages && printThreads)
87 87
     {
88 88
         unsigned availableThreads = omp_get_max_threads();
89
-        mMaxThreads = std::min(availableThreads, mMaxThreads);
89
+        mMaxThreads = gaps::min(availableThreads, mMaxThreads);
90 90
         gaps_printf("Running on %d out of %d available threads\n",
91 91
             mMaxThreads, availableThreads);
92 92
     }
... ...
@@ -130,13 +130,13 @@ void GapsRunner::runOnePhase()
130 130
         {        
131 131
             float temp = static_cast<float>(2 * mCurrentIteration)
132 132
                 / static_cast<float>(mMaxIterations);
133
-            mASampler.setAnnealingTemp(std::min(1.f, temp));
134
-            mPSampler.setAnnealingTemp(std::min(1.f, temp));
133
+            mASampler.setAnnealingTemp(gaps::min(1.f, temp));
134
+            mPSampler.setAnnealingTemp(gaps::min(1.f, temp));
135 135
         }
136 136
     
137 137
         // number of updates per iteration is poisson 
138
-        unsigned nA = gaps::random::poisson(std::max(mASampler.nAtoms(), 10ul));
139
-        unsigned nP = gaps::random::poisson(std::max(mPSampler.nAtoms(), 10ul));
138
+        unsigned nA = gaps::random::poisson(gaps::max(mASampler.nAtoms(), 10));
139
+        unsigned nP = gaps::random::poisson(gaps::max(mPSampler.nAtoms(), 10));
140 140
         updateSampler(nA, nP);
141 141
 
142 142
         if (mPhase == 'S')
... ...
@@ -180,7 +180,7 @@ void GapsRunner::updateSampler(unsigned nA, unsigned nP)
180 180
 static double estimatedNumUpdates(double current, double total, float nAtoms)
181 181
 {
182 182
     double coef = nAtoms / std::log(current);
183
-    return coef * std::log(std::sqrt(2 * total * gaps::algo::pi)) +
183
+    return coef * std::log(std::sqrt(2 * total * gaps::pi)) +
184 184
         total * coef * std::log(total) - total * coef;
185 185
 }
186 186
 
... ...
@@ -90,7 +90,7 @@ public:
90 90
     void setCheckpointOutFile(const std::string &outFile);
91 91
     void setCheckpointInterval(unsigned interval);
92 92
 
93
-    GapsResult run();
93
+    GapsResult run(bool printThreads=true);
94 94
 
95 95
     // serialization
96 96
     friend Archive& operator>>(Archive &ar, GapsRunner &runner);
... ...
@@ -71,7 +71,7 @@ static unsigned geneThreshold(const ColMatrix &rankMatrix, unsigned pat)
71 71
         {
72 72
             if (j != pat && rankMatrix(i,j) <= rankMatrix(i,pat))
73 73
             {
74
-                cutRank = std::min(cutRank, std::max(0.f, rankMatrix(i,pat)-1));
74
+                cutRank = gaps::min(cutRank, gaps::max(0.f, rankMatrix(i,pat)-1));
75 75
             }
76 76
         }
77 77
     }
... ...
@@ -425,7 +425,7 @@ template <class T, class MatA, class MatB>
425 425
 void GibbsSampler<T, MatA, MatB>::removeMass(uint64_t pos, float mass, unsigned row, unsigned col)
426 426
 {
427 427
     mDomain.cacheErase(pos);
428
-    mMatrix(row, col) = std::max(mMatrix(row, col) - mass, 0.f);
428
+    mMatrix(row, col) = gaps::max(mMatrix(row, col) - mass, 0.f);
429 429
     impl()->updateAPMatrix(row, col, -mass);
430 430
 }
431 431
 
... ...
@@ -448,7 +448,7 @@ unsigned col)
448 448
     }
449 449
 
450 450
     // accept mass as long as it's non-zero
451
-    if (mass >= gaps::algo::epsilon)
451
+    if (mass >= gaps::epsilon)
452 452
     {
453 453
         addMass(pos, mass, row, col);
454 454
         mQueue.acceptBirth();
... ...
@@ -466,7 +466,7 @@ void GibbsSampler<T, MatA, MatB>::death(uint64_t pos, float mass, unsigned row,
466 466
 unsigned col)
467 467
 {
468 468
     // kill off atom
469
-    mMatrix(row, col) = std::max(mMatrix(row, col) - mass, 0.f);
469
+    mMatrix(row, col) = gaps::max(mMatrix(row, col) - mass, 0.f);
470 470
     impl()->updateAPMatrix(row, col, -mass);
471 471
 
472 472
     // calculate rebirth mass
... ...
@@ -573,7 +573,7 @@ template <class T, class MatA, class MatB>
573 573
 bool GibbsSampler<T, MatA, MatB>::updateAtomMass(uint64_t pos, float mass,
574 574
 float delta)
575 575
 {
576
-    if (mass + delta < gaps::algo::epsilon)
576
+    if (mass + delta < gaps::epsilon)
577 577
     {
578 578
         mDomain.cacheErase(pos);
579 579
         mQueue.acceptDeath();
... ...
@@ -617,7 +617,7 @@ std::pair<float, bool> GibbsSampler<T, MatA, MatB>::gibbsMass(AlphaParameters al
617 617
     alpha.s *= mAnnealingTemp;
618 618
     alpha.su *= mAnnealingTemp;
619 619
 
620
-    if (alpha.s > gaps::algo::epsilon)
620
+    if (alpha.s > gaps::epsilon)
621 621
     {
622 622
         float mean = (alpha.su - mLambda) / alpha.s;
623 623
         float sd = 1.f / std::sqrt(alpha.s);
... ...
@@ -626,8 +626,8 @@ std::pair<float, bool> GibbsSampler<T, MatA, MatB>::gibbsMass(AlphaParameters al
626 626
         if (pLower < 1.f)
627 627
         {
628 628
             float m = gaps::random::inverseNormSample(pLower, 1.f, mean, sd);
629
-            float gMass = std::min(m, mMaxGibbsMass / mLambda);
630
-            return std::pair<float, bool>(gMass, gMass >= gaps::algo::epsilon);
629
+            float gMass = gaps::min(m, mMaxGibbsMass / mLambda);
630
+            return std::pair<float, bool>(gMass, gMass >= gaps::epsilon);
631 631
         }
632 632
     }
633 633
     return std::pair<float, bool>(0.f, false);
... ...
@@ -640,7 +640,7 @@ float m1, float m2)
640 640
     alpha.s *= mAnnealingTemp;
641 641
     alpha.su *= mAnnealingTemp;
642 642
 
643
-    if (alpha.s > gaps::algo::epsilon)
643
+    if (alpha.s > gaps::epsilon)
644 644
     {
645 645
         float mean = alpha.su / alpha.s; // lambda cancels out
646 646
         float sd = 1.f / std::sqrt(alpha.s);
... ...
@@ -650,7 +650,7 @@ float m1, float m2)
650 650
         if (!(pLower >  0.95f || pUpper < 0.05f))
651 651
         {
652 652
             float delta = gaps::random::inverseNormSample(pLower, pUpper, mean, sd);
653
-            float gibbsMass = std::min(std::max(-m1, delta), m2); // conserve mass
653
+            float gibbsMass = gaps::min(gaps::max(-m1, delta), m2); // conserve mass
654 654
             return std::pair<float, bool>(gibbsMass, true);
655 655
         }
656 656
     }
... ...
@@ -17,6 +17,7 @@ OBJECTS =   AtomicDomain.o \
17 17
             file_parser/TsvParser.o \
18 18
             file_parser/MtxParser.o \
19 19
             math/Algorithms.o \
20
+            math/Math.o \
20 21
             math/Random.o \
21 22
             cpp_tests/testAlgorithms.o \
22 23
             cpp_tests/testEfficientSets.o \
... ...
@@ -6,8 +6,8 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // cogaps_cpp_from_file
9
-Rcpp::List cogaps_cpp_from_file(const Rcpp::CharacterVector& data, const Rcpp::List& allParams, const Rcpp::Nullable<Rcpp::CharacterVector>& uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector>& indices, const Rcpp::Nullable<Rcpp::NumericMatrix>& fixedMatrix);
10
-RcppExport SEXP _CoGAPS_cogaps_cpp_from_file(SEXP dataSEXP, SEXP allParamsSEXP, SEXP uncertaintySEXP, SEXP indicesSEXP, SEXP fixedMatrixSEXP) {
9
+Rcpp::List cogaps_cpp_from_file(const Rcpp::CharacterVector& data, const Rcpp::List& allParams, const Rcpp::Nullable<Rcpp::CharacterVector>& uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector>& indices, const Rcpp::Nullable<Rcpp::NumericMatrix>& fixedMatrix, bool isMaster);
10
+RcppExport SEXP _CoGAPS_cogaps_cpp_from_file(SEXP dataSEXP, SEXP allParamsSEXP, SEXP uncertaintySEXP, SEXP indicesSEXP, SEXP fixedMatrixSEXP, SEXP isMasterSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject rcpp_result_gen;
13 13
     Rcpp::RNGScope rcpp_rngScope_gen;
... ...
@@ -16,13 +16,14 @@ BEGIN_RCPP
16 16
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::CharacterVector>& >::type uncertainty(uncertaintySEXP);
17 17
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::IntegerVector>& >::type indices(indicesSEXP);
18 18
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::NumericMatrix>& >::type fixedMatrix(fixedMatrixSEXP);
19
-    rcpp_result_gen = Rcpp::wrap(cogaps_cpp_from_file(data, allParams, uncertainty, indices, fixedMatrix));
19
+    Rcpp::traits::input_parameter< bool >::type isMaster(isMasterSEXP);
20
+    rcpp_result_gen = Rcpp::wrap(cogaps_cpp_from_file(data, allParams, uncertainty, indices, fixedMatrix, isMaster));
20 21
     return rcpp_result_gen;
21 22
 END_RCPP
22 23
 }
23 24
 // cogaps_cpp
24
-Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix& data, const Rcpp::List& allParams, const Rcpp::Nullable<Rcpp::NumericMatrix>& uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector>& indices, const Rcpp::Nullable<Rcpp::NumericMatrix>& fixedMatrix);
25
-RcppExport SEXP _CoGAPS_cogaps_cpp(SEXP dataSEXP, SEXP allParamsSEXP, SEXP uncertaintySEXP, SEXP indicesSEXP, SEXP fixedMatrixSEXP) {
25
+Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix& data, const Rcpp::List& allParams, const Rcpp::Nullable<Rcpp::NumericMatrix>& uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector>& indices, const Rcpp::Nullable<Rcpp::NumericMatrix>& fixedMatrix, bool isMaster);
26
+RcppExport SEXP _CoGAPS_cogaps_cpp(SEXP dataSEXP, SEXP allParamsSEXP, SEXP uncertaintySEXP, SEXP indicesSEXP, SEXP fixedMatrixSEXP, SEXP isMasterSEXP) {
26 27
 BEGIN_RCPP
27 28
     Rcpp::RObject rcpp_result_gen;
28 29
     Rcpp::RNGScope rcpp_rngScope_gen;
... ...
@@ -31,7 +32,8 @@ BEGIN_RCPP
31 32
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::NumericMatrix>& >::type uncertainty(uncertaintySEXP);
32 33
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::IntegerVector>& >::type indices(indicesSEXP);
33 34
     Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::NumericMatrix>& >::type fixedMatrix(fixedMatrixSEXP);
34
-    rcpp_result_gen = Rcpp::wrap(cogaps_cpp(data, allParams, uncertainty, indices, fixedMatrix));
35
+    Rcpp::traits::input_parameter< bool >::type isMaster(isMasterSEXP);
36
+    rcpp_result_gen = Rcpp::wrap(cogaps_cpp(data, allParams, uncertainty, indices, fixedMatrix, isMaster));
35 37
     return rcpp_result_gen;
36 38
 END_RCPP
37 39
 }
... ...
@@ -57,8 +59,8 @@ END_RCPP
57 59
 }
58 60
 
59 61
 static const R_CallMethodDef CallEntries[] = {
60
-    {"_CoGAPS_cogaps_cpp_from_file", (DL_FUNC) &_CoGAPS_cogaps_cpp_from_file, 5},
61
-    {"_CoGAPS_cogaps_cpp", (DL_FUNC) &_CoGAPS_cogaps_cpp, 5},
62
+    {"_CoGAPS_cogaps_cpp_from_file", (DL_FUNC) &_CoGAPS_cogaps_cpp_from_file, 6},
63
+    {"_CoGAPS_cogaps_cpp", (DL_FUNC) &_CoGAPS_cogaps_cpp, 6},
62 64
     {"_CoGAPS_getBuildReport_cpp", (DL_FUNC) &_CoGAPS_getBuildReport_cpp, 0},
63 65
     {"_CoGAPS_run_catch_unit_tests", (DL_FUNC) &_CoGAPS_run_catch_unit_tests, 0},
64 66
     {NULL, NULL, 0}
... ...
@@ -1,4 +1,5 @@
1 1
 #include "catch.h"
2
+#include "../math/Math.h"
2 3
 #include "../math/Random.h"
3 4
 
4 5
 #define TEST_APPROX(x) Approx(x).epsilon(0.001)
... ...
@@ -19,8 +20,8 @@ TEST_CASE("Test Random.h - Random Number Generation")
19 20
         unsigned N = 10000;
20 21
         for (unsigned i = 0; i < N; ++i)
21 22
         {
22
-            min = std::min(gaps::random::uniform(), min);
23
-            max = std::max(gaps::random::uniform(), max);
23
+            min = gaps::min(gaps::random::uniform(), min);
24
+            max = gaps::max(gaps::random::uniform(), max);
24 25
             sum += gaps::random::uniform();
25 26
         }
26 27
         REQUIRE(sum / N == Approx(0.5f).epsilon(0.01f));
... ...
@@ -39,8 +40,8 @@ TEST_CASE("Test Random.h - Random Number Generation")
39 40
         float min = 10., max = 0.;
40 41
         for (unsigned i = 0; i < 1000; ++i)
41 42
         {
42
-            min = std::min(gaps::random::uniform(0.f,10.f), min);
43
-            max = std::max(gaps::random::uniform(0.f,10.f), max);
43
+            min = gaps::min(gaps::random::uniform(0.f,10.f), min);
44
+            max = gaps::max(gaps::random::uniform(0.f,10.f), max);
44 45
         }
45 46
         REQUIRE(min < 0.1f);
46 47
         REQUIRE(max > 9.9f);
... ...
@@ -4,9 +4,9 @@
4 4
 #include "../Archive.h"
5 5
 #include "../GapsAssert.h"
6 6
 #include "../file_parser/FileParser.h"
7
+#include "../math/Math.h"
7 8
 #include "Vector.h"
8 9
 
9
-#include <algorithm>
10 10
 #include <vector>
11 11
 
12 12
 /****************************** MATRIX INTERFACE ******************************/
... ...
@@ -273,7 +273,7 @@ T GenericMatrix<T>::pmax(float scale, float max) const
273 273
     {
274 274
         for (unsigned j = 0; j < mNumCols; ++j)
275 275
         {
276
-            mat(i,j) = std::max(impl()->operator()(i,j) * scale, max);
276
+            mat(i,j) = gaps::max(impl()->operator()(i,j) * scale, max);
277 277
         }
278 278
     }
279 279
     return mat;
... ...
@@ -20,7 +20,7 @@ float gaps::algo::min(const Vector &vec)
20 20
     float min = vec[0];
21 21
     for (unsigned i = 0; i < vec.size(); ++i)
22 22
     {
23
-        min = std::min(min, vec[i]);
23
+        min = gaps::min(min, vec[i]);
24 24
     }
25 25
     return min;
26 26
 }
... ...
@@ -30,7 +30,7 @@ float gaps::algo::max(const Vector &vec)
30 30
     float max = vec[0];
31 31
     for (unsigned i = 0; i < vec.size(); ++i)
32 32
     {
33
-        max = std::max(max, vec[i]);
33
+        max = gaps::max(max, vec[i]);
34 34
     }
35 35
     return max;
36 36
 }
... ...
@@ -28,11 +28,8 @@ namespace gaps
28 28
 {
29 29
 namespace algo
30 30
 {
31
-    const float epsilon = 1.0e-10f;
32
-    const float pi = 3.14159265358979323846264f;
33
-
34 31
     bool isVectorZero(const float *vec, unsigned size);
35
-
32
+    
36 33
     // vector algorithms    
37 34
     unsigned whichMin(const Vector &vec);
38 35
     float sum(const Vector &vec);
39 36
new file mode 100644
... ...
@@ -0,0 +1,31 @@
1
+#include "Math.h"
2
+
3
+float gaps::min(float a, float b)
4
+{
5
+    return a < b ? a : b;
6
+}
7
+
8
+unsigned gaps::min(unsigned a, unsigned b)
9
+{
10
+    return a < b ? a : b;
11
+}
12
+
13
+uint64_t gaps::min(uint64_t a, uint64_t b)
14
+{
15
+    return a < b ? a : b;
16
+}
17
+
18
+float gaps::max(float a, float b)
19
+{
20
+    return a < b ? b : a;
21
+}
22
+
23
+unsigned gaps::max(unsigned a, unsigned b)
24
+{
25
+    return a < b ? b : a;
26
+}
27
+
28
+uint64_t gaps::max(uint64_t a, uint64_t b)
29
+{
30
+    return a < b ? b : a;
31
+}
0 32
\ No newline at end of file
1 33
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+#ifndef __COGAPS_MATH_H__
2
+#define __COGAPS_MATH_H__
3
+
4
+#include <stdint.h>
5
+
6
+namespace gaps
7
+{
8
+    const float epsilon = 1.0e-10f;
9
+    const float pi = 3.14159265358979323846264f;
10
+
11
+    float min(float a, float b);
12
+    unsigned min(unsigned a, unsigned b);
13
+    uint64_t min(uint64_t a, uint64_t b);
14
+
15
+    float max(float a, float b);
16
+    unsigned max(unsigned a, unsigned b);
17
+    uint64_t max(uint64_t a, uint64_t b);
18
+}
19
+
20
+#endif
0 21
\ No newline at end of file
... ...
@@ -3,8 +3,8 @@ context("CoGAPS")
3 3
 test_that("Valid Top-Level CoGAPS Calls",
4 4
 {
5 5
     data(GIST)
6
-    testDataFrame <- GIST.D
7
-    testMatrix <- as.matrix(testDataFrame)
6
+    testDataFrame <- GIST.data_frame
7
+    testMatrix <- GIST.matrix
8 8
     #testSummarizedExperiment <- 
9 9
     #testSingleCellExperiment
10 10
 
... ...
@@ -52,8 +52,8 @@ test_that("Valid Top-Level CoGAPS Calls",
52 52
         res[[i]]@sampleFactors == res[[i+1]]@sampleFactors)))
53 53
 
54 54
     # passing uncertainty
55
-    expect_error(CoGAPS(testDataFrame, uncertainty=as.matrix(GIST.S),
56
-        nIterations=100, outputFrequency=50, seed=1), NA)    
55
+    #expect_error(CoGAPS(testDataFrame, uncertainty=as.matrix(GIST.S),
56
+    #    nIterations=100, outputFrequency=50, seed=1), NA)    
57 57
 
58 58
     # multiple threads
59 59
     expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
... ...
@@ -70,9 +70,11 @@ test_that("Valid Top-Level CoGAPS Calls",
70 70
         seed=1, distributed="genome-wide"), NA)
71 71
 
72 72
     expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
73
-        seed=1, distributed="single-cell", transposeData=TRUE), NA)
73
+        seed=1, distributed="single-cell", singleCell=TRUE,
74
+        transposeData=TRUE), NA)
74 75
     expect_error(CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50,
75
-        seed=1, distributed="single-cell", transposeData=TRUE), NA)
76
+        seed=1, distributed="single-cell", singleCell=TRUE,
77
+        transposeData=TRUE), NA)
76 78
 })
77 79
 
78 80
 #test_that("Invalid Top-Level CoGAPS Calls",
... ...
@@ -13,6 +13,7 @@ output:
13 13
 
14 14
 ```{r include=FALSE, cache=FALSE}
15 15
 library(CoGAPS)
16
+library(BiocParallel)
16 17
 ```
17 18
 
18 19
 # Introduction
... ...
@@ -28,7 +29,7 @@ the elements in the A and P matrices are constrained to be greater than or
28 29
 equal to zero. This constraint simultaneously reflects the non-negative nature
29 30
 of gene expression data and enforces the additive nature of the resulting
30 31
 feature dimensions, generating solutions that are biologically intuitive to
31
-interpret @SEUNG_1999.
32
+interpret (@SEUNG_1999).
32 33
 
33 34
 CoGAPS has two extensions that allow it to scale up to large data sets,
34 35
 Genome-Wide CoGAPS (GWCoGAPS) and Single-Cell CoGAPS (scCOGAPS). This package
... ...
@@ -229,12 +230,12 @@ The following calls are equivalent:
229 230
 
230 231
 ```{r}
231 232
 # genome-wide CoGAPS
232
-CoGAPS(SimpSim.data, params, distributed="genome-wide")
233
-GWCoGAPS(SimpSim.data, params)
233
+CoGAPS(SimpSim.data, params, distributed="genome-wide", outputFrequency=1000)
234
+GWCoGAPS(SimpSim.data, params, outputFrequency=1000)
234 235
 
235 236
 # single-cell CoGAPS
236
-CoGAPS(SimpSim.data, params, distributed="single-cell", singleCell=TRUE)
237
-scCoGAPS(SimpSim.data, params)
237
+CoGAPS(SimpSim.data, params, distributed="single-cell", singleCell=TRUE, outputFrequency=1000)
238
+scCoGAPS(SimpSim.data, params, outputFrequency=1000)
238 239
 ```
239 240
 
240 241
 Notice that we also set the parameter `singleCell=TRUE`. This makes some 
... ...
@@ -268,12 +269,38 @@ THIS SECTION UNDER CONSTRUCTION
268 269
 
269 270
 ## Checkpoint System - Saving/Loading CoGAPS Runs
270 271
 
272
+CoGAPS allows the user to save their progress throughout the run, and restart
273
+from the latest saved "checkpoint". This is intended to that if the server
274
+crashes in the middle of a long run it doesn't need to be restarted from the
275
+beginning. Set the `checkpointInterval` parameter to save checkpoints and
276
+pass a file name as `checkpointInFile` to load from a checkpoint. 
277
+
278
+```{r}
279
+# our initial run
280
+CoGAPS(SimpSim.data, params, checkpointInterval=100, checkpointOutFile="vignette_example.out")
281
+
282
+# assume the previous run crashes
283
+CoGAPS(SimpSim.data, checkpointInFile="vignette_example.out")
284
+```
285
+
271 286
 ## Manual Pipeline for GWCoGAPS/scCoGAPS
272 287
 
273 288
 THIS SECTION UNDER CONSTRUCTION
274 289
 
275 290
 ## Setting Parallel Backend for GWCoGAPS/scCoGAPS
276 291
 
292
+The distributed computation for CoGAPS uses `BiocParallel` underneath the hood
293
+to manage the parallelization. The user has the option to specify what the
294
+backend should be. By default, it is `MulticoreParam` with the same number
295
+of workers at `nSets`. Use the `BPPARAM` parameter in `CoGAPS` to set the
296
+backend. See the vignette for `BiocParallel` for more information about the
297
+different choices for the backend.
298
+
299
+```{r}
300
+# run CoGAPS with serial backend
301
+scCoGAPS(SimpSim.data, params, BPPARAM=BiocParallel::SerialParam())
302
+```
303
+
277 304
 ## Transposing Data
278 305
 
279 306
 If your data is stored as samples x genes, `CoGAPS` allows you to pass