Browse code

allow rds for parameters; move all critical parametrers in CogapsParams

Tom Sherman authored on 18/02/2019 19:33:02
Showing 28 changed files

... ...
@@ -2,6 +2,7 @@
2 2
 
3 3
 S3method(plot,CogapsResult)
4 4
 export(CoGAPS)
5
+export(CogapsParams)
5 6
 export(GWCoGAPS)
6 7
 export(binaryA)
7 8
 export(buildReport)
... ...
@@ -11,10 +12,12 @@ export(checkpointsEnabled)
11 12
 export(findConsensusMatrix)
12 13
 export(getClusteredPatterns)
13 14
 export(getCorrelationToMeanPattern)
15
+export(getFeatureLoadings)
14 16
 export(getMeanChiSq)
15 17
 export(getOriginalParameters)
16 18
 export(getParam)
17 19
 export(getRetinaSubset)
20
+export(getSampleFactors)
18 21
 export(getSubsets)
19 22
 export(getUnmatchedPatterns)
20 23
 export(getVersion)
... ...
@@ -25,6 +28,7 @@ export(reconstructGene)
25 28
 export(scCoGAPS)
26 29
 export(setAnnotationWeights)
27 30
 export(setDistributedParams)
31
+export(setFixedPatterns)
28 32
 export(setParam)
29 33
 exportClasses(CogapsParams)
30 34
 exportClasses(CogapsResult)
... ...
@@ -49,19 +49,7 @@ checkpointsEnabled <- function()
49 49
 #' @param transposeData T/F for transposing data while reading it in - useful
50 50
 #' for data that is stored as samples x genes since CoGAPS requires data to be
51 51
 #' genes x samples
52
-#' @param subsetIndices set of indices to use from the data
53
-#' @param subsetDim which dimension (1=rows, 2=cols) to subset
54 52
 #' @param BPPARAM BiocParallel backend 
55
-#' @param geneNames vector of names of genes in data
56
-#' @param sampleNames vector of names of samples in data
57
-#' @param fixedPatterns fix either 'A' or 'P' matrix to these values, in the
58
-#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
59
-#' skipped and fixedPatterns is used for all sets - allowing manual pattern
60
-#' matching, as well as fixed runs of standard CoGAPS
61
-#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed
62
-#' @param takePumpSamples whether or not to take PUMP samples
63
-#' @param outputToFile name of a file to save the output to, will create 4 files
64
-#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"
65 53
 #' @param workerID if calling CoGAPS in parallel the worker ID can be specified,
66 54
 #' only worker 1 prints output and each worker outputs when it finishes, this
67 55
 #' is not neccesary when using the default parallel methods (i.e. distributed
... ...
@@ -82,17 +70,18 @@ checkpointsEnabled <- function()
82 70
 #' params <- setParam(params, "nPatterns", 3)
83 71
 #' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25)
84 72
 #' @importFrom methods new is
85
-#' @importFrom SummarizedExperiment assay
86
-#' @importFrom utils packageVersion
87
-CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
88
-messages=TRUE, outputFrequency=500, uncertainty=NULL,
89
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
90
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0,
91
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL,
92
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
73
+CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE,
74
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out",
75
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE,
76
+BPPARAM=NULL, workerID=1, ...)
93 77
 {
94
-    # store all parameters in a list and parse parameters from ...
78
+    # pre-process inputs
79
+    data <- getValueOrRds(data)
80
+    data <- convertDataToMatrix(data)
81
+    params <- getValueOrRds(params)
95 82
     validObject(params)
83
+
84
+    # store all parameters in a list and parse parameters from ...
96 85
     allParams <- list("gaps"=params,
97 86
         "nThreads"=nThreads,
98 87
         "messages"=messages,
... ...
@@ -100,48 +89,24 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
100 89
         "checkpointOutFile"=checkpointOutFile,
101 90
         "checkpointInterval"=checkpointInterval,
102 91
         "checkpointInFile"=checkpointInFile,
92
+        "geneNames"=NULL, # the gene/sample names in the params object are kept
93
+        "sampleNames"=NULL, # as a reference, these are the values actually used
103 94
         "transposeData"=transposeData,
104
-        "subsetIndices"=subsetIndices,
105
-        "subsetDim"=subsetDim,
106 95
         "BPPARAM"=BPPARAM,
107
-        "fixedPatterns"=fixedPatterns,
108
-        "whichMatrixFixed"=whichMatrixFixed,
109
-        "takePumpSamples"=takePumpSamples,
110
-        "outputToFile"=outputToFile,
96
+        "outputToFile"=NULL,
111 97
         "workerID"=workerID
112 98
     )
113 99
     allParams <- parseExtraParams(allParams, list(...))
114
-
115
-    # if rds was passed, we first read it in before any processing
116
-    if (is(data, "character"))
117
-    {
118
-        if (tools::file_ext(data) == "rds")
119
-        {
120
-            gapsCat(allParams, "reading RDS file...")
121
-            data <- readRDS(data)
122
-            gapsCat(allParams, "done\n")
123
-        }
124
-    }
125
-
126
-    # convert data if needed
127
-    if (is(data, "data.frame"))
128
-        data <- data.matrix(data)
129
-    else if (is(data, "SummarizedExperiment"))
130
-        data <- SummarizedExperiment::assay(data, "counts")
131
-    else if (is(data, "SingleCellExperiment"))
132
-        data <- SummarizedExperiment::assay(data, "counts")
133
-
134
-    # check that inputs are valid, then read the gene/sample names from the data
100
+    allParams <- getDimNames(data, allParams)
135 101
     checkInputs(data, uncertainty, allParams)
136
-    allParams <- getNamesFromData(data, allParams, geneNames, sampleNames)
137
-   
102
+
138 103
     # check if we're running from a checkpoint
139 104
     if (!is.null(allParams$checkpointInFile))
140 105
     {
141 106
         gapsCat(allParams, "Running CoGAPS from a checkpoint\n")
142 107
     }
143 108
 
144
-    # determine which function to call cogaps algorithm
109
+    # determine function to call cogaps algorithm
145 110
     dispatchFunc <- cogaps_cpp # default
146 111
     if (!is.null(allParams$gaps@distributed))
147 112
         dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps
... ...
@@ -170,12 +135,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
170 135
 #' params <- setParam(params, "nPatterns", 3)
171 136
 #' result <- scCoGAPS(t(GIST.matrix), params, BPPARAM=BiocParallel::SerialParam())
172 137
 #' }
173
-scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
174
-messages=TRUE, outputFrequency=500, uncertainty=NULL,
175
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
176
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0,
177
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL,
178
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
138
+scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE,
139
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out",
140
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE,
141
+BPPARAM=NULL, workerID=1, ...)
179 142
 {
180 143
     params@distributed <- "single-cell"
181 144
     params@singleCell <- TRUE
... ...
@@ -190,15 +153,7 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
190 153
         checkpointInterval=checkpointInterval,
191 154
         checkpointInFile=checkpointInFile,
192 155
         transposeData=transposeData,
193
-        subsetIndices=subsetIndices,
194
-        subsetDim=subsetDim,
195 156
         BPPARAM=BPPARAM,
196
-        geneNames=geneNames,
197
-        sampleNames=sampleNames,
198
-        fixedPatterns=fixedPatterns,
199
-        whichMatrixFixed=whichMatrixFixed,
200
-        takePumpSamples=takePumpSamples,
201
-        outputToFile=outputToFile,
202 157
         workerID=workerID,
203 158
         ...
204 159
     )
... ...
@@ -220,12 +175,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
220 175
 #' params <- setParam(params, "nPatterns", 3)
221 176
 #' result <- GWCoGAPS(GIST.matrix, params, BPPARAM=BiocParallel::SerialParam())
222 177
 #' }
223
-GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
224
-messages=TRUE, outputFrequency=500, uncertainty=NULL,
225
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
226
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0,
227
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL,
228
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
178
+GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE,
179
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out",
180
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE,
181
+BPPARAM=NULL, workerID=1, ...)
229 182
 {
230 183
     params@distributed <- "genome-wide"
231 184
     CoGAPS(
... ...
@@ -239,16 +192,8 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
239 192
         checkpointInterval=checkpointInterval,
240 193
         checkpointInFile=checkpointInFile,
241 194
         transposeData=transposeData,
242
-        subsetIndices=subsetIndices,
243
-        subsetDim=subsetDim,
244 195
         BPPARAM=BPPARAM,
245
-        geneNames=geneNames,
246
-        sampleNames=sampleNames,
247
-        fixedPatterns=fixedPatterns,
248
-        whichMatrixFixed=whichMatrixFixed,
249
-        takePumpSamples=takePumpSamples,
250
-        outputToFile=outputToFile,
251 196
         workerID=workerID,
252 197
         ...
253 198
     )
254
-}
255 199
\ No newline at end of file
200
+}
... ...
@@ -26,8 +26,8 @@ workerID)
26 26
     else
27 27
         allParams$sampleNames <- allParams$sampleNames[subsetIndices]
28 28
 
29
-    allParams$subsetIndices <- subsetIndices
30
-    allParams$subsetDim <- ifelse(genomeWide, 1, 2)
29
+    allParams$gaps@subsetIndices <- subsetIndices
30
+    allParams$gaps@subsetDim <- ifelse(genomeWide, 1, 2)
31 31
     allParams$workerID <- workerID
32 32
 
33 33
     # call CoGAPS
... ...
@@ -226,6 +226,7 @@ corcut <- function(allPatterns, cut, minNS)
226 226
 #' @return list with all CoGAPS output
227 227
 stitchTogether <- function(result, allParams)
228 228
 {
229
+    print("stiching together...")
229 230
     if (allParams$gaps@distributed == "genome-wide")
230 231
     {
231 232
         consensus <- result[[1]]@sampleFactors
... ...
@@ -4,13 +4,14 @@
4 4
 #' @description combines retina subsets from extdata directory
5 5
 #' @param n number of subsets to use
6 6
 #' @return matrix of RNA counts
7
+#' @examples
8
+#' retSubset <- getRetinaSubset()
9
+#' dim(retSubset)
7 10
 #' @importFrom rhdf5 h5read
8 11
 getRetinaSubset <- function(n=1)
9 12
 {
10 13
     if (!(n %in% 1:4))
11
-    {
12 14
         stop("invalid number of subsets requested")
13
-    }
14 15
 
15 16
     subset_1_path <- system.file("extdata/retina_subset_1.h5", package="CoGAPS")
16 17
     subset_2_path <- system.file("extdata/retina_subset_2.h5", package="CoGAPS")
... ...
@@ -66,6 +67,35 @@ supported <- function(file)
66 67
     return(tools::file_ext(file) %in% c("tsv", "csv", "mtx", "gct"))
67 68
 }
68 69
 
70
+#' checks if file is rds format
71
+#' @keywords internal
72
+#'
73
+#' @param file path to file
74
+#' @return TRUE if file is .rds, FALSE if not
75
+#' @importFrom tools file_ext
76
+isRdsFile <- function(file)
77
+{
78
+    if (is.null(file))
79
+        return(FALSE)
80
+    if (length(file) == 0)
81
+        return(FALSE)
82
+    if (!is(file, "character"))
83
+        return(FALSE)
84
+    return(tools::file_ext(file) == ".rds")
85
+}
86
+
87
+#' get input that might be an RDS file
88
+#' @keywords internal
89
+#'
90
+#' @param input some user input
91
+#' @return if input is an RDS file, read it - otherwise return input
92
+getValueOrRds <- function(input)
93
+{
94
+    if (isRdsFile(input))
95
+        return(readRDS(input))
96
+    return(input)
97
+}
98
+
69 99
 #' get number of rows from supported file name or matrix
70 100
 #' @keywords internal
71 101
 #'
... ...
@@ -114,9 +144,7 @@ ncolHelper <- function(data)
114 144
 getGeneNames <- function(data, transpose)
115 145
 {
116 146
     if (transpose)
117
-    {
118 147
         return(getSampleNames(data, FALSE))
119
-    }
120 148
 
121 149
     names <- NULL
122 150
     if (is(data, "character"))
... ...
@@ -127,16 +155,13 @@ getGeneNames <- function(data, transpose)
127 155
             "gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(data, select=1))))
128 156
         )
129 157
     }
130
-    else if (is(data, "matrix") | is(data, "data.frame"))
158
+    else
131 159
     {
132 160
         names <- rownames(data)
133 161
     }
134 162
 
135 163
     if (is.null(names))
136
-    {
137
-        nGenes <- nrowHelper(data)
138
-        return(paste("Gene", 1:nGenes, sep="_"))
139
-    }
164
+        return(paste("Gene", 1:nrowHelper(data), sep="_"))
140 165
     return(names)
141 166
 }
142 167
 
... ...
@@ -146,9 +171,7 @@ getGeneNames <- function(data, transpose)
146 171
 getSampleNames <- function(data, transpose)
147 172
 {
148 173
     if (transpose)
149
-    {
150 174
         return(getGeneNames(data, FALSE))
151
-    }
152 175
 
153 176
     names <- NULL
154 177
     if (is(data, "character"))
... ...
@@ -159,16 +182,13 @@ getSampleNames <- function(data, transpose)
159 182
             "gct" = suppressWarnings(colnames(data.table::fread(data, skip=2, nrows=1))[-1:-2])
160 183
         )
161 184
     }
162
-    else if (is(data, "matrix") | is(data, "data.frame"))
185
+    else
163 186
     {
164 187
         names <- colnames(data)
165 188
     }
166 189
 
167 190
     if (is.null(names))
168
-    {
169
-        nSamples <- ncolHelper(data)
170
-        return(paste("Sample", 1:nSamples, sep="_"))
171
-    }
191
+        return(paste("Sample", 1:ncolHelper(data), sep="_"))
172 192
     return(names)
173 193
 }
174 194
 
... ...
@@ -236,9 +256,6 @@ parseExtraParams <- function(allParams, extraParams)
236 256
 #' @return throws an error if data has problems
237 257
 checkDataMatrix <- function(data, uncertainty, params)
238 258
 {
239
-    if (!is(data, "matrix") & !is(data, "data.frame")
240
-    & !is(data, "SummarizedExperiment") & !is(data, "SingleCellExperiment"))
241
-        stop("unsupported object type of CoGAPS")
242 259
     if (any(is.na(data)))
243 260
         stop("NA values in data")
244 261
     if (!all(apply(data, 2, is.numeric)))
... ...
@@ -260,9 +277,6 @@ checkDataMatrix <- function(data, uncertainty, params)
260 277
 #' @return throws an error if inputs are invalid
261 278
 checkInputs <- function(data, uncertainty, allParams)
262 279
 {
263
-    if (is(data, "character") & !supported(data))
264
-        stop("unsupported file extension for data")
265
-
266 280
     if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character"))
267 281
         stop("uncertainty must be same data type as data (file name)")
268 282
     if (is(uncertainty, "character") & !supported(uncertainty))
... ...
@@ -272,34 +286,16 @@ checkInputs <- function(data, uncertainty, allParams)
272 286
     if (!is.null(uncertainty) & allParams$gaps@sparseOptimization)
273 287
         stop("must use default uncertainty when enabling sparseOptimization")
274 288
 
275
-    if (!(allParams$whichMatrixFixed %in% c("A", "P", "N")))
276
-        stop("Invalid choice of fixed matrix, must be 'A' or 'P'")
277
-    if (!is.null(allParams$fixedPatterns) & allParams$whichMatrixFixed == "N")
278
-        stop("fixedPatterns passed without setting whichMatrixFixed")
279
-    if (allParams$whichMatrixFixed %in% c("A", "P") & is.null(allParams$fixedPatterns))
280
-        stop("whichMatrixFixed is set without passing fixedPatterns")
281
-
282 289
     if (!is.null(allParams$gaps@distributed))
283 290
     {
284 291
         if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell)
285 292
             warning("running single-cell CoGAPS with singleCell=FALSE")
286
-        if (!is.null(allParams$fixedPatterns) & is.null(allParams$gaps@explicitSets))
287
-            warning("doing manual pattern matching with using explicit subsets")
288 293
         if (allParams$nThreads > 1)
289 294
             stop("can't run multi-threaded and distributed CoGAPS at the same time")
290 295
         if (!is.null(allParams$checkpointInFile))
291 296
             stop("checkpoints not supported for distributed cogaps")
292
-        if (allParams$gaps@distributed == "single-cell" & allParams$whichMatrixFixed == "P")
293
-            stop("can't fix P matrix when running single-cell CoGAPS")
294
-        if (allParams$gaps@distributed == "genome-wide" & allParams$whichMatrixFixed == "A")
295
-            stop("can't fix A matrix when running genome-wide CoGAPS")
296 297
     }
297 298
 
298
-    if (!(allParams$subsetDim %in% c(0,1,2)))
299
-        stop("invalid subset dimension")
300
-    if (allParams$subsetDim > 0 & is.null(allParams$subsetIndices))
301
-        stop("subsetDim provided without subsetIndices")
302
-
303 299
     if (!is(data, "character"))
304 300
         checkDataMatrix(data, uncertainty, allParams$gaps)
305 301
 }
... ...
@@ -309,38 +305,72 @@ checkInputs <- function(data, uncertainty, allParams)
309 305
 #'
310 306
 #' @param data data matrix
311 307
 #' @param allParams list of all parameters
312
-#' @param geneNames vector of names of genes in data
313
-#' @param sampleNames vector of names of samples in data
314 308
 #' @return list of all parameters with added gene names
315
-getNamesFromData <- function(data, allParams, geneNames, sampleNames)
309
+getDimNames <- function(data, allParams)
316 310
 {
317
-    # get gene/sample names
318
-    if (is.null(geneNames))
311
+    # get user supplied names
312
+    geneNames <- allParams$gaps@geneNames
313
+    sampleNames <- allParams$gaps@sampleNames
314
+
315
+    # if user didn't supply any names, pull from data set or use default labels
316
+    if (is.null(allParams$gaps@geneNames))
319 317
         geneNames <- getGeneNames(data, allParams$transposeData)
320
-    if (is.null(sampleNames))
318
+    if (is.null(allParams$gaps@sampleNames))
321 319
         sampleNames <- getSampleNames(data, allParams$transposeData)
322 320
 
321
+    # get the number of genes/samples
323 322
     nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data))
324 323
     nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data))
325 324
 
326
-    if (allParams$subsetDim == 1)
325
+    # handle any subsetting
326
+    if (allParams$gaps@subsetDim == 1)
327 327
     {
328
-        nGenes <- length(allParams$subsetIndices)
329
-        geneNames <- geneNames[allParams$subsetIndices]
328
+        nGenes <- length(allParams$gaps@subsetIndices)
329
+        geneNames <- geneNames[allParams$gaps@subsetIndices]
330 330
     }
331
-    else if (allParams$subsetDim == 2)
331
+    else if (allParams$gaps@subsetDim == 2)
332 332
     {
333
-        nSamples <- length(allParams$subsetIndices)
334
-        sampleNames <- sampleNames[allParams$subsetIndices]
333
+        nSamples <- length(allParams$gaps@subsetIndices)
334
+        sampleNames <- sampleNames[allParams$gaps@subsetIndices]
335 335
     }    
336 336
 
337
+    # check that names align with expected number of genes/samples
337 338
     if (length(geneNames) != nGenes)
338 339
         stop("incorrect number of gene names given")
339 340
     if (length(sampleNames) != nSamples)
340 341
         stop("incorrect number of sample names given")
341 342
 
343
+    # store processed gene/sample names directly in allParams list
344
+    # this is an important distinction - allParams@gaps contains the
345
+    # gene/sample names originally passed by the user, allParams contains
346
+    # the procseed gene/sample names to be used when labeling the result
342 347
     allParams$geneNames <- geneNames
343 348
     allParams$sampleNames <- sampleNames
344
-
345 349
     return(allParams)
350
+}
351
+
352
+#' convert any acceptable data input to a numeric matrix
353
+#' @keywords internal
354
+#'
355
+#' @description convert supported R objects containing the data to a
356
+#' numeric matrix, if data is a file name do nothing. Exits with an error
357
+#' if data is not a supported type.
358
+#' @param data data input
359
+#' @return data matrix
360
+#' @importFrom methods is
361
+#' @importFrom SummarizedExperiment assay
362
+convertDataToMatrix <- function(data)
363
+{
364
+    if (is(data, "character") & !supported(data))
365
+        stop("unsupported file extension for data")
366
+    else if (is(data, "matrix") | is(data, "character"))
367
+        return(data)
368
+    else if (is(data, "data.frame"))
369
+        return(data.matrix(data))
370
+    else if (is(data, "SummarizedExperiment"))
371
+        return(SummarizedExperiment::assay(data, "counts"))
372
+    else if (is(data, "SingleCellExperiment"))
373
+        return(SummarizedExperiment::assay(data, "counts"))
374
+    else
375
+        stop("unsupported data type")
346 376
 }
347 377
\ No newline at end of file
... ...
@@ -28,6 +28,16 @@
28 28
 #' the rows (cols) to use for weighted sampling
29 29
 #' @slot samplingWeight [distributed parameter] weights associated with 
30 30
 #' samplingAnnotation
31
+#' @slot subsetIndices set of indices to use from the data
32
+#' @slot subsetDim which dimension (1=rows, 2=cols) to subset
33
+#' @slot geneNames vector of names of genes in data
34
+#' @slot sampleNames vector of names of samples in data
35
+#' @slot fixedPatterns fix either 'A' or 'P' matrix to these values, in the
36
+#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
37
+#' skipped and fixedPatterns is used for all sets - allowing manual pattern
38
+#' matching, as well as fixed runs of standard CoGAPS
39
+#' @slot whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed
40
+#' @slot takePumpSamples whether or not to take PUMP samples
31 41
 #' @importClassesFrom S4Vectors character_OR_NULL
32 42
 setClass("CogapsParams", slots = c(
33 43
     nPatterns = "numeric",
... ...
@@ -46,7 +56,14 @@ setClass("CogapsParams", slots = c(
46 56
     maxNS = "numeric",
47 57
     explicitSets = "ANY",
48 58
     samplingAnnotation = "character_OR_NULL",
49
-    samplingWeight = "numeric"
59
+    samplingWeight = "ANY",
60
+    subsetIndices="ANY",
61
+    subsetDim="numeric",
62
+    geneNames="character_OR_NULL",
63
+    sampleNames="character_OR_NULL",
64
+    fixedPatterns="ANY",
65
+    whichMatrixFixed="character",
66
+    takePumpSamples="logical"
50 67
 ))
51 68
 
52 69
 #' constructor for CogapsParams
... ...
@@ -70,7 +87,7 @@ setMethod("initialize", "CogapsParams",
70 87
         if (!is.null(list(...)$maxNS))
71 88
             stop("maxNS must be set after CogapsParams are intialized")
72 89
         if (!is.null(distributed))
73
-            if (distributed == "none")
90
+            if (distributed == "none") # allows it to be a pure string parameter
74 91
                 distributed <- NULL
75 92
         .Object@distributed <- distributed
76 93
         
... ...
@@ -89,7 +106,14 @@ setMethod("initialize", "CogapsParams",
89 106
         .Object@maxNS <- .Object@minNS + .Object@nSets
90 107
         .Object@explicitSets <- NULL
91 108
         .Object@samplingAnnotation <- NULL
92
-        .Object@samplingWeight <- integer(0)
109
+        .Object@samplingWeight <- NULL
110
+        .Object@subsetIndices <- NULL
111
+        .Object@subsetDim <- 0
112
+        .Object@geneNames <- NULL
113
+        .Object@sampleNames <- NULL
114
+        .Object@fixedPatterns <- NULL
115
+        .Object@whichMatrixFixed <- 'N'
116
+        .Object@takePumpSamples <- FALSE
93 117
 
94 118
         .Object <- callNextMethod(.Object, ...)
95 119
         .Object
... ...
@@ -104,42 +128,60 @@ setValidity("CogapsParams",
104 128
             "number of patterns must be an integer greater than zero"
105 129
         if (object@nIterations <= 0 | object@nIterations %% 1 != 0)
106 130
             "number of iterations must be an integer greater than zero"
107
-        if (object@alphaA  <= 0 | object@alphaP <= 0)
131
+        if (object@alphaA <= 0 | object@alphaP <= 0)
108 132
             "alpha parameter must be greater than zero"
109
-        if (object@maxGibbsMassA  <= 0 | object@maxGibbsMassP <= 0)
133
+        if (object@maxGibbsMassA <= 0 | object@maxGibbsMassP <= 0)
110 134
             "maxGibbsMass must be greater than zero"
111 135
         if (object@seed <= 0 | object@seed %% 1 != 0)
112 136
             "random seed must be an integer greater than zero"
113
-        if (object@minNS <= 1 | object@minNS %% 1 != 0)
114
-            "minNS must be an integer greater than one"
115
-        if (object@nSets <= 1 | object@nSets %% 1 != 0)
116
-            "minNS must be an integer greater than one"
117
-        if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets)
118
-            "nSets doesn't match length of explicitSets"
119
-        if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight))
120
-            "samplingWeight has mismatched size with amount of distinct annotations"
121
-        if (object@cut > object@nPatterns)
122
-            "cut must be less than or equal to nPatterns"
123 137
 
124
-        # check type of explicitSets
125
-        if (!is.null(object@explicitSets) & !is(object@explicitSets, "list"))
126
-            "explicitSets must be a list of numeric or character"
127
-        isNum <- sapply(object@explicitSets, function(s) is(s, "numeric"))
128
-        isChar <- sapply(object@explicitSets, function(s) is(s, "charcater"))
129
-        if (!is.null(object@explicitSets) & !(all(isNum) | all(isChar)))
130
-            "explicitSets must be a list of numeric or character"
138
+        if (!(object@whichMatrixFixed %in% c("A", "P", "N")))
139
+            stop("Invalid choice of fixed matrix, must be 'A' or 'P'")
140
+        if (!is.null(object@fixedPatterns) & object@whichMatrixFixed == "N")
141
+            stop("fixedPatterns passed without setting whichMatrixFixed")
142
+        if (object@whichMatrixFixed %in% c("A", "P") & is.null(object@fixedPatterns))
143
+            stop("whichMatrixFixed is set without passing fixedPatterns")
131 144
 
132
-        if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets)
133
-            "wrong number of sets given"
134
-        if (length(object@samplingWeight) & is.null(names(object@samplingWeight)))
135
-            "samplingWeight must be a named vector"
136
-
137
-        if (!is.null(object@explicitSets) & !is.null(object@samplingAnnotation))
138
-            "explicitSets and samplingAnnotation/samplingWeight are both set"
145
+        if (!(object@subsetDim %in% c(0,1,2)))
146
+            stop("invalid subset dimension")
147
+        if (object@subsetDim > 0 & is.null(object@subsetIndices))
148
+            stop("subsetDim provided without subsetIndices")
139 149
 
140 150
         if (!is.null(object@distributed))
151
+        {
141 152
             if (!(object@distributed %in% c("genome-wide", "single-cell")))
142 153
                 "distributed method must be either 'genome-wide' or 'single-cell'"
154
+            if (!is.null(object@fixedPatterns) & is.null(object@explicitSets))
155
+                "doing manual pattern matching without using explicit subsets"
156
+            if (object@distributed == "single-cell" & object@whichMatrixFixed == "P")
157
+                "can't fix P matrix when running single-cell CoGAPS"
158
+            if (object@distributed == "genome-wide" & object@whichMatrixFixed == "A")
159
+                "can't fix A matrix when running genome-wide CoGAPS"
160
+            if (object@minNS <= 1 | object@minNS %% 1 != 0)
161
+                "minNS must be an integer greater than one"
162
+            if (object@nSets <= 1 | object@nSets %% 1 != 0)
163
+                "minNS must be an integer greater than one"
164
+            if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight))
165
+                "samplingWeight has mismatched size with amount of distinct annotations"
166
+            if (object@cut > object@nPatterns)
167
+                "cut must be less than or equal to nPatterns"
168
+            if (length(object@samplingWeight) & is.null(names(object@samplingWeight)))
169
+                "samplingWeight must be a named vector"
170
+
171
+            if (!is.null(object@explicitSets))
172
+            {
173
+                if (!is(object@explicitSets, "list"))
174
+                    "explicitSets must be a list"
175
+                if (length(object@explicitSets) != object@nSets)
176
+                    "nSets doesn't match length of explicitSets"
177
+                if (!is.null(object@samplingAnnotation))
178
+                    "explicitSets and samplingAnnotation/samplingWeight are both set"
179
+                isNum <- sapply(object@explicitSets, function(s) is(s, "numeric"))
180
+                isChar <- sapply(object@explicitSets, function(s) is(s, "character"))
181
+                if (!all(isNum) & !all(isChar))
182
+                    "explicitSets must be a list of numeric or character"
183
+            }
184
+        }
143 185
     }
144 186
 )
145 187
 
... ...
@@ -194,6 +236,23 @@ minNS=NULL, maxNS=NULL)
194 236
 setGeneric("setAnnotationWeights", function(object, annotation, weights)
195 237
     {standardGeneric("setAnnotationWeights")})
196 238
 
239
+#' set the fixed patterns for either the A or the P matrix
240
+#' @export
241
+#' @docType methods
242
+#' @rdname setFixedPatterns-methods
243
+#'
244
+#' @description these parameters are interrelated so they must be set together
245
+#' @param object an object of type CogapsParams
246
+#' @param fixedPatterns values for either the A or P matrix
247
+#' @param whichMatrixFixed either 'A' or 'P' indicating which matrix is fixed
248
+#' @return the modified params object
249
+#' @examples
250
+#' params <- new("CogapsParams")
251
+#' data(GIST)
252
+#' params <- setFixedPatterns(params, getSampleFactors(GIST.result), 'P')
253
+setGeneric("setFixedPatterns", function(object, fixedPatterns, whichMatrixFixed)
254
+    {standardGeneric("setFixedPatterns")})
255
+
197 256
 #' get the value of a parameter
198 257
 #' @export
199 258
 #' @docType methods
... ...
@@ -41,7 +41,8 @@ sampleNames, diagnostics=NULL, ...)
41 41
     patternNames <- paste("Pattern", 1:ncol(Amean), sep="_")
42 42
 
43 43
     if (length(geneNames) != nrow(.Object@featureLoadings))
44
-        stop("number of gene names doesn't match data size")
44
+        stop("number of gene names doesn't match data size, ",
45
+            length(geneNames), " != ", nrow(.Object@featureLoadings))
45 46
     if (length(sampleNames) != nrow(.Object@sampleFactors))
46 47
         stop("number of sample names doesn't match data size")
47 48
 
... ...
@@ -82,6 +83,32 @@ setValidity("CogapsResult",
82 83
 
83 84
 ################################### GENERICS ###################################
84 85
 
86
+#' return featureLoadings matrix from CogapsResult object
87
+#' @export
88
+#' @docType methods
89
+#' @rdname getFeatureLoadings-methods
90
+#'
91
+#' @param object an object of type CogapsResult
92
+#' @return featureLoadings matrix
93
+#' @examples
94
+#' data(GIST)
95
+#' getFeatureLoadings(GIST.result)
96
+setGeneric("getFeatureLoadings", function(object)
97
+    {standardGeneric("getFeatureLoadings")})
98
+
99
+#' return sampleFactors matrix from CogapsResult object
100
+#' @export
101
+#' @docType methods
102
+#' @rdname getSampleFactors-methods
103
+#'
104
+#' @param object an object of type CogapsResult
105
+#' @return sampleFactors matrix
106
+#' @examples
107
+#' data(GIST)
108
+#' getSampleFactors(GIST.result)
109
+setGeneric("getSampleFactors", function(object)
110
+    {standardGeneric("getSampleFactors")})
111
+
85 112
 #' return chi-sq of final matrices
86 113
 #' @export
87 114
 #' @docType methods
... ...
@@ -1,3 +1,17 @@
1
+#' CogapsParams constructor
2
+#' @export
3
+#'
4
+#' @description create a CogapsParams object
5
+#' @param ... parameters for the initialization method
6
+#' @return CogapsParams object
7
+#' @examples
8
+#' params <- CogapsParams(nPatterns=10)
9
+#' params
10
+CogapsParams <- function(...)
11
+{
12
+    new("CogapsParams", ...)
13
+}
14
+
1 15
 setMethod("show", signature("CogapsParams"),
2 16
 function(object)
3 17
 {
... ...
@@ -59,11 +73,15 @@ function(object, whichParam, value)
59 73
     }
60 74
     else if (whichParam %in% c("nSets", "cut", "minNS", "maxNS"))
61 75
     {
62
-        stop("please set this parameter with setDistributedParams")
76
+        stop("please set \'", whichParam, "\' with setDistributedParams")
63 77
     }
64 78
     else if (whichParam %in% c("samplingAnnotation", "samplingWeight"))
65 79
     {
66
-        stop("please set this parameter with setAnnotationWeights")
80
+        stop("please set \'", whichParam, "\' with setAnnotationWeights")
81
+    }
82
+    else if (whichParam %in% c("fixedPatterns", "whichMatrixFixed"))
83
+    {
84
+        stop("please set \'", whichParam, "\' with setFixedPatterns")
67 85
     }
68 86
     else if (whichParam == "nPatterns")
69 87
     {
... ...
@@ -116,6 +134,18 @@ function(object, annotation, weights)
116 134
     return(object)
117 135
 })
118 136
 
137
+#' @rdname setFixedPatterns-methods
138
+#' @aliases setFixedPatterns
139
+setMethod("setFixedPatterns", signature(object="CogapsParams"),
140
+function(object, fixedPatterns, whichMatrixFixed)
141
+{
142
+    object@fixedPatterns <- fixedPatterns
143
+    object@whichMatrixFixed <- whichMatrixFixed
144
+
145
+    validObject(object)
146
+    return(object)
147
+})
148
+
119 149
 #' @rdname getParam-methods
120 150
 #' @aliases getParam
121 151
 setMethod("getParam", signature(object="CogapsParams"),
... ...
@@ -4,6 +4,7 @@
4 4
 #' @param returnList list from cogaps_cpp
5 5
 #' @param allParams list of all parameters
6 6
 #' @return CogapsResult object
7
+#' @importFrom utils packageVersion
7 8
 createCogapsResult <- function(returnList, allParams)
8 9
 {
9 10
     res <- new("CogapsResult",
... ...
@@ -57,6 +58,22 @@ plot.CogapsResult <- function(x, ...)
57 58
         lty=1, cex=0.8, col=colors, bty="y", ncol=5)
58 59
 }
59 60
 
61
+#' @rdname getFeatureLoadings-methods
62
+#' @aliases getFeatureLoadings
63
+setMethod("getFeatureLoadings", signature(object="CogapsResult"),
64
+function(object)
65
+{
66
+    object@featureLoadings
67
+})
68
+
69
+#' @rdname getSampleFactors-methods
70
+#' @aliases getSampleFactors
71
+setMethod("getSampleFactors", signature(object="CogapsResult"),
72
+function(object)
73
+{
74
+    object@sampleFactors
75
+})
76
+
60 77
 #' @rdname getMeanChiSq-methods
61 78
 #' @aliases getMeanChiSq
62 79
 setMethod("getMeanChiSq", signature(object="CogapsResult"),
... ...
@@ -7,11 +7,8 @@
7 7
 CoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE,
11
-  subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL,
12
-  geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL,
13
-  whichMatrixFixed = "N", takePumpSamples = FALSE,
14
-  outputToFile = NULL, workerID = 1, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  workerID = 1, ...)
15 12
 }
16 13
 \arguments{
17 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -40,28 +37,8 @@ contained in this file}
40 37
 for data that is stored as samples x genes since CoGAPS requires data to be
41 38
 genes x samples}
42 39
 
43
-\item{subsetIndices}{set of indices to use from the data}
44
-
45
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset}
46
-
47 40
 \item{BPPARAM}{BiocParallel backend}
48 41
 
49
-\item{geneNames}{vector of names of genes in data}
50
-
51
-\item{sampleNames}{vector of names of samples in data}
52
-
53
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the
54
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
55
-skipped and fixedPatterns is used for all sets - allowing manual pattern
56
-matching, as well as fixed runs of standard CoGAPS}
57
-
58
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed}
59
-
60
-\item{takePumpSamples}{whether or not to take PUMP samples}
61
-
62
-\item{outputToFile}{name of a file to save the output to, will create 4 files
63
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"}
64
-
65 42
 \item{workerID}{if calling CoGAPS in parallel the worker ID can be specified,
66 43
 only worker 1 prints output and each worker outputs when it finishes, this
67 44
 is not neccesary when using the default parallel methods (i.e. distributed
... ...
@@ -51,5 +51,22 @@ the rows (cols) to use for weighted sampling}
51 51
 
52 52
 \item{\code{samplingWeight}}{[distributed parameter] weights associated with 
53 53
 samplingAnnotation}
54
+
55
+\item{\code{subsetIndices}}{set of indices to use from the data}
56
+
57
+\item{\code{subsetDim}}{which dimension (1=rows, 2=cols) to subset}
58
+
59
+\item{\code{geneNames}}{vector of names of genes in data}
60
+
61
+\item{\code{sampleNames}}{vector of names of samples in data}
62
+
63
+\item{\code{fixedPatterns}}{fix either 'A' or 'P' matrix to these values, in the
64
+context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
65
+skipped and fixedPatterns is used for all sets - allowing manual pattern
66
+matching, as well as fixed runs of standard CoGAPS}
67
+
68
+\item{\code{whichMatrixFixed}}{either 'A' or 'P', indicating which matrix is fixed}
69
+
70
+\item{\code{takePumpSamples}}{whether or not to take PUMP samples}
54 71
 }}
55 72
 
56 73
new file mode 100755
... ...
@@ -0,0 +1,21 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/methods-CogapsParams.R
3
+\name{CogapsParams}
4
+\alias{CogapsParams}
5
+\title{CogapsParams constructor}
6
+\usage{
7
+CogapsParams(...)
8
+}
9
+\arguments{
10
+\item{...}{parameters for the initialization method}
11
+}
12
+\value{
13
+CogapsParams object
14
+}
15
+\description{
16
+create a CogapsParams object
17
+}
18
+\examples{
19
+params <- CogapsParams(nPatterns=10)
20
+params
21
+}
... ...
@@ -7,11 +7,8 @@
7 7
 GWCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE,
11
-  subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL,
12
-  geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL,
13
-  whichMatrixFixed = "N", takePumpSamples = FALSE,
14
-  outputToFile = NULL, workerID = 1, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  workerID = 1, ...)
15 12
 }
16 13
 \arguments{
17 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -40,28 +37,8 @@ contained in this file}
40 37
 for data that is stored as samples x genes since CoGAPS requires data to be
41 38
 genes x samples}
42 39
 
43
-\item{subsetIndices}{set of indices to use from the data}
44
-
45
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset}
46
-
47 40
 \item{BPPARAM}{BiocParallel backend}
48 41
 
49
-\item{geneNames}{vector of names of genes in data}
50
-
51
-\item{sampleNames}{vector of names of samples in data}
52
-
53
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the
54
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
55
-skipped and fixedPatterns is used for all sets - allowing manual pattern
56
-matching, as well as fixed runs of standard CoGAPS}
57
-
58
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed}
59
-
60
-\item{takePumpSamples}{whether or not to take PUMP samples}
61
-
62
-\item{outputToFile}{name of a file to save the output to, will create 4 files
63
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"}
64
-
65 42
 \item{workerID}{if calling CoGAPS in parallel the worker ID can be specified,
66 43
 only worker 1 prints output and each worker outputs when it finishes, this
67 44
 is not neccesary when using the default parallel methods (i.e. distributed
68 45
new file mode 100755
... ...
@@ -0,0 +1,20 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{convertDataToMatrix}
4
+\alias{convertDataToMatrix}
5
+\title{convert any acceptable data input to a numeric matrix}
6
+\usage{
7
+convertDataToMatrix(data)
8
+}
9
+\arguments{
10
+\item{data}{data input}
11
+}
12
+\value{
13
+data matrix
14
+}
15
+\description{
16
+convert supported R objects containing the data to a
17
+numeric matrix, if data is a file name do nothing. Exits with an error
18
+if data is not a supported type.
19
+}
20
+\keyword{internal}
0 21
similarity index 63%
1 22
rename from man/getNamesFromData.Rd
2 23
rename to man/getDimNames.Rd
... ...
@@ -1,19 +1,15 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/HelperFunctions.R
3
-\name{getNamesFromData}
4
-\alias{getNamesFromData}
3
+\name{getDimNames}
4
+\alias{getDimNames}
5 5
 \title{extracts gene/sample names from the data}
6 6
 \usage{
7
-getNamesFromData(data, allParams, geneNames, sampleNames)
7
+getDimNames(data, allParams)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{data matrix}
11 11
 
12 12
 \item{allParams}{list of all parameters}
13
-
14
-\item{geneNames}{vector of names of genes in data}
15
-
16
-\item{sampleNames}{vector of names of samples in data}
17 13
 }
18 14
 \value{
19 15
 list of all parameters with added gene names
20 16
new file mode 100755
... ...
@@ -0,0 +1,25 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getFeatureLoadings}
5
+\alias{getFeatureLoadings}
6
+\alias{getFeatureLoadings,CogapsResult-method}
7
+\title{return featureLoadings matrix from CogapsResult object}
8
+\usage{
9
+getFeatureLoadings(object)
10
+
11
+\S4method{getFeatureLoadings}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+featureLoadings matrix
18
+}
19
+\description{
20
+return featureLoadings matrix from CogapsResult object
21
+}
22
+\examples{
23
+data(GIST)
24
+getFeatureLoadings(GIST.result)
25
+}
... ...
@@ -15,3 +15,7 @@ matrix of RNA counts
15 15
 \description{
16 16
 combines retina subsets from extdata directory
17 17
 }
18
+\examples{
19
+retSubset <- getRetinaSubset()
20
+dim(retSubset)
21
+}
18 22
new file mode 100755
... ...
@@ -0,0 +1,25 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getSampleFactors}
5
+\alias{getSampleFactors}
6
+\alias{getSampleFactors,CogapsResult-method}
7
+\title{return sampleFactors matrix from CogapsResult object}
8
+\usage{
9
+getSampleFactors(object)
10
+
11
+\S4method{getSampleFactors}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+sampleFactors matrix
18
+}
19
+\description{
20
+return sampleFactors matrix from CogapsResult object
21
+}
22
+\examples{
23
+data(GIST)
24
+getSampleFactors(GIST.result)
25
+}
0 26
new file mode 100755
... ...
@@ -0,0 +1,18 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{getValueOrRds}
4
+\alias{getValueOrRds}
5
+\title{get input that might be an RDS file}
6
+\usage{
7
+getValueOrRds(input)
8
+}
9
+\arguments{
10
+\item{input}{some user input}
11
+}
12
+\value{
13
+if input is an RDS file, read it - otherwise return input
14
+}
15
+\description{
16
+get input that might be an RDS file
17
+}
18
+\keyword{internal}
0 19
new file mode 100755
... ...
@@ -0,0 +1,18 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{isRdsFile}
4
+\alias{isRdsFile}
5
+\title{checks if file is rds format}
6
+\usage{
7
+isRdsFile(file)
8
+}
9
+\arguments{
10
+\item{file}{path to file}
11
+}
12
+\value{
13
+TRUE if file is .rds, FALSE if not
14
+}
15
+\description{
16
+checks if file is rds format
17
+}
18
+\keyword{internal}
... ...
@@ -7,11 +7,8 @@
7 7
 scCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE,
11
-  subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL,
12
-  geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL,
13
-  whichMatrixFixed = "N", takePumpSamples = FALSE,
14
-  outputToFile = NULL, workerID = 1, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  workerID = 1, ...)
15 12
 }
16 13
 \arguments{
17 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -40,28 +37,8 @@ contained in this file}
40 37
 for data that is stored as samples x genes since CoGAPS requires data to be
41 38
 genes x samples}
42 39
 
43
-\item{subsetIndices}{set of indices to use from the data}
44
-
45
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset}
46
-
47 40
 \item{BPPARAM}{BiocParallel backend}
48 41
 
49
-\item{geneNames}{vector of names of genes in data}
50
-
51
-\item{sampleNames}{vector of names of samples in data}
52
-
53
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the
54
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
55
-skipped and fixedPatterns is used for all sets - allowing manual pattern
56
-matching, as well as fixed runs of standard CoGAPS}
57
-
58
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed}
59
-
60
-\item{takePumpSamples}{whether or not to take PUMP samples}
61
-
62
-\item{outputToFile}{name of a file to save the output to, will create 4 files
63
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"}
64
-
65 42
 \item{workerID}{if calling CoGAPS in parallel the worker ID can be specified,
66 43
 only worker 1 prints output and each worker outputs when it finishes, this
67 44
 is not neccesary when using the default parallel methods (i.e. distributed
68 45
new file mode 100755
... ...
@@ -0,0 +1,31 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R
3
+\docType{methods}
4
+\name{setFixedPatterns}
5
+\alias{setFixedPatterns}
6
+\alias{setFixedPatterns,CogapsParams-method}
7
+\title{set the fixed patterns for either the A or the P matrix}
8
+\usage{
9
+setFixedPatterns(object, fixedPatterns, whichMatrixFixed)
10
+
11
+\S4method{setFixedPatterns}{CogapsParams}(object, fixedPatterns,
12
+  whichMatrixFixed)
13
+}
14
+\arguments{
15
+\item{object}{an object of type CogapsParams}
16
+
17
+\item{fixedPatterns}{values for either the A or P matrix}
18
+
19
+\item{whichMatrixFixed}{either 'A' or 'P' indicating which matrix is fixed}
20
+}
21
+\value{
22
+the modified params object
23
+}
24
+\description{
25
+these parameters are interrelated so they must be set together
26
+}
27
+\examples{
28
+params <- new("CogapsParams")
29
+data(GIST)
30
+params <- setFixedPatterns(params, getSampleFactors(GIST.result), 'P')
31
+}
... ...
@@ -49,12 +49,12 @@ GapsParameters getGapsParameters(const DataType &data, const Rcpp::List &allPara
49 49
     const Rcpp::S4 &gapsParams(allParams["gaps"]);
50 50
 
51 51
     // check if subsetting data
52
-    unsigned subsetDim = Rcpp::as<unsigned>(allParams["subsetDim"]);
53
-    bool subsetGenes = subsetDim == 1;
52
+    unsigned subsetDim = Rcpp::as<unsigned>(gapsParams.slot("subsetDim"));
53
+    bool subsetGenes = (subsetDim == 1);
54 54
     std::vector<unsigned> subset;
55 55
     if (subsetDim > 0)
56 56
     {
57
-        Rcpp::IntegerVector subsetR = allParams["subsetIndices"];
57
+        Rcpp::IntegerVector subsetR = gapsParams.slot("subsetIndices");
58 58
         subset = Rcpp::as< std::vector<unsigned> >(subsetR);
59 59
     }
60 60
 
... ...
@@ -71,7 +71,7 @@ GapsParameters getGapsParameters(const DataType &data, const Rcpp::List &allPara
71 71
     params.outputFrequency = allParams["outputFrequency"];
72 72
     params.checkpointOutFile = Rcpp::as<std::string>(allParams["checkpointOutFile"]);
73 73
     params.checkpointInterval = allParams["checkpointInterval"];
74
-    params.takePumpSamples = allParams["takePumpSamples"];
74
+    params.takePumpSamples = gapsParams.slot("takePumpSamples");
75 75
 
76 76
     // extract model specific parameters from list
77 77
     params.seed = gapsParams.slot("seed");
... ...
@@ -85,11 +85,11 @@ GapsParameters getGapsParameters(const DataType &data, const Rcpp::List &allPara
85 85
     params.useSparseOptimization = gapsParams.slot("sparseOptimization");
86 86
 
87 87
     // check if using fixed matrix
88
-    params.whichMatrixFixed = Rcpp::as<char>(allParams["whichMatrixFixed"]);
88
+    params.whichMatrixFixed = Rcpp::as<char>(gapsParams.slot("whichMatrixFixed"));
89 89
     if (params.whichMatrixFixed != 'N')
90 90
     {
91 91
         params.useFixedPatterns = true;
92
-        Rcpp::NumericMatrix fixedMatrixR = allParams["fixedPatterns"];
92
+        Rcpp::NumericMatrix fixedMatrixR = gapsParams.slot("fixedPatterns");
93 93
         params.fixedPatterns = convertRMatrix(fixedMatrixR);
94 94
     }
95 95
 
... ...
@@ -122,17 +122,17 @@ const DataType &uncertainty)
122 122
 
123 123
     // convert R parameters to GapsParameters struct
124 124
     GapsParameters params(getGapsParameters(data, allParams));
125
+#ifdef GAPS_DEBUG
126
+    if (params.printMessages)
127
+    {
128
+        params.print();
129
+    }
130
+#endif
125 131
 
126 132
     // create GapsRunner, note we must first initialize the random generator
127 133
     GapsRandomState randState(params.seed);
128 134
     GapsResult result(gaps::run(data, params, uncertainty, &randState));
129 135
 
130
-    // write result to file if requested
131
-    if (allParams["outputToFile"] != R_NilValue)
132
-    {
133
-        result.writeToFile(Rcpp::as<std::string>(allParams["outputToFile"]));
134
-    }
135
-    
136 136
     // return R list
137 137
     return Rcpp::List::create(
138 138
         Rcpp::Named("Amean") = createRMatrix(result.Amean),
... ...
@@ -1,4 +1,5 @@
1 1
 #include "GapsParameters.h"
2
+#include "utils/GapsPrint.h"
2 3
 
3 4
 Archive& operator<<(Archive &ar, const GapsParameters &p)
4 5
 {
... ...
@@ -15,6 +16,58 @@ Archive& operator>>(Archive &ar, GapsParameters &p)
15 16
         >> p.singleCell >> p.useSparseOptimization >> p.checkpointInterval;
16 17
     return ar;
17 18
 }
19
+
20
+void GapsParameters::print() const
21
+{
22
+    gaps_printf("\n---- C++ Parameters ----\n\n");
23
+
24
+    gaps_printf("transposeData: %s\n", transposeData ? "TRUE" : "FALSE");
25
+    gaps_printf("nGenes: %d\n", nGenes);
26
+    gaps_printf("nSamples: %d\n", nSamples);
27
+    gaps_printf("nPatterns: %d\n", nPatterns);
28
+    gaps_printf("nIterations: %d\n", nIterations);
29
+    gaps_printf("seed: %d\n", seed);
30
+    gaps_printf("\n");
31
+
32
+    gaps_printf("maxThreads: %d\n", maxThreads);
33
+    gaps_printf("printMessages: %s\n", printMessages ? "TRUE" : "FALSE");
34
+    gaps_printf("outputFrequency: %d\n", outputFrequency);
35
+    gaps_printf("\n");
36
+
37
+    gaps_printf("singleCell: %s\n", singleCell ? "TRUE" : "FALSE");
38
+    gaps_printf("useSparseOptimization: %s\n", useSparseOptimization ? "TRUE" : "FALSE");
39
+    gaps_printf("takePumpSamples: %s\n", takePumpSamples ? "TRUE" : "FALSE");
40
+    gaps_printf("\n");
41
+
42
+    gaps_printf("runningDistributed: %s\n", runningDistributed ? "TRUE" : "FALSE");
43
+    gaps_printf("printThreadUsage: %s\n", printThreadUsage ? "TRUE" : "FALSE");
44
+    gaps_printf("workerID: %d\n", workerID);
45
+    gaps_printf("\n");
46
+
47
+    gaps_printf("alphaA: %f\n", alphaA);
48
+    gaps_printf("alphaP: %f\n", alphaP);
49
+    gaps_printf("maxGibbsMassA: %f\n", maxGibbsMassA);
50
+    gaps_printf("maxGibbsMassP: %f\n", maxGibbsMassP);
51
+    gaps_printf("\n");
52
+
53
+    gaps_printf("useCheckPoint: %s\n", useCheckPoint ? "TRUE" : "FALSE");
54
+    gaps_printf("checkpointInterval: %d\n", checkpointInterval);
55
+    gaps_printf("checkpointFile: %s\n", checkpointFile.c_str());
56
+    gaps_printf("checkpointOutFile: %s\n", checkpointOutFile.c_str());
57
+    gaps_printf("\n");
58
+
59
+    gaps_printf("subsetData: %s\n", subsetData ? "TRUE" : "FALSE");
60
+    gaps_printf("subsetGenes: %s\n", subsetGenes ? "TRUE" : "FALSE");
61
+    gaps_printf("dataIndicesSubset.size(): %lu\n", dataIndicesSubset.size());
62
+    gaps_printf("\n");
63
+
64
+    gaps_printf("useFixedPatterns: %s\n", useFixedPatterns ? "TRUE" : "FALSE");
65
+    gaps_printf("whichMatrixFixed: %c\n", whichMatrixFixed);
66
+    gaps_printf("fixedPatterns.nRow(): %d\n", fixedPatterns.nRow());
67
+    gaps_printf("fixedPatterns.nCol(): %d\n", fixedPatterns.nCol());
68
+
69
+    gaps_printf("\n------------------------\n\n");
70
+}
18 71
     
19 72
 void GapsParameters::calculateDataDimensions(const std::string &file)
20 73
 {
... ...
@@ -22,10 +22,12 @@ public:
22 22
         bool t_subsetData=false, bool t_subsetGenes=false,
23 23
         const std::vector<unsigned> t_dataIndicesSubset=std::vector<unsigned>());
24 24
 
25
+    void print() const;
26
+
25 27
     Matrix fixedPatterns;
26 28
 
27 29
     std::vector<unsigned> dataIndicesSubset;
28
-
30
+    
29 31
     std::string checkpointFile;
30 32
     std::string checkpointOutFile;
31 33
 
... ...
@@ -15,9 +15,10 @@ getHistogram <- function(set, anno)
15 15
 test_that("Subsetting Data",
16 16
 {
17 17
     data(GIST)
18
-    #testMatrix <- GIST.matrix
19 18
 
19
+    ## BAD TEST TO RUN
20 20
     # sampling with weights
21
+    #testMatrix <- GIST.matrix
21 22
     #anno <- sample(letters[1:5], size=nrow(testMatrix), replace=TRUE)
22 23
     #weights <- c(0,1,4,1,0)
23 24
     #names(weights) <- letters[1:5]
... ...
@@ -29,7 +30,6 @@ test_that("Subsetting Data",
29 30
     #hist <- sapply(getSubsets(result), getHistogram, anno=anno)
30 31
     #freq <- unname(rowSums(hist) / sum(hist))
31 32
     
32
-    # dumb test
33 33
     #expect_true(all.equal(freq, unname(weights / sum(weights)), tol=0.1))
34 34
 
35 35
     # running cogaps with given subsets
... ...
@@ -37,9 +37,11 @@ test_that("Subsetting Data",
37 37
     mat <- GIST.matrix
38 38
     rownames(mat) <- NULL
39 39
     colnames(mat) <- NULL
40
-    result <- GWCoGAPS(mat, nPatterns=7, explicitSets=sets, nIterations=100,
41
-        messages=FALSE, whichMatrixFixed="P",
42
-        fixedPatterns=matrix(1,nrow=ncol(mat), ncol=7))
40
+    nPatterns <- ncol(getSampleFactors(GIST.result))
41
+    params <- CogapsParams()
42
+    params <- setFixedPatterns(params, getSampleFactors(GIST.result), "P")
43
+    result <- GWCoGAPS(mat, params, nPatterns=nPatterns, explicitSets=sets,
44
+        nIterations=100, messages=FALSE, seed=42)
43 45
     subsets <- lapply(getSubsets(result), getIndices)
44 46
     expect_true(all(sapply(1:4, function(i) all.equal(sets[[i]], subsets[[i]]))))
45 47
 })
46 48
\ No newline at end of file
... ...
@@ -98,16 +98,16 @@ test_that("Valid Top-Level CoGAPS Calls",
98 98
 
99 99
     # single-cell CoGAPS
100 100
     expect_error(res <- CoGAPS(testDataFrame, nIterations=100,
101
-        outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell", singleCell=TRUE,
102
-        transposeData=TRUE), NA)
101
+        outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell",
102
+        singleCell=TRUE, transposeData=TRUE), NA)
103 103
     expect_true(no_na_in_result(res))
104 104
 
105 105
     expect_equal(nrow(res@featureLoadings), 9)
106 106
     expect_equal(nrow(res@sampleFactors), 1363)
107 107
 
108 108
     expect_error(res <- CoGAPS(gistTsvPath, nIterations=100,
109
-        outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell", singleCell=TRUE,
110
-        transposeData=TRUE), NA)
109
+        outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell",
110
+        singleCell=TRUE, transposeData=TRUE), NA)
111 111
     expect_true(no_na_in_result(res))
112 112
 
113 113
     expect_equal(nrow(res@featureLoadings), 9)
... ...
@@ -165,31 +165,32 @@ test_that("Valid Top-Level CoGAPS Calls",
165 165
     nPat <- 3
166 166
     fixedA <- matrix(runif(nrow(testMatrix) * nPat, 1, 10), ncol=nPat)
167 167
     fixedP <- matrix(runif(ncol(testMatrix) * nPat, 1, 10), ncol=nPat)
168
-    res <- CoGAPS(testMatrix, nIterations=100, outputFrequency=100, seed=42,
169
-        messages=FALSE, nPatterns=nPat, whichMatrixFixed="A",
170
-        fixedPatterns=fixedA)
168
+    params <- CogapsParams()
169
+    params <- setFixedPatterns(params, fixedA, "A")
170
+    res <- CoGAPS(testMatrix, params, nIterations=100, outputFrequency=100,
171
+        seed=42, messages=FALSE, nPatterns=nPat)
171 172
     
172 173
     expect_true(all(dim(res@featureLoadings) == dim(fixedA)))
173 174
     for (i in 1:ncol(fixedA))
174 175
         fixedA[,i] <- fixedA[,i] * (res@featureLoadings[1,i] / fixedA[1,i])
175 176
     all.equal(unname(res@featureLoadings), fixedA, tolerance=0.001)
176 177
 
177
-    res <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=100, seed=42,
178
-        messages=FALSE, nPatterns=nPat, whichMatrixFixed="P",
179
-        fixedPatterns=fixedP)
178
+    params <- CogapsParams()
179
+    params <- setFixedPatterns(params, fixedP, "P")
180
+    res <- CoGAPS(gistCsvPath, params, nIterations=100, outputFrequency=100, seed=42,
181
+        messages=FALSE, nPatterns=nPat)
180 182
 
181 183
     expect_true(all(dim(res@sampleFactors) == dim(fixedP)))
182 184
     for (i in 1:ncol(fixedP))
183 185
         fixedP[,i] <- fixedP[,i] * (res@sampleFactors[1,i] / fixedP[1,i])
184 186
     all.equal(unname(res@sampleFactors), fixedP, tolerance=0.001)
185 187
 
186
-
187 188
     # make sure that "none" gets converted to NULL for distributed
188 189
     res <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=100, seed=42,
189 190
         messages=FALSE, nPatterns=3, distributed="none")
190 191
     expect_true(is.null(res@metadata$params@distributed))    
191 192
 
192
-    params <- new("CogapsParams")
193
+    params <- CogapsParams()
193 194
     params <- setParam(params, "distributed", "none")
194 195
     res <- CoGAPS(gistCsvPath, params, nIterations=100, outputFrequency=100, seed=42,
195 196
         messages=FALSE, nPatterns=3)
... ...
@@ -434,7 +434,9 @@ result <- GWCoGAPS(GIST.matrix, messages=FALSE)
434 434
 consensusMatrix <- getUnmatchedPatterns(result)[[1]]
435 435
 
436 436
 # run with our custom matched patterns matrix
437
-GWCoGAPS(GIST.matrix, fixedPatterns=consensusMatrix, explicitSets=getSubsets(result), whichMatrixFixed='P')
437
+params <- CogapsParams()
438
+params <- setFixedPatterns(params, consensusMatrix, 'P')
439
+GWCoGAPS(GIST.matrix, params, explicitSets=getSubsets(result))
438 440
 ```
439 441
 
440 442
 # sessionInfo()
441 443
deleted file mode 100755
... ...
@@ -1,71 +0,0 @@
1
-## Pattern Markers
2
-
3
-## Selecting Appropiate Number of Patterns
4
-
5
-Selecting the best value for *nPatterns* is the most difficult part of the
6
-analysis. For starters, there is not one "best" value for the number of
7
-patterns - various numbers of patterns can capture various levels of
8
-granularity in the data. To further complicate the problem, there's not a
9
-clear way to compare runs for different numbers of patterns.
10
-
11
-Here we show the simplest approach of selecting dimensionality by plotting the
12