Browse code

added more features to GWCoGAPS and scCoGAPS

Tom Sherman authored on 08/08/2018 22:34:56
Showing55 changed files

... ...
@@ -23,6 +23,7 @@
23 23
 ^src/data_structures/Vector.o
24 24
 ^src/file_parser/FileParser.o
25 25
 ^src/file_parser/CsvParser.o
26
+^src/file_parser/GctParser.o
26 27
 ^src/file_parser/TsvParser.o
27 28
 ^src/file_parser/MtxParser.o
28 29
 ^src/math/Algorithms.o
... ...
@@ -1,5 +1,5 @@
1 1
 Package: CoGAPS
2
-Version: 3.3.5
2
+Version: 3.3.6
3 3
 Date: 2018-04-24
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
... ...
@@ -49,8 +49,10 @@ Collate:
49 49
     'class-CogapsParams.R'
50 50
     'CoGAPS.R'
51 51
     'DistributedCogaps.R'
52
+    'HelperFunctions.R'
52 53
     'Package.R'
53 54
     'RcppExports.R'
55
+    'SubsetData.R'
54 56
     'class-CogapsResult.R'
55 57
     'methods-CogapsParams.R'
56 58
     'methods-CogapsResult.R'
... ...
@@ -9,15 +9,20 @@ export(calcCoGAPSStat)
9 9
 export(calcGeneGSStat)
10 10
 export(calcZ)
11 11
 export(computeGeneGSProb)
12
+export(getClusteredPatterns)
13
+export(getCorrelationToMeanPattern)
12 14
 export(getMeanChiSq)
13 15
 export(getOriginalParameters)
14 16
 export(getParam)
17
+export(getSubsets)
18
+export(getUnmatchedPatterns)
15 19
 export(getVersion)
16 20
 export(patternMarkers)
17 21
 export(plotPatternMarkers)
18 22
 export(plotResiduals)
19 23
 export(reconstructGene)
20 24
 export(scCoGAPS)
25
+export(setAnnotationWeights)
21 26
 export(setDistributedParams)
22 27
 export(setParam)
23 28
 exportClasses(CogapsParams)
... ...
@@ -45,6 +50,7 @@ importFrom(methods,"slot<-")
45 50
 importFrom(methods,callNextMethod)
46 51
 importFrom(methods,is)
47 52
 importFrom(methods,new)
53
+importFrom(methods,show)
48 54
 importFrom(methods,slot)
49 55
 importFrom(methods,slotNames)
50 56
 importFrom(methods,validObject)
... ...
@@ -1,51 +1,17 @@
1 1
 #' @include class-CogapsParams.R
2 2
 NULL
3 3
 
4
-#' Checks if file is supported
5
-#' @param file path to file
6
-#' @return TRUE if file is supported, FALSE if not
7
-#' @importFrom tools file_ext
8
-supported <- function(file)
9
-{
10
-    if (!is(file, "character"))
11
-        return(FALSE)
12
-    return(tools::file_ext(file) %in% c("tsv", "csv", "mtx"))
13
-}
14
-
15
-#' get number of rows from supported file name or matrix
16
-#' @param data either a file name or a matrix
17
-#' @return number of rows
18
-#' @importFrom data.table fread
19
-#' @importFrom tools file_ext
20
-nrow_helper <- function(data)
21
-{
22
-    if (is(data, "character"))
23
-    {
24
-        return(switch(tools::file_ext(data),
25
-            "csv" = nrow(data.table::fread(data, select=1)),
26
-            "tsv" = nrow(data.table::fread(data, select=1)),
27
-            "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1])
28
-        ))
29
-    }
30
-    return(nrow(data))
31
-}
32
-
33
-#' get number of columns from supported file name or matrix
34
-#' @param data either a file name or a matrix
35
-#' @return number of columns
36
-#' @importFrom data.table fread
37
-#' @importFrom tools file_ext
38
-ncol_helper <- function(data)
4
+#' Information About Package Compilation
5
+#' @export
6
+#'
7
+#' @details returns information about how the package was compiled, i.e. which
8
+#'  compiler/version was used, which compile time options were enabled, etc...
9
+#' @return string containing build report
10
+#' @examples
11
+#' CoGAPS::buildReport()
12
+buildReport <- function()
39 13
 {
40
-    if (is(data, "character"))
41
-    {
42
-        return(switch(tools::file_ext(data),
43
-            "csv" = ncol(data.table::fread(data, nrows=1)) - 1,
44
-            "tsv" = ncol(data.table::fread(data, nrows=1)) - 1,
45
-            "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2])
46
-        ))
47
-    }
48
-    return(ncol(data))
14
+    getBuildReport_cpp()
49 15
 }
50 16
 
51 17
 #' CoGAPS Matrix Factorization Algorithm
... ...
@@ -73,6 +39,11 @@ ncol_helper <- function(data)
73 39
 #' for data that is stored as samples x genes since CoGAPS requires data to be
74 40
 #' genes x samples
75 41
 #' @param BPPARAM BiocParallel backend 
42
+#' @param geneNames vector of names of genes in data
43
+#' @param sampleNames vector of names of samples in data
44
+#' @param matchedPatterns manually matched patterns for distributed CoGAPS
45
+#' @param outputToFile name of a file to save the output to, will create 4 files
46
+#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"
76 47
 #' @param ... allows for overwriting parameters in params
77 48
 #' @return CogapsResult object
78 49
 #' @examples
... ...
@@ -94,9 +65,12 @@ ncol_helper <- function(data)
94 65
 CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
95 66
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
96 67
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
97
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
68
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL,
69
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL,
70
+outputToFile=NULL, ...)
98 71
 {
99 72
     # store all parameters in a list and parse parameters from ...
73
+    validObject(params)
100 74
     allParams <- list("gaps"=params,
101 75
         "nThreads"=nThreads,
102 76
         "messages"=messages,
... ...
@@ -106,12 +80,14 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
106 80
         "checkpointInFile"=checkpointInFile,
107 81
         "transposeData"=transposeData,
108 82
         "bpBackend"=BPPARAM,
83
+        "matchedPatterns"=matchedPatterns,
84
+        "outputToFile"=outputToFile,
109 85
         "whichMatrixFixed"=NULL # internal parameter
110 86
     )
111 87
     allParams <- parseExtraParams(allParams, list(...))
112 88
 
113 89
     # display start up message for the user
114
-    startupMessage(data, allParams$transposeData, allParams$gaps@distributed)
90
+    startupMessage(data, allParams)
115 91
 
116 92
     # check file extension
117 93
     if (is(data, "character") & !supported(data))
... ...
@@ -125,7 +101,7 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
125 101
     if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix"))
126 102
         stop("uncertainty must be a matrix unless data is a file path")
127 103
     if (!is(data, "character"))
128
-        checkDataMatrix(data, uncertainty, params)
104
+        checkDataMatrix(data, uncertainty, allParams$gaps)
129 105
 
130 106
     # check single cell parameter
131 107
     if (!is.null(allParams$gaps@distributed))
... ...
@@ -136,16 +112,15 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
136 112
         stop("can't run multi-threaded and distributed CoGAPS at the same time")
137 113
 
138 114
     # convert data to matrix
115
+    if (is(data, "matrix"))
116
+        data <- data
139 117
     if (is(data, "data.frame"))
140 118
         data <- data.matrix(data)
141 119
     else if (is(data, "SummarizedExperiment"))
142 120
         data <- SummarizedExperiment::assay(data, "counts")
143 121
     else if (is(data, "SingleCellExperiment"))
144 122
         data <- SummarizedExperiment::assay(data, "counts")
145
-
146
-    # label matrix
147
-    
148
-
123
+   
149 124
     # determine which function to call cogaps algorithm
150 125
     if (!is.null(allParams$gaps@distributed))
151 126
         dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps
... ...
@@ -160,9 +135,24 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
160 135
         if (!is.null(allParams$gaps@distributed))
161 136
             stop("checkpoints not supported for distributed cogaps")
162 137
         else
163
-            message("Running CoGAPS from a checkpoint")
138
+            cat("Running CoGAPS from a checkpoint")
164 139
     }
165 140
 
141
+    # get gene/sample names
142
+    if (is.null(geneNames)) geneNames <- getGeneNames(data, allParams$transposeData)
143
+    if (is.null(sampleNames)) sampleNames <- getSampleNames(data, allParams$transposeData)
144
+
145
+    nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data))
146
+    nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data))
147
+
148
+    if (length(geneNames) != nGenes)
149
+        stop("incorrect number of gene names given")
150
+    if (length(sampleNames) != nSamples)
151
+        stop("incorrect number of sample names given")
152
+
153
+    allParams$geneNames <- geneNames
154
+    allParams$sampleNames <- sampleNames
155
+
166 156
     # run cogaps
167 157
     gapsReturnList <- dispatchFunc(data, allParams, uncertainty)
168 158
 
... ...
@@ -172,8 +162,9 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
172 162
         Asd         = gapsReturnList$Asd,
173 163
         Pmean       = gapsReturnList$Pmean,
174 164
         Psd         = gapsReturnList$Psd,
175
-        seed        = gapsReturnList$seed,
176 165
         meanChiSq   = gapsReturnList$meanChiSq,
166
+        geneNames   = gapsReturnList$geneNames,
167
+        sampleNames = gapsReturnList$sampleNames,
177 168
         diagnostics = append(gapsReturnList$diagnostics,
178 169
             list("params"=allParams$gaps, "version"=utils::packageVersion("CoGAPS")))
179 170
     ))
... ...
@@ -189,13 +180,14 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
189 180
 scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
190 181
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
191 182
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
192
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
183
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL,
184
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...)
193 185
 {
194 186
     params@distributed <- "single-cell"
195 187
     params@singleCell <- TRUE
196 188
     CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty,
197 189
         checkpointOutFile, checkpointInterval, checkpointInFile, transposeData,
198
-        BPPARAM, ...)
190
+        BPPARAM, geneNames, sampleNames, matchedPatterns, ...)
199 191
 }
200 192
 
201 193
 #' Genome Wide CoGAPS
... ...
@@ -208,83 +200,11 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
208 200
 GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
209 201
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
210 202
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
211
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...)
203
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL,
204
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...)
212 205
 {
213 206
     params@distributed <- "genome-wide"
214 207
     CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty,
215 208
         checkpointOutFile, checkpointInterval, checkpointInFile, transposeData,
216
-        BPPARAM, ...)
217
-}   
218
-
219
-#' write start up message
220
-#'
221
-#' @param data data set
222
-#' @param transpose if we are transposing the data set
223
-#' @param distributed if we are running distributed CoGAPS
224
-#' @return message displayed to screen
225
-startupMessage <- function(data, transpose, distributed)
226
-{
227
-    nGenes <- ifelse(transpose, ncol_helper(data), nrow_helper(data))
228
-    nSamples <- ifelse(transpose, nrow_helper(data), ncol_helper(data))
229
-
230
-    dist_message <- "Standard"
231
-    if (!is.null(distributed))
232
-        dist_message <- distributed
233
-    message(paste("Running", dist_message, "CoGAPS on", nGenes, "genes and",
234
-        nSamples, "samples"))
235
-}
236
-
237
-#' parse parameters passed through the ... variable
238
-#'
239
-#' @param allParams list of all parameters
240
-#' @param extraParams list of parameters in ...
241
-#' @return allParams with any valid parameters in extraParams added
242
-#' @note will halt with an error if any parameters in extraParams are invalid
243
-#' @importFrom methods slotNames
244
-parseExtraParams <- function(allParams, extraParams)
245
-{
246
-    # parse direct params
247
-    for (s in slotNames(allParams$gaps))
248
-    {
249
-        if (!is.null(extraParams[[s]]))
250
-        {
251
-            allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]])
252
-            extraParams[[s]] <- NULL
253
-        }
254
-    }
255
-
256
-    # check for unrecognized options
257
-    if (length(extraParams) > 0)
258
-        stop(paste("unrecognized argument:", names(extraParams)[1]))
259
-
260
-    return(allParams)
261
-}
262
-
263
-#' check that provided data is valid
264
-#'
265
-#' @param data data matrix
266
-#' @param uncertainty uncertainty matrix, can be null
267
-#' @param params CogapsParams object
268
-#' @return throws an error if data has problems
269
-checkDataMatrix <- function(data, uncertainty, params)
270
-{
271
-    if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0)
272
-        stop("negative values in data and/or uncertainty matrix")
273
-    if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns)
274
-        stop("nPatterns must be less than dimensions of data")
275
-    if (sum(uncertainty < 1e-5) > 0)
276
-        warning("small values in uncertainty matrix detected")
277
-}
278
-
279
-#' Information About Package Compilation
280
-#' @export
281
-#'
282
-#' @details returns information about how the package was compiled, i.e. which
283
-#'  compiler/version was used, which compile time options were enabled, etc...
284
-#' @return string containing build report
285
-#' @examples
286
-#' CoGAPS::buildReport()
287
-buildReport <- function()
288
-{
289
-    getBuildReport_cpp()
290
-}
209
+        BPPARAM, geneNames, sampleNames, matchedPatterns, ...)
210
+}
291 211
\ No newline at end of file
... ...
@@ -1,4 +1,5 @@
1 1
 #' CoGAPS Distributed Matrix Factorization Algorithm
2
+#' @keywords internal
2 3
 #'
3 4
 #' @description runs CoGAPS over subsets of the data and stitches the results
4 5
 #' back together
... ...
@@ -10,13 +11,20 @@
10 11
 #' @importFrom BiocParallel bplapply MulticoreParam
11 12
 distributedCogaps <- function(data, allParams, uncertainty)
12 13
 {
13
-    FUN <- function(index, sets, data, allParams, uncertainty, fixedMatrix=NULL)
14
+    FUN <- function(index, sets, data, allParams, uncertainty, geneNames,
15
+    sampleNames, fixedMatrix=NULL)
14 16
     {
17
+        if (allParams$gaps@distributed == "genome-wide")
18
+            geneNames <- geneNames[sets[[index]]]
19
+        else
20
+            sampleNames <- sampleNames[sets[[index]]]
21
+
15 22
         internal <- ifelse(is(data, "character"), cogaps_cpp_from_file, cogaps_cpp)
16 23
         raw <- internal(data, allParams, uncertainty, sets[[index]],
17 24
             fixedMatrix, index == 1)
18 25
         new("CogapsResult", Amean=raw$Amean, Asd=raw$Asd, Pmean=raw$Pmean,
19
-            Psd=raw$Psd, seed=raw$seed, meanChiSq=raw$meanChiSq)
26
+            Psd=raw$Psd, meanChiSq=raw$meanChiSq, geneNames=geneNames,
27
+            sampleNames=sampleNames)
20 28
     }
21 29
 
22 30
     # randomly sample either rows or columns into subsets to break the data up
... ...
@@ -25,71 +33,78 @@ distributedCogaps <- function(data, allParams, uncertainty)
25 33
     if (is.null(allParams$bpBackend))
26 34
         allParams$bpBackend <- BiocParallel::MulticoreParam(workers=length(sets))
27 35
     
28
-    # run Cogaps normally on each subset of the data
29
-    if (allParams$messages)
30
-        message("Running Across Subsets...")
31
-    initialResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend,
32
-        sets=sets, data=data, allParams=allParams, uncertainty=uncertainty)
33
-
34
-    # get all unmatched patterns
35
-    if (allParams$gaps@distributed == "genome-wide")
36
-        unmatchedPatterns <- lapply(initialResult, function(x) x@sampleFactors)
36
+    if (is.null(allParams$matchedPatterns))
37
+    {
38
+        # run Cogaps normally on each subset of the data
39
+        if (allParams$messages)
40
+            cat("Running Across Subsets...\n\n")
41
+        initialResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend,
42
+            sets=sets, data=data, allParams=allParams, uncertainty=uncertainty,
43
+            geneNames=allParams$geneNames, sampleNames=allParams$sampleNames)
44
+
45
+        # get all unmatched patterns
46
+        if (allParams$gaps@distributed == "genome-wide")
47
+            unmatchedPatterns <- lapply(initialResult, function(x) x@sampleFactors)
48
+        else
49
+            unmatchedPatterns <- lapply(initialResult, function(x) x@featureLoadings)
50
+
51
+        # match patterns in either A or P matrix
52
+        if (allParams$messages)
53
+            cat("\nMatching Patterns Across Subsets...\n")
54
+        matchedPatterns <- findConsensusMatrix(unmatchedPatterns, allParams)
55
+        allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus)
56
+
57
+        # set fixed matrix
58
+        allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
59
+            == "genome-wide", "P", "A")
60
+    }
37 61
     else
38
-        unmatchedPatterns <- lapply(initialResult, function(x) x@featureLoadings)
39
-
40
-    # match patterns in either A or P matrix
41
-    if (allParams$messages)
42
-        message("Matching Patterns Across Subsets...")
43
-    matchedPatterns <- findConsensusMatrix(unmatchedPatterns, allParams)
44
-    allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus)
45
-
46
-    # set fixed matrix
47
-    allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
48
-        == "genome-wide", "P", "A")
49
-
62
+    {
63
+        matchedPatterns <- list(consensus=allParams$matchedPatterns)
64
+        allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus)
65
+        allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
66
+            == "genome-wide", "P", "A")
67
+    }
68
+        
50 69
     # run final phase with fixed matrix
51 70
     if (allParams$messages)
52
-        message("Running Final Stage...")
71
+        cat("Running Final Stage...\n\n")
53 72
     finalResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend,
54 73
         sets=sets, data=data, allParams=allParams, uncertainty=uncertainty,
74
+        geneNames=allParams$geneNames, sampleNames=allParams$sampleNames,    
55 75
         fixedMatrix=matchedPatterns$consensus)
56 76
 
57 77
     # concatenate final result
58 78
     fullResult <- stitchTogether(finalResult, allParams)
59 79
 
60 80
     # add diagnostic information before returning
61
-    fullResult$diagnostics$unmatchedPatterns <- unmatchedPatterns
62
-    fullResult$diagnostics$clusteredPatterns <- matchedPatterns$clusteredPatterns
63
-    fullResult$diagnostics$RtoMeanPattern <- lapply(matchedPatterns$clusteredPatterns, correlationToMeanPattern)
64
-    fullResult$diagnostics$subsets <- sets
65
-    return(fullResult)
66
-}
67
-
68
-#' partition genes/samples into subsets
69
-#' @description either genes or samples or partitioned depending on the type
70
-#' of distributed CoGAPS (i.e. genome-wide or single-cell)
71
-#' @param data either file name or matrix
72
-#' @param allParams list of all CoGAPS parameters
73
-#' @return list of sorted subsets of either genes or samples
74
-createSets <- function(data, allParams)
75
-{
76
-    total <- ifelse(xor(allParams$transposeData, allParams$gaps@distributed == "genome-wide"),
77
-        nrow_helper(data), ncol_helper(data))
78
-    setSize <- floor(total / allParams$gaps@nSets)
79
-
80
-    sets <- list()
81
-    remaining <- 1:total
82
-    for (n in 1:(allParams$gaps@nSets - 1))
81
+    if (is.null(allParams$matchedPatterns))
83 82
     {
84
-        selected <- sample(remaining, setSize, replace=FALSE)
85
-        sets[[n]] <- sort(selected)
86
-        remaining <- setdiff(remaining, selected)
83
+        fullResult$diagnostics$unmatchedPatterns <- unmatchedPatterns
84
+        fullResult$diagnostics$clusteredPatterns <- matchedPatterns$clusteredPatterns
85
+        fullResult$diagnostics$CorrToMeanPattern <- lapply(matchedPatterns$clusteredPatterns, corrToMeanPattern)
87 86
     }
88
-    sets[[allParams$gaps@nSets]] <- sort(remaining)
89
-    return(sets)
87
+
88
+    if (allParams$gaps@distributed == "genome-wide")
89
+        allNames <- allParams$geneNames
90
+    else
91
+        allNames <- allParams$sampleNames
92
+    fullResult$diagnostics$subsets <- lapply(sets, function(s) allNames[s])
93
+
94
+    # rename genes/samples if dimension was subsetted incompletely
95
+    allUsedIndices <- sort(unlist(sets))
96
+    if (allParams$gaps@distributed == "genome-wide")
97
+        fullResult$geneNames <- allParams$geneNames[allUsedIndices]
98
+    else
99
+        fullResult$sampleNames <- allParams$sampleNames[allUsedIndices]
100
+
101
+    return(fullResult)
90 102
 }
91 103
 
104
+
92 105
 #' find the consensus pattern matrix across all subsets
106
+#' @keywords internal
107
+#'
93 108
 #' @param unmatchedPatterns list of all unmatched pattern matrices from initial
94 109
 #' run of CoGAPS
95 110
 #' @param allParams list of all CoGAPS parameters
... ...
@@ -103,73 +118,84 @@ findConsensusMatrix <- function(unmatchedPatterns, allParams)
103 118
 }
104 119
 
105 120
 #' Match Patterns Across Multiple Runs
121
+#' @keywords internal
122
+#'
106 123
 #' @param allPatterns matrix of patterns stored in the columns
107 124
 #' @param allParams list of all CoGAPS parameters
108 125
 #' @return a matrix of consensus patterns
109 126
 #' @importFrom stats weighted.mean
110 127
 patternMatch <- function(allPatterns, allParams)
111 128
 {
112
-    PatsByClust <- corcut(allPatterns, allParams)
129
+    # cluster patterns
130
+    clusters <- corcut(allPatterns, allParams$gaps@cut, allParams$gaps@minNS)
113 131
 
114
-    # split by maxNS
115
-    indx <- which(sapply(PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
116
-    while (length(indx) > 0)
117
-    { 
118
-        allParams$gaps@cut <- 2
119
-        internalPatsByClust <- corcut(PatsByClust[[indx[1]]], allParams)
120
-
121
-        PatsByClust[[indx[1]]] <- internalPatsByClust[[1]]
122
-        if (length(internalPatsByClust) > 1)
123
-        {
124
-            PatsByClust <- append(PatsByClust, internalPatsByClust[2])
125
-        }
126
-        indx <- which(sapply(PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
132
+    # function to split a cluster in two (might fail to do so)
133
+    splitCluster <- function(list, index, minNS)
134
+    {
135
+        split <- corcut(list[[index]], 2, minNS)
136
+        list[[index]] <- split[[1]]
137
+        if (length(split) > 1)
138
+            list <- append(list, split[2])
139
+        return(list)
127 140
     }
128 141
 
129
-    # create matrix of mean patterns - weighted by coefficient of determination
130
-    PatsByCDSWavg <- sapply(PatsByClust, function(clust)
131
-        apply(clust, 1, function(row) weighted.mean(row, correlationToMeanPattern(clust)^3)))
132
-    colnames(PatsByCDSWavg) <- paste("Pattern", 1:length(PatsByClust))
142
+    # split large clusters into two
143
+    tooLarge <- function(x) ncol(x) > allParams$gaps@maxNS
144
+    indx <- which(sapply(clusters, tooLarge))
145
+    while (length(indx) > 0)
146
+    {
147
+        clusters <- splitCluster(clusters, indx[1], allParams$gaps@minNS)
148
+        indx <- which(sapply(clusters, tooLarge))
149
+    }
150
+    names(clusters) <- as.character(1:length(clusters))
133 151
 
134
-    # scale
135
-    return(list("clusteredPatterns"=PatsByClust,
136
-        "consensus"=apply(PatsByCDSWavg, 2, function(col) col / max(col))))
152
+    # create matrix of mean patterns - weighted by correlation to mean pattern
153
+    meanPatterns <- sapply(clusters, function(clust) apply(clust, 1,
154
+        function(row)  weighted.mean(row, corrToMeanPattern(clust)^3)))
155
+    colnames(meanPatterns) <- paste("Pattern", 1:length(clusters))
137 156
 
157
+    # returned patterns after scaling max to 1
158
+    return(list("clusteredPatterns"=clusters,
159
+        "consensus"=apply(meanPatterns, 2, function(col) col / max(col))))
138 160
 }
139 161
 
140
-correlationToMeanPattern <- function(cluster)
162
+#' calculate correlation of each pattern in a cluster to the cluster mean
163
+#' @keywords internal
164
+corrToMeanPattern <- function(cluster)
141 165
 {
142 166
     meanPat <- rowMeans(cluster)
143 167
     sapply(1:ncol(cluster), function(j) round(cor(x=cluster[,j], y=meanPat), 3))
144 168
 }
145 169
 
146 170
 #' cluster patterns together
171
+#' @keywords internal
172
+#'
147 173
 #' @param allPatterns matrix of all patterns across subsets
148
-#' @param allParams list of all CoGAPS parameters
174
+#' @param cut number of branches at which to cut dendrogram
175
+#' @param minNS minimum of individual set contributions a cluster must contain
149 176
 #' @return patterns listed by which cluster they belong to
150 177
 #' @importFrom cluster agnes
151 178
 #' @importFrom stats cutree as.hclust cor
152
-corcut <- function(allPatterns, allParams)
179
+corcut <- function(allPatterns, cut, minNS)
153 180
 {
154 181
     corr.dist <- cor(allPatterns)
155 182
     corr.dist <- 1 - corr.dist
156 183
 
157
-    clust <- cluster::agnes(x=corr.dist, diss=TRUE, "complete")
158
-    patternIds <- stats::cutree(stats::as.hclust(clust), k=allParams$gaps@cut)
184
+    clusterSummary <- cluster::agnes(x=corr.dist, diss=TRUE, "complete")
185
+    clusterIds <- stats::cutree(stats::as.hclust(clusterSummary), k=cut)
159 186
 
160
-    PatsByClust <- list()
161
-    for (cluster in unique(patternIds))
187
+    clusters <- list()
188
+    for (id in unique(clusterIds))
162 189
     {
163
-        if (sum(patternIds==cluster) >= allParams$gaps@minNS)
164
-        {
165
-            clusterPats <- allPatterns[,patternIds==cluster]
166
-            PatsByClust[[as.character(cluster)]] <- clusterPats
167
-        }
190
+        if (sum(clusterIds==id) >= minNS)
191
+            clusters[[as.character(id)]] <- allPatterns[,clusterIds==id,drop=FALSE]
168 192
     }
169
-    return(PatsByClust)
193
+    return(clusters)
170 194
 }
171 195
 
172 196
 #' concatenate final results across subsets
197
+#' @keywords internal
198
+#'
173 199
 #' @param result list of CogapsResult object from all runs across subsets
174 200
 #' @param allParams list of all CoGAPS parameters
175 201
 #' @return list with all CoGAPS output
176 202
new file mode 100644
... ...
@@ -0,0 +1,152 @@
1
+#' wrapper around cat
2
+#' @keywords
3
+#'
4
+#' @description cleans up message printing
5
+#' @param allParams all cogaps parameters
6
+#' @param ... arguments forwarded to cat
7
+#' @return displays text
8
+gapsCat <- function(allParams, ...)
9
+{
10
+    if (allParams$messages)
11
+        cat(...)
12
+}
13
+
14
+#' checks if file is supported
15
+#' @keywords internal
16
+#'
17
+#' @param file path to file
18
+#' @return TRUE if file is supported, FALSE if not
19
+#' @importFrom tools file_ext
20
+supported <- function(file)
21
+{
22
+    if (!is(file, "character"))
23
+        return(FALSE)
24
+    return(tools::file_ext(file) %in% c("tsv", "csv", "mtx"))
25
+}
26
+
27
+#' get number of rows from supported file name or matrix
28
+#' @keywords internal
29
+#'
30
+#' @param data either a file name or a matrix
31
+#' @return number of rows
32
+#' @importFrom data.table fread
33
+#' @importFrom tools file_ext
34
+nrowHelper <- function(data)
35
+{
36
+    if (is(data, "character"))
37
+    {
38
+        return(switch(tools::file_ext(data),
39
+            "csv" = nrow(data.table::fread(data, select=1)),
40
+            "tsv" = nrow(data.table::fread(data, select=1)),
41
+            "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1])
42
+        ))
43
+    }
44
+    return(nrow(data))
45
+}
46
+
47
+#' get number of columns from supported file name or matrix
48
+#' @keywords internal
49
+#'
50
+#' @param data either a file name or a matrix
51
+#' @return number of columns
52
+#' @importFrom data.table fread
53
+#' @importFrom tools file_ext
54
+ncolHelper <- function(data)
55
+{
56
+    if (is(data, "character"))
57
+    {
58
+        return(switch(tools::file_ext(data),
59
+            "csv" = ncol(data.table::fread(data, nrows=1)) - 1,
60
+            "tsv" = ncol(data.table::fread(data, nrows=1)) - 1,
61
+            "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2])
62
+        ))
63
+    }
64
+    return(ncol(data))
65
+}
66
+
67
+#' extract gene names from data
68
+#' @keywords internal
69
+getGeneNames <- function(data, transpose)
70
+{
71
+    nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data))
72
+    return(paste("Gene", 1:nGenes, sep="_"))
73
+}
74
+
75
+#' extract sample names from data
76
+#' @keywords internal
77
+getSampleNames <- function(data, transpose)
78
+{
79
+    nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data))
80
+    return(paste("Sample", 1:nSamples, sep="_"))
81
+}
82
+
83
+#' write start up message
84
+#' @keywords internal
85
+#'
86
+#' @param data data set
87
+#' @param allParams list of all parameters
88
+#' @return message displayed to screen
89
+#' @importFrom methods show
90
+startupMessage <- function(data, allParams)
91
+{
92
+    nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data))
93
+    nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data))
94
+
95
+    dist_message <- "Standard"
96
+    if (!is.null(allParams$gaps@distributed))
97
+        dist_message <- allParams$gaps@distributed
98
+
99
+    cat("Running", dist_message, "CoGAPS on", nGenes, "genes and",
100
+        nSamples, "samples")
101
+
102
+    if (allParams$messages)
103
+    {
104
+        cat(" with parameters:\n\n")
105
+        methods::show(allParams$gaps)
106
+    }
107
+    cat("\n")
108
+}
109
+
110
+#' parse parameters passed through the ... variable
111
+#' @keywords internal
112
+#'
113
+#' @param allParams list of all parameters
114
+#' @param extraParams list of parameters in ...
115
+#' @return allParams with any valid parameters in extraParams added
116
+#' @note will halt with an error if any parameters in extraParams are invalid
117
+#' @importFrom methods slotNames
118
+parseExtraParams <- function(allParams, extraParams)
119
+{
120
+    # parse direct params
121
+    for (s in slotNames(allParams$gaps))
122
+    {
123
+        if (!is.null(extraParams[[s]]))
124
+        {
125
+            allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]])
126
+            extraParams[[s]] <- NULL
127
+        }
128
+    }
129
+
130
+    # check for unrecognized options
131
+    if (length(extraParams) > 0)
132
+        stop(paste("unrecognized argument:", names(extraParams)[1]))
133
+
134
+    return(allParams)
135
+}
136
+
137
+#' check that provided data is valid
138
+#' @keywords internal
139
+#'
140
+#' @param data data matrix
141
+#' @param uncertainty uncertainty matrix, can be null
142
+#' @param params CogapsParams object
143
+#' @return throws an error if data has problems
144
+checkDataMatrix <- function(data, uncertainty, params)
145
+{
146
+    if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0)
147
+        stop("negative values in data and/or uncertainty matrix")
148
+    if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns)
149
+        stop("nPatterns must be less than dimensions of data")
150
+    if (sum(uncertainty < 1e-5) > 0)
151
+        warning("small values in uncertainty matrix detected")
152
+}
0 153
\ No newline at end of file
1 154
new file mode 100644
... ...
@@ -0,0 +1,95 @@
1
+#' use user provided subsets
2
+#' @keywords internal
3
+#'
4
+#' @param allParams list of all CoGAPS parameters
5
+#' @param total total number of rows (cols) that are being paritioned
6
+#' @return list of subsets
7
+sampleWithExplictSets <- function(allParams, total)
8
+{
9
+    if (all(sapply(allParams$gaps@explicitSets, function(s) is(s, "numeric"))))
10
+    {
11
+        gapsCat(allParams, "using provided indexed subsets\n")
12
+        return(allParams$gaps@explicitSets)
13
+    }
14
+    else if (all(sapply(allParams$gaps@explicitSets, function(s) is(s, "character"))))
15
+    {
16
+        gapsCat(allParams, "using provided named subsets\n")
17
+        if (allParams$gaps@distributed == "genome-wide")
18
+            allNames <- allParams$geneNames
19
+        else
20
+            allNames <- allParams$sampleNames
21
+        return(lapply(allParams$gaps@explicitSets, function(set) which(allNames %in% set)))
22
+    }
23
+}
24
+
25
+#' subset rows (cols) proportional to the user provided weights
26
+#' @keywords internal
27
+#'
28
+#' @param allParams list of all CoGAPS parameters
29
+#' @param setSize the size of each subset of the total
30
+#' @return list of subsets
31
+sampleWithAnnotationWeights <- function(allParams, setSize)
32
+{
33
+    groups <- unique(allParams$gaps@samplingAnnotation)
34
+    return(lapply(1:allParams$gaps@nSets, function(i)
35
+    {
36
+        groupCount <- sample(groups, size=setSize, replace=TRUE,
37
+            prob=allParams$gaps@samplingWeight)
38
+        sort(unlist(sapply(groups, function(g)
39
+        {
40
+            groupNdx <- which(allParams$gaps@samplingAnnotation == g)
41
+            sample(groupNdx, size=sum(groupCount == g), replace=TRUE)
42
+        })))
43
+    }))
44
+}
45
+
46
+#' subset data by uniformly partioning rows (cols)
47
+#' @keywords internal
48
+#'
49
+#' @param allParams list of all CoGAPS parameters
50
+#' @param total total number of rows (cols) that are being paritioned
51
+#' @param setSize the size of each subset of the total
52
+#' @return list of subsets
53
+sampleUniformly <- function(allParams, total, setSize)
54
+{
55
+    sets <- list()
56
+    remaining <- 1:total
57
+    for (n in 1:(allParams$gaps@nSets - 1))
58
+    {
59
+        selected <- sample(remaining, setSize, replace=FALSE)
60
+        sets[[n]] <- sort(selected)
61
+        remaining <- setdiff(remaining, selected)
62
+    }
63
+    sets[[allParams$gaps@nSets]] <- sort(remaining)
64
+    return(sets)
65
+}
66
+
67
+#' partition genes/samples into subsets
68
+#' @keywords internal
69
+#'
70
+#' @description either genes or samples or partitioned depending on the type
71
+#' of distributed CoGAPS (i.e. genome-wide or single-cell)
72
+#' @param data either file name or matrix
73
+#' @param allParams list of all CoGAPS parameters
74
+#' @return list of sorted subsets of either genes or samples
75
+createSets <- function(data, allParams)
76
+{
77
+    subsetRows <- xor(allParams$transposeData, allParams$gaps@distributed == "genome-wide")
78
+    total <- ifelse(subsetRows, nrowHelper(data), ncolHelper(data))
79
+    setSize <- floor(total / allParams$gaps@nSets)
80
+
81
+    gapsCat(allParams, "Creating subsets...")
82
+
83
+    if (!is.null(allParams$gaps@explicitSets))
84
+    {
85
+        return(sampleWithExplictSets(allParams, total))
86
+    }
87
+
88
+    if (!is.null(allParams$gaps@samplingAnnotation))
89
+    {
90
+        gapsCat(allParams, "sampling with annotation weights\n")
91
+        return(sampleWithAnnotationWeights(allParams, setSize))
92
+    }
93
+    gapsCat(allParams, "\n")
94
+    return(sampleUniformly(allParams, total, setSize))
95
+}
... ...
@@ -20,6 +20,11 @@
20 20
 #' a cluster must contain
21 21
 #' @slot maxNS [distributed parameter] maximum of individual set contributions
22 22
 #' a cluster can contain
23
+#' @slot explicitSets [distributed parameter] specify subsets by index or name
24
+#' @slot samplingAnnotation [distributed parameter] specify categories along
25
+#' the rows (cols) to use for weighted sampling
26
+#' @slot samplingWeight [distributed parameter] weights associated with 
27
+#' samplingAnnotation
23 28
 #' @importClassesFrom S4Vectors character_OR_NULL
24 29
 setClass("CogapsParams", slots = c(
25 30
     nPatterns = "numeric",
... ...
@@ -34,7 +39,10 @@ setClass("CogapsParams", slots = c(
34 39
     nSets = "numeric",
35 40
     cut = "numeric",
36 41
     minNS = "numeric",
37
-    maxNS = "numeric"
42
+    maxNS = "numeric",
43
+    explicitSets = "ANY",
44
+    samplingAnnotation = "character_OR_NULL",
45
+    samplingWeight = "numeric"
38 46
 ))
39 47
 
40 48
 #' constructor for CogapsParams
... ...
@@ -60,6 +68,9 @@ setMethod("initialize", "CogapsParams",
60 68
         .Object@nSets <- 4
61 69
         .Object@minNS <- ceiling(.Object@nSets / 2)
62 70
         .Object@maxNS <- .Object@minNS + .Object@nSets
71
+        .Object@explicitSets <- NULL
72
+        .Object@samplingAnnotation <- NULL
73
+        .Object@samplingWeight <- integer(0)
63 74
 
64 75
         .Object <- callNextMethod(.Object, ...)
65 76
         .Object
... ...
@@ -84,6 +95,21 @@ setValidity("CogapsParams",
84 95
             "minNS must be an integer greater than one"
85 96
         if (object@nSets <= 1 | object@nSets %% 1 != 0)
86 97
             "minNS must be an integer greater than one"
98
+        if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets)
99
+            "nSets doesn't match length of explicitSets"
100
+        if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight))
101
+            stop("samplingWeight has mismatched size with amount of distinct annotations")
102
+
103
+        # check type of explicitSets
104
+        if (!is.null(object@explicitSets) & !is(object@explicitSets, "list"))
105
+            "explicitSets must be a list of numeric or character"
106
+        isNum <- sapply(object@explicitSets, function(s) is(s, "numeric"))
107
+        isChar <- sapply(object@explicitSets, function(s) is(s, "charcater"))
108
+        if (!is.null(object@explicitSets) & !(all(isNum) | all(isChar)))
109
+            "explicitSets must be a list of numeric or character"
110
+
111
+        if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets)
112
+            "wrong number of sets given"
87 113
     }
88 114
 )
89 115
 
... ...
@@ -122,6 +148,22 @@ setGeneric("setDistributedParams", function(object, nSets, cut=NULL,
122 148
 minNS=NULL, maxNS=NULL)
123 149
     {standardGeneric("setDistributedParams")})
124 150
 
151
+#' set the annotation labels and weights for subsetting the data
152
+#' @export
153
+#' @docType methods
154
+#' @rdname setAnnotationWeights-methods
155
+#'
156
+#' @description these parameters  are interrelated so they must be set together
157
+#' @param object an object of type CogapsParams
158
+#' @param annotation vector of labels
159
+#' @param weights vector of weights
160
+#' @return the modified params object
161
+#' @examples
162
+#'  params <- new("CogapsParams")
163
+#'  params <- setAnnotationWeights(params, c('a', 'b', 'c'), c(1,2,1))
164
+setGeneric("setAnnotationWeights", function(object, annotation, weights)
165
+    {standardGeneric("setAnnotationWeights")})
166
+
125 167
 #' get the value of a parameter
126 168
 #' @export
127 169
 #' @docType methods
... ...
@@ -17,14 +17,16 @@ setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c(
17 17
 #' @param Pmean mean of sampled P matrices
18 18
 #' @param Asd std dev of sampled A matrices
19 19
 #' @param Psd std dev of sampled P matrices
20
-#' @param seed random seed used for the run
21 20
 #' @param meanChiSq mean value of ChiSq statistic
21
+#' @param geneNames names of genes in data
22
+#' @param sampleNames names of samples in data
22 23
 #' @param diagnostics assorted diagnostic reports from the run
23 24
 #' @param ... initial values for slots
24 25
 #' @return initialized CogapsResult object
25 26
 #' @importFrom methods callNextMethod
26 27
 setMethod("initialize", "CogapsResult",
27
-function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics=NULL, ...)
28
+function(.Object, Amean, Pmean, Asd, Psd, meanChiSq, geneNames,
29
+sampleNames, diagnostics=NULL, ...)
28 30
 {
29 31
     .Object@featureLoadings <- Amean
30 32
     .Object@sampleFactors <- Pmean
... ...
@@ -32,12 +34,19 @@ function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics=NULL, ...
32 34
     .Object@sampleStdDev <- Psd
33 35
 
34 36
     patternNames <- paste("Pattern", 1:ncol(Amean), sep="_")
37
+
38
+    rownames(.Object@featureLoadings) <- geneNames
35 39
     colnames(.Object@featureLoadings) <- patternNames
40
+
41
+    rownames(.Object@featureStdDev) <- geneNames
36 42
     colnames(.Object@featureStdDev) <- patternNames
43
+
44
+    rownames(.Object@sampleFactors) <- sampleNames
37 45
     colnames(.Object@sampleFactors) <- patternNames
46
+
47
+    rownames(.Object@sampleStdDev) <- sampleNames
38 48
     colnames(.Object@sampleStdDev) <- patternNames
39 49
 
40
-    .Object@metadata[["seed"]] <- seed
41 50
     .Object@metadata[["meanChiSq"]] <- meanChiSq
42 51
     .Object@metadata <- append(.Object@metadata, diagnostics)
43 52
 
... ...
@@ -99,6 +108,62 @@ setGeneric("getVersion", function(object)
99 108
 setGeneric("getOriginalParameters", function(object)
100 109
     {standardGeneric("getOriginalParameters")})
101 110
 
111
+#' return unmatched patterns from each subset
112
+#' @export
113
+#' @docType methods
114
+#' @rdname getUnmatchedPatterns-methods
115
+#'
116
+#' @param object an object of type CogapsResult
117
+#' @return CogapsParams object
118
+#' @examples
119
+#' data(SimpSim)
120
+#' result <- CoGAPS(SimpSim.data)
121
+#' getUnmatchedPatterns(result)
122
+setGeneric("getUnmatchedPatterns", function(object)
123
+    {standardGeneric("getUnmatchedPatterns")})
124
+
125
+#' return clustered patterns from set of all patterns across all subsets
126
+#' @export
127
+#' @docType methods
128
+#' @rdname getClusteredPatterns-methods
129
+#'
130
+#' @param object an object of type CogapsResult
131
+#' @return CogapsParams object
132
+#' @examples
133
+#' data(SimpSim)
134
+#' result <- CoGAPS(SimpSim.data)
135
+#' getClusteredPatterns(result)
136
+setGeneric("getClusteredPatterns", function(object)
137
+    {standardGeneric("getClusteredPatterns")})
138
+
139
+#' return correlation between each pattern and the cluster mean
140
+#' @export
141
+#' @docType methods
142
+#' @rdname getCorrelationToMeanPattern-methods
143
+#'
144
+#' @param object an object of type CogapsResult
145
+#' @return CogapsParams object
146
+#' @examples
147
+#' data(SimpSim)
148
+#' result <- CoGAPS(SimpSim.data)
149
+#' getCorrelationToMeanPattern(result)
150
+setGeneric("getCorrelationToMeanPattern", function(object)
151
+    {standardGeneric("getCorrelationToMeanPattern")})
152
+
153
+#' return the names of the genes (samples) in each subset
154
+#' @export
155
+#' @docType methods
156
+#' @rdname getSubsets-methods
157
+#'
158
+#' @param object an object of type CogapsResult
159
+#' @return CogapsParams object
160
+#' @examples
161
+#' data(SimpSim)
162
+#' result <- CoGAPS(SimpSim.data)
163
+#' getSubsets(result)
164
+setGeneric("getSubsets", function(object)
165
+    {standardGeneric("getSubsets")})
166
+
102 167
 #' compute z-score matrix
103 168
 #' @export
104 169
 #' @docType methods
... ...
@@ -1,14 +1,13 @@
1 1
 setMethod("show", signature("CogapsParams"),
2 2
 function(object)
3 3
 {
4
-    cat("An Object of class \"CogapsParams\"\n")
5
-    cat("\n")
6 4
     cat("-- Standard Parameters --\n")
7 5
     cat("nPatterns     ", object@nPatterns, "\n")
8 6
     cat("nIterations   ", object@nIterations, "\n")
9 7
     cat("seed          ", object@seed, "\n")
10 8
     cat("singleCell    ", object@singleCell, "\n")
11
-    cat("distributed   ", ifelse(is.null(object@distributed), FALSE, TRUE), "\n")
9
+    if (!is.null(object@distributed))
10
+        cat("distributed   ", object@distributed, "\n")
12 11
     cat("\n")
13 12
     cat("-- Sparsity Parameters --\n")
14 13
     if (object@alphaA == object@alphaP)
... ...
@@ -61,6 +60,10 @@ function(object, whichParam, value)
61 60
     {
62 61
         stop("please set this parameter with setDistributedParams")
63 62
     }
63
+    else if (whichParam %in% c("samplingAnnotation", "samplingWeight"))
64
+    {
65
+        stop("please set this parameter with setAnnotationWeights")
66
+    }
64 67
     else
65 68
     {
66 69
         slot(object, whichParam) <- value
... ...
@@ -75,6 +78,9 @@ function(object, whichParam, value)
75 78
 setMethod("setDistributedParams", signature(object="CogapsParams"),
76 79
 function(object, nSets, cut, minNS, maxNS)
77 80
 {
81
+    message("setting distributed parameters - call this again if you change ",
82
+        "nPatterns")
83
+
78 84
     object@nSets <- nSets
79 85
 
80 86
     object@cut <- ifelse(is.null(cut), object@nPatterns, cut)
... ...
@@ -85,6 +91,18 @@ function(object, nSets, cut, minNS, maxNS)
85 91
     return(object)
86 92
 })
87 93
 
94
+#' @rdname setAnnotationWeights-methods
95
+#' @aliases setAnnotationWeights
96
+setMethod("setAnnotationWeights", signature(object="CogapsParams"),
97
+function(object, annotation, weights)
98
+{
99
+    object@samplingAnnotation <- annotation
100
+    object@samplingWeight <- weights
101
+
102
+    validObject(object)
103
+    return(object)
104
+})
105
+
88 106
 #' @rdname getParam-methods
89 107
 #' @aliases getParam
90 108
 setMethod("getParam", signature(object="CogapsParams"),
... ...
@@ -47,7 +47,7 @@ function(object)
47 47
 setMethod("getVersion", signature(object="CogapsResult"),
48 48
 function(object)
49 49
 {
50
-    object@metadata$diagnostics$version
50
+    object@metadata$version
51 51
 })
52 52
 
53 53
 #' @rdname getOriginalParameters-methods
... ...
@@ -55,7 +55,55 @@ function(object)
55 55
 setMethod("getOriginalParameters", signature(object="CogapsResult"),
56 56
 function(object)
57 57
 {
58
-    object@metadata$diagnostics$params
58
+    object@metadata$params
59
+})
60
+
61
+#' @rdname getUnmatchedPatterns-methods
62
+#' @aliases getUnmatchedPatterns
63
+setMethod("getUnmatchedPatterns", signature(object="CogapsResult"),
64
+function(object)
65
+{
66
+    if (!is.null(object@metadata$unmatchedPatterns))
67
+        return(object@metadata$unmatchedPatterns)
68
+
69
+    message("this result was not generated with a call to GWCoGAPS or scCoGAPS")
70
+    return(NULL)
71
+})
72
+
73
+#' @rdname getClusteredPatterns-methods
74
+#' @aliases getClusteredPatterns
75
+setMethod("getClusteredPatterns", signature(object="CogapsResult"),
76
+function(object)
77
+{
78
+    if (!is.null(object@metadata$clusteredPatterns))
79
+        return(object@metadata$clusteredPatterns)
80
+
81
+    message("this result was not generated with a call to GWCoGAPS or scCoGAPS")
82
+    return(NULL)
83
+})
84
+
85
+#' @rdname getCorrelationToMeanPattern-methods
86
+#' @aliases getCorrelationToMeanPattern
87
+setMethod("getCorrelationToMeanPattern", signature(object="CogapsResult"),
88
+function(object)
89
+{
90
+    if (!is.null(object@metadata$CorrToMeanPattern))
91
+        return(object@metadata$CorrToMeanPattern)
92
+
93
+    message("this result was not generated with a call to GWCoGAPS or scCoGAPS")
94
+    return(NULL)
95
+})
96
+
97
+#' @rdname getSubsets-methods
98
+#' @aliases getSubsets
99
+setMethod("getSubsets", signature(object="CogapsResult"),
100
+function(object)
101
+{
102
+    if (!is.null(object@metadata$subsets))
103
+        return(object@metadata$subsets)
104
+
105
+    message("this result was not generated with a call to GWCoGAPS or scCoGAPS")
106
+    return(NULL)
59 107
 })
60 108
 
61 109
 #' @rdname calcZ-methods
... ...
@@ -1,4 +1,4 @@
1
-# CoGAPS Version: 3.3.5
1
+# CoGAPS Version: 3.3.6
2 2
 
3 3
 [![Bioc](https://bioconductor.org/images/logo_bioconductor.gif)](https://bioconductor.org/packages/CoGAPS)
4 4
 [![downloads](https://bioconductor.org/shields/downloads/CancerInSilico.svg)](https://bioconductor.org/packages/CoGAPS)
... ...
@@ -1 +1,2 @@
1
-rm src/Makevars
1
+#!/bin/sh
2
+rm -f src/Makevars
... ...
@@ -8,7 +8,8 @@ CoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10 10
   checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
-  ...)
11
+  geneNames = NULL, sampleNames = NULL, matchedPatterns = NULL,
12
+  outputToFile = NULL, ...)
12 13
 }
13 14
 \arguments{
14 15
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -39,6 +40,15 @@ genes x samples}
39 40
 
40 41
 \item{BPPARAM}{BiocParallel backend}
41 42
 
43
+\item{geneNames}{vector of names of genes in data}
44
+
45
+\item{sampleNames}{vector of names of samples in data}
46
+
47
+\item{matchedPatterns}{manually matched patterns for distributed CoGAPS}
48
+
49
+\item{outputToFile}{name of a file to save the output to, will create 4 files
50
+of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"}
51
+
42 52
 \item{...}{allows for overwriting parameters in params}
43 53
 }
44 54
 \value{
... ...
@@ -39,5 +39,13 @@ a cluster must contain}
39 39
 
40 40
 \item{\code{maxNS}}{[distributed parameter] maximum of individual set contributions
41 41
 a cluster can contain}
42
+
43
+\item{\code{explicitSets}}{[distributed parameter] specify subsets by index or name}
44
+
45
+\item{\code{samplingAnnotation}}{[distributed parameter] specify categories along
46
+the rows (cols) to use for weighted sampling}
47
+
48
+\item{\code{samplingWeight}}{[distributed parameter] weights associated with 
49
+samplingAnnotation}
42 50
 }}
43 51
 
... ...
@@ -8,7 +8,7 @@ GWCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10 10
   checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
-  ...)
11
+  geneNames = NULL, sampleNames = NULL, matchedPatterns = NULL, ...)
12 12
 }
13 13
 \arguments{
14 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -39,6 +39,12 @@ genes x samples}
39 39
 
40 40
 \item{BPPARAM}{BiocParallel backend}
41 41
 
42
+\item{geneNames}{vector of names of genes in data}
43
+
44
+\item{sampleNames}{vector of names of samples in data}
45
+
46
+\item{matchedPatterns}{manually matched patterns for distributed CoGAPS}
47
+
42 48
 \item{...}{allows for overwriting parameters in params}
43 49
 }
44 50
 \value{
... ...
@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/CoGAPS.R
2
+% Please edit documentation in R/HelperFunctions.R
3 3
 \name{checkDataMatrix}
4 4
 \alias{checkDataMatrix}
5 5
 \title{check that provided data is valid}
... ...
@@ -19,3 +19,4 @@ throws an error if data has problems
19 19
 \description{
20 20
 check that provided data is valid
21 21
 }
22
+\keyword{internal}
... ...
@@ -4,12 +4,14 @@
4 4
 \alias{corcut}
5 5
 \title{cluster patterns together}
6 6
 \usage{
7
-corcut(allPatterns, allParams)
7
+corcut(allPatterns, cut, minNS)
8 8
 }
9 9
 \arguments{
10 10
 \item{allPatterns}{matrix of all patterns across subsets}
11 11
 
12
-\item{allParams}{list of all CoGAPS parameters}
12
+\item{cut}{number of branches at which to cut dendrogram}
13
+
14
+\item{minNS}{minimum of individual set contributions a cluster must contain}
13 15
 }
14 16
 \value{
15 17
 patterns listed by which cluster they belong to
... ...
@@ -17,3 +19,4 @@ patterns listed by which cluster they belong to
17 19
 \description{
18 20
 cluster patterns together
19 21
 }
22
+\keyword{internal}
20 23
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{corrToMeanPattern}
4
+\alias{corrToMeanPattern}
5
+\title{calculate correlation of each pattern in a cluster to the cluster mean}
6
+\usage{
7
+corrToMeanPattern(cluster)
8
+}
9
+\description{
10
+calculate correlation of each pattern in a cluster to the cluster mean
11
+}
12
+\keyword{internal}
... ...
@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/DistributedCogaps.R
2
+% Please edit documentation in R/SubsetData.R
3 3
 \name{createSets}
4 4
 \alias{createSets}
5 5
 \title{partition genes/samples into subsets}
... ...
@@ -18,3 +18,4 @@ list of sorted subsets of either genes or samples
18 18
 either genes or samples or partitioned depending on the type
19 19
 of distributed CoGAPS (i.e. genome-wide or single-cell)
20 20
 }
21
+\keyword{internal}
... ...
@@ -23,3 +23,4 @@ back together
23 23
 \details{
24 24
 For file types CoGAPS supports csv, tsv, and mtx
25 25
 }
26
+\keyword{internal}
... ...
@@ -18,3 +18,4 @@ matrix of consensus patterns
18 18
 \description{
19 19
 find the consensus pattern matrix across all subsets
20 20
 }
21
+\keyword{internal}
21 22
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{gapsCat}
4
+\alias{gapsCat}
5
+\title{wrapper around cat}
6
+\usage{
7
+gapsCat(allParams, ...)
8
+}
9
+\arguments{
10
+\item{allParams}{all cogaps parameters}
11
+
12
+\item{...}{arguments forwarded to cat}
13
+}
14
+\value{
15
+displays text
16
+}
17
+\description{
18
+cleans up message printing
19
+}
20
+\keyword{}
0 21
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getClusteredPatterns}
5
+\alias{getClusteredPatterns}
6
+\alias{getClusteredPatterns,CogapsResult-method}
7
+\title{return clustered patterns from set of all patterns across all subsets}
8
+\usage{
9
+getClusteredPatterns(object)
10
+
11
+\S4method{getClusteredPatterns}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+CogapsParams object
18
+}
19
+\description{
20
+return clustered patterns from set of all patterns across all subsets
21
+}
22
+\examples{
23
+data(SimpSim)
24
+result <- CoGAPS(SimpSim.data)
25
+getClusteredPatterns(result)
26
+}
0 27
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getCorrelationToMeanPattern}
5
+\alias{getCorrelationToMeanPattern}
6
+\alias{getCorrelationToMeanPattern,CogapsResult-method}
7
+\title{return correlation between each pattern and the cluster mean}
8
+\usage{
9
+getCorrelationToMeanPattern(object)
10
+
11
+\S4method{getCorrelationToMeanPattern}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+CogapsParams object
18
+}
19
+\description{
20
+return correlation between each pattern and the cluster mean
21
+}
22
+\examples{
23
+data(SimpSim)
24
+result <- CoGAPS(SimpSim.data)
25
+getCorrelationToMeanPattern(result)
26
+}
0 27
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{getGeneNames}
4
+\alias{getGeneNames}
5
+\title{extract gene names from data}
6
+\usage{
7
+getGeneNames(data, transpose)
8
+}
9
+\description{
10
+extract gene names from data
11
+}
12
+\keyword{internal}
0 13
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{getSampleNames}
4
+\alias{getSampleNames}
5
+\title{extract sample names from data}
6
+\usage{
7
+getSampleNames(data, transpose)
8
+}
9
+\description{
10
+extract sample names from data
11
+}
12
+\keyword{internal}
0 13
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getSubsets}
5
+\alias{getSubsets}
6
+\alias{getSubsets,CogapsResult-method}
7
+\title{return the names of the genes (samples) in each subset}
8
+\usage{
9
+getSubsets(object)
10
+
11
+\S4method{getSubsets}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+CogapsParams object
18
+}
19
+\description{
20
+return the names of the genes (samples) in each subset
21
+}
22
+\examples{
23
+data(SimpSim)
24
+result <- CoGAPS(SimpSim.data)
25
+getSubsets(result)
26
+}
0 27
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getUnmatchedPatterns}
5
+\alias{getUnmatchedPatterns}
6
+\alias{getUnmatchedPatterns,CogapsResult-method}
7
+\title{return unmatched patterns from each subset}
8
+\usage{
9
+getUnmatchedPatterns(object)
10
+
11
+\S4method{getUnmatchedPatterns}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+CogapsParams object
18
+}
19
+\description{
20
+return unmatched patterns from each subset
21
+}
22
+\examples{
23
+data(SimpSim)
24
+result <- CoGAPS(SimpSim.data)
25
+getUnmatchedPatterns(result)
26
+}
... ...
@@ -5,8 +5,8 @@
5 5
 \alias{initialize,CogapsResult-method}
6 6
 \title{Constructor for CogapsResult}
7 7
 \usage{
8
-\S4method{initialize}{CogapsResult}(.Object, Amean, Pmean, Asd, Psd, seed,
9
-  meanChiSq, diagnostics = NULL, ...)
8
+\S4method{initialize}{CogapsResult}(.Object, Amean, Pmean, Asd, Psd,
9
+  meanChiSq, geneNames, sampleNames, diagnostics = NULL, ...)
10 10
 }
11 11
 \arguments{
12 12
 \item{.Object}{CogapsResult object}
... ...
@@ -19,10 +19,12 @@
19 19
 
20 20
 \item{Psd}{std dev of sampled P matrices}
21 21
 
22
-\item{seed}{random seed used for the run}
23
-
24 22
 \item{meanChiSq}{mean value of ChiSq statistic}
25 23
 
24
+\item{geneNames}{names of genes in data}
25
+
26
+\item{sampleNames}{names of samples in data}
27
+
26 28
 \item{diagnostics}{assorted diagnostic reports from the run}
27 29
 
28 30
 \item{...}{initial values for slots}
29 31
similarity index 69%
30 32
rename from man/ncol_helper.Rd
31 33
rename to man/ncolHelper.Rd
... ...
@@ -1,10 +1,10 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/CoGAPS.R
3
-\name{ncol_helper}
4
-\alias{ncol_helper}
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{ncolHelper}
4
+\alias{ncolHelper}
5 5
 \title{get number of columns from supported file name or matrix}
6 6
 \usage{
7
-ncol_helper(data)
7
+ncolHelper(data)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{either a file name or a matrix}
... ...
@@ -15,3 +15,4 @@ number of columns
15 15
 \description{
16 16
 get number of columns from supported file name or matrix
17 17
 }
18
+\keyword{internal}
18 19
similarity index 68%
19 20
rename from man/nrow_helper.Rd
20 21
rename to man/nrowHelper.Rd
... ...
@@ -1,10 +1,10 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/CoGAPS.R
3
-\name{nrow_helper}
4
-\alias{nrow_helper}
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{nrowHelper}
4
+\alias{nrowHelper}
5 5
 \title{get number of rows from supported file name or matrix}
6 6
 \usage{
7
-nrow_helper(data)
7
+nrowHelper(data)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{either a file name or a matrix}
... ...
@@ -15,3 +15,4 @@ number of rows
15 15
 \description{
16 16
 get number of rows from supported file name or matrix
17 17
 }
18
+\keyword{internal}
... ...
@@ -1,5 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/CoGAPS.R
2
+% Please edit documentation in R/HelperFunctions.R
3 3
 \name{parseExtraParams}
4 4
 \alias{parseExtraParams}
5 5
 \title{parse parameters passed through the ... variable}
... ...
@@ -20,3 +20,4 @@ parse parameters passed through the ... variable
20 20
 \note{
21 21
 will halt with an error if any parameters in extraParams are invalid
22 22
 }
23
+\keyword{internal}
... ...
@@ -17,3 +17,4 @@ a matrix of consensus patterns
17 17
 \description{
18 18
 Match Patterns Across Multiple Runs
19 19
 }
20
+\keyword{internal}
20 21
new file mode 100644
... ...
@@ -0,0 +1,22 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/SubsetData.R
3
+\name{sampleUniformly}
4
+\alias{sampleUniformly}
5
+\title{subset data by uniformly partioning rows (cols)}
6
+\usage{
7
+sampleUniformly(allParams, total, setSize)
8
+}
9
+\arguments{
10
+\item{allParams}{list of all CoGAPS parameters}
11
+
12
+\item{total}{total number of rows (cols) that are being paritioned}
13
+
14
+\item{setSize}{the size of each subset of the total}
15
+}
16
+\value{
17
+list of subsets
18
+}
19
+\description{
20
+subset data by uniformly partioning rows (cols)
21
+}
22
+\keyword{internal}
0 23
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/SubsetData.R
3
+\name{sampleWithAnnotationWeights}
4
+\alias{sampleWithAnnotationWeights}
5
+\title{subset rows (cols) proportional to the user provided weights}
6
+\usage{
7
+sampleWithAnnotationWeights(allParams, setSize)
8
+}
9
+\arguments{
10
+\item{allParams}{list of all CoGAPS parameters}
11
+
12
+\item{setSize}{the size of each subset of the total}
13
+}
14
+\value{
15
+list of subsets
16
+}
17
+\description{
18
+subset rows (cols) proportional to the user provided weights
19
+}
20
+\keyword{internal}
0 21
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/SubsetData.R
3
+\name{sampleWithExplictSets}
4
+\alias{sampleWithExplictSets}
5
+\title{use user provided subsets}
6
+\usage{
7
+sampleWithExplictSets(allParams, total)
8
+}
9
+\arguments{
10
+\item{allParams}{list of all CoGAPS parameters}
11
+
12
+\item{total}{total number of rows (cols) that are being paritioned}
13
+}
14
+\value{
15
+list of subsets
16
+}
17
+\description{
18
+use user provided subsets
19
+}
20
+\keyword{internal}
... ...
@@ -8,7 +8,7 @@ scCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10 10
   checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
-  ...)
11
+  geneNames = NULL, sampleNames = NULL, matchedPatterns = NULL, ...)
12 12
 }
13 13
 \arguments{
14 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -39,6 +39,12 @@ genes x samples}
39 39
 
40 40
 \item{BPPARAM}{BiocParallel backend}
41 41
 
42
+\item{geneNames}{vector of names of genes in data}
43
+
44
+\item{sampleNames}{vector of names of samples in data}
45
+
46
+\item{matchedPatterns}{manually matched patterns for distributed CoGAPS}
47
+
42 48
 \item{...}{allows for overwriting parameters in params}
43 49
 }
44 50
 \value{
45 51
new file mode 100644
... ...
@@ -0,0 +1,29 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R
3
+\docType{methods}
4
+\name{setAnnotationWeights}
5
+\alias{setAnnotationWeights}
6
+\alias{setAnnotationWeights,CogapsParams-method}
7
+\title{set the annotation labels and weights for subsetting the data}
8
+\usage{
9
+setAnnotationWeights(object, annotation, weights)
10
+
11
+\S4method{setAnnotationWeights}{CogapsParams}(object, annotation, weights)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsParams}
15
+
16
+\item{annotation}{vector of labels}
17
+
18
+\item{weights}{vector of weights}
19
+}
20
+\value{
21
+the modified params object
22
+}
23
+\description{
24
+these parameters  are interrelated so they must be set together
25
+}
26
+\examples{
27
+ params <- new("CogapsParams")
28
+ params <- setAnnotationWeights(params, c('a', 'b', 'c'), c(1,2,1))
29
+}
... ...
@@ -1,17 +1,15 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/CoGAPS.R
2
+% Please edit documentation in R/HelperFunctions.R
3 3
 \name{startupMessage}
4 4
 \alias{startupMessage}
5 5
 \title{write start up message}
6 6
 \usage{
7
-startupMessage(data, transpose, distributed)
7
+startupMessage(data, allParams)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{data set}
11 11
 
12
-\item{transpose}{if we are transposing the data set}
13
-
14
-\item{distributed}{if we are running distributed CoGAPS}
12
+\item{allParams}{list of all parameters}
15 13
 }
16 14
 \value{
17 15
 message displayed to screen
... ...
@@ -19,3 +17,4 @@ message displayed to screen
19 17
 \description{
20 18
 write start up message
21 19
 }
20
+\keyword{internal}
... ...
@@ -17,3 +17,4 @@ list with all CoGAPS output
17 17
 \description{
18 18
 concatenate final results across subsets
19 19
 }
20
+\keyword{internal}
... ...
@@ -1,8 +1,8 @@
1 1
 % Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/CoGAPS.R
2
+% Please edit documentation in R/HelperFunctions.R
3 3
 \name{supported}
4 4
 \alias{supported}
5
-\title{Checks if file is supported}
5
+\title{checks if file is supported}
6 6
 \usage{
7 7
 supported(file)
8 8
 }
... ...
@@ -13,5 +13,6 @@ supported(file)
13 13
 TRUE if file is supported, FALSE if not
14 14
 }
15 15
 \description{
16
-Checks if file is supported
16
+checks if file is supported
17 17
 }
18
+\keyword{internal}
... ...
@@ -3,6 +3,7 @@
3 3
 
4 4
 #include <Rcpp.h>
5 5
 #include <string>
6
+#include <sstream>
6 7
 
7 8
 // these are helper functions for converting matrix/vector types
8 9
 // to and from R objects
... ...
@@ -150,8 +151,16 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
150 151
     runner.setCheckpointOutFile(allParams["checkpointOutFile"]);
151 152
     runner.setCheckpointInterval(allParams["checkpointInterval"]);
152 153
 
153
-    // run cogaps and return the GapsResult in an R list
154
+    // run cogaps
154 155
     GapsResult result(runner.run(printThreads));
156
+    
157
+    // write result to file if requested
158
+    if (allParams["outputToFile"] != R_NilValue)
159
+    {
160
+        result.writeToFile(Rcpp::as<std::string>(allParams["outputToFile"]));
161
+    }
162
+
163
+    // return R list
155 164
     return Rcpp::List::create(
156 165
         Rcpp::Named("Amean") = createRMatrix(result.Amean),
157 166
         Rcpp::Named("Pmean") = createRMatrix(result.Pmean, true),
... ...
@@ -159,6 +168,8 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster)
159 168
         Rcpp::Named("Psd") = createRMatrix(result.Psd, true),
160 169
         Rcpp::Named("seed") = runner.getSeed(),
161 170
         Rcpp::Named("meanChiSq") = result.meanChiSq,
171
+        Rcpp::Named("geneNames") = allParams["geneNames"],
172
+        Rcpp::Named("sampleNames") = allParams["sampleNames"],
162 173
         Rcpp::Named("diagnostics") = Rcpp::List::create()
163 174
     );
164 175
 }
... ...
@@ -26,9 +26,48 @@ struct GapsResult
26 26
         Psd(stat.Psd()), meanChiSq(0.f), seed(0)
27 27
     {}
28 28
 
29
-    void writeCsv(const std::string &path);
30
-    void writeTsv(const std::string &path);
31
-    void writeGct(const std::string &path);
29
+    void writeToFile(const std::string &fullPath)
30
+    {
31
+        std::size_t pos = fullPath.find_last_of('.');
32
+        std::string base = fullPath.substr(0, pos);
33
+
34
+        switch (FileParser::fileType(fullPath))
35
+        {
36
+            case GAPS_CSV: return writeCsv(base);
37
+            case GAPS_TSV: return writeTsv(base);
38
+            case GAPS_GCT: return writeGct(base);
39
+        }
40
+    }
41
+
42
+    void writeCsv(const std::string &path)
43
+    {
44
+        unsigned nPatterns = Amean.nCol();
45
+        std::string label("_" + gaps::to_string(nPatterns) + "_");
46
+        FileParser::writeToCsv(path + label + "Amean.csv", Amean);
47
+        FileParser::writeToCsv(path + label + "Pmean.csv", Pmean);
48
+        FileParser::writeToCsv(path + label + "Asd.csv", Asd);
49
+        FileParser::writeToCsv(path + label + "Psd.csv", Psd);
50
+    }
51
+
52
+    void writeTsv(const std::string &path)
53
+    {
54
+        unsigned nPatterns = Amean.nCol();
55
+        std::string label("_" + gaps::to_string(nPatterns) + "_");
56
+        FileParser::writeToCsv(path + label + "Amean.tsv", Amean);
57
+        FileParser::writeToCsv(path + label + "Pmean.tsv", Pmean);
58
+        FileParser::writeToCsv(path + label + "Asd.tsv", Asd);
59
+        FileParser::writeToCsv(path + label + "Psd.tsv", Psd);
60
+    }
61
+
62
+    void writeGct(const std::string &path)
63
+    {
64
+        unsigned nPatterns = Amean.nCol();
65
+        std::string label("_" + gaps::to_string(nPatterns) + "_");
66
+        FileParser::writeToCsv(path + label + "Amean.gct", Amean);
67
+        FileParser::writeToCsv(path + label + "Pmean.gct", Pmean);
68
+        FileParser::writeToCsv(path + label + "Asd.gct", Asd);
69
+        FileParser::writeToCsv(path + label + "Psd.gct", Psd);
70
+    }
32 71
 };
33 72
 
34 73
 class GapsRunner
... ...
@@ -13,6 +13,7 @@ OBJECTS =   AtomicDomain.o \
13 13
             data_structures/Matrix.o \
14 14
             data_structures/Vector.o \
15 15
             file_parser/CsvParser.o \
16
+            file_parser/GctParser.o \
16 17
             file_parser/FileParser.o \
17 18
             file_parser/TsvParser.o \
18 19
             file_parser/MtxParser.o \
... ...
@@ -2,6 +2,7 @@
2 2
 #include "../file_parser/CsvParser.h"
3 3
 #include "../file_parser/TsvParser.h"
4 4
 #include "../file_parser/MtxParser.h"
5
+#include "../file_parser/GctParser.h"
5 6
 
6 7
 #include "../data_structures/Matrix.h"
7 8
 
... ...
@@ -13,6 +14,7 @@ TEST_CASE("Test Parsers")
13 14
     std::string csvPath = Rcpp::as<std::string>(env["gistCsvPath"]);
14 15
     std::string tsvPath = Rcpp::as<std::string>(env["gistTsvPath"]);
15 16
     std::string mtxPath = Rcpp::as<std::string>(env["gistMtxPath"]);
17
+    std::string gctPath = Rcpp::as<std::string>(env["gistGctPath"]);
16 18
 
17 19
     SECTION("Test CsvParser")
18 20
     {
... ...
@@ -83,4 +85,30 @@ TEST_CASE("Test Parsers")
83 85
         }
84 86
         REQUIRE(count == 12267);
85 87
     }
88
+
89
+    SECTION("Test GctParser")
90
+    {
91
+        GctParser p(gctPath);
92
+        REQUIRE(p.nRow() == 1363);
93
+        REQUIRE(p.nCol() == 9);
94
+
95
+        unsigned row = 0;
96
+        unsigned col = 0;
97
+        unsigned count = 0;
98
+        while (p.hasNext())
99
+        {
100
+            MatrixElement e(p.getNext());
101
+            REQUIRE(e.row == row);
102
+            REQUIRE(e.col == col);
103
+
104
+            ++count;
105
+            ++col;
106
+            if (col == 9)
107
+            {
108
+                ++row;
109
+                col = 0;
110
+            }
111
+        }
112
+        REQUIRE(count == 12267);
113
+    }
86 114
 }
87 115
\ No newline at end of file
... ...
@@ -3,6 +3,7 @@
3 3
 #include "FileParser.h"
4 4
 #include "MtxParser.h"
5 5
 #include "TsvParser.h"
6
+#include "GctParser.h"
6 7
 
7 8
 #include <string>
8 9
 
... ...
@@ -13,6 +14,7 @@ AbstractFileParser* AbstractFileParser::create(const std::string &path)
13 14
         case GAPS_MTX: return new MtxParser(path);
14 15
         case GAPS_CSV: return new CsvParser(path);
15 16
         case GAPS_TSV: return new TsvParser(path);
17
+        case GAPS_GCT: return new GctParser(path);
16 18
         default: GAPS_ERROR("Invalid file type\n");
17 19
     }
18 20
 }
... ...
@@ -58,6 +60,7 @@ GapsFileType FileParser::fileType(const std::string &path)
58 60
     if (ext == ".mtx") { return GAPS_MTX; }
59 61
     if (ext == ".csv") { return GAPS_CSV; }
60 62
     if (ext == ".tsv") { return GAPS_TSV; }
63
+    if (ext == ".gct") { return GAPS_GCT; }
61 64
 
62 65
     return GAPS_INVALID_FILE_TYPE;
63 66
 }
... ...
@@ -11,6 +11,7 @@ enum GapsFileType
11 11
     GAPS_MTX,
12 12
     GAPS_CSV,
13 13
     GAPS_TSV,
14
+    GAPS_GCT,
14 15
     GAPS_INVALID_FILE_TYPE
15 16
 };
16 17
 
... ...
@@ -68,6 +69,9 @@ public:
68 69
 
69 70
     template <class MatrixType>
70 71
     static void writeToMtx(const std::string &path, const MatrixType &mat);
72
+
73
+    template <class MatrixType>
74
+    static void writeToGct(const std::string &path, const MatrixType &mat);
71 75
 };
72 76
 
73 77
 // temporary solution - should be moved into specific file parsers, ok for now
... ...
@@ -158,4 +162,38 @@ void FileParser::writeToMtx(const std::string &path, const MatrixType &mat)
158 162
     outputFile.close();
159 163
 }
160 164
 
165
+template <class MatrixType>
166
+void FileParser::writeToGct(const std::string &path, const MatrixType &mat)
167
+{
168
+    GAPS_ASSERT(FileParser::fileType(path) == GAPS_GCT);
169
+
170
+    std::ofstream outputFile;
171
+    outputFile.open(path.c_str());
172
+    outputFile << "#1.2\n";
173
+    outputFile << mat.nRow() << "\t" << mat.nCol() << "\n";
174
+
175
+    outputFile << "\"NAME\"\t\"Description\"";
176
+
177
+    // write column names
178
+    for (unsigned i = 0; i < mat.nCol(); ++i)
179
+    {
180
+        outputFile << "\t\"Col" << i << "\"";
181
+    }
182
+    outputFile << "\n";
183
+
184
+    for (unsigned i = 0; i < mat.nRow(); ++i)
185
+    {
186
+        // write row names
187
+        outputFile << "\"Row" << i << "\"\t\"BLANK\"\t";
188
+        
189
+        // write data
190
+        for (unsigned j = 0; j < mat.nCol(); ++j)
191
+        {
192
+            outputFile << "\t" << mat(i,j);
193
+        }
194
+        outputFile << "\n";
195
+    }
196
+    outputFile.close();
197
+}
198
+
161 199
 #endif
162 200
\ No newline at end of file
163 201
new file mode 100644
... ...
@@ -0,0 +1,56 @@
1
+#include "GctParser.h"
2
+#include "../GapsAssert.h"
3
+#include "../math/Algorithms.h"
4
+
5
+#include <fstream>