... | ... |
@@ -177,7 +177,6 @@ BPPARAM=NULL, workerID=1, asynchronousUpdates=FALSE, ...) |
177 | 177 |
warning(paste("scCoGAPS is deprecated, use the main function CoGAPS", |
178 | 178 |
"with the argument: distributed=\"single-cell\"")) |
179 | 179 |
params@distributed <- "single-cell" |
180 |
- params@singleCell <- TRUE |
|
181 | 180 |
CoGAPS( |
182 | 181 |
data=data, |
183 | 182 |
params=params, |
... | ... |
@@ -66,6 +66,10 @@ compiledWithOpenMPSupport <- function() |
66 | 66 |
#' is not neccesary when using the default parallel methods (i.e. distributed |
67 | 67 |
#' CoGAPS) but only when the user is manually calling CoGAPS in parallel |
68 | 68 |
#' @param asynchronousUpdates enable asynchronous updating which allows for multi-threaded runs |
69 |
+#' @param nSnapshots how many snapshots to take in each phase, setting this to 0 disables |
|
70 |
+#' snapshots |
|
71 |
+#' @param snapshotPhase which phase to take snapsjots in e.g. "equilibration", "sampling", |
|
72 |
+#' "all" |
|
69 | 73 |
#' @param ... allows for overwriting parameters in params |
70 | 74 |
#' @return CogapsResult object |
71 | 75 |
#' @examples |
... | ... |
@@ -83,7 +83,7 @@ compiledWithOpenMPSupport <- function() |
83 | 83 |
#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25) |
84 | 84 |
#' @importFrom methods new is |
85 | 85 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
86 |
-outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
86 |
+outputFrequency=1000, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
87 | 87 |
checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE, |
88 | 88 |
BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, nSnapshots=0, |
89 | 89 |
snapshotPhase='sampling', ...) |
... | ... |
@@ -85,7 +85,8 @@ compiledWithOpenMPSupport <- function() |
85 | 85 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
86 | 86 |
outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
87 | 87 |
checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE, |
88 |
-BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, nSnapshots=0, ...) |
|
88 |
+BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, nSnapshots=0, |
|
89 |
+snapshotPhase='sampling', ...) |
|
89 | 90 |
{ |
90 | 91 |
# pre-process inputs |
91 | 92 |
if (is(data, "character")) |
... | ... |
@@ -112,6 +113,7 @@ BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, nSnapshots=0, ...) |
112 | 113 |
"messages"=messages, |
113 | 114 |
"outputFrequency"=outputFrequency, |
114 | 115 |
"nSnapshots"=nSnapshots, |
116 |
+ "snapshotPhase"=snapshotPhase, |
|
115 | 117 |
"checkpointOutFile"=checkpointOutFile, |
116 | 118 |
"checkpointInterval"=checkpointInterval, |
117 | 119 |
"checkpointInFile"=checkpointInFile, |
The argument 'nSnapshots' specifies how many samples of the A and P matrix should be saved. The snapshots are equally spaced out during the sampling phase. This is useful for various post-run analysis techniques but should primarily be used to test ideas, not as part of an official analysis.
... | ... |
@@ -85,7 +85,7 @@ compiledWithOpenMPSupport <- function() |
85 | 85 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
86 | 86 |
outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
87 | 87 |
checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE, |
88 |
-BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
|
88 |
+BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, nSnapshots=0, ...) |
|
89 | 89 |
{ |
90 | 90 |
# pre-process inputs |
91 | 91 |
if (is(data, "character")) |
... | ... |
@@ -111,6 +111,7 @@ BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
111 | 111 |
"nThreads"=nThreads, |
112 | 112 |
"messages"=messages, |
113 | 113 |
"outputFrequency"=outputFrequency, |
114 |
+ "nSnapshots"=nSnapshots, |
|
114 | 115 |
"checkpointOutFile"=checkpointOutFile, |
115 | 116 |
"checkpointInterval"=checkpointInterval, |
116 | 117 |
"checkpointInFile"=checkpointInFile, |
... | ... |
@@ -165,7 +165,7 @@ BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
165 | 165 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
166 | 166 |
outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
167 | 167 |
checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
168 |
-BPPARAM=NULL, workerID=1, ...) |
|
168 |
+BPPARAM=NULL, workerID=1, asynchronousUpdates=FALSE, ...) |
|
169 | 169 |
{ |
170 | 170 |
warning(paste("scCoGAPS is deprecated, use the main function CoGAPS", |
171 | 171 |
"with the argument: distributed=\"single-cell\"")) |
... | ... |
@@ -207,7 +207,7 @@ BPPARAM=NULL, workerID=1, ...) |
207 | 207 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
208 | 208 |
outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
209 | 209 |
checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
210 |
-BPPARAM=NULL, workerID=1, ...) |
|
210 |
+BPPARAM=NULL, workerID=1, asynchronousUpdates=FALSE, ...) |
|
211 | 211 |
{ |
212 | 212 |
warning(paste("GWCoGAPS is deprecated, use the main function CoGAPS", |
213 | 213 |
"with the argument: distributed=\"genome-wide\"")) |
... | ... |
@@ -17,7 +17,7 @@ buildReport <- function() |
17 | 17 |
#' Check if package was built with checkpoints enabled |
18 | 18 |
#' @export |
19 | 19 |
#' |
20 |
-#' @return true/false if check are enabled |
|
20 |
+#' @return true/false if checkpoints are enabled |
|
21 | 21 |
#' @examples |
22 | 22 |
#' CoGAPS::checkpointsEnabled() |
23 | 23 |
checkpointsEnabled <- function() |
... | ... |
@@ -25,6 +25,17 @@ checkpointsEnabled <- function() |
25 | 25 |
checkpointsEnabled_cpp() |
26 | 26 |
} |
27 | 27 |
|
28 |
+#' Check if compiler supported OpenMP |
|
29 |
+#' @export |
|
30 |
+#' |
|
31 |
+#' @return true/false if OpenMP was supported |
|
32 |
+#' @examples |
|
33 |
+#' CoGAPS::compiledWithOpenMPSupport() |
|
34 |
+compiledWithOpenMPSupport <- function() |
|
35 |
+{ |
|
36 |
+ compiledWithOpenMPSupport_cpp() |
|
37 |
+} |
|
38 |
+ |
|
28 | 39 |
#' CoGAPS Matrix Factorization Algorithm |
29 | 40 |
#' @export |
30 | 41 |
#' |
... | ... |
@@ -86,6 +97,15 @@ BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
86 | 97 |
params <- getValueOrRds(params) |
87 | 98 |
validObject(params) |
88 | 99 |
|
100 |
+ # check OpenMP support |
|
101 |
+ if (!compiledWithOpenMPSupport()) |
|
102 |
+ { |
|
103 |
+ if (asynchronousUpdates & nThreads > 1) |
|
104 |
+ warning("requesting multi-threaded version of CoGAPS but compiler did not support OpenMP") |
|
105 |
+ asynchronousUpdates = FALSE |
|
106 |
+ nThreads = 1 |
|
107 |
+ } |
|
108 |
+ |
|
89 | 109 |
# store all parameters in a list and parse parameters from ... |
90 | 110 |
allParams <- list("gaps"=params, |
91 | 111 |
"nThreads"=nThreads, |
... | ... |
@@ -54,6 +54,7 @@ checkpointsEnabled <- function() |
54 | 54 |
#' only worker 1 prints output and each worker outputs when it finishes, this |
55 | 55 |
#' is not neccesary when using the default parallel methods (i.e. distributed |
56 | 56 |
#' CoGAPS) but only when the user is manually calling CoGAPS in parallel |
57 |
+#' @param asynchronousUpdates enable asynchronous updating which allows for multi-threaded runs |
|
57 | 58 |
#' @param ... allows for overwriting parameters in params |
58 | 59 |
#' @return CogapsResult object |
59 | 60 |
#' @examples |
... | ... |
@@ -71,11 +71,15 @@ checkpointsEnabled <- function() |
71 | 71 |
#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25) |
72 | 72 |
#' @importFrom methods new is |
73 | 73 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
74 |
-outputFrequency=2500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
74 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
75 | 75 |
checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE, |
76 | 76 |
BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
77 | 77 |
{ |
78 | 78 |
# pre-process inputs |
79 |
+ if (is(data, "character")) |
|
80 |
+ dataName <- data |
|
81 |
+ else |
|
82 |
+ dataName <- deparse(substitute(data)) |
|
79 | 83 |
data <- getValueOrRds(data) |
80 | 84 |
data <- convertDataToMatrix(data) |
81 | 85 |
params <- getValueOrRds(params) |
... | ... |
@@ -95,7 +99,8 @@ BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
95 | 99 |
"BPPARAM"=BPPARAM, |
96 | 100 |
"outputToFile"=NULL, |
97 | 101 |
"workerID"=workerID, |
98 |
- "asynchronousUpdates"=asynchronousUpdates |
|
102 |
+ "asynchronousUpdates"=asynchronousUpdates, |
|
103 |
+ "dataName"=dataName |
|
99 | 104 |
) |
100 | 105 |
allParams <- parseExtraParams(allParams, list(...)) |
101 | 106 |
allParams <- getDimNames(data, allParams) |
... | ... |
@@ -73,7 +73,7 @@ checkpointsEnabled <- function() |
73 | 73 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
74 | 74 |
outputFrequency=2500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
75 | 75 |
checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE, |
76 |
-BPPARAM=NULL, workerID=1, ...) |
|
76 |
+BPPARAM=NULL, workerID=1, asynchronousUpdates=TRUE, ...) |
|
77 | 77 |
{ |
78 | 78 |
# pre-process inputs |
79 | 79 |
data <- getValueOrRds(data) |
... | ... |
@@ -94,7 +94,8 @@ BPPARAM=NULL, workerID=1, ...) |
94 | 94 |
"transposeData"=transposeData, |
95 | 95 |
"BPPARAM"=BPPARAM, |
96 | 96 |
"outputToFile"=NULL, |
97 |
- "workerID"=workerID |
|
97 |
+ "workerID"=workerID, |
|
98 |
+ "asynchronousUpdates"=asynchronousUpdates |
|
98 | 99 |
) |
99 | 100 |
allParams <- parseExtraParams(allParams, list(...)) |
100 | 101 |
allParams <- getDimNames(data, allParams) |
... | ... |
@@ -140,6 +140,8 @@ outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
140 | 140 |
checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
141 | 141 |
BPPARAM=NULL, workerID=1, ...) |
142 | 142 |
{ |
143 |
+ warning(paste("scCoGAPS is deprecated, use the main function CoGAPS", |
|
144 |
+ "with the argument: distributed=\"single-cell\"")) |
|
143 | 145 |
params@distributed <- "single-cell" |
144 | 146 |
params@singleCell <- TRUE |
145 | 147 |
CoGAPS( |
... | ... |
@@ -180,6 +182,8 @@ outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
180 | 182 |
checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
181 | 183 |
BPPARAM=NULL, workerID=1, ...) |
182 | 184 |
{ |
185 |
+ warning(paste("GWCoGAPS is deprecated, use the main function CoGAPS", |
|
186 |
+ "with the argument: distributed=\"genome-wide\"")) |
|
183 | 187 |
params@distributed <- "genome-wide" |
184 | 188 |
CoGAPS( |
185 | 189 |
data=data, |
... | ... |
@@ -111,7 +111,7 @@ BPPARAM=NULL, workerID=1, ...) |
111 | 111 |
if (!is.null(allParams$gaps@distributed)) |
112 | 112 |
dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
113 | 113 |
else if (is(data, "character")) |
114 |
- dispatchFunc <- cogaps_cpp_from_file # data is a file path |
|
114 |
+ dispatchFunc <- cogaps_from_file_cpp # data is a file path |
|
115 | 115 |
|
116 | 116 |
# run cogaps |
117 | 117 |
startupMessage(data, allParams) |
... | ... |
@@ -71,8 +71,8 @@ checkpointsEnabled <- function() |
71 | 71 |
#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25) |
72 | 72 |
#' @importFrom methods new is |
73 | 73 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
74 |
-outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
75 |
-checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
74 |
+outputFrequency=2500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
75 |
+checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE, |
|
76 | 76 |
BPPARAM=NULL, workerID=1, ...) |
77 | 77 |
{ |
78 | 78 |
# pre-process inputs |
... | ... |
@@ -49,19 +49,7 @@ checkpointsEnabled <- function() |
49 | 49 |
#' @param transposeData T/F for transposing data while reading it in - useful |
50 | 50 |
#' for data that is stored as samples x genes since CoGAPS requires data to be |
51 | 51 |
#' genes x samples |
52 |
-#' @param subsetIndices set of indices to use from the data |
|
53 |
-#' @param subsetDim which dimension (1=rows, 2=cols) to subset |
|
54 | 52 |
#' @param BPPARAM BiocParallel backend |
55 |
-#' @param geneNames vector of names of genes in data |
|
56 |
-#' @param sampleNames vector of names of samples in data |
|
57 |
-#' @param fixedPatterns fix either 'A' or 'P' matrix to these values, in the |
|
58 |
-#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
59 |
-#' skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
60 |
-#' matching, as well as fixed runs of standard CoGAPS |
|
61 |
-#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed |
|
62 |
-#' @param takePumpSamples whether or not to take PUMP samples |
|
63 |
-#' @param outputToFile name of a file to save the output to, will create 4 files |
|
64 |
-#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
|
65 | 53 |
#' @param workerID if calling CoGAPS in parallel the worker ID can be specified, |
66 | 54 |
#' only worker 1 prints output and each worker outputs when it finishes, this |
67 | 55 |
#' is not neccesary when using the default parallel methods (i.e. distributed |
... | ... |
@@ -82,17 +70,18 @@ checkpointsEnabled <- function() |
82 | 70 |
#' params <- setParam(params, "nPatterns", 3) |
83 | 71 |
#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25) |
84 | 72 |
#' @importFrom methods new is |
85 |
-#' @importFrom SummarizedExperiment assay |
|
86 |
-#' @importFrom utils packageVersion |
|
87 |
-CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
88 |
-messages=TRUE, outputFrequency=500, uncertainty=NULL, |
|
89 |
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
|
90 |
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
91 |
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
92 |
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
73 |
+CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
|
74 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
75 |
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
76 |
+BPPARAM=NULL, workerID=1, ...) |
|
93 | 77 |
{ |
94 |
- # store all parameters in a list and parse parameters from ... |
|
78 |
+ # pre-process inputs |
|
79 |
+ data <- getValueOrRds(data) |
|
80 |
+ data <- convertDataToMatrix(data) |
|
81 |
+ params <- getValueOrRds(params) |
|
95 | 82 |
validObject(params) |
83 |
+ |
|
84 |
+ # store all parameters in a list and parse parameters from ... |
|
96 | 85 |
allParams <- list("gaps"=params, |
97 | 86 |
"nThreads"=nThreads, |
98 | 87 |
"messages"=messages, |
... | ... |
@@ -100,48 +89,24 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
100 | 89 |
"checkpointOutFile"=checkpointOutFile, |
101 | 90 |
"checkpointInterval"=checkpointInterval, |
102 | 91 |
"checkpointInFile"=checkpointInFile, |
92 |
+ "geneNames"=NULL, # the gene/sample names in the params object are kept |
|
93 |
+ "sampleNames"=NULL, # as a reference, these are the values actually used |
|
103 | 94 |
"transposeData"=transposeData, |
104 |
- "subsetIndices"=subsetIndices, |
|
105 |
- "subsetDim"=subsetDim, |
|
106 | 95 |
"BPPARAM"=BPPARAM, |
107 |
- "fixedPatterns"=fixedPatterns, |
|
108 |
- "whichMatrixFixed"=whichMatrixFixed, |
|
109 |
- "takePumpSamples"=takePumpSamples, |
|
110 |
- "outputToFile"=outputToFile, |
|
96 |
+ "outputToFile"=NULL, |
|
111 | 97 |
"workerID"=workerID |
112 | 98 |
) |
113 | 99 |
allParams <- parseExtraParams(allParams, list(...)) |
114 |
- |
|
115 |
- # if rds was passed, we first read it in before any processing |
|
116 |
- if (is(data, "character")) |
|
117 |
- { |
|
118 |
- if (tools::file_ext(data) == "rds") |
|
119 |
- { |
|
120 |
- gapsCat(allParams, "reading RDS file...") |
|
121 |
- data <- readRDS(data) |
|
122 |
- gapsCat(allParams, "done\n") |
|
123 |
- } |
|
124 |
- } |
|
125 |
- |
|
126 |
- # convert data if needed |
|
127 |
- if (is(data, "data.frame")) |
|
128 |
- data <- data.matrix(data) |
|
129 |
- else if (is(data, "SummarizedExperiment")) |
|
130 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
131 |
- else if (is(data, "SingleCellExperiment")) |
|
132 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
133 |
- |
|
134 |
- # check that inputs are valid, then read the gene/sample names from the data |
|
100 |
+ allParams <- getDimNames(data, allParams) |
|
135 | 101 |
checkInputs(data, uncertainty, allParams) |
136 |
- allParams <- getNamesFromData(data, allParams, geneNames, sampleNames) |
|
137 |
- |
|
102 |
+ |
|
138 | 103 |
# check if we're running from a checkpoint |
139 | 104 |
if (!is.null(allParams$checkpointInFile)) |
140 | 105 |
{ |
141 | 106 |
gapsCat(allParams, "Running CoGAPS from a checkpoint\n") |
142 | 107 |
} |
143 | 108 |
|
144 |
- # determine which function to call cogaps algorithm |
|
109 |
+ # determine function to call cogaps algorithm |
|
145 | 110 |
dispatchFunc <- cogaps_cpp # default |
146 | 111 |
if (!is.null(allParams$gaps@distributed)) |
147 | 112 |
dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
... | ... |
@@ -170,12 +135,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
170 | 135 |
#' params <- setParam(params, "nPatterns", 3) |
171 | 136 |
#' result <- scCoGAPS(t(GIST.matrix), params, BPPARAM=BiocParallel::SerialParam()) |
172 | 137 |
#' } |
173 |
-scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
174 |
-messages=TRUE, outputFrequency=500, uncertainty=NULL, |
|
175 |
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
|
176 |
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
177 |
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
178 |
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
138 |
+scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
|
139 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
140 |
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
141 |
+BPPARAM=NULL, workerID=1, ...) |
|
179 | 142 |
{ |
180 | 143 |
params@distributed <- "single-cell" |
181 | 144 |
params@singleCell <- TRUE |
... | ... |
@@ -190,15 +153,7 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
190 | 153 |
checkpointInterval=checkpointInterval, |
191 | 154 |
checkpointInFile=checkpointInFile, |
192 | 155 |
transposeData=transposeData, |
193 |
- subsetIndices=subsetIndices, |
|
194 |
- subsetDim=subsetDim, |
|
195 | 156 |
BPPARAM=BPPARAM, |
196 |
- geneNames=geneNames, |
|
197 |
- sampleNames=sampleNames, |
|
198 |
- fixedPatterns=fixedPatterns, |
|
199 |
- whichMatrixFixed=whichMatrixFixed, |
|
200 |
- takePumpSamples=takePumpSamples, |
|
201 |
- outputToFile=outputToFile, |
|
202 | 157 |
workerID=workerID, |
203 | 158 |
... |
204 | 159 |
) |
... | ... |
@@ -220,12 +175,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
220 | 175 |
#' params <- setParam(params, "nPatterns", 3) |
221 | 176 |
#' result <- GWCoGAPS(GIST.matrix, params, BPPARAM=BiocParallel::SerialParam()) |
222 | 177 |
#' } |
223 |
-GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
224 |
-messages=TRUE, outputFrequency=500, uncertainty=NULL, |
|
225 |
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
|
226 |
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
227 |
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
228 |
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
178 |
+GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
|
179 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
180 |
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
181 |
+BPPARAM=NULL, workerID=1, ...) |
|
229 | 182 |
{ |
230 | 183 |
params@distributed <- "genome-wide" |
231 | 184 |
CoGAPS( |
... | ... |
@@ -239,16 +192,8 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
239 | 192 |
checkpointInterval=checkpointInterval, |
240 | 193 |
checkpointInFile=checkpointInFile, |
241 | 194 |
transposeData=transposeData, |
242 |
- subsetIndices=subsetIndices, |
|
243 |
- subsetDim=subsetDim, |
|
244 | 195 |
BPPARAM=BPPARAM, |
245 |
- geneNames=geneNames, |
|
246 |
- sampleNames=sampleNames, |
|
247 |
- fixedPatterns=fixedPatterns, |
|
248 |
- whichMatrixFixed=whichMatrixFixed, |
|
249 |
- takePumpSamples=takePumpSamples, |
|
250 |
- outputToFile=outputToFile, |
|
251 | 196 |
workerID=workerID, |
252 | 197 |
... |
253 | 198 |
) |
254 |
-} |
|
255 | 199 |
\ No newline at end of file |
200 |
+} |
... | ... |
@@ -14,6 +14,17 @@ buildReport <- function() |
14 | 14 |
getBuildReport_cpp() |
15 | 15 |
} |
16 | 16 |
|
17 |
+#' Check if package was built with checkpoints enabled |
|
18 |
+#' @export |
|
19 |
+#' |
|
20 |
+#' @return true/false if check are enabled |
|
21 |
+#' @examples |
|
22 |
+#' CoGAPS::checkpointsEnabled() |
|
23 |
+checkpointsEnabled <- function() |
|
24 |
+{ |
|
25 |
+ checkpointsEnabled_cpp() |
|
26 |
+} |
|
27 |
+ |
|
17 | 28 |
#' CoGAPS Matrix Factorization Algorithm |
18 | 29 |
#' @export |
19 | 30 |
#' |
... | ... |
@@ -101,10 +101,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
101 | 101 |
) |
102 | 102 |
allParams <- parseExtraParams(allParams, list(...)) |
103 | 103 |
|
104 |
- # if rds was passed, we first read it in before any processings |
|
104 |
+ # if rds was passed, we first read it in before any processing |
|
105 | 105 |
if (is(data, "character")) |
106 | 106 |
{ |
107 |
- if (tools::file_ext(data) == "rds")) |
|
107 |
+ if (tools::file_ext(data) == "rds") |
|
108 | 108 |
{ |
109 | 109 |
gapsCat(allParams, "reading RDS file...") |
110 | 110 |
data <- readRDS(data) |
... | ... |
@@ -101,12 +101,15 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
101 | 101 |
) |
102 | 102 |
allParams <- parseExtraParams(allParams, list(...)) |
103 | 103 |
|
104 |
- # if rds was passed, we first read it in before any processing |
|
105 |
- if (is(data, "character") & tools::file_ext(data) == "rds") |
|
104 |
+ # if rds was passed, we first read it in before any processings |
|
105 |
+ if (is(data, "character")) |
|
106 | 106 |
{ |
107 |
- gapsCat(allParams, "reading RDS file...") |
|
108 |
- data <- readRDS(data) |
|
109 |
- gapsCat(allParams, "done\n") |
|
107 |
+ if (tools::file_ext(data) == "rds")) |
|
108 |
+ { |
|
109 |
+ gapsCat(allParams, "reading RDS file...") |
|
110 |
+ data <- readRDS(data) |
|
111 |
+ gapsCat(allParams, "done\n") |
|
112 |
+ } |
|
110 | 113 |
} |
111 | 114 |
|
112 | 115 |
# convert data if needed |
... | ... |
@@ -101,6 +101,14 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
101 | 101 |
) |
102 | 102 |
allParams <- parseExtraParams(allParams, list(...)) |
103 | 103 |
|
104 |
+ # if rds was passed, we first read it in before any processing |
|
105 |
+ if (is(data, "character") & tools::file_ext(data) == "rds") |
|
106 |
+ { |
|
107 |
+ gapsCat(allParams, "reading RDS file...") |
|
108 |
+ data <- readRDS(data) |
|
109 |
+ gapsCat(allParams, "done\n") |
|
110 |
+ } |
|
111 |
+ |
|
104 | 112 |
# convert data if needed |
105 | 113 |
if (is(data, "data.frame")) |
106 | 114 |
data <- data.matrix(data) |
... | ... |
@@ -140,12 +140,14 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
140 | 140 |
#' @return CogapsResult object |
141 | 141 |
#' @importFrom methods new |
142 | 142 |
#' @examples |
143 |
+#' \dontrun{ |
|
143 | 144 |
#' data(GIST) |
144 | 145 |
#' params <- new("CogapsParams") |
145 | 146 |
#' params <- setDistributedParams(params, nSets=2) |
146 | 147 |
#' params <- setParam(params, "nIterations", 100) |
147 | 148 |
#' params <- setParam(params, "nPatterns", 3) |
148 | 149 |
#' result <- scCoGAPS(t(GIST.matrix), params, BPPARAM=BiocParallel::SerialParam()) |
150 |
+#' } |
|
149 | 151 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
150 | 152 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
151 | 153 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -188,12 +190,14 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
188 | 190 |
#' @return CogapsResult object |
189 | 191 |
#' @importFrom methods new |
190 | 192 |
#' @examples |
193 |
+#' \dontrun{ |
|
191 | 194 |
#' data(GIST) |
192 | 195 |
#' params <- new("CogapsParams") |
193 | 196 |
#' params <- setDistributedParams(params, nSets=2) |
194 | 197 |
#' params <- setParam(params, "nIterations", 100) |
195 | 198 |
#' params <- setParam(params, "nPatterns", 3) |
196 | 199 |
#' result <- GWCoGAPS(GIST.matrix, params, BPPARAM=BiocParallel::SerialParam()) |
200 |
+#' } |
|
197 | 201 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
198 | 202 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
199 | 203 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -60,16 +60,16 @@ buildReport <- function() |
60 | 60 |
#' @examples |
61 | 61 |
#' # Running from R object |
62 | 62 |
#' data(GIST) |
63 |
-#' resultA <- CoGAPS(GIST.data_frame, nIterations=100) |
|
63 |
+#' resultA <- CoGAPS(GIST.data_frame, nIterations=25) |
|
64 | 64 |
#' |
65 | 65 |
#' # Running from file name |
66 | 66 |
#' gist_path <- system.file("extdata/GIST.mtx", package="CoGAPS") |
67 |
-#' resultB <- CoGAPS(gist_path, nIterations=100) |
|
67 |
+#' resultB <- CoGAPS(gist_path, nIterations=25) |
|
68 | 68 |
#' |
69 | 69 |
#' # Setting Parameters |
70 | 70 |
#' params <- new("CogapsParams") |
71 |
-#' params <- setParam(params, "nPatterns", 5) |
|
72 |
-#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=100) |
|
71 |
+#' params <- setParam(params, "nPatterns", 3) |
|
72 |
+#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25) |
|
73 | 73 |
#' @importFrom methods new is |
74 | 74 |
#' @importFrom SummarizedExperiment assay |
75 | 75 |
#' @importFrom utils packageVersion |
... | ... |
@@ -141,7 +141,11 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
141 | 141 |
#' @importFrom methods new |
142 | 142 |
#' @examples |
143 | 143 |
#' data(GIST) |
144 |
-#' result <- scCoGAPS(t(GIST.matrix), BPPARAM=BiocParallel::SerialParam(), nIterations=100) |
|
144 |
+#' params <- new("CogapsParams") |
|
145 |
+#' params <- setDistributedParams(params, nSets=2) |
|
146 |
+#' params <- setParam(params, "nIterations", 100) |
|
147 |
+#' params <- setParam(params, "nPatterns", 3) |
|
148 |
+#' result <- scCoGAPS(t(GIST.matrix), params, BPPARAM=BiocParallel::SerialParam()) |
|
145 | 149 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
146 | 150 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
147 | 151 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -185,7 +189,11 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
185 | 189 |
#' @importFrom methods new |
186 | 190 |
#' @examples |
187 | 191 |
#' data(GIST) |
188 |
-#' result <- GWCoGAPS(GIST.matrix, BPPARAM=BiocParallel::SerialParam(), nIterations=100) |
|
192 |
+#' params <- new("CogapsParams") |
|
193 |
+#' params <- setDistributedParams(params, nSets=2) |
|
194 |
+#' params <- setParam(params, "nIterations", 100) |
|
195 |
+#' params <- setParam(params, "nPatterns", 3) |
|
196 |
+#' result <- GWCoGAPS(GIST.matrix, params, BPPARAM=BiocParallel::SerialParam()) |
|
189 | 197 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
190 | 198 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
191 | 199 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -147,7 +147,7 @@ messages=TRUE, outputFrequency=500, uncertainty=NULL, |
147 | 147 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
148 | 148 |
checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
149 | 149 |
BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
150 |
-whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
|
150 |
+whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
151 | 151 |
{ |
152 | 152 |
params@distributed <- "single-cell" |
153 | 153 |
params@singleCell <- TRUE |
... | ... |
@@ -169,6 +169,7 @@ whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
169 | 169 |
sampleNames=sampleNames, |
170 | 170 |
fixedPatterns=fixedPatterns, |
171 | 171 |
whichMatrixFixed=whichMatrixFixed, |
172 |
+ takePumpSamples=takePumpSamples, |
|
172 | 173 |
outputToFile=outputToFile, |
173 | 174 |
workerID=workerID, |
174 | 175 |
... |
... | ... |
@@ -190,7 +191,7 @@ messages=TRUE, outputFrequency=500, uncertainty=NULL, |
190 | 191 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
191 | 192 |
checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
192 | 193 |
BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
193 |
-whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
|
194 |
+whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
194 | 195 |
{ |
195 | 196 |
params@distributed <- "genome-wide" |
196 | 197 |
CoGAPS( |
... | ... |
@@ -211,6 +212,7 @@ whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
211 | 212 |
sampleNames=sampleNames, |
212 | 213 |
fixedPatterns=fixedPatterns, |
213 | 214 |
whichMatrixFixed=whichMatrixFixed, |
215 |
+ takePumpSamples=takePumpSamples, |
|
214 | 216 |
outputToFile=outputToFile, |
215 | 217 |
workerID=workerID, |
216 | 218 |
... |
... | ... |
@@ -48,6 +48,7 @@ buildReport <- function() |
48 | 48 |
#' skipped and fixedPatterns is used for all sets - allowing manual pattern |
49 | 49 |
#' matching, as well as fixed runs of standard CoGAPS |
50 | 50 |
#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed |
51 |
+#' @param takePumpSamples whether or not to take PUMP samples |
|
51 | 52 |
#' @param outputToFile name of a file to save the output to, will create 4 files |
52 | 53 |
#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
53 | 54 |
#' @param workerID if calling CoGAPS in parallel the worker ID can be specified, |
... | ... |
@@ -77,7 +78,7 @@ messages=TRUE, outputFrequency=500, uncertainty=NULL, |
77 | 78 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
78 | 79 |
checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
79 | 80 |
BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
80 |
-whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
|
81 |
+whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
81 | 82 |
{ |
82 | 83 |
# store all parameters in a list and parse parameters from ... |
83 | 84 |
validObject(params) |
... | ... |
@@ -94,6 +95,7 @@ whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
94 | 95 |
"BPPARAM"=BPPARAM, |
95 | 96 |
"fixedPatterns"=fixedPatterns, |
96 | 97 |
"whichMatrixFixed"=whichMatrixFixed, |
98 |
+ "takePumpSamples"=takePumpSamples, |
|
97 | 99 |
"outputToFile"=outputToFile, |
98 | 100 |
"workerID"=workerID |
99 | 101 |
) |
... | ... |
@@ -43,7 +43,10 @@ buildReport <- function() |
43 | 43 |
#' @param BPPARAM BiocParallel backend |
44 | 44 |
#' @param geneNames vector of names of genes in data |
45 | 45 |
#' @param sampleNames vector of names of samples in data |
46 |
-#' @param fixedPatterns fix either 'A' or 'P' matrix to these values |
|
46 |
+#' @param fixedPatterns fix either 'A' or 'P' matrix to these values, in the |
|
47 |
+#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
48 |
+#' skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
49 |
+#' matching, as well as fixed runs of standard CoGAPS |
|
47 | 50 |
#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed |
48 | 51 |
#' @param outputToFile name of a file to save the output to, will create 4 files |
49 | 52 |
#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
... | ... |
@@ -38,12 +38,19 @@ buildReport <- function() |
38 | 38 |
#' @param transposeData T/F for transposing data while reading it in - useful |
39 | 39 |
#' for data that is stored as samples x genes since CoGAPS requires data to be |
40 | 40 |
#' genes x samples |
41 |
+#' @param subsetIndices set of indices to use from the data |
|
42 |
+#' @param subsetDim which dimension (1=rows, 2=cols) to subset |
|
41 | 43 |
#' @param BPPARAM BiocParallel backend |
42 | 44 |
#' @param geneNames vector of names of genes in data |
43 | 45 |
#' @param sampleNames vector of names of samples in data |
44 |
-#' @param matchedPatterns manually matched patterns for distributed CoGAPS |
|
46 |
+#' @param fixedPatterns fix either 'A' or 'P' matrix to these values |
|
47 |
+#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed |
|
45 | 48 |
#' @param outputToFile name of a file to save the output to, will create 4 files |
46 | 49 |
#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
50 |
+#' @param workerID if calling CoGAPS in parallel the worker ID can be specified, |
|
51 |
+#' only worker 1 prints output and each worker outputs when it finishes, this |
|
52 |
+#' is not neccesary when using the default parallel methods (i.e. distributed |
|
53 |
+#' CoGAPS) but only when the user is manually calling CoGAPS in parallel |
|
47 | 54 |
#' @param ... allows for overwriting parameters in params |
48 | 55 |
#' @return CogapsResult object |
49 | 56 |
#' @examples |
... | ... |
@@ -65,9 +72,9 @@ buildReport <- function() |
65 | 72 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
66 | 73 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
67 | 74 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
68 |
-checkpointInFile=NULL, transposeData=FALSE, subsetData=NULL, BPPARAM=NULL, |
|
69 |
-geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, whichMatrixFixed='N', |
|
70 |
-outputToFile=NULL, ...) |
|
75 |
+checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
76 |
+BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
77 |
+whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
|
71 | 78 |
{ |
72 | 79 |
# store all parameters in a list and parse parameters from ... |
73 | 80 |
validObject(params) |
... | ... |
@@ -79,14 +86,24 @@ outputToFile=NULL, ...) |
79 | 86 |
"checkpointInterval"=checkpointInterval, |
80 | 87 |
"checkpointInFile"=checkpointInFile, |
81 | 88 |
"transposeData"=transposeData, |
82 |
- "subsetData"=subsetData, |
|
83 |
- "bpBackend"=BPPARAM, |
|
89 |
+ "subsetIndices"=subsetIndices, |
|
90 |
+ "subsetDim"=subsetDim, |
|
91 |
+ "BPPARAM"=BPPARAM, |
|
84 | 92 |
"fixedPatterns"=fixedPatterns, |
85 | 93 |
"whichMatrixFixed"=whichMatrixFixed, |
86 | 94 |
"outputToFile"=outputToFile, |
95 |
+ "workerID"=workerID |
|
87 | 96 |
) |
88 | 97 |
allParams <- parseExtraParams(allParams, list(...)) |
89 | 98 |
|
99 |
+ # convert data if needed |
|
100 |
+ if (is(data, "data.frame")) |
|
101 |
+ data <- data.matrix(data) |
|
102 |
+ else if (is(data, "SummarizedExperiment")) |
|
103 |
+ data <- SummarizedExperiment::assay(data, "counts") |
|
104 |
+ else if (is(data, "SingleCellExperiment")) |
|
105 |
+ data <- SummarizedExperiment::assay(data, "counts") |
|
106 |
+ |
|
90 | 107 |
# check that inputs are valid, then read the gene/sample names from the data |
91 | 108 |
checkInputs(data, uncertainty, allParams) |
92 | 109 |
allParams <- getNamesFromData(data, allParams, geneNames, sampleNames) |
... | ... |
@@ -94,10 +111,7 @@ outputToFile=NULL, ...) |
94 | 111 |
# check if we're running from a checkpoint |
95 | 112 |
if (!is.null(allParams$checkpointInFile)) |
96 | 113 |
{ |
97 |
- if (!is.null(allParams$gaps@distributed)) |
|
98 |
- stop("checkpoints not supported for distributed cogaps") |
|
99 |
- else |
|
100 |
- gapsCat(allParams, "Running CoGAPS from a checkpoint\n") |
|
114 |
+ gapsCat(allParams, "Running CoGAPS from a checkpoint\n") |
|
101 | 115 |
} |
102 | 116 |
|
103 | 117 |
# determine which function to call cogaps algorithm |
... | ... |
@@ -110,19 +124,7 @@ outputToFile=NULL, ...) |
110 | 124 |
# run cogaps |
111 | 125 |
startupMessage(data, allParams) |
112 | 126 |
gapsReturnList <- dispatchFunc(data, allParams, uncertainty) |
113 |
- |
|
114 |
- # convert list to CogapsResult object |
|
115 |
- return(new("CogapsResult", |
|
116 |
- Amean = gapsReturnList$Amean, |
|
117 |
- Asd = gapsReturnList$Asd, |
|
118 |
- Pmean = gapsReturnList$Pmean, |
|
119 |
- Psd = gapsReturnList$Psd, |
|
120 |
- meanChiSq = gapsReturnList$meanChiSq, |
|
121 |
- geneNames = gapsReturnList$geneNames, |
|
122 |
- sampleNames = gapsReturnList$sampleNames, |
|
123 |
- diagnostics = append(gapsReturnList$diagnostics, |
|
124 |
- list("params"=allParams$gaps, "version"=utils::packageVersion("CoGAPS"))) |
|
125 |
- )) |
|
127 |
+ return(createCogapsResult(gapsReturnList, allParams)) |
|
126 | 128 |
} |
127 | 129 |
|
128 | 130 |
#' Single Cell CoGAPS |
... | ... |
@@ -138,14 +140,34 @@ outputToFile=NULL, ...) |
138 | 140 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
139 | 141 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
140 | 142 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
141 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
142 |
-geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
|
143 |
+checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
144 |
+BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
145 |
+whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
|
143 | 146 |
{ |
144 | 147 |
params@distributed <- "single-cell" |
145 | 148 |
params@singleCell <- TRUE |
146 |
- CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty, |
|
147 |
- checkpointOutFile, checkpointInterval, checkpointInFile, transposeData, |
|
148 |
- BPPARAM, geneNames, sampleNames, matchedPatterns, ...) |
|
149 |
+ CoGAPS( |
|
150 |
+ data=data, |
|
151 |
+ params=params, |
|
152 |
+ nThreads=nThreads, |
|
153 |
+ messages=messages, |
|
154 |
+ outputFrequency=outputFrequency, |
|
155 |
+ uncertainty=uncertainty, |
|
156 |
+ checkpointOutFile=checkpointOutFile, |
|
157 |
+ checkpointInterval=checkpointInterval, |
|
158 |
+ checkpointInFile=checkpointInFile, |
|
159 |
+ transposeData=transposeData, |
|
160 |
+ subsetIndices=subsetIndices, |
|
161 |
+ subsetDim=subsetDim, |
|
162 |
+ BPPARAM=BPPARAM, |
|
163 |
+ geneNames=geneNames, |
|
164 |
+ sampleNames=sampleNames, |
|
165 |
+ fixedPatterns=fixedPatterns, |
|
166 |
+ whichMatrixFixed=whichMatrixFixed, |
|
167 |
+ outputToFile=outputToFile, |
|
168 |
+ workerID=workerID, |
|
169 |
+ ... |
|
170 |
+ ) |
|
149 | 171 |
} |
150 | 172 |
|
151 | 173 |
#' Genome Wide CoGAPS |
... | ... |
@@ -161,11 +183,31 @@ geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
161 | 183 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
162 | 184 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
163 | 185 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
164 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
165 |
-geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
|
186 |
+checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
187 |
+BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
188 |
+whichMatrixFixed='N', outputToFile=NULL, workerID=1, ...) |
|
166 | 189 |
{ |
167 | 190 |
params@distributed <- "genome-wide" |
168 |
- CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty, |
|
169 |
- checkpointOutFile, checkpointInterval, checkpointInFile, transposeData, |
|
170 |
- BPPARAM, geneNames, sampleNames, matchedPatterns, ...) |
|
191 |
+ CoGAPS( |
|
192 |
+ data=data, |
|
193 |
+ params=params, |
|
194 |
+ nThreads=nThreads, |
|
195 |
+ messages=messages, |
|
196 |
+ outputFrequency=outputFrequency, |
|
197 |
+ uncertainty=uncertainty, |
|
198 |
+ checkpointOutFile=checkpointOutFile, |
|
199 |
+ checkpointInterval=checkpointInterval, |
|
200 |
+ checkpointInFile=checkpointInFile, |
|
201 |
+ transposeData=transposeData, |
|
202 |
+ subsetIndices=subsetIndices, |
|
203 |
+ subsetDim=subsetDim, |
|
204 |
+ BPPARAM=BPPARAM, |
|
205 |
+ geneNames=geneNames, |
|
206 |
+ sampleNames=sampleNames, |
|
207 |
+ fixedPatterns=fixedPatterns, |
|
208 |
+ whichMatrixFixed=whichMatrixFixed, |
|
209 |
+ outputToFile=outputToFile, |
|
210 |
+ workerID=workerID, |
|
211 |
+ ... |
|
212 |
+ ) |
|
171 | 213 |
} |
172 | 214 |
\ No newline at end of file |
... | ... |
@@ -65,8 +65,8 @@ buildReport <- function() |
65 | 65 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
66 | 66 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
67 | 67 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
68 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
69 |
-geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, |
|
68 |
+checkpointInFile=NULL, transposeData=FALSE, subsetData=NULL, BPPARAM=NULL, |
|
69 |
+geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, whichMatrixFixed='N', |
|
70 | 70 |
outputToFile=NULL, ...) |
71 | 71 |
{ |
72 | 72 |
# store all parameters in a list and parse parameters from ... |
... | ... |
@@ -79,82 +79,33 @@ outputToFile=NULL, ...) |
79 | 79 |
"checkpointInterval"=checkpointInterval, |
80 | 80 |
"checkpointInFile"=checkpointInFile, |
81 | 81 |
"transposeData"=transposeData, |
82 |
+ "subsetData"=subsetData, |
|
82 | 83 |
"bpBackend"=BPPARAM, |
83 |
- "matchedPatterns"=matchedPatterns, |
|
84 |
+ "fixedPatterns"=fixedPatterns, |
|
85 |
+ "whichMatrixFixed"=whichMatrixFixed, |
|
84 | 86 |
"outputToFile"=outputToFile, |
85 |
- "whichMatrixFixed"=NULL # internal parameter |
|
86 | 87 |
) |
87 | 88 |
allParams <- parseExtraParams(allParams, list(...)) |
88 | 89 |
|
89 |
- # check file extension |
|
90 |
- if (is(data, "character") & !supported(data)) |
|
91 |
- stop("unsupported file extension for data") |
|
92 |
- |
|
93 |
- # enforce the use of explicit subsets with manual pattern matching |
|
94 |
- if (!is.null(allParams$matchedPatterns) & is.null(allParams$gaps@explicitSets)) |
|
95 |
- stop("must provide explicit subsets when doing manual pattern matching") |
|
96 |
- |
|
97 |
- # check uncertainty matrix |
|
98 |
- if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
|
99 |
- stop("uncertainty must be same data type as data (file name)") |
|
100 |
- if (is(uncertainty, "character") & !supported(uncertainty)) |
|
101 |
- stop("unsupported file extension for uncertainty") |
|
102 |
- if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix")) |
|
103 |
- stop("uncertainty must be a matrix unless data is a file path") |
|
104 |
- if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
|
105 |
- stop("must use default uncertainty when enabling sparseOptimization") |
|
106 |
- |
|
107 |
- # check single cell parameter |
|
108 |
- if (!is.null(allParams$gaps@distributed)) |
|
109 |
- if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
|
110 |
- warning("running single-cell CoGAPS with singleCell=FALSE") |
|
111 |
- |
|
112 |
- if (!is.null(allParams$gaps@distributed) & allParams$nThreads > 1) |
|
113 |
- stop("can't run multi-threaded and distributed CoGAPS at the same time") |
|
114 |
- |
|
115 |
- # convert data to matrix |
|
116 |
- if (is(data, "matrix")) |
|
117 |
- data <- data |
|
118 |
- if (is(data, "data.frame")) |
|
119 |
- data <- data.matrix(data) |
|
120 |
- else if (is(data, "SummarizedExperiment")) |
|
121 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
122 |
- else if (is(data, "SingleCellExperiment")) |
|
123 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
124 |
- if (!is(data, "character")) |
|
125 |
- checkDataMatrix(data, uncertainty, allParams$gaps) |
|
90 |
+ # check that inputs are valid, then read the gene/sample names from the data |
|
91 |
+ checkInputs(data, uncertainty, allParams) |
|
92 |
+ allParams <- getNamesFromData(data, allParams, geneNames, sampleNames) |
|
126 | 93 |
|
127 |
- # determine which function to call cogaps algorithm |
|
128 |
- if (!is.null(allParams$gaps@distributed)) |
|
129 |
- dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
|
130 |
- else if (is(data, "character")) |
|
131 |
- dispatchFunc <- cogaps_cpp_from_file # data is a file path |
|
132 |
- else |
|
133 |
- dispatchFunc <- cogaps_cpp # default |
|
134 |
- |
|
135 | 94 |
# check if we're running from a checkpoint |
136 | 95 |
if (!is.null(allParams$checkpointInFile)) |
137 | 96 |
{ |
138 | 97 |
if (!is.null(allParams$gaps@distributed)) |
139 | 98 |
stop("checkpoints not supported for distributed cogaps") |
140 | 99 |
else |
141 |
- cat("Running CoGAPS from a checkpoint\n") |
|
100 |
+ gapsCat(allParams, "Running CoGAPS from a checkpoint\n") |
|
142 | 101 |
} |
143 | 102 |
|
144 |
- # get gene/sample names |
|
145 |
- if (is.null(geneNames)) geneNames <- getGeneNames(data, allParams$transposeData) |
|
146 |
- if (is.null(sampleNames)) sampleNames <- getSampleNames(data, allParams$transposeData) |
|
147 |
- |
|
148 |
- nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
|
149 |
- nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
|
150 |
- |
|
151 |
- if (length(geneNames) != nGenes) |
|
152 |
- stop("incorrect number of gene names given") |
|
153 |
- if (length(sampleNames) != nSamples) |
|
154 |
- stop("incorrect number of sample names given") |
|
155 |
- |
|
156 |
- allParams$geneNames <- geneNames |
|
157 |
- allParams$sampleNames <- sampleNames |
|
103 |
+ # determine which function to call cogaps algorithm |
|
104 |
+ dispatchFunc <- cogaps_cpp # default |
|
105 |
+ if (!is.null(allParams$gaps@distributed)) |
|
106 |
+ dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
|
107 |
+ else if (is(data, "character")) |
|
108 |
+ dispatchFunc <- cogaps_cpp_from_file # data is a file path |
|
158 | 109 |
|
159 | 110 |
# run cogaps |
160 | 111 |
startupMessage(data, allParams) |
... | ... |
@@ -101,8 +101,6 @@ outputToFile=NULL, ...) |
101 | 101 |
stop("unsupported file extension for uncertainty") |
102 | 102 |
if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix")) |
103 | 103 |
stop("uncertainty must be a matrix unless data is a file path") |
104 |
- if (!is(data, "character")) |
|
105 |
- checkDataMatrix(data, uncertainty, allParams$gaps) |
|
106 | 104 |
if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
107 | 105 |
stop("must use default uncertainty when enabling sparseOptimization") |
108 | 106 |
|
... | ... |
@@ -123,6 +121,8 @@ outputToFile=NULL, ...) |
123 | 121 |
data <- SummarizedExperiment::assay(data, "counts") |
124 | 122 |
else if (is(data, "SingleCellExperiment")) |
125 | 123 |
data <- SummarizedExperiment::assay(data, "counts") |
124 |
+ if (!is(data, "character")) |
|
125 |
+ checkDataMatrix(data, uncertainty, allParams$gaps) |
|
126 | 126 |
|
127 | 127 |
# determine which function to call cogaps algorithm |
128 | 128 |
if (!is.null(allParams$gaps@distributed)) |
... | ... |
@@ -49,16 +49,16 @@ buildReport <- function() |
49 | 49 |
#' @examples |
50 | 50 |
#' # Running from R object |
51 | 51 |
#' data(GIST) |
52 |
-#' resultA <- CoGAPS(GIST.data_frame, nIterations=250) |
|
52 |
+#' resultA <- CoGAPS(GIST.data_frame, nIterations=100) |
|
53 | 53 |
#' |
54 | 54 |
#' # Running from file name |
55 | 55 |
#' gist_path <- system.file("extdata/GIST.mtx", package="CoGAPS") |
56 |
-#' resultB <- CoGAPS(gist_path, nIterations=250) |
|
56 |
+#' resultB <- CoGAPS(gist_path, nIterations=100) |
|
57 | 57 |
#' |
58 | 58 |
#' # Setting Parameters |
59 | 59 |
#' params <- new("CogapsParams") |
60 | 60 |
#' params <- setParam(params, "nPatterns", 5) |
61 |
-#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=250) |
|
61 |
+#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=100) |
|
62 | 62 |
#' @importFrom methods new is |
63 | 63 |
#' @importFrom SummarizedExperiment assay |
64 | 64 |
#' @importFrom utils packageVersion |
... | ... |
@@ -182,8 +182,8 @@ outputToFile=NULL, ...) |
182 | 182 |
#' @return CogapsResult object |
183 | 183 |
#' @importFrom methods new |
184 | 184 |
#' @examples |
185 |
-#' data(SimpSim) |
|
186 |
-#' result <- scCoGAPS(t(SimpSim.data), BPPARAM=BiocParallel::SerialParam(), nIterations=250) |
|
185 |
+#' data(GIST) |
|
186 |
+#' result <- scCoGAPS(t(GIST.matrix), BPPARAM=BiocParallel::SerialParam(), nIterations=100) |
|
187 | 187 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
188 | 188 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
189 | 189 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -205,8 +205,8 @@ geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
205 | 205 |
#' @return CogapsResult object |
206 | 206 |
#' @importFrom methods new |
207 | 207 |
#' @examples |
208 |
-#' data(SimpSim) |
|
209 |
-#' result <- GWCoGAPS(SimpSim.data, BPPARAM=BiocParallel::SerialParam(), nIterations=250) |
|
208 |
+#' data(GIST) |
|
209 |
+#' result <- GWCoGAPS(GIST.matrix, BPPARAM=BiocParallel::SerialParam(), nIterations=100) |
|
210 | 210 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
211 | 211 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
212 | 212 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -49,16 +49,16 @@ buildReport <- function() |
49 | 49 |
#' @examples |
50 | 50 |
#' # Running from R object |
51 | 51 |
#' data(GIST) |
52 |
-#' resultA <- CoGAPS(GIST.data_frame) |
|
52 |
+#' resultA <- CoGAPS(GIST.data_frame, nIterations=250) |
|
53 | 53 |
#' |
54 | 54 |
#' # Running from file name |
55 | 55 |
#' gist_path <- system.file("extdata/GIST.mtx", package="CoGAPS") |
56 |
-#' resultB <- CoGAPS(gist_path) |
|
56 |
+#' resultB <- CoGAPS(gist_path, nIterations=250) |
|
57 | 57 |
#' |
58 | 58 |
#' # Setting Parameters |
59 | 59 |
#' params <- new("CogapsParams") |
60 | 60 |
#' params <- setParam(params, "nPatterns", 5) |
61 |
-#' resultC <- CoGAPS(GIST.data_frame, params) |
|
61 |
+#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=250) |
|
62 | 62 |
#' @importFrom methods new is |
63 | 63 |
#' @importFrom SummarizedExperiment assay |
64 | 64 |
#' @importFrom utils packageVersion |
... | ... |
@@ -183,7 +183,7 @@ outputToFile=NULL, ...) |
183 | 183 |
#' @importFrom methods new |
184 | 184 |
#' @examples |
185 | 185 |
#' data(SimpSim) |
186 |
-#' result <- scCoGAPS(t(SimpSim.data), BPPARAM=BiocParallel::SerialParam()) |
|
186 |
+#' result <- scCoGAPS(t(SimpSim.data), BPPARAM=BiocParallel::SerialParam(), nIterations=250) |
|
187 | 187 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
188 | 188 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
189 | 189 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -206,7 +206,7 @@ geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
206 | 206 |
#' @importFrom methods new |
207 | 207 |
#' @examples |
208 | 208 |
#' data(SimpSim) |
209 |
-#' result <- GWCoGAPS(SimpSim.data, BPPARAM=BiocParallel::SerialParam()) |
|
209 |
+#' result <- GWCoGAPS(SimpSim.data, BPPARAM=BiocParallel::SerialParam(), nIterations=250) |
|
210 | 210 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
211 | 211 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
212 | 212 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -181,6 +181,9 @@ outputToFile=NULL, ...) |
181 | 181 |
#' @inheritParams CoGAPS |
182 | 182 |
#' @return CogapsResult object |
183 | 183 |
#' @importFrom methods new |
184 |
+#' @examples |
|
185 |
+#' data(SimpSim) |
|
186 |
+#' result <- scCoGAPS(t(SimpSim.data), BPPARAM=BiocParallel::SerialParam()) |
|
184 | 187 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
185 | 188 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
186 | 189 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -201,6 +204,9 @@ geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
201 | 204 |
#' @inheritParams CoGAPS |
202 | 205 |
#' @return CogapsResult object |
203 | 206 |
#' @importFrom methods new |
207 |
+#' @examples |
|
208 |
+#' data(SimpSim) |
|
209 |
+#' result <- GWCoGAPS(SimpSim.data, BPPARAM=BiocParallel::SerialParam()) |
|
204 | 210 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
205 | 211 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
206 | 212 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
... | ... |
@@ -103,8 +103,8 @@ outputToFile=NULL, ...) |
103 | 103 |
stop("uncertainty must be a matrix unless data is a file path") |
104 | 104 |
if (!is(data, "character")) |
105 | 105 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
106 |
- if (!is.null(uncertainty) & allParams$gaps@useSparseOptimization) |
|
107 |
- stop("must use default uncertainty when enabling useSparseOptimization") |
|
106 |
+ if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
|
107 |
+ stop("must use default uncertainty when enabling sparseOptimization") |
|
108 | 108 |
|
109 | 109 |
# check single cell parameter |
110 | 110 |
if (!is.null(allParams$gaps@distributed)) |
... | ... |
@@ -103,6 +103,8 @@ outputToFile=NULL, ...) |
103 | 103 |
stop("uncertainty must be a matrix unless data is a file path") |
104 | 104 |
if (!is(data, "character")) |
105 | 105 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
106 |
+ if (!is.null(uncertainty) & allParams$gaps@useSparseOptimization) |
|
107 |
+ stop("must use default uncertainty when enabling useSparseOptimization") |
|
106 | 108 |
|
107 | 109 |
# check single cell parameter |
108 | 110 |
if (!is.null(allParams$gaps@distributed)) |
... | ... |
@@ -136,7 +136,7 @@ outputToFile=NULL, ...) |
136 | 136 |
if (!is.null(allParams$gaps@distributed)) |
137 | 137 |
stop("checkpoints not supported for distributed cogaps") |
138 | 138 |
else |
139 |
- cat("Running CoGAPS from a checkpoint") |
|
139 |
+ cat("Running CoGAPS from a checkpoint\n") |
|
140 | 140 |
} |
141 | 141 |
|
142 | 142 |
# get gene/sample names |
... | ... |
@@ -86,13 +86,14 @@ outputToFile=NULL, ...) |
86 | 86 |
) |
87 | 87 |
allParams <- parseExtraParams(allParams, list(...)) |
88 | 88 |
|
89 |
- # display start up message for the user |
|
90 |
- startupMessage(data, allParams) |
|
91 |
- |
|
92 | 89 |
# check file extension |
93 | 90 |
if (is(data, "character") & !supported(data)) |
94 | 91 |
stop("unsupported file extension for data") |
95 | 92 |
|
93 |
+ # enforce the use of explicit subsets with manual pattern matching |
|
94 |
+ if (!is.null(allParams$matchedPatterns) & is.null(allParams$gaps@explicitSets)) |
|
95 |
+ stop("must provide explicit subsets when doing manual pattern matching") |
|
96 |
+ |
|
96 | 97 |
# check uncertainty matrix |
97 | 98 |
if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
98 | 99 |
stop("uncertainty must be same data type as data (file name)") |
... | ... |
@@ -154,6 +155,7 @@ outputToFile=NULL, ...) |
154 | 155 |
allParams$sampleNames <- sampleNames |
155 | 156 |
|
156 | 157 |
# run cogaps |
158 |
+ startupMessage(data, allParams) |
|
157 | 159 |
gapsReturnList <- dispatchFunc(data, allParams, uncertainty) |
158 | 160 |
|
159 | 161 |
# convert list to CogapsResult object |
... | ... |
@@ -1,51 +1,17 @@ |
1 | 1 |
#' @include class-CogapsParams.R |
2 | 2 |
NULL |
3 | 3 |
|
4 |
-#' Checks if file is supported |
|
5 |
-#' @param file path to file |
|
6 |
-#' @return TRUE if file is supported, FALSE if not |
|
7 |
-#' @importFrom tools file_ext |
|
8 |
-supported <- function(file) |
|
9 |
-{ |
|
10 |
- if (!is(file, "character")) |
|
11 |
- return(FALSE) |
|
12 |
- return(tools::file_ext(file) %in% c("tsv", "csv", "mtx")) |
|
13 |
-} |
|
14 |
- |
|
15 |
-#' get number of rows from supported file name or matrix |
|
16 |
-#' @param data either a file name or a matrix |
|
17 |
-#' @return number of rows |
|
18 |
-#' @importFrom data.table fread |
|
19 |
-#' @importFrom tools file_ext |
|
20 |
-nrow_helper <- function(data) |
|
21 |
-{ |
|
22 |
- if (is(data, "character")) |
|
23 |
- { |
|
24 |
- return(switch(tools::file_ext(data), |
|
25 |
- "csv" = nrow(data.table::fread(data, select=1)), |
|
26 |
- "tsv" = nrow(data.table::fread(data, select=1)), |
|
27 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]) |
|
28 |
- )) |
|
29 |
- } |
|
30 |
- return(nrow(data)) |
|
31 |
-} |
|
32 |
- |
|
33 |
-#' get number of columns from supported file name or matrix |
|
34 |
-#' @param data either a file name or a matrix |
|
35 |
-#' @return number of columns |
|
36 |
-#' @importFrom data.table fread |
|
37 |
-#' @importFrom tools file_ext |
|
38 |
-ncol_helper <- function(data) |
|
4 |
+#' Information About Package Compilation |
|
5 |
+#' @export |
|
6 |
+#' |
|
7 |
+#' @details returns information about how the package was compiled, i.e. which |
|
8 |
+#' compiler/version was used, which compile time options were enabled, etc... |
|
9 |
+#' @return string containing build report |
|
10 |
+#' @examples |
|
11 |
+#' CoGAPS::buildReport() |
|
12 |
+buildReport <- function() |
|
39 | 13 |
{ |
40 |
- if (is(data, "character")) |
|
41 |
- { |
|
42 |
- return(switch(tools::file_ext(data), |
|
43 |
- "csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
44 |
- "tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
45 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]) |
|
46 |
- )) |
|
47 |
- } |
|
48 |
- return(ncol(data)) |
|
14 |
+ getBuildReport_cpp() |
|
49 | 15 |
} |
50 | 16 |
|
51 | 17 |
#' CoGAPS Matrix Factorization Algorithm |
... | ... |
@@ -73,6 +39,11 @@ ncol_helper <- function(data) |
73 | 39 |
#' for data that is stored as samples x genes since CoGAPS requires data to be |
74 | 40 |
#' genes x samples |
75 | 41 |
#' @param BPPARAM BiocParallel backend |
42 |
+#' @param geneNames vector of names of genes in data |
|
43 |
+#' @param sampleNames vector of names of samples in data |
|
44 |
+#' @param matchedPatterns manually matched patterns for distributed CoGAPS |
|
45 |
+#' @param outputToFile name of a file to save the output to, will create 4 files |
|
46 |
+#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
|
76 | 47 |
#' @param ... allows for overwriting parameters in params |
77 | 48 |
#' @return CogapsResult object |
78 | 49 |
#' @examples |
... | ... |
@@ -94,9 +65,12 @@ ncol_helper <- function(data) |
94 | 65 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
95 | 66 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
96 | 67 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
97 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
|
68 |
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
69 |
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, |
|
70 |
+outputToFile=NULL, ...) |
|
98 | 71 |
{ |
99 | 72 |
# store all parameters in a list and parse parameters from ... |
73 |
+ validObject(params) |
|
100 | 74 |
allParams <- list("gaps"=params, |
101 | 75 |
"nThreads"=nThreads, |
102 | 76 |
"messages"=messages, |
... | ... |
@@ -106,12 +80,14 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
106 | 80 |
"checkpointInFile"=checkpointInFile, |
107 | 81 |
"transposeData"=transposeData, |
108 | 82 |
"bpBackend"=BPPARAM, |
83 |
+ "matchedPatterns"=matchedPatterns, |
|
84 |
+ "outputToFile"=outputToFile, |
|
109 | 85 |
"whichMatrixFixed"=NULL # internal parameter |
110 | 86 |
) |
111 | 87 |
allParams <- parseExtraParams(allParams, list(...)) |
112 | 88 |
|
113 | 89 |
# display start up message for the user |
114 |
- startupMessage(data, allParams$transposeData, allParams$gaps@distributed) |
|
90 |
+ startupMessage(data, allParams) |
|
115 | 91 |
|
116 | 92 |
# check file extension |
117 | 93 |
if (is(data, "character") & !supported(data)) |
... | ... |
@@ -125,7 +101,7 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
125 | 101 |
if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix")) |
126 | 102 |
stop("uncertainty must be a matrix unless data is a file path") |
127 | 103 |
if (!is(data, "character")) |
128 |
- checkDataMatrix(data, uncertainty, params) |
|
104 |
+ checkDataMatrix(data, uncertainty, allParams$gaps) |
|
129 | 105 |
|
130 | 106 |
# check single cell parameter |
131 | 107 |
if (!is.null(allParams$gaps@distributed)) |
... | ... |
@@ -136,16 +112,15 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
136 | 112 |
stop("can't run multi-threaded and distributed CoGAPS at the same time") |
137 | 113 |
|
138 | 114 |
# convert data to matrix |
115 |
+ if (is(data, "matrix")) |
|
116 |
+ data <- data |
|
139 | 117 |
if (is(data, "data.frame")) |
140 | 118 |
data <- data.matrix(data) |
141 | 119 |
else if (is(data, "SummarizedExperiment")) |
142 | 120 |
data <- SummarizedExperiment::assay(data, "counts") |
143 | 121 |
else if (is(data, "SingleCellExperiment")) |
144 | 122 |
data <- SummarizedExperiment::assay(data, "counts") |
145 |
- |
|
146 |
- # label matrix |
|
147 |
- |
|
148 |
- |
|
123 |
+ |
|
149 | 124 |
# determine which function to call cogaps algorithm |
150 | 125 |
if (!is.null(allParams$gaps@distributed)) |
151 | 126 |
dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
... | ... |
@@ -160,9 +135,24 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
160 | 135 |
if (!is.null(allParams$gaps@distributed)) |
161 | 136 |
stop("checkpoints not supported for distributed cogaps") |
162 | 137 |
else |
163 |
- message("Running CoGAPS from a checkpoint") |
|
138 |
+ cat("Running CoGAPS from a checkpoint") |
|
164 | 139 |
} |
165 | 140 |
|
141 |
+ # get gene/sample names |
|
142 |
+ if (is.null(geneNames)) geneNames <- getGeneNames(data, allParams$transposeData) |
|
143 |
+ if (is.null(sampleNames)) sampleNames <- getSampleNames(data, allParams$transposeData) |
|
144 |
+ |
|
145 |
+ nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
|
146 |
+ nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
|
147 |
+ |
|
148 |
+ if (length(geneNames) != nGenes) |
|
149 |
+ stop("incorrect number of gene names given") |
|
150 |
+ if (length(sampleNames) != nSamples) |
|
151 |
+ stop("incorrect number of sample names given") |
|
152 |
+ |
|
153 |
+ allParams$geneNames <- geneNames |
|
154 |
+ allParams$sampleNames <- sampleNames |
|
155 |
+ |
|
166 | 156 |
# run cogaps |
167 | 157 |
gapsReturnList <- dispatchFunc(data, allParams, uncertainty) |
168 | 158 |
|
... | ... |
@@ -172,8 +162,9 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
172 | 162 |
Asd = gapsReturnList$Asd, |
173 | 163 |
Pmean = gapsReturnList$Pmean, |
174 | 164 |
Psd = gapsReturnList$Psd, |
175 |
- seed = gapsReturnList$seed, |
|
176 | 165 |
meanChiSq = gapsReturnList$meanChiSq, |
166 |
+ geneNames = gapsReturnList$geneNames, |
|
167 |
+ sampleNames = gapsReturnList$sampleNames, |
|
177 | 168 |
diagnostics = append(gapsReturnList$diagnostics, |
178 | 169 |
list("params"=allParams$gaps, "version"=utils::packageVersion("CoGAPS"))) |
179 | 170 |
)) |
... | ... |
@@ -189,13 +180,14 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
189 | 180 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
190 | 181 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
191 | 182 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
192 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
|
183 |
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
184 |
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
|
193 | 185 |
{ |
194 | 186 |
params@distributed <- "single-cell" |
195 | 187 |
params@singleCell <- TRUE |
196 | 188 |
CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty, |
197 | 189 |
checkpointOutFile, checkpointInterval, checkpointInFile, transposeData, |
198 |
- BPPARAM, ...) |
|
190 |
+ BPPARAM, geneNames, sampleNames, matchedPatterns, ...) |
|
199 | 191 |
} |
200 | 192 |
|
201 | 193 |
#' Genome Wide CoGAPS |
... | ... |
@@ -208,83 +200,11 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
208 | 200 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
209 | 201 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
210 | 202 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
211 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
|
203 |
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
204 |
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
|
212 | 205 |
{ |
213 | 206 |
params@distributed <- "genome-wide" |
214 | 207 |
CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty, |
215 | 208 |
checkpointOutFile, checkpointInterval, checkpointInFile, transposeData, |
216 |
- BPPARAM, ...) |
|
217 |
-} |
|
218 |
- |
|
219 |
-#' write start up message |
|
220 |
-#' |
|
221 |
-#' @param data data set |
|
222 |
-#' @param transpose if we are transposing the data set |
|
223 |
-#' @param distributed if we are running distributed CoGAPS |
|
224 |
-#' @return message displayed to screen |
|
225 |
-startupMessage <- function(data, transpose, distributed) |
|
226 |
-{ |
|
227 |
- nGenes <- ifelse(transpose, ncol_helper(data), nrow_helper(data)) |
|
228 |
- nSamples <- ifelse(transpose, nrow_helper(data), ncol_helper(data)) |
|
229 |
- |
|
230 |
- dist_message <- "Standard" |
|
231 |
- if (!is.null(distributed)) |
|
232 |
- dist_message <- distributed |
|
233 |
- message(paste("Running", dist_message, "CoGAPS on", nGenes, "genes and", |
|
234 |
- nSamples, "samples")) |
|
235 |
-} |
|
236 |
- |
|
237 |
-#' parse parameters passed through the ... variable |
|
238 |
-#' |
|
239 |
-#' @param allParams list of all parameters |
|
240 |
-#' @param extraParams list of parameters in ... |
|
241 |
-#' @return allParams with any valid parameters in extraParams added |
|
242 |
-#' @note will halt with an error if any parameters in extraParams are invalid |
|
243 |
-#' @importFrom methods slotNames |
|
244 |
-parseExtraParams <- function(allParams, extraParams) |
|
245 |
-{ |
|
246 |
- # parse direct params |
|
247 |
- for (s in slotNames(allParams$gaps)) |
|
248 |
- { |
|
249 |
- if (!is.null(extraParams[[s]])) |
|
250 |
- { |
|
251 |
- allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]]) |
|
252 |
- extraParams[[s]] <- NULL |
|
253 |
- } |
|
254 |
- } |
|
255 |
- |
|
256 |
- # check for unrecognized options |
|
257 |
- if (length(extraParams) > 0) |
|
258 |
- stop(paste("unrecognized argument:", names(extraParams)[1])) |
|
259 |
- |
|
260 |
- return(allParams) |
|
261 |
-} |
|
262 |
- |
|
263 |
-#' check that provided data is valid |
|
264 |
-#' |
|
265 |
-#' @param data data matrix |
|
266 |
-#' @param uncertainty uncertainty matrix, can be null |
|
267 |
-#' @param params CogapsParams object |
|
268 |
-#' @return throws an error if data has problems |
|
269 |
-checkDataMatrix <- function(data, uncertainty, params) |
|
270 |
-{ |
|
271 |
- if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0) |
|
272 |
- stop("negative values in data and/or uncertainty matrix") |
|
273 |
- if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns) |
|
274 |
- stop("nPatterns must be less than dimensions of data") |
|
275 |
- if (sum(uncertainty < 1e-5) > 0) |
|
276 |
- warning("small values in uncertainty matrix detected") |
|
277 |
-} |
|
278 |
- |
|
279 |
-#' Information About Package Compilation |
|
280 |
-#' @export |
|
281 |
-#' |
|
282 |
-#' @details returns information about how the package was compiled, i.e. which |
|
283 |
-#' compiler/version was used, which compile time options were enabled, etc... |
|
284 |
-#' @return string containing build report |
|
285 |
-#' @examples |
|
286 |
-#' CoGAPS::buildReport() |
|
287 |
-buildReport <- function() |
|
288 |
-{ |
|
289 |
- getBuildReport_cpp() |
|
290 |
-} |
|
209 |
+ BPPARAM, geneNames, sampleNames, matchedPatterns, ...) |
|
210 |
+} |
|
291 | 211 |
\ No newline at end of file |
... | ... |
@@ -73,8 +73,6 @@ ncol_helper <- function(data) |
73 | 73 |
#' for data that is stored as samples x genes since CoGAPS requires data to be |
74 | 74 |
#' genes x samples |
75 | 75 |
#' @param BPPARAM BiocParallel backend |
76 |
-#' @param saveUnmatchedPatterns when running distributed cogaps, save the |
|
77 |
-#' intermediate result from each subset of the data |
|
78 | 76 |
#' @param ... allows for overwriting parameters in params |
79 | 77 |
#' @return CogapsResult object |
80 | 78 |
#' @examples |
... | ... |
@@ -96,8 +94,7 @@ ncol_helper <- function(data) |
96 | 94 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
97 | 95 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |