... | ... |
@@ -2,6 +2,7 @@ |
2 | 2 |
|
3 | 3 |
S3method(plot,CogapsResult) |
4 | 4 |
export(CoGAPS) |
5 |
+export(CogapsParams) |
|
5 | 6 |
export(GWCoGAPS) |
6 | 7 |
export(binaryA) |
7 | 8 |
export(buildReport) |
... | ... |
@@ -11,10 +12,12 @@ export(checkpointsEnabled) |
11 | 12 |
export(findConsensusMatrix) |
12 | 13 |
export(getClusteredPatterns) |
13 | 14 |
export(getCorrelationToMeanPattern) |
15 |
+export(getFeatureLoadings) |
|
14 | 16 |
export(getMeanChiSq) |
15 | 17 |
export(getOriginalParameters) |
16 | 18 |
export(getParam) |
17 | 19 |
export(getRetinaSubset) |
20 |
+export(getSampleFactors) |
|
18 | 21 |
export(getSubsets) |
19 | 22 |
export(getUnmatchedPatterns) |
20 | 23 |
export(getVersion) |
... | ... |
@@ -25,6 +28,7 @@ export(reconstructGene) |
25 | 28 |
export(scCoGAPS) |
26 | 29 |
export(setAnnotationWeights) |
27 | 30 |
export(setDistributedParams) |
31 |
+export(setFixedPatterns) |
|
28 | 32 |
export(setParam) |
29 | 33 |
exportClasses(CogapsParams) |
30 | 34 |
exportClasses(CogapsResult) |
... | ... |
@@ -49,19 +49,7 @@ checkpointsEnabled <- function() |
49 | 49 |
#' @param transposeData T/F for transposing data while reading it in - useful |
50 | 50 |
#' for data that is stored as samples x genes since CoGAPS requires data to be |
51 | 51 |
#' genes x samples |
52 |
-#' @param subsetIndices set of indices to use from the data |
|
53 |
-#' @param subsetDim which dimension (1=rows, 2=cols) to subset |
|
54 | 52 |
#' @param BPPARAM BiocParallel backend |
55 |
-#' @param geneNames vector of names of genes in data |
|
56 |
-#' @param sampleNames vector of names of samples in data |
|
57 |
-#' @param fixedPatterns fix either 'A' or 'P' matrix to these values, in the |
|
58 |
-#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
59 |
-#' skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
60 |
-#' matching, as well as fixed runs of standard CoGAPS |
|
61 |
-#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed |
|
62 |
-#' @param takePumpSamples whether or not to take PUMP samples |
|
63 |
-#' @param outputToFile name of a file to save the output to, will create 4 files |
|
64 |
-#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
|
65 | 53 |
#' @param workerID if calling CoGAPS in parallel the worker ID can be specified, |
66 | 54 |
#' only worker 1 prints output and each worker outputs when it finishes, this |
67 | 55 |
#' is not neccesary when using the default parallel methods (i.e. distributed |
... | ... |
@@ -82,17 +70,18 @@ checkpointsEnabled <- function() |
82 | 70 |
#' params <- setParam(params, "nPatterns", 3) |
83 | 71 |
#' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25) |
84 | 72 |
#' @importFrom methods new is |
85 |
-#' @importFrom SummarizedExperiment assay |
|
86 |
-#' @importFrom utils packageVersion |
|
87 |
-CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
88 |
-messages=TRUE, outputFrequency=500, uncertainty=NULL, |
|
89 |
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
|
90 |
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
91 |
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
92 |
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
73 |
+CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
|
74 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
75 |
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
76 |
+BPPARAM=NULL, workerID=1, ...) |
|
93 | 77 |
{ |
94 |
- # store all parameters in a list and parse parameters from ... |
|
78 |
+ # pre-process inputs |
|
79 |
+ data <- getValueOrRds(data) |
|
80 |
+ data <- convertDataToMatrix(data) |
|
81 |
+ params <- getValueOrRds(params) |
|
95 | 82 |
validObject(params) |
83 |
+ |
|
84 |
+ # store all parameters in a list and parse parameters from ... |
|
96 | 85 |
allParams <- list("gaps"=params, |
97 | 86 |
"nThreads"=nThreads, |
98 | 87 |
"messages"=messages, |
... | ... |
@@ -100,48 +89,24 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
100 | 89 |
"checkpointOutFile"=checkpointOutFile, |
101 | 90 |
"checkpointInterval"=checkpointInterval, |
102 | 91 |
"checkpointInFile"=checkpointInFile, |
92 |
+ "geneNames"=NULL, # the gene/sample names in the params object are kept |
|
93 |
+ "sampleNames"=NULL, # as a reference, these are the values actually used |
|
103 | 94 |
"transposeData"=transposeData, |
104 |
- "subsetIndices"=subsetIndices, |
|
105 |
- "subsetDim"=subsetDim, |
|
106 | 95 |
"BPPARAM"=BPPARAM, |
107 |
- "fixedPatterns"=fixedPatterns, |
|
108 |
- "whichMatrixFixed"=whichMatrixFixed, |
|
109 |
- "takePumpSamples"=takePumpSamples, |
|
110 |
- "outputToFile"=outputToFile, |
|
96 |
+ "outputToFile"=NULL, |
|
111 | 97 |
"workerID"=workerID |
112 | 98 |
) |
113 | 99 |
allParams <- parseExtraParams(allParams, list(...)) |
114 |
- |
|
115 |
- # if rds was passed, we first read it in before any processing |
|
116 |
- if (is(data, "character")) |
|
117 |
- { |
|
118 |
- if (tools::file_ext(data) == "rds") |
|
119 |
- { |
|
120 |
- gapsCat(allParams, "reading RDS file...") |
|
121 |
- data <- readRDS(data) |
|
122 |
- gapsCat(allParams, "done\n") |
|
123 |
- } |
|
124 |
- } |
|
125 |
- |
|
126 |
- # convert data if needed |
|
127 |
- if (is(data, "data.frame")) |
|
128 |
- data <- data.matrix(data) |
|
129 |
- else if (is(data, "SummarizedExperiment")) |
|
130 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
131 |
- else if (is(data, "SingleCellExperiment")) |
|
132 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
133 |
- |
|
134 |
- # check that inputs are valid, then read the gene/sample names from the data |
|
100 |
+ allParams <- getDimNames(data, allParams) |
|
135 | 101 |
checkInputs(data, uncertainty, allParams) |
136 |
- allParams <- getNamesFromData(data, allParams, geneNames, sampleNames) |
|
137 |
- |
|
102 |
+ |
|
138 | 103 |
# check if we're running from a checkpoint |
139 | 104 |
if (!is.null(allParams$checkpointInFile)) |
140 | 105 |
{ |
141 | 106 |
gapsCat(allParams, "Running CoGAPS from a checkpoint\n") |
142 | 107 |
} |
143 | 108 |
|
144 |
- # determine which function to call cogaps algorithm |
|
109 |
+ # determine function to call cogaps algorithm |
|
145 | 110 |
dispatchFunc <- cogaps_cpp # default |
146 | 111 |
if (!is.null(allParams$gaps@distributed)) |
147 | 112 |
dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
... | ... |
@@ -170,12 +135,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
170 | 135 |
#' params <- setParam(params, "nPatterns", 3) |
171 | 136 |
#' result <- scCoGAPS(t(GIST.matrix), params, BPPARAM=BiocParallel::SerialParam()) |
172 | 137 |
#' } |
173 |
-scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
174 |
-messages=TRUE, outputFrequency=500, uncertainty=NULL, |
|
175 |
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
|
176 |
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
177 |
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
178 |
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
138 |
+scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
|
139 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
140 |
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
141 |
+BPPARAM=NULL, workerID=1, ...) |
|
179 | 142 |
{ |
180 | 143 |
params@distributed <- "single-cell" |
181 | 144 |
params@singleCell <- TRUE |
... | ... |
@@ -190,15 +153,7 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
190 | 153 |
checkpointInterval=checkpointInterval, |
191 | 154 |
checkpointInFile=checkpointInFile, |
192 | 155 |
transposeData=transposeData, |
193 |
- subsetIndices=subsetIndices, |
|
194 |
- subsetDim=subsetDim, |
|
195 | 156 |
BPPARAM=BPPARAM, |
196 |
- geneNames=geneNames, |
|
197 |
- sampleNames=sampleNames, |
|
198 |
- fixedPatterns=fixedPatterns, |
|
199 |
- whichMatrixFixed=whichMatrixFixed, |
|
200 |
- takePumpSamples=takePumpSamples, |
|
201 |
- outputToFile=outputToFile, |
|
202 | 157 |
workerID=workerID, |
203 | 158 |
... |
204 | 159 |
) |
... | ... |
@@ -220,12 +175,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
220 | 175 |
#' params <- setParam(params, "nPatterns", 3) |
221 | 176 |
#' result <- GWCoGAPS(GIST.matrix, params, BPPARAM=BiocParallel::SerialParam()) |
222 | 177 |
#' } |
223 |
-GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
|
224 |
-messages=TRUE, outputFrequency=500, uncertainty=NULL, |
|
225 |
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
|
226 |
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0, |
|
227 |
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL, |
|
228 |
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
|
178 |
+GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE, |
|
179 |
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out", |
|
180 |
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE, |
|
181 |
+BPPARAM=NULL, workerID=1, ...) |
|
229 | 182 |
{ |
230 | 183 |
params@distributed <- "genome-wide" |
231 | 184 |
CoGAPS( |
... | ... |
@@ -239,16 +192,8 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...) |
239 | 192 |
checkpointInterval=checkpointInterval, |
240 | 193 |
checkpointInFile=checkpointInFile, |
241 | 194 |
transposeData=transposeData, |
242 |
- subsetIndices=subsetIndices, |
|
243 |
- subsetDim=subsetDim, |
|
244 | 195 |
BPPARAM=BPPARAM, |
245 |
- geneNames=geneNames, |
|
246 |
- sampleNames=sampleNames, |
|
247 |
- fixedPatterns=fixedPatterns, |
|
248 |
- whichMatrixFixed=whichMatrixFixed, |
|
249 |
- takePumpSamples=takePumpSamples, |
|
250 |
- outputToFile=outputToFile, |
|
251 | 196 |
workerID=workerID, |
252 | 197 |
... |
253 | 198 |
) |
254 |
-} |
|
255 | 199 |
\ No newline at end of file |
200 |
+} |
... | ... |
@@ -26,8 +26,8 @@ workerID) |
26 | 26 |
else |
27 | 27 |
allParams$sampleNames <- allParams$sampleNames[subsetIndices] |
28 | 28 |
|
29 |
- allParams$subsetIndices <- subsetIndices |
|
30 |
- allParams$subsetDim <- ifelse(genomeWide, 1, 2) |
|
29 |
+ allParams$gaps@subsetIndices <- subsetIndices |
|
30 |
+ allParams$gaps@subsetDim <- ifelse(genomeWide, 1, 2) |
|
31 | 31 |
allParams$workerID <- workerID |
32 | 32 |
|
33 | 33 |
# call CoGAPS |
... | ... |
@@ -226,6 +226,7 @@ corcut <- function(allPatterns, cut, minNS) |
226 | 226 |
#' @return list with all CoGAPS output |
227 | 227 |
stitchTogether <- function(result, allParams) |
228 | 228 |
{ |
229 |
+ print("stiching together...") |
|
229 | 230 |
if (allParams$gaps@distributed == "genome-wide") |
230 | 231 |
{ |
231 | 232 |
consensus <- result[[1]]@sampleFactors |
... | ... |
@@ -4,13 +4,14 @@ |
4 | 4 |
#' @description combines retina subsets from extdata directory |
5 | 5 |
#' @param n number of subsets to use |
6 | 6 |
#' @return matrix of RNA counts |
7 |
+#' @examples |
|
8 |
+#' retSubset <- getRetinaSubset() |
|
9 |
+#' dim(retSubset) |
|
7 | 10 |
#' @importFrom rhdf5 h5read |
8 | 11 |
getRetinaSubset <- function(n=1) |
9 | 12 |
{ |
10 | 13 |
if (!(n %in% 1:4)) |
11 |
- { |
|
12 | 14 |
stop("invalid number of subsets requested") |
13 |
- } |
|
14 | 15 |
|
15 | 16 |
subset_1_path <- system.file("extdata/retina_subset_1.h5", package="CoGAPS") |
16 | 17 |
subset_2_path <- system.file("extdata/retina_subset_2.h5", package="CoGAPS") |
... | ... |
@@ -66,6 +67,35 @@ supported <- function(file) |
66 | 67 |
return(tools::file_ext(file) %in% c("tsv", "csv", "mtx", "gct")) |
67 | 68 |
} |
68 | 69 |
|
70 |
+#' checks if file is rds format |
|
71 |
+#' @keywords internal |
|
72 |
+#' |
|
73 |
+#' @param file path to file |
|
74 |
+#' @return TRUE if file is .rds, FALSE if not |
|
75 |
+#' @importFrom tools file_ext |
|
76 |
+isRdsFile <- function(file) |
|
77 |
+{ |
|
78 |
+ if (is.null(file)) |
|
79 |
+ return(FALSE) |
|
80 |
+ if (length(file) == 0) |
|
81 |
+ return(FALSE) |
|
82 |
+ if (!is(file, "character")) |
|
83 |
+ return(FALSE) |
|
84 |
+ return(tools::file_ext(file) == ".rds") |
|
85 |
+} |
|
86 |
+ |
|
87 |
+#' get input that might be an RDS file |
|
88 |
+#' @keywords internal |
|
89 |
+#' |
|
90 |
+#' @param input some user input |
|
91 |
+#' @return if input is an RDS file, read it - otherwise return input |
|
92 |
+getValueOrRds <- function(input) |
|
93 |
+{ |
|
94 |
+ if (isRdsFile(input)) |
|
95 |
+ return(readRDS(input)) |
|
96 |
+ return(input) |
|
97 |
+} |
|
98 |
+ |
|
69 | 99 |
#' get number of rows from supported file name or matrix |
70 | 100 |
#' @keywords internal |
71 | 101 |
#' |
... | ... |
@@ -114,9 +144,7 @@ ncolHelper <- function(data) |
114 | 144 |
getGeneNames <- function(data, transpose) |
115 | 145 |
{ |
116 | 146 |
if (transpose) |
117 |
- { |
|
118 | 147 |
return(getSampleNames(data, FALSE)) |
119 |
- } |
|
120 | 148 |
|
121 | 149 |
names <- NULL |
122 | 150 |
if (is(data, "character")) |
... | ... |
@@ -127,16 +155,13 @@ getGeneNames <- function(data, transpose) |
127 | 155 |
"gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(data, select=1)))) |
128 | 156 |
) |
129 | 157 |
} |
130 |
- else if (is(data, "matrix") | is(data, "data.frame")) |
|
158 |
+ else |
|
131 | 159 |
{ |
132 | 160 |
names <- rownames(data) |
133 | 161 |
} |
134 | 162 |
|
135 | 163 |
if (is.null(names)) |
136 |
- { |
|
137 |
- nGenes <- nrowHelper(data) |
|
138 |
- return(paste("Gene", 1:nGenes, sep="_")) |
|
139 |
- } |
|
164 |
+ return(paste("Gene", 1:nrowHelper(data), sep="_")) |
|
140 | 165 |
return(names) |
141 | 166 |
} |
142 | 167 |
|
... | ... |
@@ -146,9 +171,7 @@ getGeneNames <- function(data, transpose) |
146 | 171 |
getSampleNames <- function(data, transpose) |
147 | 172 |
{ |
148 | 173 |
if (transpose) |
149 |
- { |
|
150 | 174 |
return(getGeneNames(data, FALSE)) |
151 |
- } |
|
152 | 175 |
|
153 | 176 |
names <- NULL |
154 | 177 |
if (is(data, "character")) |
... | ... |
@@ -159,16 +182,13 @@ getSampleNames <- function(data, transpose) |
159 | 182 |
"gct" = suppressWarnings(colnames(data.table::fread(data, skip=2, nrows=1))[-1:-2]) |
160 | 183 |
) |
161 | 184 |
} |
162 |
- else if (is(data, "matrix") | is(data, "data.frame")) |
|
185 |
+ else |
|
163 | 186 |
{ |
164 | 187 |
names <- colnames(data) |
165 | 188 |
} |
166 | 189 |
|
167 | 190 |
if (is.null(names)) |
168 |
- { |
|
169 |
- nSamples <- ncolHelper(data) |
|
170 |
- return(paste("Sample", 1:nSamples, sep="_")) |
|
171 |
- } |
|
191 |
+ return(paste("Sample", 1:ncolHelper(data), sep="_")) |
|
172 | 192 |
return(names) |
173 | 193 |
} |
174 | 194 |
|
... | ... |
@@ -236,9 +256,6 @@ parseExtraParams <- function(allParams, extraParams) |
236 | 256 |
#' @return throws an error if data has problems |
237 | 257 |
checkDataMatrix <- function(data, uncertainty, params) |
238 | 258 |
{ |
239 |
- if (!is(data, "matrix") & !is(data, "data.frame") |
|
240 |
- & !is(data, "SummarizedExperiment") & !is(data, "SingleCellExperiment")) |
|
241 |
- stop("unsupported object type of CoGAPS") |
|
242 | 259 |
if (any(is.na(data))) |
243 | 260 |
stop("NA values in data") |
244 | 261 |
if (!all(apply(data, 2, is.numeric))) |
... | ... |
@@ -260,9 +277,6 @@ checkDataMatrix <- function(data, uncertainty, params) |
260 | 277 |
#' @return throws an error if inputs are invalid |
261 | 278 |
checkInputs <- function(data, uncertainty, allParams) |
262 | 279 |
{ |
263 |
- if (is(data, "character") & !supported(data)) |
|
264 |
- stop("unsupported file extension for data") |
|
265 |
- |
|
266 | 280 |
if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
267 | 281 |
stop("uncertainty must be same data type as data (file name)") |
268 | 282 |
if (is(uncertainty, "character") & !supported(uncertainty)) |
... | ... |
@@ -272,34 +286,16 @@ checkInputs <- function(data, uncertainty, allParams) |
272 | 286 |
if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
273 | 287 |
stop("must use default uncertainty when enabling sparseOptimization") |
274 | 288 |
|
275 |
- if (!(allParams$whichMatrixFixed %in% c("A", "P", "N"))) |
|
276 |
- stop("Invalid choice of fixed matrix, must be 'A' or 'P'") |
|
277 |
- if (!is.null(allParams$fixedPatterns) & allParams$whichMatrixFixed == "N") |
|
278 |
- stop("fixedPatterns passed without setting whichMatrixFixed") |
|
279 |
- if (allParams$whichMatrixFixed %in% c("A", "P") & is.null(allParams$fixedPatterns)) |
|
280 |
- stop("whichMatrixFixed is set without passing fixedPatterns") |
|
281 |
- |
|
282 | 289 |
if (!is.null(allParams$gaps@distributed)) |
283 | 290 |
{ |
284 | 291 |
if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
285 | 292 |
warning("running single-cell CoGAPS with singleCell=FALSE") |
286 |
- if (!is.null(allParams$fixedPatterns) & is.null(allParams$gaps@explicitSets)) |
|
287 |
- warning("doing manual pattern matching with using explicit subsets") |
|
288 | 293 |
if (allParams$nThreads > 1) |
289 | 294 |
stop("can't run multi-threaded and distributed CoGAPS at the same time") |
290 | 295 |
if (!is.null(allParams$checkpointInFile)) |
291 | 296 |
stop("checkpoints not supported for distributed cogaps") |
292 |
- if (allParams$gaps@distributed == "single-cell" & allParams$whichMatrixFixed == "P") |
|
293 |
- stop("can't fix P matrix when running single-cell CoGAPS") |
|
294 |
- if (allParams$gaps@distributed == "genome-wide" & allParams$whichMatrixFixed == "A") |
|
295 |
- stop("can't fix A matrix when running genome-wide CoGAPS") |
|
296 | 297 |
} |
297 | 298 |
|
298 |
- if (!(allParams$subsetDim %in% c(0,1,2))) |
|
299 |
- stop("invalid subset dimension") |
|
300 |
- if (allParams$subsetDim > 0 & is.null(allParams$subsetIndices)) |
|
301 |
- stop("subsetDim provided without subsetIndices") |
|
302 |
- |
|
303 | 299 |
if (!is(data, "character")) |
304 | 300 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
305 | 301 |
} |
... | ... |
@@ -309,38 +305,72 @@ checkInputs <- function(data, uncertainty, allParams) |
309 | 305 |
#' |
310 | 306 |
#' @param data data matrix |
311 | 307 |
#' @param allParams list of all parameters |
312 |
-#' @param geneNames vector of names of genes in data |
|
313 |
-#' @param sampleNames vector of names of samples in data |
|
314 | 308 |
#' @return list of all parameters with added gene names |
315 |
-getNamesFromData <- function(data, allParams, geneNames, sampleNames) |
|
309 |
+getDimNames <- function(data, allParams) |
|
316 | 310 |
{ |
317 |
- # get gene/sample names |
|
318 |
- if (is.null(geneNames)) |
|
311 |
+ # get user supplied names |
|
312 |
+ geneNames <- allParams$gaps@geneNames |
|
313 |
+ sampleNames <- allParams$gaps@sampleNames |
|
314 |
+ |
|
315 |
+ # if user didn't supply any names, pull from data set or use default labels |
|
316 |
+ if (is.null(allParams$gaps@geneNames)) |
|
319 | 317 |
geneNames <- getGeneNames(data, allParams$transposeData) |
320 |
- if (is.null(sampleNames)) |
|
318 |
+ if (is.null(allParams$gaps@sampleNames)) |
|
321 | 319 |
sampleNames <- getSampleNames(data, allParams$transposeData) |
322 | 320 |
|
321 |
+ # get the number of genes/samples |
|
323 | 322 |
nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
324 | 323 |
nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
325 | 324 |
|
326 |
- if (allParams$subsetDim == 1) |
|
325 |
+ # handle any subsetting |
|
326 |
+ if (allParams$gaps@subsetDim == 1) |
|
327 | 327 |
{ |
328 |
- nGenes <- length(allParams$subsetIndices) |
|
329 |
- geneNames <- geneNames[allParams$subsetIndices] |
|
328 |
+ nGenes <- length(allParams$gaps@subsetIndices) |
|
329 |
+ geneNames <- geneNames[allParams$gaps@subsetIndices] |
|
330 | 330 |
} |
331 |
- else if (allParams$subsetDim == 2) |
|
331 |
+ else if (allParams$gaps@subsetDim == 2) |
|
332 | 332 |
{ |
333 |
- nSamples <- length(allParams$subsetIndices) |
|
334 |
- sampleNames <- sampleNames[allParams$subsetIndices] |
|
333 |
+ nSamples <- length(allParams$gaps@subsetIndices) |
|
334 |
+ sampleNames <- sampleNames[allParams$gaps@subsetIndices] |
|
335 | 335 |
} |
336 | 336 |
|
337 |
+ # check that names align with expected number of genes/samples |
|
337 | 338 |
if (length(geneNames) != nGenes) |
338 | 339 |
stop("incorrect number of gene names given") |
339 | 340 |
if (length(sampleNames) != nSamples) |
340 | 341 |
stop("incorrect number of sample names given") |
341 | 342 |
|
343 |
+ # store processed gene/sample names directly in allParams list |
|
344 |
+ # this is an important distinction - allParams@gaps contains the |
|
345 |
+ # gene/sample names originally passed by the user, allParams contains |
|
346 |
+ # the procseed gene/sample names to be used when labeling the result |
|
342 | 347 |
allParams$geneNames <- geneNames |
343 | 348 |
allParams$sampleNames <- sampleNames |
344 |
- |
|
345 | 349 |
return(allParams) |
350 |
+} |
|
351 |
+ |
|
352 |
+#' convert any acceptable data input to a numeric matrix |
|
353 |
+#' @keywords internal |
|
354 |
+#' |
|
355 |
+#' @description convert supported R objects containing the data to a |
|
356 |
+#' numeric matrix, if data is a file name do nothing. Exits with an error |
|
357 |
+#' if data is not a supported type. |
|
358 |
+#' @param data data input |
|
359 |
+#' @return data matrix |
|
360 |
+#' @importFrom methods is |
|
361 |
+#' @importFrom SummarizedExperiment assay |
|
362 |
+convertDataToMatrix <- function(data) |
|
363 |
+{ |
|
364 |
+ if (is(data, "character") & !supported(data)) |
|
365 |
+ stop("unsupported file extension for data") |
|
366 |
+ else if (is(data, "matrix") | is(data, "character")) |
|
367 |
+ return(data) |
|
368 |
+ else if (is(data, "data.frame")) |
|
369 |
+ return(data.matrix(data)) |
|
370 |
+ else if (is(data, "SummarizedExperiment")) |
|
371 |
+ return(SummarizedExperiment::assay(data, "counts")) |
|
372 |
+ else if (is(data, "SingleCellExperiment")) |
|
373 |
+ return(SummarizedExperiment::assay(data, "counts")) |
|
374 |
+ else |
|
375 |
+ stop("unsupported data type") |
|
346 | 376 |
} |
347 | 377 |
\ No newline at end of file |
... | ... |
@@ -28,6 +28,16 @@ |
28 | 28 |
#' the rows (cols) to use for weighted sampling |
29 | 29 |
#' @slot samplingWeight [distributed parameter] weights associated with |
30 | 30 |
#' samplingAnnotation |
31 |
+#' @slot subsetIndices set of indices to use from the data |
|
32 |
+#' @slot subsetDim which dimension (1=rows, 2=cols) to subset |
|
33 |
+#' @slot geneNames vector of names of genes in data |
|
34 |
+#' @slot sampleNames vector of names of samples in data |
|
35 |
+#' @slot fixedPatterns fix either 'A' or 'P' matrix to these values, in the |
|
36 |
+#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
37 |
+#' skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
38 |
+#' matching, as well as fixed runs of standard CoGAPS |
|
39 |
+#' @slot whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed |
|
40 |
+#' @slot takePumpSamples whether or not to take PUMP samples |
|
31 | 41 |
#' @importClassesFrom S4Vectors character_OR_NULL |
32 | 42 |
setClass("CogapsParams", slots = c( |
33 | 43 |
nPatterns = "numeric", |
... | ... |
@@ -46,7 +56,14 @@ setClass("CogapsParams", slots = c( |
46 | 56 |
maxNS = "numeric", |
47 | 57 |
explicitSets = "ANY", |
48 | 58 |
samplingAnnotation = "character_OR_NULL", |
49 |
- samplingWeight = "numeric" |
|
59 |
+ samplingWeight = "ANY", |
|
60 |
+ subsetIndices="ANY", |
|
61 |
+ subsetDim="numeric", |
|
62 |
+ geneNames="character_OR_NULL", |
|
63 |
+ sampleNames="character_OR_NULL", |
|
64 |
+ fixedPatterns="ANY", |
|
65 |
+ whichMatrixFixed="character", |
|
66 |
+ takePumpSamples="logical" |
|
50 | 67 |
)) |
51 | 68 |
|
52 | 69 |
#' constructor for CogapsParams |
... | ... |
@@ -70,7 +87,7 @@ setMethod("initialize", "CogapsParams", |
70 | 87 |
if (!is.null(list(...)$maxNS)) |
71 | 88 |
stop("maxNS must be set after CogapsParams are intialized") |
72 | 89 |
if (!is.null(distributed)) |
73 |
- if (distributed == "none") |
|
90 |
+ if (distributed == "none") # allows it to be a pure string parameter |
|
74 | 91 |
distributed <- NULL |
75 | 92 |
.Object@distributed <- distributed |
76 | 93 |
|
... | ... |
@@ -89,7 +106,14 @@ setMethod("initialize", "CogapsParams", |
89 | 106 |
.Object@maxNS <- .Object@minNS + .Object@nSets |
90 | 107 |
.Object@explicitSets <- NULL |
91 | 108 |
.Object@samplingAnnotation <- NULL |
92 |
- .Object@samplingWeight <- integer(0) |
|
109 |
+ .Object@samplingWeight <- NULL |
|
110 |
+ .Object@subsetIndices <- NULL |
|
111 |
+ .Object@subsetDim <- 0 |
|
112 |
+ .Object@geneNames <- NULL |
|
113 |
+ .Object@sampleNames <- NULL |
|
114 |
+ .Object@fixedPatterns <- NULL |
|
115 |
+ .Object@whichMatrixFixed <- 'N' |
|
116 |
+ .Object@takePumpSamples <- FALSE |
|
93 | 117 |
|
94 | 118 |
.Object <- callNextMethod(.Object, ...) |
95 | 119 |
.Object |
... | ... |
@@ -104,42 +128,60 @@ setValidity("CogapsParams", |
104 | 128 |
"number of patterns must be an integer greater than zero" |
105 | 129 |
if (object@nIterations <= 0 | object@nIterations %% 1 != 0) |
106 | 130 |
"number of iterations must be an integer greater than zero" |
107 |
- if (object@alphaA <= 0 | object@alphaP <= 0) |
|
131 |
+ if (object@alphaA <= 0 | object@alphaP <= 0) |
|
108 | 132 |
"alpha parameter must be greater than zero" |
109 |
- if (object@maxGibbsMassA <= 0 | object@maxGibbsMassP <= 0) |
|
133 |
+ if (object@maxGibbsMassA <= 0 | object@maxGibbsMassP <= 0) |
|
110 | 134 |
"maxGibbsMass must be greater than zero" |
111 | 135 |
if (object@seed <= 0 | object@seed %% 1 != 0) |
112 | 136 |
"random seed must be an integer greater than zero" |
113 |
- if (object@minNS <= 1 | object@minNS %% 1 != 0) |
|
114 |
- "minNS must be an integer greater than one" |
|
115 |
- if (object@nSets <= 1 | object@nSets %% 1 != 0) |
|
116 |
- "minNS must be an integer greater than one" |
|
117 |
- if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets) |
|
118 |
- "nSets doesn't match length of explicitSets" |
|
119 |
- if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight)) |
|
120 |
- "samplingWeight has mismatched size with amount of distinct annotations" |
|
121 |
- if (object@cut > object@nPatterns) |
|
122 |
- "cut must be less than or equal to nPatterns" |
|
123 | 137 |
|
124 |
- # check type of explicitSets |
|
125 |
- if (!is.null(object@explicitSets) & !is(object@explicitSets, "list")) |
|
126 |
- "explicitSets must be a list of numeric or character" |
|
127 |
- isNum <- sapply(object@explicitSets, function(s) is(s, "numeric")) |
|
128 |
- isChar <- sapply(object@explicitSets, function(s) is(s, "charcater")) |
|
129 |
- if (!is.null(object@explicitSets) & !(all(isNum) | all(isChar))) |
|
130 |
- "explicitSets must be a list of numeric or character" |
|
138 |
+ if (!(object@whichMatrixFixed %in% c("A", "P", "N"))) |
|
139 |
+ stop("Invalid choice of fixed matrix, must be 'A' or 'P'") |
|
140 |
+ if (!is.null(object@fixedPatterns) & object@whichMatrixFixed == "N") |
|
141 |
+ stop("fixedPatterns passed without setting whichMatrixFixed") |
|
142 |
+ if (object@whichMatrixFixed %in% c("A", "P") & is.null(object@fixedPatterns)) |
|
143 |
+ stop("whichMatrixFixed is set without passing fixedPatterns") |
|
131 | 144 |
|
132 |
- if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets) |
|
133 |
- "wrong number of sets given" |
|
134 |
- if (length(object@samplingWeight) & is.null(names(object@samplingWeight))) |
|
135 |
- "samplingWeight must be a named vector" |
|
136 |
- |
|
137 |
- if (!is.null(object@explicitSets) & !is.null(object@samplingAnnotation)) |
|
138 |
- "explicitSets and samplingAnnotation/samplingWeight are both set" |
|
145 |
+ if (!(object@subsetDim %in% c(0,1,2))) |
|
146 |
+ stop("invalid subset dimension") |
|
147 |
+ if (object@subsetDim > 0 & is.null(object@subsetIndices)) |
|
148 |
+ stop("subsetDim provided without subsetIndices") |
|
139 | 149 |
|
140 | 150 |
if (!is.null(object@distributed)) |
151 |
+ { |
|
141 | 152 |
if (!(object@distributed %in% c("genome-wide", "single-cell"))) |
142 | 153 |
"distributed method must be either 'genome-wide' or 'single-cell'" |
154 |
+ if (!is.null(object@fixedPatterns) & is.null(object@explicitSets)) |
|
155 |
+ "doing manual pattern matching without using explicit subsets" |
|
156 |
+ if (object@distributed == "single-cell" & object@whichMatrixFixed == "P") |
|
157 |
+ "can't fix P matrix when running single-cell CoGAPS" |
|
158 |
+ if (object@distributed == "genome-wide" & object@whichMatrixFixed == "A") |
|
159 |
+ "can't fix A matrix when running genome-wide CoGAPS" |
|
160 |
+ if (object@minNS <= 1 | object@minNS %% 1 != 0) |
|
161 |
+ "minNS must be an integer greater than one" |
|
162 |
+ if (object@nSets <= 1 | object@nSets %% 1 != 0) |
|
163 |
+ "minNS must be an integer greater than one" |
|
164 |
+ if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight)) |
|
165 |
+ "samplingWeight has mismatched size with amount of distinct annotations" |
|
166 |
+ if (object@cut > object@nPatterns) |
|
167 |
+ "cut must be less than or equal to nPatterns" |
|
168 |
+ if (length(object@samplingWeight) & is.null(names(object@samplingWeight))) |
|
169 |
+ "samplingWeight must be a named vector" |
|
170 |
+ |
|
171 |
+ if (!is.null(object@explicitSets)) |
|
172 |
+ { |
|
173 |
+ if (!is(object@explicitSets, "list")) |
|
174 |
+ "explicitSets must be a list" |
|
175 |
+ if (length(object@explicitSets) != object@nSets) |
|
176 |
+ "nSets doesn't match length of explicitSets" |
|
177 |
+ if (!is.null(object@samplingAnnotation)) |
|
178 |
+ "explicitSets and samplingAnnotation/samplingWeight are both set" |
|
179 |
+ isNum <- sapply(object@explicitSets, function(s) is(s, "numeric")) |
|
180 |
+ isChar <- sapply(object@explicitSets, function(s) is(s, "character")) |
|
181 |
+ if (!all(isNum) & !all(isChar)) |
|
182 |
+ "explicitSets must be a list of numeric or character" |
|
183 |
+ } |
|
184 |
+ } |
|
143 | 185 |
} |
144 | 186 |
) |
145 | 187 |
|
... | ... |
@@ -194,6 +236,23 @@ minNS=NULL, maxNS=NULL) |
194 | 236 |
setGeneric("setAnnotationWeights", function(object, annotation, weights) |
195 | 237 |
{standardGeneric("setAnnotationWeights")}) |
196 | 238 |
|
239 |
+#' set the fixed patterns for either the A or the P matrix |
|
240 |
+#' @export |
|
241 |
+#' @docType methods |
|
242 |
+#' @rdname setFixedPatterns-methods |
|
243 |
+#' |
|
244 |
+#' @description these parameters are interrelated so they must be set together |
|
245 |
+#' @param object an object of type CogapsParams |
|
246 |
+#' @param fixedPatterns values for either the A or P matrix |
|
247 |
+#' @param whichMatrixFixed either 'A' or 'P' indicating which matrix is fixed |
|
248 |
+#' @return the modified params object |
|
249 |
+#' @examples |
|
250 |
+#' params <- new("CogapsParams") |
|
251 |
+#' data(GIST) |
|
252 |
+#' params <- setFixedPatterns(params, getSampleFactors(GIST.result), 'P') |
|
253 |
+setGeneric("setFixedPatterns", function(object, fixedPatterns, whichMatrixFixed) |
|
254 |
+ {standardGeneric("setFixedPatterns")}) |
|
255 |
+ |
|
197 | 256 |
#' get the value of a parameter |
198 | 257 |
#' @export |
199 | 258 |
#' @docType methods |
... | ... |
@@ -41,7 +41,8 @@ sampleNames, diagnostics=NULL, ...) |
41 | 41 |
patternNames <- paste("Pattern", 1:ncol(Amean), sep="_") |
42 | 42 |
|
43 | 43 |
if (length(geneNames) != nrow(.Object@featureLoadings)) |
44 |
- stop("number of gene names doesn't match data size") |
|
44 |
+ stop("number of gene names doesn't match data size, ", |
|
45 |
+ length(geneNames), " != ", nrow(.Object@featureLoadings)) |
|
45 | 46 |
if (length(sampleNames) != nrow(.Object@sampleFactors)) |
46 | 47 |
stop("number of sample names doesn't match data size") |
47 | 48 |
|
... | ... |
@@ -82,6 +83,32 @@ setValidity("CogapsResult", |
82 | 83 |
|
83 | 84 |
################################### GENERICS ################################### |
84 | 85 |
|
86 |
+#' return featureLoadings matrix from CogapsResult object |
|
87 |
+#' @export |
|
88 |
+#' @docType methods |
|
89 |
+#' @rdname getFeatureLoadings-methods |
|
90 |
+#' |
|
91 |
+#' @param object an object of type CogapsResult |
|
92 |
+#' @return featureLoadings matrix |
|
93 |
+#' @examples |
|
94 |
+#' data(GIST) |
|
95 |
+#' getFeatureLoadings(GIST.result) |
|
96 |
+setGeneric("getFeatureLoadings", function(object) |
|
97 |
+ {standardGeneric("getFeatureLoadings")}) |
|
98 |
+ |
|
99 |
+#' return sampleFactors matrix from CogapsResult object |
|
100 |
+#' @export |
|
101 |
+#' @docType methods |
|
102 |
+#' @rdname getSampleFactors-methods |
|
103 |
+#' |
|
104 |
+#' @param object an object of type CogapsResult |
|
105 |
+#' @return sampleFactors matrix |
|
106 |
+#' @examples |
|
107 |
+#' data(GIST) |
|
108 |
+#' getSampleFactors(GIST.result) |
|
109 |
+setGeneric("getSampleFactors", function(object) |
|
110 |
+ {standardGeneric("getSampleFactors")}) |
|
111 |
+ |
|
85 | 112 |
#' return chi-sq of final matrices |
86 | 113 |
#' @export |
87 | 114 |
#' @docType methods |
... | ... |
@@ -1,3 +1,17 @@ |
1 |
+#' CogapsParams constructor |
|
2 |
+#' @export |
|
3 |
+#' |
|
4 |
+#' @description create a CogapsParams object |
|
5 |
+#' @param ... parameters for the initialization method |
|
6 |
+#' @return CogapsParams object |
|
7 |
+#' @examples |
|
8 |
+#' params <- CogapsParams(nPatterns=10) |
|
9 |
+#' params |
|
10 |
+CogapsParams <- function(...) |
|
11 |
+{ |
|
12 |
+ new("CogapsParams", ...) |
|
13 |
+} |
|
14 |
+ |
|
1 | 15 |
setMethod("show", signature("CogapsParams"), |
2 | 16 |
function(object) |
3 | 17 |
{ |
... | ... |
@@ -59,11 +73,15 @@ function(object, whichParam, value) |
59 | 73 |
} |
60 | 74 |
else if (whichParam %in% c("nSets", "cut", "minNS", "maxNS")) |
61 | 75 |
{ |
62 |
- stop("please set this parameter with setDistributedParams") |
|
76 |
+ stop("please set \'", whichParam, "\' with setDistributedParams") |
|
63 | 77 |
} |
64 | 78 |
else if (whichParam %in% c("samplingAnnotation", "samplingWeight")) |
65 | 79 |
{ |
66 |
- stop("please set this parameter with setAnnotationWeights") |
|
80 |
+ stop("please set \'", whichParam, "\' with setAnnotationWeights") |
|
81 |
+ } |
|
82 |
+ else if (whichParam %in% c("fixedPatterns", "whichMatrixFixed")) |
|
83 |
+ { |
|
84 |
+ stop("please set \'", whichParam, "\' with setFixedPatterns") |
|
67 | 85 |
} |
68 | 86 |
else if (whichParam == "nPatterns") |
69 | 87 |
{ |
... | ... |
@@ -116,6 +134,18 @@ function(object, annotation, weights) |
116 | 134 |
return(object) |
117 | 135 |
}) |
118 | 136 |
|
137 |
+#' @rdname setFixedPatterns-methods |
|
138 |
+#' @aliases setFixedPatterns |
|
139 |
+setMethod("setFixedPatterns", signature(object="CogapsParams"), |
|
140 |
+function(object, fixedPatterns, whichMatrixFixed) |
|
141 |
+{ |
|
142 |
+ object@fixedPatterns <- fixedPatterns |
|
143 |
+ object@whichMatrixFixed <- whichMatrixFixed |
|
144 |
+ |
|
145 |
+ validObject(object) |
|
146 |
+ return(object) |
|
147 |
+}) |
|
148 |
+ |
|
119 | 149 |
#' @rdname getParam-methods |
120 | 150 |
#' @aliases getParam |
121 | 151 |
setMethod("getParam", signature(object="CogapsParams"), |
... | ... |
@@ -4,6 +4,7 @@ |
4 | 4 |
#' @param returnList list from cogaps_cpp |
5 | 5 |
#' @param allParams list of all parameters |
6 | 6 |
#' @return CogapsResult object |
7 |
+#' @importFrom utils packageVersion |
|
7 | 8 |
createCogapsResult <- function(returnList, allParams) |
8 | 9 |
{ |
9 | 10 |
res <- new("CogapsResult", |
... | ... |
@@ -57,6 +58,22 @@ plot.CogapsResult <- function(x, ...) |
57 | 58 |
lty=1, cex=0.8, col=colors, bty="y", ncol=5) |
58 | 59 |
} |
59 | 60 |
|
61 |
+#' @rdname getFeatureLoadings-methods |
|
62 |
+#' @aliases getFeatureLoadings |
|
63 |
+setMethod("getFeatureLoadings", signature(object="CogapsResult"), |
|
64 |
+function(object) |
|
65 |
+{ |
|
66 |
+ object@featureLoadings |
|
67 |
+}) |
|
68 |
+ |
|
69 |
+#' @rdname getSampleFactors-methods |
|
70 |
+#' @aliases getSampleFactors |
|
71 |
+setMethod("getSampleFactors", signature(object="CogapsResult"), |
|
72 |
+function(object) |
|
73 |
+{ |
|
74 |
+ object@sampleFactors |
|
75 |
+}) |
|
76 |
+ |
|
60 | 77 |
#' @rdname getMeanChiSq-methods |
61 | 78 |
#' @aliases getMeanChiSq |
62 | 79 |
setMethod("getMeanChiSq", signature(object="CogapsResult"), |
... | ... |
@@ -7,11 +7,8 @@ |
7 | 7 |
CoGAPS(data, params = new("CogapsParams"), nThreads = 1, |
8 | 8 |
messages = TRUE, outputFrequency = 500, uncertainty = NULL, |
9 | 9 |
checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000, |
10 |
- checkpointInFile = NULL, transposeData = FALSE, |
|
11 |
- subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL, |
|
12 |
- geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL, |
|
13 |
- whichMatrixFixed = "N", takePumpSamples = FALSE, |
|
14 |
- outputToFile = NULL, workerID = 1, ...) |
|
10 |
+ checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL, |
|
11 |
+ workerID = 1, ...) |
|
15 | 12 |
} |
16 | 13 |
\arguments{ |
17 | 14 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -40,28 +37,8 @@ contained in this file} |
40 | 37 |
for data that is stored as samples x genes since CoGAPS requires data to be |
41 | 38 |
genes x samples} |
42 | 39 |
|
43 |
-\item{subsetIndices}{set of indices to use from the data} |
|
44 |
- |
|
45 |
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset} |
|
46 |
- |
|
47 | 40 |
\item{BPPARAM}{BiocParallel backend} |
48 | 41 |
|
49 |
-\item{geneNames}{vector of names of genes in data} |
|
50 |
- |
|
51 |
-\item{sampleNames}{vector of names of samples in data} |
|
52 |
- |
|
53 |
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the |
|
54 |
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
55 |
-skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
56 |
-matching, as well as fixed runs of standard CoGAPS} |
|
57 |
- |
|
58 |
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed} |
|
59 |
- |
|
60 |
-\item{takePumpSamples}{whether or not to take PUMP samples} |
|
61 |
- |
|
62 |
-\item{outputToFile}{name of a file to save the output to, will create 4 files |
|
63 |
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"} |
|
64 |
- |
|
65 | 42 |
\item{workerID}{if calling CoGAPS in parallel the worker ID can be specified, |
66 | 43 |
only worker 1 prints output and each worker outputs when it finishes, this |
67 | 44 |
is not neccesary when using the default parallel methods (i.e. distributed |
... | ... |
@@ -51,5 +51,22 @@ the rows (cols) to use for weighted sampling} |
51 | 51 |
|
52 | 52 |
\item{\code{samplingWeight}}{[distributed parameter] weights associated with |
53 | 53 |
samplingAnnotation} |
54 |
+ |
|
55 |
+\item{\code{subsetIndices}}{set of indices to use from the data} |
|
56 |
+ |
|
57 |
+\item{\code{subsetDim}}{which dimension (1=rows, 2=cols) to subset} |
|
58 |
+ |
|
59 |
+\item{\code{geneNames}}{vector of names of genes in data} |
|
60 |
+ |
|
61 |
+\item{\code{sampleNames}}{vector of names of samples in data} |
|
62 |
+ |
|
63 |
+\item{\code{fixedPatterns}}{fix either 'A' or 'P' matrix to these values, in the |
|
64 |
+context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
65 |
+skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
66 |
+matching, as well as fixed runs of standard CoGAPS} |
|
67 |
+ |
|
68 |
+\item{\code{whichMatrixFixed}}{either 'A' or 'P', indicating which matrix is fixed} |
|
69 |
+ |
|
70 |
+\item{\code{takePumpSamples}}{whether or not to take PUMP samples} |
|
54 | 71 |
}} |
55 | 72 |
|
56 | 73 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,21 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/methods-CogapsParams.R |
|
3 |
+\name{CogapsParams} |
|
4 |
+\alias{CogapsParams} |
|
5 |
+\title{CogapsParams constructor} |
|
6 |
+\usage{ |
|
7 |
+CogapsParams(...) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{...}{parameters for the initialization method} |
|
11 |
+} |
|
12 |
+\value{ |
|
13 |
+CogapsParams object |
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+create a CogapsParams object |
|
17 |
+} |
|
18 |
+\examples{ |
|
19 |
+params <- CogapsParams(nPatterns=10) |
|
20 |
+params |
|
21 |
+} |
... | ... |
@@ -7,11 +7,8 @@ |
7 | 7 |
GWCoGAPS(data, params = new("CogapsParams"), nThreads = 1, |
8 | 8 |
messages = TRUE, outputFrequency = 500, uncertainty = NULL, |
9 | 9 |
checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000, |
10 |
- checkpointInFile = NULL, transposeData = FALSE, |
|
11 |
- subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL, |
|
12 |
- geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL, |
|
13 |
- whichMatrixFixed = "N", takePumpSamples = FALSE, |
|
14 |
- outputToFile = NULL, workerID = 1, ...) |
|
10 |
+ checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL, |
|
11 |
+ workerID = 1, ...) |
|
15 | 12 |
} |
16 | 13 |
\arguments{ |
17 | 14 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -40,28 +37,8 @@ contained in this file} |
40 | 37 |
for data that is stored as samples x genes since CoGAPS requires data to be |
41 | 38 |
genes x samples} |
42 | 39 |
|
43 |
-\item{subsetIndices}{set of indices to use from the data} |
|
44 |
- |
|
45 |
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset} |
|
46 |
- |
|
47 | 40 |
\item{BPPARAM}{BiocParallel backend} |
48 | 41 |
|
49 |
-\item{geneNames}{vector of names of genes in data} |
|
50 |
- |
|
51 |
-\item{sampleNames}{vector of names of samples in data} |
|
52 |
- |
|
53 |
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the |
|
54 |
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
55 |
-skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
56 |
-matching, as well as fixed runs of standard CoGAPS} |
|
57 |
- |
|
58 |
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed} |
|
59 |
- |
|
60 |
-\item{takePumpSamples}{whether or not to take PUMP samples} |
|
61 |
- |
|
62 |
-\item{outputToFile}{name of a file to save the output to, will create 4 files |
|
63 |
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"} |
|
64 |
- |
|
65 | 42 |
\item{workerID}{if calling CoGAPS in parallel the worker ID can be specified, |
66 | 43 |
only worker 1 prints output and each worker outputs when it finishes, this |
67 | 44 |
is not neccesary when using the default parallel methods (i.e. distributed |
68 | 45 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{convertDataToMatrix} |
|
4 |
+\alias{convertDataToMatrix} |
|
5 |
+\title{convert any acceptable data input to a numeric matrix} |
|
6 |
+\usage{ |
|
7 |
+convertDataToMatrix(data) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{data}{data input} |
|
11 |
+} |
|
12 |
+\value{ |
|
13 |
+data matrix |
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+convert supported R objects containing the data to a |
|
17 |
+numeric matrix, if data is a file name do nothing. Exits with an error |
|
18 |
+if data is not a supported type. |
|
19 |
+} |
|
20 |
+\keyword{internal} |
0 | 21 |
similarity index 63% |
1 | 22 |
rename from man/getNamesFromData.Rd |
2 | 23 |
rename to man/getDimNames.Rd |
... | ... |
@@ -1,19 +1,15 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 | 2 |
% Please edit documentation in R/HelperFunctions.R |
3 |
-\name{getNamesFromData} |
|
4 |
-\alias{getNamesFromData} |
|
3 |
+\name{getDimNames} |
|
4 |
+\alias{getDimNames} |
|
5 | 5 |
\title{extracts gene/sample names from the data} |
6 | 6 |
\usage{ |
7 |
-getNamesFromData(data, allParams, geneNames, sampleNames) |
|
7 |
+getDimNames(data, allParams) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{data}{data matrix} |
11 | 11 |
|
12 | 12 |
\item{allParams}{list of all parameters} |
13 |
- |
|
14 |
-\item{geneNames}{vector of names of genes in data} |
|
15 |
- |
|
16 |
-\item{sampleNames}{vector of names of samples in data} |
|
17 | 13 |
} |
18 | 14 |
\value{ |
19 | 15 |
list of all parameters with added gene names |
20 | 16 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,25 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{getFeatureLoadings} |
|
5 |
+\alias{getFeatureLoadings} |
|
6 |
+\alias{getFeatureLoadings,CogapsResult-method} |
|
7 |
+\title{return featureLoadings matrix from CogapsResult object} |
|
8 |
+\usage{ |
|
9 |
+getFeatureLoadings(object) |
|
10 |
+ |
|
11 |
+\S4method{getFeatureLoadings}{CogapsResult}(object) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsResult} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+featureLoadings matrix |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+return featureLoadings matrix from CogapsResult object |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+data(GIST) |
|
24 |
+getFeatureLoadings(GIST.result) |
|
25 |
+} |
18 | 22 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,25 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{getSampleFactors} |
|
5 |
+\alias{getSampleFactors} |
|
6 |
+\alias{getSampleFactors,CogapsResult-method} |
|
7 |
+\title{return sampleFactors matrix from CogapsResult object} |
|
8 |
+\usage{ |
|
9 |
+getSampleFactors(object) |
|
10 |
+ |
|
11 |
+\S4method{getSampleFactors}{CogapsResult}(object) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsResult} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+sampleFactors matrix |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+return sampleFactors matrix from CogapsResult object |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+data(GIST) |
|
24 |
+getSampleFactors(GIST.result) |
|
25 |
+} |
0 | 26 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,18 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{getValueOrRds} |
|
4 |
+\alias{getValueOrRds} |
|
5 |
+\title{get input that might be an RDS file} |
|
6 |
+\usage{ |
|
7 |
+getValueOrRds(input) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{input}{some user input} |
|
11 |
+} |
|
12 |
+\value{ |
|
13 |
+if input is an RDS file, read it - otherwise return input |
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+get input that might be an RDS file |
|
17 |
+} |
|
18 |
+\keyword{internal} |
0 | 19 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,18 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{isRdsFile} |
|
4 |
+\alias{isRdsFile} |
|
5 |
+\title{checks if file is rds format} |
|
6 |
+\usage{ |
|
7 |
+isRdsFile(file) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{file}{path to file} |
|
11 |
+} |
|
12 |
+\value{ |
|
13 |
+TRUE if file is .rds, FALSE if not |
|
14 |
+} |
|
15 |
+\description{ |
|
16 |
+checks if file is rds format |
|
17 |
+} |
|
18 |
+\keyword{internal} |
... | ... |
@@ -7,11 +7,8 @@ |
7 | 7 |
scCoGAPS(data, params = new("CogapsParams"), nThreads = 1, |
8 | 8 |
messages = TRUE, outputFrequency = 500, uncertainty = NULL, |
9 | 9 |
checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000, |
10 |
- checkpointInFile = NULL, transposeData = FALSE, |
|
11 |
- subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL, |
|
12 |
- geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL, |
|
13 |
- whichMatrixFixed = "N", takePumpSamples = FALSE, |
|
14 |
- outputToFile = NULL, workerID = 1, ...) |
|
10 |
+ checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL, |
|
11 |
+ workerID = 1, ...) |
|
15 | 12 |
} |
16 | 13 |
\arguments{ |
17 | 14 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -40,28 +37,8 @@ contained in this file} |
40 | 37 |
for data that is stored as samples x genes since CoGAPS requires data to be |
41 | 38 |
genes x samples} |
42 | 39 |
|
43 |
-\item{subsetIndices}{set of indices to use from the data} |
|
44 |
- |
|
45 |
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset} |
|
46 |
- |
|
47 | 40 |
\item{BPPARAM}{BiocParallel backend} |
48 | 41 |
|
49 |
-\item{geneNames}{vector of names of genes in data} |
|
50 |
- |
|
51 |
-\item{sampleNames}{vector of names of samples in data} |
|
52 |
- |
|
53 |
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the |
|
54 |
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is |
|
55 |
-skipped and fixedPatterns is used for all sets - allowing manual pattern |
|
56 |
-matching, as well as fixed runs of standard CoGAPS} |
|
57 |
- |
|
58 |
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed} |
|
59 |
- |
|
60 |
-\item{takePumpSamples}{whether or not to take PUMP samples} |
|
61 |
- |
|
62 |
-\item{outputToFile}{name of a file to save the output to, will create 4 files |
|
63 |
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"} |
|
64 |
- |
|
65 | 42 |
\item{workerID}{if calling CoGAPS in parallel the worker ID can be specified, |
66 | 43 |
only worker 1 prints output and each worker outputs when it finishes, this |
67 | 44 |
is not neccesary when using the default parallel methods (i.e. distributed |
68 | 45 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{setFixedPatterns} |
|
5 |
+\alias{setFixedPatterns} |
|
6 |
+\alias{setFixedPatterns,CogapsParams-method} |
|
7 |
+\title{set the fixed patterns for either the A or the P matrix} |
|
8 |
+\usage{ |
|
9 |
+setFixedPatterns(object, fixedPatterns, whichMatrixFixed) |
|
10 |
+ |
|
11 |
+\S4method{setFixedPatterns}{CogapsParams}(object, fixedPatterns, |
|
12 |
+ whichMatrixFixed) |
|
13 |
+} |
|
14 |
+\arguments{ |
|
15 |
+\item{object}{an object of type CogapsParams} |
|
16 |
+ |
|
17 |
+\item{fixedPatterns}{values for either the A or P matrix} |
|
18 |
+ |
|
19 |
+\item{whichMatrixFixed}{either 'A' or 'P' indicating which matrix is fixed} |
|
20 |
+} |
|
21 |
+\value{ |
|
22 |
+the modified params object |
|
23 |
+} |
|
24 |
+\description{ |
|
25 |
+these parameters are interrelated so they must be set together |
|
26 |
+} |
|
27 |
+\examples{ |
|
28 |
+params <- new("CogapsParams") |
|
29 |
+data(GIST) |
|
30 |
+params <- setFixedPatterns(params, getSampleFactors(GIST.result), 'P') |
|
31 |
+} |
... | ... |
@@ -49,12 +49,12 @@ GapsParameters getGapsParameters(const DataType &data, const Rcpp::List &allPara |
49 | 49 |
const Rcpp::S4 &gapsParams(allParams["gaps"]); |
50 | 50 |
|
51 | 51 |
// check if subsetting data |
52 |
- unsigned subsetDim = Rcpp::as<unsigned>(allParams["subsetDim"]); |
|
53 |
- bool subsetGenes = subsetDim == 1; |
|
52 |
+ unsigned subsetDim = Rcpp::as<unsigned>(gapsParams.slot("subsetDim")); |
|
53 |
+ bool subsetGenes = (subsetDim == 1); |
|
54 | 54 |
std::vector<unsigned> subset; |
55 | 55 |
if (subsetDim > 0) |
56 | 56 |
{ |
57 |
- Rcpp::IntegerVector subsetR = allParams["subsetIndices"]; |
|
57 |
+ Rcpp::IntegerVector subsetR = gapsParams.slot("subsetIndices"); |
|
58 | 58 |
subset = Rcpp::as< std::vector<unsigned> >(subsetR); |
59 | 59 |
} |
60 | 60 |
|
... | ... |
@@ -71,7 +71,7 @@ GapsParameters getGapsParameters(const DataType &data, const Rcpp::List &allPara |
71 | 71 |
params.outputFrequency = allParams["outputFrequency"]; |
72 | 72 |
params.checkpointOutFile = Rcpp::as<std::string>(allParams["checkpointOutFile"]); |
73 | 73 |
params.checkpointInterval = allParams["checkpointInterval"]; |
74 |
- params.takePumpSamples = allParams["takePumpSamples"]; |
|
74 |
+ params.takePumpSamples = gapsParams.slot("takePumpSamples"); |
|
75 | 75 |
|
76 | 76 |
// extract model specific parameters from list |
77 | 77 |
params.seed = gapsParams.slot("seed"); |
... | ... |
@@ -85,11 +85,11 @@ GapsParameters getGapsParameters(const DataType &data, const Rcpp::List &allPara |
85 | 85 |
params.useSparseOptimization = gapsParams.slot("sparseOptimization"); |
86 | 86 |
|
87 | 87 |
// check if using fixed matrix |
88 |
- params.whichMatrixFixed = Rcpp::as<char>(allParams["whichMatrixFixed"]); |
|
88 |
+ params.whichMatrixFixed = Rcpp::as<char>(gapsParams.slot("whichMatrixFixed")); |
|
89 | 89 |
if (params.whichMatrixFixed != 'N') |
90 | 90 |
{ |
91 | 91 |
params.useFixedPatterns = true; |
92 |
- Rcpp::NumericMatrix fixedMatrixR = allParams["fixedPatterns"]; |
|
92 |
+ Rcpp::NumericMatrix fixedMatrixR = gapsParams.slot("fixedPatterns"); |
|
93 | 93 |
params.fixedPatterns = convertRMatrix(fixedMatrixR); |
94 | 94 |
} |
95 | 95 |
|
... | ... |
@@ -122,17 +122,17 @@ const DataType &uncertainty) |
122 | 122 |
|
123 | 123 |
// convert R parameters to GapsParameters struct |
124 | 124 |
GapsParameters params(getGapsParameters(data, allParams)); |
125 |
+#ifdef GAPS_DEBUG |
|
126 |
+ if (params.printMessages) |
|
127 |
+ { |
|
128 |
+ params.print(); |
|
129 |
+ } |
|
130 |
+#endif |
|
125 | 131 |
|
126 | 132 |
// create GapsRunner, note we must first initialize the random generator |
127 | 133 |
GapsRandomState randState(params.seed); |
128 | 134 |
GapsResult result(gaps::run(data, params, uncertainty, &randState)); |
129 | 135 |
|
130 |
- // write result to file if requested |
|
131 |
- if (allParams["outputToFile"] != R_NilValue) |
|
132 |
- { |
|
133 |
- result.writeToFile(Rcpp::as<std::string>(allParams["outputToFile"])); |
|
134 |
- } |
|
135 |
- |
|
136 | 136 |
// return R list |
137 | 137 |
return Rcpp::List::create( |
138 | 138 |
Rcpp::Named("Amean") = createRMatrix(result.Amean), |
... | ... |
@@ -1,4 +1,5 @@ |
1 | 1 |
#include "GapsParameters.h" |
2 |
+#include "utils/GapsPrint.h" |
|
2 | 3 |
|
3 | 4 |
Archive& operator<<(Archive &ar, const GapsParameters &p) |
4 | 5 |
{ |
... | ... |
@@ -15,6 +16,58 @@ Archive& operator>>(Archive &ar, GapsParameters &p) |
15 | 16 |
>> p.singleCell >> p.useSparseOptimization >> p.checkpointInterval; |
16 | 17 |
return ar; |
17 | 18 |
} |
19 |
+ |
|
20 |
+void GapsParameters::print() const |
|
21 |
+{ |
|
22 |
+ gaps_printf("\n---- C++ Parameters ----\n\n"); |
|
23 |
+ |
|
24 |
+ gaps_printf("transposeData: %s\n", transposeData ? "TRUE" : "FALSE"); |
|
25 |
+ gaps_printf("nGenes: %d\n", nGenes); |
|
26 |
+ gaps_printf("nSamples: %d\n", nSamples); |
|
27 |
+ gaps_printf("nPatterns: %d\n", nPatterns); |
|
28 |
+ gaps_printf("nIterations: %d\n", nIterations); |
|
29 |
+ gaps_printf("seed: %d\n", seed); |
|
30 |
+ gaps_printf("\n"); |
|
31 |
+ |
|
32 |
+ gaps_printf("maxThreads: %d\n", maxThreads); |
|
33 |
+ gaps_printf("printMessages: %s\n", printMessages ? "TRUE" : "FALSE"); |
|
34 |
+ gaps_printf("outputFrequency: %d\n", outputFrequency); |
|
35 |
+ gaps_printf("\n"); |
|
36 |
+ |
|
37 |
+ gaps_printf("singleCell: %s\n", singleCell ? "TRUE" : "FALSE"); |
|
38 |
+ gaps_printf("useSparseOptimization: %s\n", useSparseOptimization ? "TRUE" : "FALSE"); |
|
39 |
+ gaps_printf("takePumpSamples: %s\n", takePumpSamples ? "TRUE" : "FALSE"); |
|
40 |
+ gaps_printf("\n"); |
|
41 |
+ |
|
42 |
+ gaps_printf("runningDistributed: %s\n", runningDistributed ? "TRUE" : "FALSE"); |
|
43 |
+ gaps_printf("printThreadUsage: %s\n", printThreadUsage ? "TRUE" : "FALSE"); |
|
44 |
+ gaps_printf("workerID: %d\n", workerID); |
|
45 |
+ gaps_printf("\n"); |
|
46 |
+ |
|
47 |
+ gaps_printf("alphaA: %f\n", alphaA); |
|
48 |
+ gaps_printf("alphaP: %f\n", alphaP); |
|
49 |
+ gaps_printf("maxGibbsMassA: %f\n", maxGibbsMassA); |
|
50 |
+ gaps_printf("maxGibbsMassP: %f\n", maxGibbsMassP); |
|
51 |
+ gaps_printf("\n"); |
|
52 |
+ |
|
53 |
+ gaps_printf("useCheckPoint: %s\n", useCheckPoint ? "TRUE" : "FALSE"); |
|
54 |
+ gaps_printf("checkpointInterval: %d\n", checkpointInterval); |
|
55 |
+ gaps_printf("checkpointFile: %s\n", checkpointFile.c_str()); |
|
56 |
+ gaps_printf("checkpointOutFile: %s\n", checkpointOutFile.c_str()); |
|
57 |
+ gaps_printf("\n"); |
|
58 |
+ |
|
59 |
+ gaps_printf("subsetData: %s\n", subsetData ? "TRUE" : "FALSE"); |
|
60 |
+ gaps_printf("subsetGenes: %s\n", subsetGenes ? "TRUE" : "FALSE"); |
|
61 |
+ gaps_printf("dataIndicesSubset.size(): %lu\n", dataIndicesSubset.size()); |
|
62 |
+ gaps_printf("\n"); |
|
63 |
+ |
|
64 |
+ gaps_printf("useFixedPatterns: %s\n", useFixedPatterns ? "TRUE" : "FALSE"); |
|
65 |
+ gaps_printf("whichMatrixFixed: %c\n", whichMatrixFixed); |
|
66 |
+ gaps_printf("fixedPatterns.nRow(): %d\n", fixedPatterns.nRow()); |
|
67 |
+ gaps_printf("fixedPatterns.nCol(): %d\n", fixedPatterns.nCol()); |
|
68 |
+ |
|
69 |
+ gaps_printf("\n------------------------\n\n"); |
|
70 |
+} |
|
18 | 71 |
|
19 | 72 |
void GapsParameters::calculateDataDimensions(const std::string &file) |
20 | 73 |
{ |
... | ... |
@@ -22,10 +22,12 @@ public: |
22 | 22 |
bool t_subsetData=false, bool t_subsetGenes=false, |
23 | 23 |
const std::vector<unsigned> t_dataIndicesSubset=std::vector<unsigned>()); |
24 | 24 |
|
25 |
+ void print() const; |
|
26 |
+ |
|
25 | 27 |
Matrix fixedPatterns; |
26 | 28 |
|
27 | 29 |
std::vector<unsigned> dataIndicesSubset; |
28 |
- |
|
30 |
+ |
|
29 | 31 |
std::string checkpointFile; |
30 | 32 |
std::string checkpointOutFile; |
31 | 33 |
|
... | ... |
@@ -15,9 +15,10 @@ getHistogram <- function(set, anno) |
15 | 15 |
test_that("Subsetting Data", |
16 | 16 |
{ |
17 | 17 |
data(GIST) |
18 |
- #testMatrix <- GIST.matrix |
|
19 | 18 |
|
19 |
+ ## BAD TEST TO RUN |
|
20 | 20 |
# sampling with weights |
21 |
+ #testMatrix <- GIST.matrix |
|
21 | 22 |
#anno <- sample(letters[1:5], size=nrow(testMatrix), replace=TRUE) |
22 | 23 |
#weights <- c(0,1,4,1,0) |
23 | 24 |
#names(weights) <- letters[1:5] |
... | ... |
@@ -29,7 +30,6 @@ test_that("Subsetting Data", |
29 | 30 |
#hist <- sapply(getSubsets(result), getHistogram, anno=anno) |
30 | 31 |
#freq <- unname(rowSums(hist) / sum(hist)) |
31 | 32 |
|
32 |
- # dumb test |
|
33 | 33 |
#expect_true(all.equal(freq, unname(weights / sum(weights)), tol=0.1)) |
34 | 34 |
|
35 | 35 |
# running cogaps with given subsets |
... | ... |
@@ -37,9 +37,11 @@ test_that("Subsetting Data", |
37 | 37 |
mat <- GIST.matrix |
38 | 38 |
rownames(mat) <- NULL |
39 | 39 |
colnames(mat) <- NULL |
40 |
- result <- GWCoGAPS(mat, nPatterns=7, explicitSets=sets, nIterations=100, |
|
41 |
- messages=FALSE, whichMatrixFixed="P", |
|
42 |
- fixedPatterns=matrix(1,nrow=ncol(mat), ncol=7)) |
|
40 |
+ nPatterns <- ncol(getSampleFactors(GIST.result)) |
|
41 |
+ params <- CogapsParams() |
|
42 |
+ params <- setFixedPatterns(params, getSampleFactors(GIST.result), "P") |
|
43 |
+ result <- GWCoGAPS(mat, params, nPatterns=nPatterns, explicitSets=sets, |
|
44 |
+ nIterations=100, messages=FALSE, seed=42) |
|
43 | 45 |
subsets <- lapply(getSubsets(result), getIndices) |
44 | 46 |
expect_true(all(sapply(1:4, function(i) all.equal(sets[[i]], subsets[[i]])))) |
45 | 47 |
}) |
46 | 48 |
\ No newline at end of file |
... | ... |
@@ -98,16 +98,16 @@ test_that("Valid Top-Level CoGAPS Calls", |
98 | 98 |
|
99 | 99 |
# single-cell CoGAPS |
100 | 100 |
expect_error(res <- CoGAPS(testDataFrame, nIterations=100, |
101 |
- outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell", singleCell=TRUE, |
|
102 |
- transposeData=TRUE), NA) |
|
101 |
+ outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell", |
|
102 |
+ singleCell=TRUE, transposeData=TRUE), NA) |
|
103 | 103 |
expect_true(no_na_in_result(res)) |
104 | 104 |
|
105 | 105 |
expect_equal(nrow(res@featureLoadings), 9) |
106 | 106 |
expect_equal(nrow(res@sampleFactors), 1363) |
107 | 107 |
|
108 | 108 |
expect_error(res <- CoGAPS(gistTsvPath, nIterations=100, |
109 |
- outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell", singleCell=TRUE, |
|
110 |
- transposeData=TRUE), NA) |
|
109 |
+ outputFrequency=50, seed=1, messages=FALSE, distributed="single-cell", |
|
110 |
+ singleCell=TRUE, transposeData=TRUE), NA) |
|
111 | 111 |
expect_true(no_na_in_result(res)) |
112 | 112 |
|
113 | 113 |
expect_equal(nrow(res@featureLoadings), 9) |
... | ... |
@@ -165,31 +165,32 @@ test_that("Valid Top-Level CoGAPS Calls", |
165 | 165 |
nPat <- 3 |
166 | 166 |
fixedA <- matrix(runif(nrow(testMatrix) * nPat, 1, 10), ncol=nPat) |
167 | 167 |
fixedP <- matrix(runif(ncol(testMatrix) * nPat, 1, 10), ncol=nPat) |
168 |
- res <- CoGAPS(testMatrix, nIterations=100, outputFrequency=100, seed=42, |
|
169 |
- messages=FALSE, nPatterns=nPat, whichMatrixFixed="A", |
|
170 |
- fixedPatterns=fixedA) |
|
168 |
+ params <- CogapsParams() |
|
169 |
+ params <- setFixedPatterns(params, fixedA, "A") |
|
170 |
+ res <- CoGAPS(testMatrix, params, nIterations=100, outputFrequency=100, |
|
171 |
+ seed=42, messages=FALSE, nPatterns=nPat) |
|
171 | 172 |
|
172 | 173 |
expect_true(all(dim(res@featureLoadings) == dim(fixedA))) |
173 | 174 |
for (i in 1:ncol(fixedA)) |
174 | 175 |
fixedA[,i] <- fixedA[,i] * (res@featureLoadings[1,i] / fixedA[1,i]) |
175 | 176 |
all.equal(unname(res@featureLoadings), fixedA, tolerance=0.001) |
176 | 177 |
|
177 |
- res <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=100, seed=42, |
|
178 |
- messages=FALSE, nPatterns=nPat, whichMatrixFixed="P", |
|
179 |
- fixedPatterns=fixedP) |
|
178 |
+ params <- CogapsParams() |
|
179 |
+ params <- setFixedPatterns(params, fixedP, "P") |
|
180 |
+ res <- CoGAPS(gistCsvPath, params, nIterations=100, outputFrequency=100, seed=42, |
|
181 |
+ messages=FALSE, nPatterns=nPat) |
|
180 | 182 |
|
181 | 183 |
expect_true(all(dim(res@sampleFactors) == dim(fixedP))) |
182 | 184 |
for (i in 1:ncol(fixedP)) |
183 | 185 |
fixedP[,i] <- fixedP[,i] * (res@sampleFactors[1,i] / fixedP[1,i]) |
184 | 186 |
all.equal(unname(res@sampleFactors), fixedP, tolerance=0.001) |
185 | 187 |
|
186 |
- |
|
187 | 188 |
# make sure that "none" gets converted to NULL for distributed |
188 | 189 |
res <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=100, seed=42, |
189 | 190 |
messages=FALSE, nPatterns=3, distributed="none") |
190 | 191 |
expect_true(is.null(res@metadata$params@distributed)) |
191 | 192 |
|
192 |
- params <- new("CogapsParams") |
|
193 |
+ params <- CogapsParams() |
|
193 | 194 |
params <- setParam(params, "distributed", "none") |
194 | 195 |
res <- CoGAPS(gistCsvPath, params, nIterations=100, outputFrequency=100, seed=42, |
195 | 196 |
messages=FALSE, nPatterns=3) |
... | ... |
@@ -434,7 +434,9 @@ result <- GWCoGAPS(GIST.matrix, messages=FALSE) |
434 | 434 |
consensusMatrix <- getUnmatchedPatterns(result)[[1]] |
435 | 435 |
|
436 | 436 |
# run with our custom matched patterns matrix |
437 |
-GWCoGAPS(GIST.matrix, fixedPatterns=consensusMatrix, explicitSets=getSubsets(result), whichMatrixFixed='P') |
|
437 |
+params <- CogapsParams() |
|
438 |
+params <- setFixedPatterns(params, consensusMatrix, 'P') |
|
439 |
+GWCoGAPS(GIST.matrix, params, explicitSets=getSubsets(result)) |
|
438 | 440 |
``` |
439 | 441 |
|
440 | 442 |
# sessionInfo() |
441 | 443 |
deleted file mode 100755 |
... | ... |
@@ -1,71 +0,0 @@ |
1 |
-## Pattern Markers |
|
2 |
- |
|
3 |
-## Selecting Appropiate Number of Patterns |
|
4 |
- |
|
5 |
-Selecting the best value for *nPatterns* is the most difficult part of the |
|
6 |
-analysis. For starters, there is not one "best" value for the number of |
|
7 |
-patterns - various numbers of patterns can capture various levels of |
|
8 |
-granularity in the data. To further complicate the problem, there's not a |
|
9 |
-clear way to compare runs for different numbers of patterns. |
|
10 |
- |
|
11 |
-Here we show the simplest approach of selecting dimensionality by plotting the |
|
12 |