... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
Package: CoGAPS |
2 |
-Version: 3.3.5 |
|
2 |
+Version: 3.3.6 |
|
3 | 3 |
Date: 2018-04-24 |
4 | 4 |
Title: Coordinated Gene Activity in Pattern Sets |
5 | 5 |
Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey, |
... | ... |
@@ -49,8 +49,10 @@ Collate: |
49 | 49 |
'class-CogapsParams.R' |
50 | 50 |
'CoGAPS.R' |
51 | 51 |
'DistributedCogaps.R' |
52 |
+ 'HelperFunctions.R' |
|
52 | 53 |
'Package.R' |
53 | 54 |
'RcppExports.R' |
55 |
+ 'SubsetData.R' |
|
54 | 56 |
'class-CogapsResult.R' |
55 | 57 |
'methods-CogapsParams.R' |
56 | 58 |
'methods-CogapsResult.R' |
... | ... |
@@ -9,15 +9,20 @@ export(calcCoGAPSStat) |
9 | 9 |
export(calcGeneGSStat) |
10 | 10 |
export(calcZ) |
11 | 11 |
export(computeGeneGSProb) |
12 |
+export(getClusteredPatterns) |
|
13 |
+export(getCorrelationToMeanPattern) |
|
12 | 14 |
export(getMeanChiSq) |
13 | 15 |
export(getOriginalParameters) |
14 | 16 |
export(getParam) |
17 |
+export(getSubsets) |
|
18 |
+export(getUnmatchedPatterns) |
|
15 | 19 |
export(getVersion) |
16 | 20 |
export(patternMarkers) |
17 | 21 |
export(plotPatternMarkers) |
18 | 22 |
export(plotResiduals) |
19 | 23 |
export(reconstructGene) |
20 | 24 |
export(scCoGAPS) |
25 |
+export(setAnnotationWeights) |
|
21 | 26 |
export(setDistributedParams) |
22 | 27 |
export(setParam) |
23 | 28 |
exportClasses(CogapsParams) |
... | ... |
@@ -45,6 +50,7 @@ importFrom(methods,"slot<-") |
45 | 50 |
importFrom(methods,callNextMethod) |
46 | 51 |
importFrom(methods,is) |
47 | 52 |
importFrom(methods,new) |
53 |
+importFrom(methods,show) |
|
48 | 54 |
importFrom(methods,slot) |
49 | 55 |
importFrom(methods,slotNames) |
50 | 56 |
importFrom(methods,validObject) |
... | ... |
@@ -1,51 +1,17 @@ |
1 | 1 |
#' @include class-CogapsParams.R |
2 | 2 |
NULL |
3 | 3 |
|
4 |
-#' Checks if file is supported |
|
5 |
-#' @param file path to file |
|
6 |
-#' @return TRUE if file is supported, FALSE if not |
|
7 |
-#' @importFrom tools file_ext |
|
8 |
-supported <- function(file) |
|
9 |
-{ |
|
10 |
- if (!is(file, "character")) |
|
11 |
- return(FALSE) |
|
12 |
- return(tools::file_ext(file) %in% c("tsv", "csv", "mtx")) |
|
13 |
-} |
|
14 |
- |
|
15 |
-#' get number of rows from supported file name or matrix |
|
16 |
-#' @param data either a file name or a matrix |
|
17 |
-#' @return number of rows |
|
18 |
-#' @importFrom data.table fread |
|
19 |
-#' @importFrom tools file_ext |
|
20 |
-nrow_helper <- function(data) |
|
21 |
-{ |
|
22 |
- if (is(data, "character")) |
|
23 |
- { |
|
24 |
- return(switch(tools::file_ext(data), |
|
25 |
- "csv" = nrow(data.table::fread(data, select=1)), |
|
26 |
- "tsv" = nrow(data.table::fread(data, select=1)), |
|
27 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]) |
|
28 |
- )) |
|
29 |
- } |
|
30 |
- return(nrow(data)) |
|
31 |
-} |
|
32 |
- |
|
33 |
-#' get number of columns from supported file name or matrix |
|
34 |
-#' @param data either a file name or a matrix |
|
35 |
-#' @return number of columns |
|
36 |
-#' @importFrom data.table fread |
|
37 |
-#' @importFrom tools file_ext |
|
38 |
-ncol_helper <- function(data) |
|
4 |
+#' Information About Package Compilation |
|
5 |
+#' @export |
|
6 |
+#' |
|
7 |
+#' @details returns information about how the package was compiled, i.e. which |
|
8 |
+#' compiler/version was used, which compile time options were enabled, etc... |
|
9 |
+#' @return string containing build report |
|
10 |
+#' @examples |
|
11 |
+#' CoGAPS::buildReport() |
|
12 |
+buildReport <- function() |
|
39 | 13 |
{ |
40 |
- if (is(data, "character")) |
|
41 |
- { |
|
42 |
- return(switch(tools::file_ext(data), |
|
43 |
- "csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
44 |
- "tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
45 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]) |
|
46 |
- )) |
|
47 |
- } |
|
48 |
- return(ncol(data)) |
|
14 |
+ getBuildReport_cpp() |
|
49 | 15 |
} |
50 | 16 |
|
51 | 17 |
#' CoGAPS Matrix Factorization Algorithm |
... | ... |
@@ -73,6 +39,11 @@ ncol_helper <- function(data) |
73 | 39 |
#' for data that is stored as samples x genes since CoGAPS requires data to be |
74 | 40 |
#' genes x samples |
75 | 41 |
#' @param BPPARAM BiocParallel backend |
42 |
+#' @param geneNames vector of names of genes in data |
|
43 |
+#' @param sampleNames vector of names of samples in data |
|
44 |
+#' @param matchedPatterns manually matched patterns for distributed CoGAPS |
|
45 |
+#' @param outputToFile name of a file to save the output to, will create 4 files |
|
46 |
+#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension" |
|
76 | 47 |
#' @param ... allows for overwriting parameters in params |
77 | 48 |
#' @return CogapsResult object |
78 | 49 |
#' @examples |
... | ... |
@@ -94,9 +65,12 @@ ncol_helper <- function(data) |
94 | 65 |
CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
95 | 66 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
96 | 67 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
97 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
|
68 |
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
69 |
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, |
|
70 |
+outputToFile=NULL, ...) |
|
98 | 71 |
{ |
99 | 72 |
# store all parameters in a list and parse parameters from ... |
73 |
+ validObject(params) |
|
100 | 74 |
allParams <- list("gaps"=params, |
101 | 75 |
"nThreads"=nThreads, |
102 | 76 |
"messages"=messages, |
... | ... |
@@ -106,12 +80,14 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
106 | 80 |
"checkpointInFile"=checkpointInFile, |
107 | 81 |
"transposeData"=transposeData, |
108 | 82 |
"bpBackend"=BPPARAM, |
83 |
+ "matchedPatterns"=matchedPatterns, |
|
84 |
+ "outputToFile"=outputToFile, |
|
109 | 85 |
"whichMatrixFixed"=NULL # internal parameter |
110 | 86 |
) |
111 | 87 |
allParams <- parseExtraParams(allParams, list(...)) |
112 | 88 |
|
113 | 89 |
# display start up message for the user |
114 |
- startupMessage(data, allParams$transposeData, allParams$gaps@distributed) |
|
90 |
+ startupMessage(data, allParams) |
|
115 | 91 |
|
116 | 92 |
# check file extension |
117 | 93 |
if (is(data, "character") & !supported(data)) |
... | ... |
@@ -125,7 +101,7 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
125 | 101 |
if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix")) |
126 | 102 |
stop("uncertainty must be a matrix unless data is a file path") |
127 | 103 |
if (!is(data, "character")) |
128 |
- checkDataMatrix(data, uncertainty, params) |
|
104 |
+ checkDataMatrix(data, uncertainty, allParams$gaps) |
|
129 | 105 |
|
130 | 106 |
# check single cell parameter |
131 | 107 |
if (!is.null(allParams$gaps@distributed)) |
... | ... |
@@ -136,16 +112,15 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
136 | 112 |
stop("can't run multi-threaded and distributed CoGAPS at the same time") |
137 | 113 |
|
138 | 114 |
# convert data to matrix |
115 |
+ if (is(data, "matrix")) |
|
116 |
+ data <- data |
|
139 | 117 |
if (is(data, "data.frame")) |
140 | 118 |
data <- data.matrix(data) |
141 | 119 |
else if (is(data, "SummarizedExperiment")) |
142 | 120 |
data <- SummarizedExperiment::assay(data, "counts") |
143 | 121 |
else if (is(data, "SingleCellExperiment")) |
144 | 122 |
data <- SummarizedExperiment::assay(data, "counts") |
145 |
- |
|
146 |
- # label matrix |
|
147 |
- |
|
148 |
- |
|
123 |
+ |
|
149 | 124 |
# determine which function to call cogaps algorithm |
150 | 125 |
if (!is.null(allParams$gaps@distributed)) |
151 | 126 |
dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps |
... | ... |
@@ -160,9 +135,24 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
160 | 135 |
if (!is.null(allParams$gaps@distributed)) |
161 | 136 |
stop("checkpoints not supported for distributed cogaps") |
162 | 137 |
else |
163 |
- message("Running CoGAPS from a checkpoint") |
|
138 |
+ cat("Running CoGAPS from a checkpoint") |
|
164 | 139 |
} |
165 | 140 |
|
141 |
+ # get gene/sample names |
|
142 |
+ if (is.null(geneNames)) geneNames <- getGeneNames(data, allParams$transposeData) |
|
143 |
+ if (is.null(sampleNames)) sampleNames <- getSampleNames(data, allParams$transposeData) |
|
144 |
+ |
|
145 |
+ nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
|
146 |
+ nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
|
147 |
+ |
|
148 |
+ if (length(geneNames) != nGenes) |
|
149 |
+ stop("incorrect number of gene names given") |
|
150 |
+ if (length(sampleNames) != nSamples) |
|
151 |
+ stop("incorrect number of sample names given") |
|
152 |
+ |
|
153 |
+ allParams$geneNames <- geneNames |
|
154 |
+ allParams$sampleNames <- sampleNames |
|
155 |
+ |
|
166 | 156 |
# run cogaps |
167 | 157 |
gapsReturnList <- dispatchFunc(data, allParams, uncertainty) |
168 | 158 |
|
... | ... |
@@ -172,8 +162,9 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
172 | 162 |
Asd = gapsReturnList$Asd, |
173 | 163 |
Pmean = gapsReturnList$Pmean, |
174 | 164 |
Psd = gapsReturnList$Psd, |
175 |
- seed = gapsReturnList$seed, |
|
176 | 165 |
meanChiSq = gapsReturnList$meanChiSq, |
166 |
+ geneNames = gapsReturnList$geneNames, |
|
167 |
+ sampleNames = gapsReturnList$sampleNames, |
|
177 | 168 |
diagnostics = append(gapsReturnList$diagnostics, |
178 | 169 |
list("params"=allParams$gaps, "version"=utils::packageVersion("CoGAPS"))) |
179 | 170 |
)) |
... | ... |
@@ -189,13 +180,14 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
189 | 180 |
scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
190 | 181 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
191 | 182 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
192 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
|
183 |
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
184 |
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
|
193 | 185 |
{ |
194 | 186 |
params@distributed <- "single-cell" |
195 | 187 |
params@singleCell <- TRUE |
196 | 188 |
CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty, |
197 | 189 |
checkpointOutFile, checkpointInterval, checkpointInFile, transposeData, |
198 |
- BPPARAM, ...) |
|
190 |
+ BPPARAM, geneNames, sampleNames, matchedPatterns, ...) |
|
199 | 191 |
} |
200 | 192 |
|
201 | 193 |
#' Genome Wide CoGAPS |
... | ... |
@@ -208,83 +200,11 @@ checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
208 | 200 |
GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, |
209 | 201 |
messages=TRUE, outputFrequency=500, uncertainty=NULL, |
210 | 202 |
checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000, |
211 |
-checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, ...) |
|
203 |
+checkpointInFile=NULL, transposeData=FALSE, BPPARAM=NULL, |
|
204 |
+geneNames=NULL, sampleNames=NULL, matchedPatterns=NULL, ...) |
|
212 | 205 |
{ |
213 | 206 |
params@distributed <- "genome-wide" |
214 | 207 |
CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty, |
215 | 208 |
checkpointOutFile, checkpointInterval, checkpointInFile, transposeData, |
216 |
- BPPARAM, ...) |
|
217 |
-} |
|
218 |
- |
|
219 |
-#' write start up message |
|
220 |
-#' |
|
221 |
-#' @param data data set |
|
222 |
-#' @param transpose if we are transposing the data set |
|
223 |
-#' @param distributed if we are running distributed CoGAPS |
|
224 |
-#' @return message displayed to screen |
|
225 |
-startupMessage <- function(data, transpose, distributed) |
|
226 |
-{ |
|
227 |
- nGenes <- ifelse(transpose, ncol_helper(data), nrow_helper(data)) |
|
228 |
- nSamples <- ifelse(transpose, nrow_helper(data), ncol_helper(data)) |
|
229 |
- |
|
230 |
- dist_message <- "Standard" |
|
231 |
- if (!is.null(distributed)) |
|
232 |
- dist_message <- distributed |
|
233 |
- message(paste("Running", dist_message, "CoGAPS on", nGenes, "genes and", |
|
234 |
- nSamples, "samples")) |
|
235 |
-} |
|
236 |
- |
|
237 |
-#' parse parameters passed through the ... variable |
|
238 |
-#' |
|
239 |
-#' @param allParams list of all parameters |
|
240 |
-#' @param extraParams list of parameters in ... |
|
241 |
-#' @return allParams with any valid parameters in extraParams added |
|
242 |
-#' @note will halt with an error if any parameters in extraParams are invalid |
|
243 |
-#' @importFrom methods slotNames |
|
244 |
-parseExtraParams <- function(allParams, extraParams) |
|
245 |
-{ |
|
246 |
- # parse direct params |
|
247 |
- for (s in slotNames(allParams$gaps)) |
|
248 |
- { |
|
249 |
- if (!is.null(extraParams[[s]])) |
|
250 |
- { |
|
251 |
- allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]]) |
|
252 |
- extraParams[[s]] <- NULL |
|
253 |
- } |
|
254 |
- } |
|
255 |
- |
|
256 |
- # check for unrecognized options |
|
257 |
- if (length(extraParams) > 0) |
|
258 |
- stop(paste("unrecognized argument:", names(extraParams)[1])) |
|
259 |
- |
|
260 |
- return(allParams) |
|
261 |
-} |
|
262 |
- |
|
263 |
-#' check that provided data is valid |
|
264 |
-#' |
|
265 |
-#' @param data data matrix |
|
266 |
-#' @param uncertainty uncertainty matrix, can be null |
|
267 |
-#' @param params CogapsParams object |
|
268 |
-#' @return throws an error if data has problems |
|
269 |
-checkDataMatrix <- function(data, uncertainty, params) |
|
270 |
-{ |
|
271 |
- if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0) |
|
272 |
- stop("negative values in data and/or uncertainty matrix") |
|
273 |
- if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns) |
|
274 |
- stop("nPatterns must be less than dimensions of data") |
|
275 |
- if (sum(uncertainty < 1e-5) > 0) |
|
276 |
- warning("small values in uncertainty matrix detected") |
|
277 |
-} |
|
278 |
- |
|
279 |
-#' Information About Package Compilation |
|
280 |
-#' @export |
|
281 |
-#' |
|
282 |
-#' @details returns information about how the package was compiled, i.e. which |
|
283 |
-#' compiler/version was used, which compile time options were enabled, etc... |
|
284 |
-#' @return string containing build report |
|
285 |
-#' @examples |
|
286 |
-#' CoGAPS::buildReport() |
|
287 |
-buildReport <- function() |
|
288 |
-{ |
|
289 |
- getBuildReport_cpp() |
|
290 |
-} |
|
209 |
+ BPPARAM, geneNames, sampleNames, matchedPatterns, ...) |
|
210 |
+} |
|
291 | 211 |
\ No newline at end of file |
... | ... |
@@ -1,4 +1,5 @@ |
1 | 1 |
#' CoGAPS Distributed Matrix Factorization Algorithm |
2 |
+#' @keywords internal |
|
2 | 3 |
#' |
3 | 4 |
#' @description runs CoGAPS over subsets of the data and stitches the results |
4 | 5 |
#' back together |
... | ... |
@@ -10,13 +11,20 @@ |
10 | 11 |
#' @importFrom BiocParallel bplapply MulticoreParam |
11 | 12 |
distributedCogaps <- function(data, allParams, uncertainty) |
12 | 13 |
{ |
13 |
- FUN <- function(index, sets, data, allParams, uncertainty, fixedMatrix=NULL) |
|
14 |
+ FUN <- function(index, sets, data, allParams, uncertainty, geneNames, |
|
15 |
+ sampleNames, fixedMatrix=NULL) |
|
14 | 16 |
{ |
17 |
+ if (allParams$gaps@distributed == "genome-wide") |
|
18 |
+ geneNames <- geneNames[sets[[index]]] |
|
19 |
+ else |
|
20 |
+ sampleNames <- sampleNames[sets[[index]]] |
|
21 |
+ |
|
15 | 22 |
internal <- ifelse(is(data, "character"), cogaps_cpp_from_file, cogaps_cpp) |
16 | 23 |
raw <- internal(data, allParams, uncertainty, sets[[index]], |
17 | 24 |
fixedMatrix, index == 1) |
18 | 25 |
new("CogapsResult", Amean=raw$Amean, Asd=raw$Asd, Pmean=raw$Pmean, |
19 |
- Psd=raw$Psd, seed=raw$seed, meanChiSq=raw$meanChiSq) |
|
26 |
+ Psd=raw$Psd, meanChiSq=raw$meanChiSq, geneNames=geneNames, |
|
27 |
+ sampleNames=sampleNames) |
|
20 | 28 |
} |
21 | 29 |
|
22 | 30 |
# randomly sample either rows or columns into subsets to break the data up |
... | ... |
@@ -25,71 +33,78 @@ distributedCogaps <- function(data, allParams, uncertainty) |
25 | 33 |
if (is.null(allParams$bpBackend)) |
26 | 34 |
allParams$bpBackend <- BiocParallel::MulticoreParam(workers=length(sets)) |
27 | 35 |
|
28 |
- # run Cogaps normally on each subset of the data |
|
29 |
- if (allParams$messages) |
|
30 |
- message("Running Across Subsets...") |
|
31 |
- initialResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend, |
|
32 |
- sets=sets, data=data, allParams=allParams, uncertainty=uncertainty) |
|
33 |
- |
|
34 |
- # get all unmatched patterns |
|
35 |
- if (allParams$gaps@distributed == "genome-wide") |
|
36 |
- unmatchedPatterns <- lapply(initialResult, function(x) x@sampleFactors) |
|
36 |
+ if (is.null(allParams$matchedPatterns)) |
|
37 |
+ { |
|
38 |
+ # run Cogaps normally on each subset of the data |
|
39 |
+ if (allParams$messages) |
|
40 |
+ cat("Running Across Subsets...\n\n") |
|
41 |
+ initialResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend, |
|
42 |
+ sets=sets, data=data, allParams=allParams, uncertainty=uncertainty, |
|
43 |
+ geneNames=allParams$geneNames, sampleNames=allParams$sampleNames) |
|
44 |
+ |
|
45 |
+ # get all unmatched patterns |
|
46 |
+ if (allParams$gaps@distributed == "genome-wide") |
|
47 |
+ unmatchedPatterns <- lapply(initialResult, function(x) x@sampleFactors) |
|
48 |
+ else |
|
49 |
+ unmatchedPatterns <- lapply(initialResult, function(x) x@featureLoadings) |
|
50 |
+ |
|
51 |
+ # match patterns in either A or P matrix |
|
52 |
+ if (allParams$messages) |
|
53 |
+ cat("\nMatching Patterns Across Subsets...\n") |
|
54 |
+ matchedPatterns <- findConsensusMatrix(unmatchedPatterns, allParams) |
|
55 |
+ allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus) |
|
56 |
+ |
|
57 |
+ # set fixed matrix |
|
58 |
+ allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed |
|
59 |
+ == "genome-wide", "P", "A") |
|
60 |
+ } |
|
37 | 61 |
else |
38 |
- unmatchedPatterns <- lapply(initialResult, function(x) x@featureLoadings) |
|
39 |
- |
|
40 |
- # match patterns in either A or P matrix |
|
41 |
- if (allParams$messages) |
|
42 |
- message("Matching Patterns Across Subsets...") |
|
43 |
- matchedPatterns <- findConsensusMatrix(unmatchedPatterns, allParams) |
|
44 |
- allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus) |
|
45 |
- |
|
46 |
- # set fixed matrix |
|
47 |
- allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed |
|
48 |
- == "genome-wide", "P", "A") |
|
49 |
- |
|
62 |
+ { |
|
63 |
+ matchedPatterns <- list(consensus=allParams$matchedPatterns) |
|
64 |
+ allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus) |
|
65 |
+ allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed |
|
66 |
+ == "genome-wide", "P", "A") |
|
67 |
+ } |
|
68 |
+ |
|
50 | 69 |
# run final phase with fixed matrix |
51 | 70 |
if (allParams$messages) |
52 |
- message("Running Final Stage...") |
|
71 |
+ cat("Running Final Stage...\n\n") |
|
53 | 72 |
finalResult <- bplapply(1:length(sets), FUN, BPPARAM=allParams$bpBackend, |
54 | 73 |
sets=sets, data=data, allParams=allParams, uncertainty=uncertainty, |
74 |
+ geneNames=allParams$geneNames, sampleNames=allParams$sampleNames, |
|
55 | 75 |
fixedMatrix=matchedPatterns$consensus) |
56 | 76 |
|
57 | 77 |
# concatenate final result |
58 | 78 |
fullResult <- stitchTogether(finalResult, allParams) |
59 | 79 |
|
60 | 80 |
# add diagnostic information before returning |
61 |
- fullResult$diagnostics$unmatchedPatterns <- unmatchedPatterns |
|
62 |
- fullResult$diagnostics$clusteredPatterns <- matchedPatterns$clusteredPatterns |
|
63 |
- fullResult$diagnostics$RtoMeanPattern <- lapply(matchedPatterns$clusteredPatterns, correlationToMeanPattern) |
|
64 |
- fullResult$diagnostics$subsets <- sets |
|
65 |
- return(fullResult) |
|
66 |
-} |
|
67 |
- |
|
68 |
-#' partition genes/samples into subsets |
|
69 |
-#' @description either genes or samples or partitioned depending on the type |
|
70 |
-#' of distributed CoGAPS (i.e. genome-wide or single-cell) |
|
71 |
-#' @param data either file name or matrix |
|
72 |
-#' @param allParams list of all CoGAPS parameters |
|
73 |
-#' @return list of sorted subsets of either genes or samples |
|
74 |
-createSets <- function(data, allParams) |
|
75 |
-{ |
|
76 |
- total <- ifelse(xor(allParams$transposeData, allParams$gaps@distributed == "genome-wide"), |
|
77 |
- nrow_helper(data), ncol_helper(data)) |
|
78 |
- setSize <- floor(total / allParams$gaps@nSets) |
|
79 |
- |
|
80 |
- sets <- list() |
|
81 |
- remaining <- 1:total |
|
82 |
- for (n in 1:(allParams$gaps@nSets - 1)) |
|
81 |
+ if (is.null(allParams$matchedPatterns)) |
|
83 | 82 |
{ |
84 |
- selected <- sample(remaining, setSize, replace=FALSE) |
|
85 |
- sets[[n]] <- sort(selected) |
|
86 |
- remaining <- setdiff(remaining, selected) |
|
83 |
+ fullResult$diagnostics$unmatchedPatterns <- unmatchedPatterns |
|
84 |
+ fullResult$diagnostics$clusteredPatterns <- matchedPatterns$clusteredPatterns |
|
85 |
+ fullResult$diagnostics$CorrToMeanPattern <- lapply(matchedPatterns$clusteredPatterns, corrToMeanPattern) |
|
87 | 86 |
} |
88 |
- sets[[allParams$gaps@nSets]] <- sort(remaining) |
|
89 |
- return(sets) |
|
87 |
+ |
|
88 |
+ if (allParams$gaps@distributed == "genome-wide") |
|
89 |
+ allNames <- allParams$geneNames |
|
90 |
+ else |
|
91 |
+ allNames <- allParams$sampleNames |
|
92 |
+ fullResult$diagnostics$subsets <- lapply(sets, function(s) allNames[s]) |
|
93 |
+ |
|
94 |
+ # rename genes/samples if dimension was subsetted incompletely |
|
95 |
+ allUsedIndices <- sort(unlist(sets)) |
|
96 |
+ if (allParams$gaps@distributed == "genome-wide") |
|
97 |
+ fullResult$geneNames <- allParams$geneNames[allUsedIndices] |
|
98 |
+ else |
|
99 |
+ fullResult$sampleNames <- allParams$sampleNames[allUsedIndices] |
|
100 |
+ |
|
101 |
+ return(fullResult) |
|
90 | 102 |
} |
91 | 103 |
|
104 |
+ |
|
92 | 105 |
#' find the consensus pattern matrix across all subsets |
106 |
+#' @keywords internal |
|
107 |
+#' |
|
93 | 108 |
#' @param unmatchedPatterns list of all unmatched pattern matrices from initial |
94 | 109 |
#' run of CoGAPS |
95 | 110 |
#' @param allParams list of all CoGAPS parameters |
... | ... |
@@ -103,73 +118,84 @@ findConsensusMatrix <- function(unmatchedPatterns, allParams) |
103 | 118 |
} |
104 | 119 |
|
105 | 120 |
#' Match Patterns Across Multiple Runs |
121 |
+#' @keywords internal |
|
122 |
+#' |
|
106 | 123 |
#' @param allPatterns matrix of patterns stored in the columns |
107 | 124 |
#' @param allParams list of all CoGAPS parameters |
108 | 125 |
#' @return a matrix of consensus patterns |
109 | 126 |
#' @importFrom stats weighted.mean |
110 | 127 |
patternMatch <- function(allPatterns, allParams) |
111 | 128 |
{ |
112 |
- PatsByClust <- corcut(allPatterns, allParams) |
|
129 |
+ # cluster patterns |
|
130 |
+ clusters <- corcut(allPatterns, allParams$gaps@cut, allParams$gaps@minNS) |
|
113 | 131 |
|
114 |
- # split by maxNS |
|
115 |
- indx <- which(sapply(PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS)) |
|
116 |
- while (length(indx) > 0) |
|
117 |
- { |
|
118 |
- allParams$gaps@cut <- 2 |
|
119 |
- internalPatsByClust <- corcut(PatsByClust[[indx[1]]], allParams) |
|
120 |
- |
|
121 |
- PatsByClust[[indx[1]]] <- internalPatsByClust[[1]] |
|
122 |
- if (length(internalPatsByClust) > 1) |
|
123 |
- { |
|
124 |
- PatsByClust <- append(PatsByClust, internalPatsByClust[2]) |
|
125 |
- } |
|
126 |
- indx <- which(sapply(PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS)) |
|
132 |
+ # function to split a cluster in two (might fail to do so) |
|
133 |
+ splitCluster <- function(list, index, minNS) |
|
134 |
+ { |
|
135 |
+ split <- corcut(list[[index]], 2, minNS) |
|
136 |
+ list[[index]] <- split[[1]] |
|
137 |
+ if (length(split) > 1) |
|
138 |
+ list <- append(list, split[2]) |
|
139 |
+ return(list) |
|
127 | 140 |
} |
128 | 141 |
|
129 |
- # create matrix of mean patterns - weighted by coefficient of determination |
|
130 |
- PatsByCDSWavg <- sapply(PatsByClust, function(clust) |
|
131 |
- apply(clust, 1, function(row) weighted.mean(row, correlationToMeanPattern(clust)^3))) |
|
132 |
- colnames(PatsByCDSWavg) <- paste("Pattern", 1:length(PatsByClust)) |
|
142 |
+ # split large clusters into two |
|
143 |
+ tooLarge <- function(x) ncol(x) > allParams$gaps@maxNS |
|
144 |
+ indx <- which(sapply(clusters, tooLarge)) |
|
145 |
+ while (length(indx) > 0) |
|
146 |
+ { |
|
147 |
+ clusters <- splitCluster(clusters, indx[1], allParams$gaps@minNS) |
|
148 |
+ indx <- which(sapply(clusters, tooLarge)) |
|
149 |
+ } |
|
150 |
+ names(clusters) <- as.character(1:length(clusters)) |
|
133 | 151 |
|
134 |
- # scale |
|
135 |
- return(list("clusteredPatterns"=PatsByClust, |
|
136 |
- "consensus"=apply(PatsByCDSWavg, 2, function(col) col / max(col)))) |
|
152 |
+ # create matrix of mean patterns - weighted by correlation to mean pattern |
|
153 |
+ meanPatterns <- sapply(clusters, function(clust) apply(clust, 1, |
|
154 |
+ function(row) weighted.mean(row, corrToMeanPattern(clust)^3))) |
|
155 |
+ colnames(meanPatterns) <- paste("Pattern", 1:length(clusters)) |
|
137 | 156 |
|
157 |
+ # returned patterns after scaling max to 1 |
|
158 |
+ return(list("clusteredPatterns"=clusters, |
|
159 |
+ "consensus"=apply(meanPatterns, 2, function(col) col / max(col)))) |
|
138 | 160 |
} |
139 | 161 |
|
140 |
-correlationToMeanPattern <- function(cluster) |
|
162 |
+#' calculate correlation of each pattern in a cluster to the cluster mean |
|
163 |
+#' @keywords internal |
|
164 |
+corrToMeanPattern <- function(cluster) |
|
141 | 165 |
{ |
142 | 166 |
meanPat <- rowMeans(cluster) |
143 | 167 |
sapply(1:ncol(cluster), function(j) round(cor(x=cluster[,j], y=meanPat), 3)) |
144 | 168 |
} |
145 | 169 |
|
146 | 170 |
#' cluster patterns together |
171 |
+#' @keywords internal |
|
172 |
+#' |
|
147 | 173 |
#' @param allPatterns matrix of all patterns across subsets |
148 |
-#' @param allParams list of all CoGAPS parameters |
|
174 |
+#' @param cut number of branches at which to cut dendrogram |
|
175 |
+#' @param minNS minimum of individual set contributions a cluster must contain |
|
149 | 176 |
#' @return patterns listed by which cluster they belong to |
150 | 177 |
#' @importFrom cluster agnes |
151 | 178 |
#' @importFrom stats cutree as.hclust cor |
152 |
-corcut <- function(allPatterns, allParams) |
|
179 |
+corcut <- function(allPatterns, cut, minNS) |
|
153 | 180 |
{ |
154 | 181 |
corr.dist <- cor(allPatterns) |
155 | 182 |
corr.dist <- 1 - corr.dist |
156 | 183 |
|
157 |
- clust <- cluster::agnes(x=corr.dist, diss=TRUE, "complete") |
|
158 |
- patternIds <- stats::cutree(stats::as.hclust(clust), k=allParams$gaps@cut) |
|
184 |
+ clusterSummary <- cluster::agnes(x=corr.dist, diss=TRUE, "complete") |
|
185 |
+ clusterIds <- stats::cutree(stats::as.hclust(clusterSummary), k=cut) |
|
159 | 186 |
|
160 |
- PatsByClust <- list() |
|
161 |
- for (cluster in unique(patternIds)) |
|
187 |
+ clusters <- list() |
|
188 |
+ for (id in unique(clusterIds)) |
|
162 | 189 |
{ |
163 |
- if (sum(patternIds==cluster) >= allParams$gaps@minNS) |
|
164 |
- { |
|
165 |
- clusterPats <- allPatterns[,patternIds==cluster] |
|
166 |
- PatsByClust[[as.character(cluster)]] <- clusterPats |
|
167 |
- } |
|
190 |
+ if (sum(clusterIds==id) >= minNS) |
|
191 |
+ clusters[[as.character(id)]] <- allPatterns[,clusterIds==id,drop=FALSE] |
|
168 | 192 |
} |
169 |
- return(PatsByClust) |
|
193 |
+ return(clusters) |
|
170 | 194 |
} |
171 | 195 |
|
172 | 196 |
#' concatenate final results across subsets |
197 |
+#' @keywords internal |
|
198 |
+#' |
|
173 | 199 |
#' @param result list of CogapsResult object from all runs across subsets |
174 | 200 |
#' @param allParams list of all CoGAPS parameters |
175 | 201 |
#' @return list with all CoGAPS output |
176 | 202 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,152 @@ |
1 |
+#' wrapper around cat |
|
2 |
+#' @keywords |
|
3 |
+#' |
|
4 |
+#' @description cleans up message printing |
|
5 |
+#' @param allParams all cogaps parameters |
|
6 |
+#' @param ... arguments forwarded to cat |
|
7 |
+#' @return displays text |
|
8 |
+gapsCat <- function(allParams, ...) |
|
9 |
+{ |
|
10 |
+ if (allParams$messages) |
|
11 |
+ cat(...) |
|
12 |
+} |
|
13 |
+ |
|
14 |
+#' checks if file is supported |
|
15 |
+#' @keywords internal |
|
16 |
+#' |
|
17 |
+#' @param file path to file |
|
18 |
+#' @return TRUE if file is supported, FALSE if not |
|
19 |
+#' @importFrom tools file_ext |
|
20 |
+supported <- function(file) |
|
21 |
+{ |
|
22 |
+ if (!is(file, "character")) |
|
23 |
+ return(FALSE) |
|
24 |
+ return(tools::file_ext(file) %in% c("tsv", "csv", "mtx")) |
|
25 |
+} |
|
26 |
+ |
|
27 |
+#' get number of rows from supported file name or matrix |
|
28 |
+#' @keywords internal |
|
29 |
+#' |
|
30 |
+#' @param data either a file name or a matrix |
|
31 |
+#' @return number of rows |
|
32 |
+#' @importFrom data.table fread |
|
33 |
+#' @importFrom tools file_ext |
|
34 |
+nrowHelper <- function(data) |
|
35 |
+{ |
|
36 |
+ if (is(data, "character")) |
|
37 |
+ { |
|
38 |
+ return(switch(tools::file_ext(data), |
|
39 |
+ "csv" = nrow(data.table::fread(data, select=1)), |
|
40 |
+ "tsv" = nrow(data.table::fread(data, select=1)), |
|
41 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]) |
|
42 |
+ )) |
|
43 |
+ } |
|
44 |
+ return(nrow(data)) |
|
45 |
+} |
|
46 |
+ |
|
47 |
+#' get number of columns from supported file name or matrix |
|
48 |
+#' @keywords internal |
|
49 |
+#' |
|
50 |
+#' @param data either a file name or a matrix |
|
51 |
+#' @return number of columns |
|
52 |
+#' @importFrom data.table fread |
|
53 |
+#' @importFrom tools file_ext |
|
54 |
+ncolHelper <- function(data) |
|
55 |
+{ |
|
56 |
+ if (is(data, "character")) |
|
57 |
+ { |
|
58 |
+ return(switch(tools::file_ext(data), |
|
59 |
+ "csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
60 |
+ "tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
61 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]) |
|
62 |
+ )) |
|
63 |
+ } |
|
64 |
+ return(ncol(data)) |
|
65 |
+} |
|
66 |
+ |
|
67 |
+#' extract gene names from data |
|
68 |
+#' @keywords internal |
|
69 |
+getGeneNames <- function(data, transpose) |
|
70 |
+{ |
|
71 |
+ nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data)) |
|
72 |
+ return(paste("Gene", 1:nGenes, sep="_")) |
|
73 |
+} |
|
74 |
+ |
|
75 |
+#' extract sample names from data |
|
76 |
+#' @keywords internal |
|
77 |
+getSampleNames <- function(data, transpose) |
|
78 |
+{ |
|
79 |
+ nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data)) |
|
80 |
+ return(paste("Sample", 1:nSamples, sep="_")) |
|
81 |
+} |
|
82 |
+ |
|
83 |
+#' write start up message |
|
84 |
+#' @keywords internal |
|
85 |
+#' |
|
86 |
+#' @param data data set |
|
87 |
+#' @param allParams list of all parameters |
|
88 |
+#' @return message displayed to screen |
|
89 |
+#' @importFrom methods show |
|
90 |
+startupMessage <- function(data, allParams) |
|
91 |
+{ |
|
92 |
+ nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
|
93 |
+ nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
|
94 |
+ |
|
95 |
+ dist_message <- "Standard" |
|
96 |
+ if (!is.null(allParams$gaps@distributed)) |
|
97 |
+ dist_message <- allParams$gaps@distributed |
|
98 |
+ |
|
99 |
+ cat("Running", dist_message, "CoGAPS on", nGenes, "genes and", |
|
100 |
+ nSamples, "samples") |
|
101 |
+ |
|
102 |
+ if (allParams$messages) |
|
103 |
+ { |
|
104 |
+ cat(" with parameters:\n\n") |
|
105 |
+ methods::show(allParams$gaps) |
|
106 |
+ } |
|
107 |
+ cat("\n") |
|
108 |
+} |
|
109 |
+ |
|
110 |
+#' parse parameters passed through the ... variable |
|
111 |
+#' @keywords internal |
|
112 |
+#' |
|
113 |
+#' @param allParams list of all parameters |
|
114 |
+#' @param extraParams list of parameters in ... |
|
115 |
+#' @return allParams with any valid parameters in extraParams added |
|
116 |
+#' @note will halt with an error if any parameters in extraParams are invalid |
|
117 |
+#' @importFrom methods slotNames |
|
118 |
+parseExtraParams <- function(allParams, extraParams) |
|
119 |
+{ |
|
120 |
+ # parse direct params |
|
121 |
+ for (s in slotNames(allParams$gaps)) |
|
122 |
+ { |
|
123 |
+ if (!is.null(extraParams[[s]])) |
|
124 |
+ { |
|
125 |
+ allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]]) |
|
126 |
+ extraParams[[s]] <- NULL |
|
127 |
+ } |
|
128 |
+ } |
|
129 |
+ |
|
130 |
+ # check for unrecognized options |
|
131 |
+ if (length(extraParams) > 0) |
|
132 |
+ stop(paste("unrecognized argument:", names(extraParams)[1])) |
|
133 |
+ |
|
134 |
+ return(allParams) |
|
135 |
+} |
|
136 |
+ |
|
137 |
+#' check that provided data is valid |
|
138 |
+#' @keywords internal |
|
139 |
+#' |
|
140 |
+#' @param data data matrix |
|
141 |
+#' @param uncertainty uncertainty matrix, can be null |
|
142 |
+#' @param params CogapsParams object |
|
143 |
+#' @return throws an error if data has problems |
|
144 |
+checkDataMatrix <- function(data, uncertainty, params) |
|
145 |
+{ |
|
146 |
+ if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0) |
|
147 |
+ stop("negative values in data and/or uncertainty matrix") |
|
148 |
+ if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns) |
|
149 |
+ stop("nPatterns must be less than dimensions of data") |
|
150 |
+ if (sum(uncertainty < 1e-5) > 0) |
|
151 |
+ warning("small values in uncertainty matrix detected") |
|
152 |
+} |
|
0 | 153 |
\ No newline at end of file |
1 | 154 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,95 @@ |
1 |
+#' use user provided subsets |
|
2 |
+#' @keywords internal |
|
3 |
+#' |
|
4 |
+#' @param allParams list of all CoGAPS parameters |
|
5 |
+#' @param total total number of rows (cols) that are being paritioned |
|
6 |
+#' @return list of subsets |
|
7 |
+sampleWithExplictSets <- function(allParams, total) |
|
8 |
+{ |
|
9 |
+ if (all(sapply(allParams$gaps@explicitSets, function(s) is(s, "numeric")))) |
|
10 |
+ { |
|
11 |
+ gapsCat(allParams, "using provided indexed subsets\n") |
|
12 |
+ return(allParams$gaps@explicitSets) |
|
13 |
+ } |
|
14 |
+ else if (all(sapply(allParams$gaps@explicitSets, function(s) is(s, "character")))) |
|
15 |
+ { |
|
16 |
+ gapsCat(allParams, "using provided named subsets\n") |
|
17 |
+ if (allParams$gaps@distributed == "genome-wide") |
|
18 |
+ allNames <- allParams$geneNames |
|
19 |
+ else |
|
20 |
+ allNames <- allParams$sampleNames |
|
21 |
+ return(lapply(allParams$gaps@explicitSets, function(set) which(allNames %in% set))) |
|
22 |
+ } |
|
23 |
+} |
|
24 |
+ |
|
25 |
+#' subset rows (cols) proportional to the user provided weights |
|
26 |
+#' @keywords internal |
|
27 |
+#' |
|
28 |
+#' @param allParams list of all CoGAPS parameters |
|
29 |
+#' @param setSize the size of each subset of the total |
|
30 |
+#' @return list of subsets |
|
31 |
+sampleWithAnnotationWeights <- function(allParams, setSize) |
|
32 |
+{ |
|
33 |
+ groups <- unique(allParams$gaps@samplingAnnotation) |
|
34 |
+ return(lapply(1:allParams$gaps@nSets, function(i) |
|
35 |
+ { |
|
36 |
+ groupCount <- sample(groups, size=setSize, replace=TRUE, |
|
37 |
+ prob=allParams$gaps@samplingWeight) |
|
38 |
+ sort(unlist(sapply(groups, function(g) |
|
39 |
+ { |
|
40 |
+ groupNdx <- which(allParams$gaps@samplingAnnotation == g) |
|
41 |
+ sample(groupNdx, size=sum(groupCount == g), replace=TRUE) |
|
42 |
+ }))) |
|
43 |
+ })) |
|
44 |
+} |
|
45 |
+ |
|
46 |
+#' subset data by uniformly partioning rows (cols) |
|
47 |
+#' @keywords internal |
|
48 |
+#' |
|
49 |
+#' @param allParams list of all CoGAPS parameters |
|
50 |
+#' @param total total number of rows (cols) that are being paritioned |
|
51 |
+#' @param setSize the size of each subset of the total |
|
52 |
+#' @return list of subsets |
|
53 |
+sampleUniformly <- function(allParams, total, setSize) |
|
54 |
+{ |
|
55 |
+ sets <- list() |
|
56 |
+ remaining <- 1:total |
|
57 |
+ for (n in 1:(allParams$gaps@nSets - 1)) |
|
58 |
+ { |
|
59 |
+ selected <- sample(remaining, setSize, replace=FALSE) |
|
60 |
+ sets[[n]] <- sort(selected) |
|
61 |
+ remaining <- setdiff(remaining, selected) |
|
62 |
+ } |
|
63 |
+ sets[[allParams$gaps@nSets]] <- sort(remaining) |
|
64 |
+ return(sets) |
|
65 |
+} |
|
66 |
+ |
|
67 |
+#' partition genes/samples into subsets |
|
68 |
+#' @keywords internal |
|
69 |
+#' |
|
70 |
+#' @description either genes or samples or partitioned depending on the type |
|
71 |
+#' of distributed CoGAPS (i.e. genome-wide or single-cell) |
|
72 |
+#' @param data either file name or matrix |
|
73 |
+#' @param allParams list of all CoGAPS parameters |
|
74 |
+#' @return list of sorted subsets of either genes or samples |
|
75 |
+createSets <- function(data, allParams) |
|
76 |
+{ |
|
77 |
+ subsetRows <- xor(allParams$transposeData, allParams$gaps@distributed == "genome-wide") |
|
78 |
+ total <- ifelse(subsetRows, nrowHelper(data), ncolHelper(data)) |
|
79 |
+ setSize <- floor(total / allParams$gaps@nSets) |
|
80 |
+ |
|
81 |
+ gapsCat(allParams, "Creating subsets...") |
|
82 |
+ |
|
83 |
+ if (!is.null(allParams$gaps@explicitSets)) |
|
84 |
+ { |
|
85 |
+ return(sampleWithExplictSets(allParams, total)) |
|
86 |
+ } |
|
87 |
+ |
|
88 |
+ if (!is.null(allParams$gaps@samplingAnnotation)) |
|
89 |
+ { |
|
90 |
+ gapsCat(allParams, "sampling with annotation weights\n") |
|
91 |
+ return(sampleWithAnnotationWeights(allParams, setSize)) |
|
92 |
+ } |
|
93 |
+ gapsCat(allParams, "\n") |
|
94 |
+ return(sampleUniformly(allParams, total, setSize)) |
|
95 |
+} |
... | ... |
@@ -20,6 +20,11 @@ |
20 | 20 |
#' a cluster must contain |
21 | 21 |
#' @slot maxNS [distributed parameter] maximum of individual set contributions |
22 | 22 |
#' a cluster can contain |
23 |
+#' @slot explicitSets [distributed parameter] specify subsets by index or name |
|
24 |
+#' @slot samplingAnnotation [distributed parameter] specify categories along |
|
25 |
+#' the rows (cols) to use for weighted sampling |
|
26 |
+#' @slot samplingWeight [distributed parameter] weights associated with |
|
27 |
+#' samplingAnnotation |
|
23 | 28 |
#' @importClassesFrom S4Vectors character_OR_NULL |
24 | 29 |
setClass("CogapsParams", slots = c( |
25 | 30 |
nPatterns = "numeric", |
... | ... |
@@ -34,7 +39,10 @@ setClass("CogapsParams", slots = c( |
34 | 39 |
nSets = "numeric", |
35 | 40 |
cut = "numeric", |
36 | 41 |
minNS = "numeric", |
37 |
- maxNS = "numeric" |
|
42 |
+ maxNS = "numeric", |
|
43 |
+ explicitSets = "ANY", |
|
44 |
+ samplingAnnotation = "character_OR_NULL", |
|
45 |
+ samplingWeight = "numeric" |
|
38 | 46 |
)) |
39 | 47 |
|
40 | 48 |
#' constructor for CogapsParams |
... | ... |
@@ -60,6 +68,9 @@ setMethod("initialize", "CogapsParams", |
60 | 68 |
.Object@nSets <- 4 |
61 | 69 |
.Object@minNS <- ceiling(.Object@nSets / 2) |
62 | 70 |
.Object@maxNS <- .Object@minNS + .Object@nSets |
71 |
+ .Object@explicitSets <- NULL |
|
72 |
+ .Object@samplingAnnotation <- NULL |
|
73 |
+ .Object@samplingWeight <- integer(0) |
|
63 | 74 |
|
64 | 75 |
.Object <- callNextMethod(.Object, ...) |
65 | 76 |
.Object |
... | ... |
@@ -84,6 +95,21 @@ setValidity("CogapsParams", |
84 | 95 |
"minNS must be an integer greater than one" |
85 | 96 |
if (object@nSets <= 1 | object@nSets %% 1 != 0) |
86 | 97 |
"minNS must be an integer greater than one" |
98 |
+ if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets) |
|
99 |
+ "nSets doesn't match length of explicitSets" |
|
100 |
+ if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight)) |
|
101 |
+ stop("samplingWeight has mismatched size with amount of distinct annotations") |
|
102 |
+ |
|
103 |
+ # check type of explicitSets |
|
104 |
+ if (!is.null(object@explicitSets) & !is(object@explicitSets, "list")) |
|
105 |
+ "explicitSets must be a list of numeric or character" |
|
106 |
+ isNum <- sapply(object@explicitSets, function(s) is(s, "numeric")) |
|
107 |
+ isChar <- sapply(object@explicitSets, function(s) is(s, "charcater")) |
|
108 |
+ if (!is.null(object@explicitSets) & !(all(isNum) | all(isChar))) |
|
109 |
+ "explicitSets must be a list of numeric or character" |
|
110 |
+ |
|
111 |
+ if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets) |
|
112 |
+ "wrong number of sets given" |
|
87 | 113 |
} |
88 | 114 |
) |
89 | 115 |
|
... | ... |
@@ -122,6 +148,22 @@ setGeneric("setDistributedParams", function(object, nSets, cut=NULL, |
122 | 148 |
minNS=NULL, maxNS=NULL) |
123 | 149 |
{standardGeneric("setDistributedParams")}) |
124 | 150 |
|
151 |
+#' set the annotation labels and weights for subsetting the data |
|
152 |
+#' @export |
|
153 |
+#' @docType methods |
|
154 |
+#' @rdname setAnnotationWeights-methods |
|
155 |
+#' |
|
156 |
+#' @description these parameters are interrelated so they must be set together |
|
157 |
+#' @param object an object of type CogapsParams |
|
158 |
+#' @param annotation vector of labels |
|
159 |
+#' @param weights vector of weights |
|
160 |
+#' @return the modified params object |
|
161 |
+#' @examples |
|
162 |
+#' params <- new("CogapsParams") |
|
163 |
+#' params <- setAnnotationWeights(params, c('a', 'b', 'c'), c(1,2,1)) |
|
164 |
+setGeneric("setAnnotationWeights", function(object, annotation, weights) |
|
165 |
+ {standardGeneric("setAnnotationWeights")}) |
|
166 |
+ |
|
125 | 167 |
#' get the value of a parameter |
126 | 168 |
#' @export |
127 | 169 |
#' @docType methods |
... | ... |
@@ -17,14 +17,16 @@ setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c( |
17 | 17 |
#' @param Pmean mean of sampled P matrices |
18 | 18 |
#' @param Asd std dev of sampled A matrices |
19 | 19 |
#' @param Psd std dev of sampled P matrices |
20 |
-#' @param seed random seed used for the run |
|
21 | 20 |
#' @param meanChiSq mean value of ChiSq statistic |
21 |
+#' @param geneNames names of genes in data |
|
22 |
+#' @param sampleNames names of samples in data |
|
22 | 23 |
#' @param diagnostics assorted diagnostic reports from the run |
23 | 24 |
#' @param ... initial values for slots |
24 | 25 |
#' @return initialized CogapsResult object |
25 | 26 |
#' @importFrom methods callNextMethod |
26 | 27 |
setMethod("initialize", "CogapsResult", |
27 |
-function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics=NULL, ...) |
|
28 |
+function(.Object, Amean, Pmean, Asd, Psd, meanChiSq, geneNames, |
|
29 |
+sampleNames, diagnostics=NULL, ...) |
|
28 | 30 |
{ |
29 | 31 |
.Object@featureLoadings <- Amean |
30 | 32 |
.Object@sampleFactors <- Pmean |
... | ... |
@@ -32,12 +34,19 @@ function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics=NULL, ... |
32 | 34 |
.Object@sampleStdDev <- Psd |
33 | 35 |
|
34 | 36 |
patternNames <- paste("Pattern", 1:ncol(Amean), sep="_") |
37 |
+ |
|
38 |
+ rownames(.Object@featureLoadings) <- geneNames |
|
35 | 39 |
colnames(.Object@featureLoadings) <- patternNames |
40 |
+ |
|
41 |
+ rownames(.Object@featureStdDev) <- geneNames |
|
36 | 42 |
colnames(.Object@featureStdDev) <- patternNames |
43 |
+ |
|
44 |
+ rownames(.Object@sampleFactors) <- sampleNames |
|
37 | 45 |
colnames(.Object@sampleFactors) <- patternNames |
46 |
+ |
|
47 |
+ rownames(.Object@sampleStdDev) <- sampleNames |
|
38 | 48 |
colnames(.Object@sampleStdDev) <- patternNames |
39 | 49 |
|
40 |
- .Object@metadata[["seed"]] <- seed |
|
41 | 50 |
.Object@metadata[["meanChiSq"]] <- meanChiSq |
42 | 51 |
.Object@metadata <- append(.Object@metadata, diagnostics) |
43 | 52 |
|
... | ... |
@@ -99,6 +108,62 @@ setGeneric("getVersion", function(object) |
99 | 108 |
setGeneric("getOriginalParameters", function(object) |
100 | 109 |
{standardGeneric("getOriginalParameters")}) |
101 | 110 |
|
111 |
+#' return unmatched patterns from each subset |
|
112 |
+#' @export |
|
113 |
+#' @docType methods |
|
114 |
+#' @rdname getUnmatchedPatterns-methods |
|
115 |
+#' |
|
116 |
+#' @param object an object of type CogapsResult |
|
117 |
+#' @return CogapsParams object |
|
118 |
+#' @examples |
|
119 |
+#' data(SimpSim) |
|
120 |
+#' result <- CoGAPS(SimpSim.data) |
|
121 |
+#' getUnmatchedPatterns(result) |
|
122 |
+setGeneric("getUnmatchedPatterns", function(object) |
|
123 |
+ {standardGeneric("getUnmatchedPatterns")}) |
|
124 |
+ |
|
125 |
+#' return clustered patterns from set of all patterns across all subsets |
|
126 |
+#' @export |
|
127 |
+#' @docType methods |
|
128 |
+#' @rdname getClusteredPatterns-methods |
|
129 |
+#' |
|
130 |
+#' @param object an object of type CogapsResult |
|
131 |
+#' @return CogapsParams object |
|
132 |
+#' @examples |
|
133 |
+#' data(SimpSim) |
|
134 |
+#' result <- CoGAPS(SimpSim.data) |
|
135 |
+#' getClusteredPatterns(result) |
|
136 |
+setGeneric("getClusteredPatterns", function(object) |
|
137 |
+ {standardGeneric("getClusteredPatterns")}) |
|
138 |
+ |
|
139 |
+#' return correlation between each pattern and the cluster mean |
|
140 |
+#' @export |
|
141 |
+#' @docType methods |
|
142 |
+#' @rdname getCorrelationToMeanPattern-methods |
|
143 |
+#' |
|
144 |
+#' @param object an object of type CogapsResult |
|
145 |
+#' @return CogapsParams object |
|
146 |
+#' @examples |
|
147 |
+#' data(SimpSim) |
|
148 |
+#' result <- CoGAPS(SimpSim.data) |
|
149 |
+#' getCorrelationToMeanPattern(result) |
|
150 |
+setGeneric("getCorrelationToMeanPattern", function(object) |
|
151 |
+ {standardGeneric("getCorrelationToMeanPattern")}) |
|
152 |
+ |
|
153 |
+#' return the names of the genes (samples) in each subset |
|
154 |
+#' @export |
|
155 |
+#' @docType methods |
|
156 |
+#' @rdname getSubsets-methods |
|
157 |
+#' |
|
158 |
+#' @param object an object of type CogapsResult |
|
159 |
+#' @return CogapsParams object |
|
160 |
+#' @examples |
|
161 |
+#' data(SimpSim) |
|
162 |
+#' result <- CoGAPS(SimpSim.data) |
|
163 |
+#' getSubsets(result) |
|
164 |
+setGeneric("getSubsets", function(object) |
|
165 |
+ {standardGeneric("getSubsets")}) |
|
166 |
+ |
|
102 | 167 |
#' compute z-score matrix |
103 | 168 |
#' @export |
104 | 169 |
#' @docType methods |
... | ... |
@@ -1,14 +1,13 @@ |
1 | 1 |
setMethod("show", signature("CogapsParams"), |
2 | 2 |
function(object) |
3 | 3 |
{ |
4 |
- cat("An Object of class \"CogapsParams\"\n") |
|
5 |
- cat("\n") |
|
6 | 4 |
cat("-- Standard Parameters --\n") |
7 | 5 |
cat("nPatterns ", object@nPatterns, "\n") |
8 | 6 |
cat("nIterations ", object@nIterations, "\n") |
9 | 7 |
cat("seed ", object@seed, "\n") |
10 | 8 |
cat("singleCell ", object@singleCell, "\n") |
11 |
- cat("distributed ", ifelse(is.null(object@distributed), FALSE, TRUE), "\n") |
|
9 |
+ if (!is.null(object@distributed)) |
|
10 |
+ cat("distributed ", object@distributed, "\n") |
|
12 | 11 |
cat("\n") |
13 | 12 |
cat("-- Sparsity Parameters --\n") |
14 | 13 |
if (object@alphaA == object@alphaP) |
... | ... |
@@ -61,6 +60,10 @@ function(object, whichParam, value) |
61 | 60 |
{ |
62 | 61 |
stop("please set this parameter with setDistributedParams") |
63 | 62 |
} |
63 |
+ else if (whichParam %in% c("samplingAnnotation", "samplingWeight")) |
|
64 |
+ { |
|
65 |
+ stop("please set this parameter with setAnnotationWeights") |
|
66 |
+ } |
|
64 | 67 |
else |
65 | 68 |
{ |
66 | 69 |
slot(object, whichParam) <- value |
... | ... |
@@ -75,6 +78,9 @@ function(object, whichParam, value) |
75 | 78 |
setMethod("setDistributedParams", signature(object="CogapsParams"), |
76 | 79 |
function(object, nSets, cut, minNS, maxNS) |
77 | 80 |
{ |
81 |
+ message("setting distributed parameters - call this again if you change ", |
|
82 |
+ "nPatterns") |
|
83 |
+ |
|
78 | 84 |
object@nSets <- nSets |
79 | 85 |
|
80 | 86 |
object@cut <- ifelse(is.null(cut), object@nPatterns, cut) |
... | ... |
@@ -85,6 +91,18 @@ function(object, nSets, cut, minNS, maxNS) |
85 | 91 |
return(object) |
86 | 92 |
}) |
87 | 93 |
|
94 |
+#' @rdname setAnnotationWeights-methods |
|
95 |
+#' @aliases setAnnotationWeights |
|
96 |
+setMethod("setAnnotationWeights", signature(object="CogapsParams"), |
|
97 |
+function(object, annotation, weights) |
|
98 |
+{ |
|
99 |
+ object@samplingAnnotation <- annotation |
|
100 |
+ object@samplingWeight <- weights |
|
101 |
+ |
|
102 |
+ validObject(object) |
|
103 |
+ return(object) |
|
104 |
+}) |
|
105 |
+ |
|
88 | 106 |
#' @rdname getParam-methods |
89 | 107 |
#' @aliases getParam |
90 | 108 |
setMethod("getParam", signature(object="CogapsParams"), |
... | ... |
@@ -47,7 +47,7 @@ function(object) |
47 | 47 |
setMethod("getVersion", signature(object="CogapsResult"), |
48 | 48 |
function(object) |
49 | 49 |
{ |
50 |
- object@metadata$diagnostics$version |
|
50 |
+ object@metadata$version |
|
51 | 51 |
}) |
52 | 52 |
|
53 | 53 |
#' @rdname getOriginalParameters-methods |
... | ... |
@@ -55,7 +55,55 @@ function(object) |
55 | 55 |
setMethod("getOriginalParameters", signature(object="CogapsResult"), |
56 | 56 |
function(object) |
57 | 57 |
{ |
58 |
- object@metadata$diagnostics$params |
|
58 |
+ object@metadata$params |
|
59 |
+}) |
|
60 |
+ |
|
61 |
+#' @rdname getUnmatchedPatterns-methods |
|
62 |
+#' @aliases getUnmatchedPatterns |
|
63 |
+setMethod("getUnmatchedPatterns", signature(object="CogapsResult"), |
|
64 |
+function(object) |
|
65 |
+{ |
|
66 |
+ if (!is.null(object@metadata$unmatchedPatterns)) |
|
67 |
+ return(object@metadata$unmatchedPatterns) |
|
68 |
+ |
|
69 |
+ message("this result was not generated with a call to GWCoGAPS or scCoGAPS") |
|
70 |
+ return(NULL) |
|
71 |
+}) |
|
72 |
+ |
|
73 |
+#' @rdname getClusteredPatterns-methods |
|
74 |
+#' @aliases getClusteredPatterns |
|
75 |
+setMethod("getClusteredPatterns", signature(object="CogapsResult"), |
|
76 |
+function(object) |
|
77 |
+{ |
|
78 |
+ if (!is.null(object@metadata$clusteredPatterns)) |
|
79 |
+ return(object@metadata$clusteredPatterns) |
|
80 |
+ |
|
81 |
+ message("this result was not generated with a call to GWCoGAPS or scCoGAPS") |
|
82 |
+ return(NULL) |
|
83 |
+}) |
|
84 |
+ |
|
85 |
+#' @rdname getCorrelationToMeanPattern-methods |
|
86 |
+#' @aliases getCorrelationToMeanPattern |
|
87 |
+setMethod("getCorrelationToMeanPattern", signature(object="CogapsResult"), |
|
88 |
+function(object) |
|
89 |
+{ |
|
90 |
+ if (!is.null(object@metadata$CorrToMeanPattern)) |
|
91 |
+ return(object@metadata$CorrToMeanPattern) |
|
92 |
+ |
|
93 |
+ message("this result was not generated with a call to GWCoGAPS or scCoGAPS") |
|
94 |
+ return(NULL) |
|
95 |
+}) |
|
96 |
+ |
|
97 |
+#' @rdname getSubsets-methods |
|
98 |
+#' @aliases getSubsets |
|
99 |
+setMethod("getSubsets", signature(object="CogapsResult"), |
|
100 |
+function(object) |
|
101 |
+{ |
|
102 |
+ if (!is.null(object@metadata$subsets)) |
|
103 |
+ return(object@metadata$subsets) |
|
104 |
+ |
|
105 |
+ message("this result was not generated with a call to GWCoGAPS or scCoGAPS") |
|
106 |
+ return(NULL) |
|
59 | 107 |
}) |
60 | 108 |
|
61 | 109 |
#' @rdname calcZ-methods |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-# CoGAPS Version: 3.3.5 |
|
1 |
+# CoGAPS Version: 3.3.6 |
|
2 | 2 |
|
3 | 3 |
[](https://bioconductor.org/packages/CoGAPS) |
4 | 4 |
[](https://bioconductor.org/packages/CoGAPS) |
... | ... |
@@ -8,7 +8,8 @@ CoGAPS(data, params = new("CogapsParams"), nThreads = 1, |
8 | 8 |
messages = TRUE, outputFrequency = 500, uncertainty = NULL, |
9 | 9 |
checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000, |
10 | 10 |
checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL, |
11 |
- ...) |
|
11 |
+ geneNames = NULL, sampleNames = NULL, matchedPatterns = NULL, |
|
12 |
+ outputToFile = NULL, ...) |
|
12 | 13 |
} |
13 | 14 |
\arguments{ |
14 | 15 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -39,6 +40,15 @@ genes x samples} |
39 | 40 |
|
40 | 41 |
\item{BPPARAM}{BiocParallel backend} |
41 | 42 |
|
43 |
+\item{geneNames}{vector of names of genes in data} |
|
44 |
+ |
|
45 |
+\item{sampleNames}{vector of names of samples in data} |
|
46 |
+ |
|
47 |
+\item{matchedPatterns}{manually matched patterns for distributed CoGAPS} |
|
48 |
+ |
|
49 |
+\item{outputToFile}{name of a file to save the output to, will create 4 files |
|
50 |
+of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"} |
|
51 |
+ |
|
42 | 52 |
\item{...}{allows for overwriting parameters in params} |
43 | 53 |
} |
44 | 54 |
\value{ |
... | ... |
@@ -39,5 +39,13 @@ a cluster must contain} |
39 | 39 |
|
40 | 40 |
\item{\code{maxNS}}{[distributed parameter] maximum of individual set contributions |
41 | 41 |
a cluster can contain} |
42 |
+ |
|
43 |
+\item{\code{explicitSets}}{[distributed parameter] specify subsets by index or name} |
|
44 |
+ |
|
45 |
+\item{\code{samplingAnnotation}}{[distributed parameter] specify categories along |
|
46 |
+the rows (cols) to use for weighted sampling} |
|
47 |
+ |
|
48 |
+\item{\code{samplingWeight}}{[distributed parameter] weights associated with |
|
49 |
+samplingAnnotation} |
|
42 | 50 |
}} |
43 | 51 |
|
... | ... |
@@ -8,7 +8,7 @@ GWCoGAPS(data, params = new("CogapsParams"), nThreads = 1, |
8 | 8 |
messages = TRUE, outputFrequency = 500, uncertainty = NULL, |
9 | 9 |
checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000, |
10 | 10 |
checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL, |
11 |
- ...) |
|
11 |
+ geneNames = NULL, sampleNames = NULL, matchedPatterns = NULL, ...) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -39,6 +39,12 @@ genes x samples} |
39 | 39 |
|
40 | 40 |
\item{BPPARAM}{BiocParallel backend} |
41 | 41 |
|
42 |
+\item{geneNames}{vector of names of genes in data} |
|
43 |
+ |
|
44 |
+\item{sampleNames}{vector of names of samples in data} |
|
45 |
+ |
|
46 |
+\item{matchedPatterns}{manually matched patterns for distributed CoGAPS} |
|
47 |
+ |
|
42 | 48 |
\item{...}{allows for overwriting parameters in params} |
43 | 49 |
} |
44 | 50 |
\value{ |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/CoGAPS.R |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 | 3 |
\name{checkDataMatrix} |
4 | 4 |
\alias{checkDataMatrix} |
5 | 5 |
\title{check that provided data is valid} |
... | ... |
@@ -19,3 +19,4 @@ throws an error if data has problems |
19 | 19 |
\description{ |
20 | 20 |
check that provided data is valid |
21 | 21 |
} |
22 |
+\keyword{internal} |
... | ... |
@@ -4,12 +4,14 @@ |
4 | 4 |
\alias{corcut} |
5 | 5 |
\title{cluster patterns together} |
6 | 6 |
\usage{ |
7 |
-corcut(allPatterns, allParams) |
|
7 |
+corcut(allPatterns, cut, minNS) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{allPatterns}{matrix of all patterns across subsets} |
11 | 11 |
|
12 |
-\item{allParams}{list of all CoGAPS parameters} |
|
12 |
+\item{cut}{number of branches at which to cut dendrogram} |
|
13 |
+ |
|
14 |
+\item{minNS}{minimum of individual set contributions a cluster must contain} |
|
13 | 15 |
} |
14 | 16 |
\value{ |
15 | 17 |
patterns listed by which cluster they belong to |
... | ... |
@@ -17,3 +19,4 @@ patterns listed by which cluster they belong to |
17 | 19 |
\description{ |
18 | 20 |
cluster patterns together |
19 | 21 |
} |
22 |
+\keyword{internal} |
20 | 23 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,12 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/DistributedCogaps.R |
|
3 |
+\name{corrToMeanPattern} |
|
4 |
+\alias{corrToMeanPattern} |
|
5 |
+\title{calculate correlation of each pattern in a cluster to the cluster mean} |
|
6 |
+\usage{ |
|
7 |
+corrToMeanPattern(cluster) |
|
8 |
+} |
|
9 |
+\description{ |
|
10 |
+calculate correlation of each pattern in a cluster to the cluster mean |
|
11 |
+} |
|
12 |
+\keyword{internal} |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/DistributedCogaps.R |
|
2 |
+% Please edit documentation in R/SubsetData.R |
|
3 | 3 |
\name{createSets} |
4 | 4 |
\alias{createSets} |
5 | 5 |
\title{partition genes/samples into subsets} |
... | ... |
@@ -18,3 +18,4 @@ list of sorted subsets of either genes or samples |
18 | 18 |
either genes or samples or partitioned depending on the type |
19 | 19 |
of distributed CoGAPS (i.e. genome-wide or single-cell) |
20 | 20 |
} |
21 |
+\keyword{internal} |
21 | 22 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{gapsCat} |
|
4 |
+\alias{gapsCat} |
|
5 |
+\title{wrapper around cat} |
|
6 |
+\usage{ |
|
7 |
+gapsCat(allParams, ...) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{allParams}{all cogaps parameters} |
|
11 |
+ |
|
12 |
+\item{...}{arguments forwarded to cat} |
|
13 |
+} |
|
14 |
+\value{ |
|
15 |
+displays text |
|
16 |
+} |
|
17 |
+\description{ |
|
18 |
+cleans up message printing |
|
19 |
+} |
|
20 |
+\keyword{} |
0 | 21 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{getClusteredPatterns} |
|
5 |
+\alias{getClusteredPatterns} |
|
6 |
+\alias{getClusteredPatterns,CogapsResult-method} |
|
7 |
+\title{return clustered patterns from set of all patterns across all subsets} |
|
8 |
+\usage{ |
|
9 |
+getClusteredPatterns(object) |
|
10 |
+ |
|
11 |
+\S4method{getClusteredPatterns}{CogapsResult}(object) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsResult} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+CogapsParams object |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+return clustered patterns from set of all patterns across all subsets |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+data(SimpSim) |
|
24 |
+result <- CoGAPS(SimpSim.data) |
|
25 |
+getClusteredPatterns(result) |
|
26 |
+} |
0 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{getCorrelationToMeanPattern} |
|
5 |
+\alias{getCorrelationToMeanPattern} |
|
6 |
+\alias{getCorrelationToMeanPattern,CogapsResult-method} |
|
7 |
+\title{return correlation between each pattern and the cluster mean} |
|
8 |
+\usage{ |
|
9 |
+getCorrelationToMeanPattern(object) |
|
10 |
+ |
|
11 |
+\S4method{getCorrelationToMeanPattern}{CogapsResult}(object) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsResult} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+CogapsParams object |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+return correlation between each pattern and the cluster mean |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+data(SimpSim) |
|
24 |
+result <- CoGAPS(SimpSim.data) |
|
25 |
+getCorrelationToMeanPattern(result) |
|
26 |
+} |
0 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,12 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{getGeneNames} |
|
4 |
+\alias{getGeneNames} |
|
5 |
+\title{extract gene names from data} |
|
6 |
+\usage{ |
|
7 |
+getGeneNames(data, transpose) |
|
8 |
+} |
|
9 |
+\description{ |
|
10 |
+extract gene names from data |
|
11 |
+} |
|
12 |
+\keyword{internal} |
0 | 13 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,12 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{getSampleNames} |
|
4 |
+\alias{getSampleNames} |
|
5 |
+\title{extract sample names from data} |
|
6 |
+\usage{ |
|
7 |
+getSampleNames(data, transpose) |
|
8 |
+} |
|
9 |
+\description{ |
|
10 |
+extract sample names from data |
|
11 |
+} |
|
12 |
+\keyword{internal} |
0 | 13 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{getSubsets} |
|
5 |
+\alias{getSubsets} |
|
6 |
+\alias{getSubsets,CogapsResult-method} |
|
7 |
+\title{return the names of the genes (samples) in each subset} |
|
8 |
+\usage{ |
|
9 |
+getSubsets(object) |
|
10 |
+ |
|
11 |
+\S4method{getSubsets}{CogapsResult}(object) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsResult} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+CogapsParams object |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+return the names of the genes (samples) in each subset |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+data(SimpSim) |
|
24 |
+result <- CoGAPS(SimpSim.data) |
|
25 |
+getSubsets(result) |
|
26 |
+} |
0 | 27 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,26 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{getUnmatchedPatterns} |
|
5 |
+\alias{getUnmatchedPatterns} |
|
6 |
+\alias{getUnmatchedPatterns,CogapsResult-method} |
|
7 |
+\title{return unmatched patterns from each subset} |
|
8 |
+\usage{ |
|
9 |
+getUnmatchedPatterns(object) |
|
10 |
+ |
|
11 |
+\S4method{getUnmatchedPatterns}{CogapsResult}(object) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsResult} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+CogapsParams object |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+return unmatched patterns from each subset |
|
21 |
+} |
|
22 |
+\examples{ |
|
23 |
+data(SimpSim) |
|
24 |
+result <- CoGAPS(SimpSim.data) |
|
25 |
+getUnmatchedPatterns(result) |
|
26 |
+} |
... | ... |
@@ -5,8 +5,8 @@ |
5 | 5 |
\alias{initialize,CogapsResult-method} |
6 | 6 |
\title{Constructor for CogapsResult} |
7 | 7 |
\usage{ |
8 |
-\S4method{initialize}{CogapsResult}(.Object, Amean, Pmean, Asd, Psd, seed, |
|
9 |
- meanChiSq, diagnostics = NULL, ...) |
|
8 |
+\S4method{initialize}{CogapsResult}(.Object, Amean, Pmean, Asd, Psd, |
|
9 |
+ meanChiSq, geneNames, sampleNames, diagnostics = NULL, ...) |
|
10 | 10 |
} |
11 | 11 |
\arguments{ |
12 | 12 |
\item{.Object}{CogapsResult object} |
... | ... |
@@ -19,10 +19,12 @@ |
19 | 19 |
|
20 | 20 |
\item{Psd}{std dev of sampled P matrices} |
21 | 21 |
|
22 |
-\item{seed}{random seed used for the run} |
|
23 |
- |
|
24 | 22 |
\item{meanChiSq}{mean value of ChiSq statistic} |
25 | 23 |
|
24 |
+\item{geneNames}{names of genes in data} |
|
25 |
+ |
|
26 |
+\item{sampleNames}{names of samples in data} |
|
27 |
+ |
|
26 | 28 |
\item{diagnostics}{assorted diagnostic reports from the run} |
27 | 29 |
|
28 | 30 |
\item{...}{initial values for slots} |
29 | 31 |
similarity index 69% |
30 | 32 |
rename from man/ncol_helper.Rd |
31 | 33 |
rename to man/ncolHelper.Rd |
... | ... |
@@ -1,10 +1,10 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/CoGAPS.R |
|
3 |
-\name{ncol_helper} |
|
4 |
-\alias{ncol_helper} |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{ncolHelper} |
|
4 |
+\alias{ncolHelper} |
|
5 | 5 |
\title{get number of columns from supported file name or matrix} |
6 | 6 |
\usage{ |
7 |
-ncol_helper(data) |
|
7 |
+ncolHelper(data) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{data}{either a file name or a matrix} |
... | ... |
@@ -15,3 +15,4 @@ number of columns |
15 | 15 |
\description{ |
16 | 16 |
get number of columns from supported file name or matrix |
17 | 17 |
} |
18 |
+\keyword{internal} |
18 | 19 |
similarity index 68% |
19 | 20 |
rename from man/nrow_helper.Rd |
20 | 21 |
rename to man/nrowHelper.Rd |
... | ... |
@@ -1,10 +1,10 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/CoGAPS.R |
|
3 |
-\name{nrow_helper} |
|
4 |
-\alias{nrow_helper} |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 |
+\name{nrowHelper} |
|
4 |
+\alias{nrowHelper} |
|
5 | 5 |
\title{get number of rows from supported file name or matrix} |
6 | 6 |
\usage{ |
7 |
-nrow_helper(data) |
|
7 |
+nrowHelper(data) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{data}{either a file name or a matrix} |
... | ... |
@@ -15,3 +15,4 @@ number of rows |
15 | 15 |
\description{ |
16 | 16 |
get number of rows from supported file name or matrix |
17 | 17 |
} |
18 |
+\keyword{internal} |
... | ... |
@@ -1,5 +1,5 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/CoGAPS.R |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 | 3 |
\name{parseExtraParams} |
4 | 4 |
\alias{parseExtraParams} |
5 | 5 |
\title{parse parameters passed through the ... variable} |
... | ... |
@@ -20,3 +20,4 @@ parse parameters passed through the ... variable |
20 | 20 |
\note{ |
21 | 21 |
will halt with an error if any parameters in extraParams are invalid |
22 | 22 |
} |
23 |
+\keyword{internal} |
20 | 21 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,22 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/SubsetData.R |
|
3 |
+\name{sampleUniformly} |
|
4 |
+\alias{sampleUniformly} |
|
5 |
+\title{subset data by uniformly partioning rows (cols)} |
|
6 |
+\usage{ |
|
7 |
+sampleUniformly(allParams, total, setSize) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{allParams}{list of all CoGAPS parameters} |
|
11 |
+ |
|
12 |
+\item{total}{total number of rows (cols) that are being paritioned} |
|
13 |
+ |
|
14 |
+\item{setSize}{the size of each subset of the total} |
|
15 |
+} |
|
16 |
+\value{ |
|
17 |
+list of subsets |
|
18 |
+} |
|
19 |
+\description{ |
|
20 |
+subset data by uniformly partioning rows (cols) |
|
21 |
+} |
|
22 |
+\keyword{internal} |
0 | 23 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/SubsetData.R |
|
3 |
+\name{sampleWithAnnotationWeights} |
|
4 |
+\alias{sampleWithAnnotationWeights} |
|
5 |
+\title{subset rows (cols) proportional to the user provided weights} |
|
6 |
+\usage{ |
|
7 |
+sampleWithAnnotationWeights(allParams, setSize) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{allParams}{list of all CoGAPS parameters} |
|
11 |
+ |
|
12 |
+\item{setSize}{the size of each subset of the total} |
|
13 |
+} |
|
14 |
+\value{ |
|
15 |
+list of subsets |
|
16 |
+} |
|
17 |
+\description{ |
|
18 |
+subset rows (cols) proportional to the user provided weights |
|
19 |
+} |
|
20 |
+\keyword{internal} |
0 | 21 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/SubsetData.R |
|
3 |
+\name{sampleWithExplictSets} |
|
4 |
+\alias{sampleWithExplictSets} |
|
5 |
+\title{use user provided subsets} |
|
6 |
+\usage{ |
|
7 |
+sampleWithExplictSets(allParams, total) |
|
8 |
+} |
|
9 |
+\arguments{ |
|
10 |
+\item{allParams}{list of all CoGAPS parameters} |
|
11 |
+ |
|
12 |
+\item{total}{total number of rows (cols) that are being paritioned} |
|
13 |
+} |
|
14 |
+\value{ |
|
15 |
+list of subsets |
|
16 |
+} |
|
17 |
+\description{ |
|
18 |
+use user provided subsets |
|
19 |
+} |
|
20 |
+\keyword{internal} |
... | ... |
@@ -8,7 +8,7 @@ scCoGAPS(data, params = new("CogapsParams"), nThreads = 1, |
8 | 8 |
messages = TRUE, outputFrequency = 500, uncertainty = NULL, |
9 | 9 |
checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000, |
10 | 10 |
checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL, |
11 |
- ...) |
|
11 |
+ geneNames = NULL, sampleNames = NULL, matchedPatterns = NULL, ...) |
|
12 | 12 |
} |
13 | 13 |
\arguments{ |
14 | 14 |
\item{data}{File name or R object (see details for supported types)} |
... | ... |
@@ -39,6 +39,12 @@ genes x samples} |
39 | 39 |
|
40 | 40 |
\item{BPPARAM}{BiocParallel backend} |
41 | 41 |
|
42 |
+\item{geneNames}{vector of names of genes in data} |
|
43 |
+ |
|
44 |
+\item{sampleNames}{vector of names of samples in data} |
|
45 |
+ |
|
46 |
+\item{matchedPatterns}{manually matched patterns for distributed CoGAPS} |
|
47 |
+ |
|
42 | 48 |
\item{...}{allows for overwriting parameters in params} |
43 | 49 |
} |
44 | 50 |
\value{ |
45 | 51 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,29 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R |
|
3 |
+\docType{methods} |
|
4 |
+\name{setAnnotationWeights} |
|
5 |
+\alias{setAnnotationWeights} |
|
6 |
+\alias{setAnnotationWeights,CogapsParams-method} |
|
7 |
+\title{set the annotation labels and weights for subsetting the data} |
|
8 |
+\usage{ |
|
9 |
+setAnnotationWeights(object, annotation, weights) |
|
10 |
+ |
|
11 |
+\S4method{setAnnotationWeights}{CogapsParams}(object, annotation, weights) |
|
12 |
+} |
|
13 |
+\arguments{ |
|
14 |
+\item{object}{an object of type CogapsParams} |
|
15 |
+ |
|
16 |
+\item{annotation}{vector of labels} |
|
17 |
+ |
|
18 |
+\item{weights}{vector of weights} |
|
19 |
+} |
|
20 |
+\value{ |
|
21 |
+the modified params object |
|
22 |
+} |
|
23 |
+\description{ |
|
24 |
+these parameters are interrelated so they must be set together |
|
25 |
+} |
|
26 |
+\examples{ |
|
27 |
+ params <- new("CogapsParams") |
|
28 |
+ params <- setAnnotationWeights(params, c('a', 'b', 'c'), c(1,2,1)) |
|
29 |
+} |
... | ... |
@@ -1,17 +1,15 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/CoGAPS.R |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 | 3 |
\name{startupMessage} |
4 | 4 |
\alias{startupMessage} |
5 | 5 |
\title{write start up message} |
6 | 6 |
\usage{ |
7 |
-startupMessage(data, transpose, distributed) |
|
7 |
+startupMessage(data, allParams) |
|
8 | 8 |
} |
9 | 9 |
\arguments{ |
10 | 10 |
\item{data}{data set} |
11 | 11 |
|
12 |
-\item{transpose}{if we are transposing the data set} |
|
13 |
- |
|
14 |
-\item{distributed}{if we are running distributed CoGAPS} |
|
12 |
+\item{allParams}{list of all parameters} |
|
15 | 13 |
} |
16 | 14 |
\value{ |
17 | 15 |
message displayed to screen |
... | ... |
@@ -19,3 +17,4 @@ message displayed to screen |
19 | 17 |
\description{ |
20 | 18 |
write start up message |
21 | 19 |
} |
20 |
+\keyword{internal} |
... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
% Generated by roxygen2: do not edit by hand |
2 |
-% Please edit documentation in R/CoGAPS.R |
|
2 |
+% Please edit documentation in R/HelperFunctions.R |
|
3 | 3 |
\name{supported} |
4 | 4 |
\alias{supported} |
5 |
-\title{Checks if file is supported} |
|
5 |
+\title{checks if file is supported} |
|
6 | 6 |
\usage{ |
7 | 7 |
supported(file) |
8 | 8 |
} |
... | ... |
@@ -13,5 +13,6 @@ supported(file) |
13 | 13 |
TRUE if file is supported, FALSE if not |
14 | 14 |
} |
15 | 15 |
\description{ |
16 |
-Checks if file is supported |
|
16 |
+checks if file is supported |
|
17 | 17 |
} |
18 |
+\keyword{internal} |
... | ... |
@@ -3,6 +3,7 @@ |
3 | 3 |
|
4 | 4 |
#include <Rcpp.h> |
5 | 5 |
#include <string> |
6 |
+#include <sstream> |
|
6 | 7 |
|
7 | 8 |
// these are helper functions for converting matrix/vector types |
8 | 9 |
// to and from R objects |
... | ... |
@@ -150,8 +151,16 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster) |
150 | 151 |
runner.setCheckpointOutFile(allParams["checkpointOutFile"]); |
151 | 152 |
runner.setCheckpointInterval(allParams["checkpointInterval"]); |
152 | 153 |
|
153 |
- // run cogaps and return the GapsResult in an R list |
|
154 |
+ // run cogaps |
|
154 | 155 |
GapsResult result(runner.run(printThreads)); |
156 |
+ |
|
157 |
+ // write result to file if requested |
|
158 |
+ if (allParams["outputToFile"] != R_NilValue) |
|
159 |
+ { |
|
160 |
+ result.writeToFile(Rcpp::as<std::string>(allParams["outputToFile"])); |
|
161 |
+ } |
|
162 |
+ |
|
163 |
+ // return R list |
|
155 | 164 |
return Rcpp::List::create( |
156 | 165 |
Rcpp::Named("Amean") = createRMatrix(result.Amean), |
157 | 166 |
Rcpp::Named("Pmean") = createRMatrix(result.Pmean, true), |
... | ... |
@@ -159,6 +168,8 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix, bool isMaster) |
159 | 168 |
Rcpp::Named("Psd") = createRMatrix(result.Psd, true), |
160 | 169 |
Rcpp::Named("seed") = runner.getSeed(), |
161 | 170 |
Rcpp::Named("meanChiSq") = result.meanChiSq, |
171 |
+ Rcpp::Named("geneNames") = allParams["geneNames"], |
|
172 |
+ Rcpp::Named("sampleNames") = allParams["sampleNames"], |
|
162 | 173 |
Rcpp::Named("diagnostics") = Rcpp::List::create() |
163 | 174 |
); |
164 | 175 |
} |
... | ... |
@@ -26,9 +26,48 @@ struct GapsResult |
26 | 26 |
Psd(stat.Psd()), meanChiSq(0.f), seed(0) |
27 | 27 |
{} |
28 | 28 |
|
29 |
- void writeCsv(const std::string &path); |
|
30 |
- void writeTsv(const std::string &path); |
|
31 |
- void writeGct(const std::string &path); |
|
29 |
+ void writeToFile(const std::string &fullPath) |
|
30 |
+ { |
|
31 |
+ std::size_t pos = fullPath.find_last_of('.'); |
|
32 |
+ std::string base = fullPath.substr(0, pos); |
|
33 |
+ |
|
34 |
+ switch (FileParser::fileType(fullPath)) |
|
35 |
+ { |
|
36 |
+ case GAPS_CSV: return writeCsv(base); |
|
37 |
+ case GAPS_TSV: return writeTsv(base); |
|
38 |
+ case GAPS_GCT: return writeGct(base); |
|
39 |
+ } |
|
40 |
+ } |
|
41 |
+ |
|
42 |
+ void writeCsv(const std::string &path) |
|
43 |
+ { |
|
44 |
+ unsigned nPatterns = Amean.nCol(); |
|
45 |
+ std::string label("_" + gaps::to_string(nPatterns) + "_"); |
|
46 |
+ FileParser::writeToCsv(path + label + "Amean.csv", Amean); |
|
47 |
+ FileParser::writeToCsv(path + label + "Pmean.csv", Pmean); |
|
48 |
+ FileParser::writeToCsv(path + label + "Asd.csv", Asd); |
|
49 |
+ FileParser::writeToCsv(path + label + "Psd.csv", Psd); |
|
50 |
+ } |
|
51 |
+ |
|
52 |
+ void writeTsv(const std::string &path) |
|
53 |
+ { |
|
54 |
+ unsigned nPatterns = Amean.nCol(); |
|
55 |
+ std::string label("_" + gaps::to_string(nPatterns) + "_"); |
|
56 |
+ FileParser::writeToCsv(path + label + "Amean.tsv", Amean); |
|
57 |
+ FileParser::writeToCsv(path + label + "Pmean.tsv", Pmean); |
|
58 |
+ FileParser::writeToCsv(path + label + "Asd.tsv", Asd); |
|
59 |
+ FileParser::writeToCsv(path + label + "Psd.tsv", Psd); |
|
60 |
+ } |
|
61 |
+ |
|
62 |
+ void writeGct(const std::string &path) |
|
63 |
+ { |
|
64 |
+ unsigned nPatterns = Amean.nCol(); |
|
65 |
+ std::string label("_" + gaps::to_string(nPatterns) + "_"); |
|
66 |
+ FileParser::writeToCsv(path + label + "Amean.gct", Amean); |
|
67 |
+ FileParser::writeToCsv(path + label + "Pmean.gct", Pmean); |
|
68 |
+ FileParser::writeToCsv(path + label + "Asd.gct", Asd); |
|
69 |
+ FileParser::writeToCsv(path + label + "Psd.gct", Psd); |
|
70 |
+ } |
|
32 | 71 |
}; |
33 | 72 |
|
34 | 73 |
class GapsRunner |
... | ... |
@@ -2,6 +2,7 @@ |
2 | 2 |
#include "../file_parser/CsvParser.h" |
3 | 3 |
#include "../file_parser/TsvParser.h" |
4 | 4 |
#include "../file_parser/MtxParser.h" |
5 |
+#include "../file_parser/GctParser.h" |
|
5 | 6 |
|
6 | 7 |
#include "../data_structures/Matrix.h" |
7 | 8 |
|
... | ... |
@@ -13,6 +14,7 @@ TEST_CASE("Test Parsers") |
13 | 14 |
std::string csvPath = Rcpp::as<std::string>(env["gistCsvPath"]); |
14 | 15 |
std::string tsvPath = Rcpp::as<std::string>(env["gistTsvPath"]); |
15 | 16 |
std::string mtxPath = Rcpp::as<std::string>(env["gistMtxPath"]); |
17 |
+ std::string gctPath = Rcpp::as<std::string>(env["gistGctPath"]); |
|
16 | 18 |
|
17 | 19 |
SECTION("Test CsvParser") |
18 | 20 |
{ |
... | ... |
@@ -83,4 +85,30 @@ TEST_CASE("Test Parsers") |
83 | 85 |
} |
84 | 86 |
REQUIRE(count == 12267); |
85 | 87 |
} |
88 |
+ |
|
89 |
+ SECTION("Test GctParser") |
|
90 |
+ { |
|
91 |
+ GctParser p(gctPath); |
|
92 |
+ REQUIRE(p.nRow() == 1363); |
|
93 |
+ REQUIRE(p.nCol() == 9); |
|
94 |
+ |
|
95 |
+ unsigned row = 0; |
|
96 |
+ unsigned col = 0; |
|
97 |
+ unsigned count = 0; |
|
98 |
+ while (p.hasNext()) |
|
99 |
+ { |
|
100 |
+ MatrixElement e(p.getNext()); |
|
101 |
+ REQUIRE(e.row == row); |
|
102 |
+ REQUIRE(e.col == col); |
|
103 |
+ |
|
104 |
+ ++count; |
|
105 |
+ ++col; |
|
106 |
+ if (col == 9) |
|
107 |
+ { |
|
108 |
+ ++row; |
|
109 |
+ col = 0; |
|
110 |
+ } |
|
111 |
+ } |
|
112 |
+ REQUIRE(count == 12267); |
|
113 |
+ } |
|
86 | 114 |
} |
87 | 115 |
\ No newline at end of file |
... | ... |
@@ -3,6 +3,7 @@ |
3 | 3 |
#include "FileParser.h" |
4 | 4 |
#include "MtxParser.h" |
5 | 5 |
#include "TsvParser.h" |
6 |
+#include "GctParser.h" |
|
6 | 7 |
|
7 | 8 |
#include <string> |
8 | 9 |
|
... | ... |
@@ -13,6 +14,7 @@ AbstractFileParser* AbstractFileParser::create(const std::string &path) |
13 | 14 |
case GAPS_MTX: return new MtxParser(path); |
14 | 15 |
case GAPS_CSV: return new CsvParser(path); |
15 | 16 |
case GAPS_TSV: return new TsvParser(path); |
17 |
+ case GAPS_GCT: return new GctParser(path); |
|
16 | 18 |
default: GAPS_ERROR("Invalid file type\n"); |
17 | 19 |
} |
18 | 20 |
} |
... | ... |
@@ -58,6 +60,7 @@ GapsFileType FileParser::fileType(const std::string &path) |
58 | 60 |
if (ext == ".mtx") { return GAPS_MTX; } |
59 | 61 |
if (ext == ".csv") { return GAPS_CSV; } |
60 | 62 |
if (ext == ".tsv") { return GAPS_TSV; } |
63 |
+ if (ext == ".gct") { return GAPS_GCT; } |
|
61 | 64 |
|
62 | 65 |
return GAPS_INVALID_FILE_TYPE; |
63 | 66 |
} |
... | ... |
@@ -11,6 +11,7 @@ enum GapsFileType |
11 | 11 |
GAPS_MTX, |
12 | 12 |
GAPS_CSV, |
13 | 13 |
GAPS_TSV, |
14 |
+ GAPS_GCT, |
|
14 | 15 |
GAPS_INVALID_FILE_TYPE |
15 | 16 |
}; |
16 | 17 |
|
... | ... |
@@ -68,6 +69,9 @@ public: |
68 | 69 |
|
69 | 70 |
template <class MatrixType> |
70 | 71 |
static void writeToMtx(const std::string &path, const MatrixType &mat); |
72 |
+ |
|
73 |
+ template <class MatrixType> |
|
74 |
+ static void writeToGct(const std::string &path, const MatrixType &mat); |
|
71 | 75 |
}; |
72 | 76 |
|
73 | 77 |
// temporary solution - should be moved into specific file parsers, ok for now |
... | ... |
@@ -158,4 +162,38 @@ void FileParser::writeToMtx(const std::string &path, const MatrixType &mat) |
158 | 162 |
outputFile.close(); |
159 | 163 |
} |
160 | 164 |
|
165 |
+template <class MatrixType> |
|
166 |
+void FileParser::writeToGct(const std::string &path, const MatrixType &mat) |
|
167 |
+{ |
|
168 |
+ GAPS_ASSERT(FileParser::fileType(path) == GAPS_GCT); |
|
169 |
+ |
|
170 |
+ std::ofstream outputFile; |
|
171 |
+ outputFile.open(path.c_str()); |
|
172 |
+ outputFile << "#1.2\n"; |
|
173 |
+ outputFile << mat.nRow() << "\t" << mat.nCol() << "\n"; |
|
174 |
+ |
|
175 |
+ outputFile << "\"NAME\"\t\"Description\""; |
|
176 |
+ |
|
177 |
+ // write column names |
|
178 |
+ for (unsigned i = 0; i < mat.nCol(); ++i) |
|
179 |
+ { |
|
180 |
+ outputFile << "\t\"Col" << i << "\""; |
|
181 |
+ } |
|
182 |
+ outputFile << "\n"; |
|
183 |
+ |
|
184 |
+ for (unsigned i = 0; i < mat.nRow(); ++i) |
|
185 |
+ { |
|
186 |
+ // write row names |
|
187 |
+ outputFile << "\"Row" << i << "\"\t\"BLANK\"\t"; |
|
188 |
+ |
|
189 |
+ // write data |
|
190 |
+ for (unsigned j = 0; j < mat.nCol(); ++j) |
|
191 |
+ { |
|
192 |
+ outputFile << "\t" << mat(i,j); |
|
193 |
+ } |
|
194 |
+ outputFile << "\n"; |
|
195 |
+ } |
|
196 |
+ outputFile.close(); |
|
197 |
+} |
|
198 |
+ |
|
161 | 199 |
#endif |
162 | 200 |
\ No newline at end of file |