... | ... |
@@ -165,7 +165,8 @@ startupMessage <- function(data, allParams) |
165 | 165 |
parseExtraParams <- function(allParams, extraParams) |
166 | 166 |
{ |
167 | 167 |
# parse direct params |
168 |
- for (s in slotNames(allParams$gaps)) |
|
168 |
+ deprecatedSlots <- c("singleCell") |
|
169 |
+ for (s in c(slotNames(allParams$gaps), deprecatedSlots)) |
|
169 | 170 |
{ |
170 | 171 |
if (!is.null(extraParams[[s]])) |
171 | 172 |
{ |
... | ... |
@@ -231,8 +231,6 @@ checkInputs <- function(data, uncertainty, allParams) |
231 | 231 |
|
232 | 232 |
if (!is.null(allParams$gaps@distributed)) |
233 | 233 |
{ |
234 |
- if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
|
235 |
- warning("running single-cell CoGAPS with singleCell=FALSE") |
|
236 | 234 |
if (allParams$nThreads > 1) |
237 | 235 |
warning("can't run multi-threaded and distributed CoGAPS at the same time, ignoring nThreads") |
238 | 236 |
if (!is.null(allParams$checkpointInFile)) |
... | ... |
@@ -223,6 +223,8 @@ checkInputs <- function(data, uncertainty, allParams) |
223 | 223 |
stop("must use default uncertainty when enabling sparseOptimization") |
224 | 224 |
if (!is.null(allParams$checkpointInFile) & !CoGAPS::checkpointsEnabled()) |
225 | 225 |
stop("CoGAPS was built with checkpoints disabled") |
226 |
+ if (!(allParams$snapshotPhase %in% c('equilibration', 'sampling', 'all'))) |
|
227 |
+ stop("snapshotPhase must be either equilibration, sampling, or all") |
|
226 | 228 |
if (allParams$nSnapshots > 0) |
227 | 229 |
warning("Snapshots slow down computation and should only be used for testing") |
228 | 230 |
|
The argument 'nSnapshots' specifies how many samples of the A and P matrix should be saved. The snapshots are equally spaced out during the sampling phase. This is useful for various post-run analysis techniques but should primarily be used to test ideas, not as part of an official analysis.
... | ... |
@@ -223,6 +223,9 @@ checkInputs <- function(data, uncertainty, allParams) |
223 | 223 |
stop("must use default uncertainty when enabling sparseOptimization") |
224 | 224 |
if (!is.null(allParams$checkpointInFile) & !CoGAPS::checkpointsEnabled()) |
225 | 225 |
stop("CoGAPS was built with checkpoints disabled") |
226 |
+ if (allParams$nSnapshots > 0) |
|
227 |
+ warning("Snapshots slow down computation and should only be used for testing") |
|
228 |
+ |
|
226 | 229 |
|
227 | 230 |
if (!is.null(allParams$gaps@distributed)) |
228 | 231 |
{ |
... | ... |
@@ -101,7 +101,6 @@ getValueOrRds <- function(input) |
101 | 101 |
#' |
102 | 102 |
#' @param data either a file name or a matrix |
103 | 103 |
#' @return number of rows |
104 |
-#' @importFrom data.table fread |
|
105 | 104 |
#' @importFrom tools file_ext |
106 | 105 |
nrowHelper <- function(data) |
107 | 106 |
{ |
... | ... |
@@ -117,7 +116,6 @@ nrowHelper <- function(data) |
117 | 116 |
#' |
118 | 117 |
#' @param data either a file name or a matrix |
119 | 118 |
#' @return number of columns |
120 |
-#' @importFrom data.table fread |
|
121 | 119 |
#' @importFrom tools file_ext |
122 | 120 |
ncolHelper <- function(data) |
123 | 121 |
{ |
... | ... |
@@ -231,7 +231,7 @@ checkInputs <- function(data, uncertainty, allParams) |
231 | 231 |
if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
232 | 232 |
warning("running single-cell CoGAPS with singleCell=FALSE") |
233 | 233 |
if (allParams$nThreads > 1) |
234 |
- stop("can't run multi-threaded and distributed CoGAPS at the same time") |
|
234 |
+ warning("can't run multi-threaded and distributed CoGAPS at the same time, ignoring nThreads") |
|
235 | 235 |
if (!is.null(allParams$checkpointInFile)) |
236 | 236 |
stop("checkpoints not supported for distributed cogaps") |
237 | 237 |
if (!is(data, "character")) |
... | ... |
@@ -128,38 +128,6 @@ ncolHelper <- function(data) |
128 | 128 |
return(ncol(data)) |
129 | 129 |
} |
130 | 130 |
|
131 |
-#' extract gene names from data |
|
132 |
-#' @keywords internal |
|
133 |
-#' @return vector of gene names |
|
134 |
-getGeneNames <- function(data, transpose) |
|
135 |
-{ |
|
136 |
- if (transpose) |
|
137 |
- return(getSampleNames(data, FALSE)) |
|
138 |
- if (is(data, "character")) |
|
139 |
- names <- getFileInfo_cpp(data)[["rowNames"]] |
|
140 |
- else |
|
141 |
- names <- rownames(data) |
|
142 |
- if (is.null(names) | length(names) == 0) |
|
143 |
- return(paste("Gene", 1:nrowHelper(data), sep="_")) |
|
144 |
- return(names) |
|
145 |
-} |
|
146 |
- |
|
147 |
-#' extract sample names from data |
|
148 |
-#' @keywords internal |
|
149 |
-#' @return vector of sample names |
|
150 |
-getSampleNames <- function(data, transpose) |
|
151 |
-{ |
|
152 |
- if (transpose) |
|
153 |
- return(getGeneNames(data, FALSE)) |
|
154 |
- if (is(data, "character")) |
|
155 |
- names <- getFileInfo_cpp(data)[["colNames"]] |
|
156 |
- else |
|
157 |
- names <- colnames(data) |
|
158 |
- if (is.null(names) | length(names) == 0) |
|
159 |
- return(paste("Sample", 1:ncolHelper(data), sep="_")) |
|
160 |
- return(names) |
|
161 |
-} |
|
162 |
- |
|
163 | 131 |
#' write start up message |
164 | 132 |
#' @keywords internal |
165 | 133 |
#' |
... | ... |
@@ -274,10 +242,42 @@ checkInputs <- function(data, uncertainty, allParams) |
274 | 242 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
275 | 243 |
if (is.null(allParams$geneNames)) |
276 | 244 |
stop("no gene names in parameters") |
277 |
- if (is.null(allParams$samplenames)) |
|
245 |
+ if (is.null(allParams$sampleNames)) |
|
278 | 246 |
stop("no sample names in parameters") |
279 | 247 |
} |
280 | 248 |
|
249 |
+#' extract gene names from data |
|
250 |
+#' @keywords internal |
|
251 |
+#' @return vector of gene names |
|
252 |
+getGeneNames <- function(data, transpose) |
|
253 |
+{ |
|
254 |
+ if (transpose) |
|
255 |
+ return(getSampleNames(data, FALSE)) |
|
256 |
+ if (is(data, "character")) |
|
257 |
+ names <- getFileInfo_cpp(data)[["rowNames"]] |
|
258 |
+ else |
|
259 |
+ names <- rownames(data) |
|
260 |
+ if (is.null(names) | length(names) == 0) |
|
261 |
+ return(paste("Gene", 1:nrowHelper(data), sep="_")) |
|
262 |
+ return(names) |
|
263 |
+} |
|
264 |
+ |
|
265 |
+#' extract sample names from data |
|
266 |
+#' @keywords internal |
|
267 |
+#' @return vector of sample names |
|
268 |
+getSampleNames <- function(data, transpose) |
|
269 |
+{ |
|
270 |
+ if (transpose) |
|
271 |
+ return(getGeneNames(data, FALSE)) |
|
272 |
+ if (is(data, "character")) |
|
273 |
+ names <- getFileInfo_cpp(data)[["colNames"]] |
|
274 |
+ else |
|
275 |
+ names <- colnames(data) |
|
276 |
+ if (is.null(names) | length(names) == 0) |
|
277 |
+ return(paste("Sample", 1:ncolHelper(data), sep="_")) |
|
278 |
+ return(names) |
|
279 |
+} |
|
280 |
+ |
|
281 | 281 |
#' extracts gene/sample names from the data |
282 | 282 |
#' @keywords internal |
283 | 283 |
#' |
... | ... |
@@ -272,6 +272,10 @@ checkInputs <- function(data, uncertainty, allParams) |
272 | 272 |
|
273 | 273 |
if (!is(data, "character")) |
274 | 274 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
275 |
+ if (is.null(allParams$geneNames)) |
|
276 |
+ stop("no gene names in parameters") |
|
277 |
+ if (is.null(allParams$samplenames)) |
|
278 |
+ stop("no sample names in parameters") |
|
275 | 279 |
} |
276 | 280 |
|
277 | 281 |
#' extracts gene/sample names from the data |
... | ... |
@@ -155,7 +155,7 @@ getSampleNames <- function(data, transpose) |
155 | 155 |
names <- getFileInfo_cpp(data)[["colNames"]] |
156 | 156 |
else |
157 | 157 |
names <- colnames(data) |
158 |
- if (is.null(names)) |
|
158 |
+ if (is.null(names) | length(names) == 0) |
|
159 | 159 |
return(paste("Sample", 1:ncolHelper(data), sep="_")) |
160 | 160 |
return(names) |
161 | 161 |
} |
... | ... |
@@ -177,8 +177,8 @@ startupMessage <- function(data, allParams) |
177 | 177 |
dist_message <- allParams$gaps@distributed |
178 | 178 |
|
179 | 179 |
cat("\nThis is CoGAPS version", as.character(packageVersion("CoGAPS")), "\n") |
180 |
- cat("Running", dist_message, "CoGAPS on", nGenes, "genes and", |
|
181 |
- nSamples, "samples") |
|
180 |
+ cat("Running", dist_message, "CoGAPS on", allParams$dataName, |
|
181 |
+ paste("(", nGenes, " genes and ", nSamples, " samples)", sep="")) |
|
182 | 182 |
|
183 | 183 |
if (allParams$messages) |
184 | 184 |
{ |
... | ... |
@@ -310,9 +310,9 @@ getDimNames <- function(data, allParams) |
310 | 310 |
|
311 | 311 |
# check that names align with expected number of genes/samples |
312 | 312 |
if (length(geneNames) != nGenes) |
313 |
- stop("incorrect number of gene names given") |
|
313 |
+ stop(length(geneNames), " != ", nGenes, " incorrect number of gene names given") |
|
314 | 314 |
if (length(sampleNames) != nSamples) |
315 |
- stop("incorrect number of sample names given") |
|
315 |
+ stop(length(sampleNames), " != ", nSamples, " incorrect number of sample names given") |
|
316 | 316 |
|
317 | 317 |
# store processed gene/sample names directly in allParams list |
318 | 318 |
# this is an important distinction - allParams@gaps contains the |
... | ... |
@@ -105,24 +105,9 @@ getValueOrRds <- function(input) |
105 | 105 |
#' @importFrom tools file_ext |
106 | 106 |
nrowHelper <- function(data) |
107 | 107 |
{ |
108 |
- nrowMtx <- function(file) |
|
109 |
- { |
|
110 |
- i <- 1 |
|
111 |
- while (unname(data.table::fread(file, nrows=i, fill=TRUE)[i,1] == "%")) |
|
112 |
- { |
|
113 |
- i <- i + 1 |
|
114 |
- } |
|
115 |
- return(unname(as.numeric(data.table::fread(file, nrow=i, fill=TRUE)[i,1]))) |
|
116 |
- } |
|
117 |
- |
|
118 | 108 |
if (is(data, "character")) |
119 | 109 |
{ |
120 |
- return(switch(tools::file_ext(data), |
|
121 |
- "csv" = nrow(data.table::fread(data, select=1)), |
|
122 |
- "tsv" = nrow(data.table::fread(data, select=1)), |
|
123 |
- "mtx" = nrowMtx(data), |
|
124 |
- "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][1]) |
|
125 |
- )) |
|
110 |
+ return(getFileInfo_cpp(data)[["dimensions"]][1]) |
|
126 | 111 |
} |
127 | 112 |
return(nrow(data)) |
128 | 113 |
} |
... | ... |
@@ -136,24 +121,9 @@ nrowHelper <- function(data) |
136 | 121 |
#' @importFrom tools file_ext |
137 | 122 |
ncolHelper <- function(data) |
138 | 123 |
{ |
139 |
- ncolMtx <- function(file) |
|
140 |
- { |
|
141 |
- i <- 1 |
|
142 |
- while (unname(data.table::fread(file, nrows=i, fill=TRUE)[i,1] == "%")) |
|
143 |
- { |
|
144 |
- i <- i + 1 |
|
145 |
- } |
|
146 |
- return(unname(as.numeric(data.table::fread(file, nrow=i, fill=TRUE)[i,2]))) |
|
147 |
- } |
|
148 |
- |
|
149 | 124 |
if (is(data, "character")) |
150 | 125 |
{ |
151 |
- return(switch(tools::file_ext(data), |
|
152 |
- "csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
153 |
- "tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
154 |
- "mtx" = ncolMtx(data), |
|
155 |
- "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][2]) |
|
156 |
- )) |
|
126 |
+ return(getFileInfo_cpp(data)[["dimensions"]][2]) |
|
157 | 127 |
} |
158 | 128 |
return(ncol(data)) |
159 | 129 |
} |
... | ... |
@@ -165,22 +135,11 @@ getGeneNames <- function(data, transpose) |
165 | 135 |
{ |
166 | 136 |
if (transpose) |
167 | 137 |
return(getSampleNames(data, FALSE)) |
168 |
- |
|
169 |
- names <- NULL |
|
170 | 138 |
if (is(data, "character")) |
171 |
- { |
|
172 |
- names <- switch(tools::file_ext(data), |
|
173 |
- "csv" = as.matrix(data.table::fread(data, select=1))[,1], |
|
174 |
- "tsv" = as.matrix(data.table::fread(data, select=1))[,1], |
|
175 |
- "gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(data, select=1)))) |
|
176 |
- ) |
|
177 |
- } |
|
139 |
+ names <- getFileInfo_cpp(data)[["rowNames"]] |
|
178 | 140 |
else |
179 |
- { |
|
180 | 141 |
names <- rownames(data) |
181 |
- } |
|
182 |
- |
|
183 |
- if (is.null(names)) |
|
142 |
+ if (is.null(names) | length(names) == 0) |
|
184 | 143 |
return(paste("Gene", 1:nrowHelper(data), sep="_")) |
185 | 144 |
return(names) |
186 | 145 |
} |
... | ... |
@@ -192,21 +151,10 @@ getSampleNames <- function(data, transpose) |
192 | 151 |
{ |
193 | 152 |
if (transpose) |
194 | 153 |
return(getGeneNames(data, FALSE)) |
195 |
- |
|
196 |
- names <- NULL |
|
197 | 154 |
if (is(data, "character")) |
198 |
- { |
|
199 |
- names <- switch(tools::file_ext(data), |
|
200 |
- "csv" = colnames(data.table::fread(data, nrows=1))[-1], |
|
201 |
- "tsv" = colnames(data.table::fread(data, nrows=1))[-1], |
|
202 |
- "gct" = suppressWarnings(colnames(data.table::fread(data, skip=2, nrows=1))[-1:-2]) |
|
203 |
- ) |
|
204 |
- } |
|
155 |
+ names <- getFileInfo_cpp(data)[["colNames"]] |
|
205 | 156 |
else |
206 |
- { |
|
207 | 157 |
names <- colnames(data) |
208 |
- } |
|
209 |
- |
|
210 | 158 |
if (is.null(names)) |
211 | 159 |
return(paste("Sample", 1:ncolHelper(data), sep="_")) |
212 | 160 |
return(names) |
... | ... |
@@ -307,6 +307,8 @@ checkInputs <- function(data, uncertainty, allParams) |
307 | 307 |
stop("uncertainty must be a matrix unless data is a file path") |
308 | 308 |
if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
309 | 309 |
stop("must use default uncertainty when enabling sparseOptimization") |
310 |
+ if (!is.null(allParams$checkpointInFile) & !CoGAPS::checkpointsEnabled()) |
|
311 |
+ stop("CoGAPS was built with checkpoints disabled") |
|
310 | 312 |
|
311 | 313 |
if (!is.null(allParams$gaps@distributed)) |
312 | 314 |
{ |
... | ... |
@@ -316,6 +316,8 @@ checkInputs <- function(data, uncertainty, allParams) |
316 | 316 |
stop("can't run multi-threaded and distributed CoGAPS at the same time") |
317 | 317 |
if (!is.null(allParams$checkpointInFile)) |
318 | 318 |
stop("checkpoints not supported for distributed cogaps") |
319 |
+ if (!is(data, "character")) |
|
320 |
+ warning("running distributed cogaps without mtx/tsv/csv/gct data") |
|
319 | 321 |
} |
320 | 322 |
|
321 | 323 |
if (!is(data, "character")) |
... | ... |
@@ -105,12 +105,22 @@ getValueOrRds <- function(input) |
105 | 105 |
#' @importFrom tools file_ext |
106 | 106 |
nrowHelper <- function(data) |
107 | 107 |
{ |
108 |
+ nrowMtx <- function(file) |
|
109 |
+ { |
|
110 |
+ i <- 1 |
|
111 |
+ while (unname(data.table::fread(file, nrows=i, fill=TRUE)[i,1] == "%")) |
|
112 |
+ { |
|
113 |
+ i <- i + 1 |
|
114 |
+ } |
|
115 |
+ return(unname(as.numeric(data.table::fread(file, nrow=i, fill=TRUE)[i,1]))) |
|
116 |
+ } |
|
117 |
+ |
|
108 | 118 |
if (is(data, "character")) |
109 | 119 |
{ |
110 | 120 |
return(switch(tools::file_ext(data), |
111 | 121 |
"csv" = nrow(data.table::fread(data, select=1)), |
112 | 122 |
"tsv" = nrow(data.table::fread(data, select=1)), |
113 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]), |
|
123 |
+ "mtx" = nrowMtx(data), |
|
114 | 124 |
"gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][1]) |
115 | 125 |
)) |
116 | 126 |
} |
... | ... |
@@ -126,12 +136,22 @@ nrowHelper <- function(data) |
126 | 136 |
#' @importFrom tools file_ext |
127 | 137 |
ncolHelper <- function(data) |
128 | 138 |
{ |
139 |
+ ncolMtx <- function(file) |
|
140 |
+ { |
|
141 |
+ i <- 1 |
|
142 |
+ while (unname(data.table::fread(file, nrows=i, fill=TRUE)[i,1] == "%")) |
|
143 |
+ { |
|
144 |
+ i <- i + 1 |
|
145 |
+ } |
|
146 |
+ return(unname(as.numeric(data.table::fread(file, nrow=i, fill=TRUE)[i,2]))) |
|
147 |
+ } |
|
148 |
+ |
|
129 | 149 |
if (is(data, "character")) |
130 | 150 |
{ |
131 | 151 |
return(switch(tools::file_ext(data), |
132 | 152 |
"csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
133 | 153 |
"tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
134 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]), |
|
154 |
+ "mtx" = ncolMtx(data), |
|
135 | 155 |
"gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][2]) |
136 | 156 |
)) |
137 | 157 |
} |
... | ... |
@@ -247,6 +247,8 @@ parseExtraParams <- function(allParams, extraParams) |
247 | 247 |
return(allParams) |
248 | 248 |
} |
249 | 249 |
|
250 |
+## TODO these checks should be in the C++ code so that file names are checked |
|
251 |
+## just as much as R variables |
|
250 | 252 |
#' check that provided data is valid |
251 | 253 |
#' @keywords internal |
252 | 254 |
#' |
... | ... |
@@ -4,13 +4,14 @@ |
4 | 4 |
#' @description combines retina subsets from extdata directory |
5 | 5 |
#' @param n number of subsets to use |
6 | 6 |
#' @return matrix of RNA counts |
7 |
+#' @examples |
|
8 |
+#' retSubset <- getRetinaSubset() |
|
9 |
+#' dim(retSubset) |
|
7 | 10 |
#' @importFrom rhdf5 h5read |
8 | 11 |
getRetinaSubset <- function(n=1) |
9 | 12 |
{ |
10 | 13 |
if (!(n %in% 1:4)) |
11 |
- { |
|
12 | 14 |
stop("invalid number of subsets requested") |
13 |
- } |
|
14 | 15 |
|
15 | 16 |
subset_1_path <- system.file("extdata/retina_subset_1.h5", package="CoGAPS") |
16 | 17 |
subset_2_path <- system.file("extdata/retina_subset_2.h5", package="CoGAPS") |
... | ... |
@@ -66,6 +67,35 @@ supported <- function(file) |
66 | 67 |
return(tools::file_ext(file) %in% c("tsv", "csv", "mtx", "gct")) |
67 | 68 |
} |
68 | 69 |
|
70 |
+#' checks if file is rds format |
|
71 |
+#' @keywords internal |
|
72 |
+#' |
|
73 |
+#' @param file path to file |
|
74 |
+#' @return TRUE if file is .rds, FALSE if not |
|
75 |
+#' @importFrom tools file_ext |
|
76 |
+isRdsFile <- function(file) |
|
77 |
+{ |
|
78 |
+ if (is.null(file)) |
|
79 |
+ return(FALSE) |
|
80 |
+ if (length(file) == 0) |
|
81 |
+ return(FALSE) |
|
82 |
+ if (!is(file, "character")) |
|
83 |
+ return(FALSE) |
|
84 |
+ return(tools::file_ext(file) == ".rds") |
|
85 |
+} |
|
86 |
+ |
|
87 |
+#' get input that might be an RDS file |
|
88 |
+#' @keywords internal |
|
89 |
+#' |
|
90 |
+#' @param input some user input |
|
91 |
+#' @return if input is an RDS file, read it - otherwise return input |
|
92 |
+getValueOrRds <- function(input) |
|
93 |
+{ |
|
94 |
+ if (isRdsFile(input)) |
|
95 |
+ return(readRDS(input)) |
|
96 |
+ return(input) |
|
97 |
+} |
|
98 |
+ |
|
69 | 99 |
#' get number of rows from supported file name or matrix |
70 | 100 |
#' @keywords internal |
71 | 101 |
#' |
... | ... |
@@ -114,9 +144,7 @@ ncolHelper <- function(data) |
114 | 144 |
getGeneNames <- function(data, transpose) |
115 | 145 |
{ |
116 | 146 |
if (transpose) |
117 |
- { |
|
118 | 147 |
return(getSampleNames(data, FALSE)) |
119 |
- } |
|
120 | 148 |
|
121 | 149 |
names <- NULL |
122 | 150 |
if (is(data, "character")) |
... | ... |
@@ -127,16 +155,13 @@ getGeneNames <- function(data, transpose) |
127 | 155 |
"gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(data, select=1)))) |
128 | 156 |
) |
129 | 157 |
} |
130 |
- else if (is(data, "matrix") | is(data, "data.frame")) |
|
158 |
+ else |
|
131 | 159 |
{ |
132 | 160 |
names <- rownames(data) |
133 | 161 |
} |
134 | 162 |
|
135 | 163 |
if (is.null(names)) |
136 |
- { |
|
137 |
- nGenes <- nrowHelper(data) |
|
138 |
- return(paste("Gene", 1:nGenes, sep="_")) |
|
139 |
- } |
|
164 |
+ return(paste("Gene", 1:nrowHelper(data), sep="_")) |
|
140 | 165 |
return(names) |
141 | 166 |
} |
142 | 167 |
|
... | ... |
@@ -146,9 +171,7 @@ getGeneNames <- function(data, transpose) |
146 | 171 |
getSampleNames <- function(data, transpose) |
147 | 172 |
{ |
148 | 173 |
if (transpose) |
149 |
- { |
|
150 | 174 |
return(getGeneNames(data, FALSE)) |
151 |
- } |
|
152 | 175 |
|
153 | 176 |
names <- NULL |
154 | 177 |
if (is(data, "character")) |
... | ... |
@@ -159,16 +182,13 @@ getSampleNames <- function(data, transpose) |
159 | 182 |
"gct" = suppressWarnings(colnames(data.table::fread(data, skip=2, nrows=1))[-1:-2]) |
160 | 183 |
) |
161 | 184 |
} |
162 |
- else if (is(data, "matrix") | is(data, "data.frame")) |
|
185 |
+ else |
|
163 | 186 |
{ |
164 | 187 |
names <- colnames(data) |
165 | 188 |
} |
166 | 189 |
|
167 | 190 |
if (is.null(names)) |
168 |
- { |
|
169 |
- nSamples <- ncolHelper(data) |
|
170 |
- return(paste("Sample", 1:nSamples, sep="_")) |
|
171 |
- } |
|
191 |
+ return(paste("Sample", 1:ncolHelper(data), sep="_")) |
|
172 | 192 |
return(names) |
173 | 193 |
} |
174 | 194 |
|
... | ... |
@@ -236,9 +256,6 @@ parseExtraParams <- function(allParams, extraParams) |
236 | 256 |
#' @return throws an error if data has problems |
237 | 257 |
checkDataMatrix <- function(data, uncertainty, params) |
238 | 258 |
{ |
239 |
- if (!is(data, "matrix") & !is(data, "data.frame") |
|
240 |
- & !is(data, "SummarizedExperiment") & !is(data, "SingleCellExperiment")) |
|
241 |
- stop("unsupported object type of CoGAPS") |
|
242 | 259 |
if (any(is.na(data))) |
243 | 260 |
stop("NA values in data") |
244 | 261 |
if (!all(apply(data, 2, is.numeric))) |
... | ... |
@@ -260,9 +277,6 @@ checkDataMatrix <- function(data, uncertainty, params) |
260 | 277 |
#' @return throws an error if inputs are invalid |
261 | 278 |
checkInputs <- function(data, uncertainty, allParams) |
262 | 279 |
{ |
263 |
- if (is(data, "character") & !supported(data)) |
|
264 |
- stop("unsupported file extension for data") |
|
265 |
- |
|
266 | 280 |
if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
267 | 281 |
stop("uncertainty must be same data type as data (file name)") |
268 | 282 |
if (is(uncertainty, "character") & !supported(uncertainty)) |
... | ... |
@@ -272,34 +286,16 @@ checkInputs <- function(data, uncertainty, allParams) |
272 | 286 |
if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
273 | 287 |
stop("must use default uncertainty when enabling sparseOptimization") |
274 | 288 |
|
275 |
- if (!(allParams$whichMatrixFixed %in% c("A", "P", "N"))) |
|
276 |
- stop("Invalid choice of fixed matrix, must be 'A' or 'P'") |
|
277 |
- if (!is.null(allParams$fixedPatterns) & allParams$whichMatrixFixed == "N") |
|
278 |
- stop("fixedPatterns passed without setting whichMatrixFixed") |
|
279 |
- if (allParams$whichMatrixFixed %in% c("A", "P") & is.null(allParams$fixedPatterns)) |
|
280 |
- stop("whichMatrixFixed is set without passing fixedPatterns") |
|
281 |
- |
|
282 | 289 |
if (!is.null(allParams$gaps@distributed)) |
283 | 290 |
{ |
284 | 291 |
if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
285 | 292 |
warning("running single-cell CoGAPS with singleCell=FALSE") |
286 |
- if (!is.null(allParams$fixedPatterns) & is.null(allParams$gaps@explicitSets)) |
|
287 |
- warning("doing manual pattern matching with using explicit subsets") |
|
288 | 293 |
if (allParams$nThreads > 1) |
289 | 294 |
stop("can't run multi-threaded and distributed CoGAPS at the same time") |
290 | 295 |
if (!is.null(allParams$checkpointInFile)) |
291 | 296 |
stop("checkpoints not supported for distributed cogaps") |
292 |
- if (allParams$gaps@distributed == "single-cell" & allParams$whichMatrixFixed == "P") |
|
293 |
- stop("can't fix P matrix when running single-cell CoGAPS") |
|
294 |
- if (allParams$gaps@distributed == "genome-wide" & allParams$whichMatrixFixed == "A") |
|
295 |
- stop("can't fix A matrix when running genome-wide CoGAPS") |
|
296 | 297 |
} |
297 | 298 |
|
298 |
- if (!(allParams$subsetDim %in% c(0,1,2))) |
|
299 |
- stop("invalid subset dimension") |
|
300 |
- if (allParams$subsetDim > 0 & is.null(allParams$subsetIndices)) |
|
301 |
- stop("subsetDim provided without subsetIndices") |
|
302 |
- |
|
303 | 299 |
if (!is(data, "character")) |
304 | 300 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
305 | 301 |
} |
... | ... |
@@ -309,38 +305,72 @@ checkInputs <- function(data, uncertainty, allParams) |
309 | 305 |
#' |
310 | 306 |
#' @param data data matrix |
311 | 307 |
#' @param allParams list of all parameters |
312 |
-#' @param geneNames vector of names of genes in data |
|
313 |
-#' @param sampleNames vector of names of samples in data |
|
314 | 308 |
#' @return list of all parameters with added gene names |
315 |
-getNamesFromData <- function(data, allParams, geneNames, sampleNames) |
|
309 |
+getDimNames <- function(data, allParams) |
|
316 | 310 |
{ |
317 |
- # get gene/sample names |
|
318 |
- if (is.null(geneNames)) |
|
311 |
+ # get user supplied names |
|
312 |
+ geneNames <- allParams$gaps@geneNames |
|
313 |
+ sampleNames <- allParams$gaps@sampleNames |
|
314 |
+ |
|
315 |
+ # if user didn't supply any names, pull from data set or use default labels |
|
316 |
+ if (is.null(allParams$gaps@geneNames)) |
|
319 | 317 |
geneNames <- getGeneNames(data, allParams$transposeData) |
320 |
- if (is.null(sampleNames)) |
|
318 |
+ if (is.null(allParams$gaps@sampleNames)) |
|
321 | 319 |
sampleNames <- getSampleNames(data, allParams$transposeData) |
322 | 320 |
|
321 |
+ # get the number of genes/samples |
|
323 | 322 |
nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
324 | 323 |
nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
325 | 324 |
|
326 |
- if (allParams$subsetDim == 1) |
|
325 |
+ # handle any subsetting |
|
326 |
+ if (allParams$gaps@subsetDim == 1) |
|
327 | 327 |
{ |
328 |
- nGenes <- length(allParams$subsetIndices) |
|
329 |
- geneNames <- geneNames[allParams$subsetIndices] |
|
328 |
+ nGenes <- length(allParams$gaps@subsetIndices) |
|
329 |
+ geneNames <- geneNames[allParams$gaps@subsetIndices] |
|
330 | 330 |
} |
331 |
- else if (allParams$subsetDim == 2) |
|
331 |
+ else if (allParams$gaps@subsetDim == 2) |
|
332 | 332 |
{ |
333 |
- nSamples <- length(allParams$subsetIndices) |
|
334 |
- sampleNames <- sampleNames[allParams$subsetIndices] |
|
333 |
+ nSamples <- length(allParams$gaps@subsetIndices) |
|
334 |
+ sampleNames <- sampleNames[allParams$gaps@subsetIndices] |
|
335 | 335 |
} |
336 | 336 |
|
337 |
+ # check that names align with expected number of genes/samples |
|
337 | 338 |
if (length(geneNames) != nGenes) |
338 | 339 |
stop("incorrect number of gene names given") |
339 | 340 |
if (length(sampleNames) != nSamples) |
340 | 341 |
stop("incorrect number of sample names given") |
341 | 342 |
|
343 |
+ # store processed gene/sample names directly in allParams list |
|
344 |
+ # this is an important distinction - allParams@gaps contains the |
|
345 |
+ # gene/sample names originally passed by the user, allParams contains |
|
346 |
+ # the procseed gene/sample names to be used when labeling the result |
|
342 | 347 |
allParams$geneNames <- geneNames |
343 | 348 |
allParams$sampleNames <- sampleNames |
344 |
- |
|
345 | 349 |
return(allParams) |
350 |
+} |
|
351 |
+ |
|
352 |
+#' convert any acceptable data input to a numeric matrix |
|
353 |
+#' @keywords internal |
|
354 |
+#' |
|
355 |
+#' @description convert supported R objects containing the data to a |
|
356 |
+#' numeric matrix, if data is a file name do nothing. Exits with an error |
|
357 |
+#' if data is not a supported type. |
|
358 |
+#' @param data data input |
|
359 |
+#' @return data matrix |
|
360 |
+#' @importFrom methods is |
|
361 |
+#' @importFrom SummarizedExperiment assay |
|
362 |
+convertDataToMatrix <- function(data) |
|
363 |
+{ |
|
364 |
+ if (is(data, "character") & !supported(data)) |
|
365 |
+ stop("unsupported file extension for data") |
|
366 |
+ else if (is(data, "matrix") | is(data, "character")) |
|
367 |
+ return(data) |
|
368 |
+ else if (is(data, "data.frame")) |
|
369 |
+ return(data.matrix(data)) |
|
370 |
+ else if (is(data, "SummarizedExperiment")) |
|
371 |
+ return(SummarizedExperiment::assay(data, "counts")) |
|
372 |
+ else if (is(data, "SingleCellExperiment")) |
|
373 |
+ return(SummarizedExperiment::assay(data, "counts")) |
|
374 |
+ else |
|
375 |
+ stop("unsupported data type") |
|
346 | 376 |
} |
347 | 377 |
\ No newline at end of file |
... | ... |
@@ -331,7 +331,7 @@ getNamesFromData <- function(data, allParams, geneNames, sampleNames) |
331 | 331 |
else if (allParams$subsetDim == 2) |
332 | 332 |
{ |
333 | 333 |
nSamples <- length(allParams$subsetIndices) |
334 |
- sampleNames <- sampleames[allParams$subsetIndices] |
|
334 |
+ sampleNames <- sampleNames[allParams$subsetIndices] |
|
335 | 335 |
} |
336 | 336 |
|
337 | 337 |
if (length(geneNames) != nGenes) |
... | ... |
@@ -323,6 +323,17 @@ getNamesFromData <- function(data, allParams, geneNames, sampleNames) |
323 | 323 |
nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
324 | 324 |
nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
325 | 325 |
|
326 |
+ if (allParams$subsetDim == 1) |
|
327 |
+ { |
|
328 |
+ nGenes <- length(allParams$subsetIndices) |
|
329 |
+ geneNames <- geneNames[allParams$subsetIndices] |
|
330 |
+ } |
|
331 |
+ else if (allParams$subsetDim == 2) |
|
332 |
+ { |
|
333 |
+ nSamples <- length(allParams$subsetIndices) |
|
334 |
+ sampleNames <- sampleames[allParams$subsetIndices] |
|
335 |
+ } |
|
336 |
+ |
|
326 | 337 |
if (length(geneNames) != nGenes) |
327 | 338 |
stop("incorrect number of gene names given") |
328 | 339 |
if (length(sampleNames) != nSamples) |
... | ... |
@@ -260,11 +260,9 @@ checkDataMatrix <- function(data, uncertainty, params) |
260 | 260 |
#' @return throws an error if inputs are invalid |
261 | 261 |
checkInputs <- function(data, uncertainty, allParams) |
262 | 262 |
{ |
263 |
- # check file extension |
|
264 | 263 |
if (is(data, "character") & !supported(data)) |
265 | 264 |
stop("unsupported file extension for data") |
266 | 265 |
|
267 |
- # check uncertainty matrix |
|
268 | 266 |
if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
269 | 267 |
stop("uncertainty must be same data type as data (file name)") |
270 | 268 |
if (is(uncertainty, "character") & !supported(uncertainty)) |
... | ... |
@@ -274,13 +272,13 @@ checkInputs <- function(data, uncertainty, allParams) |
274 | 272 |
if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
275 | 273 |
stop("must use default uncertainty when enabling sparseOptimization") |
276 | 274 |
|
277 |
- # check fixed matrix choice |
|
278 |
- if (!(allParams$whichMatrixFixed %in% ("A", "P", "N"))) |
|
275 |
+ if (!(allParams$whichMatrixFixed %in% c("A", "P", "N"))) |
|
279 | 276 |
stop("Invalid choice of fixed matrix, must be 'A' or 'P'") |
280 |
- if (!is.null(allParams$fixedPatterns) & allParams@whichMatrixFixed == "N") |
|
277 |
+ if (!is.null(allParams$fixedPatterns) & allParams$whichMatrixFixed == "N") |
|
281 | 278 |
stop("fixedPatterns passed without setting whichMatrixFixed") |
279 |
+ if (allParams$whichMatrixFixed %in% c("A", "P") & is.null(allParams$fixedPatterns)) |
|
280 |
+ stop("whichMatrixFixed is set without passing fixedPatterns") |
|
282 | 281 |
|
283 |
- # check parameters specific to distributed cogaps |
|
284 | 282 |
if (!is.null(allParams$gaps@distributed)) |
285 | 283 |
{ |
286 | 284 |
if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
... | ... |
@@ -289,18 +287,19 @@ checkInputs <- function(data, uncertainty, allParams) |
289 | 287 |
warning("doing manual pattern matching with using explicit subsets") |
290 | 288 |
if (allParams$nThreads > 1) |
291 | 289 |
stop("can't run multi-threaded and distributed CoGAPS at the same time") |
290 |
+ if (!is.null(allParams$checkpointInFile)) |
|
291 |
+ stop("checkpoints not supported for distributed cogaps") |
|
292 | 292 |
if (allParams$gaps@distributed == "single-cell" & allParams$whichMatrixFixed == "P") |
293 |
+ stop("can't fix P matrix when running single-cell CoGAPS") |
|
294 |
+ if (allParams$gaps@distributed == "genome-wide" & allParams$whichMatrixFixed == "A") |
|
295 |
+ stop("can't fix A matrix when running genome-wide CoGAPS") |
|
293 | 296 |
} |
294 | 297 |
|
295 |
- # convert data to matrix |
|
296 |
- if (is(data, "matrix")) |
|
297 |
- data <- data |
|
298 |
- if (is(data, "data.frame")) |
|
299 |
- data <- data.matrix(data) |
|
300 |
- else if (is(data, "SummarizedExperiment")) |
|
301 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
302 |
- else if (is(data, "SingleCellExperiment")) |
|
303 |
- data <- SummarizedExperiment::assay(data, "counts") |
|
298 |
+ if (!(allParams$subsetDim %in% c(0,1,2))) |
|
299 |
+ stop("invalid subset dimension") |
|
300 |
+ if (allParams$subsetDim > 0 & is.null(allParams$subsetIndices)) |
|
301 |
+ stop("subsetDim provided without subsetIndices") |
|
302 |
+ |
|
304 | 303 |
if (!is(data, "character")) |
305 | 304 |
checkDataMatrix(data, uncertainty, allParams$gaps) |
306 | 305 |
} |
... | ... |
@@ -249,4 +249,88 @@ checkDataMatrix <- function(data, uncertainty, params) |
249 | 249 |
stop("nPatterns must be less than dimensions of data") |
250 | 250 |
if (sum(uncertainty < 1e-5) > 0) |
251 | 251 |
warning("small values in uncertainty matrix detected") |
252 |
+} |
|
253 |
+ |
|
254 |
+#' check that all inputs are valid |
|
255 |
+#' @keywords internal |
|
256 |
+#' |
|
257 |
+#' @param data data matrix |
|
258 |
+#' @param uncertainty uncertainty matrix, can be null |
|
259 |
+#' @param allParams list of all parameters |
|
260 |
+#' @return throws an error if inputs are invalid |
|
261 |
+checkInputs <- function(data, uncertainty, allParams) |
|
262 |
+{ |
|
263 |
+ # check file extension |
|
264 |
+ if (is(data, "character") & !supported(data)) |
|
265 |
+ stop("unsupported file extension for data") |
|
266 |
+ |
|
267 |
+ # check uncertainty matrix |
|
268 |
+ if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
|
269 |
+ stop("uncertainty must be same data type as data (file name)") |
|
270 |
+ if (is(uncertainty, "character") & !supported(uncertainty)) |
|
271 |
+ stop("unsupported file extension for uncertainty") |
|
272 |
+ if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix")) |
|
273 |
+ stop("uncertainty must be a matrix unless data is a file path") |
|
274 |
+ if (!is.null(uncertainty) & allParams$gaps@sparseOptimization) |
|
275 |
+ stop("must use default uncertainty when enabling sparseOptimization") |
|
276 |
+ |
|
277 |
+ # check fixed matrix choice |
|
278 |
+ if (!(allParams$whichMatrixFixed %in% ("A", "P", "N"))) |
|
279 |
+ stop("Invalid choice of fixed matrix, must be 'A' or 'P'") |
|
280 |
+ if (!is.null(allParams$fixedPatterns) & allParams@whichMatrixFixed == "N") |
|
281 |
+ stop("fixedPatterns passed without setting whichMatrixFixed") |
|
282 |
+ |
|
283 |
+ # check parameters specific to distributed cogaps |
|
284 |
+ if (!is.null(allParams$gaps@distributed)) |
|
285 |
+ { |
|
286 |
+ if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell) |
|
287 |
+ warning("running single-cell CoGAPS with singleCell=FALSE") |
|
288 |
+ if (!is.null(allParams$fixedPatterns) & is.null(allParams$gaps@explicitSets)) |
|
289 |
+ warning("doing manual pattern matching with using explicit subsets") |
|
290 |
+ if (allParams$nThreads > 1) |
|
291 |
+ stop("can't run multi-threaded and distributed CoGAPS at the same time") |
|
292 |
+ if (allParams$gaps@distributed == "single-cell" & allParams$whichMatrixFixed == "P") |
|
293 |
+ } |
|
294 |
+ |
|
295 |
+ # convert data to matrix |
|
296 |
+ if (is(data, "matrix")) |
|
297 |
+ data <- data |
|
298 |
+ if (is(data, "data.frame")) |
|
299 |
+ data <- data.matrix(data) |
|
300 |
+ else if (is(data, "SummarizedExperiment")) |
|
301 |
+ data <- SummarizedExperiment::assay(data, "counts") |
|
302 |
+ else if (is(data, "SingleCellExperiment")) |
|
303 |
+ data <- SummarizedExperiment::assay(data, "counts") |
|
304 |
+ if (!is(data, "character")) |
|
305 |
+ checkDataMatrix(data, uncertainty, allParams$gaps) |
|
306 |
+} |
|
307 |
+ |
|
308 |
+#' extracts gene/sample names from the data |
|
309 |
+#' @keywords internal |
|
310 |
+#' |
|
311 |
+#' @param data data matrix |
|
312 |
+#' @param allParams list of all parameters |
|
313 |
+#' @param geneNames vector of names of genes in data |
|
314 |
+#' @param sampleNames vector of names of samples in data |
|
315 |
+#' @return list of all parameters with added gene names |
|
316 |
+getNamesFromData <- function(data, allParams, geneNames, sampleNames) |
|
317 |
+{ |
|
318 |
+ # get gene/sample names |
|
319 |
+ if (is.null(geneNames)) |
|
320 |
+ geneNames <- getGeneNames(data, allParams$transposeData) |
|
321 |
+ if (is.null(sampleNames)) |
|
322 |
+ sampleNames <- getSampleNames(data, allParams$transposeData) |
|
323 |
+ |
|
324 |
+ nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
|
325 |
+ nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
|
326 |
+ |
|
327 |
+ if (length(geneNames) != nGenes) |
|
328 |
+ stop("incorrect number of gene names given") |
|
329 |
+ if (length(sampleNames) != nSamples) |
|
330 |
+ stop("incorrect number of sample names given") |
|
331 |
+ |
|
332 |
+ allParams$geneNames <- geneNames |
|
333 |
+ allParams$sampleNames <- sampleNames |
|
334 |
+ |
|
335 |
+ return(allParams) |
|
252 | 336 |
} |
253 | 337 |
\ No newline at end of file |
... | ... |
@@ -236,6 +236,13 @@ parseExtraParams <- function(allParams, extraParams) |
236 | 236 |
#' @return throws an error if data has problems |
237 | 237 |
checkDataMatrix <- function(data, uncertainty, params) |
238 | 238 |
{ |
239 |
+ if (!is(data, "matrix") & !is(data, "data.frame") |
|
240 |
+ & !is(data, "SummarizedExperiment") & !is(data, "SingleCellExperiment")) |
|
241 |
+ stop("unsupported object type of CoGAPS") |
|
242 |
+ if (any(is.na(data))) |
|
243 |
+ stop("NA values in data") |
|
244 |
+ if (!all(apply(data, 2, is.numeric))) |
|
245 |
+ stop("data is not numeric") |
|
239 | 246 |
if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0) |
240 | 247 |
stop("negative values in data and/or uncertainty matrix") |
241 | 248 |
if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns) |
... | ... |
@@ -1,3 +1,45 @@ |
1 |
+#' get specified number of retina subsets |
|
2 |
+#' @export |
|
3 |
+#' |
|
4 |
+#' @description combines retina subsets from extdata directory |
|
5 |
+#' @param n number of subsets to use |
|
6 |
+#' @return matrix of RNA counts |
|
7 |
+#' @importFrom rhdf5 h5read |
|
8 |
+getRetinaSubset <- function(n=1) |
|
9 |
+{ |
|
10 |
+ if (!(n %in% 1:4)) |
|
11 |
+ { |
|
12 |
+ stop("invalid number of subsets requested") |
|
13 |
+ } |
|
14 |
+ |
|
15 |
+ subset_1_path <- system.file("extdata/retina_subset_1.h5", package="CoGAPS") |
|
16 |
+ subset_2_path <- system.file("extdata/retina_subset_2.h5", package="CoGAPS") |
|
17 |
+ subset_3_path <- system.file("extdata/retina_subset_3.h5", package="CoGAPS") |
|
18 |
+ subset_4_path <- system.file("extdata/retina_subset_4.h5", package="CoGAPS") |
|
19 |
+ |
|
20 |
+ data <- rhdf5::h5read(subset_1_path, "counts") |
|
21 |
+ cNames <- rhdf5::h5read(subset_1_path, "cellNames") |
|
22 |
+ if (n > 1) |
|
23 |
+ { |
|
24 |
+ data <- cbind(data, rhdf5::h5read(subset_2_path, "counts")) |
|
25 |
+ cNames <- c(cNames, rhdf5::h5read(subset_2_path, "cellNames")) |
|
26 |
+ } |
|
27 |
+ if (n > 2) |
|
28 |
+ { |
|
29 |
+ data <- cbind(data, rhdf5::h5read(subset_3_path, "counts")) |
|
30 |
+ cNames <- c(cNames, rhdf5::h5read(subset_3_path, "cellNames")) |
|
31 |
+ } |
|
32 |
+ if (n > 3) |
|
33 |
+ { |
|
34 |
+ data <- cbind(data, rhdf5::h5read(subset_4_path, "counts")) |
|
35 |
+ cNames <- c(cNames, rhdf5::h5read(subset_4_path, "cellNames")) |
|
36 |
+ } |
|
37 |
+ |
|
38 |
+ colnames(data) <- cNames |
|
39 |
+ rownames(data) <- rhdf5::h5read(subset_1_path, "geneNames") |
|
40 |
+ return(data) |
|
41 |
+} |
|
42 |
+ |
|
1 | 43 |
#' wrapper around cat |
2 | 44 |
#' @keywords internal |
3 | 45 |
#' |
... | ... |
@@ -146,6 +146,7 @@ startupMessage <- function(data, allParams) |
146 | 146 |
if (!is.null(allParams$gaps@distributed)) |
147 | 147 |
dist_message <- allParams$gaps@distributed |
148 | 148 |
|
149 |
+ cat("\nThis is CoGAPS version", as.character(packageVersion("CoGAPS")), "\n") |
|
149 | 150 |
cat("Running", dist_message, "CoGAPS on", nGenes, "genes and", |
150 | 151 |
nSamples, "samples") |
151 | 152 |
|
... | ... |
@@ -71,13 +71,18 @@ ncolHelper <- function(data) |
71 | 71 |
#' @return vector of gene names |
72 | 72 |
getGeneNames <- function(data, transpose) |
73 | 73 |
{ |
74 |
+ if (transpose) |
|
75 |
+ { |
|
76 |
+ return(getSampleNames(data, FALSE)) |
|
77 |
+ } |
|
78 |
+ |
|
74 | 79 |
names <- NULL |
75 | 80 |
if (is(data, "character")) |
76 | 81 |
{ |
77 | 82 |
names <- switch(tools::file_ext(data), |
78 | 83 |
"csv" = as.matrix(data.table::fread(data, select=1))[,1], |
79 | 84 |
"tsv" = as.matrix(data.table::fread(data, select=1))[,1], |
80 |
- "gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(gistGctPath, select=1)))) |
|
85 |
+ "gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(data, select=1)))) |
|
81 | 86 |
) |
82 | 87 |
} |
83 | 88 |
else if (is(data, "matrix") | is(data, "data.frame")) |
... | ... |
@@ -87,7 +92,7 @@ getGeneNames <- function(data, transpose) |
87 | 92 |
|
88 | 93 |
if (is.null(names)) |
89 | 94 |
{ |
90 |
- nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data)) |
|
95 |
+ nGenes <- nrowHelper(data) |
|
91 | 96 |
return(paste("Gene", 1:nGenes, sep="_")) |
92 | 97 |
} |
93 | 98 |
return(names) |
... | ... |
@@ -98,6 +103,11 @@ getGeneNames <- function(data, transpose) |
98 | 103 |
#' @return vector of sample names |
99 | 104 |
getSampleNames <- function(data, transpose) |
100 | 105 |
{ |
106 |
+ if (transpose) |
|
107 |
+ { |
|
108 |
+ return(getGeneNames(data, FALSE)) |
|
109 |
+ } |
|
110 |
+ |
|
101 | 111 |
names <- NULL |
102 | 112 |
if (is(data, "character")) |
103 | 113 |
{ |
... | ... |
@@ -114,7 +124,7 @@ getSampleNames <- function(data, transpose) |
114 | 124 |
|
115 | 125 |
if (is.null(names)) |
116 | 126 |
{ |
117 |
- nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data)) |
|
127 |
+ nSamples <- ncolHelper(data) |
|
118 | 128 |
return(paste("Sample", 1:nSamples, sep="_")) |
119 | 129 |
} |
120 | 130 |
return(names) |
... | ... |
@@ -71,8 +71,26 @@ ncolHelper <- function(data) |
71 | 71 |
#' @return vector of gene names |
72 | 72 |
getGeneNames <- function(data, transpose) |
73 | 73 |
{ |
74 |
- nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data)) |
|
75 |
- return(paste("Gene", 1:nGenes, sep="_")) |
|
74 |
+ names <- NULL |
|
75 |
+ if (is(data, "character")) |
|
76 |
+ { |
|
77 |
+ names <- switch(tools::file_ext(data), |
|
78 |
+ "csv" = as.matrix(data.table::fread(data, select=1))[,1], |
|
79 |
+ "tsv" = as.matrix(data.table::fread(data, select=1))[,1], |
|
80 |
+ "gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(gistGctPath, select=1)))) |
|
81 |
+ ) |
|
82 |
+ } |
|
83 |
+ else if (is(data, "matrix") | is(data, "data.frame")) |
|
84 |
+ { |
|
85 |
+ names <- rownames(data) |
|
86 |
+ } |
|
87 |
+ |
|
88 |
+ if (is.null(names)) |
|
89 |
+ { |
|
90 |
+ nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data)) |
|
91 |
+ return(paste("Gene", 1:nGenes, sep="_")) |
|
92 |
+ } |
|
93 |
+ return(names) |
|
76 | 94 |
} |
77 | 95 |
|
78 | 96 |
#' extract sample names from data |
... | ... |
@@ -80,8 +98,26 @@ getGeneNames <- function(data, transpose) |
80 | 98 |
#' @return vector of sample names |
81 | 99 |
getSampleNames <- function(data, transpose) |
82 | 100 |
{ |
83 |
- nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data)) |
|
84 |
- return(paste("Sample", 1:nSamples, sep="_")) |
|
101 |
+ names <- NULL |
|
102 |
+ if (is(data, "character")) |
|
103 |
+ { |
|
104 |
+ names <- switch(tools::file_ext(data), |
|
105 |
+ "csv" = colnames(data.table::fread(data, nrows=1))[-1], |
|
106 |
+ "tsv" = colnames(data.table::fread(data, nrows=1))[-1], |
|
107 |
+ "gct" = suppressWarnings(colnames(data.table::fread(data, skip=2, nrows=1))[-1:-2]) |
|
108 |
+ ) |
|
109 |
+ } |
|
110 |
+ else if (is(data, "matrix") | is(data, "data.frame")) |
|
111 |
+ { |
|
112 |
+ names <- colnames(data) |
|
113 |
+ } |
|
114 |
+ |
|
115 |
+ if (is.null(names)) |
|
116 |
+ { |
|
117 |
+ nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data)) |
|
118 |
+ return(paste("Sample", 1:nSamples, sep="_")) |
|
119 |
+ } |
|
120 |
+ return(names) |
|
85 | 121 |
} |
86 | 122 |
|
87 | 123 |
#' write start up message |
... | ... |
@@ -1,10 +1,10 @@ |
1 | 1 |
#' wrapper around cat |
2 |
-#' @keywords |
|
2 |
+#' @keywords internal |
|
3 | 3 |
#' |
4 | 4 |
#' @description cleans up message printing |
5 | 5 |
#' @param allParams all cogaps parameters |
6 | 6 |
#' @param ... arguments forwarded to cat |
7 |
-#' @return displays text |
|
7 |
+#' @return conditionally print message |
|
8 | 8 |
gapsCat <- function(allParams, ...) |
9 | 9 |
{ |
10 | 10 |
if (allParams$messages) |
... | ... |
@@ -68,6 +68,7 @@ ncolHelper <- function(data) |
68 | 68 |
|
69 | 69 |
#' extract gene names from data |
70 | 70 |
#' @keywords internal |
71 |
+#' @return vector of gene names |
|
71 | 72 |
getGeneNames <- function(data, transpose) |
72 | 73 |
{ |
73 | 74 |
nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data)) |
... | ... |
@@ -76,6 +77,7 @@ getGeneNames <- function(data, transpose) |
76 | 77 |
|
77 | 78 |
#' extract sample names from data |
78 | 79 |
#' @keywords internal |
80 |
+#' @return vector of sample names |
|
79 | 81 |
getSampleNames <- function(data, transpose) |
80 | 82 |
{ |
81 | 83 |
nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data)) |
... | ... |
@@ -21,7 +21,7 @@ supported <- function(file) |
21 | 21 |
{ |
22 | 22 |
if (!is(file, "character")) |
23 | 23 |
return(FALSE) |
24 |
- return(tools::file_ext(file) %in% c("tsv", "csv", "mtx")) |
|
24 |
+ return(tools::file_ext(file) %in% c("tsv", "csv", "mtx", "gct")) |
|
25 | 25 |
} |
26 | 26 |
|
27 | 27 |
#' get number of rows from supported file name or matrix |
... | ... |
@@ -38,7 +38,8 @@ nrowHelper <- function(data) |
38 | 38 |
return(switch(tools::file_ext(data), |
39 | 39 |
"csv" = nrow(data.table::fread(data, select=1)), |
40 | 40 |
"tsv" = nrow(data.table::fread(data, select=1)), |
41 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]) |
|
41 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]), |
|
42 |
+ "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][1]) |
|
42 | 43 |
)) |
43 | 44 |
} |
44 | 45 |
return(nrow(data)) |
... | ... |
@@ -58,7 +59,8 @@ ncolHelper <- function(data) |
58 | 59 |
return(switch(tools::file_ext(data), |
59 | 60 |
"csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
60 | 61 |
"tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
61 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]) |
|
62 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]), |
|
63 |
+ "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][2]) |
|
62 | 64 |
)) |
63 | 65 |
} |
64 | 66 |
return(ncol(data)) |
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,152 @@ |
1 |
+#' wrapper around cat |
|
2 |
+#' @keywords |
|
3 |
+#' |
|
4 |
+#' @description cleans up message printing |
|
5 |
+#' @param allParams all cogaps parameters |
|
6 |
+#' @param ... arguments forwarded to cat |
|
7 |
+#' @return displays text |
|
8 |
+gapsCat <- function(allParams, ...) |
|
9 |
+{ |
|
10 |
+ if (allParams$messages) |
|
11 |
+ cat(...) |
|
12 |
+} |
|
13 |
+ |
|
14 |
+#' checks if file is supported |
|
15 |
+#' @keywords internal |
|
16 |
+#' |
|
17 |
+#' @param file path to file |
|
18 |
+#' @return TRUE if file is supported, FALSE if not |
|
19 |
+#' @importFrom tools file_ext |
|
20 |
+supported <- function(file) |
|
21 |
+{ |
|
22 |
+ if (!is(file, "character")) |
|
23 |
+ return(FALSE) |
|
24 |
+ return(tools::file_ext(file) %in% c("tsv", "csv", "mtx")) |
|
25 |
+} |
|
26 |
+ |
|
27 |
+#' get number of rows from supported file name or matrix |
|
28 |
+#' @keywords internal |
|
29 |
+#' |
|
30 |
+#' @param data either a file name or a matrix |
|
31 |
+#' @return number of rows |
|
32 |
+#' @importFrom data.table fread |
|
33 |
+#' @importFrom tools file_ext |
|
34 |
+nrowHelper <- function(data) |
|
35 |
+{ |
|
36 |
+ if (is(data, "character")) |
|
37 |
+ { |
|
38 |
+ return(switch(tools::file_ext(data), |
|
39 |
+ "csv" = nrow(data.table::fread(data, select=1)), |
|
40 |
+ "tsv" = nrow(data.table::fread(data, select=1)), |
|
41 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]) |
|
42 |
+ )) |
|
43 |
+ } |
|
44 |
+ return(nrow(data)) |
|
45 |
+} |
|
46 |
+ |
|
47 |
+#' get number of columns from supported file name or matrix |
|
48 |
+#' @keywords internal |
|
49 |
+#' |
|
50 |
+#' @param data either a file name or a matrix |
|
51 |
+#' @return number of columns |
|
52 |
+#' @importFrom data.table fread |
|
53 |
+#' @importFrom tools file_ext |
|
54 |
+ncolHelper <- function(data) |
|
55 |
+{ |
|
56 |
+ if (is(data, "character")) |
|
57 |
+ { |
|
58 |
+ return(switch(tools::file_ext(data), |
|
59 |
+ "csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
60 |
+ "tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
|
61 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]) |
|
62 |
+ )) |
|
63 |
+ } |
|
64 |
+ return(ncol(data)) |
|
65 |
+} |
|
66 |
+ |
|
67 |
+#' extract gene names from data |
|
68 |
+#' @keywords internal |
|
69 |
+getGeneNames <- function(data, transpose) |
|
70 |
+{ |
|
71 |
+ nGenes <- ifelse(transpose, ncolHelper(data), nrowHelper(data)) |
|
72 |
+ return(paste("Gene", 1:nGenes, sep="_")) |
|
73 |
+} |
|
74 |
+ |
|
75 |
+#' extract sample names from data |
|
76 |
+#' @keywords internal |
|
77 |
+getSampleNames <- function(data, transpose) |
|
78 |
+{ |
|
79 |
+ nSamples <- ifelse(transpose, nrowHelper(data), ncolHelper(data)) |
|
80 |
+ return(paste("Sample", 1:nSamples, sep="_")) |
|
81 |
+} |
|
82 |
+ |
|
83 |
+#' write start up message |
|
84 |
+#' @keywords internal |
|
85 |
+#' |
|
86 |
+#' @param data data set |
|
87 |
+#' @param allParams list of all parameters |
|
88 |
+#' @return message displayed to screen |
|
89 |
+#' @importFrom methods show |
|
90 |
+startupMessage <- function(data, allParams) |
|
91 |
+{ |
|
92 |
+ nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data)) |
|
93 |
+ nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data)) |
|
94 |
+ |
|
95 |
+ dist_message <- "Standard" |
|
96 |
+ if (!is.null(allParams$gaps@distributed)) |
|
97 |
+ dist_message <- allParams$gaps@distributed |
|
98 |
+ |
|
99 |
+ cat("Running", dist_message, "CoGAPS on", nGenes, "genes and", |
|
100 |
+ nSamples, "samples") |
|
101 |
+ |
|
102 |
+ if (allParams$messages) |
|
103 |
+ { |
|
104 |
+ cat(" with parameters:\n\n") |
|
105 |
+ methods::show(allParams$gaps) |
|
106 |
+ } |
|
107 |
+ cat("\n") |
|
108 |
+} |
|
109 |
+ |
|
110 |
+#' parse parameters passed through the ... variable |
|
111 |
+#' @keywords internal |
|
112 |
+#' |
|
113 |
+#' @param allParams list of all parameters |
|
114 |
+#' @param extraParams list of parameters in ... |
|
115 |
+#' @return allParams with any valid parameters in extraParams added |
|
116 |
+#' @note will halt with an error if any parameters in extraParams are invalid |
|
117 |
+#' @importFrom methods slotNames |
|
118 |
+parseExtraParams <- function(allParams, extraParams) |
|
119 |
+{ |
|
120 |
+ # parse direct params |
|
121 |
+ for (s in slotNames(allParams$gaps)) |
|
122 |
+ { |
|
123 |
+ if (!is.null(extraParams[[s]])) |
|
124 |
+ { |
|
125 |
+ allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]]) |
|
126 |
+ extraParams[[s]] <- NULL |
|
127 |
+ } |
|
128 |
+ } |
|
129 |
+ |
|
130 |
+ # check for unrecognized options |
|
131 |
+ if (length(extraParams) > 0) |
|
132 |
+ stop(paste("unrecognized argument:", names(extraParams)[1])) |
|
133 |
+ |
|
134 |
+ return(allParams) |
|
135 |
+} |
|
136 |
+ |
|
137 |
+#' check that provided data is valid |
|
138 |
+#' @keywords internal |
|
139 |
+#' |
|
140 |
+#' @param data data matrix |
|
141 |
+#' @param uncertainty uncertainty matrix, can be null |
|
142 |
+#' @param params CogapsParams object |
|
143 |
+#' @return throws an error if data has problems |
|
144 |
+checkDataMatrix <- function(data, uncertainty, params) |
|
145 |
+{ |
|
146 |
+ if (sum(data < 0) > 0 | sum(uncertainty < 0) > 0) |
|
147 |
+ stop("negative values in data and/or uncertainty matrix") |
|
148 |
+ if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns) |
|
149 |
+ stop("nPatterns must be less than dimensions of data") |
|
150 |
+ if (sum(uncertainty < 1e-5) > 0) |
|
151 |
+ warning("small values in uncertainty matrix detected") |
|
152 |
+} |
|
0 | 153 |
\ No newline at end of file |