... | ... |
@@ -86,13 +86,14 @@ outputToFile=NULL, ...) |
86 | 86 |
) |
87 | 87 |
allParams <- parseExtraParams(allParams, list(...)) |
88 | 88 |
|
89 |
- # display start up message for the user |
|
90 |
- startupMessage(data, allParams) |
|
91 |
- |
|
92 | 89 |
# check file extension |
93 | 90 |
if (is(data, "character") & !supported(data)) |
94 | 91 |
stop("unsupported file extension for data") |
95 | 92 |
|
93 |
+ # enforce the use of explicit subsets with manual pattern matching |
|
94 |
+ if (!is.null(allParams$matchedPatterns) & is.null(allParams$gaps@explicitSets)) |
|
95 |
+ stop("must provide explicit subsets when doing manual pattern matching") |
|
96 |
+ |
|
96 | 97 |
# check uncertainty matrix |
97 | 98 |
if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character")) |
98 | 99 |
stop("uncertainty must be same data type as data (file name)") |
... | ... |
@@ -154,6 +155,7 @@ outputToFile=NULL, ...) |
154 | 155 |
allParams$sampleNames <- sampleNames |
155 | 156 |
|
156 | 157 |
# run cogaps |
158 |
+ startupMessage(data, allParams) |
|
157 | 159 |
gapsReturnList <- dispatchFunc(data, allParams, uncertainty) |
158 | 160 |
|
159 | 161 |
# convert list to CogapsResult object |
... | ... |
@@ -21,7 +21,7 @@ supported <- function(file) |
21 | 21 |
{ |
22 | 22 |
if (!is(file, "character")) |
23 | 23 |
return(FALSE) |
24 |
- return(tools::file_ext(file) %in% c("tsv", "csv", "mtx")) |
|
24 |
+ return(tools::file_ext(file) %in% c("tsv", "csv", "mtx", "gct")) |
|
25 | 25 |
} |
26 | 26 |
|
27 | 27 |
#' get number of rows from supported file name or matrix |
... | ... |
@@ -38,7 +38,8 @@ nrowHelper <- function(data) |
38 | 38 |
return(switch(tools::file_ext(data), |
39 | 39 |
"csv" = nrow(data.table::fread(data, select=1)), |
40 | 40 |
"tsv" = nrow(data.table::fread(data, select=1)), |
41 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]) |
|
41 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]), |
|
42 |
+ "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][1]) |
|
42 | 43 |
)) |
43 | 44 |
} |
44 | 45 |
return(nrow(data)) |
... | ... |
@@ -58,7 +59,8 @@ ncolHelper <- function(data) |
58 | 59 |
return(switch(tools::file_ext(data), |
59 | 60 |
"csv" = ncol(data.table::fread(data, nrows=1)) - 1, |
60 | 61 |
"tsv" = ncol(data.table::fread(data, nrows=1)) - 1, |
61 |
- "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]) |
|
62 |
+ "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]), |
|
63 |
+ "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][2]) |
|
62 | 64 |
)) |
63 | 65 |
} |
64 | 66 |
return(ncol(data)) |
... | ... |
@@ -30,11 +30,16 @@ sampleWithExplictSets <- function(allParams, total) |
30 | 30 |
#' @return list of subsets |
31 | 31 |
sampleWithAnnotationWeights <- function(allParams, setSize) |
32 | 32 |
{ |
33 |
+ # sort annotation group and weights so they match up |
|
34 |
+ weight <- allParams$gaps@samplingWeight |
|
35 |
+ weight <- weight[order(names(weight))] |
|
33 | 36 |
groups <- unique(allParams$gaps@samplingAnnotation) |
37 |
+ groups <- sort(groups) |
|
38 |
+ |
|
39 |
+ # sample accordingly |
|
34 | 40 |
return(lapply(1:allParams$gaps@nSets, function(i) |
35 | 41 |
{ |
36 |
- groupCount <- sample(groups, size=setSize, replace=TRUE, |
|
37 |
- prob=allParams$gaps@samplingWeight) |
|
42 |
+ groupCount <- sample(groups, size=setSize, replace=TRUE, prob=weight) |
|
38 | 43 |
sort(unlist(sapply(groups, function(g) |
39 | 44 |
{ |
40 | 45 |
groupNdx <- which(allParams$gaps@samplingAnnotation == g) |
... | ... |
@@ -110,6 +110,11 @@ setValidity("CogapsParams", |
110 | 110 |
|
111 | 111 |
if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets) |
112 | 112 |
"wrong number of sets given" |
113 |
+ if (length(object@samplingWeight) & is.null(names(object@samplingWeight))) |
|
114 |
+ "samplingWeight must be a named vector" |
|
115 |
+ |
|
116 |
+ if (!is.null(object@explicitSets) & !is.null(object@samplingWeight)) |
|
117 |
+ "explicitSets and samplingAnnotation/samplingWeight are both set" |
|
113 | 118 |
} |
114 | 119 |
) |
115 | 120 |
|
117 | 122 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,31 @@ |
1 |
+context("CoGAPS") |
|
2 |
+ |
|
3 |
+test_that("Subsetting Data", |
|
4 |
+{ |
|
5 |
+ data(GIST) |
|
6 |
+ data(SimpSim) |
|
7 |
+ testMatrix <- GIST.matrix |
|
8 |
+ |
|
9 |
+ # sampling with weights |
|
10 |
+ anno <- sample(letters[1:5], size=nrow(testMatrix), replace=TRUE) |
|
11 |
+ weights <- c(0,1,4,1,0) |
|
12 |
+ names(weights) <- letters[1:5] |
|
13 |
+ params <- new("CogapsParams") |
|
14 |
+ params <- setAnnotationWeights(params, annotation=anno, weights=weights) |
|
15 |
+ result <- GWCoGAPS(testMatrix, params, messages=FALSE) |
|
16 |
+ |
|
17 |
+ getIndex <- function(s) as.numeric(strsplit(s, "_")[[1]][2]) |
|
18 |
+ getIndices <- function(set) unname(sapply(set, getIndex)) |
|
19 |
+ countType <- function(set, type) sum(getIndices(set) %in% which(anno == type)) |
|
20 |
+ getHistogram <- function(set) sapply(letters[1:5], function(letter) countType(set, letter)) |
|
21 |
+ hist <- sapply(getSubsets(result), getHistogram) |
|
22 |
+ freq <- unname(rowSums(hist) / sum(hist)) |
|
23 |
+ |
|
24 |
+ expect_true(all.equal(freq, unname(weights / sum(weights)), tol=0.05)) |
|
25 |
+ |
|
26 |
+ # running cogaps with given subsets |
|
27 |
+ sets <- list(1:225, 226:450, 451:675, 676:900) |
|
28 |
+ result <- GWCoGAPS(SimpSim.data, explicitSets=sets, messages=FALSE) |
|
29 |
+ subsets <- lapply(getSubsets(result), getIndices) |
|
30 |
+ expect_true(all(sapply(1:4, function(i) all.equal(sets[[i]], subsets[[i]])))) |
|
31 |
+}) |
|
0 | 32 |
\ No newline at end of file |
... | ... |
@@ -19,6 +19,7 @@ test_that("Valid Top-Level CoGAPS Calls", |
19 | 19 |
gistCsvPath <- system.file("extdata/GIST.csv", package="CoGAPS") |
20 | 20 |
gistTsvPath <- system.file("extdata/GIST.tsv", package="CoGAPS") |
21 | 21 |
gistMtxPath <- system.file("extdata/GIST.mtx", package="CoGAPS") |
22 |
+ gistGctPath <- system.file("extdata/GIST.gct", package="CoGAPS") |
|
22 | 23 |
|
23 | 24 |
# data types |
24 | 25 |
res <- list() |
... | ... |
@@ -27,15 +28,16 @@ test_that("Valid Top-Level CoGAPS Calls", |
27 | 28 |
res[[3]] <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=50, seed=1, messages=FALSE) |
28 | 29 |
res[[4]] <- CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50, seed=1, messages=FALSE) |
29 | 30 |
res[[5]] <- CoGAPS(gistMtxPath, nIterations=100, outputFrequency=50, seed=1, messages=FALSE) |
31 |
+ res[[6]] <- CoGAPS(gistGctPath, nIterations=100, outputFrequency=50, seed=1, messages=FALSE) |
|
30 | 32 |
expect_true(all(sapply(res, no_na_in_result))) |
31 | 33 |
|
32 | 34 |
expect_equal(nrow(res[[1]]@featureLoadings), 1363) |
33 | 35 |
expect_equal(ncol(res[[1]]@featureLoadings), 7) |
34 | 36 |
expect_equal(nrow(res[[1]]@sampleFactors), 9) |
35 | 37 |
expect_equal(ncol(res[[1]]@sampleFactors), 7) |
36 |
- expect_true(all(sapply(1:4, function(i) |
|
38 |
+ expect_true(all(sapply(1:5, function(i) |
|
37 | 39 |
res[[i]]@featureLoadings == res[[i+1]]@featureLoadings))) |
38 |
- expect_true(all(sapply(1:4, function(i) |
|
40 |
+ expect_true(all(sapply(1:5, function(i) |
|
39 | 41 |
res[[i]]@sampleFactors == res[[i+1]]@sampleFactors))) |
40 | 42 |
|
41 | 43 |
# transposing data |
... | ... |
@@ -50,15 +52,17 @@ test_that("Valid Top-Level CoGAPS Calls", |
50 | 52 |
outputFrequency=50, seed=1, messages=FALSE) |
51 | 53 |
res[[5]] <- CoGAPS(gistMtxPath, transposeData=TRUE, nIterations=100, |
52 | 54 |
outputFrequency=50, seed=1, messages=FALSE) |
55 |
+ res[[6]] <- CoGAPS(gistGctPath, transposeData=TRUE, nIterations=100, |
|
56 |
+ outputFrequency=50, seed=1, messages=FALSE) |
|
53 | 57 |
expect_true(all(sapply(res, no_na_in_result))) |
54 | 58 |
|
55 | 59 |
expect_equal(nrow(res[[1]]@featureLoadings), 9) |
56 | 60 |
expect_equal(ncol(res[[1]]@featureLoadings), 7) |
57 | 61 |
expect_equal(nrow(res[[1]]@sampleFactors), 1363) |
58 | 62 |
expect_equal(ncol(res[[1]]@sampleFactors), 7) |
59 |
- expect_true(all(sapply(1:4, function(i) |
|
63 |
+ expect_true(all(sapply(1:5, function(i) |
|
60 | 64 |
res[[i]]@featureLoadings == res[[i+1]]@featureLoadings))) |
61 |
- expect_true(all(sapply(1:4, function(i) |
|
65 |
+ expect_true(all(sapply(1:5, function(i) |
|
62 | 66 |
res[[i]]@sampleFactors == res[[i+1]]@sampleFactors))) |
63 | 67 |
|
64 | 68 |
# passing uncertainty |
... | ... |
@@ -380,8 +380,10 @@ sampling all rows (cols) according to the weight of each category. |
380 | 380 |
```{r} |
381 | 381 |
# sampling with weights |
382 | 382 |
anno <- sample(letters[1:5], size=nrow(SimpSim.data), replace=TRUE) |
383 |
+w <- c(1,1,2,2,1) |
|
384 |
+names(w) <- letters[1:5] |
|
383 | 385 |
params <- new("CogapsParams") |
384 |
-params <- setAnnotationWeights(params, annotation=anno, weights=c(1,1,2,2,1)) |
|
386 |
+params <- setAnnotationWeights(params, annotation=anno, weights=w) |
|
385 | 387 |
result <- GWCoGAPS(SimpSim.data, params, messages=FALSE) |
386 | 388 |
``` |
387 | 389 |
|