Browse code

tests passing

Tom Sherman authored on 31/07/2018 18:45:03
Showing52 changed files

... ...
@@ -16,6 +16,7 @@ Maintainer: Elana J. Fertig <ejfertig@jhmi.edu>,
16 16
 Depends:
17 17
     R (>= 3.5.0)
18 18
 Imports:
19
+    BiocParallel,
19 20
     cluster,
20 21
     data.table,
21 22
     methods,
... ...
@@ -37,7 +38,8 @@ biocViews: GeneExpression, Transcription, GeneSetEnrichment,
37 38
     DifferentialExpression, Bayesian, Clustering, TimeCourse, RNASeq, Microarray,
38 39
     MultipleComparison, DimensionReduction
39 40
 NeedsCompilation: yes
40
-RoxygenNote: 6.0.1
41
+RoxygenNote: 6.1.0
42
+Encoding: UTF-8
41 43
 Collate:
42 44
     'class-CogapsParams.R'
43 45
     'CoGAPS.R'
... ...
@@ -12,7 +12,6 @@ export(computeGeneGSProb)
12 12
 export(getMeanChiSq)
13 13
 export(getParam)
14 14
 export(patternMarkers)
15
-export(patternMatch)
16 15
 export(plotPatternMarkers)
17 16
 export(plotResiduals)
18 17
 export(reconstructGene)
... ...
@@ -24,6 +23,8 @@ exportClasses(CogapsResult)
24 23
 importClassesFrom(S4Vectors,Annotated)
25 24
 importClassesFrom(S4Vectors,character_OR_NULL)
26 25
 importClassesFrom(SingleCellExperiment,LinearEmbeddingMatrix)
26
+importFrom(BiocParallel,SnowParam)
27
+importFrom(BiocParallel,bplapply)
27 28
 importFrom(Rcpp,evalCpp)
28 29
 importFrom(SummarizedExperiment,assay)
29 30
 importFrom(cluster,agnes)
... ...
@@ -53,7 +53,7 @@ supported <- function(file)
53 53
 #' resultC <- CoGAPS(GIST.D, params)
54 54
 #' @importFrom methods new is
55 55
 #' @importFrom SummarizedExperiment assay
56
-CoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL,
56
+CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
57 57
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
58 58
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
59 59
 checkpointInFile=NULL, transposeData=FALSE, ...)
... ...
@@ -67,7 +67,7 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
67 67
         "checkpointInterval"=checkpointInterval,
68 68
         "checkpointInFile"=checkpointInFile,
69 69
         "transposeData"=transposeData,
70
-        "whichMatrixFixed"="" # internal parameter
70
+        "whichMatrixFixed"=NULL # internal parameter
71 71
     )
72 72
     allParams <- parseExtraParams(allParams, list(...))
73 73
 
... ...
@@ -76,13 +76,14 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
76 76
         stop("unsupported file extension for data")
77 77
 
78 78
     # check uncertainty matrix
79
-    if (is(data, "character") & !is(uncertainty, "character"))
79
+    if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character"))
80 80
         stop("uncertainty must be same data type as data (file name)")
81 81
     if (is(uncertainty, "character") & !supported(uncertainty))
82 82
         stop("unsupported file extension for uncertainty")
83 83
     if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix"))
84 84
         stop("uncertainty must be a matrix unless data is a file path")
85
-    checkDataMatrix(data, uncertainty, params)
85
+    if (!is(data, "character"))
86
+        checkDataMatrix(data, uncertainty, params)
86 87
 
87 88
     # convert data to matrix
88 89
     if (is(data, "data.frame"))
... ...
@@ -137,6 +138,7 @@ checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
137 138
 checkpointInFile=NULL, transposeData=FALSE, ...)
138 139
 {
139 140
     params@distributed <- "single-cell"
141
+    params@singleCell <- TRUE
140 142
     CoGAPS(data, params, nThreads, messages, outputFrequency, uncertainty,
141 143
         checkpointOutFile, checkpointInterval, checkpointInFile, transposeData,
142 144
         ...)
... ...
@@ -7,69 +7,87 @@
7 7
 #' @param allParams list of all parameters used in computation
8 8
 #' @param uncertainty uncertainty matrix (same supported types as data)
9 9
 #' @return list
10
+#' @importFrom BiocParallel bplapply SnowParam
10 11
 distributedCogaps <- function(data, allParams, uncertainty)
11 12
 {
13
+    FUN <- function(set, data, allParams, uncertainty, fixedMatrix=NULL)
14
+    {
15
+        internal <- ifelse(is(data, "character"), cogaps_cpp_from_file, cogaps_cpp)
16
+        raw <- internal(data, allParams, uncertainty, set, fixedMatrix)
17
+        new("CogapsResult", Amean=raw$Amean, Asd=raw$Asd, Pmean=raw$Pmean,
18
+            Psd=raw$Psd, seed=raw$seed, meanChiSq=raw$meanChiSq)    
19
+    }
20
+
12 21
     # randomly sample either rows or columns into subsets to break the data up
22
+    set.seed(allParams$gaps@seed)
13 23
     sets <- createSets(data, allParams)
24
+    snow <- SnowParam(workers=length(sets), type="SOCK")
14 25
 
15 26
     # run Cogaps normally on each subset of the data
16
-    initialResult <- foreach(i=1:allParams$gaps@nSets) %dopar%
17
-    {
18
-        cogaps_cpp(data, allParams, uncertainty, sets[[i]])
19
-    }
27
+    initialResult <- bplapply(sets, FUN, BPPARAM=snow, data=data,
28
+        allParams=allParams, uncertainty=uncertainty)
20 29
 
21 30
     # match patterns in either A or P matrix
22 31
     consensusMatrix <- findConsensusMatrix(initialResult, allParams)
32
+    allParams$gaps@nPatterns <- ncol(consensusMatrix)
23 33
 
24 34
     # set fixed matrix
25 35
     allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
26 36
         == "genome-wide", "P", "A")
27 37
 
28
-    # run all subsets with the same fixed matrix
29
-    finalResult <- foreach(i=1:allParams$gaps@nSets) %dopar%
30
-    {
31
-        cogaps_cpp(data, allParams, uncertainty, sets[[i]], consensusMatrix)
32
-    }
38
+    # ru final phase with fixed matrix
39
+    finalResult <- bplapply(sets, FUN, data=data, BPPARAM=snow,
40
+        allParams=allParams, uncertainty=uncertainty, fixedMatrix=consensusMatrix)
33 41
 
34 42
     # get result 
35
-    resultList <- stitchTogether(finalResult, allParams)
36
-    resultList$seed <- allParams$modelParams@seed
37
-    resultList$meanChiSq <- sum(sapply(finalResult, function(r) r$meanChiSq))
38
-    resultList$diagnostics <- list()
39
-    return(resultList)
43
+    return(stitchTogether(finalResult, allParams))
40 44
 }
41 45
 
46
+#' get number of rows from supported file name or matrix
47
+#' @param data either a file name or a matrix
48
+#' @return number of rows
42 49
 #' @importFrom data.table fread
50
+#' @importFrom tools file_ext
43 51
 nrow_helper <- function(data)
44 52
 {
45
-    if (class(data) == "character")
53
+    if (is(data, "character"))
46 54
     {
47
-        switch(file_ext(data),
55
+        return(switch(tools::file_ext(data),
48 56
             "csv" = nrow(data.table::fread(data, select=1)),
49 57
             "tsv" = nrow(data.table::fread(data, select=1)),
50 58
             "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1])
51
-        )
59
+        ))
52 60
     }
53 61
     return(nrow(data))
54 62
 }
55 63
 
64
+#' get number of columns from supported file name or matrix
65
+#' @param data either a file name or a matrix
66
+#' @return number of columns
56 67
 #' @importFrom data.table fread
68
+#' @importFrom tools file_ext
57 69
 ncol_helper <- function(data)
58 70
 {
59
-    if (class(data) == "character")
71
+    if (is(data, "character"))
60 72
     {
61
-        switch(file_ext(data),
73
+        return(switch(tools::file_ext(data),
62 74
             "csv" = ncol(data.table::fread(data, nrows=1)) - 1,
63 75
             "tsv" = ncol(data.table::fread(data, nrows=1)) - 1,
64 76
             "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2])
65
-        )
77
+        ))
66 78
     }
67 79
     return(ncol(data))
68
-}        
80
+}
69 81
 
82
+#' partition genes/samples into subsets
83
+#' @description either genes or samples or partitioned depending on the type
84
+#' of distributed CoGAPS (i.e. genome-wide or single-cell)
85
+#' @param data either file name or matrix
86
+#' @param allParams list of all CoGAPS parameters
87
+#' @return list of sorted subsets of either genes or samples
70 88
 createSets <- function(data, allParams)
71 89
 {
72
-    total <- ifelse(allParams$gaps@distributed == "genome-wide",
90
+    total <- ifelse(xor(allParams$transposeData, allParams$gaps@distributed == "genome-wide"),
73 91
         nrow_helper(data), ncol_helper(data))
74 92
     setSize <- floor(total / allParams$gaps@nSets)
75 93
 
... ...
@@ -78,13 +96,17 @@ createSets <- function(data, allParams)
78 96
     for (n in 1:(allParams$gaps@nSets - 1))
79 97
     {
80 98
         selected <- sample(remaining, setSize, replace=FALSE)
81
-        sets[[n]] <- selected
99
+        sets[[n]] <- sort(selected)
82 100
         remaining <- setdiff(remaining, selected)
83 101
     }
84
-    sets[[allParams$gaps@nSets]] <- remaining
102
+    sets[[allParams$gaps@nSets]] <- sort(remaining)
85 103
     return(sets)
86 104
 }
87 105
 
106
+#' find the consensus pattern matrix across all subsets
107
+#' @param result list of CogapsResult object from all runs across subsets
108
+#' @param allParams list of all CoGAPS parameters
109
+#' @return matrix of consensus patterns
88 110
 findConsensusMatrix <- function(result, allParams)
89 111
 {
90 112
     if (allParams$gaps@distributed == "genome-wide")
... ...
@@ -93,13 +115,11 @@ findConsensusMatrix <- function(result, allParams)
93 115
         patterns <- do.call(cbind, lapply(result, function(x) x@featureLoadings))
94 116
 
95 117
     comb <- expand.grid(1:allParams$gaps@nSets, 1:allParams$gaps@nPatterns)
96
-    rownames(patterns) <- paste(comb[,1], comb[,2], sep=".")
118
+    colnames(patterns) <- paste(comb[,1], comb[,2], sep=".")
97 119
     return(patternMatch(patterns, allParams))
98 120
 }
99 121
 
100 122
 #' Match Patterns Across Multiple Runs
101
-#' @export
102
-#'
103 123
 #' @param Atot a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}
104 124
 #' @param nSets number of parallel sets used to generate \code{Atot}
105 125
 #' @param cnt  number of branches at which to cut dendrogram
... ...
@@ -115,7 +135,7 @@ patternMatch <- function(allPatterns, allParams)
115 135
 {
116 136
     cc <- corcut(allPatterns, allParams)
117 137
 
118
-    ### split by maxNS
138
+    # split by maxNS
119 139
     indx <- which(sapply(cc$PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
120 140
     while (length(indx) > 0)
121 141
     { 
... ...
@@ -124,22 +144,27 @@ patternMatch <- function(allPatterns, allParams)
124 144
 
125 145
         cc$PatsByClust[[indx[1]]] <- icc$PatsByClust[[1]]
126 146
         cc$RtoMeanPattern[[indx[1]]] <- icc$RtoMeanPattern[[1]]
127
-        cc$PatsByClust <- append(cc$PatsByClust, icc$PatsByClust[2])
128
-        cc$RtoMeanPattern <- append(cc$RtoMeanPattern, icc$RtoMeanPattern[2])
129
-
147
+        if (length(icc$PatsByClust) > 1)
148
+        {
149
+            cc$PatsByClust <- append(cc$PatsByClust, icc$PatsByClust[2])
150
+            cc$RtoMeanPattern <- append(cc$RtoMeanPattern, icc$RtoMeanPattern[2])
151
+        }
130 152
         indx <- which(sapply(cc$PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
131 153
     }
132 154
 
133 155
     # create matrix of mean patterns - weighted by coefficient of determination
134
-    PatsByCDSWavg <- t(sapply(1:length(cc$PatsByClust), function(z)
135
-        apply(cc$PatsByClust[[z]], 1, function(x) weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
136
-    rownames(PatsByCDSWavg) <- lapply(1:length(cc$PatsByClust), function(x) paste("Pattern", x))
137
-    return(PatsByCDSWavg)
156
+    PatsByCDSWavg <- sapply(1:length(cc$PatsByClust), function(z)
157
+        apply(cc$PatsByClust[[z]], 1, function(x) weighted.mean(x, (cc$RtoMeanPattern[[z]])^3)))
158
+    colnames(PatsByCDSWavg) <- lapply(1:length(cc$PatsByClust), function(x) paste("Pattern", x))
138 159
 
139 160
     # scale
140
-    return(apply(PatsByCDSWavg, 1, function(row) row / max(row)))
161
+    return(apply(PatsByCDSWavg, 2, function(col) col / max(col)))
141 162
 }
142 163
 
164
+#' cluster patterns together
165
+#' @param allPatterns matrix of all patterns across subsets
166
+#' @param allParams list of all CoGAPS parameters
167
+#' @return patterns listed by which cluster they belong to
143 168
 #' @importFrom cluster agnes
144 169
 #' @importFrom stats cutree as.hclust cor
145 170
 corcut <- function(allPatterns, allParams)
... ...
@@ -166,27 +191,31 @@ corcut <- function(allPatterns, allParams)
166 191
     return(list("RtoMeanPattern"=RtoMeanPattern, "PatsByClust"=PatsByClust))
167 192
 }
168 193
 
194
+#' concatenate final results across subsets
195
+#' @param result list of CogapsResult object from all runs across subsets
196
+#' @param allParams list of all CoGAPS parameters
197
+#' @return list with all CoGAPS output
169 198
 stitchTogether <- function(result, allParams)
170 199
 {
171
-    if (allParams$modelParams@distributed == "genome-wide")
200
+    if (allParams$gaps@distributed == "genome-wide")
172 201
     {
173
-        consensus <- result[[1]]@Pmean
174
-        return(list(
175
-            "Amean" = do.call(rbind, lapply(result, function(x) x@featureLoadings)),
176
-            "Asd"   = do.call(rbind, lapply(result, function(x) x@featureStdDev)),
177
-            "Pmean" = consensus,
178
-            "Psd"   = matrix(0, nrow=nrow(consensus), ncol=ncol(consensus))
179
-        ))
202
+        consensus <- result[[1]]@sampleFactors
203
+        Amean <- do.call(rbind, lapply(result, function(x) x@featureLoadings))
204
+        Asd   <- do.call(rbind, lapply(result, function(x) x@featureStdDev))
205
+        Pmean <- consensus
206
+        Psd   <- matrix(0, nrow=nrow(consensus), ncol=ncol(consensus))
180 207
     }
181 208
     else
182 209
     {
183
-        consensus <- result[[1]]@Amean
184
-        return(list(
185
-            "Amean" = consensus,
186
-            "Asd"   = matrix(0, nrow=nrow(consensus), ncol=ncol(consensus)),
187
-            "Pmean" = do.call(rbind, lapply(result, function(x) x@sampleFactors)),
188
-            "Psd"   = do.call(rbind, lapply(result, function(x) x@sampleStdDev))
189
-        ))
210
+        consensus <- result[[1]]@featureLoadings
211
+        Amean <- consensus
212
+        Asd   <- matrix(0, nrow=nrow(consensus), ncol=ncol(consensus))
213
+        Pmean <- do.call(rbind, lapply(result, function(x) x@sampleFactors))
214
+        Psd   <- do.call(rbind, lapply(result, function(x) x@sampleStdDev))
190 215
     }
216
+
217
+    return(list("Amean"=Amean, "Asd"=Asd, "Pmean"=Pmean, "Psd"=Psd,
218
+        "seed"=allParams$gaps@seed,
219
+        "meanChiSq"=sum(sapply(result, function(r) r@metadata$meanChiSq))))
191 220
 }
192 221
 
... ...
@@ -1,11 +1,11 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps_cpp_from_file <- function(data, allParams, uncertainty, indices = NULL, fixedMatrix = NULL) {
4
+cogaps_cpp_from_file <- function(data, allParams, uncertainty = NULL, indices = NULL, fixedMatrix = NULL) {
5 5
     .Call('_CoGAPS_cogaps_cpp_from_file', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
6 6
 }
7 7
 
8
-cogaps_cpp <- function(data, allParams, uncertainty, indices = NULL, fixedMatrix = NULL) {
8
+cogaps_cpp <- function(data, allParams, uncertainty = NULL, indices = NULL, fixedMatrix = NULL) {
9 9
     .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
10 10
 }
11 11
 
... ...
@@ -5,7 +5,7 @@
5 5
 #' @importClassesFrom S4Vectors Annotated
6 6
 #' @importClassesFrom SingleCellExperiment LinearEmbeddingMatrix
7 7
 setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c(
8
-    sampleStdDev = "ANY",   # Psd transpose
8
+    sampleStdDev = "ANY",   # Psd
9 9
     featureStdDev = "ANY"   # Asd
10 10
 ))
11 11
 
... ...
@@ -13,15 +13,16 @@ setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c(
13 13
 #' @return initialized CogapsResult object
14 14
 #' @importFrom methods callNextMethod
15 15
 setMethod("initialize", "CogapsResult",
16
-function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics, ...)
16
+function(.Object, Amean, Pmean, Asd, Psd, seed, meanChiSq, diagnostics=list(),
17
+...)
17 18
 {
18 19
     .Object@featureLoadings <- Amean
19
-    .Object@sampleFactors <- t(Pmean)
20
+    .Object@sampleFactors <- Pmean
20 21
 
21 22
     if (!is.null(Asd))
22 23
         .Object@featureStdDev <- Asd
23 24
     if (!is.null(Psd))
24
-        .Object@sampleStdDev <- t(Psd)
25
+        .Object@sampleStdDev <- Psd
25 26
 
26 27
     .Object@metadata[["seed"]] <- seed
27 28
     .Object@metadata[["meanChiSq"]] <- meanChiSq
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{CoGAPS}
5 5
 \title{CoGAPS Matrix Factorization Algorithm}
6 6
 \usage{
7
-CoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
7
+CoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10 10
   checkpointInFile = NULL, transposeData = FALSE, ...)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{binaryA}
5 5
 \alias{binaryA}
6 6
 \alias{binaryA,CogapsResult-method}
7
-\alias{binaryA}
8 7
 \title{binary heatmap for standardized feature matrix}
9 8
 \usage{
10 9
 binaryA(object, threshold = 3)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{calcCoGAPSStat}
5 5
 \alias{calcCoGAPSStat}
6 6
 \alias{calcCoGAPSStat,CogapsResult-method}
7
-\alias{calcCoGAPSStat}
8 7
 \title{calculate gene set statistics}
9 8
 \usage{
10 9
 calcCoGAPSStat(object, GStoGenes, numPerm = 500)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{calcGeneGSStat}
5 5
 \alias{calcGeneGSStat}
6 6
 \alias{calcGeneGSStat,CogapsResult-method}
7
-\alias{calcGeneGSStat}
8 7
 \title{probability gene belongs in gene set}
9 8
 \usage{
10 9
 calcGeneGSStat(object, GStoGenes, numPerm, Pw = rep(1,
... ...
@@ -4,7 +4,6 @@
4 4
 \name{calcZ}
5 5
 \alias{calcZ}
6 6
 \alias{calcZ,CogapsResult-method}
7
-\alias{calcZ}
8 7
 \title{compute z-score matrix}
9 8
 \usage{
10 9
 calcZ(object, which = "feature")
... ...
@@ -4,14 +4,14 @@
4 4
 \name{computeGeneGSProb}
5 5
 \alias{computeGeneGSProb}
6 6
 \alias{computeGeneGSProb,CogapsResult-method}
7
-\alias{computeGeneGSProb}
8 7
 \title{compute gene probability}
9 8
 \usage{
10 9
 computeGeneGSProb(object, GStoGenes, numPerm = 500, Pw = rep(1,
11 10
   ncol(object@featureLoadings)), PwNull = FALSE)
12 11
 
13
-\S4method{computeGeneGSProb}{CogapsResult}(object, GStoGenes, numPerm = 500,
14
-  Pw = rep(1, ncol(object@featureLoadings)), PwNull = FALSE)
12
+\S4method{computeGeneGSProb}{CogapsResult}(object, GStoGenes,
13
+  numPerm = 500, Pw = rep(1, ncol(object@featureLoadings)),
14
+  PwNull = FALSE)
15 15
 }
16 16
 \arguments{
17 17
 \item{object}{an object of type CogapsResult}
18 18
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{corcut}
4
+\alias{corcut}
5
+\title{cluster patterns together}
6
+\usage{
7
+corcut(allPatterns, allParams)
8
+}
9
+\arguments{
10
+\item{allPatterns}{matrix of all patterns across subsets}
11
+
12
+\item{allParams}{list of all CoGAPS parameters}
13
+}
14
+\value{
15
+patterns listed by which cluster they belong to
16
+}
17
+\description{
18
+cluster patterns together
19
+}
0 20
new file mode 100644
... ...
@@ -0,0 +1,20 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{createSets}
4
+\alias{createSets}
5
+\title{partition genes/samples into subsets}
6
+\usage{
7
+createSets(data, allParams)
8
+}
9
+\arguments{
10
+\item{data}{either file name or matrix}
11
+
12
+\item{allParams}{list of all CoGAPS parameters}
13
+}
14
+\value{
15
+list of sorted subsets of either genes or samples
16
+}
17
+\description{
18
+either genes or samples or partitioned depending on the type
19
+of distributed CoGAPS (i.e. genome-wide or single-cell)
20
+}
0 21
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{findConsensusMatrix}
4
+\alias{findConsensusMatrix}
5
+\title{find the consensus pattern matrix across all subsets}
6
+\usage{
7
+findConsensusMatrix(result, allParams)
8
+}
9
+\arguments{
10
+\item{result}{list of CogapsResult object from all runs across subsets}
11
+
12
+\item{allParams}{list of all CoGAPS parameters}
13
+}
14
+\value{
15
+matrix of consensus patterns
16
+}
17
+\description{
18
+find the consensus pattern matrix across all subsets
19
+}
... ...
@@ -4,7 +4,6 @@
4 4
 \name{getMeanChiSq}
5 5
 \alias{getMeanChiSq}
6 6
 \alias{getMeanChiSq,CogapsResult-method}
7
-\alias{getMeanChiSq}
8 7
 \title{return chi-sq of final matrices}
9 8
 \usage{
10 9
 getMeanChiSq(object)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{getParam}
5 5
 \alias{getParam}
6 6
 \alias{getParam,CogapsParams-method}
7
-\alias{getParam}
8 7
 \title{get the value of a parameter}
9 8
 \usage{
10 9
 getParam(object, whichParam)
... ...
@@ -6,7 +6,7 @@
6 6
 \title{Constructor for CogapsResult}
7 7
 \usage{
8 8
 \S4method{initialize}{CogapsResult}(.Object, Amean, Pmean, Asd, Psd, seed,
9
-  meanChiSq, diagnostics, ...)
9
+  meanChiSq, diagnostics = list(), ...)
10 10
 }
11 11
 \value{
12 12
 initialized CogapsResult object
13 13
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{ncol_helper}
4
+\alias{ncol_helper}
5
+\title{get number of columns from supported file name or matrix}
6
+\usage{
7
+ncol_helper(data)
8
+}
9
+\arguments{
10
+\item{data}{either a file name or a matrix}
11
+}
12
+\value{
13
+number of columns
14
+}
15
+\description{
16
+get number of columns from supported file name or matrix
17
+}
0 18
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{nrow_helper}
4
+\alias{nrow_helper}
5
+\title{get number of rows from supported file name or matrix}
6
+\usage{
7
+nrow_helper(data)
8
+}
9
+\arguments{
10
+\item{data}{either a file name or a matrix}
11
+}
12
+\value{
13
+number of rows
14
+}
15
+\description{
16
+get number of rows from supported file name or matrix
17
+}
... ...
@@ -4,7 +4,6 @@
4 4
 \name{patternMarkers}
5 5
 \alias{patternMarkers}
6 6
 \alias{patternMarkers,CogapsResult-method}
7
-\alias{patternMarkers}
8 7
 \title{compute pattern markers statistic}
9 8
 \usage{
10 9
 patternMarkers(object, threshold = "all", lp = NA, full = FALSE)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{plotResiduals}
5 5
 \alias{plotResiduals}
6 6
 \alias{plotResiduals,CogapsResult-method}
7
-\alias{plotResiduals}
8 7
 \title{plot of residuals}
9 8
 \usage{
10 9
 plotResiduals(object, data, uncertainty = NULL)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{reconstructGene}
5 5
 \alias{reconstructGene}
6 6
 \alias{reconstructGene,CogapsResult-method}
7
-\alias{reconstructGene}
8 7
 \title{reconstruct gene}
9 8
 \usage{
10 9
 reconstructGene(object, genes = NULL)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{setDistributedParams}
5 5
 \alias{setDistributedParams}
6 6
 \alias{setDistributedParams,CogapsParams-method}
7
-\alias{setDistributedParams}
8 7
 \title{set the value of parameters for distributed CoGAPS}
9 8
 \usage{
10 9
 setDistributedParams(object, cut = NULL, minNS = NULL, maxNS = NULL)
... ...
@@ -4,7 +4,6 @@
4 4
 \name{setParam}
5 5
 \alias{setParam}
6 6
 \alias{setParam,CogapsParams-method}
7
-\alias{setParam}
8 7
 \title{set the value of a parameter}
9 8
 \usage{
10 9
 setParam(object, whichParam, value)
11 10
new file mode 100644
... ...
@@ -0,0 +1,19 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/DistributedCogaps.R
3
+\name{stitchTogether}
4
+\alias{stitchTogether}
5
+\title{concatenate final results across subsets}
6
+\usage{
7
+stitchTogether(result, allParams)
8
+}
9
+\arguments{
10
+\item{result}{list of CogapsResult object from all runs across subsets}
11
+
12
+\item{allParams}{list of all CoGAPS parameters}
13
+}
14
+\value{
15
+list with all CoGAPS output
16
+}
17
+\description{
18
+concatenate final results across subsets
19
+}
... ...
@@ -23,21 +23,7 @@ public:
23 23
 
24 24
     Archive(const std::string &path, std::ios_base::openmode flags)
25 25
         : mStream(path.c_str(), std::ios::binary | flags)
26
-    {
27
-        /*if (flags == ARCHIVE_WRITE)
28
-        {
29
-            *this << ARCHIVE_MAGIC_NUM;
30
-        }
31
-        else if (flags == ARCHIVE_READ)
32
-        {
33
-            uint32_t magicNum = 0;
34
-            *this >> magicNum;
35
-            if (magicNum != ARCHIVE_MAGIC_NUM)
36
-            {
37
-                Rcpp::Rcout << "warning: invalid checkpoint file" << std::endl;
38
-            }
39
-        }*/
40
-    }
26
+    {}
41 27
 
42 28
     void close() {mStream.close();}
43 29
 
... ...
@@ -80,4 +66,4 @@ public:
80 66
     friend Archive& operator>>(Archive &ar, boost::random::mt11213b &val) { return readFromArchive(ar, val); }
81 67
 };
82 68
 
83
-#endif
69
+#endif // __COGAPS_ARCHIVE_H__
... ...
@@ -7,28 +7,32 @@
7 7
 // these are helper functions for converting matrix/vector types
8 8
 // to and from R objects
9 9
 
10
-static Matrix convertRMatrix(const Rcpp::NumericMatrix &rmat)
10
+static Matrix convertRMatrix(const Rcpp::NumericMatrix &rmat, bool transpose=false)
11 11
 {
12
-    Matrix mat(rmat.nrow(), rmat.ncol());
13
-    for (unsigned i = 0; i < mat.nRow(); ++i)
12
+    unsigned nr = transpose ? rmat.ncol() : rmat.nrow();
13
+    unsigned nc = transpose ? rmat.nrow() : rmat.ncol();
14
+    Matrix mat(nr, nc);
15
+    for (unsigned i = 0; i < nr; ++i)
14 16
     {
15
-        for (unsigned j = 0; j < mat.nCol(); ++j)
17
+        for (unsigned j = 0; j < nc; ++j)
16 18
         {
17
-            mat(i,j) = rmat(i,j);
19
+            mat(i,j) = transpose ? rmat(j,i) : rmat(i,j);
18 20
         }
19 21
     }
20 22
     return mat;
21 23
 }
22 24
 
23 25
 template <class Matrix>
24
-static Rcpp::NumericMatrix createRMatrix(const Matrix &mat)
26
+static Rcpp::NumericMatrix createRMatrix(const Matrix &mat, bool transpose=false)
25 27
 {
26
-    Rcpp::NumericMatrix rmat(mat.nRow(), mat.nCol());
27
-    for (unsigned i = 0; i < mat.nRow(); ++i)
28
+    unsigned nr = transpose ? mat.nCol() : mat.nRow();
29
+    unsigned nc = transpose ? mat.nRow() : mat.nCol();
30
+    Rcpp::NumericMatrix rmat(nr, nc);
31
+    for (unsigned i = 0; i < nr; ++i)
28 32
     {
29
-        for (unsigned j = 0; j < mat.nCol(); ++j)
33
+        for (unsigned j = 0; j < nc; ++j)
30 34
         {
31
-            rmat(i,j) = mat(i,j);
35
+            rmat(i,j) = transpose ? mat(j,i) : mat(i,j);
32 36
         }
33 37
     }
34 38
     return rmat;
... ...
@@ -60,6 +64,7 @@ unsigned getNumPatterns(const Rcpp::List &allParams)
60 64
         ar >> nPatterns;
61 65
         ar.close();
62 66
     }
67
+    return nPatterns;
63 68
 }
64 69
 
65 70
 std::vector<unsigned> getSubsetIndices(const Rcpp::Nullable<Rcpp::IntegerVector> &indices)
... ...
@@ -80,7 +85,7 @@ bool processDistributedParameters(const Rcpp::List &allParams)
80 85
         GAPS_ASSERT(d == "genome-wide" || d == "single-cell");
81 86
         return d == "genome-wide";
82 87
     }
83
-    return true;
88
+    return false; // will be ignored anyways
84 89
 }
85 90
 
86 91
 // this is the main function that creates a GapsRunner and runs CoGAPS
... ...
@@ -118,11 +123,12 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix)
118 123
     else // no checkpoint, populate from given parameters
119 124
     {
120 125
         // set fixed matrix
121
-        if (fixedMatrix.isNotNull());
126
+        if (fixedMatrix.isNotNull())
122 127
         {
123 128
             GAPS_ASSERT(!Rf_isNull(allParams["whichMatrixFixed"]));
124 129
             std::string which = Rcpp::as<std::string>(allParams["whichMatrixFixed"]);
125
-            runner.setFixedMatrix(which[0], convertRMatrix(Rcpp::NumericMatrix(fixedMatrix)));
130
+            gaps_printf("%s %c\n", which.c_str(), which[0]);
131
+            runner.setFixedMatrix(which[0], convertRMatrix(Rcpp::NumericMatrix(fixedMatrix), which[0]=='P'));
126 132
         }
127 133
 
128 134
         // set parameters that would be saved in the checkpoint
... ...
@@ -147,9 +153,9 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix)
147 153
     GapsResult result(runner.run());
148 154
     return Rcpp::List::create(
149 155
         Rcpp::Named("Amean") = createRMatrix(result.Amean),
150
-        Rcpp::Named("Pmean") = createRMatrix(result.Pmean),
156
+        Rcpp::Named("Pmean") = createRMatrix(result.Pmean, true),
151 157
         Rcpp::Named("Asd") = createRMatrix(result.Asd),
152
-        Rcpp::Named("Psd") = createRMatrix(result.Psd),
158
+        Rcpp::Named("Psd") = createRMatrix(result.Psd, true),
153 159
         Rcpp::Named("seed") = runner.getSeed(),
154 160
         Rcpp::Named("meanChiSq") = result.meanChiSq,
155 161
         Rcpp::Named("diagnostics") = Rcpp::List::create()
... ...
@@ -161,7 +167,7 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix)
161 167
 // [[Rcpp::export]]
162 168
 Rcpp::List cogaps_cpp_from_file(const Rcpp::CharacterVector &data,
163 169
 const Rcpp::List &allParams,
164
-const Rcpp::Nullable<Rcpp::CharacterVector> &uncertainty,
170
+const Rcpp::Nullable<Rcpp::CharacterVector> &uncertainty=R_NilValue,
165 171
 const Rcpp::Nullable<Rcpp::IntegerVector> &indices=R_NilValue,
166 172
 const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
167 173
 {
... ...
@@ -170,14 +176,13 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
170 176
     {
171 177
         unc = Rcpp::as<std::string>(Rcpp::CharacterVector(uncertainty));
172 178
     }
173
-
174 179
     return cogapsRun(Rcpp::as<std::string>(data), allParams, unc, indices, fixedMatrix);
175 180
 }
176 181
 
177 182
 // [[Rcpp::export]]
178 183
 Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix &data,
179 184
 const Rcpp::List &allParams,
180
-const Rcpp::Nullable<Rcpp::NumericMatrix> &uncertainty,
185
+const Rcpp::Nullable<Rcpp::NumericMatrix> &uncertainty=R_NilValue,
181 186
 const Rcpp::Nullable<Rcpp::IntegerVector> &indices=R_NilValue,
182 187
 const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
183 188
 {
... ...
@@ -186,7 +191,6 @@ const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
186 191
     {
187 192
         unc = convertRMatrix(Rcpp::NumericMatrix(uncertainty));
188 193
     }
189
-
190 194
     return cogapsRun(convertRMatrix(data), allParams, unc, indices, fixedMatrix);
191 195
 }
192 196
 
... ...
@@ -40,9 +40,12 @@
40 40
                 gaps_stop();                                            \
41 41
             }                                                           \
42 42
         } while(0)
43
+
44
+    #define DEBUG_PING gaps_printf("here %s %d\n", __FILE__, __LINE__);
43 45
 #else
44 46
     #define GAPS_ASSERT(cond) do {} while(0)
45 47
     #define GAPS_ASSERT_MSG(cond, msg) do {} while(0)
48
+    #define DEBUG_PING   
46 49
 #endif
47 50
 
48 51
 #endif
49 52
\ No newline at end of file
... ...
@@ -79,7 +79,6 @@ void GapsRunner::setCheckpointInterval(unsigned interval)
79 79
 
80 80
 GapsResult GapsRunner::run()
81 81
 {
82
-#if 0
83 82
     mStartTime = bpt_now();
84 83
 
85 84
     // calculate appropiate number of threads if compiled with openmp
... ...
@@ -114,7 +113,6 @@ GapsResult GapsRunner::run()
114 113
             runOnePhase();
115 114
             break;
116 115
     }
117
-#endif
118 116
     GapsResult result(mStatistics);
119 117
     result.meanChiSq = mStatistics.meanChiSq(mASampler);
120 118
     return result;    
... ...
@@ -161,6 +159,7 @@ void GapsRunner::updateSampler(unsigned nA, unsigned nP)
161 159
         {
162 160
             mPSampler.sync(mASampler);
163 161
         }
162
+        GAPS_ASSERT(mASampler.internallyConsistent());
164 163
     }
165 164
 
166 165
     if (mFixedMatrix != 'P')
... ...
@@ -171,10 +170,8 @@ void GapsRunner::updateSampler(unsigned nA, unsigned nP)
171 170
         {
172 171
             mASampler.sync(mPSampler);
173 172
         }
173
+        GAPS_ASSERT(mPSampler.internallyConsistent());
174 174
     }
175
-
176
-    GAPS_ASSERT(mASampler.internallyConsistent());
177
-    GAPS_ASSERT(mPSampler.internallyConsistent());
178 175
 }
179 176
 
180 177
 void GapsRunner::displayStatus()
... ...
@@ -193,6 +190,7 @@ void GapsRunner::displayStatus()
193 190
         gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, elapsed time: %02d:%02d:%02d\n",
194 191
             mCurrentIteration + 1, mMaxIterations, mASampler.nAtoms(),
195 192
             mPSampler.nAtoms(), mASampler.chi2(), hours, minutes, seconds);
193
+        gaps_flush();
196 194
     }
197 195
 }
198 196
 
... ...
@@ -326,34 +326,41 @@ float GibbsSampler<T, MatA, MatB>::getAvgQueue() const
326 326
 template <class T, class MatA, class MatB>
327 327
 bool GibbsSampler<T, MatA, MatB>::internallyConsistent()
328 328
 {
329
-    Atom a = mDomain.front();
330
-    float current = a.mass;
331
-    uint64_t row = impl()->getRow(a.pos);
332
-    uint64_t col = impl()->getCol(a.pos);
333
-
334
-    while (mDomain.hasRight(a))
329
+    if (mDomain.size() > 0)
335 330
     {
336
-        a = mDomain.right(a);
337
-        if (row != impl()->getRow(a.pos) || col != impl()->getCol(a.pos))
331
+        Atom a = mDomain.front();
332
+        float current = a.mass;
333
+        uint64_t row = impl()->getRow(a.pos);
334
+        uint64_t col = impl()->getCol(a.pos);
335
+
336
+        while (mDomain.hasRight(a))
338 337
         {
339
-            float matVal = mMatrix(row, col);
340
-            if (std::abs(current - matVal) > 0.1f)
338
+            a = mDomain.right(a);
339
+            if (row != impl()->getRow(a.pos) || col != impl()->getCol(a.pos))
341 340
             {
342
-                gaps_printf("mass difference detected at row %lu, column %lu: %f %f\n",
343
-                    row, col, current, matVal); 
344
-                return false;
341
+                float matVal = mMatrix(row, col);
342
+                if (std::abs(current - matVal) > 0.1f)
343
+                {
344
+                    gaps_printf("mass difference detected at row %lu, column %lu: %f %f\n",
345
+                        row, col, current, matVal); 
346
+                    return false;
347
+                }
348
+                
349
+                row = impl()->getRow(a.pos);
350
+                col = impl()->getCol(a.pos);
351
+                current = a.mass;
352
+            }
353
+            else
354
+            {
355
+                current += a.mass;
345 356
             }
346
-            
347
-            row = impl()->getRow(a.pos);
348
-            col = impl()->getCol(a.pos);
349
-            current = a.mass;
350
-        }
351
-        else
352
-        {
353
-            current += a.mass;
354 357
         }
358
+        return true;
359
+    }
360
+    else
361
+    {
362
+        return gaps::algo::sum(mMatrix) == 0.f;
355 363
     }
356
-    return true;    
357 364
 }
358 365
 #endif
359 366
 
... ...
@@ -4,3 +4,4 @@
4 4
 
5 5
 #define MAT_SUM(nR, nC) ((nR + nC - 2) * nR * nC / 2.f)
6 6
 
7
+// TODO
7 8
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+// TODO
0 2
\ No newline at end of file
1 3
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+// TODO
0 2
\ No newline at end of file
1 3
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+// TODO
0 2
\ No newline at end of file
... ...
@@ -16,7 +16,7 @@ static std::vector<unsigned> sequentialVector(unsigned n)
16 16
 }
17 17
 
18 18
 template <class DataType>
19
-static void testFullConstructor(unsigned nr, unsigned nc, float expectedSum,
19
+static void testFullConstructor(float expectedSum, unsigned nr, unsigned nc, 
20 20
 const DataType &data, bool transpose=false, bool partitionRows=false,
21 21
 const std::vector<unsigned> &indices=std::vector<unsigned>(1))
22 22
 {
... ...
@@ -33,25 +33,69 @@ const std::vector<unsigned> &indices=std::vector<unsigned>(1))
33 33
 }
34 34
 
35 35
 template <class DataType>
36
-static void testAllConstructorSituations(const DataType &data)
36
+static void testAllConstructorSituations(const DataType &data, unsigned nr,
37
+unsigned nc, unsigned nIndices, float sum1, float sum2, float sum3)
37 38
 {
38 39
     // No Transpose, No Subset
39
-    testFullConstructor(0.f, 10, 25, data, false);
40
+    testFullConstructor(sum1, nr, nc, data, false);
40 41
 
41 42
     // Transpose, No Subset
42
-    testFullConstructor(0.f, 25, 10, data, true);
43
+    testFullConstructor(sum1, nc, nr, data, true);
43 44
 
44 45
     // No Transpose, Subset Rows
45
-    testFullConstructor(0.f, 5, 25, data, false, true, sequentialVector(5))
46
+    testFullConstructor(sum2, nIndices, nc, data, false, true,
47
+        sequentialVector(nIndices));
46 48
 
47 49
     // Transpose, Subset Rows
48
-    testFullConstructor(0.f, 5, 10, data, true, true, sequentialVector(5))
50
+    testFullConstructor(sum3, nIndices, nr, data, true, true,
51
+        sequentialVector(nIndices));
49 52
 
50 53
     // No Transpose, Subset Columns
51
-    testFullConstructor(0.f, 10, 5, data, false, false, sequentialVector(5))
54
+    testFullConstructor(sum3, nr, nIndices, data, false, false,
55
+        sequentialVector(nIndices));
52 56
 
53 57
     // Transpose, Subset Columns
54
-    testFullConstructor(0.f, 25, 5, data, true, false, sequentialVector(5))
58
+    testFullConstructor(sum2, nc, nIndices, data, true, false,
59
+        sequentialVector(nIndices));
60
+}
61
+
62
+TEST_CASE("Test Writing/Reading Matrices from File")
63
+{
64
+    // matrix to use for testing
65
+    Matrix ref(25, 50);
66
+    for (unsigned i = 0; i < ref.nRow(); ++i)
67
+    {
68
+        for (unsigned j = 0; j < ref.nCol(); ++j)
69
+        {
70
+            ref(i,j) = i + j;
71
+        }
72
+    }
73
+
74
+    // write matrix to file
75
+    FileParser::writeToTsv("testMatWrite.tsv", ref);
76
+    FileParser::writeToCsv("testMatWrite.csv", ref);
77
+    FileParser::writeToMtx("testMatWrite.mtx", ref);
78
+
79
+    // read matrices from file
80
+    RowMatrix rmTsv("testMatWrite.tsv", false, false, sequentialVector(1));
81
+    RowMatrix rmCsv("testMatWrite.csv", false, false, sequentialVector(1));
82
+    RowMatrix rmMtx("testMatWrite.mtx", false, false, sequentialVector(1));
83
+    ColMatrix cmTsv("testMatWrite.tsv", false, false, sequentialVector(1));
84
+    ColMatrix cmCsv("testMatWrite.csv", false, false, sequentialVector(1));
85
+    ColMatrix cmMtx("testMatWrite.mtx", false, false, sequentialVector(1));
86
+
87
+    // delete files
88
+    std::remove("testMatWrite.tsv");
89
+    std::remove("testMatWrite.csv");
90
+    std::remove("testMatWrite.mtx");
91
+
92
+    // test matrices
93
+    REQUIRE(gaps::algo::sum(rmTsv) == gaps::algo::sum(ref));
94
+    REQUIRE(gaps::algo::sum(rmCsv) == gaps::algo::sum(ref));
95
+    REQUIRE(gaps::algo::sum(rmMtx) == gaps::algo::sum(ref));
96
+    REQUIRE(gaps::algo::sum(cmTsv) == gaps::algo::sum(ref));
97
+    REQUIRE(gaps::algo::sum(cmCsv) == gaps::algo::sum(ref));
98
+    REQUIRE(gaps::algo::sum(cmMtx) == gaps::algo::sum(ref));
55 99
 }
56 100
 
57 101
 TEST_CASE("Test Matrix.h")
... ...
@@ -82,136 +126,36 @@ TEST_CASE("Test Matrix.h")
82 126
         REQUIRE(rm2.nCol() == 25);
83 127
     }
84 128
 
85
-    Matrix ref(10, 25);
86
-    for (unsigned i = 0; i < ref.nRow(); ++i)
129
+    SECTION("Full Constructor")
87 130
     {
88
-        for (unsigned j = 0; j < ref.nCol(); ++j)
131
+        Matrix ref(10, 25);
132
+        for (unsigned i = 0; i < ref.nRow(); ++i)
89 133
         {
90
-            ref(i,j) = i + j;
134
+            for (unsigned j = 0; j < ref.nCol(); ++j)
135
+            {
136
+                ref(i,j) = i + j;
137
+            }
91 138
         }
92
-    }
93
-
94
-    testAllConstructorSituations(ref);
95
-    testAllConstructorSituations(ref);
96
-    testAllConstructorSituations(ref);
97
-    testAllConstructorSituations(ref);    
98
-
99
-
100
-    SECTION("Construct from File - No Subset")
101
-    {
102
-
103
-    }
104
-
105
-    SECTION("Construct from File - Subset")
106
-    {
107 139
 
140
+        // write matrix to file
141
+        FileParser::writeToTsv("testMatWrite.tsv", ref);
142
+        FileParser::writeToCsv("testMatWrite.csv", ref);
143
+        FileParser::writeToMtx("testMatWrite.mtx", ref);
144
+
145
+        // test
146
+        testAllConstructorSituations(ref, 10, 25, 5, 4125.f, 1750.f, 325.f);
147
+        testAllConstructorSituations("testMatWrite.tsv", 10, 25, 5, 4125.f, 1750.f, 325.f);
148
+        testAllConstructorSituations("testMatWrite.csv", 10, 25, 5, 4125.f, 1750.f, 325.f);
149
+        testAllConstructorSituations("testMatWrite.mtx", 10, 25, 5, 4125.f, 1750.f, 325.f);
150
+
151
+        // delete files
152
+        std::remove("testMatWrite.tsv");
153
+        std::remove("testMatWrite.csv");
154
+        std::remove("testMatWrite.mtx");
108 155
     }
109 156
 
110
-    SECTION("Assignment")
157
+    SECTION("Arithmetic")
111 158
     {
112 159
 
113 160
     }
114
-
115
-    SECTION("Get Row/Col")
116
-    {
117
-
118
-    }
119
-
120
-    SECTION("arithmetic")
121
-    {
122
-
123
-    }
124
-}
125
-
126
-#if 0
127
-
128
-static void populateSequential(std::vector<unsigned> &vec, unsigned n)
129
-{
130
-    for (unsigned i = 0; i < n; ++i)
131
-    {
132
-        vec.push_back(i);
133
-    }
134 161
 }
135
-
136
-static void testMatrixConstruction(const std::string &path)
137
-{
138
-    std::vector<unsigned> allRows, allCols;
139
-    std::vector<unsigned> someRows, someCols;
140
-
141
-    populateSequential(allRows, 1363);
142
-    populateSequential(allCols, 9);
143
-    populateSequential(someRows, 363);
144
-    populateSequential(someCols, 2);
145
-
146
-    FileParser p1(path);
147
-    RowMatrix allRowMat(p1, true, allRows);
148
-    REQUIRE(allRowMat.nRow() == 1363);
149
-    REQUIRE(allRowMat.nCol() == 9);
150
-
151
-    FileParser p2(path);
152
-    ColMatrix allColMat(p2, true, allRows);
153
-    REQUIRE(allColMat.nRow() == 1363);
154
-    REQUIRE(allColMat.nCol() == 9);
155
-
156
-    FileParser p3(path);
157
-    RowMatrix allRowMatT(p3, false, allCols);
158
-    REQUIRE(allRowMatT.nRow() == 9);
159
-    REQUIRE(allRowMatT.nCol() == 1363);
160
-
161
-    FileParser p4(path);
162
-    ColMatrix allColMatT(p4, false, allCols);
163
-    REQUIRE(allColMatT.nRow() == 9);
164
-    REQUIRE(allColMatT.nCol() == 1363);
165
-
166
-    for (unsigned i = 0; i < 1363; ++i)
167
-    {
168
-        REQUIRE(allRowMat(i, 3) == allColMat(i, 3));
169
-        REQUIRE(allRowMat(i, 3) == allRowMatT(3, i));
170
-        REQUIRE(allRowMat(i, 3) == allColMatT(3, i));
171
-    }
172
-
173
-/*
174
-    FileParser p5(path);
175
-    RowMatrix someRowMat(p1, true, someRows);
176
-    REQUIRE(someRowMat.nRow() == 363);
177
-    REQUIRE(someRowMat.nCol() == 9);
178
-
179
-    FileParser p6(path);
180
-    ColMatrix someColMat(p2, true, someRows);
181
-    REQUIRE(someColMat.nRow() == 363);
182
-    REQUIRE(someColMat.nCol() == 9);
183
-
184
-    FileParser p7(path);
185
-    RowMatrix someRowMatT(p3, false, someCols);
186
-    REQUIRE(someRowMatT.nRow() == 2);
187
-    REQUIRE(someRowMatT.nCol() == 1363);
188
-
189
-    FileParser p8(path);
190
-    ColMatrix someColMatT(p4, false, someCols);
191
-    REQUIRE(someColMatT.nRow() == 2);
192
-    REQUIRE(someColMatT.nCol() == 1363);
193
-
194
-    for (unsigned i = 0; i < 363; ++i)
195
-    {
196
-        REQUIRE(someRowMat(i, 1) == someColMat(i, 1));
197
-        REQUIRE(someRowMat(i, 1) == someRowMatT(1, i));
198
-        REQUIRE(someRowMat(i, 1) == someColMatT(1, i));
199
-    }
200
-*/
201
-}
202
-
203
-#ifdef __GAPS_R_BUILD__
204
-TEST_CASE("Test Matrix Construction from file")
205
-{
206
-    Rcpp::Environment env = Rcpp::Environment::global_env();
207
-    std::string csvPath = Rcpp::as<std::string>(env["gistCsvPath"]);
208
-    std::string tsvPath = Rcpp::as<std::string>(env["gistTsvPath"]);
209
-    std::string mtxPath = Rcpp::as<std::string>(env["gistMtxPath"]);
210
-
211
-    testMatrixConstruction(csvPath);
212
-    testMatrixConstruction(tsvPath);
213
-    testMatrixConstruction(mtxPath);
214
-}
215
-#endif
216
-
217
-#endif
218 162
\ No newline at end of file
219 163
new file mode 100644
... ...
@@ -0,0 +1 @@
1
+// TODO
0 2
\ No newline at end of file
... ...
@@ -4,8 +4,6 @@
4 4
 #include "../GibbsSampler.h"
5 5
 #include "../math/Random.h"
6 6
 
7
-#if 0
8
-
9 7
 TEST_CASE("Test Archive.h")
10 8
 {
11 9
     SECTION("Reading/Writing to an Archive")
... ...
@@ -165,6 +163,7 @@ TEST_CASE("Test Archive.h")
165 163
             REQUIRE(gaps::random::exponential(5.5) == randSequence[i]);
166 164
         }
167 165
     }
168
-}
169 166
 
170
-#endif
171 167
\ No newline at end of file
168
+    // cleanup directory
169
+    std::remove("test_ar.temp");
170
+}
172 171
\ No newline at end of file
... ...
@@ -6,8 +6,8 @@
6 6
 template <class MatA, class MatB>
7 7
 inline void copyMatrix(MatA &dest, const MatB &source)
8 8
 {
9
-    GAPS_ASSERT(dest.nRow() == source.nRow());
10
-    GAPS_ASSERT(dest.nCol() == source.nCol());
9
+    GAPS_ASSERT_MSG(dest.nRow() == source.nRow(), dest.nRow() << " " << source.nRow());
10
+    GAPS_ASSERT_MSG(dest.nCol() == source.nCol(), dest.nCol() << " " << source.nCol());
11 11
 
12 12
     for (unsigned i = 0; i < dest.nRow(); ++i)
13 13
     {
... ...
@@ -80,11 +80,13 @@ RowMatrix::RowMatrix(const ColMatrix &mat) : GenericMatrix(mat.nRow(), mat.nCol(
80 80
 RowMatrix& RowMatrix::operator=(const RowMatrix &mat)
81 81
 {
82 82
     copyMatrix(*this, mat);
83
+    return *this;
83 84
 }
84 85
 
85 86
 RowMatrix& RowMatrix::operator=(const ColMatrix &mat)
86 87
 {
87 88
     copyMatrix(*this, mat);
89
+    return *this;
88 90
 }
89 91
 
90 92
 /******************************** COLUMN MATRIX *******************************/
... ...
@@ -149,9 +151,11 @@ ColMatrix::ColMatrix(const RowMatrix &mat) : GenericMatrix(mat.nRow(), mat.nCol(
149 151
 ColMatrix& ColMatrix::operator=(const ColMatrix &mat)
150 152
 {
151 153
     copyMatrix(*this, mat);
154
+    return *this;
152 155
 }
153 156
 
154 157
 ColMatrix& ColMatrix::operator=(const RowMatrix &mat)
155 158
 {
156 159
     copyMatrix(*this, mat);
160
+    return *this;
157 161
 }
... ...
@@ -168,7 +168,7 @@ bool partitionRows, const std::vector<unsigned> &indices)
168 168
         mNumCols = partitionRows ? (transpose ? mat.nRow() : mat.nCol()) : indices.size();
169 169
         impl()->allocate();
170 170
 
171
-        // fill matrix
171
+        // fill matrix, TODO use binary search on indices
172 172
         for (unsigned i = 0; i < mat.nRow(); ++i)
173 173
         {
174 174
             for (unsigned j = 0; j < mat.nCol(); ++j)
... ...
@@ -1,8 +1,11 @@
1 1
 #ifndef __COGAPS_FILE_PARSER_H__
2 2
 #define __COGAPS_FILE_PARSER_H__
3 3
 
4
+#include "../GapsAssert.h"
4 5
 #include "MatrixElement.h"
5 6
 
7
+#include <fstream>
8
+
6 9
 enum GapsFileType
7 10
 {
8 11
     GAPS_MTX,
... ...
@@ -21,6 +24,8 @@ private:
21 24
 
22 25
 public:
23 26
 
27
+    AbstractFileParser() {}
28
+
24 29
     static AbstractFileParser* create(const std::string &path);
25 30
 
26 31
     virtual ~AbstractFileParser() = 0;
... ...
@@ -47,8 +52,8 @@ public:
47 52
     explicit FileParser(const std::string &path);
48 53
     ~FileParser();
49 54
 
50
-    unsigned nRow();
51
-    unsigned nCol();
55
+    unsigned nRow() const;
56
+    unsigned nCol() const;
52 57
 
53 58
     bool hasNext();
54 59
     MatrixElement getNext();
... ...
@@ -71,6 +76,8 @@ public:
71 76
 template <class MatrixType>
72 77
 void FileParser::writeToTsv(const std::string &path, const MatrixType &mat)
73 78
 {
79
+    GAPS_ASSERT(FileParser::fileType(path) == GAPS_TSV);
80
+
74 81
     std::ofstream outputFile;
75 82
     outputFile.open(path.c_str());
76 83
     outputFile << "\"\"";
... ...
@@ -80,6 +87,7 @@ void FileParser::writeToTsv(const std::string &path, const MatrixType &mat)
80 87
     {
81 88
         outputFile << "\t\"Col" << i << "\"";
82 89
     }
90
+    outputFile << "\n";
83 91
 
84 92
     for (unsigned i = 0; i < mat.nRow(); ++i)
85 93
     {
... ...
@@ -99,6 +107,8 @@ void FileParser::writeToTsv(const std::string &path, const MatrixType &mat)
99 107
 template <class MatrixType>
100 108
 void FileParser::writeToCsv(const std::string &path, const MatrixType &mat)
101 109
 {
110
+    GAPS_ASSERT(FileParser::fileType(path) == GAPS_CSV);
111
+
102 112
     std::ofstream outputFile;
103 113
     outputFile.open(path.c_str());
104 114
     outputFile << "\"\"";
... ...
@@ -108,6 +118,7 @@ void FileParser::writeToCsv(const std::string &path, const MatrixType &mat)
108 118
     {
109 119
         outputFile << ",\"Col" << i << "\"";
110 120
     }
121
+    outputFile << "\n";
111 122
 
112 123
     for (unsigned i = 0; i < mat.nRow(); ++i)
113 124
     {
... ...
@@ -127,14 +138,16 @@ void FileParser::writeToCsv(const std::string &path, const MatrixType &mat)
127 138
 template <class MatrixType>
128 139
 void FileParser::writeToMtx(const std::string &path, const MatrixType &mat)
129 140
 {
141
+    GAPS_ASSERT(FileParser::fileType(path) == GAPS_MTX);
142
+
130 143
     std::ofstream outputFile;
131 144
     outputFile.open(path.c_str());
132 145
     outputFile << "%%\n";
133 146
     outputFile << mat.nRow() << " " << mat.nCol() << " " << mat.nRow() * mat.nCol();
134 147
     outputFile << "\n";
135
-    for (unsigned j = 0; j < mat.nRow(); ++j)
148
+    for (unsigned i = 0; i < mat.nRow(); ++i)
136 149
     {
137
-        for (unsigned i = 0; i < mat.nCol(); ++i)
150
+        for (unsigned j = 0; j < mat.nCol(); ++j)
138 151
         {
139 152
             if (mat(i,j) > 0.f)
140 153
             {
... ...
@@ -185,18 +185,3 @@ float gaps::random::inverseGammaSample(float a, float b, float mean, float sd)
185 185
     }
186 186
     return gaps::random::q_gamma(u, mean, sd);
187 187
 }
188
-
189
-std::vector<unsigned> sample(std::vector<unsigned> &elements, unsigned n)
190
-{
191
-    GAPS_ASSERT(elements.size() >= n);
192
-    std::vector<unsigned> sampleVect;
193
-    for (unsigned i = 0; i < n; ++i)
194
-    {
195
-        unsigned sampleIndex = gaps::random::uniform64(0, elements.size());
196
-        sampleVect.push_back(elements.at(sampleIndex));
197
-
198
-        elements[sampleIndex] = elements.back();
199
-        elements.pop_back();
200
-    }
201
-    return sampleVect;
202
-}
... ...
@@ -34,9 +34,7 @@ namespace gaps
34 34
 
35 35
         void save(Archive &ar);
36 36
         void load(Archive &ar);
37
-
38
-        std::vector<unsigned> sample(const std::vector<unsigned> &elements, unsigned n);
39 37
     } // namespace random
40 38
 } // namespace gaps
41 39
 
42
-#endif
40
+#endif // __COGAPS_RANDOM_H__
43 41
new file mode 100644
44 42
Binary files /dev/null and b/tests/testthat/gaps_checkpoint.out differ
45 43
deleted file mode 100644
... ...
@@ -1,6 +0,0 @@
1
-#context("lints")
2
-
3
-#test_that("Package Style", {
4
-#    skip("Don't lint for now")
5
-#    lintr::expect_lint_free()
6
-#})
7 0
deleted file mode 100644
8 1
Binary files a/tests/testthat/test_ar.temp and /dev/null differ
9 2
new file mode 100644
10 3
deleted file mode 100644
... ...
@@ -1,19 +0,0 @@
1
-context("GAPS")
2
-
3
-test_that("GAPS Simple Simulation",
4
-{
5
-    #data(SimpSim)
6
-    #nIter <- 1000
7
-    #res <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3, messages=FALSE)
8
-
9
-    #expect_true(!is.na(res$meanChi2))
10
-})
11
-
12
-#test_that("GAPSmap Simple Simulation",
13
-#{
14
-#    data(SimpSim)
15
-#    FP <- matrix(SimpSim.P[3, ], nrow=1)
16
-#    nIter <- 1000
17
-#    expect_failure(expect_error(gapsMapRun(SimpSim.D, SimpSim.S, FP,
18
-#                                           nFactor=3, messages=FALSE)))
19
-#})
20 0
new file mode 100644
21 1
deleted file mode 100644
... ...
@@ -1,28 +0,0 @@
1
-context("Seed")
2
-
3
-#test_that("Same seed, same output with GAPS Simple Simulation", {
4
-#    # load data
5
-#    data(SimpSim)
6
-#
7
-#    # number of burn-ins and posterior samples
8
-#    nIter <- 1000
9
-#
10
-#    # two runs with same seed
11
-#    results1 <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3,
12
-#                        nEquil=nIter, nSample=nIter, seed=42,
13
-#                        messages=FALSE)
14
-#    results2 <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3,
15
-#                        nEquil=nIter, nSample=nIter, seed=42,
16
-#                        messages=FALSE)
17
-#
18
-#    # one run with different seed
19
-#    results3 <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3,
20
-#                        nEquil=nIter, nSample=nIter, seed=8675309,
21
-#                        messages=FALSE)
22
-#
23
-#    # check that same seed results in identical outputs
24
-#    expect_identical(results1, results2)
25
-#
26
-#    # check that different seeds yield different outputs
27
-#    expect_false(identical(results1, results3))
28
-#})
29 0
deleted file mode 100644
... ...
@@ -1,16 +0,0 @@
1
-context("Snapshots")
2
-
3
-#test_that("Distinct gaps Snapshots", {
4
-#    data(SimpSim)
5
-#    x <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3, messages=FALSE)
6
-#    expect_false(identical(x$ASnapshots[, , 1], x$ASnapshots[, , 100]))
7
-#    expect_false(identical(x$PSnapshots[, , 1], x$PSnapshots[, , 100]))
8
-#})
9
-#
10
-#test_that("Distinct gapsMap Snapshots", {
11
-#    data(SimpSim)
12
-#    FP <- matrix(SimpSim.P[3, ], nrow=1)
13
-#    x <- gapsMapRun(SimpSim.D, SimpSim.S, FP, nFactor=3, messages=FALSE)
14
-#    expect_false(identical(x$ASnapshots[, , 1], x$ASnapshots[, , 100]))
15
-#    expect_false(identical(x$PSnapshots[, , 1], x$PSnapshots[, , 100]))
16
-#})
17 0
new file mode 100644
... ...
@@ -0,0 +1,81 @@
1
+context("CoGAPS")
2
+
3
+test_that("Valid Top-Level CoGAPS Calls",
4
+{
5
+    data(GIST)
6
+    testDataFrame <- GIST.D
7
+    testMatrix <- as.matrix(testDataFrame)
8
+    #testSummarizedExperiment <- 
9
+    #testSingleCellExperiment
10
+
11
+    gistCsvPath <- system.file("extdata/GIST.csv", package="CoGAPS")
12
+    gistTsvPath <- system.file("extdata/GIST.tsv", package="CoGAPS")
13
+    gistMtxPath <- system.file("extdata/GIST.mtx", package="CoGAPS")
14
+
15
+    # data types
16
+    res <- list()
17
+    res[[1]] <- CoGAPS(testDataFrame, nIterations=100, outputFrequency=50, seed=1)
18
+    res[[2]] <- CoGAPS(testMatrix, nIterations=100, outputFrequency=50, seed=1)
19
+    res[[3]] <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=50, seed=1)
20
+    res[[4]] <- CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50, seed=1)
21
+    res[[5]] <- CoGAPS(gistMtxPath, nIterations=100, outputFrequency=50, seed=1)
22
+    
23
+    expect_equal(nrow(res[[1]]@featureLoadings), 1363)
24
+    expect_equal(ncol(res[[1]]@featureLoadings), 7)
25
+    expect_equal(nrow(res[[1]]@sampleFactors), 9)
26
+    expect_equal(ncol(res[[1]]@sampleFactors), 7)
27
+    expect_true(all(sapply(1:4, function(i)
28
+        res[[i]]@featureLoadings == res[[i+1]]@featureLoadings)))
29
+    expect_true(all(sapply(1:4, function(i)
30
+        res[[i]]@sampleFactors == res[[i+1]]@sampleFactors)))
31
+
32
+    # transposing data
33
+    res <- list()
34
+    res[[1]] <- CoGAPS(testDataFrame, transposeData=TRUE, nIterations=100,
35
+        outputFrequency=50, seed=1)
36
+    res[[2]] <- CoGAPS(testMatrix, transposeData=TRUE, nIterations=100,
37
+        outputFrequency=50, seed=1)
38
+    res[[3]] <- CoGAPS(gistCsvPath, transposeData=TRUE, nIterations=100,
39
+        outputFrequency=50, seed=1)
40
+    res[[4]] <- CoGAPS(gistTsvPath, transposeData=TRUE, nIterations=100,
41
+        outputFrequency=50, seed=1)
42
+    res[[5]] <- CoGAPS(gistMtxPath, transposeData=TRUE, nIterations=100,
43
+        outputFrequency=50, seed=1)
44
+    
45
+    expect_equal(nrow(res[[1]]@featureLoadings), 9)
46
+    expect_equal(ncol(res[[1]]@featureLoadings), 7)
47
+    expect_equal(nrow(res[[1]]@sampleFactors), 1363)
48
+    expect_equal(ncol(res[[1]]@sampleFactors), 7)
49
+    expect_true(all(sapply(1:4, function(i)
50
+        res[[i]]@featureLoadings == res[[i+1]]@featureLoadings)))
51
+    expect_true(all(sapply(1:4, function(i)
52
+        res[[i]]@sampleFactors == res[[i+1]]@sampleFactors)))
53
+
54
+    # passing uncertainty
55
+    expect_error(CoGAPS(testDataFrame, uncertainty=as.matrix(GIST.S),
56
+        nIterations=100, outputFrequency=50, seed=1), NA)    
57
+
58
+    # multiple threads
59
+    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
60
+        seed=1, nThreads=2), NA)
61
+    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
62
+        seed=1, nThreads=6), NA)
63
+    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
64
+        seed=1, nThreads=12), NA)
65
+
66
+    # distributed CoGAPS 
67
+    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
68
+        seed=1, distributed="genome-wide"), NA)
69
+    expect_error(CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50,
70
+        seed=1, distributed="genome-wide"), NA)
71
+
72
+    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
73
+        seed=1, distributed="single-cell", transposeData=TRUE), NA)
74
+    expect_error(CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50,
75
+        seed=1, distributed="single-cell", transposeData=TRUE), NA)
76
+})
77
+
78
+#test_that("Invalid Top-Level CoGAPS Calls",
79
+#{
80
+
81
+#})
0 82
\ No newline at end of file