Browse code

moved file writers to FileParser

Tom Sherman authored on 30/07/2018 02:34:58
Showing35 changed files

... ...
@@ -14,15 +14,17 @@ Description: Coordinated Gene Activity in Pattern Sets (CoGAPS)
14 14
 Maintainer: Elana J. Fertig <ejfertig@jhmi.edu>,
15 15
     Thomas D. Sherman <tomsherman159@gmail.com>
16 16
 Depends:
17
-    R (>= 3.5.0),
18
-    Rcpp
17
+    R (>= 3.5.0)
19 18
 Imports:
20 19
     cluster,
20
+    data.table,
21 21
     methods,
22 22
     gplots,
23 23
     graphics,
24
+    Rcpp,
24 25
     S4Vectors,
25
-    SingleCellExperiment
26
+    SingleCellExperiment,
27
+    SummarizedExperiment
26 28
 Suggests:
27 29
     testthat,
28 30
     knitr,
... ...
@@ -2,6 +2,7 @@
2 2
 
3 3
 S3method(plot,CogapsResult)
4 4
 export(CoGAPS)
5
+export(GWCoGAPS)
5 6
 export(binaryA)
6 7
 export(buildReport)
7 8
 export(calcCoGAPSStat)
... ...
@@ -15,17 +16,40 @@ export(patternMatch)
15 16
 export(plotPatternMarkers)
16 17
 export(plotResiduals)
17 18
 export(reconstructGene)
19
+export(scCoGAPS)
20
+export(setDistributedParams)
18 21
 export(setParam)
19 22
 exportClasses(CogapsParams)
20 23
 exportClasses(CogapsResult)
24
+importClassesFrom(S4Vectors,Annotated)
25
+importClassesFrom(S4Vectors,character_OR_NULL)
21 26
 importClassesFrom(SingleCellExperiment,LinearEmbeddingMatrix)
27
+importFrom(Rcpp,evalCpp)
28
+importFrom(SummarizedExperiment,assay)
22 29
 importFrom(cluster,agnes)
30
+importFrom(data.table,fread)
23 31
 importFrom(gplots,bluered)
24 32
 importFrom(gplots,heatmap.2)
33
+importFrom(grDevices,colorRampPalette)
34
+importFrom(grDevices,rainbow)
35
+importFrom(graphics,legend)
36
+importFrom(graphics,lines)
37
+importFrom(graphics,mtext)
25 38
 importFrom(graphics,plot)
39
+importFrom(graphics,points)
40
+importFrom(methods,"slot<-")
26 41
 importFrom(methods,callNextMethod)
42
+importFrom(methods,is)
27 43
 importFrom(methods,new)
44
+importFrom(methods,slot)
45
+importFrom(methods,slotNames)
46
+importFrom(methods,validObject)
47
+importFrom(stats,as.dist)
28 48
 importFrom(stats,as.hclust)
49
+importFrom(stats,cor)
29 50
 importFrom(stats,cutree)
30 51
 importFrom(stats,hclust)
52
+importFrom(stats,runif)
53
+importFrom(stats,weighted.mean)
54
+importFrom(tools,file_ext)
31 55
 useDynLib(CoGAPS)
... ...
@@ -1,22 +1,42 @@
1 1
 #' @include class-CogapsParams.R
2 2
 NULL
3 3
 
4
+#' Checks if file is supported
5
+#' @param file path to file
6
+#' @return TRUE if file is supported, FALSE if not
7
+#' @importFrom tools file_ext
8
+supported <- function(file)
9
+{
10
+    if (!is(file, "character"))
11
+        return(FALSE)
12
+    return(tools::file_ext(file) %in% c("tsv", "csv", "mtx"))
13
+}
14
+
4 15
 #' CoGAPS Matrix Factorization Algorithm
5 16
 #' @export 
6
-#' @docType methods
7
-#' @rdname CoGAPS-methods
8 17
 #'
9 18
 #' @description calls the C++ MCMC code and performs Bayesian
10 19
 #' matrix factorization returning the two matrices that reconstruct
11 20
 #' the data matrix
12
-#' @details For file types CoGAPS supports csv, tsv, and mtx
21
+#' @details The supported R types are: matrix, data.frame, SummarizedExperiment,
22
+#' SingleCellExperiment. The supported file types are csv, tsv, and mtx.
13 23
 #' @param data File name or R object (see details for supported types)
14 24
 #' @param params CogapsParams object
15
-#' @param uncertainty uncertainty matrix (same supported types as data)
16
-#' @param fixedMatrix data for fixing the values of either the A or P matrix;
17
-#'  used in conjuction with whichMatrixFixed (see CogapsParams)
18
-#' @param checkpointInFile name of the checkpoint file
19
-#' @param ... keeps backwards compatibility with arguments from older versions
25
+#' @param nThreads maximum number of threads to run on
26
+#' @param messages T/F for displaying output
27
+#' @param outputFrequency number of iterations between each output (set to 0 to
28
+#' disable status updates, other output is controlled by @code messages)
29
+#' @param uncertainty uncertainty matrix - either a matrix or a supported
30
+#' file type
31
+#' @param checkpointOutFile name of the checkpoint file to create
32
+#' @param checkpointInterval number of iterations between each checkpoint (set
33
+#' to 0 to disable checkpoints)
34
+#' @param checkpointInFile if this is provided, CoGAPS runs from the checkpoint
35
+#' contained in this file
36
+#' @param transposeData T/F for transposing data while reading it in - useful
37
+#' for data that is stored as samples x genes since CoGAPS requires data to be
38
+#' genes x samples
39
+#' @param ... allows for overwriting parameters in params
20 40
 #' @return CogapsResult object
21 41
 #' @examples
22 42
 #' # Running from R object
... ...
@@ -31,13 +51,14 @@ NULL
31 51
 #' params <- new("CogapsParams")
32 52
 #' params <- setParam(params, "nPatterns", 5)
33 53
 #' resultC <- CoGAPS(GIST.D, params)
34
-#' @importFrom methods new
54
+#' @importFrom methods new is
55
+#' @importFrom SummarizedExperiment assay
35 56
 CoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL,
36 57
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
37 58
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
38 59
 checkpointInFile=NULL, transposeData=FALSE, ...)
39 60
 {
40
-    # parse parameters from ...
61
+    # store all parameters in a list and parse parameters from ...
41 62
     allParams <- list("gaps"=params,
42 63
         "nThreads"=nThreads,
43 64
         "messages"=messages,
... ...
@@ -51,54 +72,42 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
51 72
     allParams <- parseExtraParams(allParams, list(...))
52 73
 
53 74
     # check file extension
54
-    if (class(data) == "character" & !(file_ext(data) %in% c("tsv", "csv", "mtx")))
75
+    if (is(data, "character") & !supported(data))
55 76
         stop("unsupported file extension for data")
56 77
 
57 78
     # check uncertainty matrix
58
-    if (class(data) == "character" & class(uncertainty) != "character")
79
+    if (is(data, "character") & !is(uncertainty, "character"))
59 80
         stop("uncertainty must be same data type as data (file name)")
60
-    if (nchar(uncertainty) > 0 & !(file_ext(uncertainty) %in% c("tsv", "csv", "mtx")))
81
+    if (is(uncertainty, "character") & !supported(uncertainty))
61 82
         stop("unsupported file extension for uncertainty")
62
-
63
-    # check matrix
64
-    if (class(uncertainty) != "matrix")
65
-        stop("uncertainty must be same data type as data (matrix)")
83
+    if (!is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "matrix"))
84
+        stop("uncertainty must be a matrix unless data is a file path")
66 85
     checkDataMatrix(data, uncertainty, params)
67 86
 
68
-    # check if uncertainty is null
69
-    if (is.null(uncertainty) & class(data) == "character")
70
-        uncertainty <- ""
71
-    else if (is.null(uncertainty))
72
-        uncertainty <- matrix(0)
73
-
74 87
     # convert data to matrix
75
-    if (class(data) == "data.frame")
88
+    if (is(data, "data.frame"))
76 89
         data <- data.matrix(data)
77
-    else if (class(data) == "SummarizedExperiment")
78
-        data <- assay(data, "counts")
79
-    else if (class(data) == "SingleCellExperiment")
80
-        data <- assay(data, "counts")
90
+    else if (is(data, "SummarizedExperiment"))
91
+        data <- SummarizedExperiment::assay(data, "counts")
92
+    else if (is(data, "SingleCellExperiment"))
93
+        data <- SummarizedExperiment::assay(data, "counts")
81 94
 
82 95
     # determine which function to call cogaps algorithm
83
-    if (!is.null(callParams$gapsParams@distributed))
96
+    if (!is.null(allParams$gaps@distributed))
84 97
         dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps
85
-    else if (class(data) == "character")
86
-        dispatchFunc <- cogaps_cpp_from_file # data is a file name
98
+    else if (is(data, "character"))
99
+        dispatchFunc <- cogaps_cpp_from_file # data is a file path
87 100
     else
88 101
         dispatchFunc <- cogaps_cpp # default
89 102
 
90 103
     # check if we're running from a checkpoint
91 104
     if (!is.null(allParams$checkpointInFile))
92 105
     {
93
-        if (!is.null(callParams$gapsParams@distributed))
106
+        if (!is.null(allParams$gaps@distributed))
94 107
             stop("checkpoints not supported for distributed cogaps")
95 108
         else
96 109
             message("Running CoGAPS from a checkpoint")
97 110
     }
98
-    else
99
-    {
100
-        allParams$checkpointInFile <- ""
101
-    }
102 111
 
103 112
     # run cogaps
104 113
     gapsReturnList <- dispatchFunc(data, allParams, uncertainty)
... ...
@@ -115,6 +124,13 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
115 124
     ))
116 125
 }
117 126
 
127
+#' Single Cell CoGAPS
128
+#' @export
129
+#'
130
+#' @description wrapper around single-cell distributed algorithm for CoGAPS
131
+#' @inheritParams CoGAPS
132
+#' @return CogapsResult object
133
+#' @importFrom methods new
118 134
 scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL,
119 135
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
120 136
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
... ...
@@ -126,6 +142,13 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
126 142
         ...)
127 143
 }
128 144
 
145
+#' Genome Wide CoGAPS
146
+#' @export
147
+#'
148
+#' @description wrapper around genome-wide distributed algorithm for CoGAPS
149
+#' @inheritParams CoGAPS
150
+#' @return CogapsResult object
151
+#' @importFrom methods new
129 152
 GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL,
130 153
 messages=TRUE, outputFrequency=500, uncertainty=NULL,
131 154
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
... ...
@@ -137,77 +160,37 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
137 160
         ...)
138 161
 }   
139 162
 
163
+#' parse parameters passed through the ... variable
164
+#'
165
+#' @param allParams list of all parameters
166
+#' @param extraParams list of parameters in ...
167
+#' @return allParams with any valid parameters in extraParams added
168
+#' @note will halt with an error if any parameters in extraParams are invalid
169
+#' @importFrom methods slotNames
140 170
 parseExtraParams <- function(allParams, extraParams)
141 171
 {
142
-
143
-
144
-}
145
-
146
-#' @rdname parseOldParams-methods
147
-#' @aliases parseOldParams
148
-setMethod("parseOldParams", signature(object="CogapsParams"),
149
-function(object, oldArgs)
150
-{
151
-    helper <- function(arg, params, newArg)
152
-    {
153
-        if (!is.null(oldArgs[[arg]]))
154
-        {
155
-            warning(paste("parameter", arg, "is deprecated, it will still",
156
-                "work, but setting", newArg, "in the params object is the",
157
-                "preferred method"))
158
-            params <- setParam(params, newArg, oldArgs[[arg]])
159
-            oldArgs[[arg]] <- NULL
160
-        }            
161
-        return(params)
162
-    }
163
-
164
-    object <- helper("nFactor", object, "nPatterns")
165
-    object <- helper("nIter", object, "nIterations")
166
-    object <- helper("nEquil", object, "nIterations")
167
-    object <- helper("nSample", object, "nIterations")
168
-    object <- helper("nOutR", object, "outputFrequency")
169
-    object <- helper("nOutput", object, "outputFrequency")
170
-    object <- helper("maxGibbmassA", object, "maxGibbsMassA")
171
-    object <- helper("max_gibbmass_paraA", object, "maxGibbsMassA")
172
-    object <- helper("maxGibbmassP", object, "maxGibbsMassP")
173
-    object <- helper("max_gibbmass_paraP", object, "maxGibbsMassP")
174
-    object <- helper("singleCellRNASeq", object, "singleCell")
175
-
176
-    if (!is.null(oldArgs$nSnapshots) | !is.null(oldArgs$sampleSnapshots) | !is.null(oldArgs$numSnapshots))
177
-    {
178
-        warning("snapshots not currently supported in release build")
179
-        oldArgs$nSnapshots <- NULL
180
-        oldArgs$sampleSnapshots <- NULL
181
-        oldArgs$numSnapshots <- NULL
182
-    }
183
-    if (!is.null(oldArgs$fixedPatterns))
184
-        stop("pass fixed matrix in with 'fixedMatrix' argument")
185
-    if (!is.null(oldArgs$S))
186
-        stop("pass uncertainty matrix in with 'uncertainty', not 'S'")
187
-
188
-    return(object)
189
-})
190
-
191
-#' @rdname parseDirectParams-methods
192
-#' @aliases parseDirectParams
193
-setMethod("parseDirectParams", signature(object="CogapsParams"),
194
-function(object, args)
195
-{
196
-    for (s in slotNames(object))
172
+    # parse direct params
173
+    for (s in slotNames(allParams$gaps))
197 174
     {
198
-        if (!is.null(args[[s]]))
175
+        if (!is.null(extraParams[[s]]))
199 176
         {
200
-            object <- setParam(object, s, args[[s]])
177
+            allParams$gaps <- setParam(allParams$gaps, s, extraParams[[s]])
178
+            extraParams[[s]] <- NULL
201 179
         }
202 180
     }
203
-    return(object)
204
-})
205 181
 
182
+    # check for unrecognized options
183
+    if (length(extraParams) > 0)
184
+        stop(paste("unrecognized argument:", names(extraParams)[1]))
185
+
186
+    return(allParams)
187
+}
206 188
 
207
-#' Check that provided data is valid
189
+#' check that provided data is valid
208 190
 #'
209 191
 #' @param data data matrix
210
-#' @param uncertainty uncertainty matrix
192
+#' @param uncertainty uncertainty matrix, can be null
193
+#' @param params CogapsParams object
211 194
 #' @return throws an error if data has problems
212 195
 checkDataMatrix <- function(data, uncertainty, params)
213 196
 {
... ...
@@ -215,7 +198,7 @@ checkDataMatrix <- function(data, uncertainty, params)
215 198
         stop("negative values in data and/or uncertainty matrix")
216 199
     if (nrow(data) <= params@nPatterns | ncol(data) <= params@nPatterns)
217 200
         stop("nPatterns must be less than dimensions of data")
218
-    if (sum(dim(uncertainty)) != 2 & sum(uncertainty < 1e-5) > 0)
201
+    if (sum(uncertainty < 1e-5) > 0)
219 202
         warning("small values in uncertainty matrix detected")
220 203
 }
221 204
 
... ...
@@ -26,7 +26,7 @@ distributedCogaps <- function(data, allParams, uncertainty)
26 26
         == "genome-wide", "P", "A")
27 27
 
28 28
     # run all subsets with the same fixed matrix
29
-    finalResult <- foreach(i=1:nSets) %dopar%
29
+    finalResult <- foreach(i=1:allParams$gaps@nSets) %dopar%
30 30
     {
31 31
         cogaps_cpp(data, allParams, uncertainty, sets[[i]], consensusMatrix)
32 32
     }
... ...
@@ -39,6 +39,7 @@ distributedCogaps <- function(data, allParams, uncertainty)
39 39
     return(resultList)
40 40
 }
41 41
 
42
+#' @importFrom data.table fread
42 43
 nrow_helper <- function(data)
43 44
 {
44 45
     if (class(data) == "character")
... ...
@@ -52,6 +53,7 @@ nrow_helper <- function(data)
52 53
     return(nrow(data))
53 54
 }
54 55
 
56
+#' @importFrom data.table fread
55 57
 ncol_helper <- function(data)
56 58
 {
57 59
     if (class(data) == "character")
... ...
@@ -108,6 +110,7 @@ findConsensusMatrix <- function(result, allParams)
108 110
 #' @param ... additional parameters for \code{agnes}
109 111
 #' @return a matrix of consensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
110 112
 #' consensus pattern is also returned.
113
+#' @importFrom stats weighted.mean
111 114
 patternMatch <- function(allPatterns, allParams)
112 115
 {
113 116
     cc <- corcut(allPatterns, allParams)
... ...
@@ -138,7 +141,7 @@ patternMatch <- function(allPatterns, allParams)
138 141
 }
139 142
 
140 143
 #' @importFrom cluster agnes
141
-#' @importFrom stats cutree as.hclust
144
+#' @importFrom stats cutree as.hclust cor
142 145
 corcut <- function(allPatterns, allParams)
143 146
 {
144 147
     corr.dist <- cor(allPatterns)
... ...
@@ -21,6 +21,7 @@
21 21
 #' Bioinformatics. 2010 Nov 1;26(21):2792-3
22 22
 #' @docType package
23 23
 #' @name CoGAPS-package
24
+#' @importFrom Rcpp evalCpp
24 25
 #' @useDynLib CoGAPS
25 26
 NULL
26 27
 
... ...
@@ -1,11 +1,11 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps_cpp_from_file <- function(data, allParams, uncertainty, indices = numericVector(1), fixedMatrix = matrix(1,1)) {
4
+cogaps_cpp_from_file <- function(data, allParams, uncertainty, indices = NULL, fixedMatrix = NULL) {
5 5
     .Call('_CoGAPS_cogaps_cpp_from_file', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
6 6
 }
7 7
 
8
-cogaps_cpp <- function(data, allParams, uncertainty, indices = numericVector(1), fixedMatrix = matrix(1,1)) {
8
+cogaps_cpp <- function(data, allParams, uncertainty, indices = NULL, fixedMatrix = NULL) {
9 9
     .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
10 10
 }
11 11
 
... ...
@@ -20,6 +20,7 @@
20 20
 #' a cluster must contain
21 21
 #' @slot maxNS [distributed parameter] maximum of individual set contributions
22 22
 #' a cluster can contain
23
+#' @importClassesFrom S4Vectors character_OR_NULL
23 24
 setClass("CogapsParams", slots = c(
24 25
     nPatterns = "numeric",
25 26
     nIterations = "numeric",
... ...
@@ -29,9 +30,9 @@ setClass("CogapsParams", slots = c(
29 30
     maxGibbsMassP = "numeric",
30 31
     seed = "numeric",
31 32
     singleCell = "logical",
32
-    distributed = "character",
33
-    cut = "numeric",
33
+    distributed = "character_OR_NULL",
34 34
     nSets = "numeric",
35
+    cut = "numeric",
35 36
     minNS = "numeric",
36 37
     maxNS = "numeric"
37 38
 ))
... ...
@@ -51,9 +52,9 @@ setMethod("initialize", "CogapsParams",
51 52
         .Object@maxGibbsMassP <- 100
52 53
         .Object@seed <- getMilliseconds(as.POSIXlt(Sys.time()))
53 54
         .Object@singleCell <- FALSE
54
-        .Object@distributed <- ""
55
+        .Object@distributed <- NULL
55 56
         .Object@cut <- .Object@nPatterns
56
-        .Object@nSets <- 3
57
+        .Object@nSets <- 4
57 58
         .Object@minNS <- ceiling(.Object@nSets / 2)
58 59
         .Object@maxNS <- .Object@minNS + .Object@nSets
59 60
 
... ...
@@ -62,7 +63,7 @@ setMethod("initialize", "CogapsParams",
62 63
     }
63 64
 )
64 65
 
65
-#' defines a valid parameters object
66
+## defines a valid parameters object
66 67
 setValidity("CogapsParams",
67 68
     function(object)
68 69
     {
... ...
@@ -70,20 +71,12 @@ setValidity("CogapsParams",
70 71
             "number of patterns must be an integer greater than zero"
71 72
         if (object@nIterations <= 0 | object@nIterations %% 1 != 0)
72 73
             "number of iterations must be an integer greater than zero"
73
-        if (object@outputFrequency <= 0 | object@outputFrequency %% 1 != 0)
74
-            "the output frequency must be an integer greater than zero"
75 74
         if (object@alphaA  <= 0 | object@alphaP <= 0)
76 75
             "alpha parameter must be greater than zero"
77 76
         if (object@maxGibbsMassA  <= 0 | object@maxGibbsMassP <= 0)
78 77
             "maxGibbsMass must be greater than zero"
79 78
         if (object@seed <= 0 | object@seed %% 1 != 0)
80 79
             "random seed must be an integer greater than zero"
81
-        if (!(object@whichMatrixFixed %in% c("N", "A", "P")))
82
-            "whichMatrixFixed must be either A or P (N in the case of neither)"
83
-        if (object@checkpointInterval <= 0 | object@checkpointInterval %% 1 != 0)
84
-            "checkpointInterval must be an integer greater than zero"
85
-        if (object@nCores <= 0 | object@nCores %% 1 != 0)
86
-            "number of cores must be an integer greater than zero"
87 80
         if (object@minNS <= 1 | object@minNS %% 1 != 0)
88 81
             "minNS must be an integer greater than one"
89 82
     }
... ...
@@ -104,6 +97,24 @@ setValidity("CogapsParams",
104 97
 setGeneric("setParam", function(object, whichParam, value)
105 98
     {standardGeneric("setParam")})
106 99
 
100
+#' set the value of parameters for distributed CoGAPS
101
+#' @export
102
+#' @docType methods
103
+#' @rdname setDistributedParams-methods
104
+#'
105
+#' @description these parameters  are interrelated so they must be set together
106
+#' @param object an object of type CogapsParams
107
+#' @param cut a distributed CoGAPS parameter 
108
+#' @param minNS a distributed CoGAPS parameter 
109
+#' @param maxNS a distributed CoGAPS parameter 
110
+#' @return the modified params object
111
+#' @examples
112
+#'  params <- new("CogapsParams")
113
+#'  params <- setDistributedParams(3, 2, 4)
114
+setGeneric("setDistributedParams", function(object, cut=NULL,
115
+minNS=NULL, maxNS=NULL)
116
+    {standardGeneric("setDistributedParams")})
117
+
107 118
 #' get the value of a parameter
108 119
 #' @export
109 120
 #' @docType methods
... ...
@@ -2,6 +2,7 @@
2 2
 #' @export
3 3
 #'
4 4
 #' @description Contains all output from Cogaps run
5
+#' @importClassesFrom S4Vectors Annotated
5 6
 #' @importClassesFrom SingleCellExperiment LinearEmbeddingMatrix
6 7
 setClass("CogapsResult", contains="LinearEmbeddingMatrix", slots=c(
7 8
     sampleStdDev = "ANY",   # Psd transpose
... ...
@@ -152,7 +153,7 @@ setGeneric("calcCoGAPSStat", function(object, GStoGenes, numPerm=500)
152 153
 #' listed in a gene set behaves like other genes in the set within
153 154
 #' the given data set
154 155
 #' @param object an object of type CogapsResult
155
-#' @param GSGenes data.frame or list with gene sets
156
+#' @param GStoGenes data.frame or list with gene sets
156 157
 #' @param numPerm number of permutations for null
157 158
 #' @param Pw weight on genes
158 159
 #' @param nullGenes logical indicating gene adjustment
... ...
@@ -162,7 +163,7 @@ setGeneric("calcCoGAPSStat", function(object, GStoGenes, numPerm=500)
162 163
 #' result <- CoGAPS(SimpSim.D)
163 164
 #' calcGeneGSStat(result, GSGenes=GSets[[1]], numPerm=500)
164 165
 setGeneric("calcGeneGSStat", function(object, GStoGenes, numPerm,
165
-Pw=rep(1,ncol(Amean)), nullGenes=FALSE)
166
+Pw=rep(1,ncol(object@featureLoadings)), nullGenes=FALSE)
166 167
     {standardGeneric("calcGeneGSStat")})
167 168
 
168 169
 #' compute gene probability
... ...
@@ -176,7 +177,7 @@ Pw=rep(1,ncol(Amean)), nullGenes=FALSE)
176 177
 #' comparing the inferred activity of that gene to the average activity of the
177 178
 #' set.
178 179
 #' @param object an object of type CogapsResult
179
-#' @param GSGenes data.frame or list with gene sets
180
+#' @param GStoGenes data.frame or list with gene sets
180 181
 #' @param Pw weight on genes
181 182
 #' @param numPerm number of permutations for null
182 183
 #' @param PwNull - logical indicating gene adjustment
... ...
@@ -187,7 +188,7 @@ Pw=rep(1,ncol(Amean)), nullGenes=FALSE)
187 188
 #' result <- CoGAPS(SimpSim.D)
188 189
 #' computeGeneGSProb(result, GSGenes=GSets[[1]], numPerm=500)
189 190
 setGeneric("computeGeneGSProb", function(object, GStoGenes, numPerm=500,
190
-Pw=rep(1,ncol(Amean)), PwNull=FALSE)
191
+Pw=rep(1,ncol(object@featureLoadings)), PwNull=FALSE)
191 192
     {standardGeneric("computeGeneGSProb")})
192 193
 
193 194
 #' compute pattern markers statistic
... ...
@@ -2,27 +2,93 @@ setMethod("show", signature("CogapsParams"),
2 2
 function(object)
3 3
 {
4 4
     cat("An Object of class \"CogapsParams\"\n")
5
-    cat("nPatterns          ", object@nPatterns, "\n")
6
-    cat("nIterations        ", object@nIterations, "\n")
7
-    cat("outputFrequency    ", object@outputFrequency, "\n")
8
-    cat("nCores             ", object@nCores, "\n")
9
-    cat("singleCell         ", object@singleCell, "\n")
10
-    cat("seed               ", object@seed, "\n")
11
-    cat("messages           ", object@messages, "\n")
12
-    cat("checkpointInterval ", object@checkpointInterval, "\n")
13
-    cat("checkpointOutFile  ", object@checkpointOutFile, "\n")
5
+    cat("\n")
6
+    cat("-- Standard Parameters --\n")
7
+    cat("nPatterns     ", object@nPatterns, "\n")
8
+    cat("nIterations   ", object@nIterations, "\n")
9
+    cat("seed          ", object@seed, "\n")
10
+    cat("singleCell    ", object@singleCell, "\n")
11
+    cat("distributed   ", ifelse(is.null(object@distributed), FALSE, TRUE), "\n")
12
+    cat("\n")
13
+    cat("-- Sparsity Parameters --\n")
14
+    if (object@alphaA == object@alphaP)
15
+    {
16
+        cat("alpha         ", object@alphaA, "\n")
17
+    }
18
+    else
19
+    {
20
+        cat("alphaA        ", object@alphaA, "\n")
21
+        cat("alphaP        ", object@alphaP, "\n")
22
+    }
23
+
24
+    if (object@maxGibbsMassA == object@maxGibbsMassP)
25
+    {
26
+        cat("maxGibbsMass  ", object@maxGibbsMassA, "\n")
27
+    }
28
+    else
29
+    {
30
+        cat("maxGibbsMassA ", object@maxGibbsMassA, "\n")
31
+        cat("maxGibbsMassP ", object@maxGibbsMassP, "\n")
32
+    }
33
+    if (!is.null(object@distributed))
34
+    {
35
+        cat("\n")
36
+        cat("-- Distributed CoGAPS Parameters --", "\n")
37
+        cat("nSets         ", object@nSets, "\n")
38
+        cat("cut           ", object@cut, "\n")
39
+        cat("minNS         ", object@minNS, "\n")
40
+        cat("maxNS         ", object@maxNS, "\n")
41
+    }
14 42
 })
15 43
 
16 44
 #' @rdname setParam-methods
17 45
 #' @aliases setParam
46
+#' @importFrom methods slot slot<- validObject
18 47
 setMethod("setParam", signature(object="CogapsParams"),
19 48
 function(object, whichParam, value)
20 49
 {
21
-    slot(object, whichParam) <- value
50
+    if (whichParam == "alpha")
51
+    {
52
+        object@alphaA <- value
53
+        object@alphaP <- value
54
+    }
55
+    else if (whichParam == "maxGibbsMass")
56
+    {
57
+        object@maxGibbsMassA <- value
58
+        object@maxGibbsMassP <- value
59
+    }
60
+    else if (whichParam %in% c("cut", "minNS", "maxNS"))
61
+    {
62
+        stop("please set this parameter with setDistributedParams")
63
+    }
64
+    else
65
+    {
66
+        slot(object, whichParam) <- value
67
+    }
22 68
     validObject(object)
23 69
     return(object)
24 70
 })
25 71
 
72
+#' @rdname setDistributedParams-methods
73
+#' @aliases setDistributedParams
74
+#' @importFrom methods slot
75
+setMethod("setDistributedParams", signature(object="CogapsParams"),
76
+function(object, cut, minNS, maxNS)
77
+{
78
+    object@cut <- cut
79
+    object@minNS <- minNS
80
+    object@maxNS <- maxNS
81
+
82
+    if (is.null(object@cut))
83
+        object@cut <- object@nPatterns
84
+    if (is.null(object@minNS))
85
+        object@minNS <- ceiling(object@nSets / 2)
86
+    if (is.null(object@maxNS))
87
+        object@maxNS <- object@minNS + object@nSets
88
+
89
+    return(object)
90
+})
91
+
26 92
 #' @rdname getParam-methods
27 93
 #' @aliases getParam
28 94
 setMethod("getParam", signature(object="CogapsParams"),
... ...
@@ -11,7 +11,8 @@ function(object)
11 11
 })
12 12
 
13 13
 #' @export
14
-#' @importFrom graphics plot
14
+#' @importFrom graphics plot legend lines points
15
+#' @importFrom grDevices rainbow
15 16
 plot.CogapsResult <- function(x, ...)
16 17
 {
17 18
     nSamples <- nrow(object@sampleFactors)
... ...
@@ -75,6 +76,7 @@ function(object, genes)
75 76
 #' @rdname binaryA-methods
76 77
 #' @aliases binaryA
77 78
 #' @importFrom gplots heatmap.2
79
+#' @importFrom graphics mtext
78 80
 setMethod("binaryA", signature(object="CogapsResult"),
79 81
 function(object, threshold)
80 82
 {
... ...
@@ -92,6 +94,7 @@ function(object, threshold)
92 94
 #' @rdname plotResiduals-methods
93 95
 #' @aliases plotResiduals
94 96
 #' @importFrom gplots heatmap.2
97
+#' @importFrom grDevices colorRampPalette
95 98
 setMethod("plotResiduals", signature(object="CogapsResult"),
96 99
 function(object, data, uncertainty)
97 100
 {
... ...
@@ -194,7 +197,7 @@ function(object, threshold, lp)
194 197
 #' @seealso  \code{\link{heatmap.2}}
195 198
 #' @importFrom gplots bluered
196 199
 #' @importFrom gplots heatmap.2
197
-#' @importFrom stats hclust
200
+#' @importFrom stats hclust as.dist cor
198 201
 plotPatternMarkers <- function(object, data, patternPalette, sampleNames,
199 202
 samplePalette=NULL, heatmapCol=bluered, colDenogram=TRUE, scale="row", ...)
200 203
 {
... ...
@@ -229,6 +232,8 @@ samplePalette=NULL, heatmapCol=bluered, colDenogram=TRUE, scale="row", ...)
229 232
 
230 233
 #' @rdname calcCoGAPSStat-methods
231 234
 #' @aliases calcCoGAPSStat
235
+#' @importFrom stats runif
236
+#' @importFrom methods is
232 237
 setMethod("calcCoGAPSStat", signature(object="CogapsResult"),
233 238
 function(object, GStoGenes, numPerm)
234 239
 {
... ...
@@ -346,7 +351,7 @@ function(object, GStoGenes, numPerm)
346 351
 setMethod("calcGeneGSStat", signature(object="CogapsResult"),
347 352
 function(object, GStoGenes, numPerm, Pw, nullGenes)
348 353
 {
349
-    gsStat <- calcCoGAPSStat(object, data.frame(GSGenes), numPerm=numPerm)
354
+    gsStat <- calcCoGAPSStat(object, data.frame(GStoGenes), numPerm=numPerm)
350 355
     gsStat <- gsStat$GSUpreg
351 356
     gsStat <- -log(gsStat)
352 357
 
... ...
@@ -361,12 +366,12 @@ function(object, GStoGenes, numPerm, Pw, nullGenes)
361 366
 
362 367
     if (nullGenes)
363 368
     {
364
-        ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GSGenes),] /
365
-            object@featureStdDev[setdiff(row.names(object@featureLoadings), GSGenes),]
369
+        ZD <- object@featureLoadings[setdiff(row.names(object@featureLoadings), GStoGenes),] /
370
+            object@featureStdDev[setdiff(row.names(object@featureLoadings), GStoGenes),]
366 371
     }
367 372
     else
368 373
     {
369
-        ZD <- object@featureLoadings[GSGenes,]/object@featureStdDev[GSGenes,]
374
+        ZD <- object@featureLoadings[GStoGenes,]/object@featureStdDev[GStoGenes,]
370 375
     }
371 376
     outStats <- apply(sweep(ZD,2,gsStat,FUN="*"),1,sum) / (sum(gsStat))
372 377
     outStats <- outStats / apply(ZD,1,sum)
... ...
@@ -384,21 +389,21 @@ function(object, GStoGenes, numPerm, Pw, nullGenes)
384 389
 setMethod("computeGeneGSProb", signature(object="CogapsResult"),
385 390
 function(object, GStoGenes, numPerm, Pw, PwNull)
386 391
 {
387
-    geneGSStat <- calcGeneGSStat(object, Pw=Pw, GSGenes=GSGenes,
392
+    geneGSStat <- calcGeneGSStat(object, Pw=Pw, GStoGenes=GStoGenes,
388 393
         numPerm=numPerm)
389 394
 
390 395
     if (PwNull)
391 396
     {
392
-        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
397
+        permGSStat <- calcGeneGSStat(object, GStoGenes=GStoGenes, numPerm=numPerm,
393 398
             Pw=Pw, nullGenes=TRUE)
394 399
     }
395 400
     else
396 401
     {
397
-        permGSStat <- calcGeneGSStat(object, GSGenes=GSGenes, numPerm=numPerm,
402
+        permGSStat <- calcGeneGSStat(object, GStoGenes=GStoGenes, numPerm=numPerm,
398 403
             nullGenes=TRUE)
399 404
     }
400 405
 
401
-    finalStats <- sapply(GSGenes, function(x)
406
+    finalStats <- sapply(GStoGenes, function(x)
402 407
         length(which(permGSStat > geneGSStat[x])) / length(permGSStat))
403 408
 
404 409
     return(finalStats)
405 410
similarity index 52%
406 411
rename from man/CoGAPS-methods.Rd
407 412
rename to man/CoGAPS.Rd
... ...
@@ -1,6 +1,5 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/CoGAPS.R
3
-\docType{methods}
4 3
 \name{CoGAPS}
5 4
 \alias{CoGAPS}
6 5
 \title{CoGAPS Matrix Factorization Algorithm}
... ...
@@ -15,14 +14,29 @@ CoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
15 14
 
16 15
 \item{params}{CogapsParams object}
17 16
 
18
-\item{uncertainty}{uncertainty matrix (same supported types as data)}
17
+\item{nThreads}{maximum number of threads to run on}
19 18
 
20
-\item{checkpointInFile}{name of the checkpoint file}
19
+\item{messages}{T/F for displaying output}
21 20
 
22
-\item{...}{keeps backwards compatibility with arguments from older versions}
21
+\item{outputFrequency}{number of iterations between each output (set to 0 to
22
+disable status updates, other output is controlled by @code messages)}
23 23
 
24
-\item{fixedMatrix}{data for fixing the values of either the A or P matrix;
25
-used in conjuction with whichMatrixFixed (see CogapsParams)}
24
+\item{uncertainty}{uncertainty matrix - either a matrix or a supported
25
+file type}
26
+
27
+\item{checkpointOutFile}{name of the checkpoint file to create}
28
+
29
+\item{checkpointInterval}{number of iterations between each checkpoint (set
30
+to 0 to disable checkpoints)}
31
+
32
+\item{checkpointInFile}{if this is provided, CoGAPS runs from the checkpoint
33
+contained in this file}
34
+
35
+\item{transposeData}{T/F for transposing data while reading it in - useful
36
+for data that is stored as samples x genes since CoGAPS requires data to be
37
+genes x samples}
38
+
39
+\item{...}{allows for overwriting parameters in params}
26 40
 }
27 41
 \value{
28 42
 CogapsResult object
... ...
@@ -33,7 +47,8 @@ matrix factorization returning the two matrices that reconstruct
33 47
 the data matrix
34 48
 }
35 49
 \details{
36
-For file types CoGAPS supports csv, tsv, and mtx
50
+The supported R types are: matrix, data.frame, SummarizedExperiment,
51
+SingleCellExperiment. The supported file types are csv, tsv, and mtx.
37 52
 }
38 53
 \examples{
39 54
 # Running from R object
40 55
new file mode 100644
... ...
@@ -0,0 +1,46 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{GWCoGAPS}
4
+\alias{GWCoGAPS}
5
+\title{Genome Wide CoGAPS}
6
+\usage{
7
+GWCoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
8
+  messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9
+  checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
+  checkpointInFile = NULL, transposeData = FALSE, ...)
11
+}
12
+\arguments{
13
+\item{data}{File name or R object (see details for supported types)}
14
+
15
+\item{params}{CogapsParams object}
16
+
17
+\item{nThreads}{maximum number of threads to run on}
18
+
19
+\item{messages}{T/F for displaying output}
20
+
21
+\item{outputFrequency}{number of iterations between each output (set to 0 to
22
+disable status updates, other output is controlled by @code messages)}
23
+
24
+\item{uncertainty}{uncertainty matrix - either a matrix or a supported
25
+file type}
26
+
27
+\item{checkpointOutFile}{name of the checkpoint file to create}
28
+
29
+\item{checkpointInterval}{number of iterations between each checkpoint (set
30
+to 0 to disable checkpoints)}
31
+
32
+\item{checkpointInFile}{if this is provided, CoGAPS runs from the checkpoint
33
+contained in this file}
34
+
35
+\item{transposeData}{T/F for transposing data while reading it in - useful
36
+for data that is stored as samples x genes since CoGAPS requires data to be
37
+genes x samples}
38
+
39
+\item{...}{allows for overwriting parameters in params}
40
+}
41
+\value{
42
+CogapsResult object
43
+}
44
+\description{
45
+wrapper around genome-wide distributed algorithm for CoGAPS
46
+}
... ...
@@ -7,22 +7,22 @@
7 7
 \alias{calcGeneGSStat}
8 8
 \title{probability gene belongs in gene set}
9 9
 \usage{
10
-calcGeneGSStat(object, GStoGenes, numPerm, Pw = rep(1, ncol(Amean)),
11
-  nullGenes = FALSE)
10
+calcGeneGSStat(object, GStoGenes, numPerm, Pw = rep(1,
11
+  ncol(object@featureLoadings)), nullGenes = FALSE)
12 12
 
13 13
 \S4method{calcGeneGSStat}{CogapsResult}(object, GStoGenes, numPerm,
14
-  Pw = rep(1, ncol(Amean)), nullGenes = FALSE)
14
+  Pw = rep(1, ncol(object@featureLoadings)), nullGenes = FALSE)
15 15
 }
16 16
 \arguments{
17 17
 \item{object}{an object of type CogapsResult}
18 18
 
19
+\item{GStoGenes}{data.frame or list with gene sets}
20
+
19 21
 \item{numPerm}{number of permutations for null}
20 22
 
21 23
 \item{Pw}{weight on genes}
22 24
 
23 25
 \item{nullGenes}{logical indicating gene adjustment}
24
-
25
-\item{GSGenes}{data.frame or list with gene sets}
26 26
 }
27 27
 \value{
28 28
 gene similiarity statistic
... ...
@@ -2,18 +2,20 @@
2 2
 % Please edit documentation in R/CoGAPS.R
3 3
 \name{checkDataMatrix}
4 4
 \alias{checkDataMatrix}
5
-\title{Check that provided data is valid}
5
+\title{check that provided data is valid}
6 6
 \usage{
7 7
 checkDataMatrix(data, uncertainty, params)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{data matrix}
11 11
 
12
-\item{uncertainty}{uncertainty matrix}
12
+\item{uncertainty}{uncertainty matrix, can be null}
13
+
14
+\item{params}{CogapsParams object}
13 15
 }
14 16
 \value{
15 17
 throws an error if data has problems
16 18
 }
17 19
 \description{
18
-Check that provided data is valid
20
+check that provided data is valid
19 21
 }
... ...
@@ -8,21 +8,21 @@
8 8
 \title{compute gene probability}
9 9
 \usage{
10 10
 computeGeneGSProb(object, GStoGenes, numPerm = 500, Pw = rep(1,
11
-  ncol(Amean)), PwNull = FALSE)
11
+  ncol(object@featureLoadings)), PwNull = FALSE)
12 12
 
13 13
 \S4method{computeGeneGSProb}{CogapsResult}(object, GStoGenes, numPerm = 500,
14
-  Pw = rep(1, ncol(Amean)), PwNull = FALSE)
14
+  Pw = rep(1, ncol(object@featureLoadings)), PwNull = FALSE)
15 15
 }
16 16
 \arguments{
17 17
 \item{object}{an object of type CogapsResult}
18 18
 
19
+\item{GStoGenes}{data.frame or list with gene sets}
20
+
19 21
 \item{numPerm}{number of permutations for null}
20 22
 
21 23
 \item{Pw}{weight on genes}
22 24
 
23 25
 \item{PwNull}{- logical indicating gene adjustment}
24
-
25
-\item{GSGenes}{data.frame or list with gene sets}
26 26
 }
27 27
 \value{
28 28
 A vector of length GSGenes containing the p-values of set membership
... ...
@@ -3,13 +3,10 @@
3 3
 \docType{methods}
4 4
 \name{initialize,CogapsParams-method}
5 5
 \alias{initialize,CogapsParams-method}
6
-\title{Constructor for CogapsParams}
6
+\title{constructor for CogapsParams}
7 7
 \usage{
8 8
 \S4method{initialize}{CogapsParams}(.Object, ...)
9 9
 }
10
-\value{
11
-initialized CogapsParams object
12
-}
13 10
 \description{
14
-Constructor for CogapsParams
11
+constructor for CogapsParams
15 12
 }
16 13
deleted file mode 100644
... ...
@@ -1,24 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R
3
-\docType{methods}
4
-\name{parseDirectParams}
5
-\alias{parseDirectParams}
6
-\alias{parseDirectParams,CogapsParams-method}
7
-\alias{parseDirectParams}
8
-\title{parse list of parameters passed directly to CoGAPS}
9
-\usage{
10
-parseDirectParams(object, args)
11
-
12
-\S4method{parseDirectParams}{CogapsParams}(object, args)
13
-}
14
-\arguments{
15
-\item{object}{an object of type CogapsParams}
16
-
17
-\item{oldArgs}{named list of arguments}
18
-}
19
-\value{
20
-an object of type CogapsParams
21
-}
22
-\description{
23
-parse list of parameters passed directly to CoGAPS
24
-}
25 0
new file mode 100644
... ...
@@ -0,0 +1,22 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{parseExtraParams}
4
+\alias{parseExtraParams}
5
+\title{parse parameters passed through the ... variable}
6
+\usage{
7
+parseExtraParams(allParams, extraParams)
8
+}
9
+\arguments{
10
+\item{allParams}{list of all parameters}
11
+
12
+\item{extraParams}{list of parameters in ...}
13
+}
14
+\value{
15
+allParams with any valid parameters in extraParams added
16
+}
17
+\description{
18
+parse parameters passed through the ... variable
19
+}
20
+\note{
21
+will halt with an error if any parameters in extraParams are invalid
22
+}
0 23
deleted file mode 100644
... ...
@@ -1,24 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R
3
-\docType{methods}
4
-\name{parseOldParams}
5
-\alias{parseOldParams}
6
-\alias{parseOldParams,CogapsParams-method}
7
-\alias{parseOldParams}
8
-\title{parse list of old-style parameters, store relevant values}
9
-\usage{
10
-parseOldParams(object, oldArgs)
11
-
12
-\S4method{parseOldParams}{CogapsParams}(object, oldArgs)
13
-}
14
-\arguments{
15
-\item{object}{an object of type CogapsParams}
16
-
17
-\item{oldArgs}{named list of deprecated arguments}
18
-}
19
-\value{
20
-an object of type CogapsParams
21
-}
22
-\description{
23
-parse list of old-style parameters, store relevant values
24
-}
25 0
new file mode 100644
... ...
@@ -0,0 +1,46 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{scCoGAPS}
4
+\alias{scCoGAPS}
5
+\title{Single Cell CoGAPS}
6
+\usage{
7
+scCoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
8
+  messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9
+  checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
+  checkpointInFile = NULL, transposeData = FALSE, ...)
11
+}
12
+\arguments{
13
+\item{data}{File name or R object (see details for supported types)}
14
+
15
+\item{params}{CogapsParams object}
16
+
17
+\item{nThreads}{maximum number of threads to run on}
18
+
19
+\item{messages}{T/F for displaying output}
20
+
21
+\item{outputFrequency}{number of iterations between each output (set to 0 to
22
+disable status updates, other output is controlled by @code messages)}
23
+
24
+\item{uncertainty}{uncertainty matrix - either a matrix or a supported
25
+file type}
26
+
27
+\item{checkpointOutFile}{name of the checkpoint file to create}
28
+
29
+\item{checkpointInterval}{number of iterations between each checkpoint (set
30
+to 0 to disable checkpoints)}
31
+
32
+\item{checkpointInFile}{if this is provided, CoGAPS runs from the checkpoint
33
+contained in this file}
34
+
35
+\item{transposeData}{T/F for transposing data while reading it in - useful
36
+for data that is stored as samples x genes since CoGAPS requires data to be
37
+genes x samples}
38
+
39
+\item{...}{allows for overwriting parameters in params}
40
+}
41
+\value{
42
+CogapsResult object
43
+}
44
+\description{
45
+wrapper around single-cell distributed algorithm for CoGAPS
46
+}
0 47
new file mode 100644
... ...
@@ -0,0 +1,33 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsParams.R, R/methods-CogapsParams.R
3
+\docType{methods}
4
+\name{setDistributedParams}
5
+\alias{setDistributedParams}
6
+\alias{setDistributedParams,CogapsParams-method}
7
+\alias{setDistributedParams}
8
+\title{set the value of parameters for distributed CoGAPS}
9
+\usage{
10
+setDistributedParams(object, cut = NULL, minNS = NULL, maxNS = NULL)
11
+
12
+\S4method{setDistributedParams}{CogapsParams}(object, cut = NULL,
13
+  minNS = NULL, maxNS = NULL)
14
+}
15
+\arguments{
16
+\item{object}{an object of type CogapsParams}
17
+
18
+\item{cut}{a distributed CoGAPS parameter}
19
+
20
+\item{minNS}{a distributed CoGAPS parameter}
21
+
22
+\item{maxNS}{a distributed CoGAPS parameter}
23
+}
24
+\value{
25
+the modified params object
26
+}
27
+\description{
28
+these parameters  are interrelated so they must be set together
29
+}
30
+\examples{
31
+ params <- new("CogapsParams")
32
+ params <- setDistributedParams(3, 2, 4)
33
+}
0 34
new file mode 100644
... ...
@@ -0,0 +1,17 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{supported}
4
+\alias{supported}
5
+\title{Checks if file is supported}
6
+\usage{
7
+supported(file)
8
+}
9
+\arguments{
10
+\item{file}{path to file}
11
+}
12
+\value{
13
+TRUE if file is supported, FALSE if not
14
+}
15
+\description{
16
+Checks if file is supported
17
+}
... ...
@@ -150,7 +150,6 @@ void AtomicDomain::erase(uint64_t pos)
150 150
 void AtomicDomain::cacheInsert(uint64_t pos, float mass) const
151 151
 {
152 152
     unsigned ndx = 0;
153
-
154 153
     #pragma omp critical(atomicInsert)
155 154
     {
156 155
         ndx = mInsertCacheIndex++;
... ...
@@ -161,7 +160,6 @@ void AtomicDomain::cacheInsert(uint64_t pos, float mass) const
161 160
 void AtomicDomain::cacheErase(uint64_t pos) const
162 161
 {
163 162
     unsigned ndx = 0;
164
-
165 163
     #pragma omp critical(atomicErase)
166 164
     {
167 165
         ndx = mEraseCacheIndex++;
... ...
@@ -2,22 +2,11 @@
2 2
 #include "math/SIMD.h"
3 3
 
4 4
 #include <Rcpp.h>
5
-
6 5
 #include <string>
7 6
 
8 7
 // these are helper functions for converting matrix/vector types
9 8
 // to and from R objects
10 9
 
11
-static std::vector<unsigned> convertRVec(const Rcpp::NumericVector &rvec)
12
-{
13
-    std::vector<unsigned> vec;
14
-    for (unsigned i = 0; i < rvec.size(); ++i)
15
-    {
16
-        vec.push_back(rvec[i]);
17
-    }
18
-    return vec;
19
-}
20
-
21 10
 static Matrix convertRMatrix(const Rcpp::NumericMatrix &rmat)
22 11
 {
23 12
     Matrix mat(rmat.nrow(), rmat.ncol());
... ...
@@ -45,75 +34,101 @@ static Rcpp::NumericMatrix createRMatrix(const Matrix &mat)
45 34
     return rmat;
46 35
 }
47 36
 
48
-// this provides a standard way for communicating which parameters
49
-// are null between R and C++
37
+// these helper functions provide an abtracted way for communicating which
38
+// parameters are null between R and C++
39
+
40
+static bool isNull(const std::string &file)
41
+{
42
+    return file.empty();
43
+}
50 44
 
51 45
 static bool isNull(const Matrix &mat)
52 46
 {
53 47
     return mat.nRow() == 1 && mat.nCol() == 1;
54 48
 }
55 49
 
56
-static bool isNull(const Rcpp::NumericMatrix &mat)
50
+// needed to create proper size of GapsRunner
51
+unsigned getNumPatterns(const Rcpp::List &allParams)
57 52
 {
58
-    return mat.nrow() == 1 && mat.ncol() == 1;
53
+    const Rcpp::S4 &gapsParams(allParams["gaps"]);
54
+    unsigned nPatterns = gapsParams.slot("nPatterns");
55
+    if (!Rf_isNull(allParams["checkpointInFile"]))
56
+    {
57
+        std::string file = Rcpp::as<std::string>(allParams["checkpointInFile"]);
58
+        Archive ar(file, ARCHIVE_READ);
59
+        gaps::random::load(ar);
60
+        ar >> nPatterns;
61
+        ar.close();
62
+    }
59 63
 }
60 64
 
61
-static bool isNull(const Rcpp::NumericVector &vec)
65
+std::vector<unsigned> getSubsetIndices(const Rcpp::Nullable<Rcpp::IntegerVector> &indices)
62 66
 {
63
-    return vec.size() == 1;
67
+    if (indices.isNotNull())
68
+    {
69
+        return Rcpp::as< std::vector<unsigned> >(Rcpp::IntegerVector(indices));
70
+    }
71
+    return std::vector<unsigned>(1); // interpreted as null, i.e. will be ignored
64 72
 }
65 73
 
66
-static bool isNull(const std::string &path)
74
+bool processDistributedParameters(const Rcpp::List &allParams)
67 75
 {
68
-    return path.empty();
76
+    const Rcpp::S4 &gapsParams(allParams["gaps"]);
77
+    if (!Rf_isNull(gapsParams.slot("distributed")))
78
+    {
79
+        std::string d = Rcpp::as<std::string>(gapsParams.slot("distributed"));
80
+        GAPS_ASSERT(d == "genome-wide" || d == "single-cell");
81
+        return d == "genome-wide";
82
+    }
83
+    return true;
69 84
 }
70 85
 
71 86
 // this is the main function that creates a GapsRunner and runs CoGAPS
72 87
 
73 88
 template <class DataType>
74 89
 static Rcpp::List cogapsRun(const DataType &data, const Rcpp::List &allParams,
75
-const DataType &uncertainty, const Rcpp::NumericVector &indices,
76
-const Rcpp::NumericMatrix &fixedMatrix)
90
+const DataType &uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector> &indices,
91
+const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix)
77 92
 {
78
-    // convert string parameters
79
-    Rcpp::S4 gapsParams = allParams["gaps"];
80
-    std::string checkpointInFile = Rcpp::as<std::string>(allParams["checkpointInFile"]);
81
-    std::string distributed = Rcpp::as<std::string>(gapsParams.slot("distributed"));
82
-    GAPS_ASSERT(distributed == "genome-wide" || distributed == "single-cell");
83
-    bool partitionRows = (distributed == "genome-wide");
84
-    
85
-    // read number of patterns from checkpoint file
86
-    unsigned nPatterns = gapsParams.slot("nPatterns");
87
-    unsigned seed = gapsParams.slot("seed"); // so we can return seed
88
-    Archive ar(checkpointInFile, ARCHIVE_READ);
89
-    if (!isNull(checkpointInFile))
90
-    {
91
-        gaps::random::load(ar);
92
-        ar >> nPatterns >> seed;
93
-    }
93
+    // calculate essential parameters needed for constructing GapsRunner
94
+    unsigned nPatterns = getNumPatterns(allParams);
95
+    bool partitionRows = processDistributedParameters(allParams);
96
+    std::vector<unsigned> cIndices(getSubsetIndices(indices));
94 97
 
95 98
     // construct GapsRunner
96
-    GapsRunner runner(data, allParams["transposeData"], nPatterns, seed,
97
-        partitionRows, convertRVec(indices));
99
+    GapsRunner runner(data, allParams["transposeData"], nPatterns,
100
+        partitionRows, cIndices);
98 101
 
102
+    // set uncertainty
103
+    if (!isNull(uncertainty))
104
+    {
105
+        runner.setUncertainty(uncertainty, allParams["transposeData"],
106
+            partitionRows, cIndices);
107
+    }
108
+    
99 109
     // populate GapsRunner from checkpoint file
100
-    if (!isNull(checkpointInFile))
110
+    if (!Rf_isNull(allParams["checkpointInFile"]))
101 111
     {
112
+        std::string file = Rcpp::as<std::string>(allParams["checkpointInFile"]);
113
+        Archive ar(file, ARCHIVE_READ);
114
+        gaps::random::load(ar);
102 115
         ar >> runner;
103 116
         ar.close();
104 117
     }
105
-    else
118
+    else // no checkpoint, populate from given parameters
106 119
     {
107 120
         // set fixed matrix
108
-        if (!isNull(fixedMatrix))
121
+        if (fixedMatrix.isNotNull());
109 122
         {
123
+            GAPS_ASSERT(!Rf_isNull(allParams["whichMatrixFixed"]));
110 124
             std::string which = Rcpp::as<std::string>(allParams["whichMatrixFixed"]);
111
-            GAPS_ASSERT(!isNull(which));
112
-            runner.setFixedMatrix(which[0], convertRMatrix(fixedMatrix));
125
+            runner.setFixedMatrix(which[0], convertRMatrix(Rcpp::NumericMatrix(fixedMatrix)));
113 126
         }
114 127
 
115
-        // set parameters that would be saved in the checkpoint 
116
-        gaps::random::setSeed(seed);
128
+        // set parameters that would be saved in the checkpoint
129
+        const Rcpp::S4 &gapsParams(allParams["gaps"]);
130
+        gaps::random::setSeed(gapsParams.slot("seed"));
131
+        runner.recordSeed(gapsParams.slot("seed"));
117 132
         runner.setMaxIterations(gapsParams.slot("nIterations"));
118 133
         runner.setSparsity(gapsParams.slot("alphaA"),
119 134
             gapsParams.slot("alphaP"), gapsParams.slot("singleCell"));
... ...
@@ -121,13 +136,6 @@ const Rcpp::NumericMatrix &fixedMatrix)
121 136
             gapsParams.slot("maxGibbsMassP"));
122 137
     }
123 138
 
124
-    // set uncertainty
125
-    if (!isNull(uncertainty))
126
-    {
127
-        runner.setUncertainty(uncertainty, allParams["transposeData"],
128
-            partitionRows, convertRVec(indices));
129
-    }
130
-    
131 139
     // set parameters that aren't saved in the checkpoint
132 140
     runner.setMaxThreads(allParams["nThreads"]);
133 141
     runner.setPrintMessages(allParams["messages"]);
... ...
@@ -137,13 +145,12 @@ const Rcpp::NumericMatrix &fixedMatrix)
137 145
 
138 146
     // run cogaps and return the GapsResult in an R list
139 147
     GapsResult result(runner.run());
140
-    GAPS_ASSERT(result.meanChiSq > 0.f);
141 148
     return Rcpp::List::create(
142 149
         Rcpp::Named("Amean") = createRMatrix(result.Amean),
143 150
         Rcpp::Named("Pmean") = createRMatrix(result.Pmean),
144 151
         Rcpp::Named("Asd") = createRMatrix(result.Asd),
145 152
         Rcpp::Named("Psd") = createRMatrix(result.Psd),
146
-        Rcpp::Named("seed") = seed,
153
+        Rcpp::Named("seed") = runner.getSeed(),
147 154
         Rcpp::Named("meanChiSq") = result.meanChiSq,
148 155
         Rcpp::Named("diagnostics") = Rcpp::List::create()
149 156
     );
... ...
@@ -152,24 +159,35 @@ const Rcpp::NumericMatrix &fixedMatrix)
152 159
 // these are the functions exposed to the R package
153 160
 
154 161
 // [[Rcpp::export]]
155
-Rcpp::List cogaps_cpp_from_file(const std::string &data,
162
+Rcpp::List cogaps_cpp_from_file(const Rcpp::CharacterVector &data,
156 163
 const Rcpp::List &allParams,
157
-const std::string &uncertainty,
158
-const Rcpp::NumericVector &indices=Rcpp::NumericVector(1),
159
-const Rcpp::NumericMatrix &fixedMatrix=Rcpp::NumericMatrix(1,1))
164
+const Rcpp::Nullable<Rcpp::CharacterVector> &uncertainty,
165
+const Rcpp::Nullable<Rcpp::IntegerVector> &indices=R_NilValue,
166
+const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
160 167
 {
161
-    return cogapsRun(data, allParams, uncertainty, indices, fixedMatrix);
168
+    std::string unc = ""; // interpreted as null, i.e. will be ignored
169
+    if (uncertainty.isNotNull())
170
+    {
171
+        unc = Rcpp::as<std::string>(Rcpp::CharacterVector(uncertainty));
172
+    }
173
+
174
+    return cogapsRun(Rcpp::as<std::string>(data), allParams, unc, indices, fixedMatrix);
162 175
 }
163 176
 
164 177
 // [[Rcpp::export]]
165 178
 Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix &data,
166 179
 const Rcpp::List &allParams,
167
-const Rcpp::NumericMatrix &uncertainty,
168
-const Rcpp::NumericVector &indices=Rcpp::NumericVector(1),
169
-const Rcpp::NumericMatrix &fixedMatrix=Rcpp::NumericMatrix(1,1))
180
+const Rcpp::Nullable<Rcpp::NumericMatrix> &uncertainty,
181
+const Rcpp::Nullable<Rcpp::IntegerVector> &indices=R_NilValue,
182
+const Rcpp::Nullable<Rcpp::NumericMatrix> &fixedMatrix=R_NilValue)
170 183
 {
171
-    return cogapsRun(convertRMatrix(data), allParams,
172
-        convertRMatrix(uncertainty), indices, fixedMatrix);
184
+    Matrix unc(1,1); // interpreted as null, i.e. will be ignored
185
+    if (uncertainty.isNotNull())
186
+    {
187
+        unc = convertRMatrix(Rcpp::NumericMatrix(uncertainty));
188
+    }
189
+
190
+    return cogapsRun(convertRMatrix(data), allParams, unc, indices, fixedMatrix);
173 191
 }
174 192
 
175 193
 // [[Rcpp::export]]
... ...
@@ -25,6 +25,16 @@ void GapsRunner::setFixedMatrix(char which, const Matrix &mat)
25 25
     }
26 26
 }
27 27
 
28
+void GapsRunner::recordSeed(uint32_t seed)
29
+{
30
+    mSeed = seed;
31
+}
32
+
33
+uint32_t GapsRunner::getSeed() const
34
+{
35
+    return mSeed;
36
+}
37
+
28 38
 void GapsRunner::setMaxIterations(unsigned nIterations)
29 39
 {
30 40
     mMaxIterations = nIterations;
... ...
@@ -69,6 +79,7 @@ void GapsRunner::setCheckpointInterval(unsigned interval)
69 79
 
70 80
 GapsResult GapsRunner::run()
71 81
 {
82
+#if 0
72 83
     mStartTime = bpt_now();
73 84
 
74 85
     // calculate appropiate number of threads if compiled with openmp
... ...
@@ -103,7 +114,7 @@ GapsResult GapsRunner::run()
103 114
             runOnePhase();
104 115
             break;
105 116
     }
106
-
117
+#endif
107 118
     GapsResult result(mStatistics);
108 119
     result.meanChiSq = mStatistics.meanChiSq(mASampler);
109 120
     return result;    
... ...
@@ -210,8 +221,8 @@ void GapsRunner::createCheckpoint()
210 221
 // assume random state has been loaded and nPatterns and seed have been read
211 222
 Archive& operator>>(Archive &ar, GapsRunner &gr)
212 223
 {
213
-    ar >> gr.mASampler >> gr.mPSampler >> gr.mStatistics >> gr.mFixedMatrix
214
-        >> gr.mMaxIterations >> gr.mPhase >> gr.mCurrentIteration
215
-        >> gr.mNumUpdatesA >> gr.mNumUpdatesP;
224
+    ar >> gr.mNumPatterns >> gr.mSeed >> gr.mASampler >> gr.mPSampler
225
+        >> gr.mStatistics >> gr.mFixedMatrix >> gr.mMaxIterations >> gr.mPhase
226
+        >> gr.mCurrentIteration >> gr.mNumUpdatesA >> gr.mNumUpdatesP;
216 227
     return ar;
217 228
 }
218 229
\ No newline at end of file
... ...
@@ -68,8 +68,7 @@ public:
68 68
 
69 69
     template <class DataType>
70 70
     GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
71
-        uint32_t seed, bool partitionRows,
72
-        const std::vector<unsigned> &indices);
71
+        bool partitionRows, const std::vector<unsigned> &indices);
73 72
 
74 73
     template <class DataType>
75 74
     void setUncertainty(const DataType &unc, bool transposeData,
... ...
@@ -77,6 +76,9 @@ public:
77 76
 
78 77
     void setFixedMatrix(char which, const Matrix &mat);
79 78
 
79
+    void recordSeed(uint32_t seed);
80
+    uint32_t getSeed() const;
81
+
80 82
     void setMaxIterations(unsigned nIterations);
81 83
     void setSparsity(float alphaA, float alphaP, bool singleCell);
82 84
     void setMaxGibbsMass(float maxA, float maxP);
... ...
@@ -95,7 +97,7 @@ public:
95 97
 
96 98
 // problem with passing file parser - need to read it twice
97 99
 template <class DataType>
98
-GapsRunner::GapsRunner(const DataType &data, bool transposeData, uint32_t seed,
100
+GapsRunner::GapsRunner(const DataType &data, bool transposeData,
99 101
 unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
100 102
     :
101 103
 mASampler(data, transposeData, nPatterns, partitionRows, indices),
... ...
@@ -104,7 +106,7 @@ mStatistics(mASampler.dataRows(), mASampler.dataCols(), nPatterns),
104 106
 mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
105 107
 mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
106 108
 mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
107
-mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0)
109
+mNumPatterns(nPatterns), mSeed(0), mNumUpdatesA(0), mNumUpdatesP(0)
108 110
 {
109 111
     mASampler.sync(mPSampler);
110 112
     mPSampler.sync(mASampler);
... ...
@@ -6,31 +6,31 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // cogaps_cpp_from_file
9
-Rcpp::List cogaps_cpp_from_file(const std::string& data, const Rcpp::List& allParams, const std::string& uncertainty, const Rcpp::NumericVector& indices, const Rcpp::NumericMatrix& fixedMatrix);
9
+Rcpp::List cogaps_cpp_from_file(const Rcpp::CharacterVector& data, const Rcpp::List& allParams, const Rcpp::Nullable<Rcpp::CharacterVector>& uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector>& indices, const Rcpp::Nullable<Rcpp::NumericMatrix>& fixedMatrix);
10 10
 RcppExport SEXP _CoGAPS_cogaps_cpp_from_file(SEXP dataSEXP, SEXP allParamsSEXP, SEXP uncertaintySEXP, SEXP indicesSEXP, SEXP fixedMatrixSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject rcpp_result_gen;
13 13
     Rcpp::RNGScope rcpp_rngScope_gen;
14
-    Rcpp::traits::input_parameter< const std::string& >::type data(dataSEXP);
14
+    Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type data(dataSEXP);
15 15
     Rcpp::traits::input_parameter< const Rcpp::List& >::type allParams(allParamsSEXP);
16
-    Rcpp::traits::input_parameter< const std::string& >::type uncertainty(uncertaintySEXP);
17
-    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type indices(indicesSEXP);
18
-    Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type fixedMatrix(fixedMatrixSEXP);
16
+    Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::CharacterVector>& >::type uncertainty(uncertaintySEXP);
17
+    Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::IntegerVector>& >::type indices(indicesSEXP);
18
+    Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::NumericMatrix>& >::type fixedMatrix(fixedMatrixSEXP);
19 19
     rcpp_result_gen = Rcpp::wrap(cogaps_cpp_from_file(data, allParams, uncertainty, indices, fixedMatrix));
20 20
     return rcpp_result_gen;
21 21
 END_RCPP
22 22
 }
23 23
 // cogaps_cpp
24
-Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix& data, const Rcpp::List& allParams, const Rcpp::NumericMatrix& uncertainty, const Rcpp::NumericVector& indices, const Rcpp::NumericMatrix& fixedMatrix);
24
+Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix& data, const Rcpp::List& allParams, const Rcpp::Nullable<Rcpp::NumericMatrix>& uncertainty, const Rcpp::Nullable<Rcpp::IntegerVector>& indices, const Rcpp::Nullable<Rcpp::NumericMatrix>& fixedMatrix);
25 25
 RcppExport SEXP _CoGAPS_cogaps_cpp(SEXP dataSEXP, SEXP allParamsSEXP, SEXP uncertaintySEXP, SEXP indicesSEXP, SEXP fixedMatrixSEXP) {
26 26
 BEGIN_RCPP
27 27
     Rcpp::RObject rcpp_result_gen;
28 28
     Rcpp::RNGScope rcpp_rngScope_gen;
29 29
     Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type data(dataSEXP);
30 30
     Rcpp::traits::input_parameter< const Rcpp::List& >::type allParams(allParamsSEXP);
31
-    Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type uncertainty(uncertaintySEXP);
32
-    Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type indices(indicesSEXP);
33
-    Rcpp::traits::input_parameter< const Rcpp::NumericMatrix& >::type fixedMatrix(fixedMatrixSEXP);
31
+    Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::NumericMatrix>& >::type uncertainty(uncertaintySEXP);
32
+    Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::IntegerVector>& >::type indices(indicesSEXP);
33
+    Rcpp::traits::input_parameter< const Rcpp::Nullable<Rcpp::NumericMatrix>& >::type fixedMatrix(fixedMatrixSEXP);
34 34
     rcpp_result_gen = Rcpp::wrap(cogaps_cpp(data, allParams, uncertainty, indices, fixedMatrix));
35 35
     return rcpp_result_gen;
36 36
 END_RCPP
... ...
@@ -4,32 +4,3 @@
4 4
 
5 5
 #define MAT_SUM(nR, nC) ((nR + nC - 2) * nR * nC / 2.f)
6 6
 
7
-TEST_CASE("Test Algorithms.h")
8
-{
9
-    unsigned nrow = 25;
10
-    unsigned ncol = 20;
11
-    unsigned nfactor = 7;
12
-    ColMatrix A(nrow, nfactor);
13
-    RowMatrix P(nfactor, ncol);
14
-
15
-    for (unsigned i = 0; i < nrow; ++i)
16
-    {
17
-        for (unsigned j = 0; j < ncol; ++j)
18
-        {
19
-            for (unsigned k = 1; k < nfactor; ++k)
20
-            {
21
-                A(i,k) = i + k;
22
-                P(k,j) = k + j;
23
-            }
24
-        }
25
-    }
26
-
27
-    SECTION("is row/col zero")
28
-    {
29
-        REQUIRE(gaps::algo::isVectorZero(P.rowPtr(0), P.nCol()));
30
-        REQUIRE(!gaps::algo::isVectorZero(P.rowPtr(1), P.nCol()));
31
-        
32
-        REQUIRE(gaps::algo::isVectorZero(A.colPtr(0), A.nRow()));
33
-        REQUIRE(!gaps::algo::isVectorZero(A.colPtr(1), A.nRow()));
34
-    }
35
-}
... ...
@@ -31,7 +31,8 @@ TEST_CASE("Test Parsers")
31 31
 
32 32
             ++count;
33 33
             ++col;
34
-            if (col == 9) {
34
+            if (col == 9)
35
+            {
35 36
                 ++row;
36 37
                 col = 0;
37 38
             }
... ...
@@ -56,7 +57,8 @@ TEST_CASE("Test Parsers")
56 57
 
57 58
             ++count;
58 59
             ++col;
59
-            if (col == 9) {
60
+            if (col == 9)
61
+            {
60 62
                 ++row;
61 63
                 col = 0;
62 64
             }
... ...
@@ -81,4 +83,4 @@ TEST_CASE("Test Parsers")
81 83
         }
82 84
         REQUIRE(count == 12267);
83 85
     }
84
-}
86
+}
85 87
\ No newline at end of file
... ...
@@ -3,25 +3,128 @@
3 3
 #include "../file_parser/CsvParser.h"
4 4
 #include "../file_parser/TsvParser.h"
5 5
 #include "../file_parser/MtxParser.h"
6
+#include "../math/Algorithms.h"
6 7
 
7
-#if 0
8
+static std::vector<unsigned> sequentialVector(unsigned n)
9
+{
10
+    std::vector<unsigned> vec;
11
+    for (unsigned i = 0; i < n; ++i)
12
+    {
13
+        vec.push_back(i);
14
+    }
15
+    return vec;
16
+}
17
+
18
+template <class DataType>
19
+static void testFullConstructor(unsigned nr, unsigned nc, float expectedSum,
20
+const DataType &data, bool transpose=false, bool partitionRows=false,
21
+const std::vector<unsigned> &indices=std::vector<unsigned>(1))
22
+{
23
+    RowMatrix rm(data, transpose, partitionRows, indices);
24
+    ColMatrix cm(data, transpose, partitionRows, indices);
25
+
26
+    REQUIRE(rm.nRow() == nr);
27
+    REQUIRE(rm.nCol() == nc);
28
+    REQUIRE(cm.nRow() == nr);
29
+    REQUIRE(cm.nCol() == nc);
30
+
31
+    REQUIRE(expectedSum == gaps::algo::sum(rm));
32
+    REQUIRE(expectedSum == gaps::algo::sum(cm));
33
+}
34
+
35
+template <class DataType>
36
+static void testAllConstructorSituations(const DataType &data)
37
+{
38
+    // No Transpose, No Subset
39
+    testFullConstructor(0.f, 10, 25, data, false);
40
+
41
+    // Transpose, No Subset
42
+    testFullConstructor(0.f, 25, 10, data, true);
43
+
44
+    // No Transpose, Subset Rows
45
+    testFullConstructor(0.f, 5, 25, data, false, true, sequentialVector(5))
46
+
47
+    // Transpose, Subset Rows
48
+    testFullConstructor(0.f, 5, 10, data, true, true, sequentialVector(5))
49
+
50
+    // No Transpose, Subset Columns
51
+    testFullConstructor(0.f, 10, 5, data, false, false, sequentialVector(5))
52
+
53
+    // Transpose, Subset Columns
54
+    testFullConstructor(0.f, 25, 5, data, true, false, sequentialVector(5))
55
+}
8 56
 
9 57
 TEST_CASE("Test Matrix.h")
10 58
 {
11
-    SECTION("Matrix/Vector Initialization")
59
+    SECTION("Default Construction")
12 60
     {
13
-        Vector v(10);
14 61
         RowMatrix rm(10, 25);
15 62
         ColMatrix cm(10, 25);
16 63
 
17
-        REQUIRE(v.size() == 10);
18 64
         REQUIRE(rm.nRow() == 10);
19 65
         REQUIRE(rm.nCol() == 25);
20 66
         REQUIRE(cm.nRow() == 10);
21 67
         REQUIRE(cm.nCol() == 25);
22 68
     }
69
+
70
+    SECTION("Copy Construction")
71
+    {
72
+        RowMatrix rm1(10, 25);
73
+        ColMatrix cm1(rm1);
74
+
75
+        REQUIRE(cm1.nRow() == 10);
76
+        REQUIRE(cm1.nCol() == 25);
77
+
78
+        RowMatrix rm2(10, 25);
79
+        ColMatrix cm2(rm1);
80
+
81
+        REQUIRE(rm2.nRow() == 10);
82
+        REQUIRE(rm2.nCol() == 25);
83
+    }
84
+
85
+    Matrix ref(10, 25);
86
+    for (unsigned i = 0; i < ref.nRow(); ++i)
87
+    {
88
+        for (unsigned j = 0; j < ref.nCol(); ++j)
89
+        {
90
+            ref(i,j) = i + j;
91
+        }
92
+    }
93
+
94
+    testAllConstructorSituations(ref);
95
+    testAllConstructorSituations(ref);
96
+    testAllConstructorSituations(ref);
97
+    testAllConstructorSituations(ref);    
98
+
99
+
100
+    SECTION("Construct from File - No Subset")
101
+    {
102
+
103
+    }
104
+
105
+    SECTION("Construct from File - Subset")
106
+    {
107
+
108
+    }
109
+
110
+    SECTION("Assignment")
111
+    {
112
+
113
+    }
114
+
115
+    SECTION("Get Row/Col")
116
+    {
117
+
118
+    }
119
+
120
+    SECTION("arithmetic")
121
+    {
122
+
123
+    }
23 124
 }
24 125
 
126
+#if 0
127
+
25 128
 static void populateSequential(std::vector<unsigned> &vec, unsigned n)
26 129
 {
27 130
     for (unsigned i = 0; i < n; ++i)
... ...
@@ -109,7 +212,6 @@ TEST_CASE("Test Matrix Construction from file")
109 212
     testMatrixConstruction(tsvPath);
110 213
     testMatrixConstruction(mtxPath);
111 214
 }
112
-
113 215
 #endif
114 216
 
115 217
 #endif
116 218
\ No newline at end of file
... ...
@@ -4,6 +4,8 @@
4 4
 #include "../GibbsSampler.h"
5 5
 #include "../math/Random.h"
6 6
 
7
+#if 0
8
+
7 9
 TEST_CASE("Test Archive.h")
8 10
 {
9 11
     SECTION("Reading/Writing to an Archive")
... ...
@@ -164,3 +166,5 @@ TEST_CASE("Test Archive.h")
164 166
         }
165 167
     }
166 168
 }
169
+
170
+#endif
167 171
\ No newline at end of file
168 172
new file mode 100644
... ...
@@ -0,0 +1,23 @@
1
+#include "catch.h"
2
+#include "../data_structures/Vector.h"
3
+
4
+TEST_CASE("Test Vector.h")
5
+{
6
+    Vector v1(100);
7
+    Vector v2(std::vector<float>(100.f, 4));
8
+
9
+    SECTION("Test Construction")
10
+    {
11
+        REQUIRE(v1.size() == 100);
12
+        REQUIRE(v1[0] == 0.f);
13
+
14
+        REQUIRE(v2.size() == 4);
15
+        REQUIRE(v2[0] == 100.f);
16
+    }
17
+    
18
+    SECTION("Test Concatenation")
19
+    {
20
+        v1.concat(v2);
21
+        REUQIRE(v1.size() == 104);
22
+    }
23
+}
0 24
\ No newline at end of file
... ...
@@ -6,15 +6,50 @@
6 6
 
7 7
 #include <string>
8 8
 
9
-enum GapsFileType
9
+AbstractFileParser* AbstractFileParser::create(const std::string &path)
10
+{
11
+    switch (FileParser::fileType(path))
12
+    {
13
+        case GAPS_MTX: return new MtxParser(path);
14
+        case GAPS_CSV: return new CsvParser(path);
15
+        case GAPS_TSV: return new TsvParser(path);
16
+        default: GAPS_ERROR("Invalid file type\n");
17
+    }
18
+}
19
+
20
+AbstractFileParser::~AbstractFileParser() {}
21
+
22
+FileParser::FileParser(const std::string &path)
23
+{
24
+    mParser = AbstractFileParser::create(path);
25
+}
26
+
27
+FileParser::~FileParser()
28
+{
29
+    delete mParser;
30
+}
31
+
32
+unsigned FileParser::nRow() const
33
+{
34
+    return mParser->nRow();
35
+}
36
+
37
+unsigned FileParser::nCol() const
38
+{
39
+    return mParser->nCol();
40
+}
41
+
42
+bool FileParser::hasNext()
43
+{
44
+    return mParser->hasNext();
45
+}
46
+
47
+MatrixElement FileParser::getNext()
10 48
 {
11
-    GAPS_MTX,
12
-    GAPS_CSV,
13
-    GAPS_TSV,
14
-    GAPS_INVALID_FILE_TYPE
15
-};
49
+    return mParser->getNext();
50
+}
16 51
 
17
-static GapsFileType fileType(const std::string &path)
52
+GapsFileType FileParser::fileType(const std::string &path)
18 53
 {
19 54
     std::size_t pos = path.find_last_of('.');
20 55
     std::string ext = path.substr(pos);
... ...
@@ -26,16 +61,3 @@ static GapsFileType fileType(const std::string &path)
26 61
 
27 62
     return GAPS_INVALID_FILE_TYPE;
28 63
 }
29
-
30
-AbstractFileParser* AbstractFileParser::create(const std::string &path)
31
-{
32
-    switch (fileType(path))
33
-    {
34
-        case GAPS_MTX: return new MtxParser(path);
35
-        case GAPS_CSV: return new CsvParser(path);
36
-        case GAPS_TSV: return new TsvParser(path);
37
-        default: GAPS_ERROR("Invalid file type\n");
38
-    }
39
-}
40
-
41
-AbstractFileParser::~AbstractFileParser() {}
42 64
\ No newline at end of file
... ...
@@ -3,6 +3,14 @@
3 3
 
4 4
 #include "MatrixElement.h"
5 5
 
6
+enum GapsFileType
7
+{
8
+    GAPS_MTX,
9
+    GAPS_CSV,
10
+    GAPS_TSV,
11
+    GAPS_INVALID_FILE_TYPE
12
+};
13
+
6 14
 // file parser interface
7 15
 class AbstractFileParser
8 16
 {
... ...
@@ -13,8 +21,6 @@ private:
13 21
 
14 22
 public:
15 23
 
16
-    AbstractFileParser() {}
17
-
18 24
     static AbstractFileParser* create(const std::string &path);
19 25
 
20 26
     virtual ~AbstractFileParser() = 0;
... ...
@@ -38,18 +44,105 @@ private:
38 44
 
39 45
 public:
40 46
 
41
-    explicit FileParser(const std::string &path)
47
+    explicit FileParser(const std::string &path);
48
+    ~FileParser();
49
+
50
+    unsigned nRow();
51
+    unsigned nCol();
52
+
53
+    bool hasNext();
54
+    MatrixElement getNext();
55
+
56
+    static GapsFileType fileType(const std::string &path);
57
+
58
+    template <class MatrixType>
59
+    static void writeToTsv(const std::string &path, const MatrixType &mat);
60
+
61
+    template <class MatrixType>
62
+    static void writeToCsv(const std::string &path, const MatrixType &mat);
63
+
64
+    template <class MatrixType>
65
+    static void writeToMtx(const std::string &path, const MatrixType &mat);
66
+};
67
+
68
+// temporary solution - should be moved into specific file parsers, ok for now
69
+// since writing to file not exposed to user, only used for internal testing
70
+
71
+template <class MatrixType>
72
+void FileParser::writeToTsv(const std::string &path, const MatrixType &mat)
73
+{
74
+    std::ofstream outputFile;
75
+    outputFile.open(path.c_str());
76
+    outputFile << "\"\"";
77
+
78
+    // write column names
79
+    for (unsigned i = 0; i < mat.nCol(); ++i)
42 80
     {
43
-        mParser = AbstractFileParser::create(path);
81
+        outputFile << "\t\"Col" << i << "\"";
44 82
     }
45 83
 
46
-    ~FileParser() { delete mParser; }
84
+    for (unsigned i = 0; i < mat.nRow(); ++i)
85
+    {
86
+        // write row names
87
+        outputFile << "\"Row" << i << "\"";
88
+        
89
+        // write data
90
+        for (unsigned j = 0; j < mat.nCol(); ++j)
91
+        {
92
+            outputFile << "\t" << mat(i,j);
93
+        }
94
+        outputFile << "\n";
95
+    }
96
+    outputFile.close();
97
+}
47 98
 
48
-    unsigned nRow() const { return mParser->nRow(); }
49
-    unsigned nCol() const { return mParser->nCol(); }
99
+template <class MatrixType>
100
+void FileParser::writeToCsv(const std::string &path, const MatrixType &mat)
101
+{
102
+    std::ofstream outputFile;
103
+    outputFile.open(path.c_str());
104
+    outputFile << "\"\"";
50 105
 
51
-    bool hasNext() { return mParser->hasNext(); }
52
-    MatrixElement getNext() { return mParser->getNext(); }
53
-};
106
+    // write column names
107
+    for (unsigned i = 0; i < mat.nCol(); ++i)
108
+    {
109
+        outputFile << ",\"Col" << i << "\"";
110
+    }
111
+
112
+    for (unsigned i = 0; i < mat.nRow(); ++i)
113
+    {
114
+        // write row names
115
+        outputFile << "\"Row" << i << "\"";
116
+        
117
+        // write data
118
+        for (unsigned j = 0; j < mat.nCol(); ++j)
119
+        {
120
+            outputFile << "," << mat(i,j);
121
+        }
122
+        outputFile << "\n";
123
+    }
124
+    outputFile.close();
125
+}
126
+
127
+template <class MatrixType>
128
+void FileParser::writeToMtx(const std::string &path, const MatrixType &mat)
129
+{
130
+    std::ofstream outputFile;
131
+    outputFile.open(path.c_str());
132
+    outputFile << "%%\n";
133
+    outputFile << mat.nRow() << " " << mat.nCol() << " " << mat.nRow() * mat.nCol();
134
+    outputFile << "\n";
135
+    for (unsigned j = 0; j < mat.nRow(); ++j)
136
+    {
137
+        for (unsigned i = 0; i < mat.nCol(); ++i)
138
+        {
139
+            if (mat(i,j) > 0.f)
140
+            {
141
+                outputFile << i + 1 << " " << j + 1 << " " << mat(i,j) << "\n";
142
+            }
143
+        }
144
+    }
145
+    outputFile.close();
146
+}
54 147
 
55 148
 #endif
56 149
\ No newline at end of file
... ...
@@ -2,8 +2,8 @@ context("GAPS")
2 2
 
3 3
 test_that("GAPS Simple Simulation",
4 4
 {
5
-    data(SimpSim)
6
-    nIter <- 1000
5
+    #data(SimpSim)
6
+    #nIter <- 1000
7 7
     #res <- gapsRun(SimpSim.D, SimpSim.S, nFactor=3, messages=FALSE)
8 8
 
9 9
     #expect_true(!is.na(res$meanChi2))