Browse code

release version bump: merge

Tom Sherman authored on 23/02/2019 17:13:39
Showing 37 changed files

... ...
@@ -1,3 +1,38 @@
1
+version: 3.3.40
2
+date: 2019-2-22
3
+author: Tom Sherman <tomsherman159@gmail.com>
4
+
5
+    - more sensible default parameters
6
+    - fixed critical bug introduced in 3.3.37 where fixedPatterns was
7
+        ignored in the second phase of distributed runs
8
+
9
+version: 3.3.39
10
+date: 2019-2-20
11
+author: Tom Sherman <tomsherman159@gmail.com>
12
+
13
+    - fix bug when reading mtx and not ignoring comments
14
+    - allow setDistributedParams to be called to reset params
15
+
16
+version: 3.3.38
17
+date: 2019-2-18
18
+author: Tom Sherman <tomsherman159@gmail.com>
19
+
20
+    - print message when each worker starts
21
+
22
+version: 3.3.37
23
+date: 2019-2-18
24
+author: Tom Sherman <tomsherman159@gmail.com>
25
+
26
+    - move all critical parameters to CogapsParams class
27
+    - allow RDS files to be passed for parameters
28
+    - runs should be reproducible now with the data file and the parameters file
29
+
30
+version: 3.3.36
31
+date: 2019-2-13
32
+author: Tom Sherman <tomsherman159@gmail.com>
33
+
34
+    - fix bug when recording initial phase of distributed cogaps
35
+
1 36
 version: 3.3.35
2 37
 date: 2019-2-12
3 38
 author: Tom Sherman <tomsherman159@gmail.com>
... ...
@@ -1,6 +1,6 @@
1 1
 Package: CoGAPS
2
-Version: 3.2.35
3
-Date: 2019-02-12
2
+Version: 3.2.40
3
+Date: 2019-02-22
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
6 6
     Genevieve Stein-O'Brien, Michael Considine, Maggie Wodicka, John Stansfield,
... ...
@@ -2,18 +2,22 @@
2 2
 
3 3
 S3method(plot,CogapsResult)
4 4
 export(CoGAPS)
5
+export(CogapsParams)
5 6
 export(GWCoGAPS)
6 7
 export(binaryA)
7 8
 export(buildReport)
8 9
 export(calcCoGAPSStat)
9 10
 export(calcZ)
10 11
 export(checkpointsEnabled)
12
+export(findConsensusMatrix)
11 13
 export(getClusteredPatterns)
12 14
 export(getCorrelationToMeanPattern)
15
+export(getFeatureLoadings)
13 16
 export(getMeanChiSq)
14 17
 export(getOriginalParameters)
15 18
 export(getParam)
16 19
 export(getRetinaSubset)
20
+export(getSampleFactors)
17 21
 export(getSubsets)
18 22
 export(getUnmatchedPatterns)
19 23
 export(getVersion)
... ...
@@ -24,6 +28,7 @@ export(reconstructGene)
24 28
 export(scCoGAPS)
25 29
 export(setAnnotationWeights)
26 30
 export(setDistributedParams)
31
+export(setFixedPatterns)
27 32
 export(setParam)
28 33
 exportClasses(CogapsParams)
29 34
 exportClasses(CogapsResult)
... ...
@@ -49,19 +49,7 @@ checkpointsEnabled <- function()
49 49
 #' @param transposeData T/F for transposing data while reading it in - useful
50 50
 #' for data that is stored as samples x genes since CoGAPS requires data to be
51 51
 #' genes x samples
52
-#' @param subsetIndices set of indices to use from the data
53
-#' @param subsetDim which dimension (1=rows, 2=cols) to subset
54 52
 #' @param BPPARAM BiocParallel backend 
55
-#' @param geneNames vector of names of genes in data
56
-#' @param sampleNames vector of names of samples in data
57
-#' @param fixedPatterns fix either 'A' or 'P' matrix to these values, in the
58
-#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
59
-#' skipped and fixedPatterns is used for all sets - allowing manual pattern
60
-#' matching, as well as fixed runs of standard CoGAPS
61
-#' @param whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed
62
-#' @param takePumpSamples whether or not to take PUMP samples
63
-#' @param outputToFile name of a file to save the output to, will create 4 files
64
-#' of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"
65 53
 #' @param workerID if calling CoGAPS in parallel the worker ID can be specified,
66 54
 #' only worker 1 prints output and each worker outputs when it finishes, this
67 55
 #' is not neccesary when using the default parallel methods (i.e. distributed
... ...
@@ -82,17 +70,18 @@ checkpointsEnabled <- function()
82 70
 #' params <- setParam(params, "nPatterns", 3)
83 71
 #' resultC <- CoGAPS(GIST.data_frame, params, nIterations=25)
84 72
 #' @importFrom methods new is
85
-#' @importFrom SummarizedExperiment assay
86
-#' @importFrom utils packageVersion
87
-CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
88
-messages=TRUE, outputFrequency=500, uncertainty=NULL,
89
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
90
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0,
91
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL,
92
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
73
+CoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE,
74
+outputFrequency=2500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out",
75
+checkpointInterval=0, checkpointInFile=NULL, transposeData=FALSE,
76
+BPPARAM=NULL, workerID=1, ...)
93 77
 {
94
-    # store all parameters in a list and parse parameters from ...
78
+    # pre-process inputs
79
+    data <- getValueOrRds(data)
80
+    data <- convertDataToMatrix(data)
81
+    params <- getValueOrRds(params)
95 82
     validObject(params)
83
+
84
+    # store all parameters in a list and parse parameters from ...
96 85
     allParams <- list("gaps"=params,
97 86
         "nThreads"=nThreads,
98 87
         "messages"=messages,
... ...
@@ -100,48 +89,24 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
100 89
         "checkpointOutFile"=checkpointOutFile,
101 90
         "checkpointInterval"=checkpointInterval,
102 91
         "checkpointInFile"=checkpointInFile,
92
+        "geneNames"=NULL, # the gene/sample names in the params object are kept
93
+        "sampleNames"=NULL, # as a reference, these are the values actually used
103 94
         "transposeData"=transposeData,
104
-        "subsetIndices"=subsetIndices,
105
-        "subsetDim"=subsetDim,
106 95
         "BPPARAM"=BPPARAM,
107
-        "fixedPatterns"=fixedPatterns,
108
-        "whichMatrixFixed"=whichMatrixFixed,
109
-        "takePumpSamples"=takePumpSamples,
110
-        "outputToFile"=outputToFile,
96
+        "outputToFile"=NULL,
111 97
         "workerID"=workerID
112 98
     )
113 99
     allParams <- parseExtraParams(allParams, list(...))
114
-
115
-    # if rds was passed, we first read it in before any processing
116
-    if (is(data, "character"))
117
-    {
118
-        if (tools::file_ext(data) == "rds")
119
-        {
120
-            gapsCat(allParams, "reading RDS file...")
121
-            data <- readRDS(data)
122
-            gapsCat(allParams, "done\n")
123
-        }
124
-    }
125
-
126
-    # convert data if needed
127
-    if (is(data, "data.frame"))
128
-        data <- data.matrix(data)
129
-    else if (is(data, "SummarizedExperiment"))
130
-        data <- SummarizedExperiment::assay(data, "counts")
131
-    else if (is(data, "SingleCellExperiment"))
132
-        data <- SummarizedExperiment::assay(data, "counts")
133
-
134
-    # check that inputs are valid, then read the gene/sample names from the data
100
+    allParams <- getDimNames(data, allParams)
135 101
     checkInputs(data, uncertainty, allParams)
136
-    allParams <- getNamesFromData(data, allParams, geneNames, sampleNames)
137
-   
102
+
138 103
     # check if we're running from a checkpoint
139 104
     if (!is.null(allParams$checkpointInFile))
140 105
     {
141 106
         gapsCat(allParams, "Running CoGAPS from a checkpoint\n")
142 107
     }
143 108
 
144
-    # determine which function to call cogaps algorithm
109
+    # determine function to call cogaps algorithm
145 110
     dispatchFunc <- cogaps_cpp # default
146 111
     if (!is.null(allParams$gaps@distributed))
147 112
         dispatchFunc <- distributedCogaps # genome-wide or single-cell cogaps
... ...
@@ -170,12 +135,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
170 135
 #' params <- setParam(params, "nPatterns", 3)
171 136
 #' result <- scCoGAPS(t(GIST.matrix), params, BPPARAM=BiocParallel::SerialParam())
172 137
 #' }
173
-scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
174
-messages=TRUE, outputFrequency=500, uncertainty=NULL,
175
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
176
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0,
177
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL,
178
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
138
+scCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE,
139
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out",
140
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE,
141
+BPPARAM=NULL, workerID=1, ...)
179 142
 {
180 143
     params@distributed <- "single-cell"
181 144
     params@singleCell <- TRUE
... ...
@@ -190,15 +153,7 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
190 153
         checkpointInterval=checkpointInterval,
191 154
         checkpointInFile=checkpointInFile,
192 155
         transposeData=transposeData,
193
-        subsetIndices=subsetIndices,
194
-        subsetDim=subsetDim,
195 156
         BPPARAM=BPPARAM,
196
-        geneNames=geneNames,
197
-        sampleNames=sampleNames,
198
-        fixedPatterns=fixedPatterns,
199
-        whichMatrixFixed=whichMatrixFixed,
200
-        takePumpSamples=takePumpSamples,
201
-        outputToFile=outputToFile,
202 157
         workerID=workerID,
203 158
         ...
204 159
     )
... ...
@@ -220,12 +175,10 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
220 175
 #' params <- setParam(params, "nPatterns", 3)
221 176
 #' result <- GWCoGAPS(GIST.matrix, params, BPPARAM=BiocParallel::SerialParam())
222 177
 #' }
223
-GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1,
224
-messages=TRUE, outputFrequency=500, uncertainty=NULL,
225
-checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
226
-checkpointInFile=NULL, transposeData=FALSE, subsetIndices=NULL, subsetDim=0,
227
-BPPARAM=NULL, geneNames=NULL, sampleNames=NULL, fixedPatterns=NULL,
228
-whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
178
+GWCoGAPS <- function(data, params=new("CogapsParams"), nThreads=1, messages=TRUE,
179
+outputFrequency=500, uncertainty=NULL, checkpointOutFile="gaps_checkpoint.out",
180
+checkpointInterval=1000, checkpointInFile=NULL, transposeData=FALSE,
181
+BPPARAM=NULL, workerID=1, ...)
229 182
 {
230 183
     params@distributed <- "genome-wide"
231 184
     CoGAPS(
... ...
@@ -239,16 +192,8 @@ whichMatrixFixed='N', takePumpSamples=FALSE, outputToFile=NULL, workerID=1, ...)
239 192
         checkpointInterval=checkpointInterval,
240 193
         checkpointInFile=checkpointInFile,
241 194
         transposeData=transposeData,
242
-        subsetIndices=subsetIndices,
243
-        subsetDim=subsetDim,
244 195
         BPPARAM=BPPARAM,
245
-        geneNames=geneNames,
246
-        sampleNames=sampleNames,
247
-        fixedPatterns=fixedPatterns,
248
-        whichMatrixFixed=whichMatrixFixed,
249
-        takePumpSamples=takePumpSamples,
250
-        outputToFile=outputToFile,
251 196
         workerID=workerID,
252 197
         ...
253 198
     )
254
-}
255 199
\ No newline at end of file
200
+}
... ...
@@ -26,8 +26,8 @@ workerID)
26 26
     else
27 27
         allParams$sampleNames <- allParams$sampleNames[subsetIndices]
28 28
 
29
-    allParams$subsetIndices <- subsetIndices
30
-    allParams$subsetDim <- ifelse(genomeWide, 1, 2)
29
+    allParams$gaps@subsetIndices <- subsetIndices
30
+    allParams$gaps@subsetDim <- ifelse(genomeWide, 1, 2)
31 31
     allParams$workerID <- workerID
32 32
 
33 33
     # call CoGAPS
... ...
@@ -59,7 +59,7 @@ distributedCogaps <- function(data, allParams, uncertainty)
59 59
         allParams$BPPARAM <- BiocParallel::MulticoreParam(workers=length(sets))
60 60
     
61 61
     initialResult <- NULL
62
-    if (is.null(allParams$fixedPatterns))
62
+    if (is.null(allParams$gaps@fixedPatterns))
63 63
     {
64 64
         # run Cogaps normally on each subset of the data
65 65
         gapsCat(allParams, "Running Across Subsets...\n\n")
... ...
@@ -76,17 +76,17 @@ distributedCogaps <- function(data, allParams, uncertainty)
76 76
 
77 77
         # match patterns in either A or P matrix
78 78
         gapsCat(allParams, "\nMatching Patterns Across Subsets...\n")
79
-        matchedPatterns <- findConsensusMatrix(unmatchedPatterns, allParams)
79
+        matchedPatterns <- findConsensusMatrix(unmatchedPatterns, allParams$gaps)
80 80
     }
81 81
     else
82 82
     {
83
-        matchedPatterns <- list(consensus=allParams$fixedPatterns)
83
+        matchedPatterns <- list(consensus=allParams$gaps@fixedPatterns)
84 84
     }
85 85
 
86 86
     # set fixed matrix
87 87
     allParams$gaps@nPatterns <- ncol(matchedPatterns$consensus)
88
-    allParams$fixedPatterns <- matchedPatterns$consensus
89
-    allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
88
+    allParams$gaps@fixedPatterns <- matchedPatterns$consensus
89
+    allParams$gaps@whichMatrixFixed <- ifelse(allParams$gaps@distributed
90 90
         == "genome-wide", "P", "A")
91 91
         
92 92
     # run final phase with fixed matrix
... ...
@@ -99,8 +99,8 @@ distributedCogaps <- function(data, allParams, uncertainty)
99 99
     # concatenate final result
100 100
     fullResult <- stitchTogether(finalResult, allParams)
101 101
 
102
-    # add diagnostic information before returning
103
-    if (allParams$whichMatrixFixed == 'N') # no manual pattern matching
102
+    # add diagnostic information about initial run before returning
103
+    if (!is.null(initialResult)) # check that initial phase was run
104 104
     {
105 105
         fullResult$diagnostics$firstPass <- initialResult
106 106
         fullResult$diagnostics$unmatchedPatterns <- unmatchedPatterns
... ...
@@ -120,31 +120,31 @@ distributedCogaps <- function(data, allParams, uncertainty)
120 120
 
121 121
 
122 122
 #' find the consensus pattern matrix across all subsets
123
-#' @keywords internal
123
+#' @export
124 124
 #'
125 125
 #' @param unmatchedPatterns list of all unmatched pattern matrices from initial
126 126
 #' run of CoGAPS
127
-#' @param allParams list of all CoGAPS parameters
127
+#' @param gapsParams CoGAPS parameters object
128 128
 #' @return matrix of consensus patterns
129
-findConsensusMatrix <- function(unmatchedPatterns, allParams)
129
+findConsensusMatrix <- function(unmatchedPatterns, gapsParams)
130 130
 {
131 131
     allPatterns <- do.call(cbind, unmatchedPatterns)
132
-    comb <- expand.grid(1:allParams$gaps@nSets, 1:allParams$gaps@nPatterns)
132
+    comb <- expand.grid(1:gapsParams@nSets, 1:gapsParams@nPatterns)
133 133
     colnames(allPatterns) <- paste(comb[,1], comb[,2], sep=".")
134
-    return(patternMatch(allPatterns, allParams))
134
+    return(patternMatch(allPatterns, gapsParams))
135 135
 }
136 136
 
137 137
 #' Match Patterns Across Multiple Runs
138 138
 #' @keywords internal
139 139
 #'
140 140
 #' @param allPatterns matrix of patterns stored in the columns
141
-#' @param allParams list of all CoGAPS parameters
141
+#' @param gapsParams CoGAPS parameters object
142 142
 #' @return a matrix of consensus patterns
143 143
 #' @importFrom stats weighted.mean
144
-patternMatch <- function(allPatterns, allParams)
144
+patternMatch <- function(allPatterns, gapsParams)
145 145
 {
146 146
     # cluster patterns
147
-    clusters <- corcut(allPatterns, allParams$gaps@cut, allParams$gaps@minNS)
147
+    clusters <- corcut(allPatterns, gapsParams@cut, gapsParams@minNS)
148 148
 
149 149
     # function to split a cluster in two (might fail to do so)
150 150
     splitCluster <- function(list, index, minNS)
... ...
@@ -157,11 +157,11 @@ patternMatch <- function(allPatterns, allParams)
157 157
     }
158 158
 
159 159
     # split large clusters into two
160
-    tooLarge <- function(x) ncol(x) > allParams$gaps@maxNS
160
+    tooLarge <- function(x) ncol(x) > gapsParams@maxNS
161 161
     indx <- which(sapply(clusters, tooLarge))
162 162
     while (length(indx) > 0)
163 163
     {
164
-        clusters <- splitCluster(clusters, indx[1], allParams$gaps@minNS)
164
+        clusters <- splitCluster(clusters, indx[1], gapsParams@minNS)
165 165
         indx <- which(sapply(clusters, tooLarge))
166 166
     }
167 167
     names(clusters) <- as.character(1:length(clusters))
... ...
@@ -4,13 +4,14 @@
4 4
 #' @description combines retina subsets from extdata directory
5 5
 #' @param n number of subsets to use
6 6
 #' @return matrix of RNA counts
7
+#' @examples
8
+#' retSubset <- getRetinaSubset()
9
+#' dim(retSubset)
7 10
 #' @importFrom rhdf5 h5read
8 11
 getRetinaSubset <- function(n=1)
9 12
 {
10 13
     if (!(n %in% 1:4))
11
-    {
12 14
         stop("invalid number of subsets requested")
13
-    }
14 15
 
15 16
     subset_1_path <- system.file("extdata/retina_subset_1.h5", package="CoGAPS")
16 17
     subset_2_path <- system.file("extdata/retina_subset_2.h5", package="CoGAPS")
... ...
@@ -66,6 +67,35 @@ supported <- function(file)
66 67
     return(tools::file_ext(file) %in% c("tsv", "csv", "mtx", "gct"))
67 68
 }
68 69
 
70
+#' checks if file is rds format
71
+#' @keywords internal
72
+#'
73
+#' @param file path to file
74
+#' @return TRUE if file is .rds, FALSE if not
75
+#' @importFrom tools file_ext
76
+isRdsFile <- function(file)
77
+{
78
+    if (is.null(file))
79
+        return(FALSE)
80
+    if (length(file) == 0)
81
+        return(FALSE)
82
+    if (!is(file, "character"))
83
+        return(FALSE)
84
+    return(tools::file_ext(file) == "rds")
85
+}
86
+
87
+#' get input that might be an RDS file
88
+#' @keywords internal
89
+#'
90
+#' @param input some user input
91
+#' @return if input is an RDS file, read it - otherwise return input
92
+getValueOrRds <- function(input)
93
+{
94
+    if (isRdsFile(input))
95
+        return(readRDS(input))
96
+    return(input)
97
+}
98
+
69 99
 #' get number of rows from supported file name or matrix
70 100
 #' @keywords internal
71 101
 #'
... ...
@@ -75,12 +105,22 @@ supported <- function(file)
75 105
 #' @importFrom tools file_ext
76 106
 nrowHelper <- function(data)
77 107
 {
108
+    nrowMtx <- function(file)
109
+    {
110
+        i <- 1
111
+        while (unname(data.table::fread(file, nrows=i, fill=TRUE)[i,1] == "%"))
112
+        {
113
+            i <- i + 1
114
+        }
115
+        return(unname(as.numeric(data.table::fread(file, nrow=i, fill=TRUE)[i,1])))
116
+    }
117
+
78 118
     if (is(data, "character"))
79 119
     {
80 120
         return(switch(tools::file_ext(data),
81 121
             "csv" = nrow(data.table::fread(data, select=1)),
82 122
             "tsv" = nrow(data.table::fread(data, select=1)),
83
-            "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,1]),
123
+            "mtx" = nrowMtx(data),
84 124
             "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][1])
85 125
         ))
86 126
     }
... ...
@@ -96,12 +136,22 @@ nrowHelper <- function(data)
96 136
 #' @importFrom tools file_ext
97 137
 ncolHelper <- function(data)
98 138
 {
139
+    ncolMtx <- function(file)
140
+    {
141
+        i <- 1
142
+        while (unname(data.table::fread(file, nrows=i, fill=TRUE)[i,1] == "%"))
143
+        {
144
+            i <- i + 1
145
+        }
146
+        return(unname(as.numeric(data.table::fread(file, nrow=i, fill=TRUE)[i,2])))
147
+    }
148
+
99 149
     if (is(data, "character"))
100 150
     {
101 151
         return(switch(tools::file_ext(data),
102 152
             "csv" = ncol(data.table::fread(data, nrows=1)) - 1,
103 153
             "tsv" = ncol(data.table::fread(data, nrows=1)) - 1,
104
-            "mtx" = as.numeric(data.table::fread(data, nrows=1, fill=TRUE)[1,2]),
154
+            "mtx" = ncolMtx(data),
105 155
             "gct" = as.numeric(strsplit(as.matrix(data.table::fread(data, nrows=1, sep='\t')), "\\s+")[[1]][2])
106 156
         ))
107 157
     }
... ...
@@ -114,9 +164,7 @@ ncolHelper <- function(data)
114 164
 getGeneNames <- function(data, transpose)
115 165
 {
116 166
     if (transpose)
117
-    {
118 167
         return(getSampleNames(data, FALSE))
119
-    }
120 168
 
121 169
     names <- NULL
122 170
     if (is(data, "character"))
... ...
@@ -127,16 +175,13 @@ getGeneNames <- function(data, transpose)
127 175
             "gct" = suppressWarnings(gsub("\"", "", as.matrix(data.table::fread(data, select=1))))
128 176
         )
129 177
     }
130
-    else if (is(data, "matrix") | is(data, "data.frame"))
178
+    else
131 179
     {
132 180
         names <- rownames(data)
133 181
     }
134 182
 
135 183
     if (is.null(names))
136
-    {
137
-        nGenes <- nrowHelper(data)
138
-        return(paste("Gene", 1:nGenes, sep="_"))
139
-    }
184
+        return(paste("Gene", 1:nrowHelper(data), sep="_"))
140 185
     return(names)
141 186
 }
142 187
 
... ...
@@ -146,9 +191,7 @@ getGeneNames <- function(data, transpose)
146 191
 getSampleNames <- function(data, transpose)
147 192
 {
148 193
     if (transpose)
149
-    {
150 194
         return(getGeneNames(data, FALSE))
151
-    }
152 195
 
153 196
     names <- NULL
154 197
     if (is(data, "character"))
... ...
@@ -159,16 +202,13 @@ getSampleNames <- function(data, transpose)
159 202
             "gct" = suppressWarnings(colnames(data.table::fread(data, skip=2, nrows=1))[-1:-2])
160 203
         )
161 204
     }
162
-    else if (is(data, "matrix") | is(data, "data.frame"))
205
+    else
163 206
     {
164 207
         names <- colnames(data)
165 208
     }
166 209
 
167 210
     if (is.null(names))
168
-    {
169
-        nSamples <- ncolHelper(data)
170
-        return(paste("Sample", 1:nSamples, sep="_"))
171
-    }
211
+        return(paste("Sample", 1:ncolHelper(data), sep="_"))
172 212
     return(names)
173 213
 }
174 214
 
... ...
@@ -227,6 +267,8 @@ parseExtraParams <- function(allParams, extraParams)
227 267
     return(allParams)
228 268
 }
229 269
 
270
+## TODO these checks should be in the C++ code so that file names are checked
271
+## just as much as R variables
230 272
 #' check that provided data is valid
231 273
 #' @keywords internal
232 274
 #'
... ...
@@ -236,9 +278,6 @@ parseExtraParams <- function(allParams, extraParams)
236 278
 #' @return throws an error if data has problems
237 279
 checkDataMatrix <- function(data, uncertainty, params)
238 280
 {
239
-    if (!is(data, "matrix") & !is(data, "data.frame")
240
-    & !is(data, "SummarizedExperiment") & !is(data, "SingleCellExperiment"))
241
-        stop("unsupported object type of CoGAPS")
242 281
     if (any(is.na(data)))
243 282
         stop("NA values in data")
244 283
     if (!all(apply(data, 2, is.numeric)))
... ...
@@ -260,9 +299,6 @@ checkDataMatrix <- function(data, uncertainty, params)
260 299
 #' @return throws an error if inputs are invalid
261 300
 checkInputs <- function(data, uncertainty, allParams)
262 301
 {
263
-    if (is(data, "character") & !supported(data))
264
-        stop("unsupported file extension for data")
265
-
266 302
     if (is(data, "character") & !is.null(uncertainty) & !is(uncertainty, "character"))
267 303
         stop("uncertainty must be same data type as data (file name)")
268 304
     if (is(uncertainty, "character") & !supported(uncertainty))
... ...
@@ -272,34 +308,16 @@ checkInputs <- function(data, uncertainty, allParams)
272 308
     if (!is.null(uncertainty) & allParams$gaps@sparseOptimization)
273 309
         stop("must use default uncertainty when enabling sparseOptimization")
274 310
 
275
-    if (!(allParams$whichMatrixFixed %in% c("A", "P", "N")))
276
-        stop("Invalid choice of fixed matrix, must be 'A' or 'P'")
277
-    if (!is.null(allParams$fixedPatterns) & allParams$whichMatrixFixed == "N")
278
-        stop("fixedPatterns passed without setting whichMatrixFixed")
279
-    if (allParams$whichMatrixFixed %in% c("A", "P") & is.null(allParams$fixedPatterns))
280
-        stop("whichMatrixFixed is set without passing fixedPatterns")
281
-
282 311
     if (!is.null(allParams$gaps@distributed))
283 312
     {
284 313
         if (allParams$gaps@distributed == "single-cell" & !allParams$gaps@singleCell)
285 314
             warning("running single-cell CoGAPS with singleCell=FALSE")
286
-        if (!is.null(allParams$fixedPatterns) & is.null(allParams$gaps@explicitSets))
287
-            warning("doing manual pattern matching with using explicit subsets")
288 315
         if (allParams$nThreads > 1)
289 316
             stop("can't run multi-threaded and distributed CoGAPS at the same time")
290 317
         if (!is.null(allParams$checkpointInFile))
291 318
             stop("checkpoints not supported for distributed cogaps")
292
-        if (allParams$gaps@distributed == "single-cell" & allParams$whichMatrixFixed == "P")
293
-            stop("can't fix P matrix when running single-cell CoGAPS")
294
-        if (allParams$gaps@distributed == "genome-wide" & allParams$whichMatrixFixed == "A")
295
-            stop("can't fix A matrix when running genome-wide CoGAPS")
296 319
     }
297 320
 
298
-    if (!(allParams$subsetDim %in% c(0,1,2)))
299
-        stop("invalid subset dimension")
300
-    if (allParams$subsetDim > 0 & is.null(allParams$subsetIndices))
301
-        stop("subsetDim provided without subsetIndices")
302
-
303 321
     if (!is(data, "character"))
304 322
         checkDataMatrix(data, uncertainty, allParams$gaps)
305 323
 }
... ...
@@ -309,38 +327,72 @@ checkInputs <- function(data, uncertainty, allParams)
309 327
 #'
310 328
 #' @param data data matrix
311 329
 #' @param allParams list of all parameters
312
-#' @param geneNames vector of names of genes in data
313
-#' @param sampleNames vector of names of samples in data
314 330
 #' @return list of all parameters with added gene names
315
-getNamesFromData <- function(data, allParams, geneNames, sampleNames)
331
+getDimNames <- function(data, allParams)
316 332
 {
317
-    # get gene/sample names
318
-    if (is.null(geneNames))
333
+    # get user supplied names
334
+    geneNames <- allParams$gaps@geneNames
335
+    sampleNames <- allParams$gaps@sampleNames
336
+
337
+    # if user didn't supply any names, pull from data set or use default labels
338
+    if (is.null(allParams$gaps@geneNames))
319 339
         geneNames <- getGeneNames(data, allParams$transposeData)
320
-    if (is.null(sampleNames))
340
+    if (is.null(allParams$gaps@sampleNames))
321 341
         sampleNames <- getSampleNames(data, allParams$transposeData)
322 342
 
343
+    # get the number of genes/samples
323 344
     nGenes <- ifelse(allParams$transposeData, ncolHelper(data), nrowHelper(data))
324 345
     nSamples <- ifelse(allParams$transposeData, nrowHelper(data), ncolHelper(data))
325 346
 
326
-    if (allParams$subsetDim == 1)
347
+    # handle any subsetting
348
+    if (allParams$gaps@subsetDim == 1)
327 349
     {
328
-        nGenes <- length(allParams$subsetIndices)
329
-        geneNames <- geneNames[allParams$subsetIndices]
350
+        nGenes <- length(allParams$gaps@subsetIndices)
351
+        geneNames <- geneNames[allParams$gaps@subsetIndices]
330 352
     }
331
-    else if (allParams$subsetDim == 2)
353
+    else if (allParams$gaps@subsetDim == 2)
332 354
     {
333
-        nSamples <- length(allParams$subsetIndices)
334
-        sampleNames <- sampleames[allParams$subsetIndices]
355
+        nSamples <- length(allParams$gaps@subsetIndices)
356
+        sampleNames <- sampleNames[allParams$gaps@subsetIndices]
335 357
     }    
336 358
 
359
+    # check that names align with expected number of genes/samples
337 360
     if (length(geneNames) != nGenes)
338 361
         stop("incorrect number of gene names given")
339 362
     if (length(sampleNames) != nSamples)
340 363
         stop("incorrect number of sample names given")
341 364
 
365
+    # store processed gene/sample names directly in allParams list
366
+    # this is an important distinction - allParams$gaps contains the
367
+    # gene/sample names originally passed by the user, allParams contains
368
+    # the processed gene/sample names to be used when labeling the result
342 369
     allParams$geneNames <- geneNames
343 370
     allParams$sampleNames <- sampleNames
344
-
345 371
     return(allParams)
372
+}
373
+
374
+#' convert any acceptable data input to a numeric matrix
375
+#' @keywords internal
376
+#'
377
+#' @description convert supported R objects containing the data to a
378
+#' numeric matrix, if data is a file name do nothing. Exits with an error
379
+#' if data is not a supported type.
380
+#' @param data data input
381
+#' @return data matrix
382
+#' @importFrom methods is
383
+#' @importFrom SummarizedExperiment assay
384
+convertDataToMatrix <- function(data)
385
+{
386
+    if (is(data, "character") & !supported(data))
387
+        stop("unsupported file extension for data")
388
+    else if (is(data, "matrix") | is(data, "character"))
389
+        return(data)
390
+    else if (is(data, "data.frame"))
391
+        return(data.matrix(data))
392
+    else if (is(data, "SummarizedExperiment"))
393
+        return(SummarizedExperiment::assay(data, "counts"))
394
+    else if (is(data, "SingleCellExperiment"))
395
+        return(SummarizedExperiment::assay(data, "counts"))
396
+    else
397
+        stop("unsupported data type")
346 398
 }
347 399
\ No newline at end of file
... ...
@@ -28,6 +28,16 @@
28 28
 #' the rows (cols) to use for weighted sampling
29 29
 #' @slot samplingWeight [distributed parameter] weights associated with 
30 30
 #' samplingAnnotation
31
+#' @slot subsetIndices set of indices to use from the data
32
+#' @slot subsetDim which dimension (1=rows, 2=cols) to subset
33
+#' @slot geneNames vector of names of genes in data
34
+#' @slot sampleNames vector of names of samples in data
35
+#' @slot fixedPatterns fix either 'A' or 'P' matrix to these values, in the
36
+#' context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
37
+#' skipped and fixedPatterns is used for all sets - allowing manual pattern
38
+#' matching, as well as fixed runs of standard CoGAPS
39
+#' @slot whichMatrixFixed either 'A' or 'P', indicating which matrix is fixed ('N' when no matrix is fixed)
40
+#' @slot takePumpSamples whether or not to take PUMP samples
31 41
 #' @importClassesFrom S4Vectors character_OR_NULL
32 42
 setClass("CogapsParams", slots = c(
33 43
     nPatterns = "numeric",
... ...
@@ -46,7 +56,14 @@ setClass("CogapsParams", slots = c(
46 56
     maxNS = "numeric",
47 57
     explicitSets = "ANY",
48 58
     samplingAnnotation = "character_OR_NULL",
49
-    samplingWeight = "numeric"
59
+    samplingWeight = "ANY",
60
+    subsetIndices="ANY",
61
+    subsetDim="numeric",
62
+    geneNames="character_OR_NULL",
63
+    sampleNames="character_OR_NULL",
64
+    fixedPatterns="ANY",
65
+    whichMatrixFixed="character",
66
+    takePumpSamples="logical"
50 67
 ))
51 68
 
52 69
 #' constructor for CogapsParams
... ...
@@ -70,12 +87,12 @@ setMethod("initialize", "CogapsParams",
70 87
         if (!is.null(list(...)$maxNS))
71 88
             stop("maxNS must be set after CogapsParams are intialized")
72 89
         if (!is.null(distributed))
73
-            if (distributed == "none")
90
+            if (distributed == "none") # allows it to be a pure string parameter
74 91
                 distributed <- NULL
75 92
         .Object@distributed <- distributed
76 93
         
77 94
         .Object@nPatterns <- 7
78
-        .Object@nIterations <- 1000
95
+        .Object@nIterations <- 5000
79 96
         .Object@alphaA <- 0.01
80 97
         .Object@alphaP <- 0.01
81 98
         .Object@maxGibbsMassA <- 100
... ...
@@ -89,7 +106,14 @@ setMethod("initialize", "CogapsParams",
89 106
         .Object@maxNS <- .Object@minNS + .Object@nSets
90 107
         .Object@explicitSets <- NULL
91 108
         .Object@samplingAnnotation <- NULL
92
-        .Object@samplingWeight <- integer(0)
109
+        .Object@samplingWeight <- NULL
110
+        .Object@subsetIndices <- NULL
111
+        .Object@subsetDim <- 0
112
+        .Object@geneNames <- NULL
113
+        .Object@sampleNames <- NULL
114
+        .Object@fixedPatterns <- NULL
115
+        .Object@whichMatrixFixed <- 'N'
116
+        .Object@takePumpSamples <- FALSE
93 117
 
94 118
         .Object <- callNextMethod(.Object, ...)
95 119
         .Object
... ...
@@ -104,42 +128,60 @@ setValidity("CogapsParams",
104 128
             "number of patterns must be an integer greater than zero"
105 129
         if (object@nIterations <= 0 | object@nIterations %% 1 != 0)
106 130
             "number of iterations must be an integer greater than zero"
107
-        if (object@alphaA  <= 0 | object@alphaP <= 0)
131
+        if (object@alphaA <= 0 | object@alphaP <= 0)
108 132
             "alpha parameter must be greater than zero"
109
-        if (object@maxGibbsMassA  <= 0 | object@maxGibbsMassP <= 0)
133
+        if (object@maxGibbsMassA <= 0 | object@maxGibbsMassP <= 0)
110 134
             "maxGibbsMass must be greater than zero"
111 135
         if (object@seed <= 0 | object@seed %% 1 != 0)
112 136
             "random seed must be an integer greater than zero"
113
-        if (object@minNS <= 1 | object@minNS %% 1 != 0)
114
-            "minNS must be an integer greater than one"
115
-        if (object@nSets <= 1 | object@nSets %% 1 != 0)
116
-            "minNS must be an integer greater than one"
117
-        if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets)
118
-            "nSets doesn't match length of explicitSets"
119
-        if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight))
120
-            "samplingWeight has mismatched size with amount of distinct annotations"
121
-        if (object@cut > object@nPatterns)
122
-            "cut must be less than or equal to nPatterns"
123 137
 
124
-        # check type of explicitSets
125
-        if (!is.null(object@explicitSets) & !is(object@explicitSets, "list"))
126
-            "explicitSets must be a list of numeric or character"
127
-        isNum <- sapply(object@explicitSets, function(s) is(s, "numeric"))
128
-        isChar <- sapply(object@explicitSets, function(s) is(s, "charcater"))
129
-        if (!is.null(object@explicitSets) & !(all(isNum) | all(isChar)))
130
-            "explicitSets must be a list of numeric or character"
138
+        if (!(object@whichMatrixFixed %in% c("A", "P", "N")))
139
+            stop("Invalid choice of fixed matrix, must be 'A' or 'P'")
140
+        if (!is.null(object@fixedPatterns) & object@whichMatrixFixed == "N")
141
+            stop("fixedPatterns passed without setting whichMatrixFixed")
142
+        if (object@whichMatrixFixed %in% c("A", "P") & is.null(object@fixedPatterns))
143
+            stop("whichMatrixFixed is set without passing fixedPatterns")
131 144
 
132
-        if (!is.null(object@explicitSets) & length(object@explicitSets) != object@nSets)
133
-            "wrong number of sets given"
134
-        if (length(object@samplingWeight) & is.null(names(object@samplingWeight)))
135
-            "samplingWeight must be a named vector"
136
-
137
-        if (!is.null(object@explicitSets) & !is.null(object@samplingAnnotation))
138
-            "explicitSets and samplingAnnotation/samplingWeight are both set"
145
+        if (!(object@subsetDim %in% c(0,1,2)))
146
+            stop("invalid subset dimension")
147
+        if (object@subsetDim > 0 & is.null(object@subsetIndices))
148
+            stop("subsetDim provided without subsetIndices")
139 149
 
140 150
         if (!is.null(object@distributed))
151
+        {
141 152
             if (!(object@distributed %in% c("genome-wide", "single-cell")))
142 153
                 "distributed method must be either 'genome-wide' or 'single-cell'"
154
+            if (!is.null(object@fixedPatterns) & is.null(object@explicitSets))
155
+                "doing manual pattern matching without using explicit subsets"
156
+            if (object@distributed == "single-cell" & object@whichMatrixFixed == "P")
157
+                "can't fix P matrix when running single-cell CoGAPS"
158
+            if (object@distributed == "genome-wide" & object@whichMatrixFixed == "A")
159
+                "can't fix A matrix when running genome-wide CoGAPS"
160
+            if (object@minNS <= 1 | object@minNS %% 1 != 0)
161
+                "minNS must be an integer greater than one"
162
+            if (object@nSets <= 1 | object@nSets %% 1 != 0)
163
+                "minNS must be an integer greater than one"
164
+            if (length(unique(object@samplingAnnotation)) != length(object@samplingWeight))
165
+                "samplingWeight has mismatched size with amount of distinct annotations"
166
+            if (object@cut > object@nPatterns)
167
+                "cut must be less than or equal to nPatterns"
168
+            if (length(object@samplingWeight) & is.null(names(object@samplingWeight)))
169
+                "samplingWeight must be a named vector"
170
+
171
+            if (!is.null(object@explicitSets))
172
+            {
173
+                if (!is(object@explicitSets, "list"))
174
+                    "explicitSets must be a list"
175
+                if (length(object@explicitSets) != object@nSets)
176
+                    "nSets doesn't match length of explicitSets"
177
+                if (!is.null(object@samplingAnnotation))
178
+                    "explicitSets and samplingAnnotation/samplingWeight are both set"
179
+                isNum <- sapply(object@explicitSets, function(s) is(s, "numeric"))
180
+                isChar <- sapply(object@explicitSets, function(s) is(s, "character"))
181
+                if (!all(isNum) & !all(isChar))
182
+                    "explicitSets must be a list of numeric or character"
183
+            }
184
+        }
143 185
     }
144 186
 )
145 187
 
... ...
@@ -174,7 +216,7 @@ setGeneric("setParam", function(object, whichParam, value)
174 216
 #' @examples
175 217
 #'  params <- new("CogapsParams")
176 218
 #'  params <- setDistributedParams(params, 5)
177
-setGeneric("setDistributedParams", function(object, nSets, cut=NULL,
219
+setGeneric("setDistributedParams", function(object, nSets=NULL, cut=NULL,
178 220
 minNS=NULL, maxNS=NULL)
179 221
     {standardGeneric("setDistributedParams")})
180 222
 
... ...
@@ -194,6 +236,23 @@ minNS=NULL, maxNS=NULL)
194 236
 setGeneric("setAnnotationWeights", function(object, annotation, weights)
195 237
     {standardGeneric("setAnnotationWeights")})
196 238
 
239
+#' set the fixed patterns for either the A or the P matrix
240
+#' @export
241
+#' @docType methods
242
+#' @rdname setFixedPatterns-methods
243
+#'
244
+#' @description these parameters are interrelated so they must be set together
245
+#' @param object an object of type CogapsParams
246
+#' @param fixedPatterns values for either the A or P matrix
247
+#' @param whichMatrixFixed either 'A' or 'P' indicating which matrix is fixed
248
+#' @return the modified params object
249
+#' @examples
250
+#' params <- new("CogapsParams")
251
+#' data(GIST)
252
+#' params <- setFixedPatterns(params, getSampleFactors(GIST.result), 'P')
253
+setGeneric("setFixedPatterns", function(object, fixedPatterns, whichMatrixFixed)
254
+    {standardGeneric("setFixedPatterns")})
255
+
197 256
 #' get the value of a parameter
198 257
 #' @export
199 258
 #' @docType methods
... ...
@@ -41,7 +41,8 @@ sampleNames, diagnostics=NULL, ...)
41 41
     patternNames <- paste("Pattern", 1:ncol(Amean), sep="_")
42 42
 
43 43
     if (length(geneNames) != nrow(.Object@featureLoadings))
44
-        stop("number of gene names doesn't match data size")
44
+        stop("number of gene names doesn't match data size, ",
45
+            length(geneNames), " != ", nrow(.Object@featureLoadings))
45 46
     if (length(sampleNames) != nrow(.Object@sampleFactors))
46 47
         stop("number of sample names doesn't match data size")
47 48
 
... ...
@@ -82,6 +83,32 @@ setValidity("CogapsResult",
82 83
 
83 84
 ################################### GENERICS ###################################
84 85
 
86
+#' return featureLoadings matrix from CogapsResult object
87
+#' @export
88
+#' @docType methods
89
+#' @rdname getFeatureLoadings-methods
90
+#'
91
+#' @param object an object of type CogapsResult
92
+#' @return featureLoadings matrix
93
+#' @examples
94
+#' data(GIST)
95
+#' getFeatureLoadings(GIST.result)
96
+setGeneric("getFeatureLoadings", function(object)
97
+    {standardGeneric("getFeatureLoadings")})
98
+
99
+#' return sampleFactors matrix from CogapsResult object
100
+#' @export
101
+#' @docType methods
102
+#' @rdname getSampleFactors-methods
103
+#'
104
+#' @param object an object of type CogapsResult
105
+#' @return sampleFactors matrix
106
+#' @examples
107
+#' data(GIST)
108
+#' getSampleFactors(GIST.result)
109
+setGeneric("getSampleFactors", function(object)
110
+    {standardGeneric("getSampleFactors")})
111
+
85 112
 #' return chi-sq of final matrices
86 113
 #' @export
87 114
 #' @docType methods
... ...
@@ -1,3 +1,17 @@
1
+#' CogapsParams constructor
2
+#' @export
3
+#'
4
+#' @description create a CogapsParams object
5
+#' @param ... parameters for the initialization method
6
+#' @return CogapsParams object
7
+#' @examples
8
+#' params <- CogapsParams(nPatterns=10)
9
+#' params
10
+CogapsParams <- function(...)
11
+{
12
+    new("CogapsParams", ...)
13
+}
14
+
1 15
 setMethod("show", signature("CogapsParams"),
2 16
 function(object)
3 17
 {
... ...
@@ -59,11 +73,15 @@ function(object, whichParam, value)
59 73
     }
60 74
     else if (whichParam %in% c("nSets", "cut", "minNS", "maxNS"))
61 75
     {
62
-        stop("please set this parameter with setDistributedParams")
76
+        stop("please set \'", whichParam, "\' with setDistributedParams")
63 77
     }
64 78
     else if (whichParam %in% c("samplingAnnotation", "samplingWeight"))
65 79
     {
66
-        stop("please set this parameter with setAnnotationWeights")
80
+        stop("please set \'", whichParam, "\' with setAnnotationWeights")
81
+    }
82
+    else if (whichParam %in% c("fixedPatterns", "whichMatrixFixed"))
83
+    {
84
+        stop("please set \'", whichParam, "\' with setFixedPatterns")
67 85
     }
68 86
     else if (whichParam == "nPatterns")
69 87
     {
... ...
@@ -94,8 +112,7 @@ function(object, nSets, cut, minNS, maxNS)
94 112
     message("setting distributed parameters - call this again if you change ",
95 113
         "nPatterns")
96 114
 
97
-    object@nSets <- nSets
98
-
115
+    object@nSets <- ifelse(is.null(nSets), object@nSets, nSets)
99 116
     object@cut <- ifelse(is.null(cut), object@nPatterns, cut)
100 117
     object@minNS <- ifelse(is.null(minNS), ceiling(object@nSets / 2), minNS)
101 118
     object@maxNS <- ifelse(is.null(maxNS), object@minNS + object@nSets, maxNS)
... ...
@@ -116,6 +133,18 @@ function(object, annotation, weights)
116 133
     return(object)
117 134
 })
118 135
 
136
+#' @rdname setFixedPatterns-methods
137
+#' @aliases setFixedPatterns
138
+setMethod("setFixedPatterns", signature(object="CogapsParams"),
139
+function(object, fixedPatterns, whichMatrixFixed)
140
+{
141
+    object@fixedPatterns <- fixedPatterns
142
+    object@whichMatrixFixed <- whichMatrixFixed
143
+
144
+    validObject(object)
145
+    return(object)
146
+})
147
+
119 148
 #' @rdname getParam-methods
120 149
 #' @aliases getParam
121 150
 setMethod("getParam", signature(object="CogapsParams"),
... ...
@@ -4,6 +4,7 @@
4 4
 #' @param returnList list from cogaps_cpp
5 5
 #' @param allParams list of all parameters
6 6
 #' @return CogapsResult object
7
+#' @importFrom utils packageVersion
7 8
 createCogapsResult <- function(returnList, allParams)
8 9
 {
9 10
     res <- new("CogapsResult",
... ...
@@ -57,6 +58,22 @@ plot.CogapsResult <- function(x, ...)
57 58
         lty=1, cex=0.8, col=colors, bty="y", ncol=5)
58 59
 }
59 60
 
61
+#' @rdname getFeatureLoadings-methods
62
+#' @aliases getFeatureLoadings
63
+setMethod("getFeatureLoadings", signature(object="CogapsResult"),
64
+function(object)
65
+{
66
+    object@featureLoadings
67
+})
68
+
69
+#' @rdname getSampleFactors-methods
70
+#' @aliases getSampleFactors
71
+setMethod("getSampleFactors", signature(object="CogapsResult"),
72
+function(object)
73
+{
74
+    object@sampleFactors
75
+})
76
+
60 77
 #' @rdname getMeanChiSq-methods
61 78
 #' @aliases getMeanChiSq
62 79
 setMethod("getMeanChiSq", signature(object="CogapsResult"),
... ...
@@ -1,4 +1,4 @@
1
-# CoGAPS Version: 3.2.35
1
+# CoGAPS Version: 3.2.40
2 2
 
3 3
 [![Bioc](https://bioconductor.org/images/logo_bioconductor.gif)](https://bioconductor.org/packages/CoGAPS)
4 4
 [![downloads](https://bioconductor.org/shields/downloads/release/CoGAPS.svg)](http://bioconductor.org/packages/stats/bioc/CoGAPS/)
... ...
@@ -12,33 +12,21 @@ Coordinated Gene Activity in Pattern Sets (CoGAPS) implements a Bayesian MCMC ma
12 12
 as follows:
13 13
 
14 14
 ```
15
-source("https://bioconductor.org/biocLite.R")
16
-biocLite("CoGAPS")
15
+install.packages("BiocManager")
16
+BiocManager::install("CoGAPS")
17 17
 ```
18 18
 
19 19
 The most up-to-date version of *CoGAPS* can be installed directly from the 
20 20
 *FertigLab* Github Repository:
21 21
 
22 22
 ```
23
-## Method 1 using biocLite
24
-biocLite("FertigLab/CoGAPS", dependencies = TRUE, build_vignettes = TRUE)
23
+## Method 1 using BiocManager
24
+BiocManager::install("FertigLab/CoGAPS")
25 25
 
26 26
 ## Method 2 using devtools package
27 27
 devtools::install_github("FertigLab/CoGAPS")
28 28
 ```
29 29
 
30
-There is also an option to install the development version of *CoGAPS*, 
31
-while this version has the latest experimental features, it is not guaranteed
32
-to be stable.
33
-
34
-```
35
-## Method 1 using biocLite
36
-biocLite("FertigLab/CoGAPS", ref="develop", dependencies = TRUE, build_vignettes = TRUE)
37
-
38
-## Method 2 using devtools package
39
-devtools::install_github("FertigLab/CoGAPS", ref="develop")
40
-```
41
-
42 30
 # Using CoGAPS
43 31
 
44 32
 Follow the vignette here: http://htmlpreview.github.io/?https://github.com/FertigLab/CoGAPS/blob/develop/vignettes/CoGAPS.html
... ...
@@ -1,6 +1,6 @@
1 1
 #! /bin/sh
2 2
 # Guess values for system-dependent variables and create Makefiles.
3
-# Generated by GNU Autoconf 2.69 for CoGAPS 3.2.35.
3
+# Generated by GNU Autoconf 2.69 for CoGAPS 3.2.40.
4 4
 #
5 5
 #
6 6
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
... ...
@@ -577,8 +577,8 @@ MAKEFLAGS=
577 577
 # Identity of this package.
578 578
 PACKAGE_NAME='CoGAPS'
579 579
 PACKAGE_TARNAME='cogaps'
580
-PACKAGE_VERSION='3.2.35'
581
-PACKAGE_STRING='CoGAPS 3.2.35'
580
+PACKAGE_VERSION='3.2.40'
581
+PACKAGE_STRING='CoGAPS 3.2.40'
582 582
 PACKAGE_BUGREPORT=''
583 583
 PACKAGE_URL=''
584 584
 
... ...
@@ -1206,7 +1206,7 @@ if test "$ac_init_help" = "long"; then
1206 1206
   # Omit some internal or obsolete options to make the list less imposing.
1207 1207
   # This message is too long to be a string in the A/UX 3.1 sh.
1208 1208
   cat <<_ACEOF
1209
-\`configure' configures CoGAPS 3.2.35 to adapt to many kinds of systems.
1209
+\`configure' configures CoGAPS 3.2.40 to adapt to many kinds of systems.
1210 1210
 
1211 1211
 Usage: $0 [OPTION]... [VAR=VALUE]...
1212 1212
 
... ...
@@ -1268,7 +1268,7 @@ fi
1268 1268
 
1269 1269
 if test -n "$ac_init_help"; then
1270 1270
   case $ac_init_help in
1271
-     short | recursive ) echo "Configuration of CoGAPS 3.2.35:";;
1271
+     short | recursive ) echo "Configuration of CoGAPS 3.2.40:";;
1272 1272
    esac
1273 1273
   cat <<\_ACEOF
1274 1274
 
... ...
@@ -1359,7 +1359,7 @@ fi
1359 1359
 test -n "$ac_init_help" && exit $ac_status
1360 1360
 if $ac_init_version; then
1361 1361
   cat <<\_ACEOF
1362
-CoGAPS configure 3.2.35
1362
+CoGAPS configure 3.2.40
1363 1363
 generated by GNU Autoconf 2.69
1364 1364
 
1365 1365
 Copyright (C) 2012 Free Software Foundation, Inc.
... ...
@@ -1722,7 +1722,7 @@ cat >config.log <<_ACEOF
1722 1722
 This file contains any messages produced by compilers while
1723 1723
 running configure, to aid debugging if configure makes a mistake.
1724 1724
 
1725
-It was created by CoGAPS $as_me 3.2.35, which was
1725
+It was created by CoGAPS $as_me 3.2.40, which was
1726 1726
 generated by GNU Autoconf 2.69.  Invocation command line was
1727 1727
 
1728 1728
   $ $0 $@
... ...
@@ -4788,7 +4788,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
4788 4788
 # report actual input values of CONFIG_FILES etc. instead of their
4789 4789
 # values after options handling.
4790 4790
 ac_log="
4791
-This file was extended by CoGAPS $as_me 3.2.35, which was
4791
+This file was extended by CoGAPS $as_me 3.2.40, which was
4792 4792
 generated by GNU Autoconf 2.69.  Invocation command line was
4793 4793
 
4794 4794
   CONFIG_FILES    = $CONFIG_FILES
... ...
@@ -4841,7 +4841,7 @@ _ACEOF
4841 4841
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
4842 4842
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
4843 4843
 ac_cs_version="\\
4844
-CoGAPS config.status 3.2.35
4844
+CoGAPS config.status 3.2.40
4845 4845
 configured by $0, generated by GNU Autoconf 2.69,
4846 4846
   with options \\"\$ac_cs_config\\"
4847 4847
 
... ...
@@ -5,13 +5,10 @@
5 5
 \title{CoGAPS Matrix Factorization Algorithm}
6 6
 \usage{
7 7
 CoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8
-  messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9
-  checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE,
11
-  subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL,
12
-  geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL,
13
-  whichMatrixFixed = "N", takePumpSamples = FALSE,
14
-  outputToFile = NULL, workerID = 1, ...)
8
+  messages = TRUE, outputFrequency = 2500, uncertainty = NULL,
9
+  checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 0,
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  workerID = 1, ...)
15 12
 }
16 13
 \arguments{
17 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -40,28 +37,8 @@ contained in this file}
40 37
 for data that is stored as samples x genes since CoGAPS requires data to be
41 38
 genes x samples}
42 39
 
43
-\item{subsetIndices}{set of indices to use from the data}
44
-
45
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset}
46
-
47 40
 \item{BPPARAM}{BiocParallel backend}
48 41
 
49
-\item{geneNames}{vector of names of genes in data}
50
-
51
-\item{sampleNames}{vector of names of samples in data}
52
-
53
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the
54
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
55
-skipped and fixedPatterns is used for all sets - allowing manual pattern
56
-matching, as well as fixed runs of standard CoGAPS}
57
-
58
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed}
59
-
60
-\item{takePumpSamples}{whether or not to take PUMP samples}
61
-
62
-\item{outputToFile}{name of a file to save the output to, will create 4 files
63
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"}
64
-
65 42
 \item{workerID}{if calling CoGAPS in parallel the worker ID can be specified,
66 43
 only worker 1 prints output and each worker outputs when it finishes, this
67 44
 is not necessary when using the default parallel methods (i.e. distributed
... ...
@@ -51,5 +51,22 @@ the rows (cols) to use for weighted sampling}
51 51
 
52 52
 \item{\code{samplingWeight}}{[distributed parameter] weights associated with 
53 53
 samplingAnnotation}
54
+
55
+\item{\code{subsetIndices}}{set of indices to use from the data}
56
+
57
+\item{\code{subsetDim}}{which dimension (1=rows, 2=cols) to subset}
58
+
59
+\item{\code{geneNames}}{vector of names of genes in data}
60
+
61
+\item{\code{sampleNames}}{vector of names of samples in data}
62
+
63
+\item{\code{fixedPatterns}}{fix either 'A' or 'P' matrix to these values; in the
64
+context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
65
+skipped and fixedPatterns is used for all sets - allowing manual pattern
66
+matching, as well as fixed runs of standard CoGAPS}
67
+
68
+\item{\code{whichMatrixFixed}}{either 'A' or 'P', indicating which matrix is fixed}
69
+
70
+\item{\code{takePumpSamples}}{whether or not to take PUMP samples}
54 71
 }}
55 72
 
56 73
new file mode 100755
... ...
@@ -0,0 +1,21 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/methods-CogapsParams.R
3
+\name{CogapsParams}
4
+\alias{CogapsParams}
5
+\title{CogapsParams constructor}
6
+\usage{
7
+CogapsParams(...)
8
+}
9
+\arguments{
10
+\item{...}{parameters for the initialization method}
11
+}
12
+\value{
13
+CogapsParams object
14
+}
15
+\description{
16
+create a CogapsParams object
17
+}
18
+\examples{
19
+params <- CogapsParams(nPatterns=10)
20
+params
21
+}
... ...
@@ -7,11 +7,8 @@
7 7
 GWCoGAPS(data, params = new("CogapsParams"), nThreads = 1,
8 8
   messages = TRUE, outputFrequency = 500, uncertainty = NULL,
9 9
   checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
10
-  checkpointInFile = NULL, transposeData = FALSE,
11
-  subsetIndices = NULL, subsetDim = 0, BPPARAM = NULL,
12
-  geneNames = NULL, sampleNames = NULL, fixedPatterns = NULL,
13
-  whichMatrixFixed = "N", takePumpSamples = FALSE,
14
-  outputToFile = NULL, workerID = 1, ...)
10
+  checkpointInFile = NULL, transposeData = FALSE, BPPARAM = NULL,
11
+  workerID = 1, ...)
15 12
 }
16 13
 \arguments{
17 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -40,28 +37,8 @@ contained in this file}
40 37
 for data that is stored as samples x genes since CoGAPS requires data to be
41 38
 genes x samples}
42 39
 
43
-\item{subsetIndices}{set of indices to use from the data}
44
-
45
-\item{subsetDim}{which dimension (1=rows, 2=cols) to subset}
46
-
47 40
 \item{BPPARAM}{BiocParallel backend}
48 41
 
49
-\item{geneNames}{vector of names of genes in data}
50
-
51
-\item{sampleNames}{vector of names of samples in data}
52
-
53
-\item{fixedPatterns}{fix either 'A' or 'P' matrix to these values, in the
54
-context of distributed CoGAPS (GWCoGAPS/scCoGAPS), the first phase is
55
-skipped and fixedPatterns is used for all sets - allowing manual pattern
56
-matching, as well as fixed runs of standard CoGAPS}
57
-
58
-\item{whichMatrixFixed}{either 'A' or 'P', indicating which matrix is fixed}
59
-
60
-\item{takePumpSamples}{whether or not to take PUMP samples}
61
-
62
-\item{outputToFile}{name of a file to save the output to, will create 4 files
63
-of the form "filename_nPatterns_[Amean, Asd, Pmean, Psd].extension"}
64
-
65 42
 \item{workerID}{if calling CoGAPS in parallel the worker ID can be specified,
66 43
 only worker 1 prints output and each worker outputs when it finishes, this
67 44
 is not necessary when using the default parallel methods (i.e. distributed
68 45
new file mode 100755
... ...
@@ -0,0 +1,20 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{convertDataToMatrix}
4
+\alias{convertDataToMatrix}
5
+\title{convert any acceptable data input to a numeric matrix}
6
+\usage{
7
+convertDataToMatrix(data)
8
+}
9
+\arguments{
10
+\item{data}{data input}
11
+}
12
+\value{
13
+data matrix
14
+}
15
+\description{
16
+convert supported R objects containing the data to a
17
+numeric matrix; if data is a file name, do nothing. Exits with an error
18
+if data is not a supported type.
19
+}
20
+\keyword{internal}
... ...
@@ -4,13 +4,13 @@
4 4
 \alias{findConsensusMatrix}
5 5
 \title{find the consensus pattern matrix across all subsets}
6 6
 \usage{
7
-findConsensusMatrix(unmatchedPatterns, allParams)
7
+findConsensusMatrix(unmatchedPatterns, gapsParams)
8 8
 }
9 9
 \arguments{
10 10
 \item{unmatchedPatterns}{list of all unmatched pattern matrices from initial
11 11
 run of CoGAPS}
12 12
 
13
-\item{allParams}{list of all CoGAPS parameters}
13
+\item{gapsParams}{list of all CoGAPS parameters}
14 14
 }
15 15
 \value{
16 16
 matrix of consensus patterns
... ...
@@ -18,4 +18,3 @@ matrix of consensus patterns
18 18
 \description{
19 19
 find the consensus pattern matrix across all subsets
20 20
 }
21
-\keyword{internal}
22 21
similarity index 63%
23 22
rename from man/getNamesFromData.Rd
24 23
rename to man/getDimNames.Rd
... ...
@@ -1,19 +1,15 @@
1 1
 % Generated by roxygen2: do not edit by hand
2 2
 % Please edit documentation in R/HelperFunctions.R
3
-\name{getNamesFromData}
4
-\alias{getNamesFromData}
3
+\name{getDimNames}
4
+\alias{getDimNames}
5 5
 \title{extracts gene/sample names from the data}
6 6
 \usage{
7
-getNamesFromData(data, allParams, geneNames, sampleNames)
7
+getDimNames(data, allParams)
8 8
 }
9 9
 \arguments{
10 10
 \item{data}{data matrix}
11 11
 
12 12
 \item{allParams}{list of all parameters}
13
-
14
-\item{geneNames}{vector of names of genes in data}
15
-
16
-\item{sampleNames}{vector of names of samples in data}
17 13
 }
18 14
 \value{
19 15
 list of all parameters with added gene names
20 16
new file mode 100755
... ...
@@ -0,0 +1,25 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getFeatureLoadings}
5
+\alias{getFeatureLoadings}
6
+\alias{getFeatureLoadings,CogapsResult-method}
7
+\title{return featureLoadings matrix from CogapsResult object}
8
+\usage{
9
+getFeatureLoadings(object)
10
+
11
+\S4method{getFeatureLoadings}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+featureLoadings matrix
18
+}
19
+\description{
20
+return featureLoadings matrix from CogapsResult object
21
+}
22
+\examples{
23
+data(GIST)
24
+getFeatureLoadings(GIST.result)
25
+}
... ...
@@ -15,3 +15,7 @@ matrix of RNA counts
15 15
 \description{
16 16
 combines retina subsets from extdata directory
17 17
 }
18
+\examples{
19
+retSubset <- getRetinaSubset()
20
+dim(retSubset)
21
+}
18 22
new file mode 100755
... ...
@@ -0,0 +1,25 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/class-CogapsResult.R, R/methods-CogapsResult.R
3
+\docType{methods}
4
+\name{getSampleFactors}
5
+\alias{getSampleFactors}
6
+\alias{getSampleFactors,CogapsResult-method}
7
+\title{return sampleFactors matrix from CogapsResult object}
8
+\usage{
9
+getSampleFactors(object)
10
+
11
+\S4method{getSampleFactors}{CogapsResult}(object)
12
+}
13
+\arguments{
14
+\item{object}{an object of type CogapsResult}
15
+}
16
+\value{
17
+sampleFactors matrix
18
+}
19
+\description{
20
+return sampleFactors matrix from CogapsResult object
21
+}
22
+\examples{
23
+data(GIST)
24
+getSampleFactors(GIST.result)
25
+}
0 26
new file mode 100755
... ...
@@ -0,0 +1,18 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R
3
+\name{getValueOrRds}
4
+\alias{getValueOrRds}
5
+\title{get input that might be an RDS file}
6
+\usage{
7
+getValueOrRds(input)
8
+}
9
+\arguments{
10
+\item{input}{some user input}
11
+}
12
+\value{
13
+if input is an RDS file, read it - otherwise return input
14
+}
15
+\description{
16
+get input that might be an RDS file
17
+}
18
+\keyword{internal}
0 19
new file mode 100755
... ...
@@ -0,0 +1,18 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/HelperFunctions.R