... | ... |
@@ -1,8 +1,8 @@ |
1 | 1 |
# Generated by using Rcpp::compileAttributes() -> do not edit by hand |
2 | 2 |
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 |
3 | 3 |
|
4 |
-cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, seed = -1L, messages = FALSE, singleCellRNASeq = FALSE) { |
|
5 |
- .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, seed, messages, singleCellRNASeq) |
|
4 |
+cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq) { |
|
5 |
+ .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq) |
|
6 | 6 |
} |
7 | 7 |
|
8 | 8 |
run_catch_unit_tests <- function() { |
... | ... |
@@ -57,191 +57,13 @@ |
57 | 57 |
#'@param messages Display progress messages |
58 | 58 |
#'@export |
59 | 59 |
|
60 |
-gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 5, simulation_id = "simulation", |
|
60 |
+gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 7, simulation_id = "simulation", |
|
61 | 61 |
nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P", |
62 | 62 |
fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01, |
63 | 63 |
nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0, |
64 | 64 |
seed=-1, messages=TRUE) |
65 | 65 |
{ |
66 |
- #Begin data type error checking code |
|
67 |
- charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix)) |
|
68 |
- charCheck = c("simulation_id", "fixedDomain", "fixedMatrix") |
|
69 |
- |
|
70 |
- boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs), !is.logical(sampleSnapshots)) |
|
71 |
- boolCheck = c("output_atomic", "fixedBinProbs", "sampleSnapshots") |
|
72 |
- |
|
73 |
- numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), !is.numeric(numSnapshots), |
|
74 |
- !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
75 |
- !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP)) |
|
76 |
- numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "numSnapshots", "alphaA", "nMaxA", |
|
77 |
- "max_gibbmass_paraA", "alphaP", "nMaxP", "max_gibbmass_paraP") |
|
78 |
- |
|
79 |
- dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP)) |
|
80 |
- dataFrameCheck = c("D", "S", "ABins", "PBins", "FP") |
|
81 |
- |
|
82 |
- matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP)) |
|
83 |
- |
|
84 |
- if(any(charDataErrors)) |
|
85 |
- { |
|
86 |
- #Check which of these is not a string |
|
87 |
- for(i in 1:length(charDataErrors)) |
|
88 |
- { |
|
89 |
- if(charDataErrors[i] == TRUE) |
|
90 |
- { |
|
91 |
- stop(paste("Error in gapsMapRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
92 |
- } |
|
93 |
- } |
|
94 |
- } |
|
95 |
- |
|
96 |
- if(any(boolDataErrors)) |
|
97 |
- { |
|
98 |
- #Check which of these is not a boolean |
|
99 |
- for(i in 1:length(boolDataErrors)) |
|
100 |
- { |
|
101 |
- if(boolDataErrors[i] == TRUE) |
|
102 |
- { |
|
103 |
- stop(paste("Error in gapsMapRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
104 |
- } |
|
105 |
- } |
|
106 |
- } |
|
107 |
- |
|
108 |
- if(any(numericDataErrors)) |
|
109 |
- { |
|
110 |
- #Check which of these is not an integer/double |
|
111 |
- for(i in 1:length(numericDataErrors)) |
|
112 |
- { |
|
113 |
- if(numericDataErrors[i] == TRUE) |
|
114 |
- { |
|
115 |
- stop(paste("Error in gapsMapRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
116 |
- |
|
117 |
- } |
|
118 |
- } |
|
119 |
- } |
|
120 |
- |
|
121 |
- |
|
122 |
- #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame |
|
123 |
- if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5]))) |
|
124 |
- { |
|
125 |
- for(i in 1:length(dataFrameCheck)) |
|
126 |
- { |
|
127 |
- if((dataFrameErrors[i] && matrixErrors[i]) == TRUE) |
|
128 |
- { |
|
129 |
- stop(paste("Error in gapsMapRun: Argument",dataFrameCheck[i],"is not a matrix or data.frame. Please see documentation for details.")) |
|
130 |
- |
|
131 |
- } |
|
132 |
- } |
|
133 |
- } |
|
134 |
- |
|
135 |
- #Floor the parameters that are integers to prevent allowing doubles. |
|
136 |
- nFactor = floor(nFactor) |
|
137 |
- nEquil = floor(nEquil) |
|
138 |
- nSample = floor(nSample) |
|
139 |
- nOutR = floor(nOutR) |
|
140 |
- numSnapshots = floor(numSnapshots) |
|
141 |
- nMaxA = floor(nMaxA) |
|
142 |
- nMaxP = floor(nMaxP) |
|
143 |
- |
|
144 |
- # pass all settings to C++ within a list |
|
145 |
- # if (is.null(P)) { |
|
146 |
- Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix, sampleSnapshots); |
|
147 |
- |
|
148 |
- ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
149 |
- alphaP, nMaxP, max_gibbmass_paraP, numSnapshots); |
|
150 |
- |
|
151 |
- #Begin logic error checking code |
|
152 |
- |
|
153 |
- #Check for negative or zero arguments |
|
154 |
- if(any(ConfigNums <= 0)) |
|
155 |
- { |
|
156 |
- stop("Error in gapsMapRun: Numeric Arguments cannot be non-zero!") |
|
157 |
- } |
|
158 |
- |
|
159 |
- #Check for nonsensical inputs (such as numSnapshots < nEquil or nSample) |
|
160 |
- if((numSnapshots > nEquil) || (numSnapshots > nSample)) |
|
161 |
- { |
|
162 |
- stop("Error in gapsMapRun: Cannot have more snapshots of A and P than equilibration and/or sampling iterations.") |
|
163 |
- } |
|
164 |
- |
|
165 |
- if((nOutR > nEquil) || (nOutR > nSample)) |
|
166 |
- { |
|
167 |
- stop("Error in gapsMapRun: Cannot have more output steps than equilibration and/or sampling iterations.") |
|
168 |
- } |
|
169 |
- |
|
170 |
- if(ncol(FP) != ncol(D)) |
|
171 |
- { |
|
172 |
- stop("Error in gapsMapRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.") |
|
173 |
- } |
|
174 |
- |
|
175 |
- if(nFactor < (nrow(FP))) |
|
176 |
- { |
|
177 |
- stop("Error in gapsMapRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.") |
|
178 |
- } |
|
179 |
- |
|
180 |
- if(nFactor > (ncol(D))) |
|
181 |
- { |
|
182 |
- warning("Warning in gapsMapRun: Number of requested patterns greater than columns of Data Matrix.") |
|
183 |
- } |
|
184 |
- |
|
185 |
- |
|
186 |
- |
|
187 |
- # P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass |
|
188 |
- # } else { |
|
189 |
- # Config = c(nFactor, simulation_id, nEquil, nSample, nOutR, |
|
190 |
- # output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor, |
|
191 |
- # alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1) |
|
192 |
- |
|
193 |
- # } |
|
194 |
- |
|
195 |
- geneNames = rownames(D); |
|
196 |
- sampleNames = colnames(D); |
|
197 |
- |
|
198 |
- # label patterns as Patt N |
|
199 |
- patternNames = c("0"); |
|
200 |
- for(i in 1:nFactor) |
|
201 |
- { |
|
202 |
- patternNames[i] = paste('Patt', i); |
|
203 |
- } |
|
204 |
- |
|
205 |
- # call to C++ Rcpp code |
|
206 |
- cogapResult = cogapsMap(D, S, FP, ABins, PBins, Config, ConfigNums, seed, |
|
207 |
- messages); |
|
208 |
- |
|
209 |
- # convert returned files to matrices to simplify visualization and processing |
|
210 |
- cogapResult$Amean = as.matrix(cogapResult$Amean); |
|
211 |
- cogapResult$Asd = as.matrix(cogapResult$Asd); |
|
212 |
- cogapResult$Pmean = as.matrix(cogapResult$Pmean); |
|
213 |
- cogapResult$Psd = as.matrix(cogapResult$Psd); |
|
214 |
- |
|
215 |
- # label matrices |
|
216 |
- colnames(cogapResult$Amean) = patternNames; |
|
217 |
- rownames(cogapResult$Amean) = geneNames; |
|
218 |
- colnames(cogapResult$Asd) = patternNames; |
|
219 |
- rownames(cogapResult$Asd) = geneNames; |
|
220 |
- colnames(cogapResult$Pmean) = sampleNames; |
|
221 |
- rownames(cogapResult$Pmean) = patternNames; |
|
222 |
- colnames(cogapResult$Psd) = sampleNames; |
|
223 |
- rownames(cogapResult$Psd) = patternNames; |
|
224 |
- |
|
225 |
- # calculate chi-squared of mean, this should be smaller than individual |
|
226 |
- # chi-squared sample values if sampling is good |
|
227 |
- calcChiSq = c(0); |
|
228 |
- MMatrix = (cogapResult$Amean %*% cogapResult$Pmean); |
|
229 |
- |
|
230 |
- |
|
231 |
- for(i in 1:(nrow(MMatrix))) |
|
232 |
- { |
|
233 |
- for(j in 1:(ncol(MMatrix))) |
|
234 |
- { |
|
235 |
- calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2); |
|
236 |
- } |
|
237 |
- } |
|
238 |
- |
|
239 |
- cogapResult = c(cogapResult, calcChiSq); |
|
240 |
- |
|
241 |
- |
|
242 |
- names(cogapResult)[12] = "meanChi2"; |
|
243 |
- |
|
244 |
- message(paste("Chi-Squared of Mean:",calcChiSq)) |
|
245 |
- |
|
246 |
- return(cogapResult); |
|
66 |
+ gapsRun(D, S, ABins, PBins, nFactor, simulation_id, nEquil, nSample, nOutR, output_atomic, fixedBinProbs, |
|
67 |
+ fixedDomain, sampleSnapshots, numSnapshots, alphaA, nMaxA, max_gibbmass_paraA, alphaP, nMaxP, |
|
68 |
+ max_gibbmass_paraP, seed, messages, FALSE, FP, 'P') |
|
247 | 69 |
} |
248 | 70 |
deleted file mode 100755 |
... | ... |
@@ -1,234 +0,0 @@ |
1 |
- |
|
2 |
-# gapsMapTestRun: function to call C++ cogaps code |
|
3 |
-# History: v1.0 CK with MFO edits, August 2014 |
|
4 |
- |
|
5 |
-# Inputs: D - data matrix |
|
6 |
-# S - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
7 |
-# FP - fixed A columns or P rows |
|
8 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
9 |
-# simulation_id - name to attach to atoms files if created |
|
10 |
-# nEquil - number of iterations for burn-in |
|
11 |
-# nSample - number of iterations for sampling |
|
12 |
-# nOutR - how often to print status into R by iterations |
|
13 |
-# output_atomic - whether to write atom files (large) |
|
14 |
-# alphaA, alphaP - sparsity parameters for A and P domains |
|
15 |
-# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
16 |
-# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
17 |
- |
|
18 |
- |
|
19 |
-# Output: list with A and P matrix estimates, chi-squared and atom |
|
20 |
-# numbers of sample by iteration, and chi-squared of mean |
|
21 |
- |
|
22 |
-#'\code{gapsMapTestRun} calls the C++ MCMC code and performs Bayesian |
|
23 |
-#'matrix factorization returning the two matrices that reconstruct |
|
24 |
-#'the data matrix; as opposed to gapsRun, this method takes an |
|
25 |
-#'additional input specifying set patterns in the P matrix |
|
26 |
-#' |
|
27 |
-#'@param D data matrix |
|
28 |
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
29 |
-#'@param FP data.frame with rows giving fixed patterns for P |
|
30 |
-#'@param ABins a matrix of same size as A which gives relative |
|
31 |
-#' probability of that element being non-zero |
|
32 |
-#'@param PBins a matrix of same size as P which gives relative |
|
33 |
-#' probability of that element being non-zero |
|
34 |
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be |
|
35 |
-#'greater than or equal to the number of rows of FP |
|
36 |
-#'@param simulation_id name to attach to atoms files if created |
|
37 |
-#'@param nEquil number of iterations for burn-in |
|
38 |
-#'@param nSample number of iterations for sampling |
|
39 |
-#'@param nOutR how often to print status into R by iterations |
|
40 |
-#'@param output_atomic whether to write atom files (large) |
|
41 |
-#'@param fixedMatrix character indicating whether A or P matrix |
|
42 |
-#' has fixed columns or rows respectively |
|
43 |
-#'@param fixedBinProbs Boolean for using relative probabilities |
|
44 |
-#' given in Abins and Pbins |
|
45 |
-#'@param fixedDomain character to indicate whether A or P is |
|
46 |
-#' domain for relative probabilities |
|
47 |
-#'@param alphaA sparsity parameter for A domain |
|
48 |
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
49 |
-#'@param max_gibbmass_paraA limit truncated normal to max size |
|
50 |
-#'@param alphaP sparsity parameter for P domain |
|
51 |
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
52 |
-#'@param max_gibbmass_paraP limit truncated normal to max size |
|
53 |
-#'@export |
|
54 |
- |
|
55 |
-gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
|
56 |
- nFactor = 7, simulation_id = "simulation", |
|
57 |
- nEquil = 1000, nSample = 1000, nOutR = 1000, |
|
58 |
- output_atomic = FALSE, fixedMatrix = "P", |
|
59 |
- fixedBinProbs = FALSE, fixedDomain = "N", |
|
60 |
- alphaA = 0.01, nMaxA = 100000, |
|
61 |
- max_gibbmass_paraA = 100.0, alphaP = 0.01, |
|
62 |
- nMaxP = 100000, max_gibbmass_paraP = 100.0) |
|
63 |
-{ |
|
64 |
- |
|
65 |
- #Begin data type error checking code |
|
66 |
- charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix)) |
|
67 |
- charCheck = c("simulation_id", "fixedDomain", "fixedMatrix") |
|
68 |
- |
|
69 |
- boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs)) |
|
70 |
- boolCheck = c("output_atomic", "fixedBinProbs") |
|
71 |
- |
|
72 |
- numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), |
|
73 |
- !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
74 |
- !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP)) |
|
75 |
- numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA", |
|
76 |
- "max_gibbmass_paraA", "alphaP", "nMaxP", "max_gibbmass_paraP") |
|
77 |
- |
|
78 |
- dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP)) |
|
79 |
- dataFrameCheck = c("D", "S", "ABins", "PBins", "FP") |
|
80 |
- |
|
81 |
- matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP)) |
|
82 |
- |
|
83 |
- if(any(charDataErrors)) |
|
84 |
- { |
|
85 |
- #Check which of these is not a string |
|
86 |
- for(i in 1:length(charDataErrors)) |
|
87 |
- { |
|
88 |
- if(charDataErrors[i] == TRUE) |
|
89 |
- { |
|
90 |
- stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
91 |
- } |
|
92 |
- } |
|
93 |
- } |
|
94 |
- |
|
95 |
- if(any(boolDataErrors)) |
|
96 |
- { |
|
97 |
- #Check which of these is not a boolean |
|
98 |
- for(i in 1:length(boolDataErrors)) |
|
99 |
- { |
|
100 |
- if(boolDataErrors[i] == TRUE) |
|
101 |
- { |
|
102 |
- stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
103 |
- } |
|
104 |
- } |
|
105 |
- } |
|
106 |
- |
|
107 |
- if(any(numericDataErrors)) |
|
108 |
- { |
|
109 |
- #Check which of these is not an integer/double |
|
110 |
- for(i in 1:length(numericDataErrors)) |
|
111 |
- { |
|
112 |
- if(numericDataErrors[i] == TRUE) |
|
113 |
- { |
|
114 |
- stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
115 |
- |
|
116 |
- } |
|
117 |
- } |
|
118 |
- } |
|
119 |
- |
|
120 |
- |
|
121 |
- #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame |
|
122 |
- if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5]))) |
|
123 |
- { |
|
124 |
- for(i in 1:length(dataFrameCheck)) |
|
125 |
- { |
|
126 |
- if((dataFrameErrors[i] && matrixErrors[i]) == TRUE) |
|
127 |
- { |
|
128 |
- stop(paste("Error in gapsRun: Argument", dataFrameCheck[i], "is not a matrix or data.frame. Please see documentation for details.")) |
|
129 |
- |
|
130 |
- } |
|
131 |
- } |
|
132 |
- } |
|
133 |
- |
|
134 |
- #Floor the parameters that are integers to prevent allowing doubles. |
|
135 |
- nFactor = floor(nFactor) |
|
136 |
- nEquil = floor(nEquil) |
|
137 |
- nSample = floor(nSample) |
|
138 |
- nOutR = floor(nOutR) |
|
139 |
- nMaxA = floor(nMaxA) |
|
140 |
- nMaxP = floor(nMaxP) |
|
141 |
- |
|
142 |
- # pass all settings to C++ within a list |
|
143 |
- # if (is.null(P)) { |
|
144 |
- Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix); |
|
145 |
- |
|
146 |
- ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
147 |
- alphaP, nMaxP, max_gibbmass_paraP); |
|
148 |
- |
|
149 |
- #Begin logic error checking code |
|
150 |
- |
|
151 |
- #Check for negative or zero arguments |
|
152 |
- if(any(ConfigNums <= 0)) |
|
153 |
- { |
|
154 |
- stop("Error in gapsRun: Numeric Arguments cannot be non-zero!") |
|
155 |
- } |
|
156 |
- |
|
157 |
- if((nOutR > nEquil) || (nOutR > nSample)) |
|
158 |
- { |
|
159 |
- stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.") |
|
160 |
- } |
|
161 |
- |
|
162 |
- if(ncol(FP) != ncol(D)) |
|
163 |
- { |
|
164 |
- stop("Error in gapsRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.") |
|
165 |
- } |
|
166 |
- |
|
167 |
- if(nFactor < (nrow(FP))) |
|
168 |
- { |
|
169 |
- stop("Error in gapsRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.") |
|
170 |
- } |
|
171 |
- |
|
172 |
- if(nFactor > (ncol(D))) |
|
173 |
- { |
|
174 |
- warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.") |
|
175 |
- } |
|
176 |
- |
|
177 |
- # P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass |
|
178 |
- # } else { |
|
179 |
- # Config = c(nFactor, simulation_id, nEquil, nSample, nOutR, |
|
180 |
- # output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor, |
|
181 |
- # alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1) |
|
182 |
- |
|
183 |
- # } |
|
184 |
- |
|
185 |
- geneNames = rownames(D); |
|
186 |
- sampleNames = colnames(D); |
|
187 |
- |
|
188 |
- # label patterns as Patt N |
|
189 |
- patternNames = c("0"); |
|
190 |
- for(i in 1:nFactor) |
|
191 |
- { |
|
192 |
- patternNames[i] = paste('Patt', i); |
|
193 |
- } |
|
194 |
- |
|
195 |
- # call to C++ Rcpp code |
|
196 |
- cogapResult = cogapsMapTest(D, S, FP, ABins, PBins, Config, ConfigNums); |
|
197 |
- |
|
198 |
- # convert returned files to matrices to simplify visualization and processing |
|
199 |
- cogapResult$Amean = as.matrix(cogapResult$Amean); |
|
200 |
- cogapResult$Asd = as.matrix(cogapResult$Asd); |
|
201 |
- cogapResult$Pmean = as.matrix(cogapResult$Pmean); |
|
202 |
- cogapResult$Psd = as.matrix(cogapResult$Psd); |
|
203 |
- |
|
204 |
- # label matrices |
|
205 |
- colnames(cogapResult$Amean) = patternNames; |
|
206 |
- rownames(cogapResult$Amean) = geneNames; |
|
207 |
- colnames(cogapResult$Asd) = patternNames; |
|
208 |
- rownames(cogapResult$Asd) = geneNames; |
|
209 |
- colnames(cogapResult$Pmean) = sampleNames; |
|
210 |
- rownames(cogapResult$Pmean) = patternNames; |
|
211 |
- colnames(cogapResult$Psd) = sampleNames; |
|
212 |
- rownames(cogapResult$Psd) = patternNames; |
|
213 |
- |
|
214 |
- # calculate chi-squared of mean, this should be smaller than individual |
|
215 |
- # chi-squared sample values if sampling is good |
|
216 |
- calcChiSq = c(0); |
|
217 |
- MMatrix = (cogapResult$Amean %*% cogapResult$Pmean); |
|
218 |
- |
|
219 |
- |
|
220 |
- for(i in 1:(nrow(MMatrix))) |
|
221 |
- { |
|
222 |
- for(j in 1:(ncol(MMatrix))) |
|
223 |
- { |
|
224 |
- calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2); |
|
225 |
- } |
|
226 |
- } |
|
227 |
- |
|
228 |
- cogapResult = c(cogapResult, calcChiSq); |
|
229 |
- names(cogapResult)[20] = "meanChi2"; |
|
230 |
- |
|
231 |
- message(paste("Chi-Squared of Mean:",calcChiSq)) |
|
232 |
- |
|
233 |
- return(cogapResult); |
|
234 |
-} |
... | ... |
@@ -50,6 +50,9 @@ |
50 | 50 |
#'@param seed Set seed for reproducibility. Positive values provide initial seed, negative values just use the time. |
51 | 51 |
#'@param messages Display progress messages |
52 | 52 |
#'@param singleCellRNASeq T/F indicating if the data is single cell RNA-seq data |
53 |
+#'@param fixedPatterns matrix of fixed values in either A or P matrix |
|
54 |
+#'@param whichMatrixFixed character to indicate whether A or P matrix |
|
55 |
+#' contains the fixed patterns |
|
53 | 56 |
#'@export |
54 | 57 |
|
55 | 58 |
#--CHANGES 1/20/15-- |
... | ... |
@@ -62,12 +65,29 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(), |
62 | 65 |
numSnapshots = 100, alphaA = 0.01, |
63 | 66 |
nMaxA = 100000, max_gibbmass_paraA = 100.0, |
64 | 67 |
alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0, |
65 |
- seed=-1, messages=TRUE, singleCellRNASeq=FALSE) |
|
68 |
+ seed=-1, messages=TRUE, singleCellRNASeq=FALSE, |
|
69 |
+ fixedPatterns = matrix(0), whichMatrixFixed = 'N') |
|
66 | 70 |
{ |
67 |
- #Floor the parameters that are integers to prevent allowing doubles. |
|
71 |
+ # Floor the parameters that are integers to prevent allowing doubles. |
|
68 | 72 |
nFactor <- floor(nFactor) |
69 | 73 |
nEquil <- floor(nEquil) |
70 | 74 |
nSample <- floor(nSample) |
75 |
+ nOutR <- floor(nOutR) |
|
76 |
+ numSnapshots <- floor(numSnapshots) |
|
77 |
+ nMaxA <- floor(nMaxA) |
|
78 |
+ nMaxP <- floor(nMaxP) |
|
79 |
+ |
|
80 |
+ # check the fixed patterns |
|
81 |
+ if ((whichMatrixFixed == 'A' & nrow(fixedPatterns) != nrow(D)) |
|
82 |
+ | (whichMatrixFixed == 'P' & nrow(fixedPatterns) > nFactor)) |
|
83 |
+ { |
|
84 |
+ stop('invalid number of rows in fixed pattern matrix') |
|
85 |
+ } |
|
86 |
+ if ((whichMatrixFixed == 'A' & ncol(fixedPatterns) > nFactor) |
|
87 |
+ | (whichMatrixFixed == 'P' & ncol(fixedPatterns) != ncol(D))) |
|
88 |
+ { |
|
89 |
+ stop('invalid number of cols in fixed pattern matrix') |
|
90 |
+ } |
|
71 | 91 |
|
72 | 92 |
geneNames <- rownames(D); |
73 | 93 |
sampleNames <- colnames(D); |
... | ... |
@@ -82,7 +102,7 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(), |
82 | 102 |
# call to C++ Rcpp code |
83 | 103 |
cogapResult <- cogaps(as.matrix(D), as.matrix(S), nFactor, alphaA, alphaP, |
84 | 104 |
nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, max_gibbmass_paraP, |
85 |
- seed, messages, singleCellRNASeq) |
|
105 |
+ fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq) |
|
86 | 106 |
|
87 | 107 |
# convert returned files to matrices to simplify visualization and processing |
88 | 108 |
cogapResult$Amean <- as.matrix(cogapResult$Amean); |
... | ... |
@@ -117,8 +137,7 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(), |
117 | 137 |
|
118 | 138 |
# names(cogapResult)[13] <- "meanChi2"; |
119 | 139 |
|
120 |
- #if (messages) |
|
121 |
- message(paste("Chi-Squared of Mean:", calcChiSq)) |
|
140 |
+ message(paste("Chi-Squared of Mean:", calcChiSq)) |
|
122 | 141 |
|
123 | 142 |
return(cogapResult); |
124 | 143 |
} |
125 | 144 |
deleted file mode 100755 |
... | ... |
@@ -1,216 +0,0 @@ |
1 |
- |
|
2 |
-# gapsTestRun: function to call C++ cogaps code |
|
3 |
-# History: v1.0 CK with MFO edits, August 2014 |
|
4 |
- |
|
5 |
-# Inputs: D - data matrix |
|
6 |
-# S - uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
7 |
-# nFactor - number of patterns (basis vectors, metagenes) |
|
8 |
-# simulation_id - name to attach to atoms files if created |
|
9 |
-# nEquil - number of iterations for burn-in |
|
10 |
-# nSample - number of iterations for sampling |
|
11 |
-# nOutR - how often to print status into R by iterations |
|
12 |
-# output_atomic - whether to write atom files (large) |
|
13 |
-# alphaA, alphaP - sparsity parameters for A and P domains |
|
14 |
-# max_gibbmass_paraA(P) - limit truncated normal to max size |
|
15 |
-# nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms |
|
16 |
- |
|
17 |
- |
|
18 |
-# Output: list with A and P matrix estimates, chi-squared and atom |
|
19 |
-# numbers of sample by iteration, and chi-squared of mean |
|
20 |
-#'\code{gapsTestRun} calls the C++ MCMC code and performs Bayesian |
|
21 |
-#'matrix factorization returning the two matrices that reconstruct |
|
22 |
-#'the data matrix |
|
23 |
-#' |
|
24 |
-#'@param D data matrix |
|
25 |
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood) |
|
26 |
-#'@param ABins a matrix of same size as A which gives relative |
|
27 |
-#' probability of that element being non-zero |
|
28 |
-#'@param PBins a matrix of same size as P which gives relative |
|
29 |
-#' probability of that element being non-zero |
|
30 |
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be |
|
31 |
-#'greater than or equal to the number of rows of FP |
|
32 |
-#'@param simulation_id name to attach to atoms files if created |
|
33 |
-#'@param nEquil number of iterations for burn-in |
|
34 |
-#'@param nSample number of iterations for sampling |
|
35 |
-#'@param nOutR how often to print status into R by iterations |
|
36 |
-#'@param output_atomic whether to write atom files (large) |
|
37 |
-#'@param fixedBinProbs Boolean for using relative probabilities |
|
38 |
-#' given in Abins and Pbins |
|
39 |
-#'@param fixedDomain character to indicate whether A or P is |
|
40 |
-#' domain for relative probabilities |
|
41 |
-#'@param alphaA sparsity parameter for A domain |
|
42 |
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms |
|
43 |
-#'@param max_gibbmass_paraA limit truncated normal to max size |
|
44 |
-#'@param alphaP sparsity parameter for P domain |
|
45 |
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms |
|
46 |
-#'@param max_gibbmass_paraP limit truncated normal to max size |
|
47 |
-#'@export |
|
48 |
- |
|
49 |
-gapsTestRun <- function(D, S, ABins = data.frame(), PBins = data.frame(), |
|
50 |
- nFactor = 7, simulation_id = "simulation", nEquil = 1000, |
|
51 |
- nSample = 1000, nOutR = 1000, output_atomic = FALSE, |
|
52 |
- fixedBinProbs = FALSE, fixedDomain = "N", alphaA = 0.01, |
|
53 |
- nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01, |
|
54 |
- nMaxP = 100000, max_gibbmass_paraP = 100.0) |
|
55 |
-{ |
|
56 |
- #Begin data type error checking code |
|
57 |
- charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain)) |
|
58 |
- charCheck = c("simulation_id", "fixedDomain") |
|
59 |
- |
|
60 |
- boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs)) |
|
61 |
- boolCheck = c("output_atomic", "fixedBinProbs") |
|
62 |
- |
|
63 |
- numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), |
|
64 |
- !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP), |
|
65 |
- !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP)) |
|
66 |
- numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA", |
|
67 |
- "max_gibbmass_paraA", "alphaP", "nMaxP", "max_gibbmass_paraP") |
|
68 |
- |
|
69 |
- dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins)) |
|
70 |
- dataFrameCheck = c("D", "S", "ABins", "PBins") |
|
71 |
- |
|
72 |
- matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins)) |
|
73 |
- |
|
74 |
- if(any(charDataErrors)) |
|
75 |
- { |
|
76 |
- #Check which of these is not a string |
|
77 |
- for(i in 1:length(charDataErrors)) |
|
78 |
- { |
|
79 |
- if(charDataErrors[i] == TRUE) |
|
80 |
- { |
|
81 |
- stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
82 |
- } |
|
83 |
- } |
|
84 |
- } |
|
85 |
- |
|
86 |
- if(any(boolDataErrors)) |
|
87 |
- { |
|
88 |
- #Check which of these is not a boolean |
|
89 |
- for(i in 1:length(boolDataErrors)) |
|
90 |
- { |
|
91 |
- if(boolDataErrors[i] == TRUE) |
|
92 |
- { |
|
93 |
- stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
94 |
- } |
|
95 |
- } |
|
96 |
- } |
|
97 |
- |
|
98 |
- if(any(numericDataErrors)) |
|
99 |
- { |
|
100 |
- #Check which of these is not an integer/double |
|
101 |
- for(i in 1:length(numericDataErrors)) |
|
102 |
- { |
|
103 |
- if(numericDataErrors[i] == TRUE) |
|
104 |
- { |
|
105 |
- stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details.")) |
|
106 |
- |
|
107 |
- } |
|
108 |
- } |
|
109 |
- } |
|
110 |
- |
|
111 |
- |
|
112 |
- #At least one of A, P, ABins, or PBins is not a matrix or data.frame |
|
113 |
- if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]))) |
|
114 |
- { |
|
115 |
- for(i in 1:length(dataFrameCheck)) |
|
116 |
- { |
|
117 |
- if((dataFrameErrors[i] && matrixErrors[i]) == TRUE) |
|
118 |
- { |
|
119 |
- stop(paste("Error in gapsRun: Argument",dataFrameCheck[i],"is not a matrix or data.frame. Please see documentation for details.")) |
|
120 |
- |
|
121 |
- } |
|
122 |
- } |
|
123 |
- } |
|
124 |
- |
|
125 |
- #Floor the parameters that are integers to prevent allowing doubles. |
|
126 |
- nFactor = floor(nFactor) |
|
127 |
- nEquil = floor(nEquil) |
|
128 |
- nSample = floor(nSample) |
|
129 |
- nOutR = floor(nOutR) |
|
130 |
- nMaxA = floor(nMaxA) |
|
131 |
- nMaxP = floor(nMaxP) |
|
132 |
- |
|
133 |
- # pass all settings to C++ within a list |
|
134 |
- # if (is.null(P)) { |
|
135 |
- Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain); |
|
136 |
- |
|
137 |
- ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA, |
|
138 |
- alphaP, nMaxP, max_gibbmass_paraP); |
|
139 |
- |
|
140 |
- |
|
141 |
- #Begin logic error checking code |
|
142 |
- |
|
143 |
- #Check for negative or zero arguments |
|
144 |
- if(any(ConfigNums <= 0)) |
|
145 |
- { |
|
146 |
- stop("Error in gapsRun: Numeric Arguments cannot be non-zero!") |
|
147 |
- } |
|
148 |
- |
|
149 |
- if((nOutR > nEquil) || (nOutR > nSample)) |
|
150 |
- { |
|
151 |
- stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.") |
|
152 |
- } |
|
153 |
- |
|
154 |
- if(nFactor > (ncol(D))) |
|
155 |
- { |
|
156 |
- warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.") |
|
157 |
- } |
|
158 |
- |
|
159 |
- # P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass |
|
160 |
- # } else { |
|
161 |
- # Config = c(nFactor, simulation_id, nEquil, nSample, nOutR, |
|
162 |
- # output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor, |
|
163 |
- # alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1) |
|
164 |
- |
|
165 |
- # } |
|
166 |
- |
|
167 |
- geneNames = rownames(D); |
|
168 |
- sampleNames = colnames(D); |
|
169 |
- |
|
170 |
- # label patterns as Patt N |
|
171 |
- patternNames = c("0"); |
|
172 |
- for(i in 1:nFactor) |
|
173 |
- { |
|
174 |
- patternNames[i] = paste('Patt', i); |
|
175 |
- } |
|
176 |
- |
|
177 |
- # call to C++ Rcpp code |
|
178 |
- cogapResult = cogapsTest(D, S, ABins, PBins, Config, ConfigNums); |
|
179 |
- |
|
180 |
- # convert returned files to matrices to simplify visualization and processing |
|
181 |
- cogapResult$Amean = as.matrix(cogapResult$Amean); |
|
182 |
- cogapResult$Asd = as.matrix(cogapResult$Asd); |
|
183 |
- cogapResult$Pmean = as.matrix(cogapResult$Pmean); |
|
184 |
- cogapResult$Psd = as.matrix(cogapResult$Psd); |
|
185 |
- |
|
186 |
- # label matrices |
|
187 |
- colnames(cogapResult$Amean) = patternNames; |
|
188 |
- rownames(cogapResult$Amean) = geneNames; |
|
189 |
- colnames(cogapResult$Asd) = patternNames; |
|
190 |
- rownames(cogapResult$Asd) = geneNames; |
|
191 |
- colnames(cogapResult$Pmean) = sampleNames; |
|
192 |
- rownames(cogapResult$Pmean) = patternNames; |
|
193 |
- colnames(cogapResult$Psd) = sampleNames; |
|
194 |
- rownames(cogapResult$Psd) = patternNames; |
|
195 |
- |
|
196 |
- # calculate chi-squared of mean, this should be smaller than individual |
|
197 |
- # chi-squared sample values if sampling is good |
|
198 |
- calcChiSq = c(0); |
|
199 |
- MMatrix = (cogapResult$Amean %*% cogapResult$Pmean); |
|
200 |
- |
|
201 |
- |
|
202 |
- for(i in 1:(nrow(MMatrix))) |
|
203 |
- { |
|
204 |
- for(j in 1:(ncol(MMatrix))) |
|
205 |
- { |
|
206 |
- calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2); |
|
207 |
- } |
|
208 |
- } |
|
209 |
- |
|
210 |
- cogapResult = c(cogapResult, calcChiSq); |
|
211 |
- names(cogapResult)[20] = "meanChi2"; |
|
212 |
- |
|
213 |
- message(paste("Chi-Squared of Mean:",calcChiSq)) |
|
214 |
- |
|
215 |
- return(cogapResult); |
|
216 |
-} |
... | ... |
@@ -8,7 +8,7 @@ the data matrix; as opposed to gapsRun, this method takes an |
8 | 8 |
additional input specifying set patterns in the P matrix} |
9 | 9 |
\usage{ |
10 | 10 |
gapsMapRun(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
11 |
- nFactor = 5, simulation_id = "simulation", nEquil = 1000, |
|
11 |
+ nFactor = 7, simulation_id = "simulation", nEquil = 1000, |
|
12 | 12 |
nSample = 1000, nOutR = 1000, output_atomic = FALSE, |
13 | 13 |
fixedMatrix = "P", fixedBinProbs = FALSE, fixedDomain = "N", |
14 | 14 |
sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01, |
15 | 15 |
deleted file mode 100755 |
... | ... |
@@ -1,70 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/gapsMapTestRun.R |
|
3 |
-\name{gapsMapTestRun} |
|
4 |
-\alias{gapsMapTestRun} |
|
5 |
-\title{\code{gapsMapTestRun} calls the C++ MCMC code and performs Bayesian |
|
6 |
-matrix factorization returning the two matrices that reconstruct |
|
7 |
-the data matrix; as opposed to gapsRun, this method takes an |
|
8 |
-additional input specifying set patterns in the P matrix} |
|
9 |
-\usage{ |
|
10 |
-gapsMapTestRun(D, S, FP, ABins = data.frame(), PBins = data.frame(), |
|
11 |
- nFactor = 7, simulation_id = "simulation", nEquil = 1000, |
|
12 |
- nSample = 1000, nOutR = 1000, output_atomic = FALSE, |
|
13 |
- fixedMatrix = "P", fixedBinProbs = FALSE, fixedDomain = "N", |
|
14 |
- alphaA = 0.01, nMaxA = 1e+05, max_gibbmass_paraA = 100, alphaP = 0.01, |
|
15 |
- nMaxP = 1e+05, max_gibbmass_paraP = 100) |
|
16 |
-} |
|
17 |
-\arguments{ |
|
18 |
-\item{D}{data matrix} |
|
19 |
- |
|
20 |
-\item{S}{uncertainty matrix (std devs for chi-squared of Log Likelihood)} |
|
21 |
- |
|
22 |
-\item{FP}{data.frame with rows giving fixed patterns for P} |
|
23 |
- |
|
24 |
-\item{ABins}{a matrix of same size as A which gives relative |
|
25 |
-probability of that element being non-zero} |
|
26 |
- |
|
27 |
-\item{PBins}{a matrix of same size as P which gives relative |
|
28 |
-probability of that element being non-zero} |
|
29 |
- |
|
30 |
-\item{nFactor}{number of patterns (basis vectors, metagenes), which must be |
|
31 |
-greater than or equal to the number of rows of FP} |
|
32 |
- |
|
33 |
-\item{simulation_id}{name to attach to atoms files if created} |
|
34 |
- |
|
35 |
-\item{nEquil}{number of iterations for burn-in} |
|
36 |
- |
|
37 |
-\item{nSample}{number of iterations for sampling} |
|
38 |
- |
|
39 |
-\item{nOutR}{how often to print status into R by iterations} |
|
40 |
- |
|
41 |
-\item{output_atomic}{whether to write atom files (large)} |
|
42 |
- |
|
43 |
-\item{fixedMatrix}{character indicating whether A or P matrix |
|
44 |
-has fixed columns or rows respectively} |
|
45 |
- |
|
46 |
-\item{fixedBinProbs}{Boolean for using relative probabilities |
|
47 |
-given in Abins and Pbins} |
|
48 |
- |
|
49 |
-\item{fixedDomain}{character to indicate whether A or P is |
|
50 |
-domain for relative probabilities} |
|
51 |
- |
|
52 |
-\item{alphaA}{sparsity parameter for A domain} |
|
53 |
- |
|
54 |
-\item{nMaxA}{PRESENTLY UNUSED, future = limit number of atoms} |
|
55 |
- |
|
56 |
-\item{max_gibbmass_paraA}{limit truncated normal to max size} |
|
57 |
- |
|
58 |
-\item{alphaP}{sparsity parameter for P domain} |
|
59 |
- |
|
60 |
-\item{nMaxP}{PRESENTLY UNUSED, future = limit number of atoms} |
|
61 |
- |
|
62 |
-\item{max_gibbmass_paraP}{limit truncated normal to max size} |
|
63 |
-} |
|
64 |
-\description{ |
|
65 |
-\code{gapsMapTestRun} calls the C++ MCMC code and performs Bayesian |
|
66 |
-matrix factorization returning the two matrices that reconstruct |
|
67 |
-the data matrix; as opposed to gapsRun, this method takes an |
|
68 |
-additional input specifying set patterns in the P matrix |
|
69 |
-} |
|
70 |
- |
... | ... |
@@ -12,7 +12,8 @@ gapsRun(D, S, ABins = data.frame(), PBins = data.frame(), nFactor = 7, |
12 | 12 |
fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, |
13 | 13 |
alphaA = 0.01, nMaxA = 1e+05, max_gibbmass_paraA = 100, alphaP = 0.01, |
14 | 14 |
nMaxP = 1e+05, max_gibbmass_paraP = 100, seed = -1, messages = TRUE, |
15 |
- singleCellRNASeq = FALSE) |
|
15 |
+ singleCellRNASeq = FALSE, fixedPatterns = matrix(0), |
|
16 |
+ whichMatrixFixed = "N") |
|
16 | 17 |
} |
17 | 18 |
\arguments{ |
18 | 19 |
\item{D}{data matrix} |
... | ... |
@@ -66,6 +67,11 @@ individual samples from Markov chain during sampling} |
66 | 67 |
\item{messages}{Display progress messages} |
67 | 68 |
|
68 | 69 |
\item{singleCellRNASeq}{T/F indicating if the data is single cell RNA-seq data} |
70 |
+ |
|
71 |
+\item{fixedPatterns}{matrix of fixed values in either A or P matrix} |
|
72 |
+ |
|
73 |
+\item{whichMatrixFixed}{character to indicate whether A or P matrix |
|
74 |
+contains the fixed patterns} |
|
69 | 75 |
} |
70 | 76 |
\description{ |
71 | 77 |
\code{gapsRun} calls the C++ MCMC code and performs Bayesian |
72 | 78 |
deleted file mode 100755 |
... | ... |
@@ -1,63 +0,0 @@ |
1 |
-% Generated by roxygen2: do not edit by hand |
|
2 |
-% Please edit documentation in R/gapsTestRun.R |
|
3 |
-\name{gapsTestRun} |
|
4 |
-\alias{gapsTestRun} |
|
5 |
-\title{\code{gapsTestRun} calls the C++ MCMC code and performs Bayesian |
|
6 |
-matrix factorization returning the two matrices that reconstruct |
|
7 |
-the data matrix} |
|
8 |
-\usage{ |
|
9 |
-gapsTestRun(D, S, ABins = data.frame(), PBins = data.frame(), nFactor = 7, |
|
10 |
- simulation_id = "simulation", nEquil = 1000, nSample = 1000, |
|
11 |
- nOutR = 1000, output_atomic = FALSE, fixedBinProbs = FALSE, |
|
12 |
- fixedDomain = "N", alphaA = 0.01, nMaxA = 1e+05, |
|
13 |
- max_gibbmass_paraA = 100, alphaP = 0.01, nMaxP = 1e+05, |
|
14 |
- max_gibbmass_paraP = 100) |
|
15 |
-} |
|
16 |
-\arguments{ |
|
17 |
-\item{D}{data matrix} |
|
18 |
- |
|
19 |
-\item{S}{uncertainty matrix (std devs for chi-squared of Log Likelihood)} |
|
20 |
- |
|
21 |
-\item{ABins}{a matrix of same size as A which gives relative |
|
22 |
-probability of that element being non-zero} |
|
23 |
- |
|
24 |
-\item{PBins}{a matrix of same size as P which gives relative |
|
25 |
-probability of that element being non-zero} |
|
26 |
- |
|
27 |
-\item{nFactor}{number of patterns (basis vectors, metagenes), which must be |
|
28 |
-greater than or equal to the number of rows of FP} |
|
29 |
- |
|
30 |
-\item{simulation_id}{name to attach to atoms files if created} |
|
31 |
- |
|
32 |
-\item{nEquil}{number of iterations for burn-in} |
|
33 |
- |
|
34 |
-\item{nSample}{number of iterations for sampling} |
|
35 |
- |
|
36 |
-\item{nOutR}{how often to print status into R by iterations} |
|
37 |
- |
|
38 |
-\item{output_atomic}{whether to write atom files (large)} |
|
39 |
- |
|
40 |
-\item{fixedBinProbs}{Boolean for using relative probabilities |
|
41 |
-given in Abins and Pbins} |
|
42 |
- |
|
43 |
-\item{fixedDomain}{character to indicate whether A or P is |
|
44 |
-domain for relative probabilities} |
|
45 |
- |
|
46 |
-\item{alphaA}{sparsity parameter for A domain} |
|
47 |
- |
|
48 |
-\item{nMaxA}{PRESENTLY UNUSED, future = limit number of atoms} |
|
49 |
- |
|
50 |
-\item{max_gibbmass_paraA}{limit truncated normal to max size} |
|
51 |
- |
|
52 |
-\item{alphaP}{sparsity parameter for P domain} |
|
53 |
- |
|
54 |
-\item{nMaxP}{PRESENTLY UNUSED, future = limit number of atoms} |
|
55 |
- |
|
56 |
-\item{max_gibbmass_paraP}{limit truncated normal to max size} |
|
57 |
-} |
|
58 |
-\description{ |
|
59 |
-\code{gapsTestRun} calls the C++ MCMC code and performs Bayesian |
|
60 |
-matrix factorization returning the two matrices that reconstruct |
|
61 |
-the data matrix |
|
62 |
-} |
|
63 |
- |
... | ... |
@@ -100,6 +100,24 @@ const RowMatrix &meanMat, unsigned nUpdates) |
100 | 100 |
return retMat; |
101 | 101 |
} |
102 | 102 |
|
103 |
+// horribly slow, don't call often |
|
104 |
+void gaps::algo::matrixMultiplication(TwoWayMatrix &C, const ColMatrix &A, |
|
105 |
+const RowMatrix &B) |
|
106 |
+{ |
|
107 |
+ for (unsigned i = 0; i < C.nRow(); ++i) |
|
108 |
+ { |
|
109 |
+ for (unsigned j = 0; j < C.nCol(); ++j) |
|
110 |
+ { |
|
111 |
+ double sum = 0.0; |
|
112 |
+ for (unsigned k = 0; k < A.nCol(); ++k) |
|
113 |
+ { |
|
114 |
+ sum += A(i,k) * B(k,j); |
|
115 |
+ } |
|
116 |
+ C.set(i, j, sum); |
|
117 |
+ } |
|
118 |
+ } |
|
119 |
+} |
|
120 |
+ |
|
103 | 121 |
Vector gaps::algo::scalarMultiple(const Vector &vec, double n) |
104 | 122 |
{ |
105 | 123 |
Vector retVec(vec.size()); |
... | ... |
@@ -133,6 +151,16 @@ ColMatrix gaps::algo::scalarDivision(const ColMatrix &mat, double n) |
133 | 151 |
return retMat; |
134 | 152 |
} |
135 | 153 |
|
154 |
+Vector gaps::algo::scalarDivision(const Vector &vec, matrix_data_t n) |
|
155 |
+{ |
|
156 |
+ Vector temp(vec.size()); |
|
157 |
+ for (unsigned i = 0; i < vec.size(); ++i) |
|
158 |
+ { |
|
159 |
+ temp(i) = vec(i) / n; |
|
160 |
+ } |
|
161 |
+ return temp; |
|
162 |
+} |
|
163 |
+ |
|
136 | 164 |
RowMatrix gaps::algo::scalarDivision(const RowMatrix &mat, double n) |
137 | 165 |
{ |
138 | 166 |
RowMatrix retMat(mat.nRow(), mat.nCol()); |
... | ... |
@@ -33,6 +33,9 @@ namespace algo |
33 | 33 |
matrix_data_t mean(const TwoWayMatrix &mat); |
34 | 34 |
matrix_data_t nonZeroMean(const TwoWayMatrix &mat); |
35 | 35 |
|
36 |
+ void matrixMultiplication(TwoWayMatrix &C, const ColMatrix &A, |
|
37 |
+ const RowMatrix &B); |
|
38 |
+ |
|
36 | 39 |
ColMatrix computeStdDev(const ColMatrix &stdMat, const ColMatrix &meanMat, |
37 | 40 |
unsigned nUpdates); |
38 | 41 |
|
... | ... |
@@ -19,7 +19,8 @@ Vector& pAtomVec, GapsPhase phase); |
19 | 19 |
Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, |
20 | 20 |
unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, |
21 | 21 |
unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, |
22 |
-double maxGibbsMassP, int seed=-1, bool messages=false, bool singleCellRNASeq=false) |
|
22 |
+double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, |
|
23 |
+char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq) |
|
23 | 24 |
{ |
24 | 25 |
// set seed |
25 | 26 |
uint32_t seedUsed = static_cast<uint32_t>(seed); |
... | ... |
@@ -34,7 +35,8 @@ double maxGibbsMassP, int seed=-1, bool messages=false, bool singleCellRNASeq=fa |
34 | 35 |
|
35 | 36 |
// create the gibbs sampler |
36 | 37 |
GibbsSampler sampler(DMatrix, SMatrix, nFactor, alphaA, alphaP, |
37 |
- maxGibbsMassA, maxGibbsMassP, singleCellRNASeq); |
|
38 |
+ maxGibbsMassA, maxGibbsMassP, singleCellRNASeq, fixedPatterns, |
|
39 |
+ whichMatrixFixed); |
|
38 | 40 |
|
39 | 41 |
// initial number of iterations for each matrix |
40 | 42 |
unsigned nIterA = 10; |
... | ... |
@@ -63,11 +65,15 @@ double maxGibbsMassP, int seed=-1, bool messages=false, bool singleCellRNASeq=fa |
63 | 65 |
chi2Vec.concat(chi2VecSample); |
64 | 66 |
|
65 | 67 |
//Just leave the snapshots as empty lists |
68 |
+ Rcpp::List ASnapR = Rcpp::List::create(); |
|
69 |
+ Rcpp::List PSnapR = Rcpp::List::create(); |
|
66 | 70 |
return Rcpp::List::create( |
67 | 71 |
Rcpp::Named("Amean") = sampler.AMeanRMatrix(), |
68 | 72 |
Rcpp::Named("Asd") = sampler.AStdRMatrix(), |
69 | 73 |
Rcpp::Named("Pmean") = sampler.PMeanRMatrix(), |
70 | 74 |
Rcpp::Named("Psd") = sampler.PStdRMatrix(), |
75 |
+ Rcpp::Named("ASnapshots") = ASnapR, |
|
76 |
+ Rcpp::Named("PSnapshots") = PSnapR, |
|
71 | 77 |
Rcpp::Named("atomsAEquil") = nAtomsAEquil.rVec(), |
72 | 78 |
Rcpp::Named("atomsASamp") = nAtomsASample.rVec(), |
73 | 79 |
Rcpp::Named("atomsPEquil") = nAtomsPEquil.rVec(), |
... | ... |
@@ -7,7 +7,8 @@ static const double EPSILON = 1.e-10; |
7 | 7 |
|
8 | 8 |
GibbsSampler::GibbsSampler(Rcpp::NumericMatrix D, Rcpp::NumericMatrix S, |
9 | 9 |
unsigned int nFactor, double alphaA, double alphaP, double maxGibbsMassA, |
10 |
-double maxGibbsMassP, bool singleCellRNASeq) |
|
10 |
+double maxGibbsMassP, bool singleCellRNASeq, Rcpp::NumericMatrix fixedPat, |
|
11 |
+char whichMat) |
|
11 | 12 |
: |
12 | 13 |
mDMatrix(D), mSMatrix(S), mAMatrix(D.nrow(), nFactor), |
13 | 14 |
mPMatrix(nFactor, D.ncol()), mAPMatrix(D.nrow(), D.ncol()), |
... | ... |
@@ -16,7 +17,7 @@ mMaxGibbsMassA(maxGibbsMassA), mMaxGibbsMassP(maxGibbsMassP), |
16 | 17 |
mAnnealingTemp(1.0), mChi2(0.0), mSingleCellRNASeq(singleCellRNASeq), |
17 | 18 |
mAMeanMatrix(D.nrow(), nFactor), mAStdMatrix(D.nrow(), nFactor), |
18 | 19 |
mPMeanMatrix(nFactor, D.ncol()), mPStdMatrix(nFactor, D.ncol()), |
19 |
-mStatUpdates(0) |
|
20 |
+mStatUpdates(0), mFixedMat(whichMat) |
|
20 | 21 |
{ |
21 | 22 |
double meanD = mSingleCellRNASeq ? gaps::algo::nonZeroMean(mDMatrix) |
22 | 23 |
: gaps::algo::mean(mDMatrix); |
... | ... |
@@ -29,6 +30,27 @@ mStatUpdates(0) |
29 | 30 |
mMaxGibbsMassA /= mADomain.lambda(); |
30 | 31 |
mMaxGibbsMassP /= mPDomain.lambda(); |
31 | 32 |
|
33 |
+ if (mFixedMat == 'A') |
|
34 |
+ { |
|
35 |
+ mNumFixedPatterns = fixedPat.ncol(); |
|
36 |
+ ColMatrix temp(fixedPat); |
|
37 |
+ for (unsigned j = 0; j < mNumFixedPatterns; ++j) |
|
38 |
+ { |
|
39 |
+ mAMatrix.getCol(j) = gaps::algo::scalarDivision(temp.getCol(j), |
|
40 |
+ gaps::algo::sum(temp.getCol(j))); |
|
41 |
+ } |
|
42 |
+ } |
|
43 |
+ else if (mFixedMat == 'P') |
|
44 |
+ { |
|
45 |
+ mNumFixedPatterns = fixedPat.nrow(); |
|
46 |
+ RowMatrix temp(fixedPat); |
|
47 |
+ for (unsigned i = 0; i < mNumFixedPatterns; ++i) |
|
48 |
+ { |
|
49 |
+ mPMatrix.getRow(i) = gaps::algo::scalarDivision(temp.getRow(i), |
|
50 |
+ gaps::algo::sum(temp.getRow(i))); |
|
51 |
+ } |
|
52 |
+ } |
|
53 |
+ gaps::algo::matrixMultiplication(mAPMatrix, mAMatrix, mPMatrix); |
|
32 | 54 |
mChi2 = 2.0 * gaps::algo::loglikelihood(mDMatrix, mSMatrix, mAPMatrix); |
33 | 55 |
} |
34 | 56 |
|
... | ... |
@@ -93,6 +115,20 @@ void GibbsSampler::update(char matrixLabel) |
93 | 115 |
// get proposal and convert to a matrix update |
94 | 116 |
AtomicProposal proposal = domain.makeProposal(); |
95 | 117 |
|
118 |
+ // check if proposal is in fixed region |
|
119 |
+ if (mFixedMat == 'A' || mFixedMat == 'P') |
|
120 |
+ { |
|
121 |
+ MatrixChange change = domain.getMatrixChange(proposal); |
|
122 |
+ unsigned index1 = mFixedMat == 'A' ? change.col1 : change.row1; |
|
123 |
+ unsigned index2 = mFixedMat == 'A' ? change.col2 : change.row2; |
|
124 |
+ index2 = change.nChanges == 1 ? mNumFixedPatterns : index2; |
|
125 |
+ |
|
126 |
+ if (index1 < mNumFixedPatterns || index2 < mNumFixedPatterns) |
|
127 |
+ { |
|
128 |
+ return; |
|
129 |
+ } |
|
130 |
+ } |
|
131 |
+ |
|
96 | 132 |
// Update based on the proposal type |
97 | 133 |
switch (proposal.type) |
98 | 134 |
{ |
... | ... |
@@ -31,6 +31,9 @@ public: |
31 | 31 |
|
32 | 32 |
bool mSingleCellRNASeq; |
33 | 33 |
|
34 |
+ unsigned mNumFixedPatterns; |
|
35 |
+ char mFixedMat; |
|
36 |
+ |
|
34 | 37 |
bool death(AtomicSupport &domain, AtomicProposal &proposal); |
35 | 38 |
bool birth(AtomicSupport &domain, AtomicProposal &proposal); |
36 | 39 |
bool move(AtomicSupport &domain, AtomicProposal &proposal); |
... | ... |
@@ -54,7 +57,7 @@ public: |
54 | 57 |
|
55 | 58 |
GibbsSampler(Rcpp::NumericMatrix D, Rcpp::NumericMatrix S, unsigned nFactor, |
56 | 59 |
double alphaA, double alphaP, double maxGibbsMassA, double maxGibbsMassP, |
57 |
- bool singleCellRNASeq); |
|
60 |
+ bool singleCellRNASeq, Rcpp::NumericMatrix fixedPat, char whichMat); |
|
58 | 61 |
|
59 | 62 |
void update(char matrixLabel); |
60 | 63 |
|
... | ... |
@@ -6,8 +6,8 @@ |
6 | 6 |
using namespace Rcpp; |
7 | 7 |
|
8 | 8 |
// cogaps |
9 |
-Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, double maxGibbsMassP, int seed, bool messages, bool singleCellRNASeq); |
|
10 |
-RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP) { |
|
9 |
+Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq); |
|
10 |
+RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP fixedPatternsSEXP, SEXP whichMatrixFixedSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP) { |
|
11 | 11 |
BEGIN_RCPP |
12 | 12 |
Rcpp::RObject rcpp_result_gen; |
13 | 13 |
Rcpp::RNGScope rcpp_rngScope_gen; |
... | ... |
@@ -21,10 +21,12 @@ BEGIN_RCPP |
21 | 21 |
Rcpp::traits::input_parameter< unsigned >::type nSample(nSampleSEXP); |
22 | 22 |
Rcpp::traits::input_parameter< double >::type maxGibbsMassA(maxGibbsMassASEXP); |
23 | 23 |
Rcpp::traits::input_parameter< double >::type maxGibbsMassP(maxGibbsMassPSEXP); |
24 |
+ Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type fixedPatterns(fixedPatternsSEXP); |
|
25 |
+ Rcpp::traits::input_parameter< char >::type whichMatrixFixed(whichMatrixFixedSEXP); |
|
24 | 26 |
Rcpp::traits::input_parameter< int >::type seed(seedSEXP); |
25 | 27 |
Rcpp::traits::input_parameter< bool >::type messages(messagesSEXP); |
26 | 28 |
Rcpp::traits::input_parameter< bool >::type singleCellRNASeq(singleCellRNASeqSEXP); |
27 |
- rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, seed, messages, singleCellRNASeq)); |
|
29 |
+ rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq)); |
|
28 | 30 |
return rcpp_result_gen; |
29 | 31 |
END_RCPP |
30 | 32 |
} |
... | ... |
@@ -40,7 +42,7 @@ END_RCPP |
40 | 42 |
} |
41 | 43 |
|
42 | 44 |
static const R_CallMethodDef CallEntries[] = { |
43 |
- {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 13}, |
|
45 |
+ {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 15}, |
|
44 | 46 |
{"_CoGAPS_run_catch_unit_tests", (DL_FUNC) &_CoGAPS_run_catch_unit_tests, 0}, |
45 | 47 |
{NULL, NULL, 0} |
46 | 48 |
}; |