Browse code

added back option for fixed patterns

sherman5 authored on 02/01/2018 18:13:00
Showing16 changed files

... ...
@@ -8,9 +8,7 @@ export(calcGeneGSStat)
8 8
 export(calcZ)
9 9
 export(createGWCoGAPSSets)
10 10
 export(gapsMapRun)
11
-export(gapsMapTestRun)
12 11
 export(gapsRun)
13
-export(gapsTestRun)
14 12
 export(generateSeeds)
15 13
 export(patternMarkers)
16 14
 export(patternMatch4Parallel)
... ...
@@ -1,8 +1,8 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, seed = -1L, messages = FALSE, singleCellRNASeq = FALSE) {
5
-    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, seed, messages, singleCellRNASeq)
4
+cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq) {
5
+    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq)
6 6
 }
7 7
 
8 8
 run_catch_unit_tests <- function() {
... ...
@@ -57,191 +57,13 @@
57 57
 #'@param messages Display progress messages
58 58
 #'@export
59 59
 
60
-gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 5, simulation_id = "simulation",
60
+gapsMapRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(), nFactor = 7, simulation_id = "simulation",
61 61
                        nEquil = 1000, nSample = 1000, nOutR = 1000, output_atomic = FALSE, fixedMatrix = "P",
62 62
                        fixedBinProbs = FALSE, fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01,
63 63
                        nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0,
64 64
                        seed=-1, messages=TRUE)
65 65
 {
66
-  #Begin data type error checking code
67
-  charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix))
68
-  charCheck = c("simulation_id", "fixedDomain", "fixedMatrix")
69
-
70
-  boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs), !is.logical(sampleSnapshots))
71
-  boolCheck = c("output_atomic", "fixedBinProbs", "sampleSnapshots")
72
-
73
-  numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR), !is.numeric(numSnapshots),
74
-                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP),
75
-                        !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP))
76
-  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "numSnapshots", "alphaA", "nMaxA",
77
-                   "max_gibbmass_paraA", "alphaP",    "nMaxP", "max_gibbmass_paraP")
78
-
79
-  dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP))
80
-  dataFrameCheck = c("D", "S", "ABins", "PBins", "FP")
81
-
82
-  matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP))
83
-
84
-  if(any(charDataErrors))
85
-  {
86
-    #Check which of these is not a string
87
-    for(i in 1:length(charDataErrors))
88
-    {
89
-      if(charDataErrors[i] == TRUE)
90
-      {
91
-        stop(paste("Error in gapsMapRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details."))
92
-      }
93
-    }
94
-  }
95
-
96
-  if(any(boolDataErrors))
97
-  {
98
-    #Check which of these is not a boolean
99
-    for(i in 1:length(boolDataErrors))
100
-    {
101
-      if(boolDataErrors[i] == TRUE)
102
-      {
103
-        stop(paste("Error in gapsMapRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details."))
104
-      }
105
-    }
106
-  }
107
-
108
-  if(any(numericDataErrors))
109
-  {
110
-    #Check which of these is not an integer/double
111
-    for(i in 1:length(numericDataErrors))
112
-    {
113
-      if(numericDataErrors[i] == TRUE)
114
-      {
115
-        stop(paste("Error in gapsMapRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details."))
116
-
117
-      }
118
-    }
119
-  }
120
-
121
-
122
-  #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame
123
-  if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5])))
124
-  {
125
-    for(i in 1:length(dataFrameCheck))
126
-    {
127
-      if((dataFrameErrors[i] && matrixErrors[i]) == TRUE)
128
-      {
129
-        stop(paste("Error in gapsMapRun: Argument",dataFrameCheck[i],"is not a matrix or data.frame. Please see documentation for details."))
130
-
131
-      }
132
-    }
133
-  }
134
-
135
-  #Floor the parameters that are integers to prevent allowing doubles.
136
-  nFactor = floor(nFactor)
137
-  nEquil = floor(nEquil)
138
-  nSample = floor(nSample)
139
-  nOutR = floor(nOutR)
140
-  numSnapshots = floor(numSnapshots)
141
-  nMaxA = floor(nMaxA)
142
-  nMaxP = floor(nMaxP)
143
-
144
-  # pass all settings to C++ within a list
145
-  #    if (is.null(P)) {
146
-  Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix, sampleSnapshots);
147
-
148
-  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA,
149
-                 alphaP, nMaxP, max_gibbmass_paraP, numSnapshots);
150
-
151
-  #Begin logic error checking code
152
-
153
-  #Check for negative or zero arguments
154
-  if(any(ConfigNums <= 0))
155
-  {
156
-    stop("Error in gapsMapRun: Numeric Arguments cannot be non-zero!")
157
-  }
158
-
159
-  #Check for nonsensical inputs (such as numSnapshots < nEquil or nSample)
160
-  if((numSnapshots > nEquil) || (numSnapshots > nSample))
161
-  {
162
-    stop("Error in gapsMapRun: Cannot have more snapshots of A and P than equilibration and/or sampling iterations.")
163
-  }
164
-
165
-  if((nOutR > nEquil) || (nOutR > nSample))
166
-  {
167
-    stop("Error in gapsMapRun: Cannot have more output steps than equilibration and/or sampling iterations.")
168
-  }
169
-
170
-  if(ncol(FP) != ncol(D))
171
-  {
172
-    stop("Error in gapsMapRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.")
173
-  }
174
-
175
-  if(nFactor < (nrow(FP)))
176
-  {
177
-    stop("Error in gapsMapRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.")
178
-  }
179
-
180
-  if(nFactor > (ncol(D)))
181
-  {
182
-    warning("Warning in gapsMapRun: Number of requested patterns greater than columns of Data Matrix.")
183
-  }
184
-
185
-
186
-
187
-  #        P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass
188
-  #    } else {
189
-  #        Config = c(nFactor, simulation_id, nEquil, nSample, nOutR,
190
-  #        output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor,
191
-  #        alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1)
192
-
193
-  #   }
194
-
195
-  geneNames = rownames(D);
196
-  sampleNames = colnames(D);
197
-
198
-  # label patterns as Patt N
199
-  patternNames = c("0");
200
-  for(i in 1:nFactor)
201
-  {
202
-    patternNames[i] = paste('Patt', i);
203
-  }
204
-
205
-  # call to C++ Rcpp code
206
-  cogapResult = cogapsMap(D, S, FP, ABins, PBins, Config, ConfigNums, seed,
207
-                          messages);
208
-
209
-  # convert returned files to matrices to simplify visualization and processing
210
-  cogapResult$Amean = as.matrix(cogapResult$Amean);
211
-  cogapResult$Asd = as.matrix(cogapResult$Asd);
212
-  cogapResult$Pmean = as.matrix(cogapResult$Pmean);
213
-  cogapResult$Psd = as.matrix(cogapResult$Psd);
214
-
215
-  # label matrices
216
-  colnames(cogapResult$Amean) = patternNames;
217
-  rownames(cogapResult$Amean) = geneNames;
218
-  colnames(cogapResult$Asd) = patternNames;
219
-  rownames(cogapResult$Asd) = geneNames;
220
-  colnames(cogapResult$Pmean) = sampleNames;
221
-  rownames(cogapResult$Pmean) = patternNames;
222
-  colnames(cogapResult$Psd) = sampleNames;
223
-  rownames(cogapResult$Psd) = patternNames;
224
-
225
-  # calculate chi-squared of mean, this should be smaller than individual
226
-  # chi-squared sample values if sampling is good
227
-  calcChiSq = c(0);
228
-  MMatrix = (cogapResult$Amean %*% cogapResult$Pmean);
229
-
230
-
231
-  for(i in 1:(nrow(MMatrix)))
232
-  {
233
-    for(j in 1:(ncol(MMatrix)))
234
-    {
235
-      calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2);
236
-    }
237
-  }
238
-
239
-  cogapResult = c(cogapResult, calcChiSq);
240
-
241
-
242
-  names(cogapResult)[12] = "meanChi2";
243
-
244
-  message(paste("Chi-Squared of Mean:",calcChiSq))
245
-
246
-  return(cogapResult);
66
+    gapsRun(D, S, ABins, PBins, nFactor, simulation_id, nEquil, nSample, nOutR, output_atomic, fixedBinProbs,
67
+        fixedDomain, sampleSnapshots, numSnapshots, alphaA, nMaxA, max_gibbmass_paraA, alphaP, nMaxP,
68
+        max_gibbmass_paraP, seed, messages, FALSE, FP, 'P')
247 69
 }
248 70
deleted file mode 100755
... ...
@@ -1,234 +0,0 @@
1
-
2
-# gapsMapTestRun: function to call C++ cogaps code
3
-# History: v1.0 CK with MFO edits, August 2014
4
-
5
-# Inputs: D - data matrix
6
-#         S - uncertainty matrix (std devs for chi-squared of Log Likelihood)
7
-#         FP - fixed A columns or P rows
8
-#         nFactor - number of patterns (basis vectors, metagenes)
9
-#         simulation_id - name to attach to atoms files if created
10
-#         nEquil - number of iterations for burn-in
11
-#         nSample - number of iterations for sampling
12
-#         nOutR - how often to print status into R by iterations
13
-#         output_atomic - whether to write atom files (large)
14
-#         alphaA, alphaP - sparsity parameters for A and P domains
15
-#         max_gibbmass_paraA(P) - limit truncated normal to max size
16
-#         nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms
17
-
18
-
19
-# Output: list with A and P matrix estimates, chi-squared and atom
20
-#         numbers of sample by iteration, and chi-squared of mean
21
-
22
-#'\code{gapsMapTestRun} calls the C++ MCMC code and performs Bayesian
23
-#'matrix factorization returning the two matrices that reconstruct
24
-#'the data matrix; as opposed to gapsRun, this method takes an
25
-#'additional input specifying set patterns in the P matrix
26
-#'
27
-#'@param D data matrix
28
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood)
29
-#'@param FP data.frame with rows giving fixed patterns for P
30
-#'@param ABins a matrix of same size as A which gives relative
31
-#' probability of that element being non-zero
32
-#'@param PBins a matrix of same size as P which gives relative
33
-#' probability of that element being non-zero
34
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be
35
-#'greater than or equal to the number of rows of FP
36
-#'@param  simulation_id name to attach to atoms files if created
37
-#'@param  nEquil number of iterations for burn-in
38
-#'@param  nSample number of iterations for sampling
39
-#'@param  nOutR how often to print status into R by iterations
40
-#'@param  output_atomic whether to write atom files (large)
41
-#'@param  fixedMatrix character indicating whether A or P matrix
42
-#' has fixed columns or rows respectively
43
-#'@param fixedBinProbs Boolean for using relative probabilities
44
-#' given in Abins and Pbins
45
-#'@param fixedDomain character to indicate whether A or P is
46
-#' domain for relative probabilities
47
-#'@param alphaA sparsity parameter for A domain
48
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms
49
-#'@param max_gibbmass_paraA limit truncated normal to max size
50
-#'@param alphaP sparsity parameter for P domain
51
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms
52
-#'@param max_gibbmass_paraP limit truncated normal to max size
53
-#'@export
54
-
55
-gapsMapTestRun <- function(D, S, FP, ABins = data.frame(), PBins = data.frame(),
56
-                           nFactor = 7, simulation_id = "simulation",
57
-                           nEquil = 1000, nSample = 1000, nOutR = 1000,
58
-                           output_atomic = FALSE, fixedMatrix = "P",
59
-                           fixedBinProbs = FALSE, fixedDomain = "N",
60
-                           alphaA = 0.01,  nMaxA = 100000,
61
-                           max_gibbmass_paraA = 100.0, alphaP = 0.01,
62
-                           nMaxP = 100000, max_gibbmass_paraP = 100.0)
63
-{
64
-
65
-  #Begin data type error checking code
66
-  charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain), !is.character(fixedMatrix))
67
-  charCheck = c("simulation_id", "fixedDomain", "fixedMatrix")
68
-
69
-  boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs))
70
-  boolCheck = c("output_atomic", "fixedBinProbs")
71
-
72
-  numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR),
73
-                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP),
74
-                        !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP))
75
-  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA",
76
-                   "max_gibbmass_paraA", "alphaP",    "nMaxP", "max_gibbmass_paraP")
77
-
78
-  dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins), !is.data.frame(FP))
79
-  dataFrameCheck = c("D", "S", "ABins", "PBins", "FP")
80
-
81
-  matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins), !is.matrix(FP))
82
-
83
-  if(any(charDataErrors))
84
-  {
85
-    #Check which of these is not a string
86
-    for(i in 1:length(charDataErrors))
87
-    {
88
-      if(charDataErrors[i] == TRUE)
89
-      {
90
-        stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details."))
91
-      }
92
-    }
93
-  }
94
-
95
-  if(any(boolDataErrors))
96
-  {
97
-    #Check which of these is not a boolean
98
-    for(i in 1:length(boolDataErrors))
99
-    {
100
-      if(boolDataErrors[i] == TRUE)
101
-      {
102
-        stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details."))
103
-      }
104
-    }
105
-  }
106
-
107
-  if(any(numericDataErrors))
108
-  {
109
-    #Check which of these is not an integer/double
110
-    for(i in 1:length(numericDataErrors))
111
-    {
112
-      if(numericDataErrors[i] == TRUE)
113
-      {
114
-        stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details."))
115
-
116
-      }
117
-    }
118
-  }
119
-
120
-
121
-  #At least one of A, P, ABins, PBins, or FP is not a matrix or data.frame
122
-  if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4]), (dataFrameErrors[5] && matrixErrors[5])))
123
-  {
124
-    for(i in 1:length(dataFrameCheck))
125
-    {
126
-      if((dataFrameErrors[i] && matrixErrors[i]) == TRUE)
127
-      {
128
-        stop(paste("Error in gapsRun: Argument", dataFrameCheck[i], "is not a matrix or data.frame. Please see documentation for details."))
129
-
130
-      }
131
-    }
132
-  }
133
-
134
-  #Floor the parameters that are integers to prevent allowing doubles.
135
-  nFactor = floor(nFactor)
136
-  nEquil = floor(nEquil)
137
-  nSample = floor(nSample)
138
-  nOutR = floor(nOutR)
139
-  nMaxA = floor(nMaxA)
140
-  nMaxP = floor(nMaxP)
141
-
142
-  # pass all settings to C++ within a list
143
-  #    if (is.null(P)) {
144
-  Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain, fixedMatrix);
145
-
146
-  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA,
147
-                 alphaP, nMaxP, max_gibbmass_paraP);
148
-
149
-  #Begin logic error checking code
150
-
151
-  #Check for negative or zero arguments
152
-  if(any(ConfigNums <= 0))
153
-  {
154
-    stop("Error in gapsRun: Numeric Arguments cannot be non-zero!")
155
-  }
156
-
157
-  if((nOutR > nEquil) || (nOutR > nSample))
158
-  {
159
-    stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.")
160
-  }
161
-
162
-  if(ncol(FP) != ncol(D))
163
-  {
164
-    stop("Error in gapsRun: Columns of Data Matrix and Fixed Pattern Matrix do not line up. Please see documentation for details.")
165
-  }
166
-
167
-  if(nFactor < (nrow(FP)))
168
-  {
169
-    stop("Error in gapsRun: Number of patterns cannot be less than the rows of the patterns to fix (FP). Please see documentation for details.")
170
-  }
171
-
172
-  if(nFactor > (ncol(D)))
173
-  {
174
-    warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.")
175
-  }
176
-
177
-  #        P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass
178
-  #    } else {
179
-  #        Config = c(nFactor, simulation_id, nEquil, nSample, nOutR,
180
-  #        output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor,
181
-  #        alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1)
182
-
183
-  #   }
184
-
185
-  geneNames = rownames(D);
186
-  sampleNames = colnames(D);
187
-
188
-  # label patterns as Patt N
189
-  patternNames = c("0");
190
-  for(i in 1:nFactor)
191
-  {
192
-    patternNames[i] = paste('Patt', i);
193
-  }
194
-
195
-  # call to C++ Rcpp code
196
-  cogapResult = cogapsMapTest(D, S, FP, ABins, PBins, Config, ConfigNums);
197
-
198
-  # convert returned files to matrices to simplify visualization and processing
199
-  cogapResult$Amean = as.matrix(cogapResult$Amean);
200
-  cogapResult$Asd = as.matrix(cogapResult$Asd);
201
-  cogapResult$Pmean = as.matrix(cogapResult$Pmean);
202
-  cogapResult$Psd = as.matrix(cogapResult$Psd);
203
-
204
-  # label matrices
205
-  colnames(cogapResult$Amean) = patternNames;
206
-  rownames(cogapResult$Amean) = geneNames;
207
-  colnames(cogapResult$Asd) = patternNames;
208
-  rownames(cogapResult$Asd) = geneNames;
209
-  colnames(cogapResult$Pmean) = sampleNames;
210
-  rownames(cogapResult$Pmean) = patternNames;
211
-  colnames(cogapResult$Psd) = sampleNames;
212
-  rownames(cogapResult$Psd) = patternNames;
213
-
214
-  # calculate chi-squared of mean, this should be smaller than individual
215
-  # chi-squared sample values if sampling is good
216
-  calcChiSq = c(0);
217
-  MMatrix = (cogapResult$Amean %*% cogapResult$Pmean);
218
-
219
-
220
-  for(i in 1:(nrow(MMatrix)))
221
-  {
222
-    for(j in 1:(ncol(MMatrix)))
223
-    {
224
-      calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2);
225
-    }
226
-  }
227
-
228
-  cogapResult = c(cogapResult, calcChiSq);
229
-  names(cogapResult)[20] = "meanChi2";
230
-
231
-  message(paste("Chi-Squared of Mean:",calcChiSq))
232
-
233
-  return(cogapResult);
234
-}
... ...
@@ -50,6 +50,9 @@
50 50
 #'@param seed Set seed for reproducibility. Positive values provide initial seed, negative values just use the time.
51 51
 #'@param messages Display progress messages
52 52
 #'@param singleCellRNASeq T/F indicating if the data is single cell RNA-seq data
53
+#'@param fixedPatterns matrix of fixed values in either A or P matrix
54
+#'@param whichMatrixFixed character to indicate whether A or P matrix
55
+#'  contains the fixed patterns
53 56
 #'@export
54 57
 
55 58
 #--CHANGES 1/20/15--
... ...
@@ -62,12 +65,29 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
62 65
                     numSnapshots = 100, alphaA = 0.01,
63 66
                     nMaxA = 100000, max_gibbmass_paraA = 100.0,
64 67
                     alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0,
65
-                    seed=-1, messages=TRUE, singleCellRNASeq=FALSE)
68
+                    seed=-1, messages=TRUE, singleCellRNASeq=FALSE,
69
+                    fixedPatterns = matrix(0), whichMatrixFixed = 'N')
66 70
 {
67
-    #Floor the parameters that are integers to prevent allowing doubles.
71
+    # Floor the parameters that are integers to prevent allowing doubles.
68 72
     nFactor <- floor(nFactor)
69 73
     nEquil <- floor(nEquil)
70 74
     nSample <- floor(nSample)
75
+    nOutR <- floor(nOutR)
76
+    numSnapshots <- floor(numSnapshots)
77
+    nMaxA <- floor(nMaxA)
78
+    nMaxP <- floor(nMaxP)
79
+
80
+    # check the fixed patterns
81
+    if ((whichMatrixFixed == 'A' & nrow(fixedPatterns) != nrow(D))
82
+    | (whichMatrixFixed == 'P' & nrow(fixedPatterns) > nFactor)) 
83
+    {
84
+        stop('invalid number of rows in fixed pattern matrix')
85
+    }
86
+    if ((whichMatrixFixed == 'A' & ncol(fixedPatterns) > nFactor)
87
+    | (whichMatrixFixed == 'P' & ncol(fixedPatterns) != ncol(D))) 
88
+    {
89
+        stop('invalid number of cols in fixed pattern matrix')
90
+    }
71 91
 
72 92
     geneNames <- rownames(D);
73 93
     sampleNames <- colnames(D);
... ...
@@ -82,7 +102,7 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
82 102
     # call to C++ Rcpp code
83 103
     cogapResult <- cogaps(as.matrix(D), as.matrix(S), nFactor, alphaA, alphaP,
84 104
         nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, max_gibbmass_paraP,
85
-        seed, messages, singleCellRNASeq)
105
+        fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq)
86 106
 
87 107
     # convert returned files to matrices to simplify visualization and processing
88 108
     cogapResult$Amean <- as.matrix(cogapResult$Amean);
... ...
@@ -117,8 +137,7 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
117 137
     
118 138
     # names(cogapResult)[13] <- "meanChi2";
119 139
 
120
-    #if (messages)
121
-        message(paste("Chi-Squared of Mean:", calcChiSq))
140
+    message(paste("Chi-Squared of Mean:", calcChiSq))
122 141
 
123 142
     return(cogapResult);
124 143
 }
125 144
deleted file mode 100755
... ...
@@ -1,216 +0,0 @@
1
-
2
-# gapsTestRun: function to call C++ cogaps code
3
-# History: v1.0 CK with MFO edits, August 2014
4
-
5
-# Inputs: D - data matrix
6
-#         S - uncertainty matrix (std devs for chi-squared of Log Likelihood)
7
-#         nFactor - number of patterns (basis vectors, metagenes)
8
-#         simulation_id - name to attach to atoms files if created
9
-#         nEquil - number of iterations for burn-in
10
-#         nSample - number of iterations for sampling
11
-#         nOutR - how often to print status into R by iterations
12
-#         output_atomic - whether to write atom files (large)
13
-#         alphaA, alphaP - sparsity parameters for A and P domains
14
-#         max_gibbmass_paraA(P) - limit truncated normal to max size
15
-#         nMaxA, nMaxP - PRESENTLY UNUSED, future = limit number of atoms
16
-
17
-
18
-# Output: list with A and P matrix estimates, chi-squared and atom
19
-#         numbers of sample by iteration, and chi-squared of mean
20
-#'\code{gapsTestRun} calls the C++ MCMC code and performs Bayesian
21
-#'matrix factorization returning the two matrices that reconstruct
22
-#'the data matrix
23
-#'
24
-#'@param D data matrix
25
-#'@param S uncertainty matrix (std devs for chi-squared of Log Likelihood)
26
-#'@param ABins a matrix of same size as A which gives relative
27
-#' probability of that element being non-zero
28
-#'@param PBins a matrix of same size as P which gives relative
29
-#' probability of that element being non-zero
30
-#'@param nFactor number of patterns (basis vectors, metagenes), which must be
31
-#'greater than or equal to the number of rows of FP
32
-#'@param  simulation_id name to attach to atoms files if created
33
-#'@param  nEquil number of iterations for burn-in
34
-#'@param  nSample number of iterations for sampling
35
-#'@param  nOutR how often to print status into R by iterations
36
-#'@param  output_atomic whether to write atom files (large)
37
-#'@param fixedBinProbs Boolean for using relative probabilities
38
-#' given in Abins and Pbins
39
-#'@param fixedDomain character to indicate whether A or P is
40
-#' domain for relative probabilities
41
-#'@param alphaA sparsity parameter for A domain
42
-#'@param nMaxA PRESENTLY UNUSED, future = limit number of atoms
43
-#'@param max_gibbmass_paraA limit truncated normal to max size
44
-#'@param alphaP sparsity parameter for P domain
45
-#'@param nMaxP PRESENTLY UNUSED, future = limit number of atoms
46
-#'@param max_gibbmass_paraP limit truncated normal to max size
47
-#'@export
48
-
49
-gapsTestRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
50
-                        nFactor = 7, simulation_id = "simulation", nEquil = 1000,
51
-                        nSample = 1000, nOutR = 1000, output_atomic = FALSE,
52
-                        fixedBinProbs = FALSE, fixedDomain = "N", alphaA = 0.01,
53
-                        nMaxA = 100000, max_gibbmass_paraA = 100.0, alphaP = 0.01,
54
-                        nMaxP = 100000, max_gibbmass_paraP = 100.0)
55
-{
56
-  #Begin data type error checking code
57
-  charDataErrors = c(!is.character(simulation_id), !is.character(fixedDomain))
58
-  charCheck = c("simulation_id", "fixedDomain")
59
-
60
-  boolDataErrors = c(!is.logical(output_atomic), !is.logical(fixedBinProbs))
61
-  boolCheck = c("output_atomic", "fixedBinProbs")
62
-
63
-  numericDataErrors = c(!is.numeric(nFactor), !is.numeric(nEquil), !is.numeric(nSample), !is.numeric(nOutR),
64
-                        !is.numeric(alphaA), !is.numeric(nMaxA), !is.numeric(max_gibbmass_paraA), !is.numeric(alphaP),
65
-                        !is.numeric(nMaxP), !is.numeric(max_gibbmass_paraP))
66
-  numericCheck = c("nFactor", "nEquil", "nSample", "nOutR", "alphaA", "nMaxA",
67
-                   "max_gibbmass_paraA", "alphaP",    "nMaxP", "max_gibbmass_paraP")
68
-
69
-  dataFrameErrors = c(!is.data.frame(D), !is.data.frame(S), !is.data.frame(ABins), !is.data.frame(PBins))
70
-  dataFrameCheck = c("D", "S", "ABins", "PBins")
71
-
72
-  matrixErrors = c(!is.matrix(D), !is.matrix(S), !is.matrix(ABins), !is.matrix(PBins))
73
-
74
-  if(any(charDataErrors))
75
-  {
76
-    #Check which of these is not a string
77
-    for(i in 1:length(charDataErrors))
78
-    {
79
-      if(charDataErrors[i] == TRUE)
80
-      {
81
-        stop(paste("Error in gapsRun: Argument",charCheck[i],"is of the incorrect type. Please see documentation for details."))
82
-      }
83
-    }
84
-  }
85
-
86
-  if(any(boolDataErrors))
87
-  {
88
-    #Check which of these is not a boolean
89
-    for(i in 1:length(boolDataErrors))
90
-    {
91
-      if(boolDataErrors[i] == TRUE)
92
-      {
93
-        stop(paste("Error in gapsRun: Argument",boolCheck[i],"is of the incorrect type. Please see documentation for details."))
94
-      }
95
-    }
96
-  }
97
-
98
-  if(any(numericDataErrors))
99
-  {
100
-    #Check which of these is not an integer/double
101
-    for(i in 1:length(numericDataErrors))
102
-    {
103
-      if(numericDataErrors[i] == TRUE)
104
-      {
105
-        stop(paste("Error in gapsRun: Argument",numericCheck[i],"is of the incorrect type. Please see documentation for details."))
106
-
107
-      }
108
-    }
109
-  }
110
-
111
-
112
-  #At least one of A, P, ABins, or PBins is not a matrix or data.frame
113
-  if(any((dataFrameErrors[1] && matrixErrors[1]), (dataFrameErrors[2] && matrixErrors[2]), (dataFrameErrors[3] && matrixErrors[3]), (dataFrameErrors[4] && matrixErrors[4])))
114
-  {
115
-    for(i in 1:length(dataFrameCheck))
116
-    {
117
-      if((dataFrameErrors[i] && matrixErrors[i]) == TRUE)
118
-      {
119
-        stop(paste("Error in gapsRun: Argument",dataFrameCheck[i],"is not a matrix or data.frame. Please see documentation for details."))
120
-
121
-      }
122
-    }
123
-  }
124
-
125
-  #Floor the parameters that are integers to prevent allowing doubles.
126
-  nFactor = floor(nFactor)
127
-  nEquil = floor(nEquil)
128
-  nSample = floor(nSample)
129
-  nOutR = floor(nOutR)
130
-  nMaxA = floor(nMaxA)
131
-  nMaxP = floor(nMaxP)
132
-
133
-  # pass all settings to C++ within a list
134
-  #    if (is.null(P)) {
135
-  Config = c(simulation_id, output_atomic, fixedBinProbs, fixedDomain);
136
-
137
-  ConfigNums = c(nFactor, nEquil, nSample, nOutR, alphaA, nMaxA, max_gibbmass_paraA,
138
-                 alphaP, nMaxP, max_gibbmass_paraP);
139
-
140
-
141
-  #Begin logic error checking code
142
-
143
-  #Check for negative or zero arguments
144
-  if(any(ConfigNums <= 0))
145
-  {
146
-    stop("Error in gapsRun: Numeric Arguments cannot be non-zero!")
147
-  }
148
-
149
-  if((nOutR > nEquil) || (nOutR > nSample))
150
-  {
151
-    stop("Error in gapsRun: Cannot have more output steps than equilibration and/or sampling iterations.")
152
-  }
153
-
154
-  if(nFactor > (ncol(D)))
155
-  {
156
-    warning("Warning in gapsRun: Number of requested patterns greater than columns of Data Matrix.")
157
-  }
158
-
159
-  #        P <- as.data.frame(matrix(nrow=1,c(1,1,1))) # make something to pass
160
-  #    } else {
161
-  #        Config = c(nFactor, simulation_id, nEquil, nSample, nOutR,
162
-  #        output_atomic, alphaA, nMaxA, max_gibbmass_paraA, lambdaA_scale_factor,
163
-  #        alphaP, nMaxP, max_gibbmass_paraP, lambdaP_scale_factor, 1)
164
-
165
-  #   }
166
-
167
-  geneNames = rownames(D);
168
-  sampleNames = colnames(D);
169
-
170
-  # label patterns as Patt N
171
-  patternNames = c("0");
172
-  for(i in 1:nFactor)
173
-  {
174
-    patternNames[i] = paste('Patt', i);
175
-  }
176
-
177
-  # call to C++ Rcpp code
178
-  cogapResult = cogapsTest(D, S, ABins, PBins, Config, ConfigNums);
179
-
180
-  # convert returned files to matrices to simplify visualization and processing
181
-  cogapResult$Amean = as.matrix(cogapResult$Amean);
182
-  cogapResult$Asd = as.matrix(cogapResult$Asd);
183
-  cogapResult$Pmean = as.matrix(cogapResult$Pmean);
184
-  cogapResult$Psd = as.matrix(cogapResult$Psd);
185
-
186
-  # label matrices
187
-  colnames(cogapResult$Amean) = patternNames;
188
-  rownames(cogapResult$Amean) = geneNames;
189
-  colnames(cogapResult$Asd) = patternNames;
190
-  rownames(cogapResult$Asd) = geneNames;
191
-  colnames(cogapResult$Pmean) = sampleNames;
192
-  rownames(cogapResult$Pmean) = patternNames;
193
-  colnames(cogapResult$Psd) = sampleNames;
194
-  rownames(cogapResult$Psd) = patternNames;
195
-
196
-  # calculate chi-squared of mean, this should be smaller than individual
197
-  # chi-squared sample values if sampling is good
198
-  calcChiSq = c(0);
199
-  MMatrix = (cogapResult$Amean %*% cogapResult$Pmean);
200
-
201
-
202
-  for(i in 1:(nrow(MMatrix)))
203
-  {
204
-    for(j in 1:(ncol(MMatrix)))
205
-    {
206
-      calcChiSq = (calcChiSq) + ((D[i,j] - MMatrix[i,j])/(S[i,j]))^(2);
207
-    }
208
-  }
209
-
210
-  cogapResult = c(cogapResult, calcChiSq);
211
-  names(cogapResult)[20] = "meanChi2";
212
-
213
-  message(paste("Chi-Squared of Mean:",calcChiSq))
214
-
215
-  return(cogapResult);
216
-}
... ...
@@ -8,7 +8,7 @@ the data matrix; as opposed to gapsRun, this method takes an
8 8
 additional input specifying set patterns in the P matrix}
9 9
 \usage{
10 10
 gapsMapRun(D, S, FP, ABins = data.frame(), PBins = data.frame(),
11
-  nFactor = 5, simulation_id = "simulation", nEquil = 1000,
11
+  nFactor = 7, simulation_id = "simulation", nEquil = 1000,
12 12
   nSample = 1000, nOutR = 1000, output_atomic = FALSE,
13 13
   fixedMatrix = "P", fixedBinProbs = FALSE, fixedDomain = "N",
14 14
   sampleSnapshots = TRUE, numSnapshots = 100, alphaA = 0.01,
15 15
deleted file mode 100755
... ...
@@ -1,70 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/gapsMapTestRun.R
3
-\name{gapsMapTestRun}
4
-\alias{gapsMapTestRun}
5
-\title{\code{gapsMapTestRun} calls the C++ MCMC code and performs Bayesian
6
-matrix factorization returning the two matrices that reconstruct
7
-the data matrix; as opposed to gapsRun, this method takes an
8
-additional input specifying set patterns in the P matrix}
9
-\usage{
10
-gapsMapTestRun(D, S, FP, ABins = data.frame(), PBins = data.frame(),
11
-  nFactor = 7, simulation_id = "simulation", nEquil = 1000,
12
-  nSample = 1000, nOutR = 1000, output_atomic = FALSE,
13
-  fixedMatrix = "P", fixedBinProbs = FALSE, fixedDomain = "N",
14
-  alphaA = 0.01, nMaxA = 1e+05, max_gibbmass_paraA = 100, alphaP = 0.01,
15
-  nMaxP = 1e+05, max_gibbmass_paraP = 100)
16
-}
17
-\arguments{
18
-\item{D}{data matrix}
19
-
20
-\item{S}{uncertainty matrix (std devs for chi-squared of Log Likelihood)}
21
-
22
-\item{FP}{data.frame with rows giving fixed patterns for P}
23
-
24
-\item{ABins}{a matrix of same size as A which gives relative
25
-probability of that element being non-zero}
26
-
27
-\item{PBins}{a matrix of same size as P which gives relative
28
-probability of that element being non-zero}
29
-
30
-\item{nFactor}{number of patterns (basis vectors, metagenes), which must be
31
-greater than or equal to the number of rows of FP}
32
-
33
-\item{simulation_id}{name to attach to atoms files if created}
34
-
35
-\item{nEquil}{number of iterations for burn-in}
36
-
37
-\item{nSample}{number of iterations for sampling}
38
-
39
-\item{nOutR}{how often to print status into R by iterations}
40
-
41
-\item{output_atomic}{whether to write atom files (large)}
42
-
43
-\item{fixedMatrix}{character indicating whether A or P matrix
44
-has fixed columns or rows respectively}
45
-
46
-\item{fixedBinProbs}{Boolean for using relative probabilities
47
-given in Abins and Pbins}
48
-
49
-\item{fixedDomain}{character to indicate whether A or P is
50
-domain for relative probabilities}
51
-
52
-\item{alphaA}{sparsity parameter for A domain}
53
-
54
-\item{nMaxA}{PRESENTLY UNUSED, future = limit number of atoms}
55
-
56
-\item{max_gibbmass_paraA}{limit truncated normal to max size}
57
-
58
-\item{alphaP}{sparsity parameter for P domain}
59
-
60
-\item{nMaxP}{PRESENTLY UNUSED, future = limit number of atoms}
61
-
62
-\item{max_gibbmass_paraP}{limit truncated normal to max size}
63
-}
64
-\description{
65
-\code{gapsMapTestRun} calls the C++ MCMC code and performs Bayesian
66
-matrix factorization returning the two matrices that reconstruct
67
-the data matrix; as opposed to gapsRun, this method takes an
68
-additional input specifying set patterns in the P matrix
69
-}
70
-
... ...
@@ -12,7 +12,8 @@ gapsRun(D, S, ABins = data.frame(), PBins = data.frame(), nFactor = 7,
12 12
   fixedDomain = "N", sampleSnapshots = TRUE, numSnapshots = 100,
13 13
   alphaA = 0.01, nMaxA = 1e+05, max_gibbmass_paraA = 100, alphaP = 0.01,
14 14
   nMaxP = 1e+05, max_gibbmass_paraP = 100, seed = -1, messages = TRUE,
15
-  singleCellRNASeq = FALSE)
15
+  singleCellRNASeq = FALSE, fixedPatterns = matrix(0),
16
+  whichMatrixFixed = "N")
16 17
 }
17 18
 \arguments{
18 19
 \item{D}{data matrix}
... ...
@@ -66,6 +67,11 @@ individual samples from Markov chain during sampling}
66 67
 \item{messages}{Display progress messages}
67 68
 
68 69
 \item{singleCellRNASeq}{T/F indicating if the data is single cell RNA-seq data}
70
+
71
+\item{fixedPatterns}{matrix of fixed values in either A or P matrix}
72
+
73
+\item{whichMatrixFixed}{character to indicate whether A or P matrix
74
+contains the fixed patterns}
69 75
 }
70 76
 \description{
71 77
 \code{gapsRun} calls the C++ MCMC code and performs Bayesian
72 78
deleted file mode 100755
... ...
@@ -1,63 +0,0 @@
1
-% Generated by roxygen2: do not edit by hand
2
-% Please edit documentation in R/gapsTestRun.R
3
-\name{gapsTestRun}
4
-\alias{gapsTestRun}
5
-\title{\code{gapsTestRun} calls the C++ MCMC code and performs Bayesian
6
-matrix factorization returning the two matrices that reconstruct
7
-the data matrix}
8
-\usage{
9
-gapsTestRun(D, S, ABins = data.frame(), PBins = data.frame(), nFactor = 7,
10
-  simulation_id = "simulation", nEquil = 1000, nSample = 1000,
11
-  nOutR = 1000, output_atomic = FALSE, fixedBinProbs = FALSE,
12
-  fixedDomain = "N", alphaA = 0.01, nMaxA = 1e+05,
13
-  max_gibbmass_paraA = 100, alphaP = 0.01, nMaxP = 1e+05,
14
-  max_gibbmass_paraP = 100)
15
-}
16
-\arguments{
17
-\item{D}{data matrix}
18
-
19
-\item{S}{uncertainty matrix (std devs for chi-squared of Log Likelihood)}
20
-
21
-\item{ABins}{a matrix of same size as A which gives relative
22
-probability of that element being non-zero}
23
-
24
-\item{PBins}{a matrix of same size as P which gives relative
25
-probability of that element being non-zero}
26
-
27
-\item{nFactor}{number of patterns (basis vectors, metagenes), which must be
28
-greater than or equal to the number of rows of FP}
29
-
30
-\item{simulation_id}{name to attach to atoms files if created}
31
-
32
-\item{nEquil}{number of iterations for burn-in}
33
-
34
-\item{nSample}{number of iterations for sampling}
35
-
36
-\item{nOutR}{how often to print status into R by iterations}
37
-
38
-\item{output_atomic}{whether to write atom files (large)}
39
-
40
-\item{fixedBinProbs}{Boolean for using relative probabilities
41
-given in Abins and Pbins}
42
-
43
-\item{fixedDomain}{character to indicate whether A or P is
44
-domain for relative probabilities}
45
-
46
-\item{alphaA}{sparsity parameter for A domain}
47
-
48
-\item{nMaxA}{PRESENTLY UNUSED, future = limit number of atoms}
49
-
50
-\item{max_gibbmass_paraA}{limit truncated normal to max size}
51
-
52
-\item{alphaP}{sparsity parameter for P domain}
53
-
54
-\item{nMaxP}{PRESENTLY UNUSED, future = limit number of atoms}
55
-
56
-\item{max_gibbmass_paraP}{limit truncated normal to max size}
57
-}
58
-\description{
59
-\code{gapsTestRun} calls the C++ MCMC code and performs Bayesian
60
-matrix factorization returning the two matrices that reconstruct
61
-the data matrix
62
-}
63
-
... ...
@@ -100,6 +100,24 @@ const RowMatrix &meanMat, unsigned nUpdates)
100 100
     return retMat;
101 101
 }
102 102
 
103
+// horribly slow, don't call often
104
+void gaps::algo::matrixMultiplication(TwoWayMatrix &C, const ColMatrix &A,
105
+const RowMatrix &B)
106
+{
107
+    for (unsigned i = 0; i < C.nRow(); ++i)
108
+    {
109
+        for (unsigned j = 0; j < C.nCol(); ++j)
110
+        {
111
+            double sum = 0.0;
112
+            for (unsigned k = 0; k < A.nCol(); ++k)
113
+            {
114
+                sum += A(i,k) * B(k,j);
115
+            }
116
+            C.set(i, j, sum);
117
+        }
118
+    }
119
+}
120
+
103 121
 Vector gaps::algo::scalarMultiple(const Vector &vec, double n)
104 122
 {
105 123
     Vector retVec(vec.size());
... ...
@@ -133,6 +151,16 @@ ColMatrix gaps::algo::scalarDivision(const ColMatrix &mat, double n)
133 151
     return retMat;
134 152
 }
135 153
 
154
+Vector gaps::algo::scalarDivision(const Vector &vec, matrix_data_t n)
155
+{
156
+    Vector temp(vec.size());
157
+    for (unsigned i = 0; i < vec.size(); ++i)
158
+    {
159
+        temp(i) = vec(i) / n;
160
+    }
161
+    return temp;
162
+}
163
+
136 164
 RowMatrix gaps::algo::scalarDivision(const RowMatrix &mat, double n)
137 165
 {
138 166
     RowMatrix retMat(mat.nRow(), mat.nCol());
... ...
@@ -33,6 +33,9 @@ namespace algo
33 33
     matrix_data_t mean(const TwoWayMatrix &mat);
34 34
     matrix_data_t nonZeroMean(const TwoWayMatrix &mat);
35 35
 
36
+    void matrixMultiplication(TwoWayMatrix &C, const ColMatrix &A,
37
+        const RowMatrix &B);
38
+
36 39
     ColMatrix computeStdDev(const ColMatrix &stdMat, const ColMatrix &meanMat,
37 40
         unsigned nUpdates);
38 41
 
... ...
@@ -19,7 +19,8 @@ Vector& pAtomVec, GapsPhase phase);
19 19
 Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix,
20 20
 unsigned nFactor, double alphaA, double alphaP, unsigned nEquil,
21 21
 unsigned nEquilCool, unsigned nSample, double maxGibbsMassA,
22
-double maxGibbsMassP, int seed=-1, bool messages=false, bool singleCellRNASeq=false)
22
+double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
23
+char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq)
23 24
 {
24 25
     // set seed
25 26
     uint32_t seedUsed = static_cast<uint32_t>(seed);
... ...
@@ -34,7 +35,8 @@ double maxGibbsMassP, int seed=-1, bool messages=false, bool singleCellRNASeq=fa
34 35
 
35 36
     // create the gibbs sampler
36 37
     GibbsSampler sampler(DMatrix, SMatrix, nFactor, alphaA, alphaP,
37
-        maxGibbsMassA, maxGibbsMassP, singleCellRNASeq);
38
+        maxGibbsMassA, maxGibbsMassP, singleCellRNASeq, fixedPatterns,
39
+        whichMatrixFixed);
38 40
 
39 41
     // initial number of iterations for each matrix
40 42
     unsigned nIterA = 10;
... ...
@@ -63,11 +65,15 @@ double maxGibbsMassP, int seed=-1, bool messages=false, bool singleCellRNASeq=fa
63 65
     chi2Vec.concat(chi2VecSample);
64 66
 
65 67
     //Just leave the snapshots as empty lists
68
+    Rcpp::List ASnapR = Rcpp::List::create();
69
+    Rcpp::List PSnapR = Rcpp::List::create();
66 70
     return Rcpp::List::create(
67 71
         Rcpp::Named("Amean") = sampler.AMeanRMatrix(),
68 72
         Rcpp::Named("Asd") = sampler.AStdRMatrix(),
69 73
         Rcpp::Named("Pmean") = sampler.PMeanRMatrix(),
70 74
         Rcpp::Named("Psd") = sampler.PStdRMatrix(),
75
+        Rcpp::Named("ASnapshots") = ASnapR,
76
+        Rcpp::Named("PSnapshots") = PSnapR,
71 77
         Rcpp::Named("atomsAEquil") = nAtomsAEquil.rVec(),
72 78
         Rcpp::Named("atomsASamp") = nAtomsASample.rVec(),
73 79
         Rcpp::Named("atomsPEquil") = nAtomsPEquil.rVec(),
... ...
@@ -7,7 +7,8 @@ static const double EPSILON = 1.e-10;
7 7
 
8 8
 GibbsSampler::GibbsSampler(Rcpp::NumericMatrix D, Rcpp::NumericMatrix S,
9 9
 unsigned int nFactor, double alphaA, double alphaP, double maxGibbsMassA,
10
-double maxGibbsMassP, bool singleCellRNASeq)
10
+double maxGibbsMassP, bool singleCellRNASeq, Rcpp::NumericMatrix fixedPat,
11
+char whichMat)
11 12
     :
12 13
 mDMatrix(D), mSMatrix(S), mAMatrix(D.nrow(), nFactor),
13 14
 mPMatrix(nFactor, D.ncol()), mAPMatrix(D.nrow(), D.ncol()),
... ...
@@ -16,7 +17,7 @@ mMaxGibbsMassA(maxGibbsMassA), mMaxGibbsMassP(maxGibbsMassP),
16 17
 mAnnealingTemp(1.0), mChi2(0.0), mSingleCellRNASeq(singleCellRNASeq),
17 18
 mAMeanMatrix(D.nrow(), nFactor), mAStdMatrix(D.nrow(), nFactor),
18 19
 mPMeanMatrix(nFactor, D.ncol()), mPStdMatrix(nFactor, D.ncol()),
19
-mStatUpdates(0)
20
+mStatUpdates(0), mFixedMat(whichMat)
20 21
 {
21 22
     double meanD = mSingleCellRNASeq ? gaps::algo::nonZeroMean(mDMatrix)
22 23
         : gaps::algo::mean(mDMatrix);
... ...
@@ -29,6 +30,27 @@ mStatUpdates(0)
29 30
     mMaxGibbsMassA /= mADomain.lambda();
30 31
     mMaxGibbsMassP /= mPDomain.lambda();
31 32
 
33
+    if (mFixedMat == 'A')
34
+    {
35
+        mNumFixedPatterns = fixedPat.ncol();
36
+        ColMatrix temp(fixedPat);
37
+        for (unsigned j = 0; j < mNumFixedPatterns; ++j)
38
+        {
39
+            mAMatrix.getCol(j) = gaps::algo::scalarDivision(temp.getCol(j),
40
+                gaps::algo::sum(temp.getCol(j)));
41
+        }
42
+    }
43
+    else if (mFixedMat == 'P')
44
+    {
45
+        mNumFixedPatterns = fixedPat.nrow();
46
+        RowMatrix temp(fixedPat);
47
+        for (unsigned i = 0; i < mNumFixedPatterns; ++i)
48
+        {
49
+            mPMatrix.getRow(i) = gaps::algo::scalarDivision(temp.getRow(i),
50
+                gaps::algo::sum(temp.getRow(i)));
51
+        }
52
+    }
53
+    gaps::algo::matrixMultiplication(mAPMatrix, mAMatrix, mPMatrix);
32 54
     mChi2 = 2.0 * gaps::algo::loglikelihood(mDMatrix, mSMatrix, mAPMatrix);
33 55
 }
34 56
 
... ...
@@ -93,6 +115,20 @@ void GibbsSampler::update(char matrixLabel)
93 115
     // get proposal and convert to a matrix update
94 116
     AtomicProposal proposal = domain.makeProposal();
95 117
 
118
+    // check if proposal is in fixed region
119
+    if (mFixedMat == 'A' || mFixedMat == 'P')
120
+    {
121
+        MatrixChange change = domain.getMatrixChange(proposal);
122
+        unsigned index1 = mFixedMat == 'A' ? change.col1 : change.row1;
123
+        unsigned index2 = mFixedMat == 'A' ? change.col2 : change.row2;
124
+        index2 = change.nChanges == 1 ? mNumFixedPatterns : index2;
125
+
126
+        if (index1 < mNumFixedPatterns || index2 < mNumFixedPatterns)
127
+        {
128
+            return;
129
+        }
130
+    }
131
+
96 132
     // Update based on the proposal type
97 133
     switch (proposal.type)
98 134
     {
... ...
@@ -31,6 +31,9 @@ public:
31 31
 
32 32
     bool mSingleCellRNASeq;
33 33
 
34
+    unsigned mNumFixedPatterns;
35
+    char mFixedMat;
36
+
34 37
     bool death(AtomicSupport &domain, AtomicProposal &proposal);
35 38
     bool birth(AtomicSupport &domain, AtomicProposal &proposal);
36 39
     bool move(AtomicSupport &domain, AtomicProposal &proposal);
... ...
@@ -54,7 +57,7 @@ public:
54 57
 
55 58
     GibbsSampler(Rcpp::NumericMatrix D, Rcpp::NumericMatrix S, unsigned nFactor,
56 59
         double alphaA, double alphaP, double maxGibbsMassA, double maxGibbsMassP,
57
-        bool singleCellRNASeq);
60
+        bool singleCellRNASeq, Rcpp::NumericMatrix fixedPat, char whichMat);
58 61
 
59 62
     void update(char matrixLabel);
60 63
 
... ...
@@ -6,8 +6,8 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // cogaps
9
-Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, double maxGibbsMassP, int seed, bool messages, bool singleCellRNASeq);
10
-RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP) {
9
+Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq);
10
+RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP fixedPatternsSEXP, SEXP whichMatrixFixedSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject rcpp_result_gen;
13 13
     Rcpp::RNGScope rcpp_rngScope_gen;
... ...
@@ -21,10 +21,12 @@ BEGIN_RCPP
21 21
     Rcpp::traits::input_parameter< unsigned >::type nSample(nSampleSEXP);
22 22
     Rcpp::traits::input_parameter< double >::type maxGibbsMassA(maxGibbsMassASEXP);
23 23
     Rcpp::traits::input_parameter< double >::type maxGibbsMassP(maxGibbsMassPSEXP);
24
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type fixedPatterns(fixedPatternsSEXP);
25
+    Rcpp::traits::input_parameter< char >::type whichMatrixFixed(whichMatrixFixedSEXP);
24 26
     Rcpp::traits::input_parameter< int >::type seed(seedSEXP);
25 27
     Rcpp::traits::input_parameter< bool >::type messages(messagesSEXP);
26 28
     Rcpp::traits::input_parameter< bool >::type singleCellRNASeq(singleCellRNASeqSEXP);
27
-    rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, seed, messages, singleCellRNASeq));
29
+    rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq));
28 30
     return rcpp_result_gen;
29 31
 END_RCPP
30 32
 }
... ...
@@ -40,7 +42,7 @@ END_RCPP
40 42
 }
41 43
 
42 44
 static const R_CallMethodDef CallEntries[] = {
43
-    {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 13},
45
+    {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 15},
44 46
     {"_CoGAPS_run_catch_unit_tests", (DL_FUNC) &_CoGAPS_run_catch_unit_tests, 0},
45 47
     {NULL, NULL, 0}
46 48
 };