sherman5 authored on 12/01/2018 16:25:29
Showing25 changed files

... ...
@@ -9,6 +9,7 @@ export(calcZ)
9 9
 export(createGWCoGAPSSets)
10 10
 export(gapsMapRun)
11 11
 export(gapsRun)
12
+export(gapsRunFromCheckpoint)
12 13
 export(generateSeeds)
13 14
 export(patternMarkers)
14 15
 export(patternMatch4Parallel)
... ...
@@ -1,8 +1,12 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots) {
5
-    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots)
4
+cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval) {
5
+    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval)
6
+}
7
+
8
+cogapsFromCheckpoint <- function(fileName) {
9
+    .Call('_CoGAPS_cogapsFromCheckpoint', PACKAGE = 'CoGAPS', fileName)
6 10
 }
7 11
 
8 12
 run_catch_unit_tests <- function() {
... ...
@@ -51,6 +51,8 @@
51 51
 #'@param fixedPatterns matrix of fixed values in either A or P matrix
52 52
 #'@param whichMatrixFixed character to indicate whether A or P matrix
53 53
 #'  contains the fixed patterns
54
+#'@param checkpoint_file_name name of file to store checkpoint
55
+#'@param checkpoint_interval time (in seconds) between cogaps checkpoints
54 56
 #'@export
55 57
 
56 58
 #--CHANGES 1/20/15--
... ...
@@ -63,7 +65,8 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
63 65
                     nMaxA = 100000, max_gibbmass_paraA = 100.0,
64 66
                     alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0,
65 67
                     seed=-1, messages=TRUE, singleCellRNASeq=FALSE,
66
-                    fixedPatterns = matrix(0), whichMatrixFixed = 'N')
68
+                    fixedPatterns = matrix(0), whichMatrixFixed = 'N',
69
+                    checkpoint_interval = 0)
67 70
 {
68 71
     # Floor the parameters that are integers to prevent allowing doubles.
69 72
     nFactor <- floor(nFactor)
... ...
@@ -100,7 +103,7 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
100 103
     cogapResult <- cogaps(as.matrix(D), as.matrix(S), nFactor, alphaA, alphaP,
101 104
         nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, max_gibbmass_paraP,
102 105
         fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq,
103
-        nOutR, numSnapshots)
106
+        nOutR, numSnapshots, checkpoint_interval)
104 107
 
105 108
     # convert returned files to matrices to simplify visualization and processing
106 109
     cogapResult$Amean <- as.matrix(cogapResult$Amean);
... ...
@@ -138,3 +141,56 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
138 141
     message(paste("Chi-Squared of Mean:", calcChiSq))
139 142
     return(cogapResult);
140 143
 }
144
+
145
+#' @export
146
+gapsRunFromCheckpoint <- function(D, S, path)
147
+{
148
+    # call to C++ Rcpp code
149
+    cogapResult <- cogapsFromCheckpoint(path)
150
+
151
+    # convert returned files to matrices to simplify visualization and processing
152
+    cogapResult$Amean <- as.matrix(cogapResult$Amean);
153
+    cogapResult$Asd <- as.matrix(cogapResult$Asd);
154
+    cogapResult$Pmean <- as.matrix(cogapResult$Pmean);
155
+    cogapResult$Psd <- as.matrix(cogapResult$Psd);
156
+
157
+    geneNames <- rownames(D);
158
+    sampleNames <- colnames(D);
159
+
160
+    # label patterns as Patt N
161
+    patternNames <- c("0");
162
+    for(i in 1:ncol(cogapResult$Amean))
163
+    {
164
+        patternNames[i] <- paste('Patt', i);
165
+    }
166
+
167
+    ## label matrices
168
+    colnames(cogapResult$Amean) <- patternNames;
169
+    rownames(cogapResult$Amean) <- geneNames;
170
+    colnames(cogapResult$Asd) <- patternNames;
171
+    rownames(cogapResult$Asd) <- geneNames;
172
+    colnames(cogapResult$Pmean) <- sampleNames;
173
+    rownames(cogapResult$Pmean) <- patternNames;
174
+    colnames(cogapResult$Psd) <- sampleNames;
175
+    rownames(cogapResult$Psd) <- patternNames;
176
+
177
+    ## calculate chi-squared of mean, this should be smaller than individual
178
+    ## chi-squared sample values if sampling is good
179
+    calcChiSq <- c(0);
180
+    MMatrix <- (cogapResult$Amean %*% cogapResult$Pmean);
181
+
182
+    for(i in 1:(nrow(MMatrix)))
183
+    {
184
+        for(j in 1:(ncol(MMatrix)))
185
+        {
186
+            calcChiSq <- calcChiSq + ((D[i,j] - MMatrix[i,j])/S[i,j])^2;
187
+        }
188
+    }
189
+
190
+    cogapResult = c(cogapResult, calcChiSq);
191
+    
192
+    # names(cogapResult)[13] <- "meanChi2";
193
+
194
+    message(paste("Chi-Squared of Mean:", calcChiSq))
195
+    return(cogapResult);
196
+}
141 197
\ No newline at end of file
... ...
@@ -65,16 +65,15 @@ for(i in cls){
65 65
   }
66 66
 }
67 67
 
68
-#drop n<4 and drop less than .7
68
+#drop n<minNS
69 69
 PByClustDrop <- list()
70 70
 RtoMPDrop <- list()
71 71
 for(i in cls){
72 72
   if(is.null(dim(PByClust[[i]]))==TRUE){next}
73 73
   if(dim(PByClust[[i]])[1]<minNS){next}
74 74
   else{
75
-    indx <- which(RtoMeanPattern[[i]]>.7,arr.ind = TRUE)
76
-    PByClustDrop <- append(PByClustDrop,list(PByClust[[i]][indx,]))
77
-    RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]][indx]))
75
+    PByClustDrop <- append(PByClustDrop,list(PByClust[[i]]))
76
+    RtoMPDrop <- append(RtoMPDrop,list(RtoMeanPattern[[i]]))
78 77
   }
79 78
 }
80 79
 
81 80
new file mode 100644
82 81
Binary files /dev/null and b/inst/benchmarks/combinedData.RData differ
... ...
@@ -1,101 +1,107 @@
1
-# save file time at beginning to ensure uniqueness
2
-file_name <- paste("cogaps_benchmark_", format(Sys.time(), "%m_%d_%y_%H_%M_%OS"),
3
-    '.RData', sep='')
4
-
5
-# load packages
6
-library(CoGAPS)
7
-
8
-# display package version
9
-print(packageVersion('CoGAPS'))
10
-
11
-# benchmark dimensions
12
-
13
-M_dimensions <- c(10, 25, 50, 100, 250, 500, 750, 1000, 2500, 5000)
14
-N_dimensions <- c(10, 25, 50, 100, 250, 500, 750, 1000, 2500, 5000)
15
-nFactor_dimensions <- c(3, 5, 7, 10, 15, 20, 25, 30, 40, 50, 75, 100)
16
-nIter_dimensions <- c(1000, 1250, 1500, 1750, 2000, 2500, 3000, 4000, 5000)
17
-
18
-M_default <- floor(median(M_dimensions))
19
-N_default <- floor(median(N_dimensions))
20
-nFactor_default <- floor(median(nFactor_dimensions))
21
-nIter_default <- floor(median(nIter_default))
22
-
23
-seed_default <- 12345
24
-reps_default <- 5
25
-
26
-# benchmark function
27
-
28
-data(GIST_TS_20084)
29
-runBenchmark <- function(m, n, k, iter, seed, reps)
30
-{
31
-    set.seed(123)
32
-    test_mat_D <- matrix(sample(as.matrix(GIST.D), m * n, replace=T),
33
-        nrow = m, ncol = n)
34
-
35
-    test_mat_S <- matrix(sample(as.matrix(GIST.S), m * n, replace=T),
36
-        nrow = m, ncol = n)
37
-
38
-    times <- c()
39
-    for (r in 1:reps)
40
-    {
41
-        start_time <- Sys.time()
42
-        gapsRun(test_mat_D, test_mat_S, nEquil=iter, nSample=iter,
43
-<<<<<<< HEAD
44
-            nFactor=k, seed=seed+r, messages=FALSE, sampleSnapshots=FALSE,
45
-            numSnapshots=iter / 2, nOutR = iter / 2)
46
-=======
47
-            nFactor=k, seed=seed+r, messages=FALSE)
48
->>>>>>> develop
49
-        times[r] <- as.numeric(Sys.time() - start_time)
50
-    }
51
-    params <- c(m, n, k, iter, seed, reps)
52
-    secs <- c(min(times), mean(times), median(times), max(times))
53
-    return(c(params, secs))
54
-}
55
-
56
-# store sample times and parameters
57
-samples <- NULL
58
-
59
-# run M benchmark
60
-for (n in M_dimensions)
61
-{
62
-    print(paste("benchmarking M =", n))
63
-    bm <- runBenchmark(n, N_default, nFactor_default, nIter_default, seed_default,
64
-        reps_default)
65
-    samples <- rbind(samples, bm)
66
-}
67
-
68
-# run N benchmark
69
-for (n in N_dimensions)
70
-{
71
-    print(paste("benchmarking N =", n))
72
-    bm <- runBenchmark(M_default, n, nFactor_default, nIter_default, seed_default,
73
-        reps_default)
74
-    samples <- rbind(samples, bm)
75
-}
76
-
77
-# run nFactor benchmark
78
-for (n in nFactor_dimensions)
79
-{
80
-    print(paste("benchmarking K =", n))
81
-    bm <- runBenchmark(M_default, N_default, n, nIter_default, seed_default,
82
-        reps_default)
83
-    samples <- rbind(samples, bm)
84
-}
85
-
86
-# run nIter benchmark
87
-for (n in nIter_dimensions)
88
-{
89
-    print(paste("benchmarking nIter =", n))
90
-    bm <- runBenchmark(M_default, N_default, nFactor_default, n, seed_default,
91
-        reps_default)
92
-    samples <- rbind(samples, bm)
93
-}
94
-
95
-rownames(samples) <- NULL
96
-colnames(samples) <- c('M', 'N', 'K', 'nIter', 'seed', 'reps', 'min', 'mean',
97
-    'median', 'max')
98
-cogaps_benchmark <- data.frame(samples)
99
-cogaps_version <- packageVersion('CoGAPS')
100
-
101
-save(cogaps_benchmark, cogaps_version, file=file_name)
1
+# save file time at beginning to ensure uniqueness
2
+file_name <- paste("cogaps_benchmark_", format(Sys.time(), "%m_%d_%y_%H_%M_%OS"),
3
+    '.RData', sep='')
4
+
5
+# load packages
6
+library(CoGAPS)
7
+
8
+# display package version
9
+print(packageVersion('CoGAPS'))
10
+
11
+# benchmark dimensions
12
+
13
+M_dimensions <- c(50, 75, 100, 250, 500, 750, 1000)
14
+N_dimensions <- M_dimensions
15
+nFactor_dimensions <- c(5, 10, 15, 20, 30, 40, 50)
16
+nIter_dimensions <- c(1000, 1250, 1500, 1750, 2000, 2500, 3000)
17
+
18
+M_default <- floor(median(M_dimensions))
19
+N_default <- floor(median(N_dimensions))
20
+nFactor_default <- floor(median(nFactor_dimensions))
21
+nIter_default <- floor(median(nIter_dimensions))
22
+
23
+print(paste("Defaults, M=", M_default, " N=", N_default, " K=",
24
+    nFactor_default, " I=", nIter_default, sep=''))
25
+
26
+seed_default <- 12345
27
+reps_default <- 5
28
+
29
+# benchmark function
30
+
31
+timeToSeconds <- function(time)
32
+{
33
+     time$hour * 3600 + time$min * 60 + time$sec
34
+}   
35
+
36
+data(GIST_TS_20084)
37
+runBenchmark <- function(m, n, k, iter, seed, reps)
38
+{
39
+    set.seed(123)
40
+    test_mat_D <- matrix(sample(as.matrix(GIST.D), m * n, replace=T),
41
+        nrow = m, ncol = n)
42
+
43
+    test_mat_S <- matrix(sample(as.matrix(GIST.S), m * n, replace=T),
44
+        nrow = m, ncol = n)
45
+
46
+    times <- c()
47
+    for (r in 1:reps)
48
+    {
49
+        start_time <- as.POSIXlt(Sys.time())
50
+        gapsRun(test_mat_D, test_mat_S, nEquil=iter, nSample=iter,
51
+            nFactor=k, seed=seed+r, messages=TRUE, nOutR = iter/2,
52
+#            sampleSnapshots=FALSE, numSnapshots=iter / 2)
53
+            numSnapshots=0)
54
+        times[r] <- timeToSeconds(as.POSIXlt(Sys.time())) - timeToSeconds(start_time)
55
+        print(round(times[r], 2))
56
+    }
57
+    params <- c(m, n, k, iter, seed, reps)
58
+    secs <- c(min(times), mean(times), median(times), max(times))
59
+    return(c(params, secs))
60
+}
61
+
62
+# store sample times and parameters
63
+samples <- NULL
64
+
65
+# run M benchmark
66
+for (n in M_dimensions)
67
+{
68
+    print(paste("benchmarking M =", n))
69
+    bm <- runBenchmark(n, N_default, nFactor_default, nIter_default, seed_default,
70
+        reps_default)
71
+    samples <- rbind(samples, bm)
72
+}
73
+
74
+# run N benchmark
75
+for (n in N_dimensions)
76
+{
77
+    print(paste("benchmarking N =", n))
78
+    bm <- runBenchmark(M_default, n, nFactor_default, nIter_default, seed_default,
79
+        reps_default)
80
+    samples <- rbind(samples, bm)
81
+}
82
+
83
+# run nFactor benchmark
84
+for (n in nFactor_dimensions)
85
+{
86
+    print(paste("benchmarking K =", n))
87
+    bm <- runBenchmark(M_default, N_default, n, nIter_default, seed_default,
88
+        reps_default)
89
+    samples <- rbind(samples, bm)
90
+}
91
+
92
+# run nIter benchmark
93
+for (n in nIter_dimensions)
94
+{
95
+    print(paste("benchmarking nIter =", n))
96
+    bm <- runBenchmark(M_default, N_default, nFactor_default, n, seed_default,
97
+        reps_default)
98
+    samples <- rbind(samples, bm)
99
+}
100
+
101
+rownames(samples) <- NULL
102
+colnames(samples) <- c('M', 'N', 'K', 'nIter', 'seed', 'reps', 'min', 'mean',
103
+    'median', 'max')
104
+cogaps_benchmark <- data.frame(samples)
105
+cogaps_version <- packageVersion('CoGAPS')
106
+
107
+save(cogaps_benchmark, cogaps_version, file=file_name)
... ...
@@ -13,7 +13,7 @@ gapsRun(D, S, ABins = data.frame(), PBins = data.frame(), nFactor = 7,
13 13
   max_gibbmass_paraA = 100, alphaP = 0.01, nMaxP = 1e+05,
14 14
   max_gibbmass_paraP = 100, seed = -1, messages = TRUE,
15 15
   singleCellRNASeq = FALSE, fixedPatterns = matrix(0),
16
-  whichMatrixFixed = "N")
16
+  whichMatrixFixed = "N", checkpoint_interval = 0)
17 17
 }
18 18
 \arguments{
19 19
 \item{D}{data matrix}
... ...
@@ -69,6 +69,10 @@ domain for relative probabilities}
69 69
 
70 70
 \item{whichMatrixFixed}{character to indicate whether A or P matrix
71 71
 contains the fixed patterns}
72
+
73
+\item{checkpoint_interval}{time (in seconds) between cogaps checkpoints}
74
+
75
+\item{checkpoint_file_name}{name of file to store checkpoint}
72 76
 }
73 77
 \description{
74 78
 \code{gapsRun} calls the C++ MCMC code and performs Bayesian
... ...
@@ -56,12 +56,13 @@ matrix_data_t gaps::algo::nonZeroMean(const TwoWayMatrix &mat)
56 56
     unsigned int nNonZeros = 0;
57 57
     for (unsigned int r = 0; r < mat.nRow(); ++r)
58 58
     {
59
+        const Vector &row = mat.getRow(r);
59 60
         for (unsigned int c = 0; c < mat.nCol(); ++c)
60 61
         {
61
-            if (mat(r,c) != 0.0)
62
+            if (row[c] != 0.0)
62 63
             {
63 64
                 nNonZeros++;
64
-                sum += mat(r,c);
65
+                sum += row[c];
65 66
             }
66 67
         }
67 68
     }
... ...
@@ -190,9 +191,12 @@ const TwoWayMatrix &S, const TwoWayMatrix &AP)
190 191
     float chi2 = 0.0;
191 192
     for (unsigned r = 0; r < D.nRow(); ++r)
192 193
     {
194
+        const Vector &Drow = D.getRow(r);
195
+        const Vector &Srow = S.getRow(r);
196
+        const Vector &AProw = AP.getRow(r);
193 197
         for (unsigned c = 0; c < D.nCol(); ++c)
194 198
         {
195
-            chi2 += GAPS_SQ(D(r,c) - AP(r,c)) / GAPS_SQ(S(r,c));
199
+            chi2 += GAPS_SQ(Drow[c] - AProw[c]) / GAPS_SQ(Srow[c]);
196 200
         }
197 201
     }
198 202
     return chi2;
... ...
@@ -256,7 +260,6 @@ float gaps::algo::deltaLL(const MatrixChange &ch, const TwoWayMatrix &D,
256 260
 const TwoWayMatrix &S, const ColMatrix &A, const RowMatrix &P,
257 261
 const TwoWayMatrix &AP)
258 262
 {
259
-    // change in A matrix
260 263
     if (ch.label == 'A' && ch.nChanges == 2 && ch.row1 != ch.row2)
261 264
     {
262 265
         return deltaLL_A(D, S, P, AP, ch.row1, ch.col1, ch.delta1)
... ...
@@ -267,14 +270,12 @@ const TwoWayMatrix &AP)
267 270
         return deltaLL_A(D, S, P, AP, ch.row1, ch.col1, ch.delta1, ch.col2,
268 271
             ch.delta2, ch.nChanges == 2);
269 272
     }
270
-
271
-    // change in P matrix
272
-    if (ch.label == 'P' && ch.nChanges == 2 && ch.col1 != ch.col2)
273
+    else if (ch.label == 'P' && ch.nChanges == 2 && ch.col1 != ch.col2)
273 274
     {
274 275
         return deltaLL_P(D, S, A, AP, ch.col1, ch.row1, ch.delta1)
275 276
             + deltaLL_P(D, S, A, AP, ch.col2, ch.row2, ch.delta2);
276 277
     }
277
-    else if (ch.label == 'P')
278
+    else
278 279
     {
279 280
         return deltaLL_P(D, S, A, AP, ch.col1, ch.row1, ch.delta1, ch.row2,
280 281
             ch.delta2, ch.nChanges == 2);
... ...
@@ -325,29 +326,24 @@ AlphaParameters gaps::algo::alphaParameters(const MatrixChange &ch,
325 326
 const TwoWayMatrix &D, const TwoWayMatrix &S, const ColMatrix &A,
326 327
 const RowMatrix &P, const TwoWayMatrix &AP)
327 328
 {
328
-    // change in A matrix
329
-    AlphaParameters p(0,0);
330 329
     if (ch.label == 'A' && ch.nChanges == 2 && ch.row1 != ch.row2)
331 330
     {
332
-        p = alphaParameters_A(D, S, P, AP, ch.row1, ch.col1)
331
+        return alphaParameters_A(D, S, P, AP, ch.row1, ch.col1)
333 332
             + alphaParameters_A(D, S, P, AP, ch.row2, ch.col2);
334 333
     }
335 334
     else if (ch.label == 'A')
336 335
     {
337
-        p = alphaParameters_A(D, S, P, AP, ch.row1, ch.col1, ch.col2,
336
+        return alphaParameters_A(D, S, P, AP, ch.row1, ch.col1, ch.col2,
338 337
             ch.nChanges == 2);
339 338
     }
340
-
341
-    // change in P matrix
342
-    if (ch.label == 'P' && ch.nChanges == 2 && ch.col1 != ch.col2)
339
+    else if (ch.label == 'P' && ch.nChanges == 2 && ch.col1 != ch.col2)
343 340
     {
344
-        p = alphaParameters_P(D, S, A, AP, ch.col1, ch.row1)
341
+        return alphaParameters_P(D, S, A, AP, ch.col1, ch.row1)
345 342
             + alphaParameters_P(D, S, A, AP, ch.col2, ch.row2);
346 343
     }
347
-    else if (ch.label == 'P')
344
+    else
348 345
     {
349
-        p = alphaParameters_P(D, S, A, AP, ch.col1, ch.row1, ch.row2,
346
+        return alphaParameters_P(D, S, A, AP, ch.col1, ch.row1, ch.row2,
350 347
             ch.nChanges == 2);
351 348
     }
352
-    return p;
353 349
 }
354 350
\ No newline at end of file
355 351
new file mode 100644
... ...
@@ -0,0 +1,43 @@
1
+#ifndef __COGAPS_ARCHIVE_H__
2
+#define __COGAPS_ARCHIVE_H__
3
+
4
+#include <boost/random/mersenne_twister.hpp>
5
+#include <fstream>
6
+
7
+#define ARCHIVE_READ  std::ios::in
8
+#define ARCHIVE_WRITE std::ios::out | std::ios::trunc
9
+
10
+class Archive
11
+{
12
+private:
13
+
14
+    std::fstream mStream;
15
+
16
+public:
17
+
18
+    Archive(const std::string &path, std::ios_base::openmode flags)
19
+        : mStream(path.c_str(), std::ios::binary | flags)
20
+    {}
21
+
22
+    void close() {mStream.close();}
23
+
24
+    template<typename T>
25
+    friend void operator<<(Archive &ar, T val);
26
+
27
+    template<typename T>
28
+    friend void operator>>(Archive &ar, T &val);
29
+};
30
+
31
+template<typename T>
32
+void operator<<(Archive &ar, T val)
33
+{
34
+    ar.mStream.write(reinterpret_cast<char*>(&val), sizeof(T));
35
+}
36
+
37
+template<typename T>
38
+void operator>>(Archive &ar, T &val)
39
+{
40
+    ar.mStream.read(reinterpret_cast<char*>(&val), sizeof(T));
41
+}
42
+
43
+#endif
0 44
\ No newline at end of file
... ...
@@ -5,10 +5,11 @@ static const float EPSILON = 1.e-10;
5 5
 AtomicSupport::AtomicSupport(char label, uint64_t nrow, uint64_t ncol,
6 6
 float alpha, float lambda)
7 7
     :
8
-mNumRows(nrow), mNumCols(ncol), mNumBins(nrow * ncol),
9
-mNumAtoms(0), mTotalMass(0.0), mLabel(label), mAlpha(alpha), 
10
-mMaxNumAtoms(std::numeric_limits<uint64_t>::max()), mLambda(lambda),
11
-mBinSize(std::numeric_limits<uint64_t>::max() / (nrow * ncol))
8
+mLabel(label), mNumAtoms(0),
9
+mMaxNumAtoms(std::numeric_limits<uint64_t>::max()),
10
+mTotalMass(0.0), mNumRows(nrow), mNumCols(ncol), mNumBins(nrow * ncol),
11
+mBinSize(std::numeric_limits<uint64_t>::max() / (nrow * ncol)),
12
+mAlpha(alpha), mLambda(lambda)
12 13
 {}
13 14
 
14 15
 uint64_t AtomicSupport::getRow(uint64_t pos) const
... ...
@@ -113,7 +114,7 @@ AtomicProposal AtomicSupport::proposeExchange() const
113 114
     return AtomicProposal(mLabel, 'E', pos1, delta1, pos2, delta2);
114 115
 }
115 116
 
116
-float AtomicSupport::updateAtomMass(char type, uint64_t pos, float delta)
117
+float AtomicSupport::updateAtomMass(uint64_t pos, float delta)
117 118
 {
118 119
     if (mAtomicDomain.count(pos)) // update atom if it exists
119 120
     {
... ...
@@ -172,10 +173,10 @@ AtomicProposal AtomicSupport::makeProposal() const
172 173
 MatrixChange AtomicSupport::acceptProposal(const AtomicProposal &prop)
173 174
 {
174 175
     MatrixChange change = getMatrixChange(prop);
175
-    change.delta1 = updateAtomMass(prop.type, prop.pos1, prop.delta1);
176
+    change.delta1 = updateAtomMass(prop.pos1, prop.delta1);
176 177
     if (prop.nChanges > 1)
177 178
     {
178
-        change.delta2 = updateAtomMass(prop.type, prop.pos2, prop.delta2);
179
+        change.delta2 = updateAtomMass(prop.pos2, prop.delta2);
179 180
     }
180 181
     return change;
181 182
 }
... ...
@@ -191,4 +192,52 @@ MatrixChange AtomicSupport::getMatrixChange(const AtomicProposal &prop) const
191 192
     {   
192 193
         return MatrixChange(prop.label, getRow(prop.pos1), getCol(prop.pos1), prop.delta1);
193 194
     }
195
+}
196
+
197
+void operator<<(Archive &ar, AtomicSupport &domain)
198
+{
199
+    ar << domain.mLabel;
200
+    ar << domain.mNumAtoms;
201
+    ar << domain.mMaxNumAtoms;
202
+    ar << domain.mTotalMass;
203
+    ar << domain.mNumRows;
204
+    ar << domain.mNumCols;
205
+    ar << domain.mNumBins;
206
+    ar << domain.mBinSize;
207
+    ar << domain.mAlpha;
208
+    ar << domain.mLambda;
209
+    
210
+    std::map<uint64_t, float>::iterator iter = domain.mAtomicDomain.begin();
211
+    for (; iter != domain.mAtomicDomain.end(); ++iter)
212
+    {
213
+        uint64_t pos = iter->first;
214
+        float mass = iter->second;
215
+        ar << pos;
216
+        ar << mass;
217
+        //ar << iter->first;
218
+        //ar << iter->second;
219
+    }
220
+}
221
+
222
+void operator>>(Archive &ar, AtomicSupport &domain)
223
+{
224
+    ar >> domain.mLabel;
225
+    ar >> domain.mNumAtoms;
226
+    ar >> domain.mMaxNumAtoms;
227
+    ar >> domain.mTotalMass;
228
+    ar >> domain.mNumRows;
229
+    ar >> domain.mNumCols;
230
+    ar >> domain.mNumBins;
231
+    ar >> domain.mBinSize;
232
+    ar >> domain.mAlpha;
233
+    ar >> domain.mLambda;
234
+
235
+    uint64_t pos = 0;
236
+    float mass = 0.0;
237
+    for (unsigned i = 0; i < domain.mNumAtoms; ++i)
238
+    {
239
+        ar >> pos;
240
+        ar >> mass;
241
+        domain.mAtomicDomain.insert(std::pair<uint64_t, float>(pos, mass));
242
+    }    
194 243
 }
195 244
\ No newline at end of file
... ...
@@ -8,6 +8,7 @@
8 8
 #include <fstream>
9 9
 #include <vector>
10 10
 #include <stdint.h>
11
+//#include <boost/serialization/map.hpp>
11 12
 
12 13
 struct AtomicProposal
13 14
 {
... ...
@@ -76,7 +77,7 @@ public:
76 77
     AtomicProposal proposeExchange() const;
77 78
 
78 79
     // update the mass of an atom, return the total amount changed
79
-    float updateAtomMass(char type, uint64_t pos, float delta);
80
+    float updateAtomMass(uint64_t pos, float delta);
80 81
 
81 82
 public:
82 83
 
... ...
@@ -101,7 +102,9 @@ public:
101 102
     // setters
102 103
     void setAlpha(float alpha) {mAlpha = alpha;}
103 104
     void setLambda(float lambda) {mLambda = lambda;}
104
-    //void setMaxNumAtoms(uint64_t max) {mMaxNumAtoms = max;}
105
+
106
+    friend void operator<<(Archive &ar, AtomicSupport &sampler);
107
+    friend void operator>>(Archive &ar, AtomicSupport &sampler);
105 108
 };
106 109
 
107 110
 #endif
... ...
@@ -1,144 +1,212 @@
1 1
 #include "GibbsSampler.h"
2
+#include "Matrix.h"
3
+#include "Archive.h"
4
+#include "InternalState.h"
2 5
 
3 6
 #include <Rcpp.h>
4 7
 #include <ctime>
8
+#include <fstream>
5 9
 #include <boost/date_time/posix_time/posix_time.hpp>
10
+#include <boost/archive/text_oarchive.hpp>
11
+#include <boost/archive/text_iarchive.hpp>
6 12
 
7
-enum GapsPhase
8
-{
9
-    GAPS_CALIBRATION,
10
-    GAPS_COOLING,
11
-    GAPS_SAMPLING
12
-};
13
-
14
-static std::vector<Rcpp::NumericMatrix> snapshotsA;
15
-static std::vector<Rcpp::NumericMatrix> snapshotsP;
16
-
17
-static void runGibbsSampler(GibbsSampler &sampler, unsigned nIterTotal,
18
-unsigned& nIterA, unsigned& nIterP, Vector& chi2Vec, Vector& aAtomVec,
19
-Vector& pAtomVec, GapsPhase phase, unsigned numOutputs, bool messages,
20
-unsigned numSnapshots);
13
+// no C++11 std::to_string
14
+#include <sstream>
15
+#define SSTR( x ) static_cast< std::ostringstream & >( \
16
+        ( std::ostringstream() << std::dec << x ) ).str()
21 17
 
22
-// [[Rcpp::export]]
23
-Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix,
24
-unsigned nFactor, float alphaA, float alphaP, unsigned nEquil,
25
-unsigned nEquilCool, unsigned nSample, float maxGibbsMassA,
26
-float maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
27
-char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq,
28
-unsigned numOutputs, unsigned numSnapshots)
29
-{
30
-    // set seed
31
-    uint32_t seedUsed = static_cast<uint32_t>(seed);
32
-    if (seed < 0)
33
-    {
34
-        boost::posix_time::ptime epoch(boost::gregorian::date(1970,1,1)); 
35
-        boost::posix_time::ptime now = boost::posix_time::microsec_clock::local_time();
36
-        boost::posix_time::time_duration diff = now - epoch;
37
-        seedUsed = static_cast<uint32_t>(diff.total_milliseconds() % 1000);
38
-    }
39
-    gaps::random::setSeed(seedUsed);
18
+#define ARCHIVE_MAGIC_NUM 0xCE45D32A
40 19
 
41
-    // create the gibbs sampler
42
-    GibbsSampler sampler(DMatrix, SMatrix, nFactor, alphaA, alphaP,
43
-        maxGibbsMassA, maxGibbsMassP, singleCellRNASeq, fixedPatterns,
44
-        whichMatrixFixed);
45
-
46
-    // initial number of iterations for each matrix
47
-    unsigned nIterA = 10;
48
-    unsigned nIterP = 10;
49
-
50
-    // calibration phase
51
-    Vector chi2Vec(nEquil);
52
-    Vector nAtomsAEquil(nEquil);
53
-    Vector nAtomsPEquil(nEquil);
54
-    runGibbsSampler(sampler, nEquil, nIterA, nIterP, chi2Vec,
55
-        nAtomsAEquil, nAtomsPEquil, GAPS_CALIBRATION, numOutputs, messages,
56
-        numSnapshots);
57
-
58
-    // cooling phase
59
-    Vector trash(nEquilCool);
60
-    runGibbsSampler(sampler, nEquilCool, nIterA, nIterP, trash,
61
-        trash, trash, GAPS_COOLING, numOutputs, messages, numSnapshots);
62
-
63
-    // sampling phase
64
-    Vector chi2VecSample(nSample);
65
-    Vector nAtomsASample(nSample);
66
-    Vector nAtomsPSample(nSample);
67
-    runGibbsSampler(sampler, nSample, nIterA, nIterP, chi2VecSample,
68
-        nAtomsASample, nAtomsPSample, GAPS_SAMPLING, numOutputs, messages,
69
-        numSnapshots);
20
+namespace bpt = boost::posix_time;
70 21
 
71
-    // combine chi2 vectors
72
-    chi2Vec.concat(chi2VecSample);
22
+static bpt::ptime lastCheckpoint;
73 23
 
74
-    return Rcpp::List::create(
75
-        Rcpp::Named("Amean") = sampler.AMeanRMatrix(),
76
-        Rcpp::Named("Asd") = sampler.AStdRMatrix(),
77
-        Rcpp::Named("Pmean") = sampler.PMeanRMatrix(),
78
-        Rcpp::Named("Psd") = sampler.PStdRMatrix(),
79
-        Rcpp::Named("ASnapshots") = Rcpp::wrap(snapshotsA),
80
-        Rcpp::Named("PSnapshots") = Rcpp::wrap(snapshotsP),
81
-        Rcpp::Named("atomsAEquil") = nAtomsAEquil.rVec(),
82
-        Rcpp::Named("atomsASamp") = nAtomsASample.rVec(),
83
-        Rcpp::Named("atomsPEquil") = nAtomsPEquil.rVec(),
84
-        Rcpp::Named("atomsPSamp") = nAtomsPSample.rVec(),
85
-        Rcpp::Named("chiSqValues") = chi2Vec.rVec(),
86
-        Rcpp::Named("randSeed") = seedUsed
87
-    );
24
+static void createCheckpoint(GapsInternalState &state)
25
+{
26
+    state.numCheckpoints++;
27
+    std::cout << "creating gaps checkpoint...";
28
+    bpt::ptime start = bpt::microsec_clock::local_time();
29
+    Archive ar("gaps_checkpoint_" + SSTR(state.numCheckpoints) + ".out",
30
+        ARCHIVE_WRITE);
31
+    ar << ARCHIVE_MAGIC_NUM;
32
+    gaps::random::save(ar);
33
+    ar << state.nEquil;
34
+    ar << state.nSample;
35
+    ar << state.sampler.nRow();
36
+    ar << state.sampler.nCol();
37
+    ar << state.sampler.nFactor();
38
+    ar << state;
39
+    ar.close();
40
+    bpt::time_duration diff = bpt::microsec_clock::local_time() - start;
41
+    double elapsed = diff.total_milliseconds() / 1000.;
42
+    std::cout << " finished in " << elapsed << " seconds\n";
88 43
 }
89 44
 
90
-static void runGibbsSampler(GibbsSampler &sampler, unsigned nIterTotal,
91
-unsigned& nIterA, unsigned& nIterP, Vector& chi2Vec, Vector& aAtomVec,
92
-Vector& pAtomVec, GapsPhase phase, unsigned numOutputs, bool messages,
93
-unsigned numSnapshots)
45
+static void runGibbsSampler(GapsInternalState &state, unsigned nIterTotal,
46
+Vector &chi2Vec, Vector &aAtomVec, Vector &pAtomVec)
94 47
 {
95
-    float tempDenom = (float)nIterTotal / 2.f;
96
-
97
-    for (unsigned i = 0; i < nIterTotal; ++i)
48
+    for (; state.iter < nIterTotal; ++state.iter)
98 49
     {
99
-        if (phase == GAPS_CALIBRATION)
50
+        bpt::ptime now = bpt::microsec_clock::local_time();
51
+        bpt::time_duration diff = now - lastCheckpoint;
52
+        if (state.checkpointInterval > 0 && diff.total_milliseconds() > state.checkpointInterval * 1000)
100 53
         {
101
-            sampler.setAnnealingTemp(std::min(((float)i + 2.f) / tempDenom, 1.f));
54
+            createCheckpoint(state);
55
+            lastCheckpoint = bpt::microsec_clock::local_time();
102 56
         }
103 57
 
104
-        for (unsigned j = 0; j < nIterA; ++j)
58
+        if (state.phase == GAPS_CALIBRATION)
105 59
         {
106
-            sampler.update('A');
60
+            state.sampler.setAnnealingTemp(std::min(1.0,
61
+                ((float)state.iter + 2.0) / ((float)state.nEquil / 2.0)));
107 62
         }
108 63
 
109
-        for (unsigned j = 0; j < nIterP; ++j)
64
+        for (unsigned j = 0; j < state.nIterA; ++j)
110 65
         {
111
-            sampler.update('P');
66
+            state.sampler.update('A');
112 67
         }
113 68
 
114
-        if (phase == GAPS_SAMPLING)
69
+        for (unsigned j = 0; j < state.nIterP; ++j)
115 70
         {
116
-            sampler.updateStatistics();
117
-            if (numSnapshots > 0 && (i + 1) % (nIterTotal / numSnapshots) == 0)
71
+            state.sampler.update('P');
72
+        }
73
+
74
+        if (state.phase == GAPS_SAMPLING)
75
+        {
76
+            state.sampler.updateStatistics();
77
+            if (state.nSnapshots && !((state.iter + 1) % (nIterTotal / state.nSnapshots)))
118 78
             {
119
-                snapshotsA.push_back(sampler.getNormedMatrix('A'));
120
-                snapshotsP.push_back(sampler.getNormedMatrix('P'));
79
+                state.snapshotsA.push_back(state.sampler.getNormedMatrix('A'));
80
+                state.snapshotsP.push_back(state.sampler.getNormedMatrix('P'));
121 81
             }
122 82
         }
123 83
 
124
-        float numAtomsA = sampler.totalNumAtoms('A');
125
-        float numAtomsP = sampler.totalNumAtoms('P');
126
-        aAtomVec[i] = numAtomsA;
127
-        pAtomVec[i] = numAtomsP;
128
-        chi2Vec[i] = sampler.chi2();
129
-
130
-        if (phase != GAPS_COOLING)
84
+        if (state.phase != GAPS_COOLING)
131 85
         {
132
-            if ((i + 1) % numOutputs == 0 && messages)
86
+            aAtomVec[state.iter] = state.sampler.totalNumAtoms('A');
87
+            pAtomVec[state.iter] = state.sampler.totalNumAtoms('P');
88
+            chi2Vec[state.iter] = state.sampler.chi2();
89
+            state.nIterA = gaps::random::poisson(std::max(aAtomVec[state.iter], 10.f));
90
+            state.nIterP = gaps::random::poisson(std::max(pAtomVec[state.iter], 10.f));
91
+
92
+            if ((state.iter + 1) % state.nOutputs == 0 && state.messages)
133 93
             {
134
-                std::string temp = phase == GAPS_CALIBRATION ? "Equil: " : "Samp: ";
135
-                std::cout << temp << i + 1 << " of " << nIterTotal << ", Atoms:"
136
-                    << numAtomsA << "(" << numAtomsP << ") Chi2 = "
137
-                    << sampler.chi2() << '\n';
94
+                std::string temp = state.phase == GAPS_CALIBRATION ? "Equil: " : "Samp: ";
95
+                std::cout << temp << state.iter + 1 << " of " << nIterTotal
96
+                    << ", Atoms:" << aAtomVec[state.iter] << "(" << pAtomVec[state.iter]
97
+                    << ") Chi2 = " << state.sampler.chi2() << '\n';
138 98
             }
139
-
140
-            nIterA = gaps::random::poisson(std::max(numAtomsA, 10.f));
141
-            nIterP = gaps::random::poisson(std::max(numAtomsP, 10.f));
142 99
         }
143 100
     }
144
-}
145 101
\ No newline at end of file
102
+}
103
+
104
+static Rcpp::List runCogaps(GapsInternalState &state)
105
+{
106
+    // reset the checkpoint timer
107
+    lastCheckpoint = bpt::microsec_clock::local_time();
108
+
109
+    if (state.phase == GAPS_CALIBRATION)
110
+    {
111
+        runGibbsSampler(state, state.nEquil, state.chi2VecEquil,
112
+            state.nAtomsAEquil, state.nAtomsPEquil);
113
+        state.iter = 0;
114
+        state.phase = GAPS_COOLING;
115
+    }
116
+
117
+    if (state.phase == GAPS_COOLING)
118
+    {
119
+        Vector trash(1);
120
+        runGibbsSampler(state, state.nEquilCool, trash, trash, trash);
121
+        state.iter = 0;
122
+        state.phase = GAPS_SAMPLING;
123
+    }
124
+
125
+    if (state.phase == GAPS_SAMPLING)
126
+    {
127
+        runGibbsSampler(state, state.nSample, state.chi2VecSample,
128
+            state.nAtomsASample, state.nAtomsPSample);
129
+    }
130
+
131
+    // combine chi2 vectors
132
+    Vector chi2Vec(state.chi2VecEquil);
133
+    chi2Vec.concat(state.chi2VecSample);
134
+
135
+    return Rcpp::List::create(
136
+        Rcpp::Named("Amean") = state.sampler.AMeanRMatrix(),
137
+        Rcpp::Named("Asd") = state.sampler.AStdRMatrix(),
138
+        Rcpp::Named("Pmean") = state.sampler.PMeanRMatrix(),
139
+        Rcpp::Named("Psd") = state.sampler.PStdRMatrix(),
140
+        Rcpp::Named("ASnapshots") = Rcpp::wrap(state.snapshotsA),
141
+        Rcpp::Named("PSnapshots") = Rcpp::wrap(state.snapshotsP),
142
+        Rcpp::Named("atomsAEquil") = state.nAtomsAEquil.rVec(),
143
+        Rcpp::Named("atomsASamp") = state.nAtomsASample.rVec(),
144
+        Rcpp::Named("atomsPEquil") = state.nAtomsPEquil.rVec(),
145
+        Rcpp::Named("atomsPSamp") = state.nAtomsPSample.rVec(),
146
+        Rcpp::Named("chiSqValues") = chi2Vec.rVec(),
147
+        Rcpp::Named("randSeed") = state.seed
148
+    );
149
+}
150
+
151
+// [[Rcpp::export]]
152
+Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix,
153
+unsigned nFactor, float alphaA, float alphaP, unsigned nEquil,
154
+unsigned nEquilCool, unsigned nSample, float maxGibbsMassA,
155
+float maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
156
+char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq,
157
+unsigned numOutputs, unsigned numSnapshots, unsigned checkpointInterval)
158
+{
159
+    // set seed
160
+    uint32_t seedUsed = static_cast<uint32_t>(seed);
161
+    if (seed < 0)
162
+    {
163
+        bpt::ptime epoch(boost::gregorian::date(1970,1,1)); 
164
+        bpt::ptime now = boost::posix_time::microsec_clock::local_time();
165
+        bpt::time_duration diff = now - epoch;
166
+        seedUsed = static_cast<uint32_t>(diff.total_milliseconds() % 1000);
167
+    }
168
+    gaps::random::setSeed(seedUsed);
169
+
170
+    // create internal state from parameters
171
+    GapsInternalState state(DMatrix, SMatrix, nFactor, alphaA, alphaP,
172
+        nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP,
173
+        fixedPatterns, whichMatrixFixed, messages, singleCellRNASeq,
174
+        numOutputs, numSnapshots, seedUsed, checkpointInterval);
175
+
176
+    // run cogaps from this internal state
177
+    return runCogaps(state);
178
+}
179
+
180
+// [[Rcpp::export]]
181
+Rcpp::List cogapsFromCheckpoint(const std::string &fileName)
182
+{   
183
+    // open file
184
+    Archive ar(fileName, ARCHIVE_READ);
185
+
186
+    // verify magic number
187
+    uint32_t magicNum = 0;
188
+    ar >> magicNum;
189
+    if (magicNum != ARCHIVE_MAGIC_NUM)
190
+    {
191
+        std::cout << "invalid checkpoint file" << std::endl;
192
+        return Rcpp::List::create();
193
+    }
194
+    
195
+    // seed random number generator and create internal state
196
+    gaps::random::load(ar);
197
+
198
+    // read needed parameters
199
+    unsigned nE = 0, nS = 0, nRow = 0, nCol = 0, nFactor = 0;
200
+    ar >> nE;
201
+    ar >> nS;
202
+    ar >> nRow;
203
+    ar >> nCol;
204
+    ar >> nFactor;
205
+    
206
+    // construct empty state of the correct size, populate from file
207
+    GapsInternalState state(nE, nS, nRow, nCol, nFactor);
208
+    ar >> state;
209
+
210
+    // run cogaps from this internal state
211
+    return runCogaps(state);
212
+}
213
+
... ...
@@ -5,19 +5,28 @@
5 5
 
6 6
 static const float EPSILON = 1.e-10;
7 7
 
8
+GibbsSampler::GibbsSampler(unsigned nRow, unsigned nCol, unsigned nFactor)
9
+    :
10
+mDMatrix(nRow, nCol), mSMatrix(nRow, nCol), mAPMatrix(nRow, nCol),
11
+mAMatrix(nRow, nFactor), mPMatrix(nFactor, nCol), mADomain('A', nRow, nFactor),
12
+mPDomain('P', nFactor, nCol), mAMeanMatrix(nRow, nFactor),
13
+mAStdMatrix(nRow, nFactor), mPMeanMatrix(nFactor, nCol),
14
+mPStdMatrix(nFactor, nCol)
15
+{}
16
+
8 17
 GibbsSampler::GibbsSampler(Rcpp::NumericMatrix D, Rcpp::NumericMatrix S,
9 18
 unsigned int nFactor, float alphaA, float alphaP, float maxGibbsMassA,
10 19
 float maxGibbsMassP, bool singleCellRNASeq, Rcpp::NumericMatrix fixedPat,
11 20
 char whichMat)
12 21
     :
13
-mDMatrix(D), mSMatrix(S), mAMatrix(D.nrow(), nFactor),
14
-mPMatrix(nFactor, D.ncol()), mAPMatrix(D.nrow(), D.ncol()),
22
+mDMatrix(D), mSMatrix(S), mAPMatrix(D.nrow(), D.ncol()),
23
+mAMatrix(D.nrow(), nFactor), mPMatrix(nFactor, D.ncol()), 
15 24
 mADomain('A', D.nrow(), nFactor), mPDomain('P', nFactor, D.ncol()),
16
-mMaxGibbsMassA(maxGibbsMassA), mMaxGibbsMassP(maxGibbsMassP),
17
-mAnnealingTemp(1.0), mChi2(0.0), mSingleCellRNASeq(singleCellRNASeq),
18 25
 mAMeanMatrix(D.nrow(), nFactor), mAStdMatrix(D.nrow(), nFactor),
19 26
 mPMeanMatrix(nFactor, D.ncol()), mPStdMatrix(nFactor, D.ncol()),
20
-mStatUpdates(0), mFixedMat(whichMat)
27
+mStatUpdates(0), mMaxGibbsMassA(maxGibbsMassA), mMaxGibbsMassP(maxGibbsMassP),
28
+mAnnealingTemp(1.0), mChi2(0.0), mSingleCellRNASeq(singleCellRNASeq),
29
+mFixedMat(whichMat)
21 30
 {
22 31
     float meanD = mSingleCellRNASeq ? gaps::algo::nonZeroMean(mDMatrix)
23 32
         : gaps::algo::mean(mDMatrix);
... ...
@@ -30,6 +39,7 @@ mStatUpdates(0), mFixedMat(whichMat)
30 39
     mMaxGibbsMassA /= mADomain.lambda();
31 40
     mMaxGibbsMassP /= mPDomain.lambda();
32 41
 
42
+    // need to update atomic in order to create checkpoints
33 43
     if (mFixedMat == 'A')
34 44
     {
35 45
         mNumFixedPatterns = fixedPat.ncol();
... ...
@@ -436,4 +446,50 @@ void GibbsSampler::updateStatistics()
436 446
         mPStdMatrix.getRow(r) += gaps::algo::squaredScalarDivision(mPMatrix.getRow(r),
437 447
             normVec[r]);
438 448
     }
449
+}
450
+
451
+void operator<<(Archive &ar, GibbsSampler &sampler)
452
+{
453
+    ar << sampler.mDMatrix;
454
+    ar << sampler.mSMatrix;
455
+    ar << sampler.mAPMatrix;
456
+    ar << sampler.mAMatrix;
457
+    ar << sampler.mPMatrix;
458
+    ar << sampler.mADomain;
459
+    ar << sampler.mPDomain;
460
+    ar << sampler.mAMeanMatrix;
461
+    ar << sampler.mAStdMatrix;
462
+    ar << sampler.mPMeanMatrix;
463
+    ar << sampler.mPStdMatrix;
464
+    ar << sampler.mStatUpdates;
465
+    ar << sampler.mMaxGibbsMassA;
466
+    ar << sampler.mMaxGibbsMassP;
467
+    ar << sampler.mAnnealingTemp;
468
+    ar << sampler.mChi2;
469
+    ar << sampler.mSingleCellRNASeq;
470
+    ar << sampler.mNumFixedPatterns;
471
+    ar << sampler.mFixedMat;
472
+}
473
+
474
+void operator>>(Archive &ar, GibbsSampler &sampler)
475
+{
476
+    ar >> sampler.mDMatrix;
477
+    ar >> sampler.mSMatrix;
478
+    ar >> sampler.mAPMatrix;
479
+    ar >> sampler.mAMatrix;
480
+    ar >> sampler.mPMatrix;
481
+    ar >> sampler.mADomain;
482
+    ar >> sampler.mPDomain;
483
+    ar >> sampler.mAMeanMatrix;
484
+    ar >> sampler.mAStdMatrix;
485
+    ar >> sampler.mPMeanMatrix;
486
+    ar >> sampler.mPStdMatrix;
487
+    ar >> sampler.mStatUpdates;
488
+    ar >> sampler.mMaxGibbsMassA;
489
+    ar >> sampler.mMaxGibbsMassP;
490
+    ar >> sampler.mAnnealingTemp;
491
+    ar >> sampler.mChi2;
492
+    ar >> sampler.mSingleCellRNASeq;
493
+    ar >> sampler.mNumFixedPatterns;
494
+    ar >> sampler.mFixedMat;
439 495
 }
440 496
\ No newline at end of file
... ...
@@ -13,14 +13,16 @@ private:
13 13
 public:
14 14
 #endif
15 15
 
16
-    AtomicSupport mADomain, mPDomain;
17
-
18 16
     TwoWayMatrix mDMatrix, mSMatrix, mAPMatrix;
19
-    RowMatrix mPMatrix;
17
+
20 18
     ColMatrix mAMatrix;
19
+    RowMatrix mPMatrix;
20
+
21
+    AtomicSupport mADomain, mPDomain;
21 22
 
22
-    RowMatrix mPMeanMatrix, mPStdMatrix;
23 23
     ColMatrix mAMeanMatrix, mAStdMatrix;
24
+    RowMatrix mPMeanMatrix, mPStdMatrix;
25
+
24 26
     unsigned mStatUpdates;
25 27
 
26 28
     float mMaxGibbsMassA;
... ...
@@ -55,6 +57,7 @@ public:
55 57
 
56 58
 public:
57 59
 
60
+    GibbsSampler(unsigned nRow, unsigned nCol, unsigned nFactor);
58 61
     GibbsSampler(Rcpp::NumericMatrix D, Rcpp::NumericMatrix S, unsigned nFactor,
59 62
         float alphaA, float alphaP, float maxGibbsMassA, float maxGibbsMassP,
60 63
         bool singleCellRNASeq, Rcpp::NumericMatrix fixedPat, char whichMat);
... ...
@@ -73,6 +76,13 @@ public:
73 76
     void updateStatistics();
74 77
 
75 78
     Rcpp::NumericMatrix getNormedMatrix(char mat);
79
+
80
+    unsigned nRow() const {return mDMatrix.nRow();}
81
+    unsigned nCol() const {return mDMatrix.nCol();}
82
+    unsigned nFactor() const {return mAMatrix.nCol();}
83
+
84
+    friend void operator<<(Archive &ar, GibbsSampler &sampler);
85
+    friend void operator>>(Archive &ar, GibbsSampler &sampler);
76 86
 };
77 87
 
78 88
 #endif
79 89
new file mode 100644
... ...
@@ -0,0 +1,127 @@
1
+#ifndef __COGAPS_INTERNAL_STATE_H__
2
+#define __COGAPS_INTERNAL_STATE_H__
3
+
4
+#include "Archive.h"
5
+#include "Matrix.h"
6
+
7
+#include <Rcpp.h>
8
+
9
+typedef std::vector<Rcpp::NumericMatrix> SnapshotList;
10
+
11
+enum GapsPhase
12
+{
13
+    GAPS_CALIBRATION,
14
+    GAPS_COOLING,
15
+    GAPS_SAMPLING
16
+};
17
+
18
+struct GapsInternalState
19
+{
20
+    Vector chi2VecEquil;
21
+    Vector nAtomsAEquil;
22
+    Vector nAtomsPEquil;
23
+
24
+    Vector chi2VecSample;
25
+    Vector nAtomsASample;
26
+    Vector nAtomsPSample;
27
+
28
+    unsigned nIterA;
29
+    unsigned nIterP;
30
+    
31
+    unsigned nEquil;
32
+    unsigned nEquilCool;
33
+    unsigned nSample;
34
+
35
+    unsigned nSnapshots;
36
+    unsigned nOutputs;
37
+    bool messages;
38
+
39
+    unsigned iter;
40
+    GapsPhase phase;
41
+    uint32_t seed;
42
+
43
+    long checkpointInterval;
44
+    unsigned numCheckpoints;
45
+
46
+    GibbsSampler sampler;
47
+
48
+    SnapshotList snapshotsA;
49
+    SnapshotList snapshotsP;
50
+
51
+    GapsInternalState(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix,
52
+        unsigned nFactor, float alphaA, float alphaP, unsigned nE,
53
+        unsigned nEC, unsigned nS, float maxGibbsMassA,
54
+        float maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
55
+        char whichMatrixFixed, bool msg, bool singleCellRNASeq,
56
+        unsigned numOutputs, unsigned numSnapshots, uint32_t in_seed,
57
+        unsigned cptInterval)
58
+            :
59
+        chi2VecEquil(nE), nAtomsAEquil(nE), nAtomsPEquil(nE),
60
+        chi2VecSample(nS), nAtomsASample(nS), nAtomsPSample(nS),
61
+        nIterA(10), nIterP(10), nEquil(nE), nEquilCool(nEC), nSample(nS),
62
+        nSnapshots(numSnapshots), nOutputs(numOutputs), messages(msg),
63
+        iter(0), phase(GAPS_CALIBRATION), seed(in_seed),
64
+        checkpointInterval(cptInterval), numCheckpoints(0),
65
+        sampler(DMatrix, SMatrix, nFactor, alphaA, alphaP,
66
+            maxGibbsMassA, maxGibbsMassP, singleCellRNASeq, fixedPatterns,
67
+            whichMatrixFixed)
68
+    {}
69
+
70
+    GapsInternalState(unsigned nE, unsigned nS, unsigned nRow, unsigned nCol,
71
+    unsigned nFactor)
72
+            :
73
+        chi2VecEquil(nE), nAtomsAEquil(nE), nAtomsPEquil(nE),
74
+        chi2VecSample(nS), nAtomsASample(nS), nAtomsPSample(nS),
75
+        sampler(nRow, nCol, nFactor)
76
+    {}
77
+};
78
+
79
+inline void operator<<(Archive &ar, GapsInternalState &state)
80
+{
81
+    ar << state.chi2VecEquil;
82
+    ar << state.nAtomsAEquil;
83
+    ar << state.nAtomsPEquil;
84
+    ar << state.chi2VecSample;
85
+    ar << state.nAtomsASample;
86
+    ar << state.nAtomsPSample;
87
+    ar << state.nIterA;
88
+    ar << state.nIterP;
89
+    ar << state.nEquil;
90
+    ar << state.nEquilCool;
91
+    ar << state.nSample;
92
+    ar << state.nSnapshots;
93
+    ar << state.nOutputs;
94
+    ar << state.messages;
95
+    ar << state.iter;
96
+    ar << state.phase;
97
+    ar << state.seed;
98
+    ar << state.checkpointInterval;
99
+    ar << state.numCheckpoints;
100
+    ar << state.sampler;
101
+}
102
+
103
+inline void operator>>(Archive &ar, GapsInternalState &state)
104
+{
105
+    ar >> state.chi2VecEquil;
106
+    ar >> state.nAtomsAEquil;
107
+    ar >> state.nAtomsPEquil;
108
+    ar >> state.chi2VecSample;
109
+    ar >> state.nAtomsASample;
110
+    ar >> state.nAtomsPSample;
111
+    ar >> state.nIterA;
112
+    ar >> state.nIterP;
113
+    ar >> state.nEquil;
114
+    ar >> state.nEquilCool;
115
+    ar >> state.nSample;
116
+    ar >> state.nSnapshots;
117
+    ar >> state.nOutputs;
118
+    ar >> state.messages;
119
+    ar >> state.iter;
120
+    ar >> state.phase;
121
+    ar >> state.seed;
122
+    ar >> state.checkpointInterval;
123
+    ar >> state.numCheckpoints;
124
+    ar >> state.sampler;
125
+}
126
+
127
+#endif
0 128
\ No newline at end of file
... ...
@@ -1,4 +1,4 @@
1
-PKG_CPPFLAGS = -O2
1
+PKG_CPPFLAGS = -Wall -Wextra -O2
2 2
 OBJECTS =   Algorithms.o \
3 3
             AtomicSupport.o \
4 4
             Cogaps.o \
... ...
@@ -11,5 +11,5 @@ OBJECTS =   Algorithms.o \
11 11
             cpp_tests/testAtomicSupport.o \
12 12
             cpp_tests/testGibbsSampler.o \
13 13
             cpp_tests/testMatrix.o \
14
-            cpp_tests/testRandom.o
15
-
14
+            cpp_tests/testRandom.o \
15
+            cpp_tests/testSerialization.o
... ...
@@ -6,27 +6,66 @@ static const float EPSILON = 1.e-10;
6 6
 
7 7
 /******************************** HELPER *******************************/
8 8
 
9
-static void updateHelper(float& val, float delta)
9
+static void updateHelper2(float& val, float delta)
10 10
 {
11
-    val += delta;
12
-    if (std::abs(val) < EPSILON)
11
+    val = std::abs(val + delta) < EPSILON ? 0.0 : val + delta;
12
+}
13
+
14
+template<class GenericMatrix>
15
+static void updateHelper(GenericMatrix &mat, const MatrixChange &change)
16
+{
17
+    updateHelper2(mat(change.row1, change.col1), change.delta1);
18
+    if (change.nChanges > 1)
13 19
     {
14
-        val = 0.0;
20
+        updateHelper2(mat(change.row2, change.col2), change.delta2);
15 21
     }
16 22
 }
17 23
 
18
-/******************************** VECTOR *******************************/
19
-
20
-Rcpp::NumericVector Vector::rVec() const
24
+template<class GenericMatrix>
25
+static Rcpp::NumericMatrix convertToRMatrix(const GenericMatrix &mat)
21 26
 {
22
-    return Rcpp::wrap(mValues);
27
+    Rcpp::NumericMatrix rmat(mat.nRow(), mat.nCol());
28
+    for (unsigned i = 0; i < mat.nRow(); ++i)
29
+    {
30
+        for (unsigned j = 0; j < mat.nCol(); ++j)
31
+        {
32
+            rmat(i,j) = mat(i,j);
33
+        }
34
+    }
35
+    return rmat;
23 36
 }
24 37
 
38
+/******************************** VECTOR *******************************/
39
+
25 40
 void Vector::concat(const Vector& vec)
26 41
 {
27 42
     mValues.insert(mValues.end(), vec.mValues.begin(), vec.mValues.end());
28 43
 }
29 44
 
45
+void Vector::operator+=(const Vector &vec)
46
+{
47
+    for (unsigned i = 0; i < size(); ++i)
48
+    {
49
+        mValues[i] += vec[i];
50
+    }
51
+}
52
+
53
+void operator<<(Archive &ar, Vector &vec)
54
+{
55
+    for (unsigned i = 0; i < vec.size(); ++i)
56
+    {
57
+        ar << vec[i];
58
+    }
59
+}
60
+
61
+void operator>>(Archive &ar, Vector &vec)
62
+{
63
+    for (unsigned i = 0; i < vec.size(); ++i)
64
+    {
65
+        ar >> vec.mValues[i];
66
+    }
67
+}
68
+
30 69
 /****************************** ROW MATRIX *****************************/
31 70
 
32 71
 RowMatrix::RowMatrix(unsigned nrow, unsigned ncol)
... ...
@@ -60,44 +99,29 @@ RowMatrix::RowMatrix(const RowMatrix &mat)
60 99
     }
61 100
 }
62 101
 
63
-Vector& RowMatrix::getRow(unsigned row)
64
-{
65
-    return mRows[row];
66
-}
67
-
68
-const Vector& RowMatrix::getRow(unsigned row) const
69
-{
70
-    return mRows[row];
71
-}
72
-
73 102
 void RowMatrix::update(const MatrixChange &change)
74 103
 {
75
-    updateHelper(mRows[change.row1][change.col1], change.delta1);
76
-    if (change.nChanges > 1)
77
-    {
78
-        updateHelper(mRows[change.row2][change.col2], change.delta2);
79
-    }
104
+    updateHelper<RowMatrix>(*this, change);
80 105
 }
81 106
 
82 107
 Rcpp::NumericMatrix RowMatrix::rMatrix() const
83 108
 {
84
-    Rcpp::NumericMatrix mat(mNumRows, mNumCols);
109
+    return convertToRMatrix<RowMatrix>(*this);
110
+}
85 111
 
86
-    for (unsigned i = 0; i < mNumRows; ++i)
112
+void operator<<(Archive &ar, RowMatrix &mat)
113
+{
114
+    for (unsigned i = 0; i < mat.nRow(); ++i)
87 115
     {
88
-        for (unsigned j = 0; j < mNumCols; ++j)
89
-        {
90
-            mat(i,j) = this->operator()(i,j);
91
-        }
116
+        ar << mat.mRows[i];
92 117
     }
93
-    return mat;
94 118
 }
95 119
 
96
-void Vector::operator+=(const Vector &vec)
120
+void operator>>(Archive &ar, RowMatrix &mat)
97 121
 {
98
-    for (unsigned i = 0; i < size(); ++i)
122
+    for (unsigned i = 0; i < mat.nRow(); ++i)
99 123
     {
100
-        mValues[i] += vec[i];
124
+        ar >> mat.mRows[i];
101 125
     }
102 126
 }
103 127
 
... ...
@@ -134,35 +158,42 @@ ColMatrix::ColMatrix(const ColMatrix &mat)
134 158
     }
135 159
 }
136 160
 
137
-Vector& ColMatrix::getCol(unsigned col)
161
+void ColMatrix::update(const MatrixChange &change)
138 162
 {
139
-    return mCols[col];
163
+    updateHelper<ColMatrix>(*this, change);
140 164
 }
141 165
 
142
-const Vector& ColMatrix::getCol(unsigned col) const
166
+Rcpp::NumericMatrix ColMatrix::rMatrix() const
143 167
 {
144
-    return mCols[col];
168
+    return convertToRMatrix<ColMatrix>(*this);
145 169
 }
146 170
 
147
-void ColMatrix::update(const MatrixChange &change)
171
+void operator<<(Archive &ar, ColMatrix &mat)
148 172
 {
149
-    updateHelper(mCols[change.col1][change.row1], change.delta1);
150
-    if (change.nChanges > 1)
173
+    for (unsigned j = 0; j < mat.nCol(); ++j)
151 174
     {
152
-        updateHelper(mCols[change.col2][change.row2], change.delta2);
175
+        ar << mat.mCols[j];
153 176
     }
154 177
 }
155 178
 
156
-Rcpp::NumericMatrix ColMatrix::rMatrix() const
179
+void operator>>(Archive &ar, ColMatrix &mat)
157 180
 {
158
-    Rcpp::NumericMatrix mat(mNumRows, mNumCols);
159
-
160
-    for (unsigned i = 0; i < mNumRows; ++i)
181
+    for (unsigned j = 0; j < mat.nCol(); ++j)
161 182
     {
162
-        for (unsigned j = 0; j < mNumCols; ++j)
163
-        {
164
-            mat(i,j) = this->operator()(i,j);
165
-        }
183
+        ar >> mat.mCols[j];
166 184
     }
167
-    return mat;
168 185
 }
186
+
187
+/**************************** TWO-WAY MATRIX ***************************/
188
+
189
+void operator<<(Archive &ar, TwoWayMatrix &mat)
190
+{
191
+    ar << mat.mRowMatrix;
192
+    ar << mat.mColMatrix;
193
+}
194
+
195
+void operator>>(Archive &ar, TwoWayMatrix &mat)
196
+{
197
+    ar >> mat.mRowMatrix;
198
+    ar >> mat.mColMatrix;
199
+}
169 200
\ No newline at end of file
... ...
@@ -1,6 +1,8 @@
1 1
 #ifndef __COGAPS_MATRIX_H__
2 2
 #define __COGAPS_MATRIX_H__
3 3
 
4
+#include "Archive.h"
5
+
4 6
 #include <Rcpp.h>
5 7
 #include <vector>
6 8
 
... ...
@@ -34,14 +36,6 @@ struct MatrixChange
34 36
     {}
35 37
 };
36 38
 
37
-class Vector;
38
-class RowMatrix;
39
-class ColMatrix;
40
-class TwoWayMatrix;
41
-
42
-// no polymorphism to prevent virtual function overhead, not really
43
-// needed anyways since few functions are used on all types of matrices
44
-
45 39
 class Vector
46 40
 {
47 41
 private:
... ...
@@ -52,21 +46,24 @@ public:
52 46
 
53 47
     Vector(unsigned size) : mValues(std::vector<matrix_data_t>(size, 0.0)) {}
54 48
     Vector(const std::vector<matrix_data_t>& v) : mValues(v) {}
49
+    Vector(const Vector &vec) : mValues(vec.mValues) {}
55 50
 
56 51
     matrix_data_t& operator[](unsigned i) {return mValues[i];}
57 52
     matrix_data_t operator[](unsigned i) const {return mValues[i];}
58 53
     unsigned size() const {return mValues.size();}
59 54
 
60
-    Rcpp::NumericVector rVec() const;
55
+    Rcpp::NumericVector rVec() const {return Rcpp::wrap(mValues);}
61 56
     void concat(const Vector& vec);
62
-
63 57
     void operator+=(const Vector &vec);
58
+
59
+    friend void operator<<(Archive &ar, Vector &vec);
60
+    friend void operator>>(Archive &ar, Vector &vec);
64 61
 };
65 62
 
66 63
 class RowMatrix
67 64
 {
68 65
 private:
69
-
66
+    
70 67
     std::vector<Vector> mRows;
71 68
     unsigned mNumRows, mNumCols;
72 69
 
... ...
@@ -82,11 +79,14 @@ public:
82 79
     matrix_data_t& operator()(unsigned r, unsigned c) {return mRows[r][c];}
83 80
     matrix_data_t operator()(unsigned r, unsigned c) const {return mRows[r][c];}
84 81
 
85
-    Vector& getRow(unsigned row);
86
-    const Vector& getRow(unsigned row) const;
82
+    Vector& getRow(unsigned row) {return mRows[row];}
83
+    const Vector& getRow(unsigned row) const {return mRows[row];}
87 84
 
88 85
     void update(const MatrixChange &change);
89 86
     Rcpp::NumericMatrix rMatrix() const;
87
+
88
+    friend void operator<<(Archive &ar, RowMatrix &mat);
89
+    friend void operator>>(Archive &ar, RowMatrix &mat);
90 90
 };
91 91
 
92 92
 class ColMatrix
... ...
@@ -108,14 +108,17 @@ public:
108 108
     matrix_data_t& operator()(unsigned r, unsigned c) {return mCols[c][r];}
109 109
     matrix_data_t operator()(unsigned r, unsigned c) const {return mCols[c][r];}
110 110
 
111
-    Vector& getCol(unsigned col);
112
-    const Vector& getCol(unsigned col) const;
111
+    Vector& getCol(unsigned col) {return mCols[col];}
112
+    const Vector& getCol(unsigned col) const {return mCols[col];}
113 113
 
114 114
     void update(const MatrixChange &change);
115 115
     Rcpp::NumericMatrix rMatrix() const;
116
+
117
+    friend void operator<<(Archive &ar, ColMatrix &mat);
118
+    friend void operator>>(Archive &ar, ColMatrix &mat);
116 119
 };
117 120
 
118
-// gain performance at the cost of memory
121
+// gain performance at the expense of memory
119 122
 class TwoWayMatrix
120 123
 {
121 124
 private:
... ...
@@ -135,25 +138,23 @@ public:
135 138
 
136 139
     unsigned nRow() const {return mRowMatrix.nRow();}
137 140
     unsigned nCol() const {return mRowMatrix.nCol();}
138
-
139
-    matrix_data_t operator()(unsigned r, unsigned c) const
140
-    {
141
-        return mRowMatrix(r,c);
142
-    }
143
-
141
+    
144 142
     const Vector& getRow(unsigned row) const {return mRowMatrix.getRow(row);}
145 143
     const Vector& getCol(unsigned col) const {return mColMatrix.getCol(col);}
146 144
 
147 145
     void set(unsigned row, unsigned col, float value)
148 146
     {
149
-        mRowMatrix(row, col) = value;
150
-        mColMatrix(row, col) = value;
147
+        mRowMatrix(row,col) = value;
148
+        mColMatrix(row,col) = value;
151 149
     }
152 150
     
153 151
     Rcpp::NumericMatrix rMatrix() const
154 152
     {
155 153
         return mRowMatrix.rMatrix();
156 154
     }
155
+
156
+    friend void operator<<(Archive &ar, TwoWayMatrix &mat);
157
+    friend void operator>>(Archive &ar, TwoWayMatrix &mat);
157 158
 };
158 159
 
159 160
 #endif
160 161
\ No newline at end of file
... ...
@@ -1,8 +1,11 @@
1 1
 // [[Rcpp::depends(BH)]]
2 2
 
3
+// need -O0 to run in valgrind
4
+#pragma GCC push_options
5
+#pragma GCC optimize ("O2")
6
+
3 7
 #include "Random.h"
4 8
 
5
-#include <boost/random/mersenne_twister.hpp>
6 9
 #include <boost/random/uniform_01.hpp>
7 10
 #include <boost/random/uniform_real_distribution.hpp>
8 11
 #include <boost/random/normal_distribution.hpp>
... ...
@@ -13,6 +16,8 @@
13 16
 #include <boost/math/distributions/normal.hpp>
14 17
 #include <boost/math/distributions/exponential.hpp>
15 18
 
19
+#include <boost/random/mersenne_twister.hpp>
20
+
16 21
 #include <stdint.h>
17 22
 
18 23
 #define Q_GAMMA_THRESHOLD 1E-6
... ...
@@ -20,8 +25,19 @@
20 25
 
21 26
 typedef boost::random::mt19937 RNGType;
22 27
 //typedef boost::random::mt11213b RNGType; // should be faster
28
+
23 29
 static RNGType rng;
24 30
 
31
+void gaps::random::save(Archive &ar)
32
+{
33
+    ar << rng;
34
+}
35
+
36
+void gaps::random::load(Archive &ar)
37
+{
38
+    ar >> rng;
39
+}
40
+
25 41
 void gaps::random::setSeed(uint32_t seed)
26 42
 {
27 43
     rng.seed(seed);
... ...
@@ -57,7 +73,7 @@ float gaps::random::uniform(float a, float b)
57 73
     {
58 74
         return a;
59 75
     }
60
-    else if (a < b)
76
+    else
61 77
     {
62 78
         boost::random::uniform_real_distribution<> dist(a,b);
63 79
         return dist(rng);
... ...
@@ -77,7 +93,7 @@ uint64_t gaps::random::uniform64(uint64_t a, uint64_t b)
77 93
     {
78 94
         return a;
79 95
     }
80
-    else if (a < b)
96
+    else
81 97
     {
82 98
         boost::random::uniform_int_distribution<uint64_t> dist(a,b);
83 99
         return dist(rng);
... ...
@@ -127,3 +143,4 @@ float gaps::random::p_norm(float p, float mean, float sd)
127 143
     return cdf(norm, p);
128 144
 }
129 145
 
146
+#pragma GCC pop_options
... ...
@@ -1,8 +1,11 @@
1 1
 #ifndef __COGAPS_RANDOM_H__
2 2
 #define __COGAPS_RANDOM_H__
3 3
 
4
+#include "Archive.h"
5
+
4 6
 #include <stdint.h>
5 7
 #include <vector>
8
+#include <fstream>
6 9
 
7 10
 namespace gaps
8 11
 {
... ...
@@ -27,6 +30,9 @@ namespace random
27 30
     float d_norm(float d, float mean, float sd);
28 31
     float q_norm(float q, float mean, float sd);
29 32
     float p_norm(float p, float mean, float sd);
33
+
34
+    void save(Archive &ar);
35
+    void load(Archive &ar);
30 36
 }
31 37
 
32 38
 }
... ...
@@ -6,8 +6,8 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // cogaps
9
-Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, float alphaA, float alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, float maxGibbsMassA, float maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq, unsigned numOutputs, unsigned numSnapshots);
10
-RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP fixedPatternsSEXP, SEXP whichMatrixFixedSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP, SEXP numOutputsSEXP, SEXP numSnapshotsSEXP) {
9
+Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, float alphaA, float alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, float maxGibbsMassA, float maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq, unsigned numOutputs, unsigned numSnapshots, unsigned checkpointInterval);
10
+RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP fixedPatternsSEXP, SEXP whichMatrixFixedSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP, SEXP numOutputsSEXP, SEXP numSnapshotsSEXP, SEXP checkpointIntervalSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject rcpp_result_gen;
13 13
     Rcpp::RNGScope rcpp_rngScope_gen;
... ...
@@ -28,7 +28,19 @@ BEGIN_RCPP
28 28
     Rcpp::traits::input_parameter< bool >::type singleCellRNASeq(singleCellRNASeqSEXP);
29 29
     Rcpp::traits::input_parameter< unsigned >::type numOutputs(numOutputsSEXP);
30 30
     Rcpp::traits::input_parameter< unsigned >::type numSnapshots(numSnapshotsSEXP);
31
-    rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots));
31
+    Rcpp::traits::input_parameter< unsigned >::type checkpointInterval(checkpointIntervalSEXP);
32
+    rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval));
33
+    return rcpp_result_gen;
34
+END_RCPP
35
+}
36
+// cogapsFromCheckpoint
37
+Rcpp::List cogapsFromCheckpoint(const std::string& fileName);
38
+RcppExport SEXP _CoGAPS_cogapsFromCheckpoint(SEXP fileNameSEXP) {
39
+BEGIN_RCPP
40
+    Rcpp::RObject rcpp_result_gen;
41
+    Rcpp::RNGScope rcpp_rngScope_gen;
42
+    Rcpp::traits::input_parameter< const std::string& >::type fileName(fileNameSEXP);
43
+    rcpp_result_gen = Rcpp::wrap(cogapsFromCheckpoint(fileName));
32 44
     return rcpp_result_gen;
33 45
 END_RCPP
34 46
 }
... ...
@@ -44,7 +56,8 @@ END_RCPP
44 56
 }
45 57
 
46 58
 static const R_CallMethodDef CallEntries[] = {
47
-    {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 17},
59
+    {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 18},
60
+    {"_CoGAPS_cogapsFromCheckpoint", (DL_FUNC) &_CoGAPS_cogapsFromCheckpoint, 1},
48 61
     {"_CoGAPS_run_catch_unit_tests", (DL_FUNC) &_CoGAPS_run_catch_unit_tests, 0},
49 62
     {NULL, NULL, 0}
50 63
 };
... ...
@@ -11537,7 +11537,7 @@ int main (int argc, char * const argv[]) {
11537 11537
 #endif
11538 11538
 
11539 11539
 #define INFO( msg ) INTERNAL_CATCH_INFO( "INFO", msg )
11540
-#define WARN( msg ) INTERNAL_CATCH_MSG( "WARN", Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, msg )
11540
+//#define WARN( msg ) INTERNAL_CATCH_MSG( "WARN", Catch::ResultWas::Warning, Catch::ResultDisposition::ContinueOnFailure, msg )
11541 11541
 #define SCOPED_INFO( msg ) INTERNAL_CATCH_INFO( "INFO", msg )
11542 11542
 #define CAPTURE( msg ) INTERNAL_CATCH_INFO( "CAPTURE", #msg " := " << Catch::toString(msg) )
11543 11543
 #define SCOPED_CAPTURE( msg ) INTERNAL_CATCH_INFO( "CAPTURE", #msg " := " << Catch::toString(msg) )
... ...
@@ -42,10 +42,8 @@ TEST_CASE("Test AtomicSupport.h")
42 42
             
43 43
             REQUIRE(change.label == 'A');
44 44
             REQUIRE(change.nChanges == prop.nChanges);
45
-            cond = change.row1 >= 0 && change.row2 < nrow;
46
-            REQUIRE(cond);
47
-            cond = change.col1 >= 0 && change.col2 < ncol;
48
-            REQUIRE(cond);
45
+            REQUIRE(change.row2 < nrow);
46
+            REQUIRE(change.col2 < ncol);
49 47
             REQUIRE(change.delta1 == prop.delta1);
50 48
             REQUIRE(change.delta2 == prop.delta2);
51 49
 
... ...
@@ -128,14 +126,10 @@ TEST_CASE("Internal AtomicSupport Tests")
128 126
     {
129 127
         for (unsigned i = 0; i < 10000; ++i)
130 128
         {
131
-            REQUIRE(Adomain.getRow(gaps::random::uniform64()) >= 0);
132 129
             REQUIRE(Adomain.getRow(gaps::random::uniform64()) < nrow);
133
-            REQUIRE(Adomain.getCol(gaps::random::uniform64()) >= 0);
134 130
             REQUIRE(Adomain.getCol(gaps::random::uniform64()) < ncol);
135 131
 
136
-            REQUIRE(Pdomain.getRow(gaps::random::uniform64()) >= 0);
137 132
             REQUIRE(Pdomain.getRow(gaps::random::uniform64()) < nrow);
138
-            REQUIRE(Pdomain.getCol(gaps::random::uniform64()) >= 0);
139 133
             REQUIRE(Pdomain.getCol(gaps::random::uniform64()) < ncol);            
140 134
         }
141 135
     }
... ...
@@ -30,9 +30,6 @@ TEST_CASE("Test Random.h - Random Number Generation")
30 30
 
31 31
     SECTION("Test uniform distribution over general interval")
32 32
     {
33
-        // invalid bounds
34
-        REQUIRE_THROWS(gaps::random::uniform(2.0, 1.4));
35
-
36 33
         // bounds equal
37 34
         REQUIRE(gaps::random::uniform(4.3,4.3) == 4.3);
38 35
 
39 36
new file mode 100644
... ...
@@ -0,0 +1,219 @@
1
+#include "catch.h"
2
+#include "../Archive.h"
3
+#include "../Matrix.h"
4
+#include "../AtomicSupport.h"
5
+#include "../GibbsSampler.h"
6
+#include "../InternalState.h"
7
+#include "../Random.h"
8
+
9
+TEST_CASE("Test Archive.h")
10
+{
11
+    SECTION("Reading/Writing to an Archive")
12
+    {
13
+        Archive ar1("test_ar.temp", ARCHIVE_WRITE);
14
+        ar1 << 3;
15
+        ar1.close();
16
+
17
+        Archive ar2("test_ar.temp", ARCHIVE_READ);
18
+        unsigned i = 0;
19
+        ar2 >> i;
20
+        REQUIRE(i == 3);
21
+        ar2.close();
22
+    }
23
+
24
+    SECTION("Serialization of primitive types")
25
+    {
26
+        // test values
27
+        unsigned u_read = 0, u_write = 456;
28
+        uint32_t u32_read = 0, u32_write = 512;
29
+        uint64_t u64_read = 0, u64_write = 0xAABBCCDDEE;
30
+        float f_read = 0.f, f_write = 0.123542f;
31
+        double d_read = 0., d_write = 0.54362;
32
+        bool b_read = false, b_write = true;
33
+
34
+        // write to archive
35
+        Archive arWrite("test_ar.temp", ARCHIVE_WRITE);
36
+        arWrite << u_write;
37
+        arWrite << u32_write;
38
+        arWrite << u64_write;
39
+        arWrite << f_write;
40
+        arWrite << d_write;
41
+        arWrite << b_write;
42
+        arWrite.close();
43
+
44
+        // read from archive
45
+        Archive arRead("test_ar.temp", ARCHIVE_READ);
46
+        arRead >> u_read;
47
+        arRead >> u32_read;
48
+        arRead >> u64_read;
49
+        arRead >> f_read;
50
+        arRead >> d_read;
51
+        arRead >> b_read;
52
+        arRead.close();
53
+
54
+        // test that values are the same
55
+        REQUIRE(u_read == u_write);
56
+        REQUIRE(u32_read == u32_write);
57
+        REQUIRE(u64_read == u64_write);
58
+        REQUIRE(f_read == f_write);
59
+        REQUIRE(d_read == d_write);
60
+        REQUIRE(b_read == b_write);
61
+    }
62
+    
63
+    SECTION("Vector Serialization")
64
+    {
65
+        Vector vec_read(100), vec_write(100);
66
+        for (unsigned i = 0; i < 100; ++i)
67
+        {
68
+            vec_write[i] = gaps::random::normal(0.0, 2.0);
69
+        }
70
+
71
+        Archive arWrite("test_ar.temp", ARCHIVE_WRITE);
72
+        arWrite << vec_write;
73
+        arWrite.close();
74
+
75
+        Archive arRead("test_ar.temp", ARCHIVE_READ);
76
+        arRead >> vec_read;
77
+        arRead.close();
78
+
79
+        REQUIRE(vec_read.size() == vec_write.size());
80
+
81
+        for (unsigned i = 0; i < 100; ++i)
82
+        {
83
+            REQUIRE(vec_read[i] == vec_write[i]);
84
+        }
85
+    }
86
+
87
+    SECTION("Matrix Serialization")
88
+    {
89
+        RowMatrix rMat_read(100,100), rMat_write(100,100);
90
+        ColMatrix cMat_read(100,100), cMat_write(100,100);
91
+        TwoWayMatrix twMat_read(100,100), twMat_write(100,100);
92
+
93
+        for (unsigned i = 0; i < 100; ++i)
94
+        {
95
+            for (unsigned j = 0; j < 100; ++j)
96
+            {
97
+                rMat_write(i,j) = gaps::random::normal(0.0, 2.0);
98
+                cMat_write(i,j) = gaps::random::normal(0.0, 2.0);
99
+                twMat_write.set(i,j,gaps::random::normal(0.0, 2.0));
100
+            }
101
+        }
102
+
103
+        Archive arWrite("test_ar.temp", ARCHIVE_WRITE);
104
+        arWrite << rMat_write;
105
+        arWrite << cMat_write;
106
+        arWrite << twMat_write;
107
+        arWrite.close();
108
+
109
+        Archive arRead("test_ar.temp", ARCHIVE_READ);
110
+        arRead >> rMat_read;
111
+        arRead >> cMat_read;
112
+        arRead >> twMat_read;
113
+        arRead.close();
114
+
115
+        REQUIRE(rMat_read.nRow() == rMat_write.nRow());
116
+        REQUIRE(rMat_read.nCol() == rMat_write.nCol());
117
+        REQUIRE(cMat_read.nRow() == cMat_write.nRow());
118
+        REQUIRE(cMat_read.nCol() == cMat_write.nCol());
119
+        REQUIRE(twMat_read.nRow() == twMat_write.nRow());
120
+        REQUIRE(twMat_read.nCol() == twMat_write.nCol());
121
+    
122
+        for (unsigned i = 0; i < 100; ++i)
123
+        {
124
+            for (unsigned j = 0; j < 100; ++j)
125
+            {
126
+                REQUIRE(rMat_read(i,j) == rMat_write(i,j));
127
+                REQUIRE(cMat_read(i,j) == cMat_write(i,j));
128
+                REQUIRE(twMat_read.getRow(i)[j] == twMat_write.getRow(i)[j]);
129
+            }
130
+        }
131
+    }
132
+    
133
+    SECTION("Atomic Serialization")
134
+    {
135
+        AtomicSupport domain_read('A',100,100), domain_write('A',100,100);
136
+        std::vector<uint64_t> locations;
137
+        for (unsigned i = 0; i < 10000; ++i)
138
+        {
139
+            AtomicProposal prop = domain_write.makeProposal();
140
+            locations.push_back(prop.pos1);
141
+            locations.push_back(prop.pos2);
142
+            domain_write.acceptProposal(prop);
143
+        }
144
+
145
+        Archive arWrite("test_ar.temp", ARCHIVE_WRITE);
146
+        arWrite << domain_write;
147
+        arWrite.close();
148
+    
149
+        Archive arRead("test_ar.temp", ARCHIVE_READ);
150
+        arRead >> domain_read;
151
+        arRead.close();
152
+
153
+        REQUIRE(domain_read.alpha() == domain_write.alpha());
154
+        REQUIRE(domain_read.lambda() == domain_write.lambda());
155
+        REQUIRE(domain_read.totalMass() == domain_write.totalMass());
156
+        REQUIRE(domain_read.numAtoms() == domain_write.numAtoms());
157
+
158
+#ifdef GAPS_INTERNAL_TESTS
159
+        std::map<uint64_t, float>::iterator readIter, writeIter;
160
+        readIter = domain_read.mAtomicDomain.begin();
161
+        writeIter = domain_write.mAtomicDomain.begin();        
162
+        while (readIter != domain_read.mAtomicDomain.end())
163
+        {
164
+            REQUIRE(readIter->first == writeIter->first);   
165
+            REQUIRE(readIter->second == writeIter->second);
166
+            ++readIter;
167
+            ++writeIter;
168
+        }
169
+#endif
170
+    }
171
+
172
+    SECTION("GibbsSampler Serialization")
173
+    {
174
+
175
+    }
176
+
177
+    SECTION("GapsInternalState Serialization")
178
+    {
179
+
180
+    }
181
+
182
+    SECTION("Random Generator Serialization")
183
+    {
184
+        std::vector<float> randSequence;
185
+
186
+        gaps::random::setSeed(123);
187
+        volatile float burn_in = 0.0;
188
+        for (unsigned i = 0; i < 1000; ++i)
189
+        {
190
+            burn_in = gaps::random::uniform(0,1);
191
+        }
192
+        REQUIRE(burn_in < 1);
193
+
194
+        Archive arWrite("test_ar.temp", ARCHIVE_WRITE);
195
+        gaps::random::save(arWrite);
196
+        arWrite.close();
197
+
198
+        for (unsigned i = 0; i < 1000; ++i)
199
+        {
200
+            randSequence.push_back(gaps::random::uniform());
201
+            randSequence.push_back(gaps::random::uniform(0.3, 6.4));
202
+            randSequence.push_back(gaps::random::normal(0.0, 2.0));
203
+            randSequence.push_back(gaps::random::exponential(5.5));
204
+        }
205
+    
206
+        gaps::random::setSeed(11);
207
+        Archive arRead("test_ar.temp", ARCHIVE_READ);
208
+        gaps::random::load(arRead);
209
+        arRead.close();
210
+
211
+        for (unsigned i = 0; i < 1000; ++i)
212
+        {
213
+            REQUIRE(gaps::random::uniform() == randSequence[i++]);
214
+            REQUIRE(gaps::random::uniform(0.3, 6.4) == randSequence[i++]);
215
+            REQUIRE(gaps::random::normal(0.0, 2.0) == randSequence[i++]);
216
+            REQUIRE(gaps::random::exponential(5.5) == randSequence[i]);
217
+        }
218
+    }
219
+}
0 220
\ No newline at end of file