Browse code

added interval parameter for creating checkpoints

sherman5 authored on 09/01/2018 15:22:34
Showing6 changed files

... ...
@@ -1,8 +1,8 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots) {
5
-    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots)
4
+cogaps <- function(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval) {
5
+    .Call('_CoGAPS_cogaps', PACKAGE = 'CoGAPS', DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval)
6 6
 }
7 7
 
8 8
 cogapsFromCheckpoint <- function(fileName) {
... ...
@@ -51,6 +51,8 @@
51 51
 #'@param fixedPatterns matrix of fixed values in either A or P matrix
52 52
 #'@param whichMatrixFixed character to indicate whether A or P matrix
53 53
 #'  contains the fixed patterns
54
+#'@param checkpoint_file_name name of file to store checkpoint
55
+#'@param checkpoint_interval time (in seconds) between cogaps checkpoints
54 56
 #'@export
55 57
 
56 58
 #--CHANGES 1/20/15--
... ...
@@ -63,7 +65,9 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
63 65
                     nMaxA = 100000, max_gibbmass_paraA = 100.0,
64 66
                     alphaP = 0.01, nMaxP = 100000, max_gibbmass_paraP = 100.0,
65 67
                     seed=-1, messages=TRUE, singleCellRNASeq=FALSE,
66
-                    fixedPatterns = matrix(0), whichMatrixFixed = 'N')
68
+                    fixedPatterns = matrix(0), whichMatrixFixed = 'N',
69
+                    checkpoint_file_name = "gaps_checkpoint.out",
70
+                    checkpoint_interval = 0)
67 71
 {
68 72
     # Floor the parameters that are integers to prevent allowing doubles.
69 73
     nFactor <- floor(nFactor)
... ...
@@ -100,7 +104,7 @@ gapsRun <- function(D, S, ABins = data.frame(), PBins = data.frame(),
100 104
     cogapResult <- cogaps(as.matrix(D), as.matrix(S), nFactor, alphaA, alphaP,
101 105
         nEquil, floor(nEquil/10), nSample, max_gibbmass_paraA, max_gibbmass_paraP,
102 106
         fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq,
103
-        nOutR, numSnapshots)
107
+        nOutR, numSnapshots, checkpoint_interval)
104 108
 
105 109
     # convert returned files to matrices to simplify visualization and processing
106 110
     cogapResult$Amean <- as.matrix(cogapResult$Amean);
... ...
@@ -13,7 +13,8 @@ gapsRun(D, S, ABins = data.frame(), PBins = data.frame(), nFactor = 7,
13 13
   max_gibbmass_paraA = 100, alphaP = 0.01, nMaxP = 1e+05,
14 14
   max_gibbmass_paraP = 100, seed = -1, messages = TRUE,
15 15
   singleCellRNASeq = FALSE, fixedPatterns = matrix(0),
16
-  whichMatrixFixed = "N")
16
+  whichMatrixFixed = "N", checkpoint_file_name = "gaps_checkpoint.out",
17
+  checkpoint_interval = 0)
17 18
 }
18 19
 \arguments{
19 20
 \item{D}{data matrix}
... ...
@@ -69,6 +70,10 @@ domain for relative probabilities}
69 70
 
70 71
 \item{whichMatrixFixed}{character to indicate whether A or P matrix
71 72
 contains the fixed patterns}
73
+
74
+\item{checkpoint_file_name}{name of file to store checkpoint}
75
+
76
+\item{checkpoint_interval}{time (in seconds) between cogaps checkpoints}
72 77
 }
73 78
 \description{
74 79
 \code{gapsRun} calls the C++ MCMC code and performs Bayesian
... ...
@@ -1,6 +1,7 @@
1 1
 #include "GibbsSampler.h"
2 2
 #include "Matrix.h"
3 3
 #include "Archive.h"
4
+#include "InternalState.h"
4 5
 
5 6
 #include <Rcpp.h>
6 7
 #include <ctime>
... ...
@@ -11,113 +12,27 @@
11 12
 
12 13
 #define ARCHIVE_MAGIC_NUM 0xCE45D32A
13 14
 
14
-typedef std::vector<Rcpp::NumericMatrix> SnapshotList;
15
+namespace bpt = boost::posix_time;
15 16
 
16
-enum GapsPhase
17
-{
18
-    GAPS_CALIBRATION,
19
-    GAPS_COOLING,
20
-    GAPS_SAMPLING
21
-};
17
+static bpt::ptime lastCheckpoint;
22 18
 
23
-struct GapsInternalState
19
+static void createCheckpoint(GapsInternalState &state)
24 20
 {
25
-    Vector chi2VecEquil;
26
-    Vector nAtomsAEquil;
27
-    Vector nAtomsPEquil;
28
-
29
-    Vector chi2VecSample;
30
-    Vector nAtomsASample;
31
-    Vector nAtomsPSample;
32
-
33
-    unsigned nIterA;
34
-    unsigned nIterP;
35
-    
36
-    unsigned nEquil;
37
-    unsigned nEquilCool;
38
-    unsigned nSample;
39
-
40
-    unsigned nSnapshots;
41
-    unsigned nOutputs;
42
-    bool messages;
43
-
44
-    unsigned iter;
45
-    GapsPhase phase;
46
-    uint32_t seed;
47
-
48
-    GibbsSampler sampler;
49
-
50
-    SnapshotList snapshotsA;
51
-    SnapshotList snapshotsP;
52
-
53
-    GapsInternalState(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix,
54
-        unsigned nFactor, double alphaA, double alphaP, unsigned nE,
55
-        unsigned nEC, unsigned nS, double maxGibbsMassA,
56
-        double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
57
-        char whichMatrixFixed, bool msg, bool singleCellRNASeq,
58
-        unsigned numOutputs, unsigned numSnapshots, uint32_t in_seed)
59
-            :
60
-        chi2VecEquil(nE), nAtomsAEquil(nE), nAtomsPEquil(nE),
61
-        chi2VecSample(nS), nAtomsASample(nS), nAtomsPSample(nS),
62
-        nIterA(10), nIterP(10), nEquil(nE), nEquilCool(nEC), nSample(nS),
63
-        nSnapshots(numSnapshots), nOutputs(numOutputs), messages(msg),
64
-        iter(0), phase(GAPS_CALIBRATION), seed(in_seed),
65
-        sampler(DMatrix, SMatrix, nFactor, alphaA, alphaP,
66
-            maxGibbsMassA, maxGibbsMassP, singleCellRNASeq, fixedPatterns,
67
-            whichMatrixFixed)
68
-    {}
69
-
70
-    GapsInternalState(unsigned nE, unsigned nS, unsigned nRow, unsigned nCol,
71
-    unsigned nFactor)
72
-            :
73
-        chi2VecEquil(nE), nAtomsAEquil(nE), nAtomsPEquil(nE),
74
-        chi2VecSample(nS), nAtomsASample(nS), nAtomsPSample(nS),
75
-        sampler(nRow, nCol, nFactor)
76
-    {}
77
-};
78
-
79
-void operator<<(Archive &ar, GapsInternalState &state)
80
-{
81
-    ar << state.chi2VecEquil;
82
-    ar << state.nAtomsAEquil;
83
-    ar << state.nAtomsPEquil;
84
-    ar << state.chi2VecSample;
85
-    ar << state.nAtomsASample;
86
-    ar << state.nAtomsPSample;
87
-    ar << state.nIterA;
88
-    ar << state.nIterP;
21
+    std::cout << "creating gaps checkpoint...";
22
+    bpt::ptime start = bpt::microsec_clock::local_time();
23
+    Archive ar("gaps_checkpoint.out", ARCHIVE_WRITE);
24
+    ar << ARCHIVE_MAGIC_NUM;
25
+    gaps::random::save(ar);
89 26
     ar << state.nEquil;
90
-    ar << state.nEquilCool;
91 27
     ar << state.nSample;
92
-    ar << state.nSnapshots;
93
-    ar << state.nOutputs;
94
-    ar << state.messages;
95
-    ar << state.iter;
96
-    ar << state.phase;
97
-    ar << state.seed;
98
-    ar << state.sampler;
99
-}
100
-
101
-void operator>>(Archive &ar, GapsInternalState &state)
102
-{
103
-    ar >> state.chi2VecEquil;
104
-    ar >> state.nAtomsAEquil;
105
-    ar >> state.nAtomsPEquil;
106
-    ar >> state.chi2VecSample;
107
-    ar >> state.nAtomsASample;
108
-    ar >> state.nAtomsPSample;
109
-    ar >> state.nIterA;
110
-    ar >> state.nIterP;
111
-    ar >> state.nEquil;
112
-    ar >> state.nEquilCool;
113
-    ar >> state.nSample;
114
-    ar >> state.nSnapshots;
115
-    ar >> state.nOutputs;
116
-    ar >> state.messages;
117
-    ar >> state.iter;
118
-    ar >> state.phase;
119
-    ar >> state.seed;
120
-    ar >> state.sampler;
28
+    ar << state.sampler.nRow();
29
+    ar << state.sampler.nCol();
30
+    ar << state.sampler.nFactor();
31
+    ar << state;
32
+    ar.close();
33
+    bpt::time_duration diff = bpt::microsec_clock::local_time() - start;
34
+    double elapsed = diff.total_milliseconds() / 1000.;
35
+    std::cout << " finished in " << elapsed << " seconds\n";
121 36
 }
122 37
 
123 38
 static void runGibbsSampler(GapsInternalState &state, unsigned nIterTotal,
... ...
@@ -125,7 +40,13 @@ Vector &chi2Vec, Vector &aAtomVec, Vector &pAtomVec)
125 40
 {
126 41
     for (; state.iter < nIterTotal; ++state.iter)
127 42
     {
128
-        // TODO check if checkpoint should be created
43
+        bpt::ptime now = bpt::microsec_clock::local_time();
44
+        bpt::time_duration diff = now - lastCheckpoint;
45
+        if (state.checkpointInterval > 0 && diff.total_milliseconds() > state.checkpointInterval * 1000)
46
+        {
47
+            createCheckpoint(state);
48
+            lastCheckpoint = bpt::microsec_clock::local_time();
49
+        }
129 50
 
130 51
         if (state.phase == GAPS_CALIBRATION)
131 52
         {
... ...
@@ -174,6 +95,9 @@ Vector &chi2Vec, Vector &aAtomVec, Vector &pAtomVec)
174 95
 
175 96
 static Rcpp::List runCogaps(GapsInternalState &state)
176 97
 {
98
+    // reset the checkpoint timer
99
+    lastCheckpoint = bpt::microsec_clock::local_time();
100
+
177 101
     if (state.phase == GAPS_CALIBRATION)
178 102
     {
179 103
         runGibbsSampler(state, state.nEquil, state.chi2VecEquil,
... ...
@@ -182,17 +106,6 @@ static Rcpp::List runCogaps(GapsInternalState &state)
182 106
         state.phase = GAPS_COOLING;
183 107
     }
184 108
 
185
-    Archive ar("gaps_checkpoint.out", ARCHIVE_WRITE);
186
-    ar << ARCHIVE_MAGIC_NUM;
187
-    gaps::random::save(ar);
188
-    ar << state.nEquil;
189
-    ar << state.nSample;
190
-    ar << state.sampler.nRow();
191
-    ar << state.sampler.nCol();
192
-    ar << state.sampler.nFactor();
193
-    ar << state;
194
-    ar.close();
195
-
196 109
     if (state.phase == GAPS_COOLING)
197 110
     {
198 111
         Vector trash(1);
... ...
@@ -233,15 +146,15 @@ unsigned nFactor, double alphaA, double alphaP, unsigned nEquil,
233 146
 unsigned nEquilCool, unsigned nSample, double maxGibbsMassA,
234 147
 double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
235 148
 char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq,
236
-unsigned numOutputs, unsigned numSnapshots)
149
+unsigned numOutputs, unsigned numSnapshots, unsigned checkpointInterval)
237 150
 {
238 151
     // set seed
239 152
     uint32_t seedUsed = static_cast<uint32_t>(seed);
240 153
     if (seed < 0)
241 154
     {
242
-        boost::posix_time::ptime epoch(boost::gregorian::date(1970,1,1)); 
243
-        boost::posix_time::ptime now = boost::posix_time::microsec_clock::local_time();
244
-        boost::posix_time::time_duration diff = now - epoch;
155
+        bpt::ptime epoch(boost::gregorian::date(1970,1,1)); 
156
+        bpt::ptime now = boost::posix_time::microsec_clock::local_time();
157
+        bpt::time_duration diff = now - epoch;
245 158
         seedUsed = static_cast<uint32_t>(diff.total_milliseconds() % 1000);
246 159
     }
247 160
     gaps::random::setSeed(seedUsed);
... ...
@@ -250,7 +163,7 @@ unsigned numOutputs, unsigned numSnapshots)
250 163
     GapsInternalState state(DMatrix, SMatrix, nFactor, alphaA, alphaP,
251 164
         nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP,
252 165
         fixedPatterns, whichMatrixFixed, messages, singleCellRNASeq,
253
-        numOutputs, numSnapshots, seedUsed);
166
+        numOutputs, numSnapshots, seedUsed, checkpointInterval);
254 167
 
255 168
     // run cogaps from this internal state
256 169
     return runCogaps(state);
257 170
new file mode 100644
... ...
@@ -0,0 +1,124 @@
1
+#ifndef __COGAPS_INTERNAL_STATE_H__
2
+#define __COGAPS_INTERNAL_STATE_H__
3
+
4
+#include "Archive.h"
5
+#include "Matrix.h"
6
+
7
+#include <Rcpp.h>
8
+
9
+typedef std::vector<Rcpp::NumericMatrix> SnapshotList;
10
+
11
+enum GapsPhase
12
+{
13
+    GAPS_CALIBRATION,
14
+    GAPS_COOLING,
15
+    GAPS_SAMPLING
16
+};
17
+
18
+struct GapsInternalState
19
+{
20
+    Vector chi2VecEquil;
21
+    Vector nAtomsAEquil;
22
+    Vector nAtomsPEquil;
23
+
24
+    Vector chi2VecSample;
25
+    Vector nAtomsASample;
26
+    Vector nAtomsPSample;
27
+
28
+    unsigned nIterA;
29
+    unsigned nIterP;
30
+    
31
+    unsigned nEquil;
32
+    unsigned nEquilCool;
33
+    unsigned nSample;
34
+
35
+    unsigned nSnapshots;
36
+    unsigned nOutputs;
37
+    bool messages;
38
+
39
+    unsigned iter;
40
+    GapsPhase phase;
41
+    uint32_t seed;
42
+
43
+    long checkpointInterval;
44
+
45
+    GibbsSampler sampler;
46
+
47
+    SnapshotList snapshotsA;
48
+    SnapshotList snapshotsP;
49
+
50
+    GapsInternalState(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix,
51
+        unsigned nFactor, double alphaA, double alphaP, unsigned nE,
52
+        unsigned nEC, unsigned nS, double maxGibbsMassA,
53
+        double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns,
54
+        char whichMatrixFixed, bool msg, bool singleCellRNASeq,
55
+        unsigned numOutputs, unsigned numSnapshots, uint32_t in_seed,
56
+        unsigned cptInterval)
57
+            :
58
+        chi2VecEquil(nE), nAtomsAEquil(nE), nAtomsPEquil(nE),
59
+        chi2VecSample(nS), nAtomsASample(nS), nAtomsPSample(nS),
60
+        nIterA(10), nIterP(10), nEquil(nE), nEquilCool(nEC), nSample(nS),
61
+        nSnapshots(numSnapshots), nOutputs(numOutputs), messages(msg),
62
+        iter(0), phase(GAPS_CALIBRATION), seed(in_seed),
63
+        checkpointInterval(cptInterval),
64
+        sampler(DMatrix, SMatrix, nFactor, alphaA, alphaP,
65
+            maxGibbsMassA, maxGibbsMassP, singleCellRNASeq, fixedPatterns,
66
+            whichMatrixFixed)
67
+    {}
68
+
69
+    GapsInternalState(unsigned nE, unsigned nS, unsigned nRow, unsigned nCol,
70
+    unsigned nFactor)
71
+            :
72
+        chi2VecEquil(nE), nAtomsAEquil(nE), nAtomsPEquil(nE),
73
+        chi2VecSample(nS), nAtomsASample(nS), nAtomsPSample(nS),
74
+        sampler(nRow, nCol, nFactor)
75
+    {}
76
+};
77
+
78
+void operator<<(Archive &ar, GapsInternalState &state)
79
+{
80
+    ar << state.chi2VecEquil;
81
+    ar << state.nAtomsAEquil;
82
+    ar << state.nAtomsPEquil;
83
+    ar << state.chi2VecSample;
84
+    ar << state.nAtomsASample;
85
+    ar << state.nAtomsPSample;
86
+    ar << state.nIterA;
87
+    ar << state.nIterP;
88
+    ar << state.nEquil;
89
+    ar << state.nEquilCool;
90
+    ar << state.nSample;
91
+    ar << state.nSnapshots;
92
+    ar << state.nOutputs;
93
+    ar << state.messages;
94
+    ar << state.iter;
95
+    ar << state.phase;
96
+    ar << state.seed;
97
+    ar << state.checkpointInterval;
98
+    ar << state.sampler;
99
+}
100
+
101
+void operator>>(Archive &ar, GapsInternalState &state)
102
+{
103
+    ar >> state.chi2VecEquil;
104
+    ar >> state.nAtomsAEquil;
105
+    ar >> state.nAtomsPEquil;
106
+    ar >> state.chi2VecSample;
107
+    ar >> state.nAtomsASample;
108
+    ar >> state.nAtomsPSample;
109
+    ar >> state.nIterA;
110
+    ar >> state.nIterP;
111
+    ar >> state.nEquil;
112
+    ar >> state.nEquilCool;
113
+    ar >> state.nSample;
114
+    ar >> state.nSnapshots;
115
+    ar >> state.nOutputs;
116
+    ar >> state.messages;
117
+    ar >> state.iter;
118
+    ar >> state.phase;
119
+    ar >> state.seed;
120
+    ar << state.checkpointInterval;
121
+    ar >> state.sampler;
122
+}
123
+
124
+#endif
0 125
\ No newline at end of file
... ...
@@ -6,8 +6,8 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // cogaps
9
-Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq, unsigned numOutputs, unsigned numSnapshots);
10
-RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP fixedPatternsSEXP, SEXP whichMatrixFixedSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP, SEXP numOutputsSEXP, SEXP numSnapshotsSEXP) {
9
+Rcpp::List cogaps(Rcpp::NumericMatrix DMatrix, Rcpp::NumericMatrix SMatrix, unsigned nFactor, double alphaA, double alphaP, unsigned nEquil, unsigned nEquilCool, unsigned nSample, double maxGibbsMassA, double maxGibbsMassP, Rcpp::NumericMatrix fixedPatterns, char whichMatrixFixed, int seed, bool messages, bool singleCellRNASeq, unsigned numOutputs, unsigned numSnapshots, unsigned checkpointInterval);
10
+RcppExport SEXP _CoGAPS_cogaps(SEXP DMatrixSEXP, SEXP SMatrixSEXP, SEXP nFactorSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP maxGibbsMassASEXP, SEXP maxGibbsMassPSEXP, SEXP fixedPatternsSEXP, SEXP whichMatrixFixedSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP, SEXP numOutputsSEXP, SEXP numSnapshotsSEXP, SEXP checkpointIntervalSEXP) {
11 11
 BEGIN_RCPP
12 12
     Rcpp::RObject rcpp_result_gen;
13 13
     Rcpp::RNGScope rcpp_rngScope_gen;
... ...
@@ -28,7 +28,8 @@ BEGIN_RCPP
28 28
     Rcpp::traits::input_parameter< bool >::type singleCellRNASeq(singleCellRNASeqSEXP);
29 29
     Rcpp::traits::input_parameter< unsigned >::type numOutputs(numOutputsSEXP);
30 30
     Rcpp::traits::input_parameter< unsigned >::type numSnapshots(numSnapshotsSEXP);
31
-    rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots));
31
+    Rcpp::traits::input_parameter< unsigned >::type checkpointInterval(checkpointIntervalSEXP);
32
+    rcpp_result_gen = Rcpp::wrap(cogaps(DMatrix, SMatrix, nFactor, alphaA, alphaP, nEquil, nEquilCool, nSample, maxGibbsMassA, maxGibbsMassP, fixedPatterns, whichMatrixFixed, seed, messages, singleCellRNASeq, numOutputs, numSnapshots, checkpointInterval));
32 33
     return rcpp_result_gen;
33 34
 END_RCPP
34 35
 }
... ...
@@ -55,7 +56,7 @@ END_RCPP
55 56
 }
56 57
 
57 58
 static const R_CallMethodDef CallEntries[] = {
58
-    {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 17},
59
+    {"_CoGAPS_cogaps", (DL_FUNC) &_CoGAPS_cogaps, 18},
59 60
     {"_CoGAPS_cogapsFromCheckpoint", (DL_FUNC) &_CoGAPS_cogapsFromCheckpoint, 1},
60 61
     {"_CoGAPS_run_catch_unit_tests", (DL_FUNC) &_CoGAPS_run_catch_unit_tests, 0},
61 62
     {NULL, NULL, 0}