Browse code

compiling

Tom Sherman authored on 26/07/2018 00:41:45
Showing 21 changed files

... ...
@@ -55,6 +55,7 @@ Collate:
55 55
     'class-CogapsResult.R'
56 56
     'createGWCoGAPSSets.R'
57 57
     'createscCoGAPSSets.R'
58
+    'distributedCogaps.R'
58 59
     'generateSeeds.R'
59 60
     'package.R'
60 61
     'patternMarkers.R'
... ...
@@ -15,6 +15,7 @@ export(createscCoGAPSSets)
15 15
 export(getMeanChiSq)
16 16
 export(getParam)
17 17
 export(patternMarkers)
18
+export(patternMatch)
18 19
 export(patternMatch4Parallel)
19 20
 export(plot.CogapsResult)
20 21
 export(plotResiduals)
... ...
@@ -30,8 +31,6 @@ import(shiny)
30 31
 importClassesFrom(S4Vectors,Annotated)
31 32
 importClassesFrom(S4Vectors,DataFrame)
32 33
 importClassesFrom(S4Vectors,character_OR_NULL)
33
-importClassesFrom(SingleCellExperiment,SingleCellExperiment)
34
-importClassesFrom(SummarizedExperiment,SummarizedExperiment)
35 34
 importFrom(RColorBrewer,brewer.pal)
36 35
 importFrom(Rcpp,evalCpp)
37 36
 importFrom(cluster,agnes)
... ...
@@ -73,7 +72,6 @@ importFrom(stats,sd)
73 72
 importFrom(stats,update)
74 73
 importFrom(stats,variable.names)
75 74
 importFrom(stats,weighted.mean)
76
-importFrom(tools,file_ext)
77 75
 importFrom(utils,file_test)
78 76
 importFrom(utils,read.table)
79 77
 importFrom(utils,str)
... ...
@@ -32,14 +32,14 @@ NULL
32 32
 #' params <- setParam(params, "nPatterns", 5)
33 33
 #' resultC <- CoGAPS(GIST.D, params)
34 34
 #' @importFrom methods new
35
-CoGAPS <- function(data, params=new("CogapsParams"), nCores=NULL,
35
+CoGAPS <- function(data, params=new("CogapsParams"), nThreads=NULL,
36 36
 messages=TRUE, outputFrequency=1000, uncertainty=NULL,
37 37
 checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
38 38
 checkpointInFile=NULL, transposeData=FALSE, ...)
39 39
 {
40 40
     # parse parameters from ...
41 41
     allParams <- list("gaps"=params,
42
-        "nCores"=nCores,
42
+        "nThreads"=nThreads,
43 43
         "messages"=messages,
44 44
         "outputFrequency"=outputFrequency,
45 45
         "checkpointOutFile"=checkpointOutFile,
... ...
@@ -113,7 +113,7 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
113 113
         meanChiSq   = gapsReturnList$meanChiSq,
114 114
         diagnostics = gapsReturnList$diagnostics
115 115
     ))
116
-})
116
+}
117 117
 
118 118
 #' Check that provided data is valid
119 119
 #'
... ...
@@ -1,12 +1,12 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
-cogaps_cpp_from_file <- function(data, params, unc, fixedMatrix, checkpointInFile) {
5
-    .Call('_CoGAPS_cogaps_cpp_from_file', PACKAGE = 'CoGAPS', data, params, unc, fixedMatrix, checkpointInFile)
4
+cogaps_cpp_from_file <- function(data, allParams, uncertainty, indices = numericVector(1), fixedMatrix = matrix(1,1)) {
5
+    .Call('_CoGAPS_cogaps_cpp_from_file', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
6 6
 }
7 7
 
8
-cogaps_cpp <- function(data, params, unc, fixedMatrix, checkpointInFile) {
9
-    .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', data, params, unc, fixedMatrix, checkpointInFile)
8
+cogaps_cpp <- function(data, allParams, uncertainty, indices = numericVector(1), fixedMatrix = matrix(1,1)) {
9
+    .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', data, allParams, uncertainty, indices, fixedMatrix)
10 10
 }
11 11
 
12 12
 getBuildReport_cpp <- function() {
... ...
@@ -22,22 +22,13 @@ distributedCogaps <- function(data, allParams, uncertainty)
22 22
     consensusMatrix <- findConsensusMatrix(initialResult, allParams)
23 23
 
24 24
     # set fixed matrix
25
-    initialA <- initialP <- matrix(0)
26
-    if (allParams$gaps@distributed == "genome-wide")
27
-    {
28
-        allParams$whichMatrixFixed <- "P"
29
-        initialP <- consensusMatrix
30
-    }
31
-    else
32
-    {
33
-        allParams$whichMatrixFixed <- "A"
34
-        initialA <- consensusMatrix
35
-    }
25
+    allParams$whichMatrixFixed <- ifelse(allParams$gaps@distributed
26
+        == "genome-wide", "P", "A")
36 27
 
37 28
     # run all subsets with the same fixed matrix
38 29
     finalResult <- foreach(i=1:nSets) %dopar%
39 30
     {
40
-        cogaps_cpp(data, allParams, uncertainty, sets[[i]], initialA, initialP)
31
+        cogaps_cpp(data, allParams, uncertainty, sets[[i]], consensusMatrix)
41 32
     }
42 33
 
43 34
     # get result 
... ...
@@ -3,32 +3,12 @@
3 3
 \docType{methods}
4 4
 \name{CoGAPS}
5 5
 \alias{CoGAPS}
6
-\alias{CoGAPS,SingleCellExperiment,CogapsParams-method}
7
-\alias{CoGAPS,SummarizedExperiment,CogapsParams-method}
8
-\alias{CoGAPS,character,CogapsParams-method}
9
-\alias{CoGAPS,data.frame,CogapsParams-method}
10
-\alias{CoGAPS,matrix,CogapsParams-method}
11 6
 \title{CoGAPS Matrix Factorization Algorithm}
12 7
 \usage{
13
-CoGAPS(data, params = new("CogapsParams"), uncertainty = NULL,
14
-  fixedMatrix = matrix(0), checkpointInFile = "", ...)
15
-
16
-\S4method{CoGAPS}{character,CogapsParams}(data, params = new("CogapsParams"),
17
-  uncertainty = NULL, fixedMatrix = matrix(0), checkpointInFile = "", ...)
18
-
19
-\S4method{CoGAPS}{matrix,CogapsParams}(data, params = new("CogapsParams"),
20
-  uncertainty = NULL, fixedMatrix = matrix(0), checkpointInFile = "", ...)
21
-
22
-\S4method{CoGAPS}{data.frame,CogapsParams}(data, params = new("CogapsParams"),
23
-  uncertainty = NULL, fixedMatrix = matrix(0), checkpointInFile = "", ...)
24
-
25
-\S4method{CoGAPS}{SummarizedExperiment,CogapsParams}(data,
26
-  params = new("CogapsParams"), uncertainty = NULL,
27
-  fixedMatrix = matrix(0), checkpointInFile = "", ...)
28
-
29
-\S4method{CoGAPS}{SingleCellExperiment,CogapsParams}(data,
30
-  params = new("CogapsParams"), uncertainty = NULL,
31
-  fixedMatrix = matrix(0), checkpointInFile = "", ...)
8
+CoGAPS(data, params = new("CogapsParams"), nThreads = NULL,
9
+  messages = TRUE, outputFrequency = 1000, uncertainty = NULL,
10
+  checkpointOutFile = "gaps_checkpoint.out", checkpointInterval = 1000,
11
+  checkpointInFile = NULL, transposeData = FALSE, ...)
32 12
 }
33 13
 \arguments{
34 14
 \item{data}{File name or R object (see details for supported types)}
... ...
@@ -37,12 +17,12 @@ CoGAPS(data, params = new("CogapsParams"), uncertainty = NULL,
37 17
 
38 18
 \item{uncertainty}{uncertainty matrix (same supported types as data)}
39 19
 
40
-\item{fixedMatrix}{data for fixing the values of either the A or P matrix;
41
-used in conjuction with whichMatrixFixed (see CogapsParams)}
42
-
43 20
 \item{checkpointInFile}{name of the checkpoint file}
44 21
 
45 22
 \item{...}{keeps backwards compatibility with arguments from older versions}
23
+
24
+\item{fixedMatrix}{data for fixing the values of either the A or P matrix;
25
+used in conjuction with whichMatrixFixed (see CogapsParams)}
46 26
 }
47 27
 \value{
48 28
 CogapsResult object
... ...
@@ -53,8 +33,7 @@ matrix factorization returning the two matrices that reconstruct
53 33
 the data matrix
54 34
 }
55 35
 \details{
56
-Currently, raw count matrices are the only supported R object. For
57
- file types CoGAPS supports csv, tsv, and mtx
36
+For file types CoGAPS supports csv, tsv, and mtx
58 37
 }
59 38
 \examples{
60 39
 # Running from R object
61 40
new file mode 100644
... ...
@@ -0,0 +1,26 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/distributedCogaps.R
3
+\name{distributedCogaps}
4
+\alias{distributedCogaps}
5
+\title{CoGAPS Distributed Matrix Factorization Algorithm}
6
+\usage{
7
+distributedCogaps(data, allParams, uncertainty)
8
+}
9
+\arguments{
10
+\item{data}{File name or R object (see details for supported types)}
11
+
12
+\item{allParams}{list of all parameters used in computation}
13
+
14
+\item{uncertainty}{uncertainty matrix (same supported types as data)}
15
+}
16
+\value{
17
+list
18
+}
19
+\description{
20
+runs CoGAPS over subsets of the data and stitches the results
21
+back together
22
+}
23
+\details{
24
+For file types CoGAPS supports csv, tsv, and mtx
25
+}
26
+
0 27
new file mode 100644
... ...
@@ -0,0 +1,33 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/distributedCogaps.R
3
+\name{patternMatch}
4
+\alias{patternMatch}
5
+\title{Match Patterns Across Multiple Runs}
6
+\usage{
7
+patternMatch(allPatterns, allParams)
8
+}
9
+\arguments{
10
+\item{Atot}{a matrix containing the total by set estimates of Pmean output from \code{reOrderBySet}}
11
+
12
+\item{nSets}{number of parallel sets used to generate \code{Atot}}
13
+
14
+\item{cnt}{number of branches at which to cut dendrogram}
15
+
16
+\item{minNS}{minimum of individual set contributions a cluster must contain}
17
+
18
+\item{maxNS}{maximum of individual set contributions a cluster must contain}
19
+
20
+\item{ignore.NA}{logical indicating whether or not to ignore NAs from potential over dimensionalization. Default is FALSE.}
21
+
22
+\item{bySet}{logical indicating whether to return list of matched set solutions from \code{Atot}}
23
+
24
+\item{...}{additional parameters for \code{agnes}}
25
+}
26
+\value{
27
+a matrix of consensus patterns by samples. If \code{bySet=TRUE} then a list of the set contributions to each
28
+consensus pattern is also returned.
29
+}
30
+\description{
31
+Match Patterns Across Multiple Runs
32
+}
33
+
... ...
@@ -1,10 +1,13 @@
1
-#include "GapsDispatcher.h"
1
+#include "GapsRunner.h"
2 2
 #include "math/SIMD.h"
3 3
 
4 4
 #include <Rcpp.h>
5 5
 
6 6
 #include <string>
7 7
 
8
+// these are helper functions for converting matrix/vector types
9
+// to and from R objects
10
+
8 11
 static std::vector<unsigned> convertRVec(const Rcpp::NumericVector &rvec)
9 12
 {
10 13
     std::vector<unsigned> vec;
... ...
@@ -15,9 +18,9 @@ static std::vector<unsigned> convertRVec(const Rcpp::NumericVector &rvec)
15 18
     return vec;
16 19
 }
17 20
 
18
-static RowMatrix convertRMatrix(const Rcpp::NumericMatrix &rmat)
21
+static Matrix convertRMatrix(const Rcpp::NumericMatrix &rmat)
19 22
 {
20
-    RowMatrix mat(rmat.nrow(), rmat.ncol());
23
+    Matrix mat(rmat.nrow(), rmat.ncol());
21 24
     for (unsigned i = 0; i < mat.nRow(); ++i)
22 25
     {
23 26
         for (unsigned j = 0; j < mat.nCol(); ++j)
... ...
@@ -42,7 +45,10 @@ static Rcpp::NumericMatrix createRMatrix(const Matrix &mat)
42 45
     return rmat;
43 46
 }
44 47
 
45
-static bool isNull(const RowMatrix &mat)
48
+// this provides a standard way for communicating which parameters
49
+// are null between R and C++
50
+
51
+static bool isNull(const Matrix &mat)
46 52
 {
47 53
     return mat.nRow() == 1 && mat.nCol() == 1;
48 54
 }
... ...
@@ -62,105 +68,97 @@ static bool isNull(const std::string &path)
62 68
     return path.empty();
63 69
 }
64 70
 
65
-template <class T>
66
-static Rcpp::List cogapsRun(const T &data, const Rcpp::List &allParams,
67
-const T &uncertainty, const Rcpp::NumericVector &indices,
68
-const Rcpp::NumericMatrix &initialA, const Rcpp::NumericMatrix &initialP)
71
+// this is the main function that creates a GapsRunner and runs CoGAPS
72
+
73
+template <class DataType>
74
+static Rcpp::List cogapsRun(const DataType &data, const Rcpp::List &allParams,
75
+const DataType &uncertainty, const Rcpp::NumericVector &indices,
76
+const Rcpp::NumericMatrix &fixedMatrix)
69 77
 {
70
-    GapsDispatcher dispatcher;
78
+    // convert string parameters
79
+    Rcpp::S4 gapsParams = allParams["gaps"];
80
+    std::string checkpointInFile = Rcpp::as<std::string>(allParams["checkpointInFile"]);
81
+    std::string distributed = Rcpp::as<std::string>(gapsParams.slot("distributed"));
82
+    GAPS_ASSERT(distributed == "genome-wide" || distributed == "single-cell");
83
+    bool partitionRows = (distributed == "genome-wide");
84
+    
85
+    // read number of patterns from checkpoint file
86
+    unsigned nPatterns = gapsParams.slot("nPatterns");
87
+    unsigned seed = gapsParams.slot("seed"); // so we can return seed
88
+    Archive ar(checkpointInFile, ARCHIVE_READ);
89
+    if (!isNull(checkpointInFile))
90
+    {
91
+        gaps::random::load(ar);
92
+        ar >> nPatterns >> seed;
93
+    }
71 94
 
72
-    // check if we're initializing with a checkpoint or not
73
-    if (!isNull(Rcpp::as<std::string>(allParams["checkpointInFile"])))
95
+    // construct GapsRunner
96
+    GapsRunner runner(data, allParams["transposeData"], nPatterns, seed,
97
+        partitionRows, convertRVec(indices));
98
+
99
+    // populate GapsRunner from checkpoint file
100
+    if (!isNull(checkpointInFile))
74 101
     {
75
-        dispatcher.initialize(data, allParams["transposeData"],
76
-            allParams["checkpointInFile"]);
102
+        ar >> runner;
103
+        ar.close();
77 104
     }
78 105
     else
79 106
     {
80
-        Rcpp::S4 gapsParams = allParams["gaps"];
81
-
82
-        if (!isNull(Rcpp::as<std::string>(gapsParams.slot("distributed"))))
107
+        // set fixed matrix
108
+        if (!isNull(fixedMatrix))
83 109
         {
84
-            dispatcher.initialize(data, allParams["transposeData"],
85
-                gapsParams.slot("nPatterns"), gapsParams.slot("seed"),
86
-                gapsParams.slot("distributed") == "genome-wide",
87
-                convertRVec(indices));
88
-
89
-            if (!isNull(uncertainty))
90
-            {
91
-                dispatcher.setUncertainty(uncertainty,
92
-                    allParams["transposeData"],
93
-                    gapsParams.slot("distributed") == "genome-wide",
94
-                    convertRVec(indices));
95
-            }
96
-        }
97
-        else
98
-        {
99
-            dispatcher.initialize(data, allParams["transposeData"],
100
-                gapsParams.slot("nPatterns"), gapsParams.slot("seed"));
101
-
102
-            if (!isNull(uncertainty))
103
-            {
104
-                dispatcher.setUncertainty(uncertainty, allParams["transposeData"]);
105
-            }
110
+            std::string which = Rcpp::as<std::string>(allParams["whichMatrixFixed"]);
111
+            GAPS_ASSERT(!isNull(which));
112
+            runner.setFixedMatrix(which[0], convertRMatrix(fixedMatrix));
106 113
         }
107 114
 
108
-        // set optional parameters
109
-        dispatcher.setMaxIterations(gapsParams.slot("nIterations"));
110
-        dispatcher.setSparsity(gapsParams.slot("alphaA"),
115
+        // set parameters that would be saved in the checkpoint 
116
+        gaps::random::setSeed(seed);
117
+        runner.setMaxIterations(gapsParams.slot("nIterations"));
118
+        runner.setSparsity(gapsParams.slot("alphaA"),
111 119
             gapsParams.slot("alphaP"), gapsParams.slot("singleCell"));
112
-        dispatcher.setMaxGibbsMass(gapsParams.slot("maxGibbsMassA"),
120
+        runner.setMaxGibbsMass(gapsParams.slot("maxGibbsMassA"),
113 121
             gapsParams.slot("maxGibbsMassP"));
114
-
115
-        // set initial values for A and P matrix
116
-        if (!isNull(initialA))
117
-        {
118
-            dispatcher.setAMatrix(convertRMatrix(initialA));
119
-        }
120
-        if (!isNull(initialP))
121
-        {
122
-            dispatcher.setPMatrix(convertRMatrix(initialP));
123
-        }
124
-
125
-        // check if running with a fixed matrix
126
-        if (!isNull(Rcpp::as<std::string>(allParams["whichMatrixFixed"])))
127
-        {
128
-            dispatcher.setFixedMatrix(allParams["whichMatrixFixed"]);
129
-        }
130 122
     }
131 123
 
132
-    // set the uncertainty matrix
124
+    // set uncertainty
125
+    if (!isNull(uncertainty))
126
+    {
127
+        runner.setUncertainty(uncertainty, allParams["transposeData"],
128
+            partitionRows, convertRVec(indices));
129
+    }
133 130
     
134 131
     // set parameters that aren't saved in the checkpoint
135
-    dispatcher.setNumCoresPerSet(allParams["nCores"]);
136
-    dispatcher.printMessages(allParams["messages"]);
137
-    dispatcher.setOutputFrequency(allParams["outputFrequency"]);
138
-    dispatcher.setCheckpointOutFile(allParams["checkpointOutFile"]);
139
-    dispatcher.setCheckpointInterval(allParams["checkpointInterval"]);
140
-
141
-    // run the dispatcher and return the GapsResult in an R list
142
-    GapsResult result(dispatcher.run());
132
+    runner.setMaxThreads(allParams["nThreads"]);
133
+    runner.setPrintMessages(allParams["messages"]);
134
+    runner.setOutputFrequency(allParams["outputFrequency"]);
135
+    runner.setCheckpointOutFile(allParams["checkpointOutFile"]);
136
+    runner.setCheckpointInterval(allParams["checkpointInterval"]);
137
+
138
+    // run cogaps and return the GapsResult in an R list
139
+    GapsResult result(runner.run());
143 140
     GAPS_ASSERT(result.meanChiSq > 0.f);
144 141
     return Rcpp::List::create(
145 142
         Rcpp::Named("Amean") = createRMatrix(result.Amean),
146 143
         Rcpp::Named("Pmean") = createRMatrix(result.Pmean),
147 144
         Rcpp::Named("Asd") = createRMatrix(result.Asd),
148 145
         Rcpp::Named("Psd") = createRMatrix(result.Psd),
149
-        Rcpp::Named("seed") = result.seed,
146
+        Rcpp::Named("seed") = seed,
150 147
         Rcpp::Named("meanChiSq") = result.meanChiSq,
151 148
         Rcpp::Named("diagnostics") = Rcpp::List::create()
152 149
     );
153 150
 }
154 151
 
152
+// these are the functions exposed to the R package
153
+
155 154
 // [[Rcpp::export]]
156 155
 Rcpp::List cogaps_cpp_from_file(const std::string &data,
157 156
 const Rcpp::List &allParams,
158 157
 const std::string &uncertainty,
159 158
 const Rcpp::NumericVector &indices=Rcpp::NumericVector(1),
160
-const Rcpp::NumericMatrix &initialA=Rcpp::NumericMatrix(1,1),
161
-const Rcpp::NumericMatrix &initialP=Rcpp::NumericMatrix(1,1))
159
+const Rcpp::NumericMatrix &fixedMatrix=Rcpp::NumericMatrix(1,1))
162 160
 {
163
-    return cogapsRun(data, allParams, uncertainty, indices, initialA, initialP);
161
+    return cogapsRun(data, allParams, uncertainty, indices, fixedMatrix);
164 162
 }
165 163
 
166 164
 // [[Rcpp::export]]
... ...
@@ -168,11 +166,10 @@ Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix &data,
168 166
 const Rcpp::List &allParams,
169 167
 const Rcpp::NumericMatrix &uncertainty,
170 168
 const Rcpp::NumericVector &indices=Rcpp::NumericVector(1),
171
-const Rcpp::NumericMatrix &initialA=Rcpp::NumericMatrix(1,1),
172
-const Rcpp::NumericMatrix &initialP=Rcpp::NumericMatrix(1,1))
169
+const Rcpp::NumericMatrix &fixedMatrix=Rcpp::NumericMatrix(1,1))
173 170
 {
174 171
     return cogapsRun(convertRMatrix(data), allParams,
175
-        convertRMatrix(uncertainty), indices, initialA, initialP);
172
+        convertRMatrix(uncertainty), indices, fixedMatrix);
176 173
 }
177 174
 
178 175
 // [[Rcpp::export]]
179 176
deleted file mode 100644
... ...
@@ -1,155 +0,0 @@
1
-#include "GapsDispatcher.h"
2
-
3
-#include <string>
4
-
5
-#ifdef __GAPS_OPENMP__
6
-    #include <omp.h>
7
-#endif
8
-
9
-GapsDispatcher::GapsDispatcher() : mSeed(0), mNumPatterns(3),
10
-    mMaxIterations(1000), mNumCoresPerSet(1), mPrintMessages(true),
11
-    mCheckpointsCreated(0), mPhase('C'), mCheckpointInterval(0),
12
-    mCheckpointOutFile("gaps_checkpoint.out"), mInitialized(false)
13
-{}
14
-
15
-GapsDispatcher::~GapsDispatcher()
16
-{
17
-    for (unsigned i = 0; i < mRunners.size(); ++i)
18
-    {
19
-        delete mRunners[i];
20
-    }
21
-}
22
-
23
-void GapsDispatcher::setMaxIterations(unsigned n)
24
-{
25
-    mMaxIterations = n;
26
-}
27
-
28
-void GapsDispatcher::printMessages(bool print)
29
-{
30
-    GAPS_ASSERT(mInitialized);
31
-    mPrintMessages = print;
32
-    mRunners[0]->printMessages(print);
33
-}
34
-
35
-void GapsDispatcher::setOutputFrequency(unsigned n)
36
-{
37
-    GAPS_ASSERT(mInitialized);
38
-    mRunners[0]->setOutputFrequency(n);
39
-}
40
-
41
-void GapsDispatcher::setSparsity(float alphaA, float alphaP, bool singleCell)
42
-{
43
-    GAPS_ASSERT(mInitialized);
44
-    mRunners[0]->setSparsity(alphaA, alphaP, singleCell);
45
-}
46
-
47
-void GapsDispatcher::setMaxGibbsMass(float maxA, float maxP)
48
-{
49
-    GAPS_ASSERT(mInitialized);
50
-    mRunners[0]->setMaxGibbsMass(maxA, maxP);
51
-}
52
-
53
-void GapsDispatcher::setAMatrix()
54
-{
55
-    GAPS_ASSERT(mInitialized);
56
-}
57
-
58
-void GapsDispatcher::setPMatrix()
59
-{
60
-    GAPS_ASSERT(mInitialized);
61
-    mRunners[0]->
62
-}
63
-
64
-void GapsDispatcher::setFixedMatrix(char which)
65
-{
66
-    GAPS_ASSERT(mInitialized);
67
-    mRunners[0]->setFixedMatrix(which, mat);
68
-}
69
-
70
-void GapsDispatcher::setNumCoresPerSet(unsigned n)
71
-{
72
-    mNumCoresPerSet = n;
73
-}
74
-
75
-void GapsDispatcher::setCheckpointInterval(unsigned n)
76
-{
77
-    mCheckpointInterval = n;
78
-}
79
-
80
-void GapsDispatcher::setCheckpointOutFile(const std::string &path)
81
-{
82
-    mCheckpointOutFile = path;
83
-}
84
-
85
-GapsResult GapsDispatcher::run()
86
-{
87
-    GAPS_ASSERT(mInitialized);
88
-    GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
89
-
90
-    // calculate appropiate number of cores if compiled with openmp
91
-    #ifdef __GAPS_OPENMP__
92
-    if (mPrintMessages)
93
-    {
94
-        unsigned availableCores = omp_get_max_threads();
95
-        mNumCoresPerSet = std::min(availableCores, mNumCoresPerSet);
96
-        gaps_printf("Running on %d out of %d available cores\n",
97
-            mNumCoresPerSet, availableCores);
98
-    }
99
-    #endif
100
-
101
-    // this switch allows for the algorithm to be interruptable
102
-    mRunners[0]->startClock();
103
-    switch (mPhase)
104
-    {
105
-        case 'C':
106
-            if (mPrintMessages)
107
-            {
108
-                gaps_printf("-- Calibration Phase --\n");
109
-            }
110
-            runOneCycle(mMaxIterations);
111
-            mPhase = 'S';
112
-
113
-        case 'S':
114
-            if (mPrintMessages)
115
-            {
116
-                gaps_printf("-- Sampling Phase --\n");
117
-            }
118
-            mRunners[0]->startSampling();
119
-            runOneCycle(mMaxIterations);
120
-            break;
121
-    }
122
-
123
-    // extract useful information from runners
124
-    GapsResult result(mRunners[0]->nRow(), mRunners[0]->nCol(), mSeed);
125
-    result.Amean = mRunners[0]->Amean();
126
-    result.Pmean = mRunners[0]->Pmean();
127
-    result.Asd = mRunners[0]->Asd();
128
-    result.Psd = mRunners[0]->Psd();
129
-    result.meanChiSq = mRunners[0]->meanChiSq();
130
-    return result;
131
-}
132
-
133
-void GapsDispatcher::createCheckpoint() const
134
-{
135
-    Archive ar(mCheckpointOutFile, ARCHIVE_WRITE);
136
-
137
-    gaps::random::save(ar);
138
-    ar << mSeed << mNumPatterns << mMaxIterations << mPrintMessages <<
139
-        mCheckpointsCreated << mPhase;
140
-
141
-    ar << *mRunners[0];
142
-}
143
-
144
-void GapsDispatcher::runOneCycle(unsigned k)
145
-{
146
-    unsigned nCheckpoints = mCheckpointInterval > 0 ? k / mCheckpointInterval : 0;
147
-    while (mCheckpointsCreated < nCheckpoints)
148
-    {
149
-        mRunners[0]->run(mCheckpointInterval, mNumCoresPerSet);
150
-        createCheckpoint();
151
-        ++mCheckpointsCreated;
152
-    }
153
-    mRunners[0]->run(k - mCheckpointInterval * mCheckpointsCreated, mNumCoresPerSet);
154
-    mCheckpointsCreated = 0; // reset checkpoint count for next cycle
155
-}
156 0
deleted file mode 100644
... ...
@@ -1,180 +0,0 @@
1
-#ifndef __COGAPS_GAPS_DISPATCHER_H__
2
-#define __COGAPS_GAPS_DISPATCHER_H__
3
-
4
-#include "GapsRunner.h"
5
-#include "math/SIMD.h"
6
-
7
-#include <string>
8
-
9
-struct GapsResult
10
-{
11
-    ColMatrix Amean;
12
-    ColMatrix Asd;
13
-    RowMatrix Pmean;
14
-    RowMatrix Psd;
15
-    
16
-    float meanChiSq;
17
-    uint32_t seed;
18
-
19
-    GapsResult(unsigned nrow, unsigned ncol, uint32_t rngSeed) :
20
-        Amean(nrow, ncol), Asd(nrow, ncol), Pmean(nrow, ncol), Psd(nrow, ncol),
21
-        meanChiSq(0.f), seed(rngSeed)
22
-    {}
23
-
24
-    void writeCsv(const std::string &path);
25
-    void writeTsv(const std::string &path);
26
-    void writeGct(const std::string &path);
27
-};
28
-
29
-// should be agnostic to external caller (R/Python/CLI)
30
-class GapsDispatcher
31
-{
32
-private:
33
-
34
-    uint32_t mSeed;
35
-    unsigned mNumPatterns;
36
-
37
-    unsigned mMaxIterations;
38
-    unsigned mNumCoresPerSet;
39
-    bool mPrintMessages;
40
-
41
-    unsigned mCheckpointsCreated;
42
-    char mPhase; // 'C' for calibration, 'S' for sample
43
-
44
-    unsigned mCheckpointInterval;
45
-    std::string mCheckpointOutFile;
46
-
47
-    bool mInitialized;    
48
-    std::vector<GapsRunner*> mRunners;
49
-
50
-    void runOneCycle(unsigned k);
51
-    void createCheckpoint() const;
52
-
53
-    GapsDispatcher(const GapsDispatcher &p); // don't allow copies
54
-    GapsDispatcher& operator=(const GapsDispatcher &p); // don't allow copies
55
-
56
-    template <class DataType>
57
-    void loadData(const DataType &data, bool transposeData);
58
-
59
-    template <class DataType>
60
-    void loadData(const DataType &data, bool transposeData, bool partitionRows,
61
-        const std::vector<unsigned> &indices);
62
-
63
-public:
64
-
65
-    GapsDispatcher();
66
-    ~GapsDispatcher();
67
-
68
-    template <class DataType>
69
-    void initialize(const DataType &data, bool transposeData,
70
-        unsigned nPatterns, uint32_t seed);
71
-
72
-    template <class DataType>
73
-    void initialize(const DataType &data, bool transposeData,
74
-        unsigned nPatterns, uint32_t seed, bool partitionRows,
75
-        const std::vector<unsigned> &indices);
76
-
77
-    template <class DataType>
78
-    void initialize(const DataType &data, bool transposeData,
79
-        const std::string &cptFile);
80
-
81
-    template <class DataType>
82
-    void setUncertainty(const DataType &unc, bool transposeData);
83
-
84
-    template <class DataType>
85
-    void setUncertainty(const DataType &unc, bool transposeData,
86
-        bool partitionRows, const std::vector<unsigned> &indices);
87
-
88
-    void setMaxIterations(unsigned n);
89
-    void printMessages(bool print);
90
-    void setOutputFrequency(unsigned n);
91
-    void setSparsity(float alphaA, float alphaP, bool singleCell);
92
-    void setMaxGibbsMass(float maxA, float maxP);
93
-
94
-    void setAMatrix(const RowMatrix &mat);
95
-    void setPMatrix(const RowMatrix &mat);
96
-    void setFixedMatrix(char which);
97
-    
98
-    void setNumCoresPerSet(unsigned n);
99
-    void setCheckpointInterval(unsigned n);
100
-    void setCheckpointOutFile(const std::string &path);
101
-    
102
-    GapsResult run();
103
-};
104
-
105
-template <class DataType>
106
-void GapsDispatcher::initialize(const DataType &data, bool transposeData,
107
-unsigned nPatterns, uint32_t seed)
108
-{
109
-    mSeed = seed;
110
-    mNumPatterns = nPatterns;
111
-    gaps::random::setSeed(mSeed);
112
-    
113
-    loadData(data, transposeData);
114
-    mInitialized = true;
115
-}
116
-
117
-template <class DataType>
118
-void GapsDispatcher::initialize(const DataType &data, bool transposeData,
119
-unsigned nPatterns, uint32_t seed, bool partitionRows,
120
-const std::vector<unsigned> &indices)
121
-{
122
-    mSeed = seed;
123
-    mNumPatterns = nPatterns;
124
-    gaps::random::setSeed(mSeed);
125
-    
126
-    loadData(data, transposeData, partitionRows, indices);
127
-    mInitialized = true;
128
-}
129
-
130
-template <class DataType>
131
-void GapsDispatcher::initialize(const DataType &data, bool transposeData,
132
-const std::string &cptFile)
133
-{
134
-    Archive ar(cptFile, ARCHIVE_READ);
135
-    gaps::random::load(ar);
136
-
137
-    ar >> mSeed >> mNumPatterns >> mMaxIterations >> mPrintMessages >>
138
-        mCheckpointsCreated >> mPhase;
139
-
140
-    loadData(data, transposeData);
141
-    ar >> *mRunners[0];
142
-    mInitialized = true;
143
-}
144
-
145
-template <class DataType>
146
-void GapsDispatcher::setUncertainty(const DataType &unc, bool transposeData)
147
-{
148
-    GAPS_ASSERT(mInitialized);
149
-    mRunners[0]->setUncertainty(unc, transposeData);
150
-}
151
-
152
-template <class DataType>
153
-void GapsDispatcher::setUncertainty(const DataType &unc, bool transposeData,
154
-bool partitionRows, const std::vector<unsigned> &indices)
155
-{
156
-    GAPS_ASSERT(mInitialized);
157
-    mRunners[0]->setUncertainty(unc, transposeData, partitionRows, indices);
158
-}
159
-
160
-template <class DataType>
161
-void GapsDispatcher::loadData(const DataType &data, bool transposeData)
162
-{
163
-    gaps_printf("Loading Data...");
164
-    gaps_flush();
165
-    mRunners.push_back(new GapsRunner(data, transposeData, mNumPatterns));
166
-    gaps_printf("Done!\n");
167
-}
168
-
169
-template <class DataType>
170
-void GapsDispatcher::loadData(const DataType &data, bool transposeData,
171
-bool partitionRows, const std::vector<unsigned> &indices)
172
-{
173
-    gaps_printf("Loading Data...");
174
-    gaps_flush();
175
-    mRunners.push_back(new GapsRunner(data, transposeData, partitionRows,
176
-        indices, mNumPatterns));
177
-    gaps_printf("Done!\n");
178
-}
179
-
180
-#endif // __COGAPS_GAPS_DISPATCHER_H__
181 0
\ No newline at end of file
... ...
@@ -8,14 +8,26 @@
8 8
 #include <Rcpp.h>
9 9
 #endif
10 10
 
11
-void GapsRunner::printMessages(bool print)
11
+#ifdef __GAPS_OPENMP__
12
+#include <omp.h>
13
+#endif
14
+
15
+void GapsRunner::setFixedMatrix(char which, const Matrix &mat)
12 16
 {
13
-    mPrintMessages = print;
17
+    mFixedMatrix = which;
18
+    if (which == 'A')
19
+    {
20
+        mASampler.setMatrix(mat);
21
+    }
22
+    else if (which == 'P')
23
+    {
24
+        mPSampler.setMatrix(mat);
25
+    }
14 26
 }
15 27
 
16
-void GapsRunner::setOutputFrequency(unsigned n)
28
+void GapsRunner::setMaxIterations(unsigned nIterations)
17 29
 {
18
-    mOutputFrequency = n;
30
+    mMaxIterations = nIterations;
19 31
 }
20 32
 
21 33
 void GapsRunner::setSparsity(float alphaA, float alphaP, bool singleCell)
... ...
@@ -30,48 +42,85 @@ void GapsRunner::setMaxGibbsMass(float maxA, float maxP)
30 42
     mPSampler.setMaxGibbsMass(maxP);
31 43
 }
32 44
 
33
-void GapsRunner::setFixedMatrix(char which, const RowMatrix &mat)
45
+void GapsRunner::setMaxThreads(unsigned nThreads)
34 46
 {
35
-    mFixedMatrix = which;
36
-    if (which == 'A')
37
-    {
38
-        gaps_printf("setting fixed A\n");
39
-        mASampler.setMatrix(ColMatrix(mat));
40
-        gaps_printf("updating AP\n");
41
-        mASampler.recalculateAPMatrix();
42
-        gaps_printf("syncing\n");
43
-        mPSampler.sync(mASampler);
44
-    }
45
-    else if (which == 'P')
46
-    {
47
-        gaps_printf("setting fixed P\n");
48
-        mPSampler.setMatrix(mat);
49
-        mPSampler.recalculateAPMatrix();
50
-        mASampler.sync(mPSampler);
51
-    }
47
+    mMaxThreads = nThreads;
52 48
 }
53 49
 
54
-void GapsRunner::startSampling()
50
+void GapsRunner::setPrintMessages(bool print)
55 51
 {
56
-    mSamplePhase = true;
52
+    mPrintMessages = print;
57 53
 }
58 54
 
59
-void GapsRunner::startClock()
55
+void GapsRunner::setOutputFrequency(unsigned n)
56
+{
57
+    mOutputFrequency = n;
58
+}
59
+
60
+void GapsRunner::setCheckpointOutFile(const std::string &file)
61
+{
62
+    mCheckpointOutFile = file;
63
+}
64
+
65
+void GapsRunner::setCheckpointInterval(unsigned interval)
66
+{
67
+    mCheckpointInterval = interval;
68
+}
69
+
70
+GapsResult GapsRunner::run()
60 71
 {
61 72
     mStartTime = bpt_now();
73
+
74
+    // calculate appropiate number of threads if compiled with openmp
75
+    #ifdef __GAPS_OPENMP__
76
+    if (mPrintMessages)
77
+    {
78
+        unsigned availableThreads = omp_get_max_threads();
79
+        mMaxThreads = std::min(availableThreads, mMaxThreads);
80
+        gaps_printf("Running on %d out of %d available threads\n",
81
+            mMaxThreads, availableThreads);
82
+    }
83
+    #endif
84
+
85
+    // cascade through phases, allows algorithm to be resumed in either phase
86
+    GAPS_ASSERT(mPhase == 'C' || mPhase == 'S');
87
+    switch (mPhase)
88
+    {
89
+        case 'C':
90
+            if (mPrintMessages)
91
+            {
92
+                gaps_printf("-- Calibration Phase --\n");
93
+            }
94
+            runOnePhase();
95
+            mPhase = 'S';
96
+            mCurrentIteration = 0;
97
+
98
+        case 'S':
99
+            if (mPrintMessages)
100
+            {
101
+                gaps_printf("-- Sampling Phase --\n");
102
+            }
103
+            runOnePhase();
104
+            break;
105
+    }
106
+
107
+    GapsResult result(mStatistics);
108
+    result.meanChiSq = mStatistics.meanChiSq(mASampler);
109
+    return result;    
62 110
 }
63 111
 
64
-void GapsRunner::run(unsigned nIter, unsigned nCores)
112
+void GapsRunner::runOnePhase()
65 113
 {
66
-    for (unsigned i = 0; i < nIter; ++i)
114
+    for (; mCurrentIteration < mMaxIterations; ++mCurrentIteration)
67 115
     {
68 116
         #ifdef __GAPS_R_BUILD__
69 117
         Rcpp::checkUserInterrupt();
70 118
         #endif
71 119
         
72
-        if (!mSamplePhase)
120
+        if (mPhase == 'C')
73 121
         {        
74
-            float temp = static_cast<float>(2 * i) / static_cast<float>(nIter);
122
+            float temp = static_cast<float>(2 * mCurrentIteration)
123
+                / static_cast<float>(mMaxIterations);
75 124
             mASampler.setAnnealingTemp(std::min(1.f, temp));
76 125
             mPSampler.setAnnealingTemp(std::min(1.f, temp));
77 126
         }
... ...
@@ -79,79 +128,24 @@ void GapsRunner::run(unsigned nIter, unsigned nCores)
79 128
         // number of updates per iteration is poisson 
80 129
         unsigned nA = gaps::random::poisson(std::max(mASampler.nAtoms(), 10ul));
81 130
         unsigned nP = gaps::random::poisson(std::max(mPSampler.nAtoms(), 10ul));
82
-        updateSampler(nA, nP, nCores);
83
-
84
-        if (mPrintMessages)
85
-        {
86
-            displayStatus(i, nIter);
87
-        }
131
+        updateSampler(nA, nP);
88 132
 
89
-        if (mSamplePhase)
133
+        if (mPhase == 'S')
90 134
         {
91 135
             mStatistics.update(mASampler, mPSampler);
92 136
         }
93
-    }
94
-}
95
-
96
-unsigned GapsRunner::nRow() const
97
-{
98
-    return mASampler.dataRows();
99
-}
100
-
101
-unsigned GapsRunner::nCol() const
102
-{
103
-    return mASampler.dataCols();
104
-}
105
-
106
-ColMatrix GapsRunner::Amean() const
107
-{
108
-    return mStatistics.Amean();
109
-}
110
-
111
-RowMatrix GapsRunner::Pmean() const
112
-{
113
-    return mStatistics.Pmean();
114
-}
115
-
116
-ColMatrix GapsRunner::Asd() const
117
-{
118
-    return mStatistics.Asd();
119
-}
120
-
121
-RowMatrix GapsRunner::Psd() const
122
-{
123
-    return mStatistics.Psd();
124
-}
125
-
126
-float GapsRunner::meanChiSq() const
127
-{
128
-    return mStatistics.meanChiSq(mASampler);
129
-}
130
-
131
-Archive& operator<<(Archive &ar, GapsRunner &runner)
132
-{
133
-    ar << runner.mASampler << runner.mPSampler << runner.mStatistics <<
134
-        runner.mPrintMessages << runner.mOutputFrequency <<
135
-        runner.mFixedMatrix << runner.mSamplePhase << runner.mNumUpdatesA <<
136
-        runner.mNumUpdatesP;
137
-    return ar;
138
-}
139 137
 
140
-Archive& operator>>(Archive &ar, GapsRunner &runner)
141
-{
142
-    ar >> runner.mASampler >> runner.mPSampler >> runner.mStatistics >>
143
-        runner.mPrintMessages >> runner.mOutputFrequency >>
144
-        runner.mFixedMatrix >> runner.mSamplePhase >> runner.mNumUpdatesA >>
145
-        runner.mNumUpdatesP;
146
-    return ar;
138
+        displayStatus();
139
+        createCheckpoint();
140
+    }
147 141
 }
148 142
 
149
-void GapsRunner::updateSampler(unsigned nA, unsigned nP, unsigned nCores)
143
+void GapsRunner::updateSampler(unsigned nA, unsigned nP)
150 144
 {
151 145
     if (mFixedMatrix != 'A')
152 146
     {
153 147
         mNumUpdatesA += nA;
154
-        mASampler.update(nA, nCores);
148
+        mASampler.update(nA, mMaxThreads);
155 149
         if (mFixedMatrix != 'P')
156 150
         {
157 151
             mPSampler.sync(mASampler);
... ...
@@ -161,19 +155,20 @@ void GapsRunner::updateSampler(unsigned nA, unsigned nP, unsigned nCores)
161 155
     if (mFixedMatrix != 'P')
162 156
     {
163 157
         mNumUpdatesP += nP;
164
-        mPSampler.update(nP, nCores);
158
+        mPSampler.update(nP, mMaxThreads);
165 159
         if (mFixedMatrix != 'A')
166 160
         {
167 161
             mASampler.sync(mPSampler);
168 162
         }
169 163
     }
164
+
170 165
     GAPS_ASSERT(mASampler.internallyConsistent());
171 166
     GAPS_ASSERT(mPSampler.internallyConsistent());
172 167
 }
173 168
 
174
-void GapsRunner::displayStatus(unsigned current, unsigned total)
169
+void GapsRunner::displayStatus()
175 170
 {
176
-    if (mOutputFrequency > 0 && ((current + 1) % mOutputFrequency) == 0)
171
+    if (mPrintMessages && mOutputFrequency > 0 && ((mCurrentIteration + 1) % mOutputFrequency) == 0)
177 172
     {
178 173
         bpt::time_duration diff = bpt_now() - mStartTime;
179 174
         unsigned elapsedSeconds = static_cast<unsigned>(diff.total_seconds());
... ...
@@ -185,8 +180,38 @@ void GapsRunner::displayStatus(unsigned current, unsigned total)
185 180
         unsigned seconds = elapsedSeconds;
186 181
 
187 182
         gaps_printf("%d of %d, Atoms: %lu(%lu), ChiSq: %.0f, elapsed time: %02d:%02d:%02d\n",
188
-            current + 1, total, mASampler.nAtoms(), mPSampler.nAtoms(),
189
-            mASampler.chi2(), hours, minutes, seconds);
183
+            mCurrentIteration + 1, mMaxIterations, mASampler.nAtoms(),
184
+            mPSampler.nAtoms(), mASampler.chi2(), hours, minutes, seconds);
190 185
     }
191 186
 }
192 187
 
188
+void GapsRunner::createCheckpoint()
189
+{
190
+    if (mCheckpointInterval > 0 && ((mCurrentIteration + 1) % mCheckpointInterval) == 0)
191
+    {
192
+        // create backup file
193
+        std::rename(mCheckpointOutFile.c_str(), (mCheckpointOutFile + ".backup").c_str());
194
+    
195
+        // create checkpoint file
196
+        Archive ar(mCheckpointOutFile, ARCHIVE_WRITE);
197
+        
198
+        gaps::random::save(ar);
199
+        ar << mNumPatterns << mSeed << mASampler << mPSampler << mStatistics
200
+            << mFixedMatrix << mMaxIterations << mPhase << mCurrentIteration
201
+            << mNumUpdatesA << mNumUpdatesP;
202
+
203
+        ar.close();
204
+
205
+        // delete backup file
206
+        std::remove((mCheckpointOutFile + ".backup").c_str());
207
+    }
208
+}
209
+
210
+// assume random state has been loaded and nPatterns and seed have been read
211
+Archive& operator>>(Archive &ar, GapsRunner &gr)
212
+{
213
+    ar >> gr.mASampler >> gr.mPSampler >> gr.mStatistics >> gr.mFixedMatrix
214
+        >> gr.mMaxIterations >> gr.mPhase >> gr.mCurrentIteration
215
+        >> gr.mNumUpdatesA >> gr.mNumUpdatesP;
216
+    return ar;
217
+}
193 218
\ No newline at end of file
... ...
@@ -11,6 +11,26 @@
11 11
 namespace bpt = boost::posix_time;
12 12
 #define bpt_now() bpt::microsec_clock::local_time()
13 13
 
14
+struct GapsResult
15
+{
16
+    ColMatrix Amean;
17
+    ColMatrix Asd;
18
+    RowMatrix Pmean;
19
+    RowMatrix Psd;
20
+    
21
+    float meanChiSq;
22
+    uint32_t seed;
23
+
24
+    GapsResult(const GapsStatistics &stat) :
25
+        Amean(stat.Amean()), Asd(stat.Asd()), Pmean(stat.Pmean()),
26
+        Psd(stat.Psd()), meanChiSq(0.f), seed(0)
27
+    {}
28
+
29
+    void writeCsv(const std::string &path);
30
+    void writeTsv(const std::string &path);
31
+    void writeGct(const std::string &path);
32
+};
33
+
14 34
 class GapsRunner
15 35
 {
16 36
 private:
... ...
@@ -19,96 +39,77 @@ private:
19 39
     PatternGibbsSampler mPSampler;
20 40
     GapsStatistics mStatistics;
21 41
 
42
+    char mFixedMatrix;
43
+    unsigned mMaxIterations;
44
+    
45
+    unsigned mMaxThreads;
22 46
     bool mPrintMessages;
23 47
     unsigned mOutputFrequency;
24
-    char mFixedMatrix;
25
-    bool mSamplePhase;
48
+    std::string mCheckpointOutFile;
49
+    unsigned mCheckpointInterval;
26 50
 
27
-    unsigned mNumUpdatesA;
28
-    unsigned mNumUpdatesP;
29
-        
30 51
     bpt::ptime mStartTime;
52
+    char mPhase;
53
+    unsigned mCurrentIteration;
31 54
 
32
-    void updateSampler(unsigned nA, unsigned nP, unsigned nCores);
33
-    void displayStatus(unsigned current, unsigned total);
55
+    // only kept since they need to be written to the start of every checkpoint
56
+    unsigned mNumPatterns;
57
+    uint32_t mSeed;
34 58
 
35
-    double estimatePercentComplete();
59
+    unsigned mNumUpdatesA;
60
+    unsigned mNumUpdatesP;
61
+        
62
+    void runOnePhase();
63
+    void updateSampler(unsigned nA, unsigned nP);
64
+    void displayStatus();
65
+    void createCheckpoint();
36 66
 
37 67
 public:
38 68
 
39 69
     template <class DataType>
40
-    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns);
41
-
42
-    template <class DataType>
43
-    GapsRunner(const DataType &data, bool transposeData, bool partitionRows,
44
-        const std::vector<unsigned> &indices, unsigned nPatterns);
45
-
46
-    template <class DataType>
47
-    void setUncertainty(const DataType &unc, bool transposeData);
70
+    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns,
71
+        uint32_t seed, bool partitionRows,
72
+        const std::vector<unsigned> &indices);
48 73
 
49 74
     template <class DataType>
50 75
     void setUncertainty(const DataType &unc, bool transposeData,
51 76
         bool partitionRows, const std::vector<unsigned> &indices);
52 77
 
53
-    void printMessages(bool print);
54
-    void setOutputFrequency(unsigned n);
78
+    void setFixedMatrix(char which, const Matrix &mat);
79
+
80
+    void setMaxIterations(unsigned nIterations);
55 81
     void setSparsity(float alphaA, float alphaP, bool singleCell);
56 82
     void setMaxGibbsMass(float maxA, float maxP);
83
+    
84
+    void setMaxThreads(unsigned nThreads);
85
+    void setPrintMessages(bool print);
86
+    void setOutputFrequency(unsigned n);
87
+    void setCheckpointOutFile(const std::string &outFile);
88
+    void setCheckpointInterval(unsigned interval);
57 89
 
58
-    void setFixedMatrix(char which, const RowMatrix &mat);
59
-
60
-    void startSampling();
61
-
62
-    void run(unsigned nIter, unsigned nCores);
63
-
64
-    unsigned nRow() const;
65
-    unsigned nCol() const;
66
-
67
-    ColMatrix Amean() const;
68
-    RowMatrix Pmean() const;
69
-    ColMatrix Asd() const;
70
-    RowMatrix Psd() const;
71
-    float meanChiSq() const;
72
-
73
-    void startClock();
90
+    GapsResult run();
74 91
 
75 92
     // serialization
76
-    friend Archive& operator<<(Archive &ar, GapsRunner &runner);
77 93
     friend Archive& operator>>(Archive &ar, GapsRunner &runner);
78 94
 };
79 95
 
96
+// problem with passing file parser - need to read it twice
80 97
 template <class DataType>
81
-GapsRunner::GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns)
82
-    :
83
-mASampler(data, transposeData, nPatterns),
84
-mPSampler(data, transposeData, nPatterns),
85
-mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
86
-mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
87
-{
88
-    mASampler.sync(mPSampler);
89
-    mPSampler.sync(mASampler);
90
-}
91
-
92
-template <class DataType>
93
-GapsRunner::GapsRunner(const DataType &data, bool transposeData,
94
-bool partitionRows, const std::vector<unsigned> &indices, unsigned nPatterns)
98
+GapsRunner::GapsRunner(const DataType &data, bool transposeData, uint32_t seed,
99
+unsigned nPatterns, bool partitionRows, const std::vector<unsigned> &indices)
95 100
     :
96 101
 mASampler(data, transposeData, nPatterns, partitionRows, indices),
97 102
 mPSampler(data, transposeData, nPatterns, partitionRows, indices),
98
-mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
99
-mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
103
+mStatistics(mASampler.dataRows(), mASampler.dataCols(), nPatterns),
104
+mFixedMatrix('N'), mMaxIterations(1000), mMaxThreads(1), mPrintMessages(true),
105
+mOutputFrequency(500), mCheckpointOutFile("gaps_checkpoint.out"),
106
+mCheckpointInterval(0), mPhase('C'), mCurrentIteration(0),
107
+mNumPatterns(nPatterns), mSeed(seed), mNumUpdatesA(0), mNumUpdatesP(0)
100 108
 {
101 109
     mASampler.sync(mPSampler);
102 110
     mPSampler.sync(mASampler);
103 111
 }
104 112
 
105
-template <class DataType>
106
-void GapsRunner::setUncertainty(const DataType &unc, bool transposeData)
107
-{
108
-    mASampler.setUncertainty(unc, transposeData);
109
-    mPSampler.setUncertainty(unc, transposeData);
110
-}
111
-
112 113
 template <class DataType>
113 114
 void GapsRunner::setUncertainty(const DataType &unc, bool transposeData,
114 115
 bool partitionRows, const std::vector<unsigned> &indices)
... ...
@@ -21,10 +21,10 @@ template <class T, class MatA, class MatB>
21 21
 class GibbsSampler;
22 22
 
23 23
 template <class T, class MatA, class MatB>
24
-Archive& operator<<(Archive &ar, GibbsSampler<T, MatA, MatB> &samp);
24
+inline Archive& operator<<(Archive &ar, GibbsSampler<T, MatA, MatB> &samp);
25 25
 
26 26
 template <class T, class MatA, class MatB>
27
-Archive& operator>>(Archive &ar, GibbsSampler<T, MatA, MatB> &samp);
27
+inline Archive& operator>>(Archive &ar, GibbsSampler<T, MatA, MatB> &samp);
28 28
 
29 29
 /*************************** GIBBS SAMPLER INTERFACE **************************/
30 30
 
... ...
@@ -81,17 +81,10 @@ protected:
81 81
 
82 82
 public:
83 83
 
84
-    template <class DataType>
85
-    GibbsSampler(const DataType &data, bool transpose, unsigned nPatterns,
86
-        bool amp);
87
-
88 84
     template <class DataType>
89 85
     GibbsSampler(const DataType &data, bool transpose, unsigned nPatterns,
90 86
         bool amp, bool partitionRows, const std::vector<unsigned> &indices);
91 87
 
92
-    template <class DataType>
93
-    void setUncertainty(const DataType &unc, bool transposeData);
94
-
95 88
     template <class DataType>
96 89
     void setUncertainty(const DataType &unc, bool transposeData,
97 90
         bool partitionRows, const std::vector<unsigned> &indices);
... ...
@@ -100,7 +93,7 @@ public:
100 93
     void setMaxGibbsMass(float max);
101 94
     void setAnnealingTemp(float temp);
102 95
 
103
-    void setMatrix(const MatA &mat);
96
+    void setMatrix(const Matrix &mat);
104 97
 
105 98
     void update(unsigned nSteps, unsigned nCores);
106 99
 
... ...
@@ -143,10 +136,6 @@ private:
143 136
 
144 137
 public:
145 138
 
146
-    template <class DataType>
147
-    AmplitudeGibbsSampler(const DataType &data, bool transposeData,
148
-        unsigned nPatterns);
149
-
150 139
     template <class DataType>
151 140
     AmplitudeGibbsSampler(const DataType &data, bool transposeData,
152 141
         unsigned nPatterns, bool partitionRows,
... ...
@@ -179,10 +168,6 @@ private:
179 168
 
180 169
 public:
181 170
 
182
-    template <class DataType>
183
-    PatternGibbsSampler(const DataType &data, bool transposeData,
184
-        unsigned nPatterns);
185
-
186 171
     template <class DataType>
187 172
     PatternGibbsSampler(const DataType &data, bool transposeData,
188 173
         unsigned nPatterns, bool partitionRows,
... ...
@@ -194,15 +179,6 @@ public:
194 179
 
195 180
 /******************** IMPLEMENTATION OF TEMPLATED FUNCTIONS *******************/
196 181
 
197
-template <class DataType>
198
-AmplitudeGibbsSampler::AmplitudeGibbsSampler(const DataType &data,
199
-bool transposeData, unsigned nPatterns)
200
-    :
201
-GibbsSampler(data, transposeData, nPatterns, true)
202
-{
203
-    mQueue.setDimensionSize(mBinSize, mNumCols);
204
-}
205
-
206 182
 template <class DataType>
207 183
 AmplitudeGibbsSampler::AmplitudeGibbsSampler(const DataType &data,
208 184
 bool transposeData, unsigned nPatterns, bool partitionRows,
... ...
@@ -213,15 +189,6 @@ GibbsSampler(data, transposeData, nPatterns, true, partitionRows, indices)
213 189
     mQueue.setDimensionSize(mBinSize, mNumCols);
214 190
 }
215 191
 
216
-template <class DataType>
217
-PatternGibbsSampler::PatternGibbsSampler(const DataType &data,
218
-bool transposeData, unsigned nPatterns)
219
-    :
220
-GibbsSampler(data, transposeData, nPatterns, false)
221
-{
222
-    mQueue.setDimensionSize(mBinSize, mNumRows);
223
-}
224
-
225 192
 template <class DataType>
226 193
 PatternGibbsSampler::PatternGibbsSampler(const DataType &data,
227 194
 bool transposeData, unsigned nPatterns, bool partitionRows,
... ...
@@ -232,30 +199,6 @@ GibbsSampler(data, transposeData, nPatterns, false, partitionRows, indices)
232 199
     mQueue.setDimensionSize(mBinSize, mNumRows);
233 200
 }
234 201
 
235
-// normal constructor
236
-template <class T, class MatA, class MatB>
237
-template <class DataType>
238
-GibbsSampler<T, MatA, MatB>::GibbsSampler(const DataType &data,
239
-bool transposeData, unsigned nPatterns, bool amp)
240
-    :
241
-mDMatrix(data, transposeData), mSMatrix(mDMatrix.pmax(0.1f, 0.1f)), 
242
-mAPMatrix(mDMatrix.nRow(), mDMatrix.nCol()),
243
-mMatrix(amp ? mDMatrix.nRow() : nPatterns, amp ? nPatterns : mDMatrix.nCol()),
244
-mOtherMatrix(NULL), mQueue(mMatrix.nRow() * mMatrix.nCol()), mLambda(0.f),
245
-mMaxGibbsMass(100.f), mAnnealingTemp(1.f), mNumRows(mMatrix.nRow()),
246
-mNumCols(mMatrix.nCol()), mAvgQueue(0.f), mNumQueues(0.f)
247
-{
248
-    // calculate atomic domain size
249
-    mBinSize = std::numeric_limits<uint64_t>::max()
250
-        / static_cast<uint64_t>(mNumRows * mNumCols);
251
-    mQueue.setDomainSize(mBinSize * mNumRows * mNumCols);
252
-    mDomain.setDomainSize(mBinSize * mNumRows * mNumCols);
253
-
254
-    // default sparsity parameters
255
-    setSparsity(0.01, false);
256
-}
257
-
258
-// constructor that allows for subsetting the data
259 202
 template <class T, class MatA, class MatB>
260 203
 template <class DataType>
261 204
 GibbsSampler<T, MatA, MatB>::GibbsSampler(const DataType &data,
... ...
@@ -280,14 +223,6 @@ mNumCols(mMatrix.nCol()), mAvgQueue(0.f), mNumQueues(0.f)
280 223
     setSparsity(0.01, false);
281 224
 }
282 225
 
283
-template <class T, class MatA, class MatB>
284
-template <class DataType>
285
-void GibbsSampler<T, MatA, MatB>::setUncertainty(const DataType &unc,
286
-bool transpose)
287
-{
288
-    mSMatrix = MatB(unc, transpose);
289
-}
290
-
291 226
 template <class T, class MatA, class MatB>
292 227
 template <class DataType>
293 228
 void GibbsSampler<T, MatA, MatB>::setUncertainty(const DataType &unc,
... ...
@@ -323,7 +258,7 @@ void GibbsSampler<T, MatA, MatB>::setAnnealingTemp(float temp)
323 258
 }
324 259
 
325 260
 template <class T, class MatA, class MatB>
326
-void GibbsSampler<T, MatA, MatB>::setMatrix(const MatA &mat)
261
+void GibbsSampler<T, MatA, MatB>::setMatrix(const Matrix &mat)
327 262
 {   
328 263
     mMatrix = mat;
329 264
 }
... ...
@@ -4,7 +4,6 @@ PKG_LIBS = @GAPS_LIBS@
4 4
 
5 5
 OBJECTS =   AtomicDomain.o \
6 6
             Cogaps.o \
7
-            GapsDispatcher.o \
8 7
             GapsRunner.o \
9 8
             GapsStatistics.o \
10 9
             GibbsSampler.o \
... ...
@@ -6,32 +6,32 @@
6 6
 using namespace Rcpp;
7 7
 
8 8
 // cogaps_cpp_from_file
9
-Rcpp::List cogaps_cpp_from_file(const std::string& data, const Rcpp::S4& params, const std::string& unc, const Rcpp::NumericMatrix& fixedMatrix, const std::string& checkpointInFile);
10
-RcppExport SEXP _CoGAPS_cogaps_cpp_from_file(SEXP dataSEXP, SEXP paramsSEXP, SEXP uncSEXP, SEXP fixedMatrixSEXP, SEXP checkpointInFileSEXP) {
9
+Rcpp::List cogaps_cpp_from_file(const std::string& data, const Rcpp::List& allParams, const std::string& uncertainty, const Rcpp::NumericVector& indices, const Rcpp::NumericMatrix& fixedMatrix);
10
+RcppEx