Browse code

matrix CRTP

Tom Sherman authored on 23/07/2018 20:21:55
Showing 9 changed files

... ...
@@ -38,8 +38,7 @@ checkpointOutFile="gaps_checkpoint.out", checkpointInterval=1000,
38 38
 checkpointInFile=NULL, transposeData=FALSE, ...)
39 39
 {
40 40
     # parse parameters from ...
41
-    # TODO remove args from ... as they're processed, check if null
42
-    fullParams <- list("gaps"=params,
41
+    allParams <- list("gaps"=params,
43 42
         "nCores"=nCores,
44 43
         "messages"=messages,
45 44
         "outputFrequency"=outputFrequency,
... ...
@@ -47,10 +46,9 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
47 46
         "checkpointInterval"=checkpointInterval,
48 47
         "checkpointInFile"=checkpointInFile,
49 48
         "transposeData"=transposeData,
50
-        "whichMatrixFixed"="N" # internal parameter
49
+        "whichMatrixFixed"="" # internal parameter
51 50
     )
52
-    fullParams <- parseOldParams(fullParams, list(...))
53
-    fullParams <- parseDirectParams(fullParams, list(...))
51
+    allParams <- parseExtraParams(allParams, list(...))
54 52
 
55 53
     # check file extension
56 54
     if (class(data) == "character" & !(file_ext(data) %in% c("tsv", "csv", "mtx")))
... ...
@@ -97,6 +95,10 @@ checkpointInFile=NULL, transposeData=FALSE, ...)
97 95
         else
98 96
             message("Running CoGAPS from a checkpoint")
99 97
     }
98
+    else
99
+    {
100
+        allParams$checkpointInFile <- ""
101
+    }
100 102
 
101 103
     # run cogaps
102 104
     gapsReturnList <- dispatchFunc(data, allParams, uncertainty)
... ...
@@ -34,7 +34,7 @@ setMethod("initialize", "CogapsParams",
34 34
         .Object@maxGibbsMassP <- 100
35 35
         .Object@seed <- getMilliseconds(as.POSIXlt(Sys.time()))
36 36
         .Object@singleCell <- FALSE
37
-        .Object@distributed <- "A"
37
+        .Object@distributed <- ""
38 38
         .Object@nSets <- 3
39 39
         .Object@cut <- .Object@nPatterns
40 40
         .Object@minNS <- ceiling(.Object@nSets / 2)
... ...
@@ -23,7 +23,7 @@ distributedCogaps <- function(data, allParams, uncertainty)
23 23
 
24 24
     # set fixed matrix
25 25
     initialA <- initialP <- matrix(0)
26
-    if (allParams$modelParams@distributed == "genome-wide")
26
+    if (allParams$gaps@distributed == "genome-wide")
27 27
     {
28 28
         allParams$whichMatrixFixed <- "P"
29 29
         initialP <- consensusMatrix
... ...
@@ -122,42 +122,28 @@ patternMatch <- function(allPatterns, allParams)
122 122
     cc <- corcut(allPatterns, allParams)
123 123
 
124 124
     ### split by maxNS
125
-    indx <- which(unlist(lapply(cc$PatsByClust, function(x) nrow(x) > maxNS)))
126
-    i <- 1
125
+    indx <- which(sapply(cc$PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
127 126
     while (length(indx) > 0)
128 127
     { 
129
-        icc <- corcut(cc$AByClust[[indx[1]]], minNS, 2, cluster.method)
130
-        if (length(icc[[2]]) == 0)
131
-        {
132
-            indx <- indx[-1]
133
-            next
134
-        }
135
-        else
136
-        {
137
-            cc$AByClust[[indx[1]]] <- icc[[2]][[1]]
138
-            cc$RtoMeanPattern[[indx[1]]] <- icc[[1]][[1]]
139
-            if (length(icc[[2]]) > 1)
140
-            {
141
-                cc$AByClust<-append(cc$AByClust,icc[[2]][2])
142
-                cc$RtoMeanPattern<-append(cc$RtoMeanPattern,icc[[1]][2])
143
-            } 
144
-            indx <- which(unlist(lapply(cc$AByClust,function(x) nrow(x) > maxNS)))
145
-        }
128
+        allParams$gaps@cut <- 2
129
+        icc <- corcut(cc$PatsByClust[[indx[1]]], allParams)
130
+
131
+        cc$PatsByClust[[indx[1]]] <- icc$PatsByClust[[1]]
132
+        cc$RtoMeanPattern[[indx[1]]] <- icc$RtoMeanPattern[[1]]
133
+        cc$PatsByClust <- append(cc$PatsByClust, icc$PatsByClust[2])
134
+        cc$RtoMeanPattern <- append(cc$RtoMeanPattern, icc$RtoMeanPattern[2])
135
+
136
+        indx <- which(sapply(cc$PatsByClust, function(x) ncol(x) > allParams$gaps@maxNS))
146 137
     }
147 138
 
148
-    #weighted.mean(AByClustDrop[[1]],RtoMPDrop[[1]])
149
-    AByCDSWavg <- t(sapply(1:length(cc$AByClust), function(z)
150
-        apply(cc$AByClust[[z]], 1, function(x)
151
-            weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
152
-    rownames(AByCDSWavg) <- lapply(1:length(cc$AByClust), function(x)
153
-        paste("Pattern",x))
154
-    #scale As
155
-    Amax <- apply(AByCDSWavg, 1, max)
156
-    AByCDSWavgScaled <- t(sapply(1:dim(AByCDSWavg)[1], function(x)
157
-        AByCDSWavg[x,] / Amax[x]))
158
-    rownames(AByCDSWavgScaled) <- rownames(AByCDSWavg)
159
-
160
-    return(AByCDSWavgScaled)
139
+    # created matrix of mean patterns - weighted by R closeness of fit
140
+    PatsByCDSWavg <- t(sapply(1:length(cc$PatsByClust), function(z)
141
+        apply(cc$PatsByClust[[z]], 1, function(x) weighted.mean(x, (cc$RtoMeanPattern[[z]])^3))))
142
+    rownames(PatsByCDSWavg) <- lapply(1:length(cc$PatsByClust), function(x) paste("Pattern", x))
143
+    return(PatsByCDSWavg)
144
+
145
+    # scale
146
+    return(apply(PatsByCDSWavg, 1, function(row) row / max(row)))
161 147
 }
162 148
 
163 149
 corcut <- function(allPatterns, allParams)
... ...
@@ -5,6 +5,16 @@
5 5
 
6 6
 #include <string>
7 7
 
8
+static std::vector<unsigned> convertRVec(const Rcpp::NumericVector &rvec)
9
+{
10
+    std::vector<unsigned> vec;
11
+    for (unsigned i = 0; i < rvec.size(); ++i)
12
+    {
13
+        vec.push_back(rvec[i]);
14
+    }
15
+    return vec;
16
+}
17
+
8 18
 static RowMatrix convertRMatrix(const Rcpp::NumericMatrix &rmat)
9 19
 {
10 20
     RowMatrix mat(rmat.nrow(), rmat.ncol());
... ...
@@ -37,55 +47,86 @@ static bool isNull(const RowMatrix &mat)
37 47
     return mat.nRow() == 1 && mat.nCol() == 1;
38 48
 }
39 49
 
50
+static bool isNull(const Rcpp::NumericMatrix &mat)
51
+{
52
+    return mat.nrow() == 1 && mat.ncol();
53
+}
54
+
55
+static bool isNull(const Rcpp::NumericVector &vec)
56
+{
57
+    return vec.size() == 1;
58
+}
59
+
40 60
 static bool isNull(const std::string &path)
41 61
 {
42 62
     return path.empty();
43 63
 }
44 64
 
45 65
 template <class T>
46
-static Rcpp::List cogapsRun(const T &data, Rcpp::S4 params, const T &unc,
47
-const RowMatrix &fixedMatrix, const std::string &checkpointInFile)
66
+static Rcpp::List cogapsRun(const T &data, const Rcpp::List &allParams,
67
+const T &uncertainty, const Rcpp::NumericVector &indices,
68
+const Rcpp::NumericMatrix &initialA, const Rcpp::NumericMatrix &initialP)
48 69
 {
49 70
     GapsDispatcher dispatcher;
50 71
 
51 72
     // check if we're initializing with a checkpoint or not
52
-    if (!isNull(checkpointInFile))
73
+    if (!isNull(allParams["checkpointInFile"]))
53 74
     {
54
-        dispatcher.initialize(data, checkpointInFile);
75
+        dispatcher.initialize(data, allParams["transposeData"],
76
+            allParams["checkpointInFile"]);
55 77
     }
56 78
     else
57 79
     {
58
-        dispatcher.initialize(data, params.slot("nPatterns"), params.slot("seed"));
80
+        // check to see if we're subsetting the data
81
+        if (!isNull(allParams["gaps"].slot("distributed")))
82
+        {
83
+            dispatcher.initialize(data, allParams["transposeData"],
84
+                allParams["gaps"].slot("distributed") == "genome-wide",
85
+                convertRVec(indices), allParams["gaps"].slot("nPatterns"),
86
+                allParams["gaps"].slot("seed"));
87
+        }
88
+        else
89
+        {
90
+            dispatcher.initialize(data, allParams["transposeData"],
91
+                allParams["gaps"].slot("nPatterns"), allParams["gaps"].slot("seed"));
92
+        }
59 93
 
60 94
         // set optional parameters
61
-        dispatcher.setMaxIterations(params.slot("nIterations"));
62
-        dispatcher.setOutputFrequency(params.slot("outputFrequency"));
63
-
64
-        dispatcher.setSparsity(params.slot("alphaA"), params.slot("alphaP"),
65
-            params.slot("singleCell"));
66
-        
67
-        dispatcher.setMaxGibbsMass(params.slot("maxGibbsMassA"),
68
-            params.slot("maxGibbsMassP"));
69
-
70
-        dispatcher.printMessages(params.slot("messages"));
95
+        dispatcher.setMaxIterations(allParams["gaps"].slot("nIterations"));
96
+        dispatcher.setSparsity(allParams["gaps"].slot("alphaA"),
97
+            allParams["gaps"].slot("alphaP"), allParams["gaps"].slot("singleCell"));
98
+        dispatcher.setMaxGibbsMass(allParams["gaps"].slot("maxGibbsMassA"),
99
+            allParams["gaps"].slot("maxGibbsMassP"));
100
+
101
+        // set initial values for A and P matrix
102
+        if (!isNull(initialA))
103
+        {
104
+            dispatcher.setAMatrix(convertRMatrix(initialA));
105
+        }
106
+        if (!isNull(initialP))
107
+        {
108
+            dispatcher.setPMatrix(convertRMatrix(initialP));
109
+        }
71 110
 
72 111
         // check if running with a fixed matrix
73
-        if (!isNull(fixedMatrix))
112
+        if (!isNull(allParams["whichMatrixFixed"]))
74 113
         {
75
-            dispatcher.setFixedMatrix(params.slot("whichMatrixFixed"), fixedMatrix);
114
+            dispatcher.setFixedMatrix(allParams["whichMatrixFixed"]);
76 115
         }
77 116
     }
78 117
 
79 118
     // set the uncertainty matrix
80
-    if (!isNull(unc))
119
+    if (!isNull(uncertainty))
81 120
     {
82
-        dispatcher.setUncertainty(unc);
121
+        dispatcher.setUncertainty(uncertainty);
83 122
     }
84 123
     
85 124
     // set parameters that aren't saved in the checkpoint
86 125
     dispatcher.setNumCoresPerSet(params.slot("nCores"));
87
-    dispatcher.setCheckpointInterval(params.slot("checkpointInterval"));
126
+    dispatcher.printMessages(params.slot("messages"));
127
+    dispatcher.setOutputFrequency(params.slot("outputFrequency"));
88 128
     dispatcher.setCheckpointOutFile(params.slot("checkpointOutFile"));
129
+    dispatcher.setCheckpointInterval(params.slot("checkpointInterval"));
89 130
 
90 131
     // run the dispatcher and return the GapsResult in an R list
91 132
     GapsResult result(dispatcher.run());
... ...
@@ -105,24 +146,23 @@ const RowMatrix &fixedMatrix, const std::string &checkpointInFile)
105 146
 Rcpp::List cogaps_cpp_from_file(const std::string &data,
106 147
 const Rcpp::List &allParams,
107 148
 const std::string &uncertainty,
108
-const Rcpp::NumericVector &indices=Rcpp::NumericVector(),
109
-const Rcpp::NumericMatrix &initialA=Rcpp::NumericMatrix(),
110
-const Rcpp::NumericMatrix &initialP=Rcpp::NumericMatrix())
149
+const Rcpp::NumericVector &indices=Rcpp::NumericVector(1),
150
+const Rcpp::NumericMatrix &initialA=Rcpp::NumericMatrix(1,1),
151
+const Rcpp::NumericMatrix &initialP=Rcpp::NumericMatrix(1,1))
111 152
 {
112
-    return cogapsRun(data, params, unc, convertRMatrix(fixedMatrix),
113
-        checkpointInFile);
153
+    return cogapsRun(data, allParams, uncertainty, indices, initialA, initialP);
114 154
 }
115 155
 
116 156
 // [[Rcpp::export]]
117 157
 Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix &data,
118 158
 const Rcpp::List &allParams,
119 159
 const Rcpp::NumericMatrix &uncertainty,
120
-const Rcpp::NumericVector &indices=Rcpp::NumericVector(),
121
-const Rcpp::NumericMatrix &initialA=Rcpp::NumericMatrix(),
122
-const Rcpp::NumericMatrix &initialP=Rcpp::NumericMatrix())
160
+const Rcpp::NumericVector &indices=Rcpp::NumericVector(1),
161
+const Rcpp::NumericMatrix &initialA=Rcpp::NumericMatrix(1,1),
162
+const Rcpp::NumericMatrix &initialP=Rcpp::NumericMatrix(1,1))
123 163
 {
124
-    return cogapsRun(convertRMatrix(data), params, convertRMatrix(unc),
125
-        convertRMatrix(fixedMatrix), checkpointInFile);
164
+    return cogapsRun(convertRMatrix(data), allParams,
165
+        convertRMatrix(uncertainty), indices, initialA, initialP);
126 166
 }
127 167
 
128 168
 // [[Rcpp::export]]
... ...
@@ -54,7 +54,11 @@ private:
54 54
     GapsDispatcher& operator=(const GapsDispatcher &p); // don't allow copies
55 55
 
56 56
     template <class DataType>
57
-    void loadData(const DataType &data);
57
+    void loadData(const DataType &data, bool transposeData);
58
+
59
+    template <class DataType>
60
+    void loadData(const DataType &data, bool transposeData, bool partitionRows,
61
+        const std:vector<unsigned> &indices);
58 62
 
59 63
 public:
60 64
 
... ...
@@ -62,10 +66,13 @@ public:
62 66
     ~GapsDispatcher();
63 67
 
64 68
     template <class DataType>
65
-    void initialize(const DataType &data, unsigned nPatterns, uint32_t seed=0);
69
+    void initialize(const DataType &data, bool transposeData,
70
+        bool partitionRows, const std::vector<unsigned> &indices,
71
+        unsigned nPatterns, uint32_t seed);
66 72
 
67 73
     template <class DataType>
68
-    void initialize(const DataType &data, const std::string &cptFile);
74
+    void initialize(const DataType &data, bool transposeData,
75
+        const std::string &cptFile);
69 76
 
70 77
     template <class DataType>
71 78
     void setUncertainty(const DataType &unc);
... ...
@@ -75,7 +82,10 @@ public:
75 82
     void setOutputFrequency(unsigned n);
76 83
     void setSparsity(float alphaA, float alphaP, bool singleCell);
77 84
     void setMaxGibbsMass(float maxA, float maxP);
78
-    void setFixedMatrix(char which, const RowMatrix &mat);
85
+
86
+    void setAMatrix(const RowMatrix &mat);
87
+    void setPMatrix(const RowMatrix &mat);
88
+    void setFixedMatrix(char which);
79 89
     
80 90
     void setNumCoresPerSet(unsigned n);
81 91
     void setCheckpointInterval(unsigned n);
... ...
@@ -85,14 +95,15 @@ public:
85 95
 };
86 96
 
87 97
 template <class DataType>
88
-void GapsDispatcher::initialize(const DataType &data, unsigned nPatterns,
98
+void GapsDispatcher::initialize(const DataType &data, bool transposeData,
99
+bool partitionRows, const std::vector<unsigned> &indices, unsigned nPatterns,
89 100
 uint32_t seed)
90 101
 {
91 102
     mSeed = seed;
92 103
     mNumPatterns = nPatterns;
93 104
     gaps::random::setSeed(mSeed);
94 105
     
95
-    loadData(data);
106
+    loadData(data, transposeData, partitionRows, indices);
96 107
     mInitialized = true;
97 108
 }
98 109
 
... ...
@@ -105,7 +116,7 @@ void GapsDispatcher::initialize(const DataType &data, const std::string &cptFile
105 116
     ar >> mSeed >> mNumPatterns >> mMaxIterations >> mPrintMessages >>
106 117
         mCheckpointsCreated >> mPhase;
107 118
 
108
-    loadData(data);
119
+    loadData(data, transposeData);
109 120
     ar >> *mRunners[0];
110 121
     mInitialized = true;
111 122
 }
... ...
@@ -118,11 +129,22 @@ void GapsDispatcher::setUncertainty(const DataType &unc)
118 129
 }
119 130
 
120 131
 template <class DataType>
121
-void GapsDispatcher::loadData(const DataType &data)
132
+void GapsDispatcher::loadData(const DataType &data, bool transposeData)
133
+{
134
+    gaps_printf("Loading Data...");
135
+    gaps_flush();
136
+    mRunners.push_back(new GapsRunner(data, transposeData, mNumPatterns));
137
+    gaps_printf("Done!\n");
138
+}
139
+
140
+template <class DataType>
141
+void GapsDispatcher::loadData(const DataType &data, bool transposeData,
142
+bool partitionRows, const std:vector<unsigned> &indices)
122 143
 {
123 144
     gaps_printf("Loading Data...");
124 145
     gaps_flush();
125
-    mRunners.push_back(new GapsRunner(data, mNumPatterns));
146
+    mRunners.push_back(new GapsRunner(data, transposeData, partitionRows,
147
+        indices, mNumPatterns));
126 148
     gaps_printf("Done!\n");
127 149
 }
128 150
 
... ...
@@ -37,7 +37,11 @@ private:
37 37
 public:
38 38
 
39 39
     template <class DataType>
40
-    GapsRunner(const DataType &data, unsigned nPatterns);
40
+    GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns);
41
+
42
+    template <class DataType>
43
+    GapsRunner(const DataType &data, bool transposeData, bool partitionRows,
44
+        const std::vector<unsigned> &indices, unsigned nPatterns);
41 45
 
42 46
     template <class DataType>
43 47
     void setUncertainty(const DataType &unc);
... ...
@@ -70,9 +74,10 @@ public:
70 74
 };
71 75
 
72 76
 template <class DataType>
73
-GapsRunner::GapsRunner(const DataType &data, unsigned nPatterns)
77
+GapsRunner::GapsRunner(const DataType &data, bool transposeData, unsigned nPatterns)
74 78
     :
75
-mASampler(data, nPatterns), mPSampler(data, nPatterns),
79
+mASampler(data, transposeData, nPatterns),
80
+mPSampler(data, transposeData, nPatterns),
76 81
 mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
77 82
 mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
78 83
 {
... ...
@@ -81,10 +86,31 @@ mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
81 86
 }
82 87
 
83 88
 template <class DataType>
84
-void GapsRunner::setUncertainty(const DataType &unc)
89
+GapsRunner::GapsRunner(const DataType &data, bool transposeData,
90
+bool partitionRows, const std::vector<unsigned> &indices, unsigned nPatterns)
91
+    :
92
+mASampler(data, transposeData, partitionRows, indices, nPatterns),
93
+mPSampler(data, transposeData, partitionRows, indices, nPatterns),
94
+mStatistics(mASampler.dataRows(), mPSampler.dataCols(), nPatterns),
95
+mSamplePhase(false), mNumUpdatesA(0), mNumUpdatesP(0), mFixedMatrix('N')
96
+{
97
+    mASampler.sync(mPSampler);
98
+    mPSampler.sync(mASampler);
99
+}
100
+
101
+template <class DataType>
102
+void GapsRunner::setUncertainty(const DataType &unc, bool transposeData)
103
+{
104
+    mASampler.setUncertainty(unc, transposeData);
105
+    mPSampler.setUncertainty(unc, transposeData);
106
+}
107
+
108
+template <class DataType>
109
+void GapsRunner::setUncertainty(const DataType &unc, bool transposeData,
110
+bool partitionRows, const std::vector<unsigned> &indices)
85 111
 {
86
-    mASampler.setUncertainty(unc);
87
-    mPSampler.setUncertainty(unc);
112
+    mASampler.setUncertainty(unc, transposeData, partitionRows, indices);
113
+    mPSampler.setUncertainty(unc, transposeData, partitionRows, indices);
88 114
 }
89 115
 
90 116
 #endif // __COGAPS_GAPS_RUNNER_H__
91 117
\ No newline at end of file
... ...
@@ -174,27 +174,75 @@ public:
174 174
 /******************** IMPLEMENTATION OF TEMPLATED FUNCTIONS *******************/
175 175
 
176 176
 template <class DataType>
177
-AmplitudeGibbsSampler::AmplitudeGibbsSampler(const DataType &data, unsigned nPatterns)
177
+AmplitudeGibbsSampler::AmplitudeGibbsSampler(const DataType &data,
178
+bool transposeData, unsigned nPatterns)
178 179
     :
179
-GibbsSampler(data, true, nPatterns)
180
+GibbsSampler(data, transposeData, true, nPatterns)
180 181
 {
181 182
     mQueue.setDimensionSize(mBinSize, mNumCols);
182 183
 }
183 184
 
184 185
 template <class DataType>
185
-PatternGibbsSampler::PatternGibbsSampler(const DataType &data, unsigned nPatterns)
186
+AmplitudeGibbsSampler::AmplitudeGibbsSampler(const DataType &data,
187
+bool transposeData, bool partitionRows, const std::vector<unsigned> &indices,
188
+unsigned nPatterns)
186 189
     :
187
-GibbsSampler(data, false, nPatterns)
190
+GibbsSampler(data, transposeData, partitionRows, indices, true, nPatterns)
191
+{
192
+    mQueue.setDimensionSize(mBinSize, mNumCols);
193
+}
194
+
195
+template <class DataType>
196
+PatternGibbsSampler::PatternGibbsSampler(const DataType &data,
197
+bool transposeData, unsigned nPatterns)
198
+    :
199
+GibbsSampler(data, transposeData, false, nPatterns)
200
+{
201
+    mQueue.setDimensionSize(mBinSize, mNumRows);
202
+}
203
+
204
+template <class DataType>
205
+PatternGibbsSampler::PatternGibbsSampler(const DataType &data,
206
+bool transposeData, bool partitionRows, const std::vector<unsigned> &indices,
207
+unsigned nPatterns)
208
+    :
209
+GibbsSampler(data, transposeData, partitionRows, indices, false, nPatterns)
188 210
 {
189 211
     mQueue.setDimensionSize(mBinSize, mNumRows);
190 212
 }
191 213
 
214
+// normal constructor
215
+template <class T, class MatA, class MatB>
216
+template <class DataType>
217
+GibbsSampler<T, MatA, MatB>::GibbsSampler(const DataType &data,
218
+bool transposeData, bool amp, unsigned nPatterns)
219
+    :
220
+mDMatrix(data, transposeData), mSMatrix(mDMatrix.pmax(0.1f, 0.1f)), 
221
+mAPMatrix(mDMatrix.nRow(), mDMatrix.nCol()),
222
+mMatrix(amp ? mDMatrix.nRow() : nPatterns, amp ? nPatterns : mDMatrix.nCol()),
223
+mOtherMatrix(NULL), mQueue(mMatrix.nRow() * mMatrix.nCol()), mLambda(0.f),
224
+mMaxGibbsMass(100.f), mAnnealingTemp(1.f), mNumRows(mMatrix.nRow()),
225
+mNumCols(mMatrix.nCol()), mAvgQueue(0.f), mNumQueues(0.f)
226
+{
227
+    // calculate atomic domain size
228
+    mBinSize = std::numeric_limits<uint64_t>::max()
229
+        / static_cast<uint64_t>(mNumRows * mNumCols);
230
+    mQueue.setDomainSize(mBinSize * mNumRows * mNumCols);
231
+    mDomain.setDomainSize(mBinSize * mNumRows * mNumCols);
232
+
233
+    // default sparsity parameters
234
+    setSparsity(0.01, false);
235
+}
236
+
237
+// constructor that allows for subsetting the data
192 238
 template <class T, class MatA, class MatB>
193 239
 template <class DataType>
194 240
 GibbsSampler<T, MatA, MatB>::GibbsSampler(const DataType &data,
241
+bool transposeData, bool partitionRows, const std::vector<unsigned> &indices,
195 242
 bool amp, unsigned nPatterns)
196 243
     :
197
-mDMatrix(data), mSMatrix(mDMatrix.pmax(0.1f, 0.1f)), 
244
+mDMatrix(data, transposeData, partitionRows, indices),
245
+mSMatrix(mDMatrix.pmax(0.1f, 0.1f)),
198 246
 mAPMatrix(mDMatrix.nRow(), mDMatrix.nCol()),
199 247
 mMatrix(amp ? mDMatrix.nRow() : nPatterns, amp ? nPatterns : mDMatrix.nCol()),
200 248
 mOtherMatrix(NULL), mQueue(mMatrix.nRow() * mMatrix.nCol()), mLambda(0.f),
... ...
@@ -3,86 +3,224 @@
3 3
 #include "../file_parser/MatrixElement.h"
4 4
 #include "../GapsAssert.h"
5 5
 
6
-template<class MatA, class MatB>
7
-static void copyMatrix(MatA &dest, const MatB &source)
6
+void RowMatrix::allocate()
8 7
 {
9
-    GAPS_ASSERT(dest.nRow() == source.nRow());
10
-    GAPS_ASSERT(dest.nCol() == source.nCol());
11
-    for (unsigned i = 0; i < source.nRow(); ++i)
8
+    for (unsigned i = 0; i < mNumRows; ++i)
9
+    {
10
+        mData.push_back(Vector(mNumCols));
11
+    }
12
+}
13
+
14
+float& RowMatrix::operator()(unsigned r, unsigned c)
15
+{
16
+    return mData[r][c];
17
+}
18
+
19
+float RowMatrix::operator()(unsigned r, unsigned c) const
20
+{
21
+    return mData[r][c];
22
+}
23
+
24
+Vector& getRow(unsigned row) {return mData[row];}
25
+const Vector& getRow(unsigned row) const {return mData[row];}
26
+
27
+float* rowPtr(unsigned row) {return mData[row].ptr();}
28
+const float* rowPtr(unsigned row) const {return mData[row].ptr();}
29
+
30
+void RowMatrix::allocate()
31
+{
32
+    for (unsigned i = 0; i < mNumRows; ++i)
33
+    {
34
+        mRows.push_back(Vector(mNumCols));
35
+    }
36
+}
37
+
38
+float& operator()(unsigned r, unsigned c) {return mData[c][r];}
39
+float operator()(unsigned r, unsigned c) const {return mData[c][r];}
40
+
41
+Vector& getCol(unsigned col) {return mData[col];}
42
+const Vector& getCol(unsigned col) const {return mData[col];}
43
+
44
+float* colPtr(unsigned col) {return mData[col].ptr();}
45
+const float* colPtr(unsigned col) const {return mData[col].ptr();}
46
+
47
+
48
+template <class MatA, class MatB>
49
+static void copyMatrix(MatA &dest, const MatB &source, bool transpose=false)
50
+{
51
+    GAPS_ASSERT(dest.nRow() == (transpose ? source.nCol() : source.nRow()));
52
+    GAPS_ASSERT(dest.nCol() == (transpose ? source.nRow() : source.nCol()))
53
+    for (unsigned i = 0; i < dest.nRow(); ++i)
12 54
     {
13
-        for (unsigned j = 0; j < source.nCol(); ++j)
55
+        for (unsigned j = 0; j < dest.nCol(); ++j)
14 56
         {
15
-            dest(i,j) = source(i,j);
57
+            dest(i,j) = transpose ? source(j,i) : source(i,j);
16 58
         }
17 59
     }
18 60
 }
19 61
 
20
-// if partitionRows is false, partition columns instead
21
-// rows of matrix should be partition dimension, i.e. need to transpose
22
-// is partitionRows is false
23
-/*
24 62
 template <class Matrix>
25
-static void fill(Matrix &mat, FileParser &p, bool partitionRows, std::vector<unsigned> whichIndices)
63
+static void createFromFile(Matrix &mat, const std::string &path, bool transpose)
26 64
 {
27
-    unsigned rowSelect = partitionRows ? 0 : 1;
28
-    unsigned colSelect = partitionRows ? 1 : 0;
65
+    FileParser p(path);
66
+    mNumRows = transpose ? p.nCol() : p.nRow();
67
+    mNumCols = transpose ? p.nRow() : p.nCol();
68
+    allocate();
29 69
 
30 70
     while (p.hasNext())
31 71
     {
32 72
         MatrixElement e(p.getNext());
33
-        std::vector<unsigned>::iterator newRowIndex = std::find(whichIndices.begin(), whichIndices.end(), e[rowSelect]);
34
-        if (newRowIndex != whichIndices.end())
73
+        unsigned row = transpose ? e.col() : e.row();
74
+        unsigned col = transpose ? e.row() : e.col();
75
+        this->operator()(row, col) = e.value();
76
+    }
77
+}
78
+
79
+// apply transpose first, then partition either rows or columns
80
+static void createFromMatrix(const RowMatrix &mat, bool transpose, bool partitionRows,
81
+const std::vector<unsigned> &indices)
82
+{
83
+    // create matrix
84
+    mNumRows = partitionRows ? indices.size() : (transpose ? mat.nCol() : mat.nRow());
85
+    mNumCols = partitionRows ? (transpose ? mat.nRow() : mat.nCol()) : indices.size();
86
+    allocate();
87
+
88
+    // fill matrix
89
+    for (unsigned i = 0; i < mat.nRow(); ++i)
90
+    {
91
+        for (unsigned j = 0; j < mat.nCol(); ++j)
35 92
         {
36
-            unsigned row = std::distance(whichIndices.begin(), newRowIndex);
37
-            mat.operator()(row, e[colSelect]) = e.value();
93
+            unsigned dataIndex = transpose ? (partitionRows ? j : i) : (partitionRows ? i : j);
94
+            std::vector<unsigned>::iterator pos = std::find(indices.begin(), indices.end(), dataIndex);
95
+            if (pos != indices.end())
96
+            {
97
+                unsigned index = std::distance(indices.begin(), pos);
98
+                unsigned row = partitionRows ? index : (transpose ? j : i);
99
+                unsigned col = partitionRows ? (transpose ? i : j) : index;
100
+                this->operator()(row,col) = mat(i,j)
101
+            }
38 102
         }
39 103
     }
40 104
 }
41
-*/
42 105
 
43 106
 /****************************** ROW MATRIX *****************************/
44 107
 
45
-RowMatrix::RowMatrix(unsigned nrow, unsigned ncol)
46
-: mNumRows(nrow), mNumCols(ncol)
108
+void RowMatrix::allocate()
47 109
 {
48 110
     for (unsigned i = 0; i < mNumRows; ++i)
49 111
     {
50 112
         mRows.push_back(Vector(mNumCols));
51 113
     }
114
+}    
115
+
116
+RowMatrix::RowMatrix(unsigned nrow, unsigned ncol)
117
+: mNumRows(nrow), mNumCols(ncol)
118
+{
119
+    allocate();
52 120
 }
53 121
 
54 122
 RowMatrix::RowMatrix(const ColMatrix &mat)
55 123
 : mNumRows(mat.nRow()), mNumCols(mat.nCol())
56 124
 {
57
-    for (unsigned i = 0; i < mNumRows; ++i)
125
+    allocate();
126
+    copyMatrix(*this, mat);
127
+}
128
+
129
+RowMatrix::RowMatrix(const RowMatrix &mat, bool transpose)
130
+    :
131
+mNumRows(transpose ? mat.nCol() : mat.nRow()),
132
+mNumCols(transpose ? mat.nRow() : mat.nCol())
133
+{
134
+    allocate();
135
+    copyMatrix(*this, mat, true);
136
+}
137
+
138
+RowMatrix::RowMatrix(const std::string &path, bool transpose)
139
+{
140
+    createFromFile(path, transpose);
141
+}
142
+
143
+// apply transpose first, then partition either rows or columns
144
+RowMatrix::RowMatrix(const RowMatrix &mat, bool transpose, bool partitionRows,
145
+const std::vector<unsigned> &indices)
146
+{
147
+    // create matrix
148
+    mNumRows = partitionRows ? indices.size() : (transpose ? mat.nCol() : mat.nRow());
149
+    mNumCols = partitionRows ? (transpose ? mat.nRow() : mat.nCol()) : indices.size();
150
+    allocate();
151
+
152
+    // fill matrix
153
+    for (unsigned i = 0; i < mat.nRow(); ++i)
58 154
     {
59
-        mRows.push_back(Vector(mNumCols));
60
-        for (unsigned j = 0; j < mNumCols; ++j)
155
+        for (unsigned j = 0; j < mat.nCol(); ++j)
61 156
         {
62
-            this->operator()(i,j) = mat(i,j);
157
+            unsigned dataIndex = transpose ? (partitionRows ? j : i) : (partitionRows ? i : j);
158
+            std::vector<unsigned>::iterator pos = std::find(indices.begin(), indices.end(), dataIndex);
159
+            if (pos != indices.end())
160
+            {
161
+                unsigned index = std::distance(indices.begin(), pos);
162
+                unsigned row = partitionRows ? index : (transpose ? j : i);
163
+                unsigned col = partitionRows ? (transpose ? i : j) : index;
164
+                this->operator()(row,col) = mat(i,j)
165
+            }
63 166
         }
64 167
     }
65 168
 }
66 169
 
67
-RowMatrix::RowMatrix(const std::string &path)
170
+// apply transpose first, then partition either rows or columns
171
+RowMatrix::RowMatrix(const std::string &path, bool transpose, bool partitionRows,
172
+const std::vector<unsigned> &indices)
68 173
 {
174
+    // create matrix
69 175
     FileParser p(path);
70
-    mNumRows = p.nRow();
71
-    mNumCols = p.nCol();
72
-
73
-    // allocate matrix
74
-    for (unsigned i = 0; i < mNumRows; ++i)
176
+    mNumRows = partitionRows ? indices.size() : (transpose ? p.nCol() : p.nRow());
177
+    mNumCols = partitionRows ? (transpose ? p.nRow() : p.nCol()) : indices.size();
178
+    allocate();
179
+
180
+    // fill
181
+    unsigned indexLoc = transpose ? (partitionRows ? 1 : 0) : (partitionRows ? 0 : 1);
182
+    unsigned rowLoc = transpose ? 1 : 0;
183
+    unsigned colLoc = transpose ? 0 : 1;
184
+    while (p.hasNext())
75 185
     {
76
-        mRows.push_back(Vector(mNumCols));
186
+        MatrixElement e(p.getNext());
187
+
188
+        unsigned dataIndex = e[indexLoc];
189
+        std::vector<unsigned>::iterator pos = std::find(indices.begin(), indices.end(), dataIndex);
190
+        if (pos != indices.end())
191
+        {
192
+            unsigned index = std::distance(indices.begin(), pos);
193
+            unsigned row = partitionRows ? index : e[rowLoc];
194
+            unsigned col = partitionRows ? e[colLoc] : index;
195
+            this->operator()(row, col) = e.value();
196
+        }
77 197
     }
198
+}
199
+
200
+
201
+// if partitionRows is false, partition columns instead
202
+// rows of matrix should be partition dimension, i.e. need to transpose
203
+// is partitionRows is false
204
+/*
205
+template <class Matrix>
206
+static void fill(Matrix &mat, FileParser &p, bool partitionRows, std::vector<unsigned> whichIndices)
207
+{
208
+    unsigned rowSelect = partitionRows ? 0 : 1;
209
+    unsigned colSelect = partitionRows ? 1 : 0;
78 210
 
79
-    // populate matrix
80 211
     while (p.hasNext())
81 212
     {
82 213
         MatrixElement e(p.getNext());
83
-        this->operator()(e.row(), e.col()) = e.value();
214
+        std::vector<unsigned>::iterator newRowIndex = std::find(whichIndices.begin(), whichIndices.end(), e[rowSelect]);
215
+        if (newRowIndex != whichIndices.end())
216
+        {
217
+            unsigned row = 
218
+            mat.operator()(row, e[colSelect]) = e.value();
219
+        }
84 220
     }
85 221
 }
222
+*/
223
+
86 224
 
87 225
 /*
88 226
 RowMatrix::RowMatrix(FileParser &p, bool partitionRows, std::vector<unsigned> whichIndices)
... ...
@@ -8,6 +8,174 @@
8 8
 #include <algorithm>
9 9
 #include <vector>
10 10
 
11
+/****************************** MATRIX INTERFACE ******************************/
12
+
13
+class RowMatrix;
14
+class ColMatrix;
15
+typedef Matrix RowMatrix; // when we don't care about row/col major order
16
+
17
+template <class T>
18
+class GenericMatrix
19
+{
20
+protected:
21
+
22
+    std::vector<Vector> mData;
23
+    unsigned mNumRows, mNumCols;
24
+
25
+    T* impl();
26
+
27
+public:
28
+
29
+    GenericMatrix(unsigned nrow, unsigned ncol);
30
+
31
+    GenericMatrix(const Matrix &mat, bool transpose=false);
32
+    GenericMatrix(const std::string &path, bool transpose=false);
33
+
34
+    GenericMatrix(const Matrix &mat, bool transpose, bool partitionRows,
35
+        const std::vector<unsigned> &indices);
36
+    GenericMatrix(const std::string &path, bool transpose, bool partitionRows,
37
+        const std::vector<unsigned> &indices);
38
+
39
+    unsigned nRow() const;
40
+    unsigned nCol() const;
41
+
42
+    T operator*(float val) const;
43
+    T operator/(float val) const;
44
+    T pmax(float scale, float max) const;
45
+
46
+    friend Archive& operator<< <T>(Archive &ar, GenericMatrix &mat);
47
+    friend Archive& operator>> <T>(Archive &ar, GenericMatrix &mat);
48
+};
49
+
50
+class RowMatrix : public Matrix<RowMatrix>
51
+{
52
+private:
53
+
54
+    friend class GenericMatrix;
55
+    void allocate();
56
+
57
+public:
58
+
59
+    RowMatrix(unsigned nrow, unsigned ncol);
60
+
61
+    RowMatrix(const Matrix &mat, bool transpose=false);
62
+    RowMatrix(const std::string &path, bool transpose=false);
63
+
64
+    RowMatrix(const Matrix &mat, bool transpose, bool partitionRows,
65
+        const std::vector<unsigned> &indices);
66
+    RowMatrix(const std::string &path, bool transpose, bool partitionRows,
67
+        const std::vector<unsigned> &indices);
68
+
69
+    // simplifies assigning to/from standard Matrix (RowMatrix)
70
+    RowMatrix& operator=(const ColMatrix &mat);
71
+
72
+    // for single element access - do not loop over elements with this
73
+    float& operator()(unsigned r, unsigned c) {return mData[r][c];}
74
+    float operator()(unsigned r, unsigned c) const {return mData[r][c];}
75
+
76
+    // for convenience when doing non-performance critical math
77
+    Vector& getRow(unsigned row) {return mData[row];}
78
+    const Vector& getRow(unsigned row) const {return mData[row];}
79
+
80
+    // raw pointer allows for high performance looping over rows with SIMD
81
+    float* rowPtr(unsigned row) {return mData[row].ptr();}
82
+    const float* rowPtr(unsigned row) const {return mData[row].ptr();}
83
+};
84
+
85
+class ColMatrix : public Matrix<ColMatrix>
86
+{
87
+private:
88
+
89
+    friend class GenericMatrix;
90
+    void allocate();
91
+
92
+public:
93
+
94
+    ColMatrix(unsigned nrow, unsigned ncol);
95
+
96
+    ColMatrix(const Matrix &mat, bool transpose=false);
97
+    ColMatrix(const std::string &path, bool transpose=false);
98
+
99
+    ColMatrix(const Matrix &mat, bool transpose, bool partitionRows,
100
+        const std::vector<unsigned> &indices);
101
+    ColMatrix(const std::string &path, bool transpose, bool partitionRows,
102
+        const std::vector<unsigned> &indices);
103
+
104
+    // simplifies assigning to/from standard Matrix (RowMatrix)
105
+    ColMatrix& operator=(const RowMatrix &mat);
106
+
107
+    // for single element access - do not loop over elements with this
108
+    float& operator()(unsigned r, unsigned c) {return mData[c][r];}
109
+    float operator()(unsigned r, unsigned c) const {return mData[c][r];}
110
+
111
+    // for convenience when doing non-performance critical math
112
+    Vector& getCol(unsigned col) {return mData[col];}
113
+    const Vector& getCol(unsigned col) const {return mData[col];}
114
+
115
+    // raw pointer allows for high performance looping over rows with SIMD
116
+    float* colPtr(unsigned col) {return mData[col].ptr();}
117
+    const float* colPtr(unsigned col) const {return mData[col].ptr();}
118
+};
119
+
120
+/******************** IMPLEMENTATION OF TEMPLATED FUNCTIONS *******************/
121
+
122
+template <class T>
123
+GenericMatrix::GenericMatrix(unsigned nrow, unsigned ncol)
124
+    :
125
+mNumRows(nrow), mNumCols(ncol)
126
+{
127
+    impl()->allocate();
128
+}
129
+
130
+GenericMatrix::GenericMatrix(const Matrix &mat, bool transpose)
131
+    :
132
+mNumRows(transpose ? mat.nCol() : mat.nRow()),
133
+mNumCols(transpose ? mat.nRow() : mat.nCol())
134
+{
135
+    impl()->allocate();
136
+    for (unsigned i = 0; i < mNumRows; ++i)
137
+    {
138
+        for (unsigned j = 0; j < mNumCols; ++j)
139
+        {
140
+            impl()->operator()(i,j) = transpose ? mat(j,i) : mat(i,j);
141
+        }
142
+    }
143
+}
144
+
145
+GenericMatrix::GenericMatrix(const std::string &path, bool transpose)
146
+{
147
+    FileParser p(path);
148
+    mNumRows = transpose ? p.nCol() : p.nRow();
149
+    mNumCols = transpose ? p.nRow() : p.nCol();
150
+    allocate();
151
+
152
+    while (p.hasNext())
153
+    {
154
+        MatrixElement e(p.getNext());
155
+        unsigned row = transpose ? e.col() : e.row();
156
+        unsigned col = transpose ? e.row() : e.col();
157
+        impl()->operator()(row,col) = e.value();
158
+    }
159
+}
160
+
161
+    GenericMatrix(const Matrix &mat, bool transpose, bool partitionRows,
162
+        const std::vector<unsigned> &indices);
163
+    GenericMatrix(const std::string &path, bool transpose, bool partitionRows,
164
+        const std::vector<unsigned> &indices);
165
+
166
+    unsigned nRow() const;
167
+    unsigned nCol() const;
168
+
169
+    T operator*(float val) const;
170
+    T operator/(float val) const;
171
+    T& operator=(const ColMatrix &mat);
172
+    T pmax(float scale, float max) const;
173
+
174
+    friend Archive& operator<<(Archive &ar, T &mat);
175
+    friend Archive& operator>>(Archive &ar, T &mat);
176
+
177
+
178
+#if 0
11 179
 // forward declarations
12 180
 class RowMatrix;
13 181
 class ColMatrix;
... ...
@@ -17,27 +185,22 @@ class RowMatrix
17 185
 private:
18 186
 
19 187
     std::vector<Vector> mRows;
20
-    unsigned mNumRows, mNumCols;
188
+
189
+
190
+    void allocate();
21 191
 
22 192
 public:
23 193
 
24 194
     RowMatrix(unsigned nrow, unsigned ncol);
25 195
     explicit RowMatrix(const ColMatrix &mat);
26 196
 
27
-    explicit RowMatrix(const std::string &path);
28
-    //RowMatrix(const std::string &p, bool parseRows, std::vector<unsigned> whichIndices);
29
-
30
-    unsigned nRow() const {return mNumRows;}
31
-    unsigned nCol() const {return mNumCols;}
32
-
33
-    float& operator()(unsigned r, unsigned c) {return mRows[r][c];}
34
-    float operator()(unsigned r, unsigned c) const {return mRows[r][c];}
35
-
36
-    Vector& getRow(unsigned row) {return mRows[row];}
37
-    const Vector& getRow(unsigned row) const {return mRows[row];}
197
+    RowMatrix(const RowMatrix &mat, bool transpose=false);
198
+    RowMatrix(const std::string &path, bool transpose=false);
38 199
 
39
-    float* rowPtr(unsigned row) {return mRows[row].ptr();}
40
-    const float* rowPtr(unsigned row) const {return mRows[row].ptr();}
200
+    RowMatrix(const RowMatrix &mat, bool transpose, bool partitionRows,
201
+        const std::vector<unsigned> &indices);
202
+    RowMatrix(const std::string &p, bool transpose, bool partitionRows,
203
+        const std::vector<unsigned> &indices);
41 204
 
42 205
     RowMatrix operator*(float val) const;
43 206
     RowMatrix operator/(float val) const;
... ...
@@ -60,20 +223,13 @@ public:
60 223
     ColMatrix(unsigned nrow, unsigned ncol);
61 224
     explicit ColMatrix(const RowMatrix &mat);
62 225
 
63
-    explicit ColMatrix(const std::string &path);
64
-    //ColMatrix(const std::string &p, bool parseRows, std::vector<unsigned> whichIndices);
226
+    ColMatrix(ColMatrix mat, bool transpose=false);
227
+    ColMatrix(const std::string &path, bool transpose=false);
65 228
 
66
-    unsigned nRow() const {return mNumRows;}
67
-    unsigned nCol() const {return mNumCols;}
68
-
69
-    float& operator()(unsigned r, unsigned c) {return mCols[c][r];}
70
-    float operator()(unsigned r, unsigned c) const {return mCols[c][r];}
71
-
72
-    Vector& getCol(unsigned col) {return mCols[col];}
73
-    const Vector& getCol(unsigned col) const {return mCols[col];}
74
-
75
-    float* colPtr(unsigned col) {return mCols[col].ptr();}
76
-    const float* colPtr(unsigned col) const {return mCols[col].ptr();}
229
+    ColMatrix(ColMatrix mat, bool transpose, bool partitionRows,
230
+        const std::vector<unsigned> &indices);
231
+    ColMatrix(const std::string &p, bool transpose, bool partitionRows,
232
+        const std::vector<unsigned> &indices);
77 233
 
78 234
     ColMatrix operator*(float val) const;
79 235
     ColMatrix operator/(float val) const;
... ...
@@ -85,3 +241,5 @@ public:
85 241
 };
86 242
 
87 243
 #endif
244
+
245
+#endif // __COGAPS_MATRIX_H__