
basic infrastructure for sparse cogaps

Tom Sherman authored on 17/10/2018 00:45:29
Showing 26 changed files

... ...
@@ -1,5 +1,5 @@
 Package: CoGAPS
-Version: 3.3.33
+Version: 3.3.50
 Date: 2018-04-24
 Title: Coordinated Gene Activity in Pattern Sets
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
... ...
@@ -103,6 +103,8 @@ outputToFile=NULL, ...)
         stop("uncertainty must be a matrix unless data is a file path")
     if (!is(data, "character"))
         checkDataMatrix(data, uncertainty, allParams$gaps)
+    if (!is.null(uncertainty) && allParams$gaps@useSparseOptimization)
+        stop("must use default uncertainty when enabling useSparseOptimization")

     # check single cell parameter
     if (!is.null(allParams$gaps@distributed))
... ...
@@ -11,6 +11,8 @@
 #' @slot maxGibbsMassP atomic mass restriction for sample matrix
 #' @slot seed random number generator seed
 #' @slot singleCell is the data single cell?
+#' @slot useSparseOptimization speeds up performance with sparse data; note
+#' that this can only be used with the default uncertainty
 #' @slot distributed either "genome-wide" or "single-cell" indicating which
 #' distributed algorithm should be used
 #' @slot nSets [distributed parameter] number of sets to break data into
... ...
@@ -35,6 +37,7 @@ setClass("CogapsParams", slots = c(
     maxGibbsMassP = "numeric",
     seed = "numeric",
     singleCell = "logical",
+    useSparseOptimization = "logical",
     distributed = "character_OR_NULL",
     nSets = "numeric",
     cut = "numeric",
... ...
@@ -63,6 +66,7 @@ setMethod("initialize", "CogapsParams",
         .Object@maxGibbsMassP <- 100
         .Object@seed <- getMilliseconds(as.POSIXlt(Sys.time()))
         .Object@singleCell <- FALSE
+        .Object@useSparseOptimization <- FALSE
         .Object@distributed <- NULL
         .Object@cut <- .Object@nPatterns
         .Object@nSets <- 4
... ...
@@ -72,6 +72,7 @@ const Rcpp::Nullable<Rcpp::IntegerVector> &indices)
     params.maxGibbsMassA = gapsParams.slot("maxGibbsMassA");
     params.maxGibbsMassP = gapsParams.slot("maxGibbsMassP");
     params.singleCell = gapsParams.slot("singleCell");
+    params.useSparseOptimization = gapsParams.slot("useSparseOptimization");

     // check if using fixed matrix
     if (fixedMatrix.isNotNull())
... ...
@@ -47,6 +47,11 @@ GapsResult AbstractGapsRunner::run()

     mStartTime = bpt_now();

+    // check if running in debug mode
+    #ifdef GAPS_DEBUG
+    gaps_printf("Running in debug mode\n");
+    #endif
+
     // calculate appropriate number of threads if compiled with openmp
     #ifdef __GAPS_OPENMP__
     if (mPrintMessages && mPrintThreadUsage)
... ...
@@ -47,4 +47,5 @@ OBJECTS =   Cogaps.o \
             cpp_tests/testSparseVector.o \
             cpp_tests/testVector.o \
             cpp_tests/testFileParsers.o \
-            cpp_tests/testDenseGibbsSampler.o
\ No newline at end of file
+            cpp_tests/testDenseGibbsSampler.o \
+            cpp_tests/testSparseGibbsSampler.o
\ No newline at end of file
new file mode 100644
... ...
@@ -0,0 +1,247 @@
+#include "catch.h"
+#include "../gibbs_sampler/DenseGibbsSampler.h"
+#include "../gibbs_sampler/SparseGibbsSampler.h"
+
+#define TEST_APPROX(x) Approx(x).epsilon(0.001f)
+
+TEST_CASE("Test SparseGibbsSampler")
+{
+    SECTION("Construct from data matrix")
+    {
+        Matrix data(25, 50);
+        for (unsigned i = 0; i < data.nRow(); ++i)
+        {
+            for (unsigned j = 0; j < data.nCol(); ++j)
+            {
+                data(i,j) = i + j + 1.f;
+            }
+        }
+
+        GapsParameters params(data);
+        SparseGibbsSampler ASampler(data, true, false, params.alphaA,
+            params.maxGibbsMassA, params);
+        SparseGibbsSampler PSampler(data, false, false, params.alphaP,
+            params.maxGibbsMassP, params);
+
+        ASampler.sync(PSampler);
+        PSampler.sync(ASampler);
+
+        REQUIRE(ASampler.chiSq() == 100.f * data.nRow() * data.nCol());
+        REQUIRE(PSampler.chiSq() == 100.f * data.nRow() * data.nCol());
+
+    #ifdef GAPS_DEBUG
+        REQUIRE(ASampler.internallyConsistent());
+        REQUIRE(PSampler.internallyConsistent());
+    #endif
+    }
+
+#ifdef GAPS_INTERNAL_TESTS
+    SECTION("Test consistency between alpha parameters calculations")
+    {
+        // create the "data"
+        Matrix data(100, 75);
+        GapsRng::setSeed(123);
+        GapsRng rng;
+        for (unsigned i = 0; i < data.nRow(); ++i)
+        {
+            for (unsigned j = 0; j < data.nCol(); ++j)
+            {
+                data(i,j) = rng.uniform32(1,14) * (rng.uniform() < 0.5f ? 0.f : 1.f);
+            }
+        }
+
+        // create pair of sparse gibbs samplers
+        GapsParameters params(data);
+        SparseGibbsSampler sparse_ASampler(data, true, false, params.alphaA,
+            params.maxGibbsMassA, params);
+        SparseGibbsSampler sparse_PSampler(data, false, false, params.alphaP,
+            params.maxGibbsMassP, params);
+        sparse_ASampler.sync(sparse_PSampler);
+        sparse_PSampler.sync(sparse_ASampler);
+
+        // create pair of dense gibbs samplers
+        DenseGibbsSampler dense_ASampler(data, true, false, params.alphaA,
+            params.maxGibbsMassA, params);
+        DenseGibbsSampler dense_PSampler(data, false, false, params.alphaP,
+            params.maxGibbsMassP, params);
+        dense_ASampler.sync(dense_PSampler);
+        dense_PSampler.sync(dense_ASampler);
+
+        // set the A and P matrix to the same thing
+        for (unsigned i = 0; i < data.nRow(); ++i)
+        {
+            for (unsigned k = 0; k < params.nPatterns; ++k)
+            {
+                float val = rng.uniform(0.f, 10.f) * (rng.uniform() < 0.2f ? 0.f : 1.f);
+                dense_ASampler.mMatrix(i,k) = val;
+                sparse_ASampler.mMatrix.add(i, k, val);
+            }
+        }
+        REQUIRE(gaps::sum(dense_ASampler.mMatrix) == gaps::sum(sparse_ASampler.mMatrix));
+
+        for (unsigned j = 0; j < data.nCol(); ++j)
+        {
+            for (unsigned k = 0; k < params.nPatterns; ++k)
+            {
+                float val = rng.uniform(0.f, 10.f) * (rng.uniform() < 0.2f ? 0.f : 1.f);
+                dense_PSampler.mMatrix(j,k) = val;
+                sparse_PSampler.mMatrix.add(j, k, val);
+            }
+        }
+        REQUIRE(gaps::sum(dense_PSampler.mMatrix) == gaps::sum(sparse_PSampler.mMatrix));
+
+        // sync them back up
+        sparse_ASampler.sync(sparse_PSampler);
+        sparse_PSampler.sync(sparse_ASampler);
+        dense_ASampler.sync(dense_PSampler);
+        dense_PSampler.sync(dense_ASampler);
+        dense_ASampler.recalculateAPMatrix();
+        dense_PSampler.recalculateAPMatrix();
+
+///////////////// test that alphaParameters are the same ///////////////////////
+        for (unsigned i = 0; i < data.nRow(); ++i)
+        {
+            for (unsigned k = 0; k < params.nPatterns; ++k)
+            {
+                AlphaParameters sa = sparse_ASampler.alphaParameters(i,k);
+                AlphaParameters da = dense_ASampler.alphaParameters(i,k);
+                REQUIRE(sa.s >= 0.f);
+                REQUIRE(da.s >= 0.f);
+                if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                {
+                    REQUIRE(sa.s <= gaps::epsilon);
+                    REQUIRE(da.s <= gaps::epsilon);
+                }
+                REQUIRE(sa.s == TEST_APPROX(da.s));
+                REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+            }
+        }
+
+        for (unsigned j = 0; j < data.nCol(); ++j)
+        {
+            for (unsigned k = 0; k < params.nPatterns; ++k)
+            {
+                AlphaParameters sa = sparse_PSampler.alphaParameters(j,k);
+                AlphaParameters da = dense_PSampler.alphaParameters(j,k);
+                REQUIRE(sa.s >= 0.f);
+                REQUIRE(da.s >= 0.f);
+                if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                {
+                    REQUIRE(sa.s <= gaps::epsilon);
+                    REQUIRE(da.s <= gaps::epsilon);
+                }
+                REQUIRE(sa.s == TEST_APPROX(da.s));
+                REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+            }
+        }
+
+///////////// test two dimensional alphaParameters are the same ////////////////
+        for (unsigned i = 0; i < data.nRow(); ++i)
+        {
+            for (unsigned k1 = 0; k1 < params.nPatterns; ++k1)
+            {
+                for (unsigned k2 = k1+1; k2 < params.nPatterns; ++k2)
+                {
+                    AlphaParameters sa = sparse_ASampler.alphaParameters(i,k1,i,k2);
+                    AlphaParameters da = dense_ASampler.alphaParameters(i,k1,i,k2);
+                    REQUIRE(sa.s >= 0.f);
+                    REQUIRE(da.s >= 0.f);
+                    if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                    {
+                        REQUIRE(sa.s <= gaps::epsilon);
+                        REQUIRE(da.s <= gaps::epsilon);
+                    }
+                    REQUIRE(sa.s == TEST_APPROX(da.s));
+                    REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+
+                    // symmetry
+                    sa = sparse_ASampler.alphaParameters(i,k2,i,k1);
+                    da = dense_ASampler.alphaParameters(i,k2,i,k1);
+                    REQUIRE(sa.s >= 0.f);
+                    REQUIRE(da.s >= 0.f);
+                    if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                    {
+                        REQUIRE(sa.s <= gaps::epsilon);
+                        REQUIRE(da.s <= gaps::epsilon);
+                    }
+                    REQUIRE(sa.s == TEST_APPROX(da.s));
+                    REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+                }
+            }
+        }
+
+        for (unsigned j = 0; j < data.nCol(); ++j)
+        {
+            for (unsigned k1 = 0; k1 < params.nPatterns; ++k1)
+            {
+                for (unsigned k2 = k1+1; k2 < params.nPatterns; ++k2)
+                {
+                    AlphaParameters sa = sparse_PSampler.alphaParameters(j,k1,j,k2);
+                    AlphaParameters da = dense_PSampler.alphaParameters(j,k1,j,k2);
+                    REQUIRE(sa.s >= 0.f);
+                    REQUIRE(da.s >= 0.f);
+                    if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                    {
+                        REQUIRE(sa.s <= gaps::epsilon);
+                        REQUIRE(da.s <= gaps::epsilon);
+                    }
+                    REQUIRE(sa.s == TEST_APPROX(da.s));
+                    REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+
+                    // symmetry
+                    sa = sparse_PSampler.alphaParameters(j,k2,j,k1);
+                    da = dense_PSampler.alphaParameters(j,k2,j,k1);
+                    REQUIRE(sa.s >= 0.f);
+                    REQUIRE(da.s >= 0.f);
+                    if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                    {
+                        REQUIRE(sa.s <= gaps::epsilon);
+                        REQUIRE(da.s <= gaps::epsilon);
+                    }
+                    REQUIRE(sa.s == TEST_APPROX(da.s));
+                    REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+                }
+            }
+        }
+
+///////////// test alphaParameters with change are the same ////////////////////
+        for (unsigned i = 0; i < data.nRow(); ++i)
+        {
+            for (unsigned k = 0; k < params.nPatterns; ++k)
+            {
+                float ch = rng.uniform(0.f, 25.f);
+                AlphaParameters sa = sparse_ASampler.alphaParametersWithChange(i,k,ch);
+                AlphaParameters da = dense_ASampler.alphaParametersWithChange(i,k,ch);
+                REQUIRE(sa.s >= 0.f);
+                REQUIRE(da.s >= 0.f);
+                if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                {
+                    REQUIRE(sa.s <= gaps::epsilon);
+                    REQUIRE(da.s <= gaps::epsilon);
+                }
+                REQUIRE(sa.s == TEST_APPROX(da.s));
+                REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+            }
+        }
+
+        for (unsigned j = 0; j < data.nCol(); ++j)
+        {
+            for (unsigned k = 0; k < params.nPatterns; ++k)
+            {
+                float ch = rng.uniform(0.f, 25.f);
+                AlphaParameters sa = sparse_PSampler.alphaParametersWithChange(j,k,ch);
+                AlphaParameters da = dense_PSampler.alphaParametersWithChange(j,k,ch);
+                REQUIRE(sa.s >= 0.f);
+                REQUIRE(da.s >= 0.f);
+                if (sa.s <= gaps::epsilon || da.s <= gaps::epsilon)
+                {
+                    REQUIRE(sa.s <= gaps::epsilon);
+                    REQUIRE(da.s <= gaps::epsilon);
+                }
+                REQUIRE(sa.s == TEST_APPROX(da.s));
+                REQUIRE(sa.s_mu == TEST_APPROX(da.s_mu));
+            }
+        }
+    }
+#endif
+}
\ No newline at end of file
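A note on the chiSq expectation in the first section: the factor matrices start at zero, so AP = 0, and every data value i + j + 1 is at least 1; with the default uncertainty S = max(0.1 * D, 0.1) = 0.1 * D, each entry contributes (D / (0.1 * D))^2 = 100 to the chi-square, giving 100 * nRow * nCol in total.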
... ...
@@ -7,6 +7,8 @@
 #include "../data_structures/HybridMatrix.h"
 #include "../data_structures/SparseIterator.h"

+#include <bitset>
+
 TEST_CASE("Test SparseIterator.h - One Dimensional")
 {
 #ifdef GAPS_INTERNAL_TESTS
... ...
@@ -86,6 +88,112 @@ TEST_CASE("Test SparseIterator.h - Two Dimensional")
         it.next();
         REQUIRE(it.atEnd());
     }
+
+    SECTION("First overlap happens after 64 entries")
+    {
+        SparseVector sv(100);
+        sv.insert(1, 1.f);
+        sv.insert(2, 2.f);
+        sv.insert(3, 3.f);
+        sv.insert(4, 4.f);
+        sv.insert(5, 5.f);
+        sv.insert(74, 74.f);
+        sv.insert(75, 75.f);
+        sv.insert(76, 76.f);
+
+        HybridVector hv(100);
+        hv.add(6, 7.f);
+        hv.add(7, 8.f);
+        hv.add(8, 9.f);
+        hv.add(75, 76.f);
+
+        SparseIteratorTwo it(sv, hv);
+        REQUIRE(it.getValue_1() == 75.f);
+        REQUIRE(it.getValue_2() == 76.f);
+        it.next();
+        REQUIRE(it.atEnd());
+    }
+
+    SECTION("Test Dot Product with gap")
+    {
+        SparseVector sv(300);
+        HybridVector hv(300);
+        Vector dv1(300), dv2(300);
+
+        // fill vectors
+        GapsRng::setSeed(123);
+        GapsRng rng;
+
+        for (unsigned i = 0; i < 30; ++i)
+        {
+            float val = rng.uniform(50.f,500.f);
+            sv.insert(i, val);
+            dv1[i] = val;
+        }
+
+        for (unsigned i = 32; i < 60; ++i)
+        {
+            float val = rng.uniform(50.f,500.f);
+            hv.add(i, val);
+            dv2[i] = val;
+        }
+
+        for (unsigned i = 70; i < 120; i+=3)
+        {
+            float v1 = rng.uniform(50.f,500.f);
+            sv.insert(i, v1);
+            dv1[i] = v1;
+
+            float v2 = rng.uniform(50.f,500.f);
+            hv.add(i, v2);
+            dv2[i] = v2;
+        }
+
+        // this part needs to be accounted for
+        for (unsigned i = 128; i < 196; ++i)
+        {
+            float val = rng.uniform(5.f,10.f);
+            sv.insert(i, val);
+            dv1[i] = val;
+        }
+
+        for (unsigned i = 200; i < 300; ++i)
+        {
+            float v1 = rng.uniform(50.f,500.f);
+            sv.insert(i, v1);
+            dv1[i] = v1;
+
+            float v2 = rng.uniform(50.f,500.f);
+            hv.add(i, v2);
+            dv2[i] = v2;
+        }
+
+        // calculate dot product
+        float sdot = 0.f, ddot = 0.f;
+        SparseIteratorTwo it(sv, hv);
+        unsigned i = 0;
+        while (!it.atEnd())
+        {
+            while (dv1[i] == 0.f || dv2[i] == 0.f)
+            {
+                ++i;
+            }
+
+            if (i < dv1.size())
+            {
+                ddot += dv1[i] * dv2[i];
+                REQUIRE(dv1[i] == it.getValue_1());
+                REQUIRE(dv2[i] == it.getValue_2());
+                ++i;
+            }
+
+            sdot += it.getValue_1() * it.getValue_2();
+
+            it.next();
+        }
+        REQUIRE(ddot == gaps::dot(dv1, dv2));
+        REQUIRE(sdot == ddot);
+    }
 #endif

     // could this fail because of SIMD?
... ...
@@ -1,14 +1,21 @@
 #include "HybridVector.h"
 #include "../math/Math.h"
+#include "../math/SIMD.h"
+
+#define PAD_SIZE_FOR_SIMD(x) (gaps::simd::Index::increment() * (1 + ((x) - 1) / gaps::simd::Index::increment()))

 HybridVector::HybridVector(unsigned size)
-    : mIndexBitFlags(size / 64 + 1, 0), mData(size, 0.f)
+    :
+mIndexBitFlags(size / 64 + 1, 0),
+mData(PAD_SIZE_FOR_SIMD(size), 0.f),
+mSize(size)
 {}

 HybridVector::HybridVector(const std::vector<float> &v)
     :
 mIndexBitFlags(v.size() / 64 + 1, 0),
-mData(v.size(), 0.f)
+mData(PAD_SIZE_FOR_SIMD(v.size()), 0.f),
+mSize(v.size())
 {
     for (unsigned i = 0; i < v.size(); ++i)
     {
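PAD_SIZE_FOR_SIMD rounds a length up to the next multiple of the SIMD register width so packed loads can cover the whole buffer. A quick standalone check of the arithmetic, with the increment passed in explicitly (8 for AVX, 4 for SSE, per the SIMD.h changes later in this commit):

#include <cassert>

// Round x up to the next multiple of `inc` (the packed-float width).
// Mirrors PAD_SIZE_FOR_SIMD; assumes x >= 1 (x == 0 would wrap around).
constexpr unsigned padForSimd(unsigned x, unsigned inc)
{
    return inc * (1 + (x - 1) / inc);
}

int main()
{
    assert(padForSimd(25, 8) == 32); // 25 floats padded to 4 AVX registers
    assert(padForSimd(32, 8) == 32); // already aligned, unchanged
    assert(padForSimd(1, 8) == 8);
    assert(padForSimd(30, 4) == 32); // SSE width of 4
    return 0;
}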
... ...
@@ -34,7 +41,7 @@ bool HybridVector::empty() const

 unsigned HybridVector::size() const
 {
-    return mData.size();
+    return mSize;
 }

 bool HybridVector::add(unsigned i, float v)
... ...
@@ -65,13 +72,13 @@ const float* HybridVector::densePtr() const

 Archive& operator<<(Archive &ar, HybridVector &vec)
 {
-    ar << vec.mData.size();
+    ar << vec.mSize;
     for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
     {
         ar << vec.mIndexBitFlags[i];
     }

-    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar << vec.mData[i];
     }
... ...
@@ -82,14 +89,14 @@ Archive& operator>>(Archive &ar, HybridVector &vec)
 {
     unsigned sz = 0;
     ar >> sz;
-    GAPS_ASSERT(sz == vec.mData.size());
+    GAPS_ASSERT(sz == vec.size());

     for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
     {
         ar >> vec.mIndexBitFlags[i];
     }

-    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar >> vec.mData[i];
     }
... ...
@@ -40,6 +40,7 @@ private:

     std::vector<uint64_t> mIndexBitFlags;
     aligned_vector mData;
+    unsigned mSize;
 };

 #endif // __COGAPS_HYBRID_VECTOR_H__
\ No newline at end of file
... ...
@@ -11,7 +11,11 @@ static unsigned countLowerBits(uint64_t u, unsigned pos)
 // clears all bits as low as pos or lower
 static uint64_t clearLowerBits(uint64_t u, unsigned pos)
 {
-    return u & ~((1ull << (pos + 1ull)) - 1ull);
+    if (pos == 63)
+    {
+        return 0; // 1ull << 64 is undefined behavior, handle the full word explicitly
+    }
+    return u & ~((1ull << (pos + 1ull)) - 1ull);
 }

 SparseIterator::SparseIterator(const SparseVector &v)
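Some context for the pos == 63 guard: when pos + 1 == 64 the old expression computes 1ull << 64, which is undefined behavior in C++ (shift count equal to the type width). On x86 the hardware masks the count to 64 % 64 = 0, so (1ull << 0) - 1ull is 0, its complement is all ones, and u comes back unchanged with nothing cleared — the chunk-boundary failure the new "First overlap happens after 64 entries" test pins down. A standalone sketch of the guarded helper (not the commit's code):

#include <cassert>
#include <cstdint>

// clears all bits at position pos or lower, guarding the full-width shift
static uint64_t clearLowerBits(uint64_t u, unsigned pos)
{
    if (pos == 63)
    {
        return 0; // all 64 bits are "pos or lower"
    }
    return u & ~((1ull << (pos + 1ull)) - 1ull);
}

int main()
{
    assert(clearLowerBits(0xFFull, 3) == 0xF0ull);          // bits 0-3 cleared
    assert(clearLowerBits(~0ull, 63) == 0);                 // whole word cleared
    assert(clearLowerBits(1ull << 63, 62) == (1ull << 63)); // top bit survives
    return 0;
}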
... ...
@@ -49,8 +53,9 @@ mSparseIndex(0),
 mAtEnd(false)
 {
     GAPS_ASSERT(v1.size() == v2.size());
+
     next();
-    mSparseIndex -= 1; // this gets advanced one too far
+    mSparseIndex -= 1; // next puts us at position 1, this resets to 0
 }

 bool SparseIteratorTwo::atEnd() const
... ...
@@ -60,17 +65,23 @@ bool SparseIteratorTwo::atEnd() const

 void SparseIteratorTwo::next()
 {
+    // get the common indices in this chunk
+    mCommon = mFlags_1 & mFlags_2;
+
+    // if nothing common in this chunk, find a chunk that has common indices
     while (!mCommon)
     {
-        // no common values in this chunk, go to next one
-        ++mBigIndex;
-        if (mBigIndex == mTotalIndices)
+        // first count how many sparse indices we are skipping
+        mSparseIndex += __builtin_popcountll(mFlags_1);
+
+        // advance to next chunk
+        if (++mBigIndex == mTotalIndices)
         {
             mAtEnd = true;
             return;
         }

-        // check if we have any common values here
+        // update the flags
         mFlags_1 = mSparse.mIndexBitFlags[mBigIndex];
         mFlags_2 = mHybrid.mIndexBitFlags[mBigIndex];
         mCommon = mFlags_1 & mFlags_2;
... ...
@@ -83,11 +94,7 @@ void SparseIteratorTwo::next()
     mSparseIndex += 1 + countLowerBits(mFlags_1, mSmallIndex);

     // clear out all skipped indices and the current index from the bitflags
-    // this is needed so that countLowerBits is accurate in the next iteration
     mFlags_1 = clearLowerBits(mFlags_1, mSmallIndex);
-
-    // clear out this bit from common
-    mCommon ^= (1ull << mSmallIndex);
 }

 float SparseIteratorTwo::getValue_1() const
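For orientation, a self-contained sketch of the traversal these hunks implement (illustrative names, not the class's real members): each vector stores one uint64_t of presence flags per 64 indices; ANDing the two words yields the common indices in a chunk, a count-trailing-zeros builtin finds the next one, and popcount bookkeeping (as above) tracks the position inside the packed sparse data:

#include <cstdint>
#include <cstdio>
#include <vector>

// Walk the indices present in BOTH bit-flag arrays (64 indices per word).
// A hypothetical stand-in for the SparseIteratorTwo traversal logic.
void forEachCommonIndex(const std::vector<uint64_t> &flags1,
                        const std::vector<uint64_t> &flags2)
{
    for (unsigned big = 0; big < flags1.size(); ++big)
    {
        uint64_t common = flags1[big] & flags2[big]; // indices set in both chunks
        while (common)
        {
            unsigned small = __builtin_ctzll(common); // lowest common bit
            std::printf("common index: %u\n", big * 64 + small);
            common &= common - 1; // clear the lowest set bit
        }
    }
}

int main()
{
    std::vector<uint64_t> a(2, 0), b(2, 0);
    a[0] |= 1ull << 5;  b[0] |= 1ull << 5;  // index 5 in both
    a[1] |= 1ull << 11; b[1] |= 1ull << 11; // index 75 in both
    a[0] |= 1ull << 9;                      // index 9 only in a
    forEachCommonIndex(a, b); // prints 5 and 75
    return 0;
}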
... ...
@@ -123,8 +130,9 @@ mAtEnd(false)
 {
     GAPS_ASSERT(v1.size() == v2.size());
     GAPS_ASSERT(v2.size() == v3.size());
+
     next();
-    mSparseIndex -= 1; // this gets advanced one too far
+    mSparseIndex -= 1;
 }

 bool SparseIteratorThree::atEnd() const
... ...
@@ -134,17 +142,23 @@ bool SparseIteratorThree::atEnd() const

 void SparseIteratorThree::next()
 {
+    // get the common indices in this chunk
+    mCommon = mFlags_1 & mFlags_2 & mFlags_3;
+
+    // if nothing common in this chunk, find a chunk that has common indices
     while (!mCommon)
     {
-        // no common values in this chunk, go to next one
-        ++mBigIndex;
-        if (mBigIndex == mTotalIndices)
+        // first count how many sparse indices we are skipping
+        mSparseIndex += __builtin_popcountll(mFlags_1);
+
+        // advance to next chunk
+        if (++mBigIndex == mTotalIndices)
         {
             mAtEnd = true;
             return;
         }

-        // check if we have any common values here
+        // update the flags
         mFlags_1 = mSparse.mIndexBitFlags[mBigIndex];
         mFlags_2 = mHybrid_1.mIndexBitFlags[mBigIndex];
         mFlags_3 = mHybrid_2.mIndexBitFlags[mBigIndex];
... ...
@@ -158,11 +172,7 @@ void SparseIteratorThree::next()
     mSparseIndex += 1 + countLowerBits(mFlags_1, mSmallIndex);

     // clear out all skipped indices and the current index from the bitflags
-    // this is needed so that countLowerBits is accurate in the next iteration
     mFlags_1 = clearLowerBits(mFlags_1, mSmallIndex);
-
-    // clear out this bit from common
-    mCommon ^= (1ull << mSmallIndex);
 }

 float SparseIteratorThree::getValue_1() const
... ...
@@ -35,7 +35,9 @@ public:
     float getValue_2() const;
     unsigned getIndex() const;

+#ifndef GAPS_INTERNAL_TESTS
 private:
+#endif

     const SparseVector &mSparse;
     const HybridVector &mHybrid;
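The #ifndef GAPS_INTERNAL_TESTS wrapper seen here (and added to the sampler headers below) keeps members private in normal builds while exposing them when the internal-test build defines the macro — which is what lets testSparseGibbsSampler.cpp assign to mMatrix directly. A minimal sketch of the pattern (hypothetical class):

#include <cassert>

// Members are private unless the build defines GAPS_INTERNAL_TESTS
// (e.g. via a -D flag in the test build).
class Example
{
public:
    Example() : mHidden(42) {}

#ifndef GAPS_INTERNAL_TESTS
private:
#endif

    int mHidden;
};

int main()
{
#ifdef GAPS_INTERNAL_TESTS
    Example e;
    assert(e.mHidden == 42); // tests can inspect internals directly
#endif
    return 0;
}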
... ...
@@ -4,13 +4,15 @@
 SparseVector::SparseVector(unsigned size)
     :
 mSize(size),
-mIndexBitFlags(size / 64 + 1, 0)
+mIndexBitFlags(size / 64 + 1, 0),
+mIndexStart(mIndexBitFlags.size(), 0)
 {}

 SparseVector::SparseVector(const std::vector<float> &v)
     :
 mSize(v.size()),
-mIndexBitFlags(v.size() / 64 + 1, 0)
+mIndexBitFlags(v.size() / 64 + 1, 0),
+mIndexStart(mIndexBitFlags.size(), 0)
 {
     for (unsigned i = 0; i < v.size(); ++i)
     {
... ...
@@ -18,6 +20,7 @@ mIndexBitFlags(v.size() / 64 + 1, 0)
         {
             mData.push_back(v[i]);
             mIndexBitFlags[i / 64] ^= (1ull << (i % 64));
+            propogate(i / 64);
         }
     }
 }
... ...
@@ -25,7 +28,8 @@ mIndexBitFlags(v.size() / 64 + 1, 0)
 SparseVector::SparseVector(const Vector &v)
     :
 mSize(v.size()),
-mIndexBitFlags(v.size() / 64 + 1, 0)
+mIndexBitFlags(v.size() / 64 + 1, 0),
+mIndexStart(mIndexBitFlags.size(), 0)
 {
     for (unsigned i = 0; i < v.size(); ++i)
     {
... ...
@@ -33,6 +37,7 @@ mIndexBitFlags(v.size() / 64 + 1, 0)
         {
             mData.push_back(v[i]);
             mIndexBitFlags[i / 64] ^= (1ull << (i % 64));
+            propogate(i / 64);
         }
     }
 }
... ...
@@ -64,6 +69,15 @@ void SparseVector::insert(unsigned i, float v)

     mData.insert(mData.begin() + dataIndex, v);
     mIndexBitFlags[i / 64] |= (1ull << (i % 64));
+    propogate(i / 64);
+}
+
+void SparseVector::propogate(unsigned ndx)
+{
+    for (unsigned i = ndx + 1; i < mIndexStart.size(); ++i)
+    {
+        ++mIndexStart[i];
+    }
 }

 Vector SparseVector::getDense() const
... ...
@@ -32,11 +32,13 @@ public:
 private:
 #endif

+    unsigned mSize;
     std::vector<uint64_t> mIndexBitFlags;
+    std::vector<unsigned> mIndexStart;
     std::vector<float> mData;
-    unsigned mSize;

     void insert(unsigned i, float v);
+    void propogate(unsigned ndx);
 };

 #endif // __COGAPS_SPARSE_VECTOR_H__
\ No newline at end of file
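mIndexStart is new bookkeeping: propogate(i / 64) bumps the counters of every chunk after the one that received a value, so mIndexStart[c] holds the number of stored values that precede chunk c. Nothing in this commit reads it yet; a sketch of the apparent intent (hypothetical stand-in names):

#include <cstdint>
#include <vector>

// mIndexStart[c] counts how many stored (nonzero) values come before chunk c,
// so a chunk's offset into the packed data can be found without re-scanning
// every earlier bit-flag word.
struct SparseIndexSketch
{
    std::vector<uint64_t> flags;      // one presence word per 64 indices
    std::vector<unsigned> indexStart; // values stored before each chunk

    explicit SparseIndexSketch(unsigned size)
        : flags(size / 64 + 1, 0), indexStart(size / 64 + 1, 0)
    {}

    void markInserted(unsigned i) // assumes index i was not already present
    {
        flags[i / 64] |= (1ull << (i % 64));
        // every later chunk now has one more value in front of it
        for (unsigned c = i / 64 + 1; c < indexStart.size(); ++c)
        {
            ++indexStart[c];
        }
    }

    // data offset of the first value stored in chunk c
    unsigned chunkDataOffset(unsigned c) const { return indexStart[c]; }
};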
... ...
@@ -1,8 +1,18 @@
 #include "Vector.h"
+#include "../math/SIMD.h"

-Vector::Vector(unsigned size) : mData(size, 0.f) {}
+#define PAD_SIZE_FOR_SIMD(x) (gaps::simd::Index::increment() * (1 + ((x) - 1) / gaps::simd::Index::increment()))

-Vector::Vector(const std::vector<float> &v) : mData(v.size(), 0.f)
+Vector::Vector(unsigned size)
+    :
+mData(PAD_SIZE_FOR_SIMD(size), 0.f),
+mSize(size)
+{}
+
+Vector::Vector(const std::vector<float> &v)
+    :
+mData(PAD_SIZE_FOR_SIMD(v.size()), 0.f),
+mSize(v.size())
 {
     for (unsigned i = 0; i < v.size(); ++i)
     {
... ...
@@ -32,12 +42,12 @@ const float* Vector::ptr() const

 unsigned Vector::size() const
 {
-    return mData.size();
+    return mSize;
 }

 void Vector::operator+=(const Vector &v)
 {
-    for (unsigned i = 0; i < mData.size(); ++i)
+    for (unsigned i = 0; i < mSize; ++i)
     {
         mData[i] += v[i];
     }
... ...
@@ -45,7 +55,7 @@ void Vector::operator+=(const Vector &v)

 Archive& operator<<(Archive &ar, Vector &vec)
 {
-    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar << vec.mData[i];
     }
... ...
@@ -54,7 +64,7 @@ Archive& operator<<(Archive &ar, Vector &vec)

 Archive& operator>>(Archive &ar, Vector &vec)
 {
-    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar >> vec.mData[i];
     }
... ...
@@ -34,6 +34,7 @@ public:
 private:

     aligned_vector mData;
+    unsigned mSize;
 };

 #endif
\ No newline at end of file
... ...
@@ -80,27 +80,19 @@ AlphaParameters DenseGibbsSampler::alphaParameters(unsigned row, unsigned col)
     const float *mat = mOtherMatrix->getCol(col).ptr();

     gaps::simd::PackedFloat pMat, pD, pAP, pS;
-    gaps::simd::PackedFloat partialS(0.f), partialS_mu(0.f);
+    gaps::simd::PackedFloat s(0.f), s_mu(0.f);
     gaps::simd::Index i(0);
-    for (; i <= size - i.increment(); ++i)
+    for (; i < size; ++i)
     {
         pMat.load(mat + i);
         pD.load(D + i);
         pAP.load(AP + i);
         pS.load(S + i);
         gaps::simd::PackedFloat ratio(pMat / (pS * pS));
-        partialS += pMat * ratio;
-        partialS_mu += ratio * (pD - pAP);
+        s += pMat * ratio;
+        s_mu += ratio * (pD - pAP);
     }
-
-    float s = partialS.scalar(), s_mu = partialS_mu.scalar();
-    for (unsigned j = i.value(); j < size; ++j)
-    {
-        float ratio = mat[j] / (S[j] * S[j]);
-        s += mat[j] * ratio;
-        s_mu += ratio * (D[j] - AP[j]);
-    }
-    return AlphaParameters(s, s_mu);
+    return AlphaParameters(s.scalar(), s_mu.scalar());
 }

 // PERFORMANCE_CRITICAL
... ...
@@ -117,28 +109,20 @@ unsigned r2, unsigned c2)
         const float *mat2 = mOtherMatrix->getCol(c2).ptr();

         gaps::simd::PackedFloat pMat1, pMat2, pD, pAP, pS;
-        gaps::simd::PackedFloat partialS(0.f), partialS_mu(0.f);
+        gaps::simd::PackedFloat s(0.f), s_mu(0.f);
         gaps::simd::Index i(0);
-        for (; i <= size - i.increment(); ++i)
-        {
+        for (; i < size; ++i)
+        {
             pMat1.load(mat1 + i);
             pMat2.load(mat2 + i);
             pD.load(D + i);
             pAP.load(AP + i);
             pS.load(S + i);
             gaps::simd::PackedFloat ratio((pMat1 - pMat2) / (pS * pS));
-            partialS += (pMat1 - pMat2) * ratio;
-            partialS_mu += ratio * (pD - pAP);
+            s += (pMat1 - pMat2) * ratio;
+            s_mu += ratio * (pD - pAP);
         }
-
-        float s = partialS.scalar(), s_mu = partialS_mu.scalar();
-        for (unsigned j = i.value(); j < size; ++j)
-        {
-            float ratio = (mat1[j] - mat2[j]) / (S[j] * S[j]);
-            s += (mat1[j] - mat2[j]) * ratio;
-            s_mu += ratio * (D[j] - AP[j]);
-        }
-        return AlphaParameters(s, s_mu);
+        return AlphaParameters(s.scalar(), s_mu.scalar());
     }
     return alphaParameters(r1, c1) + alphaParameters(r2, c2);
 }
... ...
@@ -155,28 +139,19 @@ unsigned col, float ch)

     gaps::simd::PackedFloat pCh(ch);
     gaps::simd::PackedFloat pMat, pD, pAP, pS;
-    gaps::simd::PackedFloat partialS(0.f), partialS_mu(0.f);
+    gaps::simd::PackedFloat s(0.f), s_mu(0.f);
     gaps::simd::Index i(0);
-    for (; i <= size - i.increment(); ++i)
+    for (; i < size; ++i)
     {
         pMat.load(mat + i);
         pD.load(D + i);
         pAP.load(AP + i);
         pS.load(S + i);
         gaps::simd::PackedFloat ratio(pMat / (pS * pS));
-        partialS += pMat * ratio;
-        partialS_mu += ratio * (pD - (pAP + pCh * pMat));
-    }
-
-    float s = partialS.scalar(), s_mu = partialS_mu.scalar();
-    for (unsigned j = i.value(); j < size; ++j)
-    {
-        float ratio = mat[j] / (S[j] * S[j]);
-        s += mat[j] * ratio;
-        s_mu += ratio * (D[j] - (AP[j] + ch * mat[j]));
+        s += pMat * ratio;
+        s_mu += ratio * (pD - (pAP + pCh * pMat));
     }
-    return AlphaParameters(s, s_mu);
-
+    return AlphaParameters(s.scalar(), s_mu.scalar());
 }

 // PERFORMANCE_CRITICAL
... ...
@@ -187,20 +162,15 @@ void DenseGibbsSampler::updateAPMatrix(unsigned row, unsigned col, float delta)
     unsigned size = mAPMatrix.nRow();

     gaps::simd::PackedFloat pOther, pAP;
-    gaps::simd::Index i(0);
     gaps::simd::PackedFloat pDelta(delta);
-    for (; i <= size - i.increment(); ++i)
+    gaps::simd::Index i(0);
+    for (; i < size; ++i)
     {
         pOther.load(other + i);
         pAP.load(ap + i);
         pAP += pDelta * pOther;
         pAP.store(ap + i);
     }
-
-    for (unsigned j = i.value(); j < size; ++j)
-    {
-        ap[j] += delta * other[j];
-    }
 }

 Archive& operator<<(Archive &ar, DenseGibbsSampler &s)
... ...
@@ -28,7 +28,9 @@ public:
     friend Archive& operator<<(Archive &ar, DenseGibbsSampler &s);
     friend Archive& operator>>(Archive &ar, DenseGibbsSampler &s);

+#ifndef GAPS_INTERNAL_TESTS
 private:
+#endif

     Matrix mSMatrix; // uncertainty values for each data point
     Matrix mAPMatrix; // cached product of A and P
... ...
@@ -51,7 +51,9 @@ public:
     bool internallyConsistent() const;
 #endif

+#ifndef GAPS_INTERNAL_TESTS
 protected:
+#endif

     DataMatrix mDMatrix; // samples by genes for A, genes by samples for P
     FactorMatrix mMatrix; // genes by patterns for A, samples by patterns for P
... ...
@@ -257,6 +259,8 @@ void GibbsSampler<Derived, DataMatrix, FactorMatrix>::death(const AtomicProposal
 template <class Derived, class DataMatrix, class FactorMatrix>
 void GibbsSampler<Derived, DataMatrix, FactorMatrix>::move(const AtomicProposal &prop)
 {
+    GAPS_ASSERT(prop.r1 != prop.r2 || prop.c1 != prop.c2);
+
     AlphaParameters alpha = impl()->alphaParameters(prop.r1, prop.c1, prop.r2, prop.c2);
     if (std::log(prop.rng.uniform()) < getDeltaLL(alpha, -prop.atom1->mass) * mAnnealingTemp)
     {
... ...
@@ -271,6 +275,8 @@ void GibbsSampler<Derived, DataMatrix, FactorMatrix>::move(const AtomicProposal
 template <class Derived, class DataMatrix, class FactorMatrix>
 void GibbsSampler<Derived, DataMatrix, FactorMatrix>::exchange(const AtomicProposal &prop)
 {
+    GAPS_ASSERT(prop.r1 != prop.r2 || prop.c1 != prop.c2);
+
     // attempt gibbs distribution exchange
     AlphaParameters alpha = impl()->alphaParameters(prop.r1, prop.c1, prop.r2, prop.c2);
     if (canUseGibbs(prop.c1, prop.c2))
... ...
@@ -12,13 +12,14 @@ float SparseGibbsSampler::chiSq() const
     for (unsigned j = 0; j < mDMatrix.nCol(); ++j)
     {
         Vector D(mDMatrix.getCol(j).getDense());
+        Vector S(gaps::pmax(D, 0.1f));
         for (unsigned i = 0; i < D.size(); ++i)
        {
             float ap = gaps::dot(mMatrix.getRow(j), mOtherMatrix->getRow(i));
-            chisq += GAPS_SQ(D[i] - ap) / GAPS_SQ(D[i]);
+            chisq += GAPS_SQ(D[i] - ap) / GAPS_SQ(S[i]);
         }
     }
-    return mBeta * chisq;
+    return chisq;
 }

 void SparseGibbsSampler::sync(const SparseGibbsSampler &sampler, unsigned nThreads)
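This matches the dense sampler's chi-square with the default uncertainty made explicit (a sketch of the algebra; the form of $S$ follows from gaps::pmax, added later in this commit):

$$\chi^2 = \sum_{i,j} \frac{\big(D_{ij} - (AP)_{ij}\big)^2}{S_{ij}^2}, \qquad S_{ij} = \max(0.1\,D_{ij},\ 0.1)$$

The old `return mBeta * chisq` divided by $D_{ij}^2$ and scaled by $\beta = 1/0.1^2 = 100$, which is the same quantity wherever $D_{ij}$ is nonzero (sparse mode requires nonzero values to be at least 1) but divides by zero on the empty entries; building $S$ explicitly avoids that.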
... ...
@@ -44,78 +45,76 @@ unsigned col, float delta)
     GAPS_ASSERT(mMatrix(row, col) >= 0.f);
 }

-AlphaParameters SparseGibbsSampler::alphaParameters(unsigned row,
-unsigned col)
+AlphaParameters SparseGibbsSampler::alphaParameters(unsigned row, unsigned col)
 {
-    float s = -1.f * Z1(col) * mBeta;
+    float s = Z1(col);
     float s_mu = 0.f;
     for (unsigned i = 0; i < mNumPatterns; ++i)
     {
         s_mu += mMatrix(row,i) * Z2(col,i);
     }
+    s_mu *= -1.f;

     SparseIteratorTwo it(mDMatrix.getCol(row), mOtherMatrix->getCol(col));
     while (!it.atEnd())
     {
-        float term1 = it.getValue_1() / it.getValue_2();
-        float term2 = term1 * term1 + it.getValue_1() * it.getValue_1() * mBeta;
-        float term3 = mBeta * (it.getValue_1() - term1 / it.getValue_2());
-        s += mBeta * term2;
-        s_mu += mBeta * term1 + term3 * gaps::dot(mMatrix.getRow(row),
+        float term1 = it.getValue_2() / it.getValue_1();
+        float term2 = it.getValue_2() - term1 / it.getValue_1();
+        s += term1 * term1 - it.getValue_2() * it.getValue_2();
+        s_mu += term1 + term2 * gaps::dot(mMatrix.getRow(row),
             mOtherMatrix->getRow(it.getIndex()));
         it.next();
     }
-    return AlphaParameters(s, s_mu);
+    return AlphaParameters(s, s_mu) * mBeta;
 }

-AlphaParameters SparseGibbsSampler::alphaParameters(unsigned r1,
-unsigned c1, unsigned r2, unsigned c2)
+AlphaParameters SparseGibbsSampler::alphaParameters(unsigned r1, unsigned c1,
+unsigned r2, unsigned c2)
 {
     if (r1 == r2)
     {
         AlphaParameters a1 = alphaParameters(r1, c1);
         AlphaParameters a2 = alphaParameters(r2, c2);
-
         float s = -2.f * mBeta * Z2(c1,c2) + a1.s + a2.s;
-        float s_mu = a1.s_mu - a2.s_mu;

         SparseIteratorThree it(mDMatrix.getCol(r1), mOtherMatrix->getCol(c1),
             mOtherMatrix->getCol(c2));
         while (!it.atEnd())
         {
-            float term1 = 2.f * it.getValue_1() * it.getValue_2();
-            s_mu += mBeta * term1 * (1.f - 1.f / it.getValue_3());
+            float term1 = 2.f * it.getValue_2() * it.getValue_3();
+            s += mBeta * (term1 - term1 / GAPS_SQ(it.getValue_1()));
             it.next();
         }
-        return AlphaParameters(s, s_mu);
+        GAPS_ASSERT(s >= 0.f);
+        return AlphaParameters(s, a1.s_mu - a2.s_mu);
     }
     return alphaParameters(r1, c1) + alphaParameters(r2, c2);
 }

-AlphaParameters SparseGibbsSampler::alphaParametersWithChange(
-unsigned row, unsigned col, float ch)
+AlphaParameters SparseGibbsSampler::alphaParametersWithChange(unsigned row,
+unsigned col, float ch)
 {
-    float s = -1.f * Z1(col) * mBeta;
+    float s = Z1(col);
     float s_mu = 0.f;
     for (unsigned i = 0; i < mNumPatterns; ++i)
     {
-        s_mu += mMatrix(row,i) * Z2(col,i);
+       s_mu += mMatrix(row,i) * Z2(col,i);
     }
-    s_mu += Z2(col,col) * ch;
+    s_mu += ch * Z2(col,col);
+    s_mu *= -1.f;

     SparseIteratorTwo it(mDMatrix.getCol(row), mOtherMatrix->getCol(col));
     while (!it.atEnd())
     {
-        float term1 = it.getValue_1() / it.getValue_2();
-        float term2 = term1 * term1 + it.getValue_1() * it.getValue_1() * mBeta;
-        float term3 = mBeta * (it.getValue_1() - term1 / it.getValue_2());
-        s += mBeta * term2;
-        s_mu += mBeta * term1 + term3 * gaps::dot(mMatrix.getRow(row),
-            mOtherMatrix->getRow(it.getIndex()));
-        s_mu += term3 * mOtherMatrix->operator()(it.getIndex(), col) * ch;
-        it.next();
+       float term1 = it.getValue_2() / it.getValue_1();
+       float term2 = it.getValue_2() - term1 / it.getValue_1();
+       s += term1 * term1 - it.getValue_2() * it.getValue_2();
+       s_mu += term1 + term2 * gaps::dot(mMatrix.getRow(row),
+           mOtherMatrix->getRow(it.getIndex()));
+       s_mu += term2 * mOtherMatrix->operator()(it.getIndex(), col) * ch;
+       it.next();
     }
-    return AlphaParameters(s, s_mu);
+    return AlphaParameters(s, s_mu) * mBeta;
 }

 float& SparseGibbsSampler::Z1(unsigned pattern)
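To check the one-index update against the dense formula: with useSparseOptimization the data may contain no nonzero values below 1 (enforced by the constructor check later in this commit), so the default uncertainty reduces to $S_i = 0.1\,D_i$ on nonzero entries and $0.1$ on zeros. Writing $M$ for column col of mOtherMatrix and $\beta = 1/0.1^2 = 100 = $ mBeta, the dense sum splits into a precomputed part plus a correction over nonzero data only (a sketch of the algebra, not text from the commit):

$$s = \sum_i \frac{M_i^2}{S_i^2} = \beta \Big( \underbrace{\sum_i M_i^2}_{Z_1(\mathrm{col})} + \sum_{i :\, D_i \neq 0} \Big( \frac{M_i^2}{D_i^2} - M_i^2 \Big) \Big)$$

In the loop above, term1 = getValue_2() / getValue_1() is $M_i / D_i$, so `term1 * term1 - it.getValue_2() * it.getValue_2()` is exactly the per-entry correction, and the single multiplication by mBeta is deferred to the return.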
... ...
@@ -125,9 +124,13 @@ float& SparseGibbsSampler::Z1(unsigned pattern)

 float& SparseGibbsSampler::Z2(unsigned pattern1, unsigned pattern2)
 {
-    unsigned dist = mNumPatterns - pattern1;
-    unsigned offset = mNumPatterns * mNumPatterns + mNumPatterns
-        - dist * dist + dist;
+    if (pattern1 > pattern2)
+    {
+        unsigned temp = pattern2;
+        pattern2 = pattern1;
+        pattern1 = temp;
+    }
+    unsigned offset = pattern1 * (2 * mNumPatterns - pattern1 - 1);
     offset /= 2;
     return mZ2[offset + pattern2];
 }
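The new indexing stores the symmetric Z2 table as a packed upper triangle (including the diagonal), N(N+1)/2 entries in total, matching the mZ2 size set in the constructor below. A standalone sketch of the layout and offset arithmetic:

#include <cassert>
#include <utility>
#include <vector>

// Packed upper-triangular (including diagonal) storage for a symmetric
// N x N lookup table, following the Z2 indexing in this commit.
struct SymmetricTable
{
    unsigned n;
    std::vector<float> data;

    explicit SymmetricTable(unsigned nPatterns)
        : n(nPatterns), data((nPatterns * (nPatterns + 1)) / 2, 0.f)
    {}

    float& operator()(unsigned p1, unsigned p2)
    {
        if (p1 > p2) { std::swap(p1, p2); } // symmetry: store only p1 <= p2
        // row p1 starts at p1*n - p1*(p1-1)/2; entry is (p2 - p1) further in
        unsigned offset = p1 * (2 * n - p1 - 1) / 2;
        return data[offset + p2];
    }
};

int main()
{
    SymmetricTable z2(4);
    z2(2, 1) = 7.f;                   // stored once...
    assert(z2(1, 2) == 7.f);          // ...readable from either order
    assert(&z2(3, 3) == &z2.data[9]); // last of the 4*(4+1)/2 = 10 entries
    return 0;
}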
... ...
@@ -136,7 +139,11 @@ void SparseGibbsSampler::generateLookupTables()
 {
     for (unsigned i = 0; i < mNumPatterns; ++i)
     {
-        Z1(i) = gaps::sum(mOtherMatrix->getCol(i));
+        Z1(i) = 0.f;
+        for (unsigned k = 0; k < mOtherMatrix->nRow(); ++k)
+        {
+            Z1(i) += GAPS_SQ(mOtherMatrix->operator()(k,i));
+        }
         for (unsigned j = i; j < mNumPatterns; ++j)
         {
             Z2(i,j) = gaps::dot(mOtherMatrix->getCol(i),
... ...
@@ -5,6 +5,7 @@

 #include "../data_structures/HybridMatrix.h"
 #include "../data_structures/SparseMatrix.h"
+#include "../data_structures/SparseIterator.h"

 #include <vector>

... ...
@@ -27,7 +28,9 @@ public:
     friend Archive& operator<<(Archive &ar, SparseGibbsSampler &s);
     friend Archive& operator>>(Archive &ar, SparseGibbsSampler &s);

-private :
+#ifndef GAPS_INTERNAL_TESTS
+private:
+#endif

     std::vector<float> mZ1;
     std::vector<float> mZ2;
... ...
@@ -53,7 +56,25 @@ SparseGibbsSampler::SparseGibbsSampler(const DataType &data, bool transpose,
 bool subsetRows, float alpha, float maxGibbsMass, const GapsParameters &params)
     :
 GibbsSampler(data, transpose, subsetRows, alpha, maxGibbsMass, params),
+mZ1(params.nPatterns, 0.f),
+mZ2((params.nPatterns * (params.nPatterns + 1)) / 2),
 mBeta(100.f)
-{}
+{
+    // check data for values less than 1
+    for (unsigned j = 0; j < mDMatrix.nCol(); ++j)
+    {
+        SparseIterator it(mDMatrix.getCol(j));
+        while (!it.atEnd())
+        {
+            if (it.getValue() < 1.f)
+            {
+                gaps_printf("\nError: Non-zero values less than 1 detected\n");
+                gaps_printf("\n       Not allowed when useSparseOptimization is enabled\n");
+                gaps_stop();
+            }
+            it.next();
+        }
+    }
+}

 #endif // __COGAPS_SPARSE_GIIBS_SAMPLER_H__
\ No newline at end of file
... ...
@@ -16,6 +16,19 @@ float gaps::sum(const Matrix &mat)
     return sum;
 }

+float gaps::sum(const HybridMatrix &mat)
+{
+    float sum = 0.f;
+    for (unsigned j = 0; j < mat.nCol(); ++j)
+    {
+        for (unsigned i = 0; i < mat.nRow(); ++i)
+        {
+            sum += mat(i,j);
+        }
+    }
+    return sum;
+}
+
 float gaps::sum(const SparseMatrix &mat)
 {
     float sum = 0.f;
... ...
@@ -2,11 +2,13 @@
 #define __COGAPS_MATRIX_MATH_H__

 #include "../data_structures/Matrix.h"
+#include "../data_structures/HybridMatrix.h"
 #include "../data_structures/SparseMatrix.h"

 namespace gaps
 {
     float sum(const Matrix &mat);
+    float sum(const HybridMatrix &mat);
     float sum(const SparseMatrix &mat);

     float mean(const Matrix &mat);
... ...
@@ -6,7 +6,6 @@
     #define __GAPS_AVX__
     #include <immintrin.h>
     typedef __m256 gaps_packed_t;
-    const unsigned index_increment = 8;
     #define SET_SCALAR(x) _mm256_set1_ps(x)
     #define LOAD_PACKED(x) _mm256_load_ps(x)
     #define STORE_PACKED(p,x) _mm256_store_ps(p,x)
... ...
@@ -20,7 +19,6 @@
     #define __GAPS_SSE__
     #include <nmmintrin.h>
     typedef __m128 gaps_packed_t;
-    const unsigned index_increment = 4;
     #define SET_SCALAR(x) _mm_set1_ps(x)
     #define LOAD_PACKED(x) _mm_load_ps(x)
     #define STORE_PACKED(p,x) _mm_store_ps(p,x)
... ...
@@ -32,7 +30,6 @@
 #else

     typedef float gaps_packed_t;
-    const unsigned index_increment = 1;
     #define SET_SCALAR(x) x
     #define LOAD_PACKED(x) *(x)
     #define STORE_PACKED(p,x) *(p) = (x)
... ...
@@ -56,9 +53,19 @@ public:
     Index& operator=(unsigned val) { index = val; return *this; }
     bool operator<(unsigned comp) const { return index < comp; }
     bool operator<=(unsigned comp) const { return index <= comp; }
-    void operator++() { index += index_increment; }
+    void operator++() { index += gaps::simd::Index::increment(); }
     unsigned value() const { return index; }
-    unsigned increment() const { return index_increment; }
+
+    static unsigned increment()
+    {
+    #if defined( __GAPS_AVX__ )
+        return 8;
+    #elif defined( __GAPS_SSE__ )
+        return 4;
+    #else
+        return 1;
+    #endif
+    }

     friend const float* operator+(const float *ptr, Index ndx);
     friend float* operator+(float *ptr, Index ndx);
... ...
@@ -1,23 +1,18 @@
+#include "Math.h"
 #include "VectorMath.h"
 #include "SIMD.h"

 static float dot_helper(const float *v1, const float *v2, unsigned size)
 {
-    gaps::simd::PackedFloat p1, p2, partialSum(0.f);
+    gaps::simd::PackedFloat pp1, pp2, sum(0.f);
     gaps::simd::Index i(0);
-    for (; i <= size - i.increment(); ++i)
+    for (; i < size; ++i)
     {
-        p1.load(v1 + i);
-        p2.load(v2 + i);
-        partialSum += p1 * p2;
+        pp1.load(v1 + i);
+        pp2.load(v2 + i);
+        sum += pp1 * pp2;
     }
-
-    float sum = partialSum.scalar();
-    for (unsigned j = i.value(); j < size; ++j)
-    {
-        sum += v1[j] * v2[j];
-    }
-    return sum;
+    return sum.scalar();
 }

 float gaps::min(const Vector &v)
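Dropping the scalar cleanup loop is safe because Vector and HybridVector now pad their storage to a multiple of the SIMD width (PAD_SIZE_FOR_SIMD) and value-initialize it, so the tail lanes are 0.f and add nothing to a multiply-accumulate. A scalar model of the resulting loop shape (PackedFloat stood in by a plain lane array):

#include <cassert>
#include <cstddef>

// Scalar model of dot_helper over zero-padded storage: paddedSize is a
// multiple of W, and elements past the logical size are 0.f, so the packed
// loop can cover the whole array with no remainder loop.
template <size_t W>
float paddedDot(const float *v1, const float *v2, size_t paddedSize)
{
    float lanes[W] = {0.f}; // stands in for one PackedFloat accumulator
    for (size_t i = 0; i < paddedSize; i += W)
    {
        for (size_t k = 0; k < W; ++k)
        {
            lanes[k] += v1[i + k] * v2[i + k]; // padded tail adds 0 * 0
        }
    }
    float sum = 0.f;
    for (size_t k = 0; k < W; ++k)
    {
        sum += lanes[k]; // horizontal reduction, like PackedFloat::scalar()
    }
    return sum;
}

int main()
{
    // logical size 5, padded to 8 (width 4); tail is zero-filled
    float a[8] = {1.f, 2.f, 3.f, 4.f, 5.f, 0.f, 0.f, 0.f};
    float b[8] = {1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f};
    assert(paddedDot<4>(a, b, 8) == 15.f);
    return 0;
}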
... ...
@@ -99,7 +94,7 @@ float gaps::dot(const Vector &v1, const Vector &v2)
 float gaps::dot(const HybridVector &v1, const HybridVector &v2)
 {
     GAPS_ASSERT(v1.size() == v2.size());

     return dot_helper(v1.densePtr(), v2.densePtr(), v1.size());
 }

... ...
@@ -168,4 +163,13 @@ Vector operator/(const HybridVector &hv, float f)
         v[i] = hv[i] / f;
     }
     return v;
+}
+
+Vector gaps::pmax(Vector v, float p)
+{
+    for (unsigned i = 0; i < v.size(); ++i)
+    {
+        v[i] = gaps::max(v[i] * p, p);
+    }
+    return v;
 }
\ No newline at end of file
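A naming caveat: unlike R's pmax(v, p), which is a plain elementwise maximum, gaps::pmax(v, p) scales each element by p before flooring at p; with p = 0.1 it produces the default uncertainty max(0.1 * D, 0.1) used by SparseGibbsSampler::chiSq above. Per element it computes (standalone equivalent):

// what gaps::pmax(v, p) computes for each element
float scaledFloor(float x, float p)
{
    return (x * p > p) ? x * p : p; // max(p * x, p)
}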
... ...
@@ -22,6 +22,7 @@ namespace gaps
     float sum(const HybridVector &v);

     Vector elementSq(Vector v);
+    Vector pmax(Vector v, float p);
 }

 Vector operator*(Vector v, float f);