@@ -1,6 +1,8 @@
 #include "HybridVector.h"
 #include "../math/Math.h"
 #include "../math/SIMD.h"
+#include "../utils/Archive.h"
+#include "../utils/GapsAssert.h"
 
 #define SIMD_PAD(x) (gaps::simd::Index::increment() + \
     gaps::simd::Index::increment() * ((x) / gaps::simd::Index::increment()))
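
Note: SIMD_PAD rounds the dense buffer up so vectorized loads past the logical end stay in-bounds. A minimal standalone sketch of the arithmetic, with gaps::simd::Index::increment() assumed to be 4 (one 128-bit register of floats) purely for illustration:

// Hypothetical illustration of SIMD_PAD, increment hard-coded to 4.
constexpr unsigned kInc = 4; // stand-in for gaps::simd::Index::increment()

constexpr unsigned simdPad(unsigned x)
{
    return kInc + kInc * (x / kInc);
}

static_assert(simdPad(10) == 12, "10 floats padded to 12");
static_assert(simdPad(8) == 12, "pads even at exact multiples");
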
@@ -49,6 +51,7 @@ unsigned HybridVector::size() const
     return mSize;
 }
 
+// can be called from multiple concurrent OpenMP threads
 bool HybridVector::add(unsigned i, float v)
 {
     GAPS_ASSERT(i < mSize);
@@ -65,6 +68,7 @@ bool HybridVector::add(unsigned i, float v)
     return false;
 }
 
+// can be called from multiple concurrent OpenMP threads
 bool HybridVector::set(unsigned i, float v)
 {
     GAPS_ASSERT(i < mSize);
@@ -84,9 +84,9 @@ bool HybridVector::set(unsigned i, float v)
 float HybridVector::operator[](unsigned i) const
 {
     GAPS_ASSERT(i < mSize);
-    GAPS_ASSERT(mData[i] == 0.f
-        ? !mIndexBitFlags[i / 64] & (1ull << (i % 64))
-        : mIndexBitFlags[i / 64] & (1ull << (i % 64))
+    GAPS_ASSERT((mData[i] == 0.f)
+        ? !(mIndexBitFlags[i / 64] & (1ull << (i % 64)))
+        : (mIndexBitFlags[i / 64] & (1ull << (i % 64)))
     );
     return mData[i];
 }
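
Note: the new parentheses are a real fix, not just style: `!` binds tighter than `&`, so the old assert computed `(!flags) & mask` instead of the intended `!(flags & mask)`. A minimal standalone example of the difference, with hypothetical values:

// Precedence illustration.
unsigned long long flags = 1ull << 1; // some other bit in the word is set
unsigned long long mask  = 1ull << 0; // the bit we actually want to test
bool wrong = !flags & mask;   // parsed as (!flags) & mask == 0 & 1 == false
bool right = !(flags & mask); // bit 0 is clear, so this is true
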
@@ -87,7 +87,7 @@ float HybridVector::operator[](unsigned i) const
     GAPS_ASSERT(mData[i] == 0.f
         ? !mIndexBitFlags[i / 64] & (1ull << (i % 64))
         : mIndexBitFlags[i / 64] & (1ull << (i % 64))
-    )
+    );
     return mData[i];
 }
 
@@ -103,7 +103,6 @@ Archive& operator<<(Archive &ar, const HybridVector &vec)
     {
         ar << vec.mIndexBitFlags[i];
     }
-
     for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar << vec.mData[i];
@@ -116,12 +115,10 @@ Archive& operator>>(Archive &ar, HybridVector &vec)
     unsigned sz = 0;
     ar >> sz;
     GAPS_ASSERT(sz == vec.size());
-
     for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
     {
         ar >> vec.mIndexBitFlags[i];
     }
-
     for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar >> vec.mData[i];
@@ -84,6 +84,10 @@ bool HybridVector::set(unsigned i, float v)
 float HybridVector::operator[](unsigned i) const
 {
     GAPS_ASSERT(i < mSize);
+    GAPS_ASSERT(mData[i] == 0.f
+        ? !mIndexBitFlags[i / 64] & (1ull << (i % 64))
+        : mIndexBitFlags[i / 64] & (1ull << (i % 64))
+    )
     return mData[i];
 }
 
@@ -54,10 +54,12 @@ bool HybridVector::add(unsigned i, float v)
     GAPS_ASSERT(i < mSize);
     if (mData[i] + v < gaps::epsilon)
     {
+        #pragma omp atomic
         mIndexBitFlags[i / 64] &= ~(1ull << (i % 64));
         mData[i] = 0.f;
         return true;
     }
+    #pragma omp atomic
     mIndexBitFlags[i / 64] |= (1ull << (i % 64));
     mData[i] += v;
     return false;
@@ -68,10 +70,12 @@ bool HybridVector::set(unsigned i, float v)
     GAPS_ASSERT(i < mSize);
     if (v < gaps::epsilon)
     {
+        #pragma omp atomic
         mIndexBitFlags[i / 64] &= ~(1ull << (i % 64));
         mData[i] = 0.f;
         return true;
     }
+    #pragma omp atomic
     mIndexBitFlags[i / 64] |= (1ull << (i % 64));
     mData[i] = v;
     return false;
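
Note: the pragmas guard the read-modify-write on the shared flag word: two threads updating different indices can still hit the same 64-bit word of mIndexBitFlags. A sketch of the kind of concurrent caller this enables (hypothetical, not part of this PR):

// Hypothetical caller: concurrent writes to distinct indices are safe for
// mData, but the flag words are shared 64-to-1, hence the atomics above.
#include <omp.h>

void scatter(HybridVector &vec, const unsigned *idx, const float *val, unsigned n)
{
    #pragma omp parallel for
    for (unsigned k = 0; k < n; ++k)
    {
        vec.set(idx[k], val[k]); // assumes idx contains no duplicates
    }
}
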
@@ -51,6 +51,7 @@ unsigned HybridVector::size() const
 
 bool HybridVector::add(unsigned i, float v)
 {
+    GAPS_ASSERT(i < mSize);
     if (mData[i] + v < gaps::epsilon)
     {
         mIndexBitFlags[i / 64] &= ~(1ull << (i % 64));
@@ -62,6 +63,20 @@ bool HybridVector::add(unsigned i, float v)
     return false;
 }
 
+bool HybridVector::set(unsigned i, float v)
+{
+    GAPS_ASSERT(i < mSize);
+    if (v < gaps::epsilon)
+    {
+        mIndexBitFlags[i / 64] &= ~(1ull << (i % 64));
+        mData[i] = 0.f;
+        return true;
+    }
+    mIndexBitFlags[i / 64] |= (1ull << (i % 64));
+    mData[i] = v;
+    return false;
+}
+
 float HybridVector::operator[](unsigned i) const
 {
     GAPS_ASSERT(i < mSize);
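
Note: with set() in place the return convention matches add(): true means the entry is now zero. A quick usage sketch with hypothetical values:

HybridVector vec(100);
bool zeroed = vec.set(7, 3.5f); // false: entry 7 is now non-zero
zeroed = vec.add(7, -3.5f);     // true: 3.5f - 3.5f < gaps::epsilon, entry cleared
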
@@ -57,12 +57,9 @@ bool HybridVector::add(unsigned i, float v)
         mData[i] = 0.f;
         return true;
     }
-    else
-    {
-        mIndexBitFlags[i / 64] |= (1ull << (i % 64));
-        mData[i] += v;
-        return false;
-    }
+    mIndexBitFlags[i / 64] |= (1ull << (i % 64));
+    mData[i] += v;
+    return false;
 }
 
 float HybridVector::operator[](unsigned i) const
@@ -2,20 +2,27 @@
 #include "../math/Math.h"
 #include "../math/SIMD.h"
 
+#define SIMD_PAD(x) (gaps::simd::Index::increment() + \
+    gaps::simd::Index::increment() * ((x) / gaps::simd::Index::increment()))
+
 HybridVector::HybridVector(unsigned sz)
     :
 mIndexBitFlags(sz / 64 + 1, 0),
-mData(sz, 0.f),
+mData(SIMD_PAD(sz), 0.f),
 mSize(sz)
-{}
+{
+    GAPS_ASSERT(mData.size() % gaps::simd::Index::increment() == 0);
+}
 
 HybridVector::HybridVector(const std::vector<float> &v)
     :
 mIndexBitFlags(v.size() / 64 + 1, 0),
-mData(v.size(), 0.f),
+mData(SIMD_PAD(v.size()), 0.f),
 mSize(v.size())
 {
-    for (unsigned i = 0; i < v.size(); ++i)
+    GAPS_ASSERT(mData.size() % gaps::simd::Index::increment() == 0);
+
+    for (unsigned i = 0; i < mSize; ++i)
     {
         mData[i] = v[i];
         if (v[i] > 0.f)
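
Note: SIMD_PAD is not the same formula as the PAD_SIZE_FOR_SIMD seen elsewhere in this history: it always leaves at least one spare SIMD block past the data, while the old macro only rounded up to the next multiple of the increment. Worked comparison, with the increment assumed to be 4 for illustration:

// Hypothetical comparison, increment hard-coded to 4.
constexpr unsigned simdPad(unsigned x)        { return 4u + 4u * (x / 4u); }
constexpr unsigned padSizeForSimd(unsigned x) { return 4u * (1u + (x - 1u) / 4u); }

static_assert(simdPad(8) == 12, "always at least one spare block");
static_assert(padSizeForSimd(8) == 8, "rounds up only when needed");
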
@@ -60,6 +67,7 @@ bool HybridVector::add(unsigned i, float v)
 
 float HybridVector::operator[](unsigned i) const
 {
+    GAPS_ASSERT(i < mSize);
     return mData[i];
 }
 
@@ -68,7 +68,7 @@ const float* HybridVector::densePtr() const
     return &(mData[0]);
 }
 
-Archive& operator<<(Archive &ar, HybridVector &vec)
+Archive& operator<<(Archive &ar, const HybridVector &vec)
 {
     ar << vec.mSize;
     for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
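
Note: the added const qualifier lets serialization accept read-only references. A sketch of a caller that was ill-formed before this change (hypothetical, for illustration only):

void save(Archive &ar, const HybridVector &vec)
{
    ar << vec; // previously a compile error: operator<< took HybridVector&
}
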
@@ -2,19 +2,17 @@
 #include "../math/Math.h"
 #include "../math/SIMD.h"
 
-#define PAD_SIZE_FOR_SIMD(x) (gaps::simd::Index::increment() * (1 + ((x) - 1) / gaps::simd::Index::increment()))
-
-HybridVector::HybridVector(unsigned size)
+HybridVector::HybridVector(unsigned sz)
     :
-mIndexBitFlags(size / 64 + 1, 0),
-mData(PAD_SIZE_FOR_SIMD(size), 0.f),
-mSize(size)
+mIndexBitFlags(sz / 64 + 1, 0),
+mData(sz, 0.f),
+mSize(sz)
 {}
 
 HybridVector::HybridVector(const std::vector<float> &v)
     :
 mIndexBitFlags(v.size() / 64 + 1, 0),
-mData(PAD_SIZE_FOR_SIMD(v.size()), 0.f),
+mData(v.size(), 0.f),
 mSize(v.size())
 {
     for (unsigned i = 0; i < v.size(); ++i)
@@ -1,14 +1,21 @@
 #include "HybridVector.h"
 #include "../math/Math.h"
+#include "../math/SIMD.h"
+
+#define PAD_SIZE_FOR_SIMD(x) (gaps::simd::Index::increment() * (1 + ((x) - 1) / gaps::simd::Index::increment()))
 
 HybridVector::HybridVector(unsigned size)
-    : mIndexBitFlags(size / 64 + 1, 0), mData(size, 0.f)
+    :
+mIndexBitFlags(size / 64 + 1, 0),
+mData(PAD_SIZE_FOR_SIMD(size), 0.f),
+mSize(size)
 {}
 
 HybridVector::HybridVector(const std::vector<float> &v)
     :
 mIndexBitFlags(v.size() / 64 + 1, 0),
-mData(v.size(), 0.f)
+mData(PAD_SIZE_FOR_SIMD(v.size()), 0.f),
+mSize(v.size())
 {
     for (unsigned i = 0; i < v.size(); ++i)
     {
@@ -34,7 +41,7 @@ bool HybridVector::empty() const
 
 unsigned HybridVector::size() const
 {
-    return mData.size();
+    return mSize;
 }
 
 bool HybridVector::add(unsigned i, float v)
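
Note: once the dense buffer is padded, mData.size() overshoots the logical length, so size() has to report the stored mSize instead. For example, with the increment assumed to be 4:

HybridVector vec(10);
// vec.size() == 10, but mData.size() == PAD_SIZE_FOR_SIMD(10) == 4 * (1 + 9 / 4) == 12
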
@@ -65,13 +72,13 @@ const float* HybridVector::densePtr() const
 
 Archive& operator<<(Archive &ar, HybridVector &vec)
 {
-    ar << vec.mData.size();
+    ar << vec.mSize;
     for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
     {
         ar << vec.mIndexBitFlags[i];
     }
 
-    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar << vec.mData[i];
     }
@@ -82,14 +89,14 @@ Archive& operator>>(Archive &ar, HybridVector &vec)
 {
     unsigned sz = 0;
     ar >> sz;
-    GAPS_ASSERT(sz == vec.mData.size());
+    GAPS_ASSERT(sz == vec.size());
 
     for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
     {
         ar >> vec.mIndexBitFlags[i];
     }
 
-    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    for (unsigned i = 0; i < vec.mSize; ++i)
     {
         ar >> vec.mData[i];
     }

new file mode 100644
@@ -0,0 +1,99 @@
+#include "HybridVector.h"
+#include "../math/Math.h"
+
+HybridVector::HybridVector(unsigned size)
+    : mIndexBitFlags(size / 64 + 1, 0), mData(size, 0.f)
+{}
+
+HybridVector::HybridVector(const std::vector<float> &v)
+    :
+mIndexBitFlags(v.size() / 64 + 1, 0),
+mData(v.size(), 0.f)
+{
+    for (unsigned i = 0; i < v.size(); ++i)
+    {
+        mData[i] = v[i];
+        if (v[i] > 0.f)
+        {
+            mIndexBitFlags[i / 64] ^= (1ull << (i % 64));
+        }
+    }
+}
+
+bool HybridVector::empty() const
+{
+    for (unsigned i = 0; i < mIndexBitFlags.size(); ++i)
+    {
+        if (mIndexBitFlags[i] != 0)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+unsigned HybridVector::size() const
+{
+    return mData.size();
+}
+
+bool HybridVector::add(unsigned i, float v)
+{
+    if (mData[i] + v < gaps::epsilon)
+    {
+        mIndexBitFlags[i / 64] ^= (1ull << (i % 64));
+        mData[i] = 0.f;
+        return true;
+    }
+    else
+    {
+        mIndexBitFlags[i / 64] |= (1ull << (i % 64));
+        mData[i] += v;
+        return false;
+    }
+}
+
+float HybridVector::operator[](unsigned i) const
+{
+    return mData[i];
+}
+
+const float* HybridVector::densePtr() const
+{
+    return &(mData[0]);
+}
+
+Archive& operator<<(Archive &ar, HybridVector &vec)
+{
+    ar << vec.mData.size();
+    for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
+    {
+        ar << vec.mIndexBitFlags[i];
+    }
+
+    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    {
+        ar << vec.mData[i];
+    }
+    return ar;
+}
+
+Archive& operator>>(Archive &ar, HybridVector &vec)
+{
+    unsigned sz = 0;
+    ar >> sz;
+    GAPS_ASSERT(sz == vec.mData.size());
+
+    for (unsigned i = 0; i < vec.mIndexBitFlags.size(); ++i)
+    {
+        ar >> vec.mIndexBitFlags[i];
+    }
+
+    for (unsigned i = 0; i < vec.mData.size(); ++i)
+    {
+        ar >> vec.mData[i];
+    }
+    return ar;
+}
+
+
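
Note: for reference, the bit-flag layout used throughout this file maps element i to word i / 64, bit i % 64, assuming 64-bit flag words. A minimal standalone membership check (hypothetical helper, not part of the class):

#include <cstdint>
#include <vector>

// True if the flag bit for element i is set.
bool flagIsSet(const std::vector<uint64_t> &flags, unsigned i)
{
    return (flags[i / 64] & (1ull << (i % 64))) != 0;
}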