Browse code

Disable SIMD for MinGW

sherman5 authored on 20/10/2020 04:06:00
Showing 1 changed file
... ...
@@ -1,7 +1,11 @@
1 1
 #ifndef __COGAPS_SIMD_H__
2 2
 #define __COGAPS_SIMD_H__
3 3
 
4
-#if (defined ( __AVX2__ ) || defined ( __AVX__ )) && (!defined(_WIN32) || !defined(WIN32))
4
+#if defined(_WIN32) || defined(WIN32) || defined(__MINGW32__)
5
+    #define COGAPS_SIMD_H_DISABLE_SIMD
6
+#endif
7
+
8
+#if (defined ( __AVX2__ ) || defined ( __AVX__ )) && !defined(COGAPS_SIMD_H_DISABLE_SIMD)
5 9
 
6 10
     #define SIMD_INC 8
7 11
     #define __GAPS_AVX__
... ...
@@ -15,7 +19,7 @@
15 19
     #define MUL_PACKED(a,b) _mm256_mul_ps(a,b)
16 20
     #define DIV_PACKED(a,b) _mm256_div_ps(a,b)
17 21
 
18
-#elif (defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )) && (!defined(_WIN32) || !defined(WIN32))
22
+#elif (defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )) && !defined(COGAPS_SIMD_H_DISABLE_SIMD)
19 23
 
20 24
     #define SIMD_INC 4
21 25
     #define __GAPS_SSE__
... ...
@@ -132,5 +136,8 @@ inline float getScalar(gaps_packed_t pf)
132 136
 } // namespace simd
133 137
 } // namespace gaps
134 138
 
135
-#endif // __COGAPS_SIMD_H__
139
+#ifdef COGAPS_SIMD_H_DISABLE_SIMD
140
+    #undef COGAPS_SIMD_H_DISABLE_SIMD
141
+#endif
136 142
 
143
+#endif // __COGAPS_SIMD_H__
Browse code

Disable hand-rolled SIMD on Windows

sherman5 authored on 20/10/2020 03:45:47
Showing 1 changed file
... ...
@@ -1,7 +1,7 @@
1 1
 #ifndef __COGAPS_SIMD_H__
2 2
 #define __COGAPS_SIMD_H__
3 3
 
4
-#if defined ( __AVX2__ ) || defined ( __AVX__ )
4
+#if (defined ( __AVX2__ ) || defined ( __AVX__ )) && (!defined(_WIN32) || !defined(WIN32))
5 5
 
6 6
     #define SIMD_INC 8
7 7
     #define __GAPS_AVX__
... ...
@@ -15,7 +15,7 @@
15 15
     #define MUL_PACKED(a,b) _mm256_mul_ps(a,b)
16 16
     #define DIV_PACKED(a,b) _mm256_div_ps(a,b)
17 17
 
18
-#elif defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )
18
+#elif (defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )) && (!defined(_WIN32) || !defined(WIN32))
19 19
 
20 20
     #define SIMD_INC 4
21 21
     #define __GAPS_SSE__
Browse code

infrastructure in place for extending samplers and models

Tom Sherman authored on 22/05/2019 21:27:14
Showing 1 changed file
... ...
@@ -97,21 +97,15 @@ public:
97 97
     float scalar()
98 98
     {
99 99
     #if defined( __GAPS_AVX__ )
100
-
101 100
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
102 101
         mData = _mm256_hadd_ps(mData, mData);
103 102
         mData = _mm256_hadd_ps(mData, mData);
104 103
         return ra[0] + ra[4];
105
-
106 104
     #elif defined( __GAPS_SSE__ )
107
-
108 105
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
109 106
         return ra[0] + ra[1] + ra[2] + ra[3];
110
-
111 107
     #else
112
-
113 108
         return mData;
114
-
115 109
     #endif
116 110
     }
117 111
 
... ...
@@ -123,21 +117,15 @@ private:
123 117
 inline float getScalar(gaps_packed_t pf)
124 118
 {
125 119
     #if defined( __GAPS_AVX__ )
126
-
127 120
         pf = _mm256_hadd_ps(pf, pf);
128 121
         pf = _mm256_hadd_ps(pf, pf);
129 122
         float* ra = reinterpret_cast<float*>(&pf); // NOLINT
130 123
         return ra[0] + ra[4];
131
-
132 124
     #elif defined( __GAPS_SSE__ )
133
-
134 125
         float* ra = reinterpret_cast<float*>(&pf); // NOLINT
135 126
         return ra[0] + ra[1] + ra[2] + ra[3];
136
-
137 127
     #else
138
-
139 128
         return pf;
140
-
141 129
     #endif
142 130
 }
143 131
 
Browse code

sparse cogaps optimized

Tom Sherman authored on 12/11/2018 17:34:22
Showing 1 changed file
... ...
@@ -3,6 +3,7 @@
3 3
 
4 4
 #if defined ( __AVX2__ ) || defined ( __AVX__ )
5 5
 
6
+    #define SIMD_INC 8
6 7
     #define __GAPS_AVX__
7 8
     #include <immintrin.h>
8 9
     typedef __m256 gaps_packed_t;
... ...
@@ -16,6 +17,7 @@
16 17
 
17 18
 #elif defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )
18 19
 
20
+    #define SIMD_INC 4
19 21
     #define __GAPS_SSE__
20 22
     #include <nmmintrin.h>
21 23
     typedef __m128 gaps_packed_t;
... ...
@@ -30,6 +32,7 @@
30 32
 #else
31 33
 
32 34
     typedef float gaps_packed_t;
35
+    #define SIMD_INC 1
33 36
     #define SET_SCALAR(x) x
34 37
     #define LOAD_PACKED(x) *(x)
35 38
     #define STORE_PACKED(p,x) *(p) = (x)
... ...
@@ -58,13 +61,7 @@ public:
58 61
     
59 62
     static unsigned increment()
60 63
     {
61
-    #if defined( __GAPS_AVX__ )
62
-        return 8;
63
-    #elif defined( __GAPS_SSE__ )
64
-        return 4;
65
-    #else
66
-        return 1;
67
-    #endif
64
+        return SIMD_INC;
68 65
     }
69 66
 
70 67
     friend const float* operator+(const float *ptr, Index ndx);
... ...
@@ -82,7 +79,7 @@ class PackedFloat
82 79
 {
83 80
 public:
84 81
 
85
-    PackedFloat() : mData() {}
82
+    PackedFloat() : mData(SET_SCALAR(0.f)) {}
86 83
     explicit PackedFloat(float val) : mData(SET_SCALAR(val)) {}
87 84
 #if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ ) // avoid redefintion when gaps_packed_t == float
88 85
     explicit PackedFloat(gaps_packed_t val) : mData(val) {}
... ...
@@ -123,6 +120,27 @@ private:
123 120
     gaps_packed_t mData;
124 121
 };
125 122
 
123
+inline float getScalar(gaps_packed_t pf)
124
+{
125
+    #if defined( __GAPS_AVX__ )
126
+
127
+        pf = _mm256_hadd_ps(pf, pf);
128
+        pf = _mm256_hadd_ps(pf, pf);
129
+        float* ra = reinterpret_cast<float*>(&pf); // NOLINT
130
+        return ra[0] + ra[4];
131
+
132
+    #elif defined( __GAPS_SSE__ )
133
+
134
+        float* ra = reinterpret_cast<float*>(&pf); // NOLINT
135
+        return ra[0] + ra[1] + ra[2] + ra[3];
136
+
137
+    #else
138
+
139
+        return pf;
140
+
141
+    #endif
142
+}
143
+
126 144
 } // namespace simd
127 145
 } // namespace gaps
128 146
 
Browse code

cleaned up dot product functions with templates

Tom Sherman authored on 07/11/2018 17:55:51
Showing 1 changed file
... ...
@@ -84,7 +84,7 @@ public:
84 84
 
85 85
     PackedFloat() : mData() {}
86 86
     explicit PackedFloat(float val) : mData(SET_SCALAR(val)) {}
87
-#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ )
87
+#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ ) // avoid redefintion when gaps_packed_t == float
88 88
     explicit PackedFloat(gaps_packed_t val) : mData(val) {}
89 89
 #endif
90 90
 
Browse code

updated config to commit file permissions

Tom Sherman authored on 29/10/2018 19:56:14
Showing 1 changed file
1 1
old mode 100644
2 2
new mode 100755
Browse code

basic infrastructure for sparse cogaps

Tom Sherman authored on 17/10/2018 00:45:29
Showing 1 changed file
... ...
@@ -6,7 +6,6 @@
6 6
     #define __GAPS_AVX__
7 7
     #include <immintrin.h>
8 8
     typedef __m256 gaps_packed_t;
9
-    const unsigned index_increment = 8;
10 9
     #define SET_SCALAR(x) _mm256_set1_ps(x)
11 10
     #define LOAD_PACKED(x) _mm256_load_ps(x)
12 11
     #define STORE_PACKED(p,x) _mm256_store_ps(p,x)
... ...
@@ -20,7 +19,6 @@
20 19
     #define __GAPS_SSE__
21 20
     #include <nmmintrin.h>
22 21
     typedef __m128 gaps_packed_t;
23
-    const unsigned index_increment = 4;
24 22
     #define SET_SCALAR(x) _mm_set1_ps(x)
25 23
     #define LOAD_PACKED(x) _mm_load_ps(x)
26 24
     #define STORE_PACKED(p,x) _mm_store_ps(p,x)
... ...
@@ -32,7 +30,6 @@
32 30
 #else
33 31
 
34 32
     typedef float gaps_packed_t;
35
-    const unsigned index_increment = 1;
36 33
     #define SET_SCALAR(x) x
37 34
     #define LOAD_PACKED(x) *(x)
38 35
     #define STORE_PACKED(p,x) *(p) = (x)
... ...
@@ -56,9 +53,19 @@ public:
56 53
     Index& operator=(unsigned val) { index = val; return *this; }
57 54
     bool operator<(unsigned comp) const { return index < comp; }
58 55
     bool operator<=(unsigned comp) const { return index <= comp; }
59
-    void operator++() { index += index_increment; }
56
+    void operator++() { index += gaps::simd::Index::increment(); }
60 57
     unsigned value() const { return index; }
61
-    unsigned increment() const { return index_increment; }
58
+    
59
+    static unsigned increment()
60
+    {
61
+    #if defined( __GAPS_AVX__ )
62
+        return 8;
63
+    #elif defined( __GAPS_SSE__ )
64
+        return 4;
65
+    #else
66
+        return 1;
67
+    #endif
68
+    }
62 69
 
63 70
     friend const float* operator+(const float *ptr, Index ndx);
64 71
     friend float* operator+(float *ptr, Index ndx);
Browse code

basic framework in place for full async

Tom Sherman authored on 29/08/2018 21:41:05
Showing 1 changed file
... ...
@@ -1,34 +1,10 @@
1 1
 #ifndef __COGAPS_SIMD_H__
2 2
 #define __COGAPS_SIMD_H__
3 3
 
4
-#ifndef SSE_INSTR_SET
5
-    #if defined ( __AVX2__ )
6
-        #define SSE_INSTR_SET 8
7
-    #elif defined ( __AVX__ )
8
-        #define SSE_INSTR_SET 7
9
-    #elif defined ( __SSE4_2__ )
10
-        #define SSE_INSTR_SET 6
11
-    #elif defined ( __SSE4_1__ )
12
-        #define SSE_INSTR_SET 5
13
-    #else
14
-        #define SSE_INSTR_SET 0
15
-    #endif
16
-#endif
4
+#if defined ( __AVX2__ ) || defined ( __AVX__ )
17 5
 
18
-#if SSE_INSTR_SET > 6
19 6
     #define __GAPS_AVX__
20 7
     #include <immintrin.h>
21
-#elif SSE_INSTR_SET == 6 || SSE_INSTR_SET == 5
22
-    #define __GAPS_SSE__
23
-    #include <nmmintrin.h>
24
-#endif
25
-
26
-namespace gaps
27
-{
28
-namespace simd
29
-{
30
-
31
-#if defined( __GAPS_AVX__ )
32 8
     typedef __m256 gaps_packed_t;
33 9
     const unsigned index_increment = 8;
34 10
     #define SET_SCALAR(x) _mm256_set1_ps(x)
... ...
@@ -38,7 +14,11 @@ namespace simd
38 14
     #define SUB_PACKED(a,b) _mm256_sub_ps(a,b)
39 15
     #define MUL_PACKED(a,b) _mm256_mul_ps(a,b)
40 16
     #define DIV_PACKED(a,b) _mm256_div_ps(a,b)
41
-#elif defined( __GAPS_SSE__ )
17
+
18
+#elif defined ( __SSE4_2__ ) || defined ( __SSE4_1__ )
19
+
20
+    #define __GAPS_SSE__
21
+    #include <nmmintrin.h>
42 22
     typedef __m128 gaps_packed_t;
43 23
     const unsigned index_increment = 4;
44 24
     #define SET_SCALAR(x) _mm_set1_ps(x)
... ...
@@ -48,7 +28,9 @@ namespace simd
48 28
     #define SUB_PACKED(a,b) _mm_sub_ps(a,b)
49 29
     #define MUL_PACKED(a,b) _mm_mul_ps(a,b)
50 30
     #define DIV_PACKED(a,b) _mm_div_ps(a,b)
31
+
51 32
 #else
33
+
52 34
     typedef float gaps_packed_t;
53 35
     const unsigned index_increment = 1;
54 36
     #define SET_SCALAR(x) x
... ...
@@ -58,73 +40,80 @@ namespace simd
58 40
     #define SUB_PACKED(a,b) ((a)-(b))
59 41
     #define MUL_PACKED(a,b) ((a)*(b))
60 42
     #define DIV_PACKED(a,b) ((a)/(b))
43
+
61 44
 #endif
62 45
 
63
-class Index
46
+namespace gaps
47
+{
48
+namespace simd
64 49
 {
65
-private:
66
-
67
-    unsigned index;
68 50
 
51
+class Index
52
+{
69 53
 public:
70 54
 
71 55
     explicit Index(unsigned i) : index(i) {}
72 56
     Index& operator=(unsigned val) { index = val; return *this; }
73
-    bool operator<(unsigned comp) { return index < comp; }
74
-    bool operator<=(unsigned comp) { return index <= comp; }
57
+    bool operator<(unsigned comp) const { return index < comp; }
58
+    bool operator<=(unsigned comp) const { return index <= comp; }
75 59
     void operator++() { index += index_increment; }
76 60
     unsigned value() const { return index; }
77 61
     unsigned increment() const { return index_increment; }
62
+
78 63
     friend const float* operator+(const float *ptr, Index ndx);
79 64
     friend float* operator+(float *ptr, Index ndx);
65
+
66
+private:
67
+
68
+    unsigned index;
80 69
 };
81 70
 
82 71
 inline const float* operator+(const float *ptr, Index ndx) { return ptr + ndx.index; }
83 72
 inline float* operator+(float *ptr, Index ndx) { return ptr + ndx.index; }
84 73
 
85
-class packedFloat
74
+class PackedFloat
86 75
 {
87
-private:
88
-
89
-    gaps_packed_t mData;
90
-
91 76
 public:
92 77
 
93
-    packedFloat() : mData() {}
94
-    explicit packedFloat(float val) : mData(SET_SCALAR(val)) {}
95
-#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ ) || defined( __GAPS_AVX512__ )
96
-    explicit packedFloat(gaps_packed_t val) : mData(val) {}
78
+    PackedFloat() : mData() {}
79
+    explicit PackedFloat(float val) : mData(SET_SCALAR(val)) {}
80
+#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ )
81
+    explicit PackedFloat(gaps_packed_t val) : mData(val) {}
97 82
 #endif
98 83
 
99
-    packedFloat operator+(packedFloat b) const { return packedFloat(ADD_PACKED(mData, b.mData)); }
100
-    packedFloat operator-(packedFloat b) const { return packedFloat(SUB_PACKED(mData, b.mData)); }
101
-    packedFloat operator*(packedFloat b) const { return packedFloat(MUL_PACKED(mData, b.mData)); }
102
-    packedFloat operator/(packedFloat b) const { return packedFloat(DIV_PACKED(mData, b.mData)); }
84
+    PackedFloat operator+(PackedFloat b) const { return PackedFloat(ADD_PACKED(mData, b.mData)); }
85
+    PackedFloat operator-(PackedFloat b) const { return PackedFloat(SUB_PACKED(mData, b.mData)); }
86
+    PackedFloat operator*(PackedFloat b) const { return PackedFloat(MUL_PACKED(mData, b.mData)); }
87
+    PackedFloat operator/(PackedFloat b) const { return PackedFloat(DIV_PACKED(mData, b.mData)); }
103 88
 
104
-    void operator+=(packedFloat val) { mData = ADD_PACKED(mData, val.mData); }
89
+    void operator+=(PackedFloat val) { mData = ADD_PACKED(mData, val.mData); }
105 90
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
106 91
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
107 92
 
108
-#if defined( __GAPS_AVX__ )
109 93
     float scalar()
110 94
     {
95
+    #if defined( __GAPS_AVX__ )
96
+
111 97
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
112 98
         mData = _mm256_hadd_ps(mData, mData);
113 99
         mData = _mm256_hadd_ps(mData, mData);
114 100
         return ra[0] + ra[4];
115
-    }
116
-#elif defined( __GAPS_SSE__ )
117
-    float scalar()
118
-    {
101
+
102
+    #elif defined( __GAPS_SSE__ )
103
+
119 104
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
120 105
         return ra[0] + ra[1] + ra[2] + ra[3];
121
-    }
122
-#else
123
-    float scalar()
124
-    {
106
+
107
+    #else
108
+
125 109
         return mData;
110
+
111
+    #endif
126 112
     }
127
-#endif
113
+
114
+private:
115
+
116
+    gaps_packed_t mData;
128 117
 };
129 118
 
130 119
 } // namespace simd
Browse code

started making changes

Tom Sherman authored on 28/08/2018 19:53:08
Showing 1 changed file
... ...
@@ -130,47 +130,5 @@ public:
130 130
 } // namespace simd
131 131
 } // namespace gaps
132 132
 
133
-#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)) && ! defined(__x86_64__)
134
-    #define __x86_64__ 1
135
-#endif
136
-
137
-#ifdef _OPENMP
138
-    #define __GAPS_OPENMP__
139
-#endif
140
-
141
-// used to convert defined macro values into strings
142
-#define STR_HELPER(x) #x
143
-#define STR(x) STR_HELPER(x)
144
-
145
-inline std::string buildReport()
146
-{
147
-#if defined( __clang__ )
148
-    std::string compiler = "Compiled with Clang\n";
149
-#elif defined( __INTEL_COMPILER )
150
-    std::string compiler = "Compiled with Intel ICC/ICPC\n";
151
-#elif defined( __GNUC__ )
152
-    std::string compiler = "Compiled with GCC v" + std::string(STR( __GNUC__ ))
153
-    + "." + std::string(STR( __GNUC_MINOR__ )) + '\n';
154
-#elif defined( _MSC_VER )
155
-    std::string compiler = "Compiled with Microsoft Visual Studio\n";
156
-#endif
157
-
158
-#if defined( __GAPS_AVX__ )
159
-    std::string simd = "SIMD: AVX instructions enabled\n";
160
-#elif defined( __GAPS_SSE__ )
161
-    std::string simd = "SIMD: SSE instructions enabled\n";
162
-#else
163
-    std::string simd = "SIMD not enabled\n";
164
-#endif
165
-
166
-#ifdef __GAPS_OPENMP__
167
-    std::string openmp = "Compiled with OpenMP\n";
168
-#else
169
-    std::string openmp = "Compiler did not support OpenMP\n";
170
-#endif
171
-
172
-    return compiler + simd + openmp;
173
-}
174
-
175 133
 #endif // __COGAPS_SIMD_H__
176 134
 
Browse code

fixed NA's in std dev matrix

Tom Sherman authored on 06/08/2018 17:57:31
Showing 1 changed file
... ...
@@ -105,8 +105,6 @@ public:
105 105
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
106 106
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
107 107
 
108
-// RTTI may be expensive
109
-
110 108
 #if defined( __GAPS_AVX__ )
111 109
     float scalar()
112 110
     {
Browse code

moved computation inside if statement

Tom Sherman authored on 03/08/2018 19:58:43
Showing 1 changed file
... ...
@@ -105,6 +105,8 @@ public:
105 105
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
106 106
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
107 107
 
108
+// RTTI may be expensive
109
+
108 110
 #if defined( __GAPS_AVX__ )
109 111
     float scalar()
110 112
     {
Browse code

vignette coming together

Tom Sherman authored on 01/08/2018 20:53:40
Showing 1 changed file
... ...
@@ -156,9 +156,9 @@ inline std::string buildReport()
156 156
 #endif
157 157
 
158 158
 #if defined( __GAPS_AVX__ )
159
-    std::string simd = "AVX enabled\n";
159
+    std::string simd = "SIMD: AVX instructions enabled\n";
160 160
 #elif defined( __GAPS_SSE__ )
161
-    std::string simd = "SSE enabled\n";
161
+    std::string simd = "SIMD: SSE instructions enabled\n";
162 162
 #else
163 163
     std::string simd = "SIMD not enabled\n";
164 164
 #endif
Browse code

added params and results class to R

Tom Sherman authored on 27/06/2018 17:29:43
Showing 1 changed file
... ...
@@ -138,5 +138,39 @@ public:
138 138
     #define __GAPS_OPENMP__
139 139
 #endif
140 140
 
141
+// used to convert defined macro values into strings
142
+#define STR_HELPER(x) #x
143
+#define STR(x) STR_HELPER(x)
144
+
145
+inline std::string buildReport()
146
+{
147
+#if defined( __clang__ )
148
+    std::string compiler = "Compiled with Clang\n";
149
+#elif defined( __INTEL_COMPILER )
150
+    std::string compiler = "Compiled with Intel ICC/ICPC\n";
151
+#elif defined( __GNUC__ )
152
+    std::string compiler = "Compiled with GCC v" + std::string(STR( __GNUC__ ))
153
+    + "." + std::string(STR( __GNUC_MINOR__ )) + '\n';
154
+#elif defined( _MSC_VER )
155
+    std::string compiler = "Compiled with Microsoft Visual Studio\n";
156
+#endif
157
+
158
+#if defined( __GAPS_AVX__ )
159
+    std::string simd = "AVX enabled\n";
160
+#elif defined( __GAPS_SSE__ )
161
+    std::string simd = "SSE enabled\n";
162
+#else
163
+    std::string simd = "SIMD not enabled\n";
164
+#endif
165
+
166
+#ifdef __GAPS_OPENMP__
167
+    std::string openmp = "Compiled with OpenMP\n";
168
+#else
169
+    std::string openmp = "Compiler did not support OpenMP\n";
170
+#endif
171
+
172
+    return compiler + simd + openmp;
173
+}
174
+
141 175
 #endif // __COGAPS_SIMD_H__
142 176
 
Browse code

cleanup header for simd

Tom Sherman authored on 25/06/2018 13:56:48
Showing 1 changed file
... ...
@@ -15,7 +15,7 @@
15 15
     #endif
16 16
 #endif
17 17
 
18
-#if SSE_INSTR_SET == 7
18
+#if SSE_INSTR_SET > 6
19 19
     #define __GAPS_AVX__
20 20
     #include <immintrin.h>
21 21
 #elif SSE_INSTR_SET == 6 || SSE_INSTR_SET == 5
... ...
@@ -105,14 +105,7 @@ public:
105 105
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
106 106
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
107 107
 
108
-#if defined( __GAPS_AVX512__ )
109
-    float scalar()
110
-    {
111
-        float* ra = reinterpret_cast<float*>(&mData); // NOLINT
112
-        return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7] +
113
-            ra[8] + ra[9] + ra[10] + ra[11] + ra[12] + ra[13] + ra[14] + ra[15];
114
-    }
115
-#elif defined( __GAPS_AVX__ )
108
+#if defined( __GAPS_AVX__ )
116 109
     float scalar()
117 110
     {
118 111
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
Browse code

better message for horizontal add of avx packed type

Tom Sherman authored on 22/06/2018 20:34:40
Showing 1 changed file
... ...
@@ -116,7 +116,9 @@ public:
116 116
     float scalar()
117 117
     {
118 118
         float* ra = reinterpret_cast<float*>(&mData); // NOLINT
119
-        return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7];
119
+        mData = _mm256_hadd_ps(mData, mData);
120
+        mData = _mm256_hadd_ps(mData, mData);
121
+        return ra[0] + ra[4];
120 122
     }
121 123
 #elif defined( __GAPS_SSE__ )
122 124
     float scalar()
Browse code

added back PUMP and FixedPatterns option

Tom Sherman authored on 13/06/2018 21:35:14
Showing 1 changed file
... ...
@@ -108,20 +108,20 @@ public:
108 108
 #if defined( __GAPS_AVX512__ )
109 109
     float scalar()
110 110
     {
111
-        float* ra = reinterpret_cast<float*>(&mData);
111
+        float* ra = reinterpret_cast<float*>(&mData); // NOLINT
112 112
         return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7] +
113 113
             ra[8] + ra[9] + ra[10] + ra[11] + ra[12] + ra[13] + ra[14] + ra[15];
114 114
     }
115 115
 #elif defined( __GAPS_AVX__ )
116 116
     float scalar()
117 117
     {
118
-        float* ra = reinterpret_cast<float*>(&mData);
118
+        float* ra = reinterpret_cast<float*>(&mData); // NOLINT
119 119
         return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7];
120 120
     }
121 121
 #elif defined( __GAPS_SSE__ )
122 122
     float scalar()
123 123
     {
124
-        float* ra = reinterpret_cast<float*>(&mData);
124
+        float* ra = reinterpret_cast<float*>(&mData); // NOLINT
125 125
         return ra[0] + ra[1] + ra[2] + ra[3];
126 126
     }
127 127
 #else
Browse code

decent approach for configuring debug and simd

Tom Sherman authored on 07/06/2018 23:21:50
Showing 1 changed file
... ...
@@ -1,14 +1,6 @@
1 1
 #ifndef __COGAPS_SIMD_H__
2 2
 #define __COGAPS_SIMD_H__
3 3
 
4
-#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)) && ! defined(__x86_64__)
5
-    #define __x86_64__ 1
6
-#endif
7
-
8
-#ifdef _OPENMP
9
-    #define __GAPS_OPENMP__
10
-#endif
11
-
12 4
 #ifndef SSE_INSTR_SET
13 5
     #if defined ( __AVX2__ )
14 6
         #define SSE_INSTR_SET 8
... ...
@@ -26,12 +18,9 @@
26 18
 #if SSE_INSTR_SET == 7
27 19
     #define __GAPS_AVX__
28 20
     #include <immintrin.h>
29
-#elif SSE_INSTR_SET == 6
21
+#elif SSE_INSTR_SET == 6 || SSE_INSTR_SET == 5
30 22
     #define __GAPS_SSE__
31 23
     #include <nmmintrin.h>
32
-#elif SSE_INSTR_SET == 5
33
-    #define __GAPS_SSE__
34
-    #include <smmintrin.h>
35 24
 #endif
36 25
 
37 26
 namespace gaps
... ...
@@ -44,6 +33,7 @@ namespace simd
44 33
     const unsigned index_increment = 8;
45 34
     #define SET_SCALAR(x) _mm256_set1_ps(x)
46 35
     #define LOAD_PACKED(x) _mm256_load_ps(x)
36
+    #define STORE_PACKED(p,x) _mm256_store_ps(p,x)
47 37
     #define ADD_PACKED(a,b) _mm256_add_ps(a,b)
48 38
     #define SUB_PACKED(a,b) _mm256_sub_ps(a,b)
49 39
     #define MUL_PACKED(a,b) _mm256_mul_ps(a,b)
... ...
@@ -102,7 +92,7 @@ public:
102 92
 
103 93
     packedFloat() : mData() {}
104 94
     explicit packedFloat(float val) : mData(SET_SCALAR(val)) {}
105
-#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ )
95
+#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ ) || defined( __GAPS_AVX512__ )
106 96
     explicit packedFloat(gaps_packed_t val) : mData(val) {}
107 97
 #endif
108 98
 
... ...
@@ -115,7 +105,14 @@ public:
115 105
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
116 106
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
117 107
 
118
-#if defined( __GAPS_AVX__ )
108
+#if defined( __GAPS_AVX512__ )
109
+    float scalar()
110
+    {
111
+        float* ra = reinterpret_cast<float*>(&mData);
112
+        return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7] +
113
+            ra[8] + ra[9] + ra[10] + ra[11] + ra[12] + ra[13] + ra[14] + ra[15];
114
+    }
115
+#elif defined( __GAPS_AVX__ )
119 116
     float scalar()
120 117
     {
121 118
         float* ra = reinterpret_cast<float*>(&mData);
... ...
@@ -138,5 +135,13 @@ public:
138 135
 } // namespace simd
139 136
 } // namespace gaps
140 137
 
138
+#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)) && ! defined(__x86_64__)
139
+    #define __x86_64__ 1
140
+#endif
141
+
142
+#ifdef _OPENMP
143
+    #define __GAPS_OPENMP__
144
+#endif
145
+
141 146
 #endif // __COGAPS_SIMD_H__
142 147
 
Browse code

added configure script

Tom Sherman authored on 07/06/2018 17:23:39
Showing 1 changed file
... ...
@@ -5,16 +5,12 @@
5 5
     #define __x86_64__ 1
6 6
 #endif
7 7
 
8
-#ifndef _OPENMP
9
-    #pragma message("Compiler does not support OpenMP")
10
-#else
8
+#ifdef _OPENMP
11 9
     #define __GAPS_OPENMP__
12 10
 #endif
13 11
 
14 12
 #ifndef SSE_INSTR_SET
15
-    #ifndef SIMD
16
-        #define SSE_INSTR_SET 0
17
-    #elif defined ( __AVX2__ )
13
+    #if defined ( __AVX2__ )
18 14
         #define SSE_INSTR_SET 8
19 15
     #elif defined ( __AVX__ )
20 16
         #define SSE_INSTR_SET 7
... ...
@@ -23,7 +19,6 @@
23 19
     #elif defined ( __SSE4_1__ )
24 20
         #define SSE_INSTR_SET 5
25 21
     #else
26
-        #error "SIMD not supported"
27 22
         #define SSE_INSTR_SET 0
28 23
     #endif
29 24
 #endif
Browse code

lintr checks

Tom Sherman authored on 04/06/2018 12:39:29
Showing 1 changed file
... ...
@@ -123,13 +123,13 @@ public:
123 123
 #if defined( __GAPS_AVX__ )
124 124
     float scalar()
125 125
     {
126
-        float* ra = (float*)&mData;
126
+        float* ra = reinterpret_cast<float*>(&mData);
127 127
         return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7];
128 128
     }
129 129
 #elif defined( __GAPS_SSE__ )
130 130
     float scalar()
131 131
     {
132
-        float* ra = (float*)&mData;
132
+        float* ra = reinterpret_cast<float*>(&mData);
133 133
         return ra[0] + ra[1] + ra[2] + ra[3];
134 134
     }
135 135
 #else
Browse code

make linter happy

Tom Sherman authored on 03/06/2018 23:57:18
Showing 1 changed file
... ...
@@ -68,7 +68,7 @@ namespace simd
68 68
     const unsigned index_increment = 1;
69 69
     #define SET_SCALAR(x) x
70 70
     #define LOAD_PACKED(x) *(x)
71
-    #define STORE_PACKED(p,x) *p = x
71
+    #define STORE_PACKED(p,x) *(p) = (x)
72 72
     #define ADD_PACKED(a,b) ((a)+(b))
73 73
     #define SUB_PACKED(a,b) ((a)-(b))
74 74
     #define MUL_PACKED(a,b) ((a)*(b))
... ...
@@ -78,10 +78,13 @@ namespace simd
78 78
 class Index
79 79
 {
80 80
 private:
81
+
81 82
     unsigned index;
83
+
82 84
 public:
83
-    Index(unsigned i) : index(i) {}
84
-    void operator=(unsigned val) { index = val; }
85
+
86
+    explicit Index(unsigned i) : index(i) {}
87
+    Index& operator=(unsigned val) { index = val; return *this; }
85 88
     bool operator<(unsigned comp) { return index < comp; }
86 89
     bool operator<=(unsigned comp) { return index <= comp; }
87 90
     void operator++() { index += index_increment; }
... ...
@@ -102,16 +105,16 @@ private:
102 105
 
103 106
 public:
104 107
 
105
-    packedFloat() {}
106
-    packedFloat(float val) : mData(SET_SCALAR(val)) {}
108
+    packedFloat() : mData() {}
109
+    explicit packedFloat(float val) : mData(SET_SCALAR(val)) {}
107 110
 #if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ )
108
-    packedFloat(gaps_packed_t val) : mData(val) {}
111
+    explicit packedFloat(gaps_packed_t val) : mData(val) {}
109 112
 #endif
110 113
 
111
-    packedFloat operator+(packedFloat b) const { return ADD_PACKED(mData, b.mData); }
112
-    packedFloat operator-(packedFloat b) const { return SUB_PACKED(mData, b.mData); }
113
-    packedFloat operator*(packedFloat b) const { return MUL_PACKED(mData, b.mData); }
114
-    packedFloat operator/(packedFloat b) const { return DIV_PACKED(mData, b.mData); }
114
+    packedFloat operator+(packedFloat b) const { return packedFloat(ADD_PACKED(mData, b.mData)); }
115
+    packedFloat operator-(packedFloat b) const { return packedFloat(SUB_PACKED(mData, b.mData)); }
116
+    packedFloat operator*(packedFloat b) const { return packedFloat(MUL_PACKED(mData, b.mData)); }
117
+    packedFloat operator/(packedFloat b) const { return packedFloat(DIV_PACKED(mData, b.mData)); }
115 118
 
116 119
     void operator+=(packedFloat val) { mData = ADD_PACKED(mData, val.mData); }
117 120
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
Browse code

make lintr happy

Tom Sherman authored on 31/05/2018 21:28:52
Showing 1 changed file
... ...
@@ -6,7 +6,7 @@
6 6
 #endif
7 7
 
8 8
 #ifndef _OPENMP
9
-    #warning "Compiler does not support OpenMP"
9
+    #pragma message("Compiler does not support OpenMP")
10 10
 #else
11 11
     #define __GAPS_OPENMP__
12 12
 #endif
... ...
@@ -69,10 +69,10 @@ namespace simd
69 69
     #define SET_SCALAR(x) x
70 70
     #define LOAD_PACKED(x) *(x)
71 71
     #define STORE_PACKED(p,x) *p = x
72
-    #define ADD_PACKED(a,b) (a+b)
73
-    #define SUB_PACKED(a,b) (a-b)
74
-    #define MUL_PACKED(a,b) (a*b)
75
-    #define DIV_PACKED(a,b) (a/b)
72
+    #define ADD_PACKED(a,b) ((a)+(b))
73
+    #define SUB_PACKED(a,b) ((a)-(b))
74
+    #define MUL_PACKED(a,b) ((a)*(b))
75
+    #define DIV_PACKED(a,b) ((a)/(b))
76 76
 #endif
77 77
 
78 78
 class Index
Browse code

reset to old commit; added back safe changes

Tom Sherman authored on 24/05/2018 20:32:14
Showing 1 changed file
... ...
@@ -5,8 +5,10 @@
5 5
     #define __x86_64__ 1
6 6
 #endif
7 7
 
8
-#if !defined(_OPENMP)
8
+#ifndef _OPENMP
9 9
     #warning "Compiler does not support OpenMP"
10
+#else
11
+    #define __GAPS_OPENMP__
10 12
 #endif
11 13
 
12 14
 #ifndef SSE_INSTR_SET
Browse code

full conflict checks in place; need to optimize and test

Tom Sherman authored on 07/05/2018 18:31:59
Showing 1 changed file
... ...
@@ -5,6 +5,10 @@
5 5
     #define __x86_64__ 1
6 6
 #endif
7 7
 
8
+#if !defined(_OPENMP)
9
+    #warning "Compiler does not support OpenMP"
10
+#endif
11
+
8 12
 #ifndef SSE_INSTR_SET
9 13
     #ifndef SIMD
10 14
         #define SSE_INSTR_SET 0
Browse code

cleaned up - ready to parallelize

Tom Sherman authored on 04/05/2018 21:23:09
Showing 1 changed file
... ...
@@ -52,6 +52,7 @@ namespace simd
52 52
     const unsigned index_increment = 4;
53 53
     #define SET_SCALAR(x) _mm_set1_ps(x)
54 54
     #define LOAD_PACKED(x) _mm_load_ps(x)
55
+    #define STORE_PACKED(p,x) _mm_store_ps(p,x)
55 56
     #define ADD_PACKED(a,b) _mm_add_ps(a,b)
56 57
     #define SUB_PACKED(a,b) _mm_sub_ps(a,b)
57 58
     #define MUL_PACKED(a,b) _mm_mul_ps(a,b)
... ...
@@ -61,6 +62,7 @@ namespace simd
61 62
     const unsigned index_increment = 1;
62 63
     #define SET_SCALAR(x) x
63 64
     #define LOAD_PACKED(x) *(x)
65
+    #define STORE_PACKED(p,x) *p = x
64 66
     #define ADD_PACKED(a,b) (a+b)
65 67
     #define SUB_PACKED(a,b) (a-b)
66 68
     #define MUL_PACKED(a,b) (a*b)
... ...
@@ -80,9 +82,11 @@ public:
80 82
     unsigned value() const { return index; }
81 83
     unsigned increment() const { return index_increment; }
82 84
     friend const float* operator+(const float *ptr, Index ndx);
85
+    friend float* operator+(float *ptr, Index ndx);
83 86
 };
84 87
 
85 88
 inline const float* operator+(const float *ptr, Index ndx) { return ptr + ndx.index; }
89
+inline float* operator+(float *ptr, Index ndx) { return ptr + ndx.index; }
86 90
 
87 91
 class packedFloat
88 92
 {
... ...
@@ -105,6 +109,7 @@ public:
105 109
 
106 110
     void operator+=(packedFloat val) { mData = ADD_PACKED(mData, val.mData); }
107 111
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
112
+    void store(float *ptr) { STORE_PACKED(ptr, mData); }
108 113
 
109 114
 #if defined( __GAPS_AVX__ )
110 115
     float scalar()
Browse code

separated math into own folder

Tom Sherman authored on 03/05/2018 21:05:24
Showing 1 changed file
1 1
new file mode 100644
... ...
@@ -0,0 +1,133 @@
1
+#ifndef __COGAPS_SIMD_H__
2
+#define __COGAPS_SIMD_H__
3
+
4
+#if (defined(_M_AMD64) || defined(_M_X64) || defined(__amd64)) && ! defined(__x86_64__)
5
+    #define __x86_64__ 1
6
+#endif
7
+
8
+#ifndef SSE_INSTR_SET
9
+    #ifndef SIMD
10
+        #define SSE_INSTR_SET 0
11
+    #elif defined ( __AVX2__ )
12
+        #define SSE_INSTR_SET 8
13
+    #elif defined ( __AVX__ )
14
+        #define SSE_INSTR_SET 7
15
+    #elif defined ( __SSE4_2__ )
16
+        #define SSE_INSTR_SET 6
17
+    #elif defined ( __SSE4_1__ )
18
+        #define SSE_INSTR_SET 5
19
+    #else
20
+        #error "SIMD not supported"
21
+        #define SSE_INSTR_SET 0
22
+    #endif
23
+#endif
24
+
25
+#if SSE_INSTR_SET == 7
26
+    #define __GAPS_AVX__
27
+    #include <immintrin.h>
28
+#elif SSE_INSTR_SET == 6
29
+    #define __GAPS_SSE__
30
+    #include <nmmintrin.h>
31
+#elif SSE_INSTR_SET == 5
32
+    #define __GAPS_SSE__
33
+    #include <smmintrin.h>
34
+#endif
35
+
36
+namespace gaps
37
+{
38
+namespace simd
39
+{
40
+
41
+#if defined( __GAPS_AVX__ )
42
+    typedef __m256 gaps_packed_t;
43
+    const unsigned index_increment = 8;
44
+    #define SET_SCALAR(x) _mm256_set1_ps(x)
45
+    #define LOAD_PACKED(x) _mm256_load_ps(x)
46
+    #define ADD_PACKED(a,b) _mm256_add_ps(a,b)
47
+    #define SUB_PACKED(a,b) _mm256_sub_ps(a,b)
48
+    #define MUL_PACKED(a,b) _mm256_mul_ps(a,b)
49
+    #define DIV_PACKED(a,b) _mm256_div_ps(a,b)
50
+#elif defined( __GAPS_SSE__ )
51
+    typedef __m128 gaps_packed_t;
52
+    const unsigned index_increment = 4;
53
+    #define SET_SCALAR(x) _mm_set1_ps(x)
54
+    #define LOAD_PACKED(x) _mm_load_ps(x)
55
+    #define ADD_PACKED(a,b) _mm_add_ps(a,b)
56
+    #define SUB_PACKED(a,b) _mm_sub_ps(a,b)
57
+    #define MUL_PACKED(a,b) _mm_mul_ps(a,b)
58
+    #define DIV_PACKED(a,b) _mm_div_ps(a,b)
59
+#else
60
+    typedef float gaps_packed_t;
61
+    const unsigned index_increment = 1;
62
+    #define SET_SCALAR(x) x
63
+    #define LOAD_PACKED(x) *(x)
64
+    #define ADD_PACKED(a,b) (a+b)
65
+    #define SUB_PACKED(a,b) (a-b)
66
+    #define MUL_PACKED(a,b) (a*b)
67
+    #define DIV_PACKED(a,b) (a/b)
68
+#endif
69
+
70
+class Index
71
+{
72
+private:
73
+    unsigned index;
74
+public:
75
+    Index(unsigned i) : index(i) {}
76
+    void operator=(unsigned val) { index = val; }
77
+    bool operator<(unsigned comp) { return index < comp; }
78
+    bool operator<=(unsigned comp) { return index <= comp; }
79
+    void operator++() { index += index_increment; }
80
+    unsigned value() const { return index; }
81
+    unsigned increment() const { return index_increment; }
82
+    friend const float* operator+(const float *ptr, Index ndx);
83
+};
84
+
85
+inline const float* operator+(const float *ptr, Index ndx) { return ptr + ndx.index; }
86
+
87
+class packedFloat
88
+{
89
+private:
90
+
91
+    gaps_packed_t mData;
92
+
93
+public:
94
+
95
+    packedFloat() {}
96
+    packedFloat(float val) : mData(SET_SCALAR(val)) {}
97
+#if defined( __GAPS_SSE__ ) || defined( __GAPS_AVX__ )
98
+    packedFloat(gaps_packed_t val) : mData(val) {}
99
+#endif
100
+
101
+    packedFloat operator+(packedFloat b) const { return ADD_PACKED(mData, b.mData); }
102
+    packedFloat operator-(packedFloat b) const { return SUB_PACKED(mData, b.mData); }
103
+    packedFloat operator*(packedFloat b) const { return MUL_PACKED(mData, b.mData); }
104
+    packedFloat operator/(packedFloat b) const { return DIV_PACKED(mData, b.mData); }
105
+
106
+    void operator+=(packedFloat val) { mData = ADD_PACKED(mData, val.mData); }
107
+    void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
108
+
109
+#if defined( __GAPS_AVX__ )
110
+    float scalar()
111
+    {
112
+        float* ra = (float*)&mData;
113
+        return ra[0] + ra[1] + ra[2] + ra[3] + ra[4] + ra[5] + ra[6] + ra[7];
114
+    }
115
+#elif defined( __GAPS_SSE__ )
116
+    float scalar()
117
+    {
118
+        float* ra = (float*)&mData;
119
+        return ra[0] + ra[1] + ra[2] + ra[3];
120
+    }
121
+#else
122
+    float scalar()
123
+    {
124
+        return mData;
125
+    }
126
+#endif
127
+};
128
+
129
+} // namespace simd
130
+} // namespace gaps
131
+
132
+#endif // __COGAPS_SIMD_H__
133
+