1 | 1 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,350 @@ |
1 |
+////////////////////////////////////////////////////////////////////////////// |
|
2 |
+// |
|
3 |
+// (C) Copyright Ion Gaztanaga 2015-2016. |
|
4 |
+// Distributed under the Boost Software License, Version 1.0. |
|
5 |
+// (See accompanying file LICENSE_1_0.txt or copy at |
|
6 |
+// http://www.boost.org/LICENSE_1_0.txt) |
|
7 |
+// |
|
8 |
+// See http://www.boost.org/libs/move for documentation. |
|
9 |
+// |
|
10 |
+////////////////////////////////////////////////////////////////////////////// |
|
11 |
+ |
|
12 |
+#ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP |
|
13 |
+#define BOOST_MOVE_ADAPTIVE_MERGE_HPP |
|
14 |
+ |
|
15 |
+#include <boost/move/detail/config_begin.hpp> |
|
16 |
+#include <boost/move/algo/detail/adaptive_sort_merge.hpp> |
|
17 |
+ |
|
18 |
+namespace boost { |
|
19 |
+namespace movelib { |
|
20 |
+ |
|
21 |
+///@cond |
|
22 |
+namespace detail_adaptive { |
|
23 |
+ |
|
24 |
//Combines ("smart merges") the pre-sorted blocks of the two consecutive
//sequences [first+collected, first+len1) and [first+len1, first+len1+len2).
//
//first:     start of the whole range; the leading "collected" elements are
//           previously extracted unique values used as keys and/or buffer.
//len1/len2: lengths of the two consecutive sorted sequences.
//n_keys:    number of collected elements acting as movement-imitation keys.
//           Zero means keys are emulated with integers stored in the
//           trailing part of the external buffer.
//l_block:   block length used to partition the data.
//use_internal_buf: l_block of the collected elements act as a merge buffer.
//xbuf_used: the external buffer can hold at least l_block elements.
template<class RandIt, class Compare, class XBuf>
inline void adaptive_merge_combine_blocks( RandIt first
                      , typename iterator_traits<RandIt>::size_type len1
                      , typename iterator_traits<RandIt>::size_type len2
                      , typename iterator_traits<RandIt>::size_type collected
                      , typename iterator_traits<RandIt>::size_type n_keys
                      , typename iterator_traits<RandIt>::size_type l_block
                      , bool use_internal_buf
                      , bool xbuf_used
                      , Compare comp
                      , XBuf & xbuf
                      )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const len = len1+len2;
   //Elements to combine exclude the collected keys/buffer at the front
   size_type const l_combine = len-collected;
   size_type const l_combine1 = len1-collected;

   if(n_keys){
      //In-range keys are available: real elements act as the imitation buffer
      RandIt const first_data = first+collected;
      RandIt const keys = first;
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
      if(xbuf_used){
         //External memory is big enough: merge with move operations using xbuf
         if(xbuf.size() < l_block){
            //Make sure xbuf holds at least l_block constructed elements
            xbuf.initialize_until(l_block, *first);
         }
         BOOST_ASSERT(xbuf.size() >= l_block);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, comp, l_combine
                        , l_combine1, l_block, xbuf
                        , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         op_merge_blocks_with_buf
            (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len);
      }
      else{
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, comp, l_combine
                        , l_combine1, l_block, xbuf
                        , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         if(use_internal_buf){
            //No external memory, but an internal buffer of l_block elements
            //sits just before first_data: merge swapping with that buffer
            op_merge_blocks_with_buf
               (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len);
         }
         else{
            //No buffer at all: fall back to rotation-based block merging
            merge_blocks_bufferless
               (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len);
         }
      }
   }
   else{
      //No in-range keys: emulate keys with integers placed in the trailing,
      //suitably aligned part of the external buffer (see aligned_trailing)
      xbuf.shrink_to_fit(l_block);
      if(xbuf.size() < l_block){
         xbuf.initialize_until(l_block, *first);
      }
      size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
      size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
      combine_params( uint_keys, less(), l_combine
                     , l_combine1, l_block, xbuf
                     , n_block_a, n_block_b, l_irreg1, l_irreg2, true);   //Outputs
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len);
      BOOST_ASSERT(xbuf.size() >= l_block);
      op_merge_blocks_with_buf
         (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
      xbuf.clear();
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len);
   }
}
|
94 |
+ |
|
95 |
//Final step of adaptive_merge: after block combination, the leading
//"collected" elements (keys plus the optional internal buffer) must be
//sorted and merged back into the rest of the (already merged) range.
//
//l_block and use_internal_buf are intentionally unused here (parameters are
//kept unnamed to preserve the common signature with the combine step).
template<class RandIt, class Compare, class XBuf>
inline void adaptive_merge_final_merge( RandIt first
                      , typename iterator_traits<RandIt>::size_type len1
                      , typename iterator_traits<RandIt>::size_type len2
                      , typename iterator_traits<RandIt>::size_type collected
                      , typename iterator_traits<RandIt>::size_type l_intbuf
                      , typename iterator_traits<RandIt>::size_type //l_block
                      , bool //use_internal_buf
                      , bool xbuf_used
                      , Compare comp
                      , XBuf & xbuf
                      )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   //Keys are the collected elements that were not used as internal buffer
   size_type n_keys = collected-l_intbuf;
   size_type len = len1+len2;
   //If the external buffer was used and no keys remain in front, there is
   //nothing left to merge back
   if (!xbuf_used || n_keys) {
      xbuf.clear();
      //When the external buffer was used, the internal buffer slots were
      //presumably consumed by the merge, so only n_keys elements remain in
      //front; otherwise all "collected" elements are still there
      const size_type middle = xbuf_used && n_keys ? n_keys: collected;
      //Keys were reordered by the imitation-buffer step: re-sort them
      unstable_sort(first, first + middle, comp, xbuf);
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len);
      //Merge the sorted keys/buffer with the rest of the range
      stable_merge(first, first + middle, first + len, comp, xbuf);
   }
   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len);
}
|
120 |
+ |
|
121 |
//Computes the minimum number of keys needed to run the ideal algorithm when
//keys must be taken from the data itself (no external key storage).
//Starts from one key per block of both halves and decreases the candidate
//while the key-free part of the first half (plus the second half) still
//yields at least "candidate" blocks.
template<class SizeType>
inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
{
   const SizeType blocks_second = len2/l_block;
   //Elements of the first half that are not part of the internal buffer
   const SizeType aux_first     = len1 - l_intbuf;
   SizeType candidate;
   for( candidate = len1/l_block + blocks_second
      ; candidate >= (aux_first - candidate)/l_block + blocks_second
      ; --candidate){
      //Shrink until the candidate key count fits
   }
   //The loop overshoots by one, compensate
   return SizeType(++candidate);
}
|
135 |
+ |
|
136 |
//Computes the minimum number of keys needed to run the ideal algorithm when
//keys can live in external storage: one key per block of each half, where
//the internal buffer portion (l_intbuf) of the first half needs no keys.
template<class SizeType>
inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
{
   const SizeType first_half_blocks  = (len1-l_intbuf)/l_block;
   const SizeType second_half_blocks = len2/l_block;
   return SizeType(first_half_blocks + second_half_blocks);
}
|
144 |
+ |
|
145 |
//Decides how many keys and how much internal buffer adaptive_merge needs.
//
//rl_block (in/out): desired block length on entry; possibly enlarged to the
//                   external buffer's capacity on exit.
//l_intbuf_inout (out): length of the internal (in-range) buffer to extract;
//                      zero when the external buffer can play that role.
//Returns the number of in-range keys to collect (zero when keys can be
//emulated in the external buffer's trailing storage).
template<class SizeType, class Xbuf>
inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
{
   typedef SizeType size_type;
   size_type l_block = rl_block;
   //Note: l_intbuf is decided against the ORIGINAL l_block, before the
   //possible enlargement below; keep this ordering
   size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;

   //If the external buffer is bigger than requested, use bigger blocks
   //to reduce the number of keys/blocks needed
   if (xbuf.capacity() > l_block){
      l_block = xbuf.capacity();
   }

   //This is the minimum number of keys to implement the ideal algorithm
   size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf);
   BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));

   //If the external buffer has aligned trailing storage for the needed
   //integral keys, no in-range keys need to be collected at all
   if(xbuf.template supports_aligned_trailing<size_type>
      ( l_block
      , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf)))
   {
      n_keys = 0u;
   }
   l_intbuf_inout = l_intbuf;
   rl_block = l_block;
   return n_keys;
}
|
170 |
+ |
|
171 |
+// Main explanation of the merge algorithm. |
|
172 |
+// |
|
173 |
+// csqrtlen = ceil(sqrt(len)); |
|
174 |
+// |
|
175 |
+// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect |
|
176 |
+// unique elements are extracted from elements to be sorted and placed in the beginning of the range. |
|
177 |
+// |
|
178 |
+// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements |
|
179 |
+// are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. |
|
180 |
+// |
|
181 |
+// Explanation of the "combine_blocks" step: |
|
182 |
+// |
|
183 |
+// * Trailing [first+to_collect, first+len1) elements are divided in groups of csqrtlen elements.
|
184 |
+// Remaining elements that can't form a group are grouped in front of those elements. |
|
185 |
+// * Trailing [first+len1, first+len1+len2) elements are divided in groups of csqrtlen elements.
|
186 |
+// Remaining elements that can't form a group are grouped in the back of those elements. |
|
187 |
+// * In parallel the following two steps are performed: |
|
188 |
+// * Groups are selection-sorted by first or last element (depending whether they are going |
|
189 |
+// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. |
|
190 |
+// * Elements of each block pair are merged using the csqrtlen buffer taking into account |
|
191 |
+// if they belong to the first half or second half (marked by the key). |
|
192 |
+// |
|
193 |
+// * In the final merge step leading "to_collect" elements are merged with rotations |
|
194 |
+// with the rest of merged elements in the "combine_blocks" step. |
|
195 |
+// |
|
196 |
+// Corner cases: |
|
197 |
+// |
|
198 |
+// * If no "to_collect" elements can be extracted: |
|
199 |
+// |
|
200 |
+// * If more than a minimum number of elements is extracted |
|
201 |
+// then reduces the number of elements used as buffer and keys in the |
|
202 |
+//   "combine_blocks" step. If "combine_blocks" does not have enough keys due to this reduction
|
203 |
+// then uses a rotation based smart merge. |
|
204 |
+// |
|
205 |
+// * If the minimum number of keys can't be extracted, a rotation-based merge is performed. |
|
206 |
+// |
|
207 |
+// * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. |
|
208 |
+// |
|
209 |
+// * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. |
|
210 |
+// |
|
211 |
+// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), |
|
212 |
+// then no csqrtlen need to be extracted and "combine_blocks" will use integral |
|
213 |
+// keys to combine blocks. |
|
214 |
//Core implementation of adaptive_merge: merges the two consecutive sorted
//sequences [first, first+len1) and [first+len1, first+len1+len2) using the
//external buffer xbuf when possible, and the block-merge algorithm with
//collected keys/buffer otherwise. See the algorithm description above.
template<class RandIt, class Compare, class XBuf>
void adaptive_merge_impl
   ( RandIt first
   , typename iterator_traits<RandIt>::size_type len1
   , typename iterator_traits<RandIt>::size_type len2
   , Compare comp
   , XBuf & xbuf
   )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;

   //If the external buffer can hold the smaller sequence, a plain
   //buffered merge is optimal
   if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
      buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
   }
   else{
      const size_type len = len1+len2;
      //Calculate ideal parameters and try to collect needed unique keys
      size_type l_block = size_type(ceil_sqrt(len));

      //One range is not big enough to extract keys and the internal buffer so a
      //rotation-based merge will do just fine
      if(len1 <= l_block*2 || len2 <= l_block*2){
         merge_bufferless(first, first+len1, first+len1+len2, comp);
         return;
      }

      //Detail the number of keys and internal buffer. If xbuf has enough memory, no
      //internal buffer is needed so l_intbuf will remain 0.
      size_type l_intbuf = 0;
      size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
      size_type const to_collect = l_intbuf+n_keys;
      //Try to extract needed unique values from the first range
      size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf);
      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len);

      //If even the minimum number of keys is not available on the first range,
      //fall back to rotation-based merges (collected elements were moved to the
      //front, so they must be merged back first)
      if(collected != to_collect && collected < 4){
         merge_bufferless(first, first+collected, first+len1, comp);
         merge_bufferless(first, first + len1, first + len1 + len2, comp);
         return;
      }

      //If not enough keys but more than minimum, adjust the internal buffer and key count
      bool use_internal_buf = collected == to_collect;
      if (!use_internal_buf){
         l_intbuf = 0u;
         n_keys = collected;
         l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
         //If use_internal_buf is false, the internal buffer will be zero and
         //rotation-based combination will be used
         l_intbuf = use_internal_buf ? l_block : 0u;
      }

      bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
      //Merge trailing elements using smart merges
      adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
      //Merge buffer and keys with the rest of the values
      adaptive_merge_final_merge   (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
   }
}
|
273 |
+ |
|
274 |
+} //namespace detail_adaptive { |
|
275 |
+ |
|
276 |
+///@endcond |
|
277 |
+ |
|
278 |
+//! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last) |
|
279 |
+//! into one sorted range [first, last) according to the given comparison function comp. |
|
280 |
+//! The algorithm is stable (if there are equivalent elements in the original two ranges, |
|
281 |
+//! the elements from the first range (preserving their original order) precede the elements |
|
282 |
+//! from the second range (preserving their original order)).
|
283 |
+//! |
|
284 |
+//! <b>Requires</b>: |
|
285 |
+//! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator. |
|
286 |
+//! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible. |
|
287 |
+//! |
|
288 |
+//! <b>Parameters</b>: |
|
289 |
+//! - first: the beginning of the first sorted range. |
|
290 |
+//! - middle: the end of the first sorted range and the beginning of the second |
|
291 |
+//! - last: the end of the second sorted range |
|
292 |
+//! - comp: comparison function object which returns true if the first argument is ordered before the second.
|
293 |
+//! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len" |
|
294 |
+//! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len |
|
295 |
+//! is min(std::distance(first, middle), std::distance(middle, last)). |
|
296 |
+//! |
|
297 |
+//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type |
|
298 |
+//! of dereferenced RandIt throws. |
|
299 |
+//! |
|
300 |
+//! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps. |
|
301 |
+//! Constant factor for comparisons and data movement is minimized when uninitialized_len |
|
302 |
+//! is min(std::distance(first, middle), std::distance(middle, last)). |
|
303 |
+//! Pretty good enough performance is achieved when uninitialized_len is |
|
304 |
+//! ceil(sqrt(std::distance(first, last)))*2. |
|
305 |
+//! |
|
306 |
+//! <b>Caution</b>: Experimental implementation, not production-ready. |
|
307 |
template<class RandIt, class Compare>
void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp
                , typename iterator_traits<RandIt>::value_type* uninitialized = 0
                , typename iterator_traits<RandIt>::size_type uninitialized_len = 0)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   typedef typename iterator_traits<RandIt>::value_type value_type;

   //Empty input on either side: nothing to merge
   if (first == middle || middle == last){
      return;
   }

   //Reduce ranges to merge if possible:
   //skip the leading elements of the first range that are already not
   //after *middle (they are in their final position)
   do {
      if (comp(*middle, *first)){
         break;
      }
      ++first;
      if (first == middle)
         return;   //whole first range already in place
   } while(1);

   //Symmetrically, trim the trailing elements of the second range that are
   //already not before the last element of the first range
   RandIt first_high(middle);
   --first_high;
   do {
      --last;
      if (comp(*last, *first_high)){
         ++last;   //*last must still be merged, keep it in the range
         break;
      }
      if (last == middle)
         return;   //whole second range already in place
   } while(1);

   //Wrap the caller-provided raw storage (possibly null/empty) and delegate
   ::boost::movelib::adaptive_xbuf<value_type, value_type*, size_type> xbuf(uninitialized, size_type(uninitialized_len));
   ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf);
}
|
344 |
+ |
|
345 |
+} //namespace movelib { |
|
346 |
+} //namespace boost { |
|
347 |
+ |
|
348 |
+#include <boost/move/detail/config_end.hpp> |
|
349 |
+ |
|
350 |
+#endif //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP |
0 | 351 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,638 @@ |
1 |
+////////////////////////////////////////////////////////////////////////////// |
|
2 |
+// |
|
3 |
+// (C) Copyright Ion Gaztanaga 2015-2016. |
|
4 |
+// Distributed under the Boost Software License, Version 1.0. |
|
5 |
+// (See accompanying file LICENSE_1_0.txt or copy at |
|
6 |
+// http://www.boost.org/LICENSE_1_0.txt) |
|
7 |
+// |
|
8 |
+// See http://www.boost.org/libs/move for documentation. |
|
9 |
+// |
|
10 |
+////////////////////////////////////////////////////////////////////////////// |
|
11 |
+ |
|
12 |
+#ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP |
|
13 |
+#define BOOST_MOVE_ADAPTIVE_SORT_HPP |
|
14 |
+ |
|
15 |
+#include <boost/move/detail/config_begin.hpp> |
|
16 |
+#include <boost/move/algo/detail/adaptive_sort_merge.hpp> |
|
17 |
+#include <boost/core/ignore_unused.hpp> |
|
18 |
+ |
|
19 |
+namespace boost { |
|
20 |
+namespace movelib { |
|
21 |
+ |
|
22 |
+///@cond |
|
23 |
+namespace detail_adaptive { |
|
24 |
+ |
|
25 |
+template<class RandIt> |
|
26 |
+void move_data_backward( RandIt cur_pos |
|
27 |
+ , typename iterator_traits<RandIt>::size_type const l_data |
|
28 |
+ , RandIt new_pos |
|
29 |
+ , bool const xbuf_used) |
|
30 |
+{ |
|
31 |
+ //Move buffer to the total combination right |
|
32 |
+ if(xbuf_used){ |
|
33 |
+ boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data); |
|
34 |
+ } |
|
35 |
+ else{ |
|
36 |
+ boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data); |
|
37 |
+ //Rotate does less moves but it seems slower due to cache issues |
|
38 |
+ //rotate_gcd(first-l_block, first+len-l_block, first+len); |
|
39 |
+ } |
|
40 |
+} |
|
41 |
+ |
|
42 |
+template<class RandIt> |
|
43 |
+void move_data_forward( RandIt cur_pos |
|
44 |
+ , typename iterator_traits<RandIt>::size_type const l_data |
|
45 |
+ , RandIt new_pos |
|
46 |
+ , bool const xbuf_used) |
|
47 |
+{ |
|
48 |
+ //Move buffer to the total combination right |
|
49 |
+ if(xbuf_used){ |
|
50 |
+ boost::move(cur_pos, cur_pos+l_data, new_pos); |
|
51 |
+ } |
|
52 |
+ else{ |
|
53 |
+ boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); |
|
54 |
+ //Rotate does less moves but it seems slower due to cache issues |
|
55 |
+ //rotate_gcd(first-l_block, first+len-l_block, first+len); |
|
56 |
+ } |
|
57 |
+} |
|
58 |
+ |
|
59 |
+// build blocks of length 2*l_build_buf. l_build_buf is power of two |
|
60 |
+// input: [0, l_build_buf) elements are buffer, rest unsorted elements |
|
61 |
+// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted |
|
62 |
+// |
|
63 |
+// First elements are merged from right to left until elements start |
|
64 |
+// at first. All old elements [first, first + l_build_buf) are placed at the end |
|
65 |
+// [first+len-l_build_buf, first+len). To achieve this: |
|
66 |
+// - If we have external memory to merge, we save elements from the buffer |
|
67 |
+// so that a non-swapping merge is used. Buffer elements are restored |
|
68 |
+// at the end of the buffer from the external memory. |
|
69 |
+// |
|
70 |
+// - When the external memory is not available or it is insufficient |
|
71 |
+// for a merge operation, left swap merging is used. |
|
72 |
+// |
|
73 |
+// Once elements are merged left to right in blocks of l_build_buf, then a single left |
|
74 |
+// to right merge step is performed to achieve merged blocks of size 2K. |
|
75 |
+// If external memory is available, usual merge is used, swap merging otherwise. |
|
76 |
+// |
|
77 |
+// As a last step, if auxiliary memory is available in-place merge is performed. |
|
78 |
+// until all is merged or auxiliary memory is not large enough. |
|
79 |
//Builds sorted blocks of length 2*l_build_buf (see the description above).
//Input layout:  [first, first+l_build_buf) is the internal buffer, the rest
//               are the unsorted elements.
//Output layout: buffer restored at the front, data merged in runs of
//               2*l_build_buf (plus a possibly shorter last run).
//Returns the achieved run length: min(elements, 2*l_build_buf).
template<class RandIt, class Compare, class XBuf>
typename iterator_traits<RandIt>::size_type
   adaptive_sort_build_blocks
      ( RandIt const first
      , typename iterator_traits<RandIt>::size_type const len
      , typename iterator_traits<RandIt>::size_type const l_base
      , typename iterator_traits<RandIt>::size_type const l_build_buf
      , XBuf & xbuf
      , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   BOOST_ASSERT(l_build_buf <= len);
   //l_build_buf/l_base must be a power of two (required by the doubling merges)
   BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));

   //Place the start pointer after the buffer
   RandIt first_block = first + l_build_buf;
   size_type const elements_in_blocks = len - l_build_buf;

   //////////////////////////////////
   // Start of merge to left step
   //////////////////////////////////
   size_type l_merged = 0u;

   BOOST_ASSERT(l_build_buf);
   //If there is not enough external buffer for the insertion sort step,
   //just avoid the external buffer entirely
   size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
   kbuf = kbuf < l_base ? 0 : kbuf;

   if(kbuf){
      //Backup internal buffer values in external buffer so they can be overwritten
      xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
      l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());

      //Now combine them using the buffer. Elements from buffer can be
      //overwritten since they've been saved to xbuf
      l_merged = op_merge_left_step_multiple
         ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());

      //Restore internal buffer from external buffer unless kbuf was l_build_buf,
      //in that case restoration will happen later
      if(kbuf != l_build_buf){
         boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
      }
   }
   else{
      //No usable external buffer: plain insertion sort, then rotate the
      //internal buffer past the freshly sorted run
      l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
      rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
   }

   //Now combine elements using the buffer. Elements from buffer can't be
   //overwritten since xbuf was not big enough, so merge swapping elements.
   l_merged = op_merge_left_step_multiple
      (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());

   BOOST_ASSERT(l_merged == l_build_buf);

   //////////////////////////////////
   // Start of merge to right step
   //////////////////////////////////

   //If kbuf is l_build_buf then we can merge right without swapping
   //Saved data is still in xbuf
   if(kbuf && kbuf == l_build_buf){
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
      //Restore internal buffer from external buffer if kbuf was l_build_buf,
      //as this operation was previously delayed.
      boost::move(xbuf.data(), xbuf.data() + kbuf, first);
   }
   else{
      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
   }
   xbuf.clear();
   //2*l_build_buf or total already merged
   return min_value<size_type>(elements_in_blocks, 2*l_build_buf);
}
|
154 |
+ |
|
155 |
//One doubling pass of the combination phase: merges consecutive pairs of
//l_prev_merged-length runs into 2*l_prev_merged-length runs using block
//merging driven by the "keys" imitation buffer.
//
//use_buf:    a real merge buffer of l_block elements is available next to
//            each combined segment.
//xbuf_used:  the buffer's original content lives in external memory, so
//            buffer elements may be overwritten (moved, not swapped).
//merge_left: process segments left-to-right with the buffer on the left;
//            otherwise right-to-left with the buffer on the right.
template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
void adaptive_sort_combine_blocks
   ( RandItKeys const keys
   , KeyCompare key_comp
   , RandIt const first
   , typename iterator_traits<RandIt>::size_type const len
   , typename iterator_traits<RandIt>::size_type const l_prev_merged
   , typename iterator_traits<RandIt>::size_type const l_block
   , bool const use_buf
   , bool const xbuf_used
   , XBuf & xbuf
   , Compare comp
   , bool merge_left)
{
   boost::ignore_unused(xbuf);
   typedef typename iterator_traits<RandIt>::size_type size_type;

   size_type const l_reg_combined = 2*l_prev_merged;
   size_type l_irreg_combined = 0;
   size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
   size_type const n_reg_combined = len/l_reg_combined;
   RandIt combined_first = first;

   boost::ignore_unused(l_total_combined);
   BOOST_ASSERT(l_total_combined <= len);

   //Number of segments to process: the regular ones plus the trailing
   //irregular (shorter) one if present
   size_type const max_i = n_reg_combined + (l_irreg_combined != 0);

   if(merge_left || !use_buf) {
      //Left-to-right processing (also used for bufferless merging)
      for( size_type combined_i = 0; combined_i != max_i; ) {
         //Now merge blocks
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         //When merging left with a live buffer, the buffer sits just before
         //the segment and may be overwritten only if backed by external memory
         range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                        , l_prev_merged, l_block, rbuf
                        , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         if(!use_buf){
            merge_blocks_bufferless
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
         }
         else{
            merge_blocks_left
               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         }
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block);
         ++combined_i;
         if(combined_i != max_i)
            combined_first += l_reg_combined;
      }
   }
   else{
      //Right-to-left processing with the buffer on the right side
      combined_first += l_reg_combined*(max_i-1);
      for( size_type combined_i = max_i; combined_i; ) {
         --combined_i;
         bool const is_last = combined_i==n_reg_combined;
         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;

         RandIt const combined_last(combined_first+l_cur_combined);
         //Buffer sits just after the segment; overwritable only if backed
         //by external memory
         range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
         combine_params( keys, key_comp, l_cur_combined
                        , l_prev_merged, l_block, rbuf
                        , n_block_a, n_block_b, l_irreg1, l_irreg2);  //Outputs
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
         merge_blocks_right
            (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block);
         if(combined_i)
            combined_first -= l_reg_combined;
      }
   }
}
|
235 |
+ |
|
236 |
+//Returns true if buffer is placed in |
|
237 |
+//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is |
|
238 |
+//[buffer,buffer+l_intbuf) |
|
239 |
+template<class RandIt, class Compare, class XBuf> |
|
240 |
+bool adaptive_sort_combine_all_blocks |
|
241 |
+ ( RandIt keys |
|
242 |
+ , typename iterator_traits<RandIt>::size_type &n_keys |
|
243 |
+ , RandIt const buffer |
|
244 |
+ , typename iterator_traits<RandIt>::size_type const l_buf_plus_data |
|
245 |
+ , typename iterator_traits<RandIt>::size_type l_merged |
|
246 |
+ , typename iterator_traits<RandIt>::size_type &l_intbuf |
|
247 |
+ , XBuf & xbuf |
|
248 |
+ , Compare comp) |
|
249 |
+{ |
|
250 |
+ typedef typename iterator_traits<RandIt>::size_type size_type; |
|
251 |
+ RandIt const first = buffer + l_intbuf; |
|
252 |
+ size_type const l_data = l_buf_plus_data - l_intbuf; |
|
253 |
+ size_type const l_unique = l_intbuf+n_keys; |
|
254 |
+ //Backup data to external buffer once if possible |
|
255 |
+ bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); |
|
256 |
+ if(common_xbuf){ |
|
257 |
+ xbuf.move_assign(buffer, l_intbuf); |
|
258 |
+ } |
|
259 |
+ |
|
260 |
+ bool prev_merge_left = true; |
|
261 |
+ size_type l_prev_total_combined = l_merged, l_prev_block = 0; |
|
262 |
+ bool prev_use_internal_buf = true; |
|
263 |
+ |
|
264 |
+ for( size_type n = 0; l_data > l_merged |
|
265 |
+ ; l_merged*=2 |
|
266 |
+ , ++n){ |
|
267 |
+ //If l_intbuf is non-zero, use that internal buffer. |
|
268 |
+ // Implies l_block == l_intbuf && use_internal_buf == true |
|
269 |
+ //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer, |
|
270 |
+ // Implies l_block == n_keys/2 && use_internal_buf == true |
|
271 |
+ //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false) |
|
272 |
+ bool use_internal_buf = false; |
|
273 |
+ size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf); |
|
274 |
+ BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); |
|
275 |
+ BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); |
|
276 |
+ BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); |
|
277 |
+ |
|
278 |
+ bool const is_merge_left = (n&1) == 0; |
|
279 |
+ size_type const l_total_combined = calculate_total_combined(l_data, l_merged); |
|
280 |
+ if(n && prev_use_internal_buf && prev_merge_left){ |
|
281 |
+ if(is_merge_left || !use_internal_buf){ |
|
282 |
+ move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); |
|
283 |
+ } |
|
284 |
+ else{ |
|
285 |
+ //Put the buffer just after l_total_combined |
|
286 |
+ RandIt const buf_end = first+l_prev_total_combined; |
|
287 |
+ RandIt const buf_beg = buf_end-l_block; |
|
288 |
+ if(l_prev_total_combined > l_total_combined){ |
|
289 |
+ size_type const l_diff = l_prev_total_combined - l_total_combined; |
|
290 |
+ move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); |
|
291 |
+ } |
|
292 |
+ else if(l_prev_total_combined < l_total_combined){ |
|
293 |
+ size_type const l_diff = l_total_combined - l_prev_total_combined; |
|
294 |
+ move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); |
|
295 |
+ } |
|
296 |
+ } |
|
297 |
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf); |
|
298 |
+ } |
|
299 |
+ |
|
300 |
+ //Combine to form l_merged*2 segments |
|
301 |
+ if(n_keys){ |
|
302 |
+ size_type upper_n_keys_this_iter = 2*l_merged/l_block; |
|
303 |
+ if(upper_n_keys_this_iter > 256){ |
|
304 |
+ adaptive_sort_combine_blocks |
|
305 |
+ ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block |
|
306 |
+ , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); |
|
307 |
+ } |
|
308 |
+ else{ |
|
309 |
+ unsigned char uint_keys[256]; |
|
310 |
+ adaptive_sort_combine_blocks |
|
311 |
+ ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block |
|
312 |
+ , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); |
|
313 |
+ } |
|
314 |
+ } |
|
315 |
+ else{ |
|
316 |
+ size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); |
|
317 |
+ adaptive_sort_combine_blocks |
|
318 |
+ ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block |
|
319 |
+ , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); |
|
320 |
+ } |
|
321 |
+ |
|
322 |
+ BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf); |
|
323 |
+ prev_merge_left = is_merge_left; |
|
324 |
+ l_prev_total_combined = l_total_combined; |
|
325 |
+ l_prev_block = l_block; |
|
326 |
+ prev_use_internal_buf = use_internal_buf; |
|
327 |
+ } |
|
328 |
+ BOOST_ASSERT(l_prev_total_combined == l_data); |
|
329 |
+ bool const buffer_right = prev_use_internal_buf && prev_merge_left; |
|
330 |
+ |
|
331 |
+ l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; |
|
332 |
+ n_keys = l_unique - l_intbuf; |
|
333 |
+ //Restore data from to external common buffer if used |
|
334 |
+ if(common_xbuf){ |
|
335 |
+ if(buffer_right){ |
|
336 |
+ boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); |
|
337 |
+ } |
|
338 |
+ else{ |
|
339 |
+ boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); |
|
340 |
+ } |
|
341 |
+ } |
|
342 |
+ return buffer_right; |
|
343 |
+} |
|
344 |
+ |
|
345 |
+ |
|
346 |
//Final step of the adaptive sort: sorts the leading keys plus the internal
//buffer (which may have been relocated to the end of the range by the previous
//combination step) and merges them with the rest of the already-sorted data.
//
//buffer_right: true when the internal buffer was left at [len-l_intbuf, len),
//              false when keys+buffer both lead the range.
//first, len  : the whole range being sorted.
//l_intbuf    : internal buffer length. n_keys: number of key elements.
//xbuf        : external buffer (cleared on entry); enables faster merges
//              when its capacity suffices.
template<class RandIt, class Compare, class XBuf>
void adaptive_sort_final_merge( bool buffer_right
                              , RandIt const first
                              , typename iterator_traits<RandIt>::size_type const l_intbuf
                              , typename iterator_traits<RandIt>::size_type const n_keys
                              , typename iterator_traits<RandIt>::size_type const len
                              , XBuf & xbuf
                              , Compare comp)
{
   //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
   xbuf.clear();

   typedef typename iterator_traits<RandIt>::size_type size_type;
   size_type const n_key_plus_buf = l_intbuf+n_keys;
   if(buffer_right){
      //Buffer lives at the tail: sort it, merge it backwards into the data
      //(antistable keeps overall stability), then fold the keys in.
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
      stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
      //Keys were collected as unique (see collect_unique), so an unstable sort suffices
      unstable_sort(first, first+n_keys, comp, xbuf);
      stable_merge(first, first+n_keys, first+len, comp, xbuf);
   }
   else{
      //Keys and buffer both lead the range: sort them together, then merge
      //forward choosing the cheapest merge the external capacity allows.
      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
      stable_sort(first, first+n_key_plus_buf, comp, xbuf);
      if(xbuf.capacity() >= n_key_plus_buf){
         //Whole leading area fits in external memory: single buffered merge
         buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
      else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
         //Split into two merges; presumably so the smaller sub-range can use
         //xbuf inside stable_merge — TODO confirm against stable_merge
         stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
         stable_merge(first, first+n_keys, first+len, comp, xbuf);
      }
      else{
         //Not enough external memory: single (rotation-capable) stable merge
         stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
      }
   }
   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len);
}
|
383 |
+ |
|
384 |
//Computes the working parameters of the adaptive sort for a range of length
//"len" and collects the unique elements needed as keys/internal buffer.
//
//Output parameters (all measured in elements):
//   n_keys:      number of unique keys collected at the front of the range
//   l_intbuf:    internal (in-place) buffer length; 0 when falling back to the
//                reduced keys-only mode
//   l_base:      base segment length for the initial insertion-sort phase
//   l_build_buf: buffer length usable by the "build_blocks" phase
//
//Returns false when fewer than 4 unique elements exist, in which case the
//caller must fall back to a rotation-based stable sort.
template<class RandIt, class Compare, class Unsigned, class XBuf>
bool adaptive_sort_build_params
   (RandIt first, Unsigned const len, Compare comp
   , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
   , XBuf & xbuf
   )
{
   typedef Unsigned size_type;

   //Calculate ideal parameters and try to collect needed unique keys
   l_base = 0u;

   //Try to find a value near sqrt(len) that is 2^N*l_base where
   //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
   //as build_blocks merges to the left iteratively duplicating the
   //merged size and all the buffer must be used just before the final
   //merge to right step. This guarantees "build_blocks" produces
   //segments of size l_build_buf*2, maximizing the classic merge phase.
   l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));

   //The internal buffer can be expanded if there is enough external memory
   while(xbuf.capacity() >= l_intbuf*2){
      l_intbuf *= 2;
   }

   //This is the minimum number of keys to implement the ideal algorithm
   //
   //l_intbuf is used as buffer plus the key count
   size_type n_min_ideal_keys = l_intbuf-1;
   while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
      --n_min_ideal_keys;
   }
   n_min_ideal_keys += 1;
   BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);

   //If the external buffer can hold the whole internal buffer plus one
   //integral key per block, no unique-element keys are needed at all
   if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
      n_keys = 0u;
      l_build_buf = l_intbuf;
   }
   else{
      //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
      //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
      //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
      //
      //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
      //(to be used for keys in combine_all_blocks) as the whole l_build_buf
      //will be backed up in the external buffer during build_blocks.
      bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
      size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
      size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);

      //If available memory is 2*sqrt(l), then for "build_params"
      //the situation is the same as if 2*l_intbuf were collected.
      if(non_unique_buf && collected == n_min_ideal_keys){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      else if(collected == 2*l_intbuf){
         //l_intbuf*2 elements found. Use all of them in the build phase
         l_build_buf = l_intbuf*2;
         n_keys = l_intbuf;
      }
      else if(collected == (n_min_ideal_keys+l_intbuf)){
         l_build_buf = l_intbuf;
         n_keys = n_min_ideal_keys;
      }
      //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
      //is possible (due to very low unique keys), then go to a slow sort based on rotations.
      else{
         BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
         if(collected < 4){   //No combination possible with less than 4 keys
            return false;
         }
         n_keys = l_intbuf;
         while(n_keys&(n_keys-1)){
            n_keys &= n_keys-1;   //clear lowest set bits to make it a power of 2
         }
         while(n_keys > collected){
            n_keys/=2;
         }
         //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
         l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
         l_intbuf = 0;          //reduced mode: keys double as the only buffer
         l_build_buf = n_keys;
      }
      BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
   }

   return true;
}
|
474 |
+ |
|
475 |
+// Main explanation of the sort algorithm. |
|
476 |
+// |
|
477 |
+// csqrtlen = ceil(sqrt(len)); |
|
478 |
+// |
|
479 |
+// * First, 2*csqrtlen unique elements are extracted from the elements to be
|
480 |
+// sorted and placed in the beginning of the range. |
|
481 |
+// |
|
482 |
+// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements |
|
483 |
+// will be used as auxiliary memory, so trailing len-2*csqrtlen elements are |
|
484 |
+// grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
|
485 |
+// 2*csqrtlen unique elements are again the leading elements of the whole range. |
|
486 |
+// |
|
487 |
+// * Step "combine_blocks": pairs of previously formed blocks are merged with a different |
|
488 |
+// ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the |
|
489 |
+// "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen |
|
490 |
+// elements, etc.) until all trailing (len-2*csqrtlen) elements are merged.
|
491 |
+// |
|
492 |
+// In "combine_blocks", len/csqrtlen elements are used as "keys" (markers) to
|
493 |
+// know if elements belong to the first or second block to be merged and another |
|
494 |
+// leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step: |
|
495 |
+// |
|
496 |
+// Iteratively until all trailing (len-2*csqrtlen) elements are merged: |
|
497 |
+// Iteratively for each pair of previously merged block: |
|
498 |
+// * Blocks are divided groups of csqrtlen elements and |
|
499 |
+// 2*merged_block/csqrtlen keys are sorted to be used as markers |
|
500 |
+// * Groups are selection-sorted by first or last element (depending whether they are going |
|
501 |
+// to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. |
|
502 |
+// * Elements of each block pair are merged using the csqrtlen buffer taking into account |
|
503 |
+// if they belong to the first half or second half (marked by the key). |
|
504 |
+// |
|
505 |
+// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with |
|
506 |
+// rotations with the rest of sorted elements in the "combine_blocks" step. |
|
507 |
+// |
|
508 |
+// Corner cases: |
|
509 |
+// |
|
510 |
+// * If no 2*csqrtlen elements can be extracted: |
|
511 |
+// |
|
512 |
+// * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used |
|
513 |
+// as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This |
|
514 |
+// means that an additional "combine_blocks" step will be needed to merge all elements. |
|
515 |
+// |
|
516 |
+// * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum, |
|
517 |
+// then reduces the number of elements used as buffer and keys in the "build_blocks" |
|
518 |
+// and "combine_blocks" steps. If "combine_blocks" does not have enough keys due to this reduction
|
519 |
+// then uses a rotation based smart merge. |
|
520 |
+// |
|
521 |
+// * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. |
|
522 |
+// |
|
523 |
+// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used. |
|
524 |
+// |
|
525 |
+// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), |
|
526 |
+// then only csqrtlen elements need to be extracted and "combine_blocks" will use integral |
|
527 |
+// keys to combine blocks. |
|
528 |
+// |
|
529 |
+// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks |
|
530 |
+// using classic merge and "combine_blocks" will use bigger blocks when merging. |
|
531 |
//Entry point of the adaptive sort over [first, first+len).
//
//Dispatch tiers:
//   1) len <= insertion-sort threshold  -> plain insertion sort
//   2) external memory >= ceil(len/2)   -> half-copying classic merge sort
//   3) otherwise the full adaptive algorithm: collect unique keys/buffer,
//      build blocks, combine blocks, final merge. If too few unique elements
//      exist, falls back to a rotation-based stable sort.
//xbuf: external buffer wrapper; its capacity decides the tier taken.
template<class RandIt, class Compare, class XBuf>
void adaptive_sort_impl
   ( RandIt first
   , typename iterator_traits<RandIt>::size_type const len
   , Compare comp
   , XBuf & xbuf
   )
{
   typedef typename iterator_traits<RandIt>::size_type size_type;

   //Small sorts go directly to insertion sort
   if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
      insertion_sort(first, first + len, comp);
   }
   else if((len-len/2) <= xbuf.capacity()){
      //Enough external memory for a half-copying merge sort
      merge_sort(first, first+len, comp, xbuf.data());
   }
   else{
      //Make sure it is at least four
      BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);

      size_type l_base = 0;
      size_type l_intbuf = 0;
      size_type n_keys = 0;
      size_type l_build_buf = 0;

      //Calculate and extract needed unique elements. If a minimum is not achieved
      //fallback to a slow stable sort
      if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
         stable_sort(first, first+len, comp, xbuf);
      }
      else{
         BOOST_ASSERT(l_build_buf);
         //Otherwise, continue the adaptive_sort
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len);
         size_type const n_key_plus_buf = l_intbuf+n_keys;
         //l_build_buf is always power of two if l_intbuf is zero
         BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));

         //Classic merge sort until internal buffer and xbuf are exhausted
         size_type const l_merged = adaptive_sort_build_blocks
            (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len);

         //Non-trivial merge: combine blocks until the trailing data is sorted;
         //returns whether the internal buffer ended up at the right side
         bool const buffer_right = adaptive_sort_combine_all_blocks
            (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);

         //Sort keys and buffer and merge the whole sequence
         adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
      }
   }
}
|
584 |
+ |
|
585 |
+} //namespace detail_adaptive { |
|
586 |
+ |
|
587 |
+///@endcond |
|
588 |
+ |
|
589 |
+//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according |
|
590 |
+//! to comparison functor "comp". The sort is stable (order of equal elements |
|
591 |
+//! is guaranteed to be preserved). Performance is improved if additional raw storage is |
|
592 |
+//! provided. |
|
593 |
+//! |
|
594 |
+//! <b>Requires</b>: |
|
595 |
+//! - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator. |
|
596 |
+//! - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible. |
|
597 |
+//! |
|
598 |
+//! <b>Parameters</b>: |
|
599 |
+//! - first, last: the range of elements to sort |
|
600 |
+//! - comp: comparison function object which returns true if the first argument is is ordered before the second. |
|
601 |
+//! - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len" |
|
602 |
+//! elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len |
|
603 |
+//! is ceil(std::distance(first, last)/2). |
|
604 |
+//! |
|
605 |
+//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type |
|
606 |
+//! of dereferenced RandIt throws. |
|
607 |
+//! |
|
608 |
+//! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps. |
|
609 |
+//! Comparisons are close to minimum even with no additional memory. Constant factor for data movement is minimized |
|
610 |
+//! when uninitialized_len is ceil(std::distance(first, last)/2). Pretty good enough performance is achieved when |
|
611 |
+//! ceil(sqrt(std::distance(first, last)))*2. |
|
612 |
+//! |
|
613 |
+//! <b>Caution</b>: Experimental implementation, not production-ready. |
|
614 |
+template<class RandIt, class RandRawIt, class Compare> |
|
615 |
+void adaptive_sort( RandIt first, RandIt last, Compare comp |
|
616 |
+ , RandRawIt uninitialized |
|
617 |
+ , typename iterator_traits<RandIt>::size_type uninitialized_len) |
|
618 |
+{ |
|
619 |
+ typedef typename iterator_traits<RandIt>::size_type size_type; |
|
620 |
+ typedef typename iterator_traits<RandIt>::value_type value_type; |
|
621 |
+ |
|
622 |
+ ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len); |
|
623 |
+ ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf); |
|
624 |
+} |
|
625 |
+ |
|
626 |
+template<class RandIt, class Compare> |
|
627 |
+void adaptive_sort( RandIt first, RandIt last, Compare comp) |
|
628 |
+{ |
|
629 |
+ typedef typename iterator_traits<RandIt>::value_type value_type; |
|
630 |
+ adaptive_sort(first, last, comp, (value_type*)0, 0u); |
|
631 |
+} |
|
632 |
+ |
|
633 |
+} //namespace movelib { |
|
634 |
+} //namespace boost { |
|
635 |
+ |
|
636 |
+#include <boost/move/detail/config_end.hpp> |
|
637 |
+ |
|
638 |
+#endif //#define BOOST_MOVE_ADAPTIVE_SORT_HPP |
0 | 639 |
new file mode 100755 |
... | ... |
@@ -0,0 +1,1492 @@ |
1 |
+////////////////////////////////////////////////////////////////////////////// |
|
2 |
+// |
|
3 |
+// (C) Copyright Ion Gaztanaga 2015-2016. |
|
4 |
+// Distributed under the Boost Software License, Version 1.0. |
|
5 |
+// (See accompanying file LICENSE_1_0.txt or copy at |
|
6 |
+// http://www.boost.org/LICENSE_1_0.txt) |
|
7 |
+// |
|
8 |
+// See http://www.boost.org/libs/move for documentation. |
|
9 |
+// |
|
10 |
+////////////////////////////////////////////////////////////////////////////// |
|
11 |
+// |
|
12 |
+// Stable sorting that works in O(N*log(N)) worst time |
|
13 |
+// and uses O(1) extra memory |
|
14 |
+// |
|
15 |
+////////////////////////////////////////////////////////////////////////////// |
|
16 |
+// |
|
17 |
+// The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin |
|
18 |
+// and explained in the article from the russian collaborative blog |
|
19 |
+// Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on |
|
20 |
+// ideas from B-C. Huang and M. A. Langston explained in their article |
|
21 |
+// "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)" |
|
22 |
+// (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf). |
|
23 |
+// |
|
24 |
+// This implementation by Ion Gaztanaga uses previous ideas with additional changes: |
|
25 |
+// |
|
26 |
+// - Use of GCD-based rotation. |
|
27 |
+// - Non power of two buffer-sizes. |
|
28 |
+// - Tries to find sqrt(len)*2 unique keys, so that the merge sort |
|
29 |
+// phase can form up to sqrt(len)*4 segments if enough keys are found. |
|
30 |
+// - The merge-sort phase can take advantage of external memory to |
|
31 |
+// save some additional combination steps. |
|
32 |
+// - Combination phase: Blocks are selection sorted and merged in parallel. |
|
33 |
+// - The combination phase is performed alternating merge to left and merge |
|
34 |
+// to right phases minimizing swaps due to internal buffer repositioning. |
|
35 |
+// - When merging blocks special optimizations are made to avoid moving some |
|
36 |
+// elements twice. |
|
37 |
+// |
|
38 |
+// The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts |
|
39 |
+// from the sorting algorithm and implementing an additional block merge algorithm |
|
40 |
+// without moving elements to left or right. |
|
41 |
+////////////////////////////////////////////////////////////////////////////// |
|
42 |
+#ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP |
|
43 |
+#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP |
|
44 |
+ |
|
45 |
+#include <boost/move/detail/config_begin.hpp> |
|
46 |
+#include <boost/move/detail/reverse_iterator.hpp> |
|
47 |
+#include <boost/move/algo/move.hpp> |
|
48 |
+#include <boost/move/algo/detail/merge.hpp> |
|
49 |
+#include <boost/move/adl_move_swap.hpp> |
|
50 |
+#include <boost/move/algo/detail/insertion_sort.hpp> |
|
51 |
+#include <boost/move/algo/detail/merge_sort.hpp> |
|
52 |
+#include <boost/move/algo/detail/heap_sort.hpp> |
|
53 |
+#include <boost/move/algo/detail/merge.hpp> |
|
54 |
+#include <boost/move/algo/detail/is_sorted.hpp> |
|
55 |
+#include <boost/core/ignore_unused.hpp> |
|
56 |
+#include <boost/assert.hpp> |
|
57 |
+#include <boost/cstdint.hpp> |
|
58 |
+ |
|
59 |
+#ifndef BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL |
|
60 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 1 |
|
61 |
+#endif |
|
62 |
+ |
|
63 |
+#ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS |
|
64 |
+ #if BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL == 2 |
|
65 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \ |
|
66 |
+ print_stats(STR, L)\ |
|
67 |
+ // |
|
68 |
+ |
|
69 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) \ |
|
70 |
+ print_stats(STR, L)\ |
|
71 |
+ // |
|
72 |
+ #else |
|
73 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \ |
|
74 |
+ print_stats(STR, L)\ |
|
75 |
+ // |
|
76 |
+ |
|
77 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) |
|
78 |
+ #endif |
|
79 |
+#else |
|
80 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) |
|
81 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) |
|
82 |
+#endif |
|
83 |
+ |
|
84 |
+#ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS |
|
85 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT BOOST_ASSERT |
|
86 |
+#else |
|
87 |
+ #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L) |
|
88 |
+#endif |
|
89 |
+ |
|
90 |
+namespace boost { |
|
91 |
+namespace movelib { |
|
92 |
+ |
|
93 |
+#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS) |
|
94 |
+ |
|
95 |
+bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less) |
|
96 |
+{ |
|
97 |
+ if (first != last) { |
|
98 |
+ const order_perf_type *next = first, *cur(first); |
|
99 |
+ while (++next != last) { |
|
100 |
+ if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val))) |
|
101 |
+ return false; |
|
102 |
+ cur = next; |
|
103 |
+ } |
|
104 |
+ } |
|
105 |
+ return true; |
|
106 |
+} |
|
107 |
+ |
|
108 |
+#endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS |
|
109 |
+ |
|
110 |
+namespace detail_adaptive { |
|
111 |
+ |
|
112 |
+static const std::size_t AdaptiveSortInsertionSortThreshold = 16; |
|
113 |
+//static const std::size_t AdaptiveSortInsertionSortThreshold = 4; |
|
114 |
+BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0); |
|
115 |
+ |
|
116 |
+#if defined BOOST_HAS_INTPTR_T |
|
117 |
+ typedef ::boost::uintptr_t uintptr_t; |
|
118 |
+#else |
|
119 |
+ typedef std::size_t uintptr_t; |
|
120 |
+#endif |
|
121 |
+ |
|
122 |
//Returns the smaller of "a" and "b"; when neither compares less,
//"b" is returned (same tie behavior as the conditional-operator form).
template<class T>
const T &min_value(const T &a, const T &b)
{
   if(a < b){
      return a;
   }
   return b;
}
|
127 |
+ |
|
128 |
//Returns the larger of "a" and "b"; when neither compares greater,
//"b" is returned (same tie behavior as the conditional-operator form).
template<class T>
const T &max_value(const T &a, const T &b)
{
   if(a > b){
      return a;
   }
   return b;
}
|
133 |
+ |
|
134 |
+template<class ForwardIt, class Pred, class V> |
|
135 |
+typename iterator_traits<ForwardIt>::size_type |
|
136 |
+ count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v) |
|
137 |
+{ |
|
138 |
+ typedef typename iterator_traits<ForwardIt>::size_type size_type; |
|
139 |
+ size_type count = 0; |
|
140 |
+ while(first != last) { |
|
141 |
+ count += static_cast<size_type>(0 != pred(*first, v)); |
|
142 |
+ ++first; |
|
143 |
+ } |
|
144 |
+ return count; |
|
145 |
+} |
|
146 |
+ |
|
147 |
+ |
|
148 |
+template<class RandIt, class Compare> |
|
149 |
+RandIt skip_until_merge |
|
150 |
+ ( RandIt first1, RandIt const last1 |
|
151 |
+ , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp) |
|
152 |
+{ |
|
153 |
+ while(first1 != last1 && !comp(next_key, *first1)){ |
|
154 |
+ ++first1; |
|
155 |
+ } |
|
156 |
+ return first1; |
|
157 |
+} |
|
158 |
+ |
|
159 |
+ |
|
160 |
//Swaps the block [begin, end) with the equal-length block starting at "with"
//and mirrors that swap in the key sequence (*key_next <-> *key_range2), which
//tracks the logical order of the blocks. key_mid is adjusted so that it keeps
//referring to the same logical key after the swap. No-op when begin == with.
template<class RandItKeys, class RandIt>
void swap_and_update_key
   ( RandItKeys const key_next
   , RandItKeys const key_range2
   , RandItKeys &key_mid
   , RandIt const begin
   , RandIt const end
   , RandIt const with)
{
   if(begin != with){
      ::boost::adl_move_swap_ranges(begin, end, with);
      ::boost::adl_move_swap(*key_next, *key_range2);
      //Keep key_mid pointing at the same logical key after the key swap
      if(key_next == key_mid){
         key_mid = key_range2;
      }
      else if(key_mid == key_range2){
         key_mid = key_next;
      }
   }
}
|
180 |
+ |
|
181 |
+template<class RandItKeys> |
|
182 |
+void update_key |
|
183 |
+(RandItKeys const key_next |
|
184 |
+ , RandItKeys const key_range2 |
|
185 |
+ , RandItKeys &key_mid) |
|
186 |
+{ |
|
187 |
+ if (key_next != key_range2) { |
|
188 |
+ ::boost::adl_move_swap(*key_next, *key_range2); |
|
189 |
+ if (key_next == key_mid) { |
|
190 |
+ key_mid = key_range2; |
|
191 |
+ } |
|
192 |
+ else if (key_mid == key_range2) { |
|
193 |
+ key_mid = key_next; |
|
194 |
+ } |
|
195 |
+ } |
|
196 |
+} |
|
197 |
+ |
|
198 |
//Variant of swap_and_update_key that routes the block exchange through an
//auxiliary buffer: for each position it applies "op" as a three-way operation
//over (begin, with, buffer) — the exact data movement depends on op's
//three_way_t overload (see the merge operation types) — advancing all three
//iterators. The key swap and key_mid bookkeeping mirror swap_and_update_key.
//Returns the advanced buffer iterator. No-op (buffer returned unchanged)
//when begin == with.
template<class RandItKeys, class RandIt, class RandIt2, class Op>
RandIt2 buffer_and_update_key
(RandItKeys const key_next
 , RandItKeys const key_range2
 , RandItKeys &key_mid
 , RandIt begin
 , RandIt end
 , RandIt with
 , RandIt2 buffer
 , Op op)
{
   if (begin != with) {
      while(begin != end) {
         //three-way element move among data/with/buffer, as defined by op
         op(three_way_t(), begin++, with++, buffer++);
      }
      ::boost::adl_move_swap(*key_next, *key_range2);
      //Keep key_mid pointing at the same logical key after the key swap
      if (key_next == key_mid) {
         key_mid = key_range2;
      }
      else if (key_mid == key_range2) {
         key_mid = key_next;
      }
   }
   return buffer;
}
|
223 |
+ |
|
224 |
+/////////////////////////////////////////////////////////////////////////////// |
|
225 |
+// |
|
226 |
+// MERGE BUFFERLESS |
|
227 |
+// |
|
228 |
+/////////////////////////////////////////////////////////////////////////////// |
|
229 |
+ |
|
230 |
+// [first1, last1) merge [last1,last2) -> [first1,last2) |
|
231 |
//In-place, rotation-only merge of [first1, last1) with [last1, last2) into
//[first1, last2). Uses lower_bound + rotate_gcd instead of extra memory.
//
//*pis_range1_A tells whether range1 is the "A" range for stability purposes;
//it is toggled exactly on the code paths that return last1 (range1 fully
//consumed) and left untouched when range2 is exhausted first.
//Returns an iterator delimiting the portion that is already in final order.
template<class RandIt, class Compare>
RandIt partial_merge_bufferless_impl
   (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)
{
   if(last1 == last2){
      //Second range is empty: nothing to merge
      return first1;
   }
   bool const is_range1_A = *pis_range1_A;
   //Only work if the ranges actually interleave (tail of range1 > head of range2)
   if(first1 != last1 && comp(*last1, last1[-1])){
      do{
         RandIt const old_last1 = last1;
         //Find how much of range2 precedes *first1, then rotate it into place
         last1 = boost::movelib::lower_bound(last1, last2, *first1, comp);
         first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported
         if(last1 == last2){
            return first1;
         }
         //Skip over elements of range1 already smaller than the next range2 head
         do{
            ++first1;
         } while(last1 != first1 && !comp(*last1, *first1) );
      } while(first1 != last1);
   }
   //Range1 fully merged: flip the A/B role flag for the caller
   *pis_range1_A = !is_range1_A;
   return last1;
}
|
255 |
+ |
|
256 |
+// [first1, last1) merge [last1,last2) -> [first1,last2) |
|
257 |
+template<class RandIt, class Compare> |
|
258 |
+RandIt partial_merge_bufferless |
|
259 |
+ (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) |
|
260 |
+{ |
|
261 |
+ return *pis_range1_A ? partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp) |
|
262 |
+ : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp)); |
|
263 |
+} |
|
264 |
+ |
|
265 |
//Number of keys needed to tag every block of both ranges: one per block.
template<class SizeType>
static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b)
{
   SizeType const total_blocks = SizeType(n_block_a + n_block_b);
   return total_blocks;
}
|
270 |
+ |
|
271 |
//Selects the next block to merge: compares the first element of each block
//with index in [ix_first_block, ix_last_block) against the running minimum
//(which starts as block 0) and returns the index of the minimum block.
//Ties between equivalent first elements are broken by key order (key_comp),
//keeping the selection stable. Blocks start at first + index*l_block.
template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
typename iterator_traits<RandIt>::size_type
   find_next_block
      ( RandItKeys const key_first
      , KeyCompare key_comp
      , RandIt const first
      , typename iterator_traits<RandIt>::size_type const l_block
      , typename iterator_traits<RandIt>::size_type const ix_first_block
      , typename iterator_traits<RandIt>::size_type const ix_last_block
      , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type      size_type;
   typedef typename iterator_traits<RandIt>::value_type     value_type;
   typedef typename iterator_traits<RandItKeys>::value_type key_type;
   BOOST_ASSERT(ix_first_block <= ix_last_block);
   size_type ix_min_block = 0u;   //block 0 is the initial minimum candidate
   for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) {
      const value_type &min_val = first[ix_min_block*l_block];
      const value_type &cur_val = first[szt_i*l_block];
      const key_type &min_key = key_first[ix_min_block];
      const key_type &cur_key = key_first[szt_i];

      //Strictly smaller first element wins; equivalent elements fall back to key order
      bool const less_than_minimum = comp(cur_val, min_val) ||
         (!comp(min_val, cur_val) && key_comp(cur_key, min_key));

      if (less_than_minimum) {
         ix_min_block = szt_i;
      }
   }
   return ix_min_block;
}
|
302 |
+ |
|
303 |
// Merges, in place and without an auxiliary element buffer, a range laid out as:
//    [first, first+l_irreg1)                       -> irregular (undersized) leading block
//    [first+l_irreg1, first_irr2)                  -> n_block_a blocks from range A followed by
//                                                     n_block_b blocks from range B, each of length l_block
//    [first_irr2, first_irr2+l_irreg2)             -> irregular (undersized) trailing block
// key_first/key_comp form a movement-imitation key sequence: one unique key per regular
// block, kept in step with block swaps so each block's A/B origin can be recovered later.
//
// Phase 1 selection-sorts the regular blocks by their first element (ties broken by key
// order via find_next_block), swapping whole blocks and their keys. Phase 2 walks the
// reordered blocks and lazily merges adjacent runs whenever the block origin (A vs B)
// alternates, using partial_merge_bufferless. Phase 3 merges whatever remains with the
// trailing irregular block via merge_bufferless.
//
// Preconditions (checked by the invariant macros below): keys are sorted and unique, and
// n_block_a equals the number of keys below key_first[n_block_a] when n_block_b != 0.
template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
void merge_blocks_bufferless
   ( RandItKeys const key_first
   , KeyCompare key_comp
   , RandIt const first
   , typename iterator_traits<RandIt>::size_type const l_block
   , typename iterator_traits<RandIt>::size_type const l_irreg1
   , typename iterator_traits<RandIt>::size_type const n_block_a
   , typename iterator_traits<RandIt>::size_type const n_block_b
   , typename iterator_traits<RandIt>::size_type const l_irreg2
   , Compare comp)
{
   typedef typename iterator_traits<RandIt>::size_type size_type;
   // Total number of keys tracking the regular blocks (helper defined elsewhere in this file).
   size_type const key_count = needed_keys_count(n_block_a, n_block_b);
   ::boost::ignore_unused(key_count);
   //BOOST_ASSERT(n_block_a || n_block_b);
   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp));
   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));

   // Number of regular blocks that end up positioned before the trailing irregular block.
   size_type n_bef_irreg2 = 0;
   // Stays true while blocks still compare <= *first_irr2; once false it never flips back,
   // so n_bef_irreg2 stops growing from that point on.
   bool l_irreg_pos_count = true;
   // key_mid marks the boundary key between A-origin and B-origin blocks; it is kept
   // up to date by swap_and_update_key during the selection phase.
   RandItKeys key_mid(key_first + n_block_a);
   RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block;
   RandIt const last_irr2 = first_irr2 + l_irreg2;

   { //Selection sort blocks
      size_type n_block_left = n_block_b + n_block_a;
      RandItKeys key_range2(key_first);

      // [min_check, max_check) is the candidate window passed to find_next_block;
      // it is shrunk by one each iteration (saturating at 0) and widened below when
      // the chosen block sits near the window's end.
      size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
      size_type max_check = min_value<size_type>(min_check+1, n_block_left);
      for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) {
         size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp);
         RandItKeys const key_next(key_range2 + next_key_idx);
         max_check = min_value<size_type>(max_value<size_type>(max_check, next_key_idx+size_type(2)), n_block_left);

         // The block chosen as the minimum for this position.
         RandIt const first_min = f + next_key_idx*l_block;

         //Check if irregular b block should go here.
         //If so, break to the special code handling the irregular block
         if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){
            l_irreg_pos_count = false;
         }
         n_bef_irreg2 += l_irreg_pos_count;

         // Swap the chosen block into position f and mirror the move in the key sequence
         // (also relocating key_mid if it is involved) — helper defined elsewhere in this file.
         swap_and_update_key(key_next, key_range2, key_mid, f, f + l_block, first_min);
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(f, f+l_block, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first_min, first_min + l_block, comp));
         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block)));
      }
   }
   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp));

   // Phase 2: merge runs of same-origin blocks lazily. [first1, last1) is the current
   // unmerged run; it starts as the irregular leading block.
   RandIt first1 = first;
   RandIt last1 = first+l_irreg1;
   RandItKeys const key_end (key_first+n_bef_irreg2);
   bool is_range1_A = true;

   for(RandItKeys key_next = key_first; key_next != key_end; ++key_next){
      // A block originates from range A iff its key precedes key_mid (or all keys are A's).
      bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid);
      // Same origin: extend the current run without merging. Different origin: merge the
      // pending run with this block; partial_merge_bufferless returns where the still-unmerged
      // remainder begins and updates is_range1_A accordingly.
      first1 = is_range1_A == is_range2_A
         ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp);
      last1 += l_block;
      BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first, first1, comp));
   }

   // Phase 3: merge the remaining unmerged tail with the irregular trailing block.
   merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp);
   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first, last_irr2, comp));
}
|
372 |
+ |
|
373 |
+// Complexity: 2*distance(first, last)+max_collected^2/2 |
|
374 |
+// |
|
375 |
+// Tries to collect at most n_keys unique elements from [first, last), |
|
376 |
+// in the beginning of the range, and ordered according to comp |
|
377 |
+// |
|
378 |
+// Returns the number of collected keys |
|
379 |
+template<class RandIt, class Compare, class XBuf> |
|
380 |
+typename iterator_traits<RandIt>::size_type |
|
381 |
+ collect_unique |
|
382 |
+ ( RandIt const first, RandIt const last |
|
383 |
+ , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp |
|
384 |
+ , XBuf & xbuf) |
|
385 |
+{ |
|
386 |
+ typedef typename iterator_traits<RandIt>::size_type size_type; |
|
387 |
+ size_type h = 0; |
|
388 |
+ if(max_collected){ |
|
389 |