Browse code

Adding algo

Steffen Neumann authored on 18/01/2022 10:55:21
Showing 14 changed files

1 1
new file mode 100755
... ...
@@ -0,0 +1,350 @@
1
+//////////////////////////////////////////////////////////////////////////////
2
+//
3
+// (C) Copyright Ion Gaztanaga 2015-2016.
4
+// Distributed under the Boost Software License, Version 1.0.
5
+// (See accompanying file LICENSE_1_0.txt or copy at
6
+// http://www.boost.org/LICENSE_1_0.txt)
7
+//
8
+// See http://www.boost.org/libs/move for documentation.
9
+//
10
+//////////////////////////////////////////////////////////////////////////////
11
+
12
+#ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP
13
+#define BOOST_MOVE_ADAPTIVE_MERGE_HPP
14
+
15
+#include <boost/move/detail/config_begin.hpp>
16
+#include <boost/move/algo/detail/adaptive_sort_merge.hpp>
17
+
18
+namespace boost {
19
+namespace movelib {
20
+
21
+///@cond
22
+namespace detail_adaptive {
23
+
24
+template<class RandIt, class Compare, class XBuf>   //Combine step: block-merges the data of both ranges, choosing the buffer strategy from n_keys, xbuf_used and use_internal_buf
25
+inline void adaptive_merge_combine_blocks( RandIt first   //start of the whole range (collected elements come first)
26
+                                      , typename iterator_traits<RandIt>::size_type len1   //length of the first range
27
+                                      , typename iterator_traits<RandIt>::size_type len2   //length of the second range
28
+                                      , typename iterator_traits<RandIt>::size_type collected   //number of leading elements set aside before the data
29
+                                      , typename iterator_traits<RandIt>::size_type n_keys   //non-zero: keys are read from "first"; zero: integral keys are placed in xbuf
30
+                                      , typename iterator_traits<RandIt>::size_type l_block   //block length
31
+                                      , bool use_internal_buf   //only read when n_keys != 0 and !xbuf_used
32
+                                      , bool xbuf_used   //only read when n_keys != 0
33
+                                      , Compare comp
34
+                                      , XBuf & xbuf
35
+                                      )
36
+{
37
+   typedef typename iterator_traits<RandIt>::size_type size_type;
38
+   size_type const len = len1+len2;   //total number of elements
39
+   size_type const l_combine  = len-collected;   //elements left once the collected prefix is excluded
40
+   size_type const l_combine1 = len1-collected;   //same, restricted to the first range
41
+
42
+    if(n_keys){   //keys are available inside the range
43
+      RandIt const first_data = first+collected;   //data to merge starts right after the collected elements
44
+      RandIt const keys = first;   //keys are taken from the very beginning of the range
45
+      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
46
+      if(xbuf_used){   //merge moving elements through the external buffer
47
+         if(xbuf.size() < l_block){   //grow xbuf so that size() >= l_block (asserted below)
48
+            xbuf.initialize_until(l_block, *first);
49
+         }
50
+         BOOST_ASSERT(xbuf.size() >= l_block);
51
+         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
52
+         combine_params( keys, comp, l_combine
53
+                           , l_combine1, l_block, xbuf
54
+                           , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
55
+         op_merge_blocks_with_buf
56
+            (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
57
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg xbf: ", len);
58
+      }
59
+      else{
60
+         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
61
+         combine_params( keys, comp, l_combine
62
+                           , l_combine1, l_block, xbuf
63
+                           , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
64
+         if(use_internal_buf){   //swap-merge using the l_block elements placed just before first_data as buffer
65
+            op_merge_blocks_with_buf
66
+               (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), first_data-l_block);
67
+            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A mrg buf: ", len);
68
+         }
69
+         else{   //neither external nor internal buffer: bufferless block merge
70
+            merge_blocks_bufferless
71
+               (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
72
+            BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg nbf: ", len);
73
+         }
74
+      }
75
+   }
76
+   else{   //n_keys == 0: integral keys stored in xbuf are used instead of elements of the range
77
+      xbuf.shrink_to_fit(l_block);
78
+      if(xbuf.size() < l_block){   //grow xbuf so that size() >= l_block (asserted below)
79
+         xbuf.initialize_until(l_block, *first);
80
+      }
81
+      size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);   //key storage provided by xbuf (presumably placed after the l_block elements - TODO confirm)
82
+      size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
83
+      combine_params( uint_keys, less(), l_combine
84
+                     , l_combine1, l_block, xbuf
85
+                     , n_block_a, n_block_b, l_irreg1, l_irreg2, true);   //Outputs
86
+      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
87
+      BOOST_ASSERT(xbuf.size() >= l_block);
88
+      op_merge_blocks_with_buf
89
+         (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());   //note: data starts at "first" in this branch
90
+      xbuf.clear();
91
+      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg buf: ", len);
92
+   }
93
+}
94
+
95
+template<class RandIt, class Compare, class XBuf>   //Final step: sorts the leading keys/buffer elements and merges them with the rest of the range
97
+inline void adaptive_merge_final_merge( RandIt first   //start of the whole range
98
+                                      , typename iterator_traits<RandIt>::size_type len1   //only used to compute the total length
99
+                                      , typename iterator_traits<RandIt>::size_type len2   //only used to compute the total length
100
+                                      , typename iterator_traits<RandIt>::size_type collected   //number of leading elements that were set aside
101
+                                      , typename iterator_traits<RandIt>::size_type l_intbuf   //subtracted from "collected" to obtain the key count
102
+                                      , typename iterator_traits<RandIt>::size_type //l_block (unused)
103
+                                      , bool //use_internal_buf (unused)
104
+                                      , bool xbuf_used
105
+                                      , Compare comp
106
+                                      , XBuf & xbuf
107
+                                      )
108
+{
109
+   typedef typename iterator_traits<RandIt>::size_type size_type;
110
+   size_type n_keys = collected-l_intbuf;   //collected elements that are not internal buffer
111
+   size_type len = len1+len2;
112
+   if (!xbuf_used || n_keys) {   //nothing is done when xbuf was used and there are no keys
113
+      xbuf.clear();
114
+      const size_type middle = xbuf_used && n_keys ? n_keys: collected;   //with xbuf only the n_keys leading elements are handled, otherwise the whole collected prefix
115
+      unstable_sort(first, first + middle, comp, xbuf);   //order the leading elements (presumably permuted by the previous steps - TODO confirm)
116
+      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b srt: ", len);
117
+      stable_merge(first, first + middle, first + len, comp, xbuf);   //merge the sorted prefix with the remaining elements
118
+   }
119
+   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A fin mrg: ", len);
120
+}
120
+
121
+template<class SizeType>   //Key count to use when the keys themselves are taken out of the first range
122
+inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
123
+{
124
+   typedef SizeType size_type;
125
+   //This is the minimum number of keys to implement the ideal algorithm
126
+   size_type n_keys = len1/l_block+len2/l_block;   //starting guess: whole blocks of both ranges
127
+   const size_type second_half_blocks = len2/l_block;   //whole blocks in the second range
128
+   const size_type first_half_aux = len1-l_intbuf;   //first range without the internal buffer
129
+   while(n_keys >= ((first_half_aux-n_keys)/l_block + second_half_blocks)){   //lower the guess while it still covers the blocks left after removing n_keys elements
130
+      --n_keys;
131
+   }
132
+   ++n_keys;   //step back to the last value that fulfilled the loop condition
133
+   return n_keys;
134
+}
135
+
136
+template<class SizeType>   //Key count to use when keys do not take elements out of the range
137
+inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
138
+{
139
+   typedef SizeType size_type;
140
+   //This is the minimum number of keys to implement the ideal algorithm
141
+   size_type n_keys = (len1-l_intbuf)/l_block + len2/l_block;   //whole blocks of the first range (minus internal buffer) plus whole blocks of the second range
142
+   return n_keys;
143
+}
144
+
145
+template<class SizeType, class Xbuf>   //Computes key count, block length and internal buffer length taking the xbuf capacity into account
146
+inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)   //returns n_keys; rl_block and l_intbuf_inout are overwritten on exit
147
+{
148
+   typedef SizeType size_type;
149
+   size_type l_block = rl_block;
150
+   size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;   //no internal buffer when xbuf can hold a whole block
151
+
152
+   if (xbuf.capacity() > l_block){
153
+      l_block = xbuf.capacity();   //use the bigger external capacity as block length
154
+   }
155
+
156
+   //This is the minimum number of keys to implement the ideal algorithm
157
+   size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf);
158
+   BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
159
+
160
+   if(xbuf.template supports_aligned_trailing<size_type>   //presumably: xbuf can also hold that many size_type keys besides l_block elements - TODO confirm
161
+      ( l_block
162
+      , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf)))
163
+   {
164
+      n_keys = 0u;   //no keys need to be collected from the range
165
+   }
166
+   l_intbuf_inout = l_intbuf;
167
+   rl_block = l_block;
168
+   return n_keys;
169
+}
170
+
171
+// Main explanation of the merge algorithm.
172
+//
173
+// csqrtlen = ceil(sqrt(len));
174
+//
175
+// * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
176
+//   unique elements are extracted from elements to be sorted and placed in the beginning of the range.
177
+//
178
+// * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
179
+//   are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
180
+//
181
+//   Explanation of the "combine_blocks" step:
182
+//
183
+//         * Trailing [first+to_collect, first+len1) elements are divided in groups of csqrtlen elements.
184
+//           Remaining elements that can't form a group are grouped in front of those elements.
185
+//         * Trailing [first+len1, first+len1+len2) elements are divided in groups of csqrtlen elements.
186
+//           Remaining elements that can't form a group are grouped in the back of those elements.
187
+//         * In parallel the following two steps are performed:
188
+//             *  Groups are selection-sorted by first or last element (depending whether they are going
189
+//                to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
190
+//             * Elements of each block pair are merged using the csqrtlen buffer taking into account
191
+//                if they belong to the first half or second half (marked by the key).
192
+//
193
+// * In the final merge step leading "to_collect" elements are merged with rotations
194
+//   with the rest of merged elements in the "combine_blocks" step.
195
+//
196
+// Corner cases:
197
+//
198
+// * If no "to_collect" elements can be extracted:
199
+//
200
+//    * If more than a minimum number of elements is extracted
201
+//      then reduces the number of elements used as buffer and keys in the
202
+//      "combine_blocks" step. If "combine_blocks" does not have enough keys due to this reduction
203
+//      then uses a rotation-based smart merge.
204
+//
205
+//    * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
206
+//
207
+// * If auxiliary memory is greater than or equal to min(len1, len2), a buffered merge is performed.
208
+//
209
+// * If len1 or len2 is not greater than 2*csqrtlen then a rotation-based merge is performed.
210
+//
211
+// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
212
+//   then no csqrtlen elements need to be extracted and "combine_blocks" will use integral
213
+//   keys to combine blocks.
214
+template<class RandIt, class Compare, class XBuf>   //Merges [first, first+len1) and [first+len1, first+len1+len2), choosing a strategy from the xbuf capacity and the number of keys that can be collected
215
+void adaptive_merge_impl
216
+   ( RandIt first
217
+   , typename iterator_traits<RandIt>::size_type len1
218
+   , typename iterator_traits<RandIt>::size_type len2
219
+   , Compare comp
220
+   , XBuf & xbuf
221
+   )
222
+{
223
+   typedef typename iterator_traits<RandIt>::size_type size_type;
224
+
225
+   if(xbuf.capacity() >= min_value<size_type>(len1, len2)){   //xbuf can hold the shorter range: plain buffered merge
226
+      buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
227
+   }
228
+   else{
229
+      const size_type len = len1+len2;
230
+      //Calculate ideal parameters and try to collect needed unique keys
231
+      size_type l_block = size_type(ceil_sqrt(len));
232
+
233
+      //One range is not big enough to extract keys and the internal buffer so a
234
+      //rotation-based merge will do just fine
235
+      if(len1 <= l_block*2 || len2 <= l_block*2){
236
+         merge_bufferless(first, first+len1, first+len1+len2, comp);
237
+         return;
238
+      }
239
+
240
+      //Detail the number of keys and internal buffer. If xbuf has enough memory, no
241
+      //internal buffer is needed so l_intbuf will remain 0.
242
+      size_type l_intbuf = 0;
243
+      size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);   //also overwrites l_block and l_intbuf
244
+      size_type const to_collect = l_intbuf+n_keys;
245
+      //Try to extract needed unique values from the first range
246
+      size_type const collected  = collect_unique(first, first+len1, to_collect, comp, xbuf);
247
+      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   A collect: ", len);
248
+
249
+      //The minimum number of keys is not available in the first range, so fall back to rotations
250
+      if(collected != to_collect && collected < 4){
251
+         merge_bufferless(first, first+collected, first+len1, comp);   //merge the collected elements back into the first range
252
+         merge_bufferless(first, first + len1, first + len1 + len2, comp);   //then merge both ranges
253
+         return;
254
+      }
255
+
256
+      //If not enough keys but more than minimum, adjust the internal buffer and key count
257
+      bool use_internal_buf = collected == to_collect;
258
+      if (!use_internal_buf){
259
+         l_intbuf = 0u;
260
+         n_keys = collected;
261
+         l_block  = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);   //presumably updates use_internal_buf (it is re-tested below) - TODO confirm
262
+         //If use_internal_buf is false, then the internal buffer will be zero and rotation-based combination will be used
263
+         l_intbuf = use_internal_buf ? l_block : 0u;
264
+      }
265
+
266
+      bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;   //xbuf is only used when everything was collected and it can hold a block
267
+      //Merge trailing elements using smart merges
268
+      adaptive_merge_combine_blocks(first, len1, len2, collected,   n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
269
+      //Merge buffer and keys with the rest of the values
270
+      adaptive_merge_final_merge   (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
271
+   }
272
+}
273
+
274
+}  //namespace detail_adaptive {
275
+
276
+///@endcond
277
+
278
+//! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
279
+//!   into one sorted range [first, last) according to the given comparison function comp.
280
+//!   The algorithm is stable (if there are equivalent elements in the original two ranges,
281
+//!   the elements from the first range (preserving their original order) precede the elements
282
+//!   from the second range (preserving their original order)).
283
+//!
284
+//! <b>Requires</b>:
285
+//!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
286
+//!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
287
+//!
288
+//! <b>Parameters</b>:
289
+//!   - first: the beginning of the first sorted range. 
290
+//!   - middle: the end of the first sorted range and the beginning of the second
291
+//!   - last: the end of the second sorted range
292
+//!   - comp: comparison function object which returns true if the first argument is ordered before the second.
293
+//!   - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
294
+//!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
295
+//!      is min(std::distance(first, middle), std::distance(middle, last)).
296
+//!
297
+//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
298
+//!   of dereferenced RandIt throws.
299
+//!
300
+//! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps.
301
+//!   Constant factor for comparisons and data movement is minimized when uninitialized_len
302
+//!   is min(std::distance(first, middle), std::distance(middle, last)).
303
+//!   Reasonably good performance is achieved when uninitialized_len is
304
+//!   ceil(sqrt(std::distance(first, last)))*2.
305
+//!
306
+//! <b>Caution</b>: Experimental implementation, not production-ready.
307
+template<class RandIt, class Compare>
308
+void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp
309
+                , typename iterator_traits<RandIt>::value_type* uninitialized = 0
310
+                , typename iterator_traits<RandIt>::size_type uninitialized_len = 0)
311
+{
312
+   typedef typename iterator_traits<RandIt>::size_type  size_type;
313
+   typedef typename iterator_traits<RandIt>::value_type value_type;
314
+
315
+   if (first == middle || middle == last){   //one of the ranges is empty: nothing to merge
316
+      return;
317
+   }
318
+
319
+   //Reduce ranges to merge if possible
320
+   do {   //skip leading elements of the first range that *middle is not ordered before
321
+      if (comp(*middle, *first)){
322
+         break;
323
+      }
324
+      ++first;
325
+      if (first == middle)   //the whole first range was skipped: nothing left to merge
326
+         return;
327
+   } while(1);
328
+
329
+   RandIt first_high(middle);
330
+   --first_high;   //last element of the first range
331
+   do {   //drop trailing elements of the second range that are not ordered before *first_high
332
+      --last;
333
+      if (comp(*last, *first_high)){
334
+         ++last;   //this element takes part in the merge, so keep it inside the range
335
+         break;
336
+      }
337
+      if (last == middle)   //the whole second range was dropped: nothing left to merge
338
+         return;
339
+   } while(1);
340
+
341
+   ::boost::movelib::adaptive_xbuf<value_type, value_type*, size_type> xbuf(uninitialized, size_type(uninitialized_len));   //wrap the caller-provided raw storage
342
+   ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf);   //merge the reduced ranges
343
+}
344
+
345
+}  //namespace movelib {
346
+}  //namespace boost {
347
+
348
+#include <boost/move/detail/config_end.hpp>
349
+
350
+#endif   //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP
0 351
new file mode 100755
... ...
@@ -0,0 +1,638 @@
1
+//////////////////////////////////////////////////////////////////////////////
2
+//
3
+// (C) Copyright Ion Gaztanaga 2015-2016.
4
+// Distributed under the Boost Software License, Version 1.0.
5
+// (See accompanying file LICENSE_1_0.txt or copy at
6
+// http://www.boost.org/LICENSE_1_0.txt)
7
+//
8
+// See http://www.boost.org/libs/move for documentation.
9
+//
10
+//////////////////////////////////////////////////////////////////////////////
11
+
12
+#ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP
13
+#define BOOST_MOVE_ADAPTIVE_SORT_HPP
14
+
15
+#include <boost/move/detail/config_begin.hpp>
16
+#include <boost/move/algo/detail/adaptive_sort_merge.hpp>
17
+#include <boost/core/ignore_unused.hpp>
18
+
19
+namespace boost {
20
+namespace movelib {
21
+
22
+///@cond
23
+namespace detail_adaptive {
24
+
25
+template<class RandIt>
26
+void move_data_backward( RandIt cur_pos   //current start of the data
27
+              , typename iterator_traits<RandIt>::size_type const l_data   //number of elements to relocate
28
+              , RandIt new_pos   //start of the destination
29
+              , bool const xbuf_used)   //true: plain moves; false: element swaps
30
+{
31
+   //Relocate [cur_pos, cur_pos+l_data) so that it ends at new_pos+l_data, using the backward variants of move/swap
32
+   if(xbuf_used){
33
+      boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);      
34
+   }
35
+   else{
36
+      boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);      
37
+      //Rotate does less moves but it seems slower due to cache issues
38
+      //rotate_gcd(first-l_block, first+len-l_block, first+len);
39
+   }
40
+}
41
+
42
+template<class RandIt>
43
+void move_data_forward( RandIt cur_pos   //current start of the data
44
+              , typename iterator_traits<RandIt>::size_type const l_data   //number of elements to relocate
45
+              , RandIt new_pos   //start of the destination
46
+              , bool const xbuf_used)   //true: plain moves; false: element swaps
47
+{
48
+   //Relocate [cur_pos, cur_pos+l_data) so that it starts at new_pos, using the forward variants of move/swap
49
+   if(xbuf_used){
50
+      boost::move(cur_pos, cur_pos+l_data, new_pos);
51
+   }
52
+   else{
53
+      boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
54
+      //Rotate does less moves but it seems slower due to cache issues
55
+      //rotate_gcd(first-l_block, first+len-l_block, first+len);
56
+   }
57
+}
58
+
59
+// build blocks of length 2*l_build_buf. l_build_buf is power of two
60
+// input: [0, l_build_buf) elements are buffer, rest unsorted elements
61
+// output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
62
+//
63
+// First elements are merged from right to left until elements start
64
+// at first. All old elements [first, first + l_build_buf) are placed at the end
65
+// [first+len-l_build_buf, first+len). To achieve this:
66
+// - If we have external memory to merge, we save elements from the buffer
67
+//   so that a non-swapping merge is used. Buffer elements are restored
68
+//   at the end of the buffer from the external memory.
69
+//
70
+// - When the external memory is not available or it is insufficient
71
+//   for a merge operation, left swap merging is used.
72
+//
73
+// Once elements are merged to the left in blocks of l_build_buf, a single merge
74
+// to the right is performed to achieve merged blocks of size 2*l_build_buf.
75
+// If external memory is available, usual merge is used, swap merging otherwise.
76
+//
77
+// As a last step, if auxiliary memory is available, in-place merge is performed
78
+// until all is merged or auxiliary memory is not large enough.
79
+template<class RandIt, class Compare, class XBuf>
80
+typename iterator_traits<RandIt>::size_type  
81
+   adaptive_sort_build_blocks
82
+      ( RandIt const first   //start of the buffer, which is followed by the elements to sort
83
+      , typename iterator_traits<RandIt>::size_type const len   //buffer plus data length
84
+      , typename iterator_traits<RandIt>::size_type const l_base   //length of the runs produced by the insertion sort step
85
+      , typename iterator_traits<RandIt>::size_type const l_build_buf   //length of the leading buffer
86
+      , XBuf & xbuf
87
+      , Compare comp)
88
+{
89
+   typedef typename iterator_traits<RandIt>::size_type  size_type;
90
+   BOOST_ASSERT(l_build_buf <= len);
91
+   BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));   //l_build_buf/l_base has at most one bit set (power of two)
92
+
93
+   //Place the start pointer after the buffer
94
+   RandIt first_block = first + l_build_buf;
95
+   size_type const elements_in_blocks = len - l_build_buf;
96
+
97
+   //////////////////////////////////
98
+   // Start of merge to left step
99
+   //////////////////////////////////
100
+   size_type l_merged = 0u;
101
+
102
+   BOOST_ASSERT(l_build_buf);
103
+   //If there is not enough buffer for the insertion sort step, just avoid the external buffer
104
+   size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));   //usable part of the external buffer
105
+   kbuf = kbuf < l_base ? 0 : kbuf;
106
+
107
+   if(kbuf){
108
+      //Backup internal buffer values in external buffer so they can be overwritten
109
+      xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
110
+      l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
111
+
112
+      //Now combine them using the buffer. Elements from buffer can be
113
+      //overwritten since they've been saved to xbuf
114
+      l_merged = op_merge_left_step_multiple
115
+         ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
116
+
117
+      //Restore internal buffer from external buffer unless kbuf was l_build_buf,
118
+      //in that case restoration will happen later
119
+      if(kbuf != l_build_buf){
120
+         boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
121
+      }
122
+   }
123
+   else{   //external buffer not usable: sort the runs in place and rotate
124
+      l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
125
+      rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
126
+   }
127
+
128
+   //Now combine elements using the buffer. Elements from buffer can't be
129
+   //overwritten since xbuf was not big enough, so merge swapping elements.
130
+   l_merged = op_merge_left_step_multiple
131
+      (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
132
+
133
+   BOOST_ASSERT(l_merged == l_build_buf);
134
+
135
+   //////////////////////////////////
136
+   // Start of merge to right step
137
+   //////////////////////////////////
138
+
139
+   //If kbuf is l_build_buf then we can merge right without swapping
140
+   //Saved data is still in xbuf
141
+   if(kbuf && kbuf == l_build_buf){
142
+      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
143
+      //Restore internal buffer from external buffer if kbuf was l_build_buf.
144
+      //as this operation was previously delayed.
145
+      boost::move(xbuf.data(), xbuf.data() + kbuf, first);
146
+   }
147
+   else{
148
+      op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
149
+   }
150
+   xbuf.clear();
151
+   //2*l_build_buf or total already merged
152
+   return min_value<size_type>(elements_in_blocks, 2*l_build_buf);
153
+}
154
+
155
+template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>   //Merges, group by group, pairs of already merged segments of length l_prev_merged
155
+void adaptive_sort_combine_blocks
156
+   ( RandItKeys const keys   //key storage handed to combine_params and the block merges
157
+   , KeyCompare key_comp
158
+   , RandIt const first   //start of the data to combine
159
+   , typename iterator_traits<RandIt>::size_type const len   //number of data elements
160
+   , typename iterator_traits<RandIt>::size_type const l_prev_merged   //length of the segments merged so far
161
+   , typename iterator_traits<RandIt>::size_type const l_block   //block length
162
+   , bool const use_buf   //false: bufferless block merge
163
+   , bool const xbuf_used   //forwarded to merge_blocks_left/right and used to size rbuf
164
+   , XBuf & xbuf   //not used in this function
165
+   , Compare comp
166
+   , bool merge_left)   //true: groups are processed first to last; false (with use_buf): last to first
167
+{
168
+   boost::ignore_unused(xbuf);
169
+   typedef typename iterator_traits<RandIt>::size_type   size_type;
170
+
171
+   size_type const l_reg_combined   = 2*l_prev_merged;   //length of a regular group (two previously merged segments)
172
+   size_type l_irreg_combined = 0;
173
+   size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);   //also outputs the length of the trailing irregular group
174
+   size_type const n_reg_combined = len/l_reg_combined;   //number of regular groups
175
+   RandIt combined_first = first;
176
+
177
+   boost::ignore_unused(l_total_combined);
178
+   BOOST_ASSERT(l_total_combined <= len);
179
+
180
+   size_type const max_i = n_reg_combined + (l_irreg_combined != 0);   //regular groups plus the irregular one, if any
181
+
182
+   if(merge_left || !use_buf) {   //process groups from first to last
183
+      for( size_type combined_i = 0; combined_i != max_i; ) {
184
+         //Now merge blocks
185
+         bool const is_last = combined_i==n_reg_combined;   //true only for the irregular trailing group
186
+         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
187
+
188
+         range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);   //the l_block elements before the group, or an empty range
189
+         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
190
+         combine_params( keys, key_comp, l_cur_combined
191
+                        , l_prev_merged, l_block, rbuf
192
+                        , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
193
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
194
+         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
195
+            BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
196
+         if(!use_buf){
197
+            merge_blocks_bufferless
198
+               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
199
+         }
200
+         else{
201
+            merge_blocks_left
202
+               (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
203
+         }
204
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_L: ", len + l_block);
205
+         ++combined_i;
206
+         if(combined_i != max_i)   //do not advance past the last group
207
+            combined_first += l_reg_combined;
208
+      }
209
+   }
210
+   else{   //merge to the right with a buffer: process groups from last to first
211
+      combined_first += l_reg_combined*(max_i-1);   //start of the last group
212
+      for( size_type combined_i = max_i; combined_i; ) {
213
+         --combined_i;
214
+         bool const is_last = combined_i==n_reg_combined;   //true only for the irregular trailing group
215
+         size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
216
+
217
+         RandIt const combined_last(combined_first+l_cur_combined);
218
+         range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);   //the l_block elements after the group, or an empty range
219
+         size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
220
+         combine_params( keys, key_comp, l_cur_combined
221
+                        , l_prev_merged, l_block, rbuf
222
+                        , n_block_a, n_block_b, l_irreg1, l_irreg2);  //Outputs
223
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
224
+         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
225
+         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
226
+         merge_blocks_right
227
+            (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
228
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_R: ", len + l_block);
229
+         if(combined_i)   //do not step back before the first group
230
+            combined_first -= l_reg_combined;
231
+      }
232
+   }
233
+}
235
+
236
+//Returns true if buffer is placed in 
237
+//[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
238
+//[buffer,buffer+l_intbuf)
239
+template<class RandIt, class Compare, class XBuf>
240
+bool adaptive_sort_combine_all_blocks
241
+   ( RandIt keys
242
+   , typename iterator_traits<RandIt>::size_type &n_keys
243
+   , RandIt const buffer
244
+   , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
245
+   , typename iterator_traits<RandIt>::size_type l_merged
246
+   , typename iterator_traits<RandIt>::size_type &l_intbuf
247
+   , XBuf & xbuf
248
+   , Compare comp)
249
+{
250
+   typedef typename iterator_traits<RandIt>::size_type  size_type;
251
+   RandIt const first = buffer + l_intbuf;
252
+   size_type const l_data = l_buf_plus_data - l_intbuf;
253
+   size_type const l_unique = l_intbuf+n_keys;
254
+   //Backup data to external buffer once if possible
255
+   bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
256
+   if(common_xbuf){
257
+      xbuf.move_assign(buffer, l_intbuf);
258
+   }
259
+
260
+   bool prev_merge_left = true;
261
+   size_type l_prev_total_combined = l_merged, l_prev_block = 0;
262
+   bool prev_use_internal_buf = true;
263
+
264
+   for( size_type n = 0; l_data > l_merged
265
+      ; l_merged*=2
266
+      , ++n){
267
+      //If l_intbuf is non-zero, use that internal buffer.
268
+      //    Implies l_block == l_intbuf && use_internal_buf == true
269
+      //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
270
+      //    Implies l_block == n_keys/2 && use_internal_buf == true
271
+      //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false)
272
+      bool use_internal_buf = false;
273
+      size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf);
274
+      BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
275
+      BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
276
+      BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
277
+      
278
+      bool const is_merge_left = (n&1) == 0;
279
+      size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
280
+      if(n && prev_use_internal_buf && prev_merge_left){
281
+         if(is_merge_left || !use_internal_buf){
282
+            move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
283
+         }
284
+         else{
285
+            //Put the buffer just after l_total_combined
286
+            RandIt const buf_end = first+l_prev_total_combined;
287
+            RandIt const buf_beg = buf_end-l_block;
288
+            if(l_prev_total_combined > l_total_combined){
289
+               size_type const l_diff = l_prev_total_combined - l_total_combined;
290
+               move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
291
+            }
292
+            else if(l_prev_total_combined < l_total_combined){
293
+               size_type const l_diff = l_total_combined - l_prev_total_combined;
294
+               move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
295
+            }
296
+         }
297
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After move_data     : ", l_data + l_intbuf);
298
+      }
299
+
300
+      //Combine to form l_merged*2 segments
301
+      if(n_keys){
302
+         size_type upper_n_keys_this_iter = 2*l_merged/l_block;
303
+         if(upper_n_keys_this_iter > 256){
304
+            adaptive_sort_combine_blocks
305
+               ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
306
+               , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
307
+         }
308
+         else{
309
+            unsigned char uint_keys[256];
310
+            adaptive_sort_combine_blocks
311
+               ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
312
+               , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
313
+            }
314
+      }
315
+      else{
316
+         size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
317
+         adaptive_sort_combine_blocks
318
+            ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
319
+            , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
320
+      }
321
+
322
+      BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? "   After comb blocks L:  " : "   After comb blocks R:  ", l_data + l_intbuf);
323
+      prev_merge_left = is_merge_left;
324
+      l_prev_total_combined = l_total_combined;
325
+      l_prev_block = l_block;
326
+      prev_use_internal_buf = use_internal_buf;
327
+   }
328
+   BOOST_ASSERT(l_prev_total_combined == l_data);
329
+   bool const buffer_right = prev_use_internal_buf && prev_merge_left;
330
+
331
+   l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
332
+   n_keys = l_unique - l_intbuf;
333
+   //Restore data from to external common buffer if used
334
+   if(common_xbuf){
335
+      if(buffer_right){
336
+         boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
337
+      }
338
+      else{
339
+         boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
340
+      }
341
+   }
342
+   return buffer_right;
343
+}
344
+
345
+
346
+template<class RandIt, class Compare, class XBuf>  //Sorts the keys and the internal buffer elements and merges them with the rest of [first, first+len)
347
+void adaptive_sort_final_merge( bool buffer_right  //true: the l_intbuf buffer elements are the last ones of the range; false: they follow the keys at the front
348
+                              , RandIt const first  //beginning of the whole range
349
+                              , typename iterator_traits<RandIt>::size_type const l_intbuf  //number of elements that were used as internal buffer
350
+                              , typename iterator_traits<RandIt>::size_type const n_keys  //number of key elements stored in [first, first+n_keys)
351
+                              , typename iterator_traits<RandIt>::size_type const len  //length of the whole range
352
+                              , XBuf & xbuf  //external buffer: cleared here and then handed to every sort/merge helper
353
+                              , Compare comp)  //comparison functor
354
+{
355
+   //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
356
+   xbuf.clear();
357
+
358
+   typedef typename iterator_traits<RandIt>::size_type  size_type;
359
+   size_type const n_key_plus_buf = l_intbuf+n_keys;
360
+   if(buffer_right){
361
+      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
362
+      stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
363
+      stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);  //merge the data with the sorted trailing buffer; NOTE(review): antistable presumably gives priority to equal elements of the second range - confirm
364
+      unstable_sort(first, first+n_keys, comp, xbuf);  //only the keys are sorted here; NOTE(review): presumably safe because keys are unique - confirm
365
+      stable_merge(first, first+n_keys, first+len, comp, xbuf);  //merge the sorted keys with everything after them
366
+   }
367
+   else{
368
+      //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
369
+      stable_sort(first, first+n_key_plus_buf, comp, xbuf);
370
+      if(xbuf.capacity() >= n_key_plus_buf){  //external buffer can hold keys plus buffer: single buffered merge
371
+         buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
372
+      }
373
+      else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){  //external buffer can hold the smaller of both parts: merge in two steps
374
+         stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);  //first the buffer part with the data
375
+         stable_merge(first, first+n_keys, first+len, comp, xbuf);  //then the keys with the result
376
+      }
377
+      else{  //otherwise merge keys plus buffer with the data in one call
378
+         stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
379
+      }
380
+   }
381
+   BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After final_merge   : ", len);
382
+}
383
+
384
+template<class RandIt, class Compare, class Unsigned, class XBuf>  //Computes the sizes used by the sort phases and collects the needed unique elements at the front of [first, first+len)
385
+bool adaptive_sort_build_params  //Returns false when the ideal amount of unique elements was not found and fewer than 4 were collected; true otherwise
386
+   (RandIt first, Unsigned const len, Compare comp  //range [first, first+len) and comparison functor
387
+   , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf  //outputs: number of keys, internal buffer length, base length, length of the buffer for the build phase
388
+   , XBuf & xbuf  //external buffer; its capacity decides how many unique elements are required
389
+   )
390
+{
391
+   typedef Unsigned size_type;
392
+
393
+   //Calculate ideal parameters and try to collect needed unique keys
394
+   l_base = 0u;
395
+
396
+   //Try to find a value near sqrt(len) that is 2^N*l_base where
397
+   //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
398
+   //as build_blocks merges to the left iteratively duplicating the
399
+   //merged size and all the buffer must be used just before the final
400
+   //merge to right step. This guarantees "build_blocks" produces 
401
+   //segments of size l_build_buf*2, maximizing the classic merge phase.
402
+   l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));  //l_base is passed by address so the helper can fill it in
403
+
404
+   //The internal buffer can be expanded if there is enough external memory
405
+   while(xbuf.capacity() >= l_intbuf*2){
406
+      l_intbuf *= 2;
407
+   }
408
+
409
+   //This is the minimum number of keys to implement the ideal algorithm
410
+   //
411
+   //l_intbuf is used as buffer plus the key count
412
+   size_type n_min_ideal_keys = l_intbuf-1;
413
+   while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){  //scan downwards until the candidate is smaller than the number of whole l_intbuf-sized blocks left
414
+      --n_min_ideal_keys;
415
+   }
416
+   n_min_ideal_keys += 1;
417
+   BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
418
+
419
+   if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){  //xbuf can hold l_intbuf elements plus the trailing size_type entries: no keys are taken from the range
420
+      n_keys = 0u;
421
+      l_build_buf = l_intbuf;
422
+   }
423
+   else{
424
+      //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
425
+      //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
426
+      //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
427
+      //
428
+      //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
429
+      //(to be used for keys in combine_all_blocks) as the whole l_build_buf
430
+      //will be backed up in the buffer during build_blocks.
431
+      bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
432
+      size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
433
+      size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
434
+
435
+      //If available memory is 2*sqrt(l), then for "build_params" 
436
+      //the situation is the same as if 2*l_intbuf were collected.
437
+      if(non_unique_buf && collected == n_min_ideal_keys){
438
+         l_build_buf = l_intbuf;
439
+         n_keys = n_min_ideal_keys;
440
+      }
441
+      else if(collected == 2*l_intbuf){
442
+         //l_intbuf*2 elements found. Use all of them in the build phase 
443
+         l_build_buf = l_intbuf*2;
444
+         n_keys = l_intbuf;
445
+      }
446
+      else if(collected >= (n_min_ideal_keys+l_intbuf)){  //at least buffer plus minimum keys were found; ">=" (not "==") so that any surplus unique elements are simply treated as data
447
+         l_build_buf = l_intbuf;
448
+         n_keys = n_min_ideal_keys;
449
+      }
450
+      //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
451
+      //is possible (due to very low unique keys), then go to a slow sort based on rotations.
452
+      else{
453
+         BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
454
+         if(collected < 4){  //No combination possible with fewer than 4 keys
455
+            return false;
456
+         }
457
+         n_keys = l_intbuf;
458
+         while(n_keys&(n_keys-1)){
459
+            n_keys &= n_keys-1;  // make it power of 2 (clears the lowest set bit until one bit is left)
460
+         }
461
+         while(n_keys > collected){
462
+            n_keys/=2;
463
+         }
464
+         //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
465
+         l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
466
+         l_intbuf = 0;  //no internal buffer in this reduced mode: all usable collected elements are keys
467
+         l_build_buf = n_keys;
468
+      }
469
+      BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
470
+   }
471
+
472
+   return true;
473
+}
474
+
475
+// Main explanation of the sort algorithm.
476
+//
477
+// csqrtlen = ceil(sqrt(len));
478
+//
479
+// * First, 2*csqrtlen unique elements are extracted from elements to be
480
+//   sorted and placed in the beginning of the range.
481
+//
482
+// * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
483
+//   will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
484
+//   grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
485
+//   2*csqrtlen unique elements are again the leading elements of the whole range.
486
+//
487
+// * Step "combine_blocks": pairs of previously formed blocks are merged with a different
488
+//   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
489
+//   "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
490
+//   elements, etc) until all trailing (len-2*csqrtlen) elements are merged.
491
+//
492
+//   In "combine_blocks" len/csqrtlen elements are used as "keys" (markers) to
493
+//   know if elements belong to the first or second block to be merged and another 
494
+//   leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
495
+//
496
+//   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
497
+//      Iteratively for each pair of previously merged block:
498
+//         * Blocks are divided into groups of csqrtlen elements and
499
+//           2*merged_block/csqrtlen keys are sorted to be used as markers
500
+//         * Groups are selection-sorted by first or last element (depending whether they are going
501
+//           to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
502
+//         * Elements of each block pair are merged using the csqrtlen buffer taking into account
503
+//           if they belong to the first half or second half (marked by the key).
504
+//
505
+// * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
506
+//   rotations with the rest of sorted elements in the "combine_blocks" step.
507
+//
508
+// Corner cases:
509
+//
510
+// * If no 2*csqrtlen elements can be extracted:
511
+//
512
+//    * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
513
+//      as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
514
+//      means that an additional "combine_blocks" step will be needed to merge all elements.
515
+//    
516
+//    * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
517
+//      then reduces the number of elements used as buffer and keys in the "build_blocks"
518
+//      and "combine_blocks" steps. If "combine_blocks" does not have enough keys due to this reduction
519
+//      then uses a rotation based smart merge.
520
+//
521
+//    * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
522
+//
523
+// * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
524
+//
525
+// * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
526
+//   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
527
+//   keys to combine blocks.
528
+//
529
+// * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
530
+//   using classic merge and "combine_blocks" will use bigger blocks when merging.
531
+template<class RandIt, class Compare, class XBuf>  //Sorts [first, first+len) choosing the strategy from len and from the capacity of xbuf
532
+void adaptive_sort_impl
533
+   ( RandIt first  //beginning of the range to sort
534
+   , typename iterator_traits<RandIt>::size_type const len  //number of elements to sort
535
+   , Compare comp  //comparison functor
536
+   , XBuf & xbuf  //external buffer wrapper (may have zero capacity)
537
+   )
538
+{
539
+   typedef typename iterator_traits<RandIt>::size_type  size_type;
540
+
541
+   //Small sorts go directly to insertion sort
542
+   if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
543
+      insertion_sort(first, first + len, comp);
544
+   }
545
+   else if((len-len/2) <= xbuf.capacity()){  //ceil(len/2) elements fit in the external buffer: use merge_sort with that memory
546
+      merge_sort(first, first+len, comp, xbuf.data());
547
+   }
548
+   else{
549
+      //Make sure it is at least four
550
+      BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
551
+
552
+      size_type l_base = 0;
553
+      size_type l_intbuf = 0;
554
+      size_type n_keys = 0;
555
+      size_type l_build_buf = 0;
556
+
557
+      //Calculate and extract needed unique elements. If a minimum is not achieved
558
+      //fallback to a slow stable sort
559
+      if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
560
+         stable_sort(first, first+len, comp, xbuf);
561
+      }
562
+      else{
563
+         BOOST_ASSERT(l_build_buf);
564
+         //Otherwise, continue the adaptive_sort
565
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   After collect_unique: ", len);
566
+         size_type const n_key_plus_buf = l_intbuf+n_keys;  //keys and buffer elements are the leading elements of the range
567
+         //l_build_buf is always power of two if l_intbuf is zero
568
+         BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
569
+
570
+         //Classic merge sort until internal buffer and xbuf are exhausted
571
+         size_type const l_merged = adaptive_sort_build_blocks
572
+            (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);  //the range passed starts l_build_buf elements before the end of keys plus buffer
573
+         BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After build_blocks:   ", len);
574
+
575
+         //Non-trivial merge
576
+         bool const buffer_right = adaptive_sort_combine_all_blocks
577
+            (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);  //keys start at first, buffer plus data at first+n_keys; NOTE(review): n_keys and l_intbuf appear to be updated by the callee - confirm
578
+
579
+         //Sort keys and buffer and merge the whole sequence
580
+         adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
581
+      }
582
+   }
583
+}
584
+
585
+}  //namespace detail_adaptive {
586
+
587
+///@endcond
588
+
589
+//! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
590
+//!   to comparison functor "comp". The sort is stable (order of equal elements
591
+//!   is guaranteed to be preserved). Performance is improved if additional raw storage is
592
+//!   provided.
593
+//!
594
+//! <b>Requires</b>:
595
+//!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
596
+//!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
597
+//!
598
+//! <b>Parameters</b>:
599
+//!   - first, last: the range of elements to sort
600
+//!   - comp: comparison function object which returns true if the first argument is ordered before the second.
601
+//!   - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
602
+//!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
603
+//!      is ceil(std::distance(first, last)/2).
604
+//!
605
+//! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
606
+//!   of dereferenced RandIt throws.
607
+//!
608
+//! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps.
609
+//!   Comparisons are close to minimum even with no additional memory. Constant factor for data movement is minimized
610
+//!   when uninitialized_len is ceil(std::distance(first, last)/2). Reasonably good performance is achieved when
610
+//!   uninitialized_len is ceil(sqrt(std::distance(first, last)))*2.
612
+//!
613
+//! <b>Caution</b>: Experimental implementation, not production-ready.
614
+template<class RandIt, class RandRawIt, class Compare>
615
+void adaptive_sort( RandIt first, RandIt last, Compare comp
616
+               , RandRawIt uninitialized  //start of the raw storage that is wrapped in an adaptive_xbuf
617
+               , typename iterator_traits<RandIt>::size_type uninitialized_len)  //number of value_type elements that storage can hold
618
+{
619
+   typedef typename iterator_traits<RandIt>::size_type  size_type;
620
+   typedef typename iterator_traits<RandIt>::value_type value_type;
621
+
622
+   ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len);  //wrapper over the caller-provided raw storage
623
+   ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf);  //the implementation takes the length instead of the end iterator
624
+}
625
+
626
+template<class RandIt, class Compare>  //Overload without auxiliary storage
627
+void adaptive_sort( RandIt first, RandIt last, Compare comp)
628
+{
629
+   typedef typename iterator_traits<RandIt>::value_type value_type;
630
+   adaptive_sort(first, last, comp, (value_type*)0, 0u);  //forwards to the overload above with a null raw buffer of length zero
631
+}
632
+
633
+}  //namespace movelib {
634
+}  //namespace boost {
635
+
636
+#include <boost/move/detail/config_end.hpp>
637
+
638
+#endif   //#define BOOST_MOVE_ADAPTIVE_SORT_HPP
0 639
new file mode 100755
... ...
@@ -0,0 +1,1492 @@
1
+//////////////////////////////////////////////////////////////////////////////
2
+//
3
+// (C) Copyright Ion Gaztanaga 2015-2016.
4
+// Distributed under the Boost Software License, Version 1.0.
5
+// (See accompanying file LICENSE_1_0.txt or copy at
6
+// http://www.boost.org/LICENSE_1_0.txt)
7
+//
8
+// See http://www.boost.org/libs/move for documentation.
9
+//
10
+//////////////////////////////////////////////////////////////////////////////
11
+//
12
+// Stable sorting that works in O(N*log(N)) worst time
13
+// and uses O(1) extra memory
14
+//
15
+//////////////////////////////////////////////////////////////////////////////
16
+//
17
+// The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin
18
+// and explained in the article from the russian collaborative blog
19
+// Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on
20
+// ideas from B-C. Huang and M. A. Langston explained in their article
21
+// "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)"
22
+// (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf).
23
+//
24
+// This implementation by Ion Gaztanaga uses previous ideas with additional changes:
25
+// 
26
+// - Use of GCD-based rotation.
27
+// - Non power of two buffer-sizes.
28
+// - Tries to find sqrt(len)*2 unique keys, so that the merge sort
29
+//   phase can form up to sqrt(len)*4 segments if enough keys are found.
30
+// - The merge-sort phase can take advantage of external memory to
31
+//   save some additional combination steps.
32
+// - Combination phase: Blocks are selection sorted and merged in parallel.
33
+// - The combination phase is performed alternating merge to left and merge
34
+//   to right phases minimizing swaps due to internal buffer repositioning.
35
+// - When merging blocks special optimizations are made to avoid moving some
36
+//   elements twice.
37
+//
38
+// The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts
39
+// from the sorting algorithm and implementing an additional block merge algorithm
40
+// without moving elements to left or right.
41
+//////////////////////////////////////////////////////////////////////////////
42
+#ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
43
+#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
44
+
45
+#include <boost/move/detail/config_begin.hpp>
46
+#include <boost/move/detail/reverse_iterator.hpp>
47
+#include <boost/move/algo/move.hpp>
48
+#include <boost/move/algo/detail/merge.hpp>
49
+#include <boost/move/adl_move_swap.hpp>
50
+#include <boost/move/algo/detail/insertion_sort.hpp>
51
+#include <boost/move/algo/detail/merge_sort.hpp>
52
+#include <boost/move/algo/detail/heap_sort.hpp>
53
+#include <boost/move/algo/detail/merge.hpp>
54
+#include <boost/move/algo/detail/is_sorted.hpp>
55
+#include <boost/core/ignore_unused.hpp>
56
+#include <boost/assert.hpp>
57
+#include <boost/cstdint.hpp>
58
+
59
+#ifndef BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL  //the statistics level can be predefined by the user
60
+   #define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 1  //default level
61
+#endif
62
+
63
+#ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS  //statistics enabled: the PRINT macros forward to print_stats
64
+   #if BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL == 2  //level 2: both L1 and L2 messages are printed
65
+      #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \
66
+         print_stats(STR, L)\
67
+      //
68
+
69
+      #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) \
70
+         print_stats(STR, L)\
71
+      //
72
+   #else  //any other level: only L1 messages are printed
73
+      #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \
74
+         print_stats(STR, L)\
75
+      //
76
+
77
+      #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L)
78
+   #endif
79
+#else  //statistics disabled: both macros expand to nothing
80
+   #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L)
81
+   #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L)
82
+#endif
83
+
84
+#ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS  //invariant checks enabled
85
+   #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT  BOOST_ASSERT  //checks are forwarded to BOOST_ASSERT
86
+#else
87
+   #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L)  //checks disabled: the argument is discarded and never evaluated
88
+#endif
89
+
90
+namespace boost {
91
+namespace movelib {
92
+
93
+#if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS)
94
+
95
+inline bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less)  //Overload for the global ::order_perf_type; inline because it is a non-template function defined in a header
96
+{
97
+   if (first != last) {
98
+      const order_perf_type *next = first, *cur(first);
99
+      while (++next != last) {
100
+         if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val)))  //each element must have a smaller key than the next one, or the same key and a smaller val
101
+            return false;
102
+         cur = next;
103
+      }
104
+   }
105
+   return true;  //empty and single-element ranges are considered sorted
106
+}
107
+
108
+#endif   //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
109
+
110
+namespace detail_adaptive {
111
+
112
+static const std::size_t AdaptiveSortInsertionSortThreshold = 16;  //ranges up to this length are sorted with insertion_sort in adaptive_sort_impl
113
+//static const std::size_t AdaptiveSortInsertionSortThreshold = 4;
114
+BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0);  //the threshold must be a power of two
115
+
116
+#if defined BOOST_HAS_INTPTR_T
117
+   typedef ::boost::uintptr_t uintptr_t;
118
+#else  //no boost::uintptr_t available: fall back to std::size_t
119
+   typedef std::size_t uintptr_t;
120
+#endif
121
+
122
+template<class T>  //Returns a reference to the smaller argument according to operator<
123
+const T &min_value(const T &a, const T &b)
124
+{
125
+   return a < b ? a : b;  //when neither is smaller, b is returned
126
+}
127
+
128
+template<class T>  //Returns a reference to the greater argument according to operator>
129
+const T &max_value(const T &a, const T &b)
130
+{
131
+   return a > b ? a : b;  //when neither is greater, b is returned
132
+}
133
+
134
+template<class ForwardIt, class Pred, class V>  //Counts the elements x of [first, last) for which pred(x, v) is non-zero
135
+typename iterator_traits<ForwardIt>::size_type
136
+   count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v)
137
+{
138
+   typedef typename iterator_traits<ForwardIt>::size_type size_type;
139
+   size_type count = 0;
140
+   while(first != last) {
141
+      count += static_cast<size_type>(0 != pred(*first, v));  //adds 1 when the predicate holds, 0 otherwise
142
+      ++first;
143
+   }
144
+   return count;
145
+}
146
+
147
+
148
+template<class RandIt, class Compare>  //Returns the first position of [first1, last1) whose element is ordered after next_key, or last1 if there is none
149
+RandIt skip_until_merge
150
+   ( RandIt first1, RandIt const last1
151
+   , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp)
152
+{
153
+   while(first1 != last1 && !comp(next_key, *first1)){  //skip elements that are not greater than next_key
154
+      ++first1;
155
+   }
156
+   return first1;
157
+}
158
+
159
+
160
+template<class RandItKeys, class RandIt>  //Swaps the block [begin, end) with the block starting at "with", swaps both keys and keeps key_mid tracking the same key
161
+void swap_and_update_key
162
+   ( RandItKeys const key_next  //key of the block starting at "with"
163
+   , RandItKeys const key_range2  //key of the block [begin, end)
164
+   , RandItKeys &key_mid  //in/out: position of a tracked key; updated when that key is one of the two swapped
165
+   , RandIt const begin
166
+   , RandIt const end
167
+   , RandIt const with)
168
+{
169
+   if(begin != with){  //nothing to do (data and keys untouched) when both blocks are the same
170
+      ::boost::adl_move_swap_ranges(begin, end, with);
171
+      ::boost::adl_move_swap(*key_next, *key_range2);
172
+      if(key_next == key_mid){  //the tracked key moved to key_range2
173
+         key_mid = key_range2;
174
+      }
175
+      else if(key_mid == key_range2){  //the tracked key moved to key_next
176
+         key_mid = key_next;
177
+      }
178
+   }
179
+}
180
+
181
+template<class RandItKeys>  //Swaps the keys at key_next and key_range2 (no data is moved) and keeps key_mid tracking the same key
182
+void update_key
183
+(RandItKeys const key_next
184
+   , RandItKeys const key_range2
185
+   , RandItKeys &key_mid)  //in/out: position of a tracked key; updated when that key is one of the two swapped
186
+{
187
+   if (key_next != key_range2) {  //nothing to do when both positions are the same
188
+      ::boost::adl_move_swap(*key_next, *key_range2);
189
+      if (key_next == key_mid) {  //the tracked key moved to key_range2
190
+         key_mid = key_range2;
191
+      }
192
+      else if (key_mid == key_range2) {  //the tracked key moved to key_next
193
+         key_mid = key_next;
194
+      }
195
+   }
196
+}
197
+
198
+template<class RandItKeys, class RandIt, class RandIt2, class Op>  //Like swap_and_update_key, but the data is moved element by element with a three-way "op" that also involves "buffer"
199
+RandIt2 buffer_and_update_key  //Returns the buffer iterator advanced once per processed element (unchanged when begin == with)
200
+(RandItKeys const key_next
201
+   , RandItKeys const key_range2
202
+   , RandItKeys &key_mid  //in/out: position of a tracked key; updated when that key is one of the two swapped
203
+   , RandIt begin
204
+   , RandIt end
205
+   , RandIt with
206
+   , RandIt2 buffer
207
+   , Op op)
208
+{
209
+   if (begin != with) {  //nothing to do when both blocks are the same
210
+      while(begin != end) {
211
+         op(three_way_t(), begin++, with++, buffer++);  //NOTE(review): the exact data movement between the three positions is defined by Op - confirm against its three_way_t overload
212
+      }
213
+      ::boost::adl_move_swap(*key_next, *key_range2);
214
+      if (key_next == key_mid) {  //the tracked key moved to key_range2
215
+         key_mid = key_range2;
216
+      }
217
+      else if (key_mid == key_range2) {  //the tracked key moved to key_next
218
+         key_mid = key_next;
219
+      }
220
+   }
221
+   return buffer;
222
+}
223
+
224
+///////////////////////////////////////////////////////////////////////////////
225
+//
226
+//                         MERGE BUFFERLESS
227
+//
228
+///////////////////////////////////////////////////////////////////////////////
229
+
230
+// [first1, last1) merge [last1,last2) -> [first1,last2), using rotations only. Stops as soon as one of both ranges is exhausted.
231
+template<class RandIt, class Compare>
232
+RandIt partial_merge_bufferless_impl  //Returns the beginning of the remaining, not yet merged, elements
233
+   (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)  //*pis_range1_A is negated when the first range is the one that gets exhausted
234
+{
235
+   if(last1 == last2){  //empty second range: the first range is left as is
236
+      return first1;
237
+   }
238
+   bool const is_range1_A = *pis_range1_A;
239
+   if(first1 != last1 && comp(*last1, last1[-1])){  //work is only needed if the first range is not empty and both ranges are out of order at their boundary
240
+      do{
241
+         RandIt const old_last1 = last1;
242
+         last1  = boost::movelib::lower_bound(last1, last2, *first1, comp);  //second-range elements ordered before *first1
243
+         first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported
244
+         if(last1 == last2){  //second range exhausted: the remainder starts at first1
245
+            return first1;
246
+         }
247
+         do{
248
+            ++first1;
249
+         } while(last1 != first1 && !comp(*last1, *first1) );  //skip first-range elements that are not greater than the head of the second range
250
+      } while(first1 != last1);
251
+   }
252
+   *pis_range1_A = !is_range1_A;  //first range exhausted: what remains starts at last1 and is of the other kind
253
+   return last1;
254
+}
255
+
256
+// [first1, last1) merge [last1,last2) -> [first1,last2). Selects the comparison to use from *pis_range1_A.
257
+template<class RandIt, class Compare>
258
+RandIt partial_merge_bufferless
259
+   (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)
260
+{
261
+   return *pis_range1_A ? partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp)  //flag set: plain comp
262
+                        : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp));  //flag not set: comp wrapped in antistable
263
+}
264
+
265
+template<class SizeType>  //Number of keys needed to merge n_block_a plus n_block_b blocks: one key per block
266
+static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b)
267
+{
268
+   return n_block_a + n_block_b;
269
+}
270
+
271
+template<class RandItKeys, class KeyCompare, class RandIt, class Compare>  //Finds the block with the smallest first element; ties are resolved with the block keys
272
+typename iterator_traits<RandIt>::size_type
273
+   find_next_block  //Returns the index of that block, relative to key_first/first
274
+      ( RandItKeys const key_first  //key_first[i] is the key of block i
275
+      , KeyCompare key_comp
276
+      , RandIt const first  //block i starts at first[i*l_block]
277
+      , typename iterator_traits<RandIt>::size_type const l_block  //length of each block
278
+      , typename iterator_traits<RandIt>::size_type const ix_first_block  //first block index to be inspected
279
+      , typename iterator_traits<RandIt>::size_type const ix_last_block  //one past the last block index to be inspected
280
+      , Compare comp)
281
+{
282
+   typedef typename iterator_traits<RandIt>::size_type      size_type;
283
+   typedef typename iterator_traits<RandIt>::value_type     value_type;
284
+   typedef typename iterator_traits<RandItKeys>::value_type key_type;
285
+   BOOST_ASSERT(ix_first_block <= ix_last_block);
286
+   size_type ix_min_block = 0u;  //block 0 is always the initial candidate, even if ix_first_block is greater than zero
287
+   for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) {
288
+      const value_type &min_val = first[ix_min_block*l_block];
289
+      const value_type &cur_val = first[szt_i*l_block];
290
+      const key_type   &min_key = key_first[ix_min_block];
291
+      const key_type   &cur_key = key_first[szt_i];
292
+
293
+      bool const less_than_minimum = comp(cur_val, min_val) ||
294
+         (!comp(min_val, cur_val) && key_comp(cur_key, min_key));  //equivalent first elements: the block with the smaller key wins
295
+
296
+      if (less_than_minimum) {
297
+         ix_min_block = szt_i;
298
+      }
299
+   }
300
+   return ix_min_block;
301
+}
302
+
303
+template<class RandItKeys, class KeyCompare, class RandIt, class Compare>  //Selection-sorts the regular blocks using the keys and then merges everything with rotation-based (bufferless) merges
304
+void merge_blocks_bufferless
305
+   ( RandItKeys const key_first  //one key per regular block, starting here
306
+   , KeyCompare key_comp
307
+   , RandIt const first  //layout: l_irreg1 elements, then n_block_a+n_block_b blocks of l_block elements, then l_irreg2 elements
308
+   , typename iterator_traits<RandIt>::size_type const l_block  //length of each regular block
309
+   , typename iterator_traits<RandIt>::size_type const l_irreg1  //length of the leading irregular part
310
+   , typename iterator_traits<RandIt>::size_type const n_block_a  //number of regular blocks of the first kind ("A")
311
+   , typename iterator_traits<RandIt>::size_type const n_block_b  //number of regular blocks of the second kind ("B")
312
+   , typename iterator_traits<RandIt>::size_type const l_irreg2  //length of the trailing irregular part
313
+   , Compare comp)
314
+{
315
+   typedef typename iterator_traits<RandIt>::size_type size_type;
316
+   size_type const key_count = needed_keys_count(n_block_a, n_block_b);
317
+   ::boost::ignore_unused(key_count);
318
+   //BOOST_ASSERT(n_block_a || n_block_b);
319
+   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted_and_unique(key_first, key_first + key_count, key_comp));
320
+   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
321
+
322
+   size_type n_bef_irreg2 = 0;  //number of blocks placed before the position of the trailing irregular part
323
+   bool l_irreg_pos_count = true;  //true while n_bef_irreg2 is still being counted
324
+   RandItKeys key_mid(key_first + n_block_a);  //tracks the key that initially is the first one after the n_block_a leading keys
325
+   RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block;
326
+   RandIt const last_irr2  = first_irr2 + l_irreg2;
327
+
328
+   {  //Selection sort blocks
329
+      size_type n_block_left = n_block_b + n_block_a;
330
+      RandItKeys key_range2(key_first);
331
+
332
+      size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;  //[min_check, max_check) is the window of block indexes inspected by find_next_block
333
+      size_type max_check = min_value<size_type>(min_check+1, n_block_left);
334
+      for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) {  //both window limits are decremented each step, stopping at zero
335
+         size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp);
336
+         RandItKeys const key_next(key_range2 + next_key_idx);
337
+         max_check = min_value<size_type>(max_value<size_type>(max_check, next_key_idx+size_type(2)), n_block_left);  //widen the window past the chosen block, never beyond the remaining blocks
338
+
339
+         RandIt const first_min = f + next_key_idx*l_block;
340
+
341
+         //Check if irregular b block should go here.
342
+         //If so, stop counting: n_bef_irreg2 keeps the number of blocks placed before this point
343
+         if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){
344
+            l_irreg_pos_count = false;
345
+         }
346
+         n_bef_irreg2 += l_irreg_pos_count;
347
+
348
+         swap_and_update_key(key_next, key_range2, key_mid, f, f + l_block, first_min);  //bring the minimum block (and its key) to the current position
349
+         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(f, f+l_block, comp));
350
+         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first_min, first_min + l_block, comp));
351
+         BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block)));
352
+      }
353
+   }
354
+   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp));
355
+
356
+   RandIt first1 = first;
357
+   RandIt last1  = first+l_irreg1;
358
+   RandItKeys const key_end (key_first+n_bef_irreg2);  //only the blocks counted in n_bef_irreg2 are merged in the loop below
359
+   bool is_range1_A = true;
360
+
361
+   for(RandItKeys key_next = key_first; key_next != key_end; ++key_next){
362
+      bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid);  //a block is of kind "A" if its key is ordered before the tracked key (or if key_mid is past the last key)
363
+      first1 = is_range1_A == is_range2_A  //blocks of the same kind need no merge: just advance
364
+         ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp);
365
+      last1 += l_block;
366
+      BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first, first1, comp));
367
+   }
368
+
369
+   merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp);  //finally merge what is left before first_irr2 with the trailing irregular part
370
+   BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(first, last_irr2, comp));
371
+}
372
+
373
+// Complexity: 2*distance(first, last)+max_collected^2/2
374
+//
375
+// Tries to collect at most n_keys unique elements from [first, last),
376
+// in the beginning of the range, and ordered according to comp
377
+// 
378
+// Returns the number of collected keys
379
+template<class RandIt, class Compare, class XBuf>
380
+typename iterator_traits<RandIt>::size_type
381
+   collect_unique
382
+      ( RandIt const first, RandIt const last
383
+      , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp
384
+      , XBuf & xbuf)
385
+{
386
+   typedef typename iterator_traits<RandIt>::size_type size_type;
387
+   size_type h = 0;
388
+   if(max_collected){
389