OSDN Git Service

2007-11-22 Johannes Singler <singler@ira.uka.de>
[pf3gnuchains/gcc-fork.git] / libstdc++-v3 / include / parallel / multiway_merge.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING.  If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction.  Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License.  This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/multiway_merge.h
32 *  @brief Implementation of sequential and parallel multiway merge.
33 *
34 *  Explanations on the high-speed merging routines in the appendix of
35 *
36 *  P. Sanders.
37 *  Fast priority queues for cached memory.
38 *  ACM Journal of Experimental Algorithmics, 5, 2000.
39 *
40 *  This file is a GNU parallel extension to the Standard C++ Library.
41 */
42
43 // Written by Johannes Singler.
44
45 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
46 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
47
48 #include <vector>
49
50 #include <bits/stl_algo.h>
51 #include <parallel/features.h>
52 #include <parallel/parallel.h>
53 #include <parallel/merge.h>
54 #include <parallel/losertree.h>
55 #if _GLIBCXX_ASSERTIONS
56 #include <parallel/checkers.h>
57 #endif
58
59 /** @brief Length of a sequence described by a pair of iterators. */
60 #define LENGTH(s) ((s).second - (s).first)
61
62 // XXX need iterator typedefs
63 namespace __gnu_parallel
64 {
65 template<typename RandomAccessIterator, typename Comparator>
66   class guarded_iterator;
67
68 template<typename RandomAccessIterator, typename Comparator>
69   inline bool
70   operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
71             guarded_iterator<RandomAccessIterator, Comparator>& bi2);
72
73 template<typename RandomAccessIterator, typename Comparator>
74   inline bool
75   operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
76             guarded_iterator<RandomAccessIterator, Comparator>& bi2);
77
78   /** @brief Iterator wrapper supporting an implicit supremum at the end
79       of the sequence, dominating all comparisons.
80       *  Deriving from RandomAccessIterator is not possible since
81       *  RandomAccessIterator need not be a class.
82       */
83 template<typename RandomAccessIterator, typename Comparator>
84   class guarded_iterator
85   {
86   private:
87     /** @brief Current iterator position. */
88     RandomAccessIterator current;
89
90     /** @brief End iterator of the sequence. */
91     RandomAccessIterator end;
92
93     /** @brief Comparator. */
94     Comparator& comp;
95
96   public:
97     /** @brief Constructor. Sets iterator to beginning of sequence.
98     *  @param begin Begin iterator of sequence.
99     *  @param end End iterator of sequence.
100     *  @param comp Comparator provided for associated overloaded
101     *  compare operators. */
102     inline guarded_iterator(RandomAccessIterator begin,
103                             RandomAccessIterator end, Comparator& comp)
104     : current(begin), end(end), comp(comp)
105     { }
106
107     /** @brief Pre-increment operator.
108     *  @return This. */
109     inline guarded_iterator<RandomAccessIterator, Comparator>&
110     operator++()
111     {
112       ++current;
113       return *this;
114     }
115
116     /** @brief Dereference operator.
117     *  @return Referenced element. */
118     inline typename std::iterator_traits<RandomAccessIterator>::value_type
119     operator*()
120     { return *current; }
121
122     /** @brief Convert to wrapped iterator.
123     *  @return Wrapped iterator. */
124     inline operator RandomAccessIterator()
125     { return current; }
126
127     friend bool
128     operator< <RandomAccessIterator, Comparator>(
129         guarded_iterator<RandomAccessIterator, Comparator>& bi1,
130         guarded_iterator<RandomAccessIterator, Comparator>& bi2);
131
132     friend bool
133     operator<= <RandomAccessIterator, Comparator>(
134         guarded_iterator<RandomAccessIterator, Comparator>& bi1,
135         guarded_iterator<RandomAccessIterator, Comparator>& bi2);
136   };
137
138 /** @brief Compare two elements referenced by guarded iterators.
139  *  @param bi1 First iterator.
140  *  @param bi2 Second iterator.
141  *  @return @c True if less. */
142 template<typename RandomAccessIterator, typename Comparator>
143   inline bool
144   operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
145             guarded_iterator<RandomAccessIterator, Comparator>& bi2)
146   {
147     if (bi1.current == bi1.end) //bi1 is sup
148       return bi2.current == bi2.end;    //bi2 is not sup
149     if (bi2.current == bi2.end) //bi2 is sup
150       return true;
151     return (bi1.comp)(*bi1, *bi2);      //normal compare
152   }
153
154 /** @brief Compare two elements referenced by guarded iterators.
155  *  @param bi1 First iterator.
156  *  @param bi2 Second iterator.
157  *  @return @c True if less equal. */
158 template<typename RandomAccessIterator, typename Comparator>
159   inline bool
160   operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
161             guarded_iterator<RandomAccessIterator, Comparator>& bi2)
162   {
163     if (bi2.current == bi2.end) //bi1 is sup
164       return bi1.current != bi1.end;    //bi2 is not sup
165     if (bi1.current == bi1.end) //bi2 is sup
166       return false;
167     return !(bi1.comp)(*bi2, *bi1);     //normal compare
168   }
169
170 template<typename RandomAccessIterator, typename Comparator>
171   class unguarded_iterator;
172
173 template<typename RandomAccessIterator, typename Comparator>
174   inline bool
175   operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
176             unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
177
178 template<typename RandomAccessIterator, typename Comparator>
179   inline bool
180   operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
181              unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
182
183 template<typename RandomAccessIterator, typename Comparator>
184   class unguarded_iterator
185   {
186   private:
187     /** @brief Current iterator position. */
188     RandomAccessIterator& current;
189     /** @brief Comparator. */
190     mutable Comparator& comp;
191
192   public:
193     /** @brief Constructor. Sets iterator to beginning of sequence.
194     *  @param begin Begin iterator of sequence.
195     *  @param end Unused, only for compatibility.
196     *  @param comp Unused, only for compatibility. */
197     inline unguarded_iterator(RandomAccessIterator begin,
198                               RandomAccessIterator end, Comparator& comp)
199     : current(begin), comp(comp)
200     { }
201
202     /** @brief Pre-increment operator.
203     *  @return This. */
204     inline  unguarded_iterator<RandomAccessIterator, Comparator>&
205     operator++()
206     {
207       current++;
208       return *this;
209     }
210
211     /** @brief Dereference operator.
212     *  @return Referenced element. */
213     inline typename std::iterator_traits<RandomAccessIterator>::value_type
214     operator*()
215     { return *current; }
216
217     /** @brief Convert to wrapped iterator.
218     *  @return Wrapped iterator. */
219     inline
220     operator RandomAccessIterator()
221     { return current; }
222
223     friend bool
224     operator< <RandomAccessIterator, Comparator>(
225         unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
226         unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
227
228     friend bool
229     operator<= <RandomAccessIterator, Comparator>(
230         unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
231         unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
232   };
233
234 /** @brief Compare two elements referenced by unguarded iterators.
235  *  @param bi1 First iterator.
236  *  @param bi2 Second iterator.
237  *  @return @c True if less. */
238 template<typename RandomAccessIterator, typename Comparator>
239   inline bool
240   operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
241             unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
242   {
243     // Normal compare.
244     return (bi1.comp)(*bi1, *bi2);
245   }
246
247 /** @brief Compare two elements referenced by unguarded iterators.
248  *  @param bi1 First iterator.
249  *  @param bi2 Second iterator.
250  *  @return @c True if less equal. */
251 template<typename RandomAccessIterator, typename Comparator>
252   inline bool
253   operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
254             unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
255   {
256     // Normal compare.
257     return !(bi1.comp)(*bi2, *bi1);
258   }
259
260 /** Prepare a set of sequences to be merged without a (end) guard
261  *  @param seqs_begin
262  *  @param seqs_end
263  *  @param comp
264  *  @param min_sequence
265  *  @param stable
266  *  @pre (seqs_end - seqs_begin > 0) */
267 template<typename RandomAccessIteratorIterator, typename Comparator>
268   typename std::iterator_traits<
269       typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
270       ::first_type>::difference_type
271   prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
272                     RandomAccessIteratorIterator seqs_end, Comparator comp,
273                     int& min_sequence, bool stable)
274   {
275     _GLIBCXX_CALL(seqs_end - seqs_begin)
276
277     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
278         ::value_type::first_type
279       RandomAccessIterator1;
280     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
281       value_type;
282     typedef typename std::iterator_traits<RandomAccessIterator1>
283       ::difference_type
284       difference_type;
285
286     if ((*seqs_begin).first == (*seqs_begin).second)
287       {
288         // Empty sequence found, it's the first one.
289         min_sequence = 0;
290         return -1;
291       }
292
293     // Last element in sequence.
294     value_type min = *((*seqs_begin).second - 1);
295     min_sequence = 0;
296     for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
297       {
298         if ((*s).first == (*s).second)
299           {
300             // Empty sequence found.
301             min_sequence = static_cast<int>(s - seqs_begin);
302             return -1;
303           }
304
305         // Last element in sequence.
306         const value_type& v = *((*s).second - 1);
307         if (comp(v, min))       //strictly smaller
308           {
309             min = v;
310             min_sequence = static_cast<int>(s - seqs_begin);
311           }
312       }
313
314     difference_type overhang_size = 0;
315
316     int s = 0;
317     for (s = 0; s <= min_sequence; s++)
318       {
319         RandomAccessIterator1 split;
320         if (stable)
321           split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
322                                   min, comp);
323         else
324           split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
325                                   min, comp);
326
327         overhang_size += seqs_begin[s].second - split;
328       }
329
330     for (; s < (seqs_end - seqs_begin); s++)
331       {
332         RandomAccessIterator1 split = std::lower_bound(
333             seqs_begin[s].first, seqs_begin[s].second, min, comp);
334         overhang_size += seqs_begin[s].second - split;
335       }
336
337     // So many elements will be left over afterwards.
338     return overhang_size;
339   }
340
341 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
342  *  @param seqs_begin
343  *  @param seqs_end
344  *  @param comp */
345 template<typename RandomAccessIteratorIterator, typename Comparator>
346   typename std::iterator_traits<typename std::iterator_traits<
347       RandomAccessIteratorIterator>::value_type::first_type>::difference_type
348   prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
349                             RandomAccessIteratorIterator seqs_end,
350                             Comparator comp)
351   {
352     _GLIBCXX_CALL(seqs_end - seqs_begin)
353
354     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
355       ::value_type::first_type
356       RandomAccessIterator1;
357     typedef typename std::iterator_traits<RandomAccessIterator1>
358       ::value_type
359       value_type;
360     typedef typename std::iterator_traits<RandomAccessIterator1>
361       ::difference_type
362       difference_type;
363
364     // Last element in sequence.
365     value_type* max = NULL;
366     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
367       {
368         if ((*s).first == (*s).second)
369           continue;
370
371         // Last element in sequence.
372         value_type& v = *((*s).second - 1);
373
374         // Strictly greater.
375         if (!max || comp(*max, v))
376           max = &v;
377       }
378
379     difference_type overhang_size = 0;
380     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
381       {
382         RandomAccessIterator1 split =
383             std::lower_bound((*s).first, (*s).second, *max, comp);
384         overhang_size += (*s).second - split;
385
386         // Set sentinel.
387         *((*s).second) = *max;
388       }
389
390     // So many elements will be left over afterwards.
391     return overhang_size;
392   }
393
394 /** @brief Highly efficient 3-way merging procedure.
395  *  @param seqs_begin Begin iterator of iterator pair input sequence.
396  *  @param seqs_end End iterator of iterator pair input sequence.
397  *  @param target Begin iterator out output sequence.
398  *  @param comp Comparator.
399  *  @param length Maximum length to merge.
400  *  @param stable Unused, stable anyway.
401  *  @return End iterator of output sequence. */
402 template<
403     template<typename RAI, typename C> class iterator,
404     typename RandomAccessIteratorIterator,
405     typename RandomAccessIterator3,
406     typename _DifferenceTp,
407     typename Comparator>
408   RandomAccessIterator3
409   multiway_merge_3_variant(
410       RandomAccessIteratorIterator seqs_begin,
411       RandomAccessIteratorIterator seqs_end,
412       RandomAccessIterator3 target,
413       Comparator comp, _DifferenceTp length, bool stable)
414   {
415     _GLIBCXX_CALL(length);
416
417     typedef _DifferenceTp difference_type;
418
419     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
420       ::value_type::first_type
421       RandomAccessIterator1;
422     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
423       value_type;
424
425     if (length == 0)
426       return target;
427
428     iterator<RandomAccessIterator1, Comparator>
429       seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
430       seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
431       seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
432
433     if (seq0 <= seq1)
434       {
435         if (seq1 <= seq2)
436           goto s012;
437         else
438           if (seq2 <  seq0)
439             goto s201;
440           else
441             goto s021;
442       }
443     else
444       {
445         if (seq1 <= seq2)
446           {
447             if (seq0 <= seq2)
448               goto s102;
449             else
450               goto s120;
451           }
452         else
453           goto s210;
454       }
455
456 #define Merge3Case(a,b,c,c0,c1)                         \
457     s ## a ## b ## c :                                  \
458       *target = *seq ## a;                              \
459     ++target;                                           \
460     length--;                                           \
461     ++seq ## a;                                         \
462     if (length == 0) goto finish;                       \
463     if (seq ## a c0 seq ## b) goto s ## a ## b ## c;    \
464     if (seq ## a c1 seq ## c) goto s ## b ## a ## c;    \
465     goto s ## b ## c ## a;
466
467     Merge3Case(0, 1, 2, <=, <=);
468     Merge3Case(1, 2, 0, <=, < );
469     Merge3Case(2, 0, 1, < , < );
470     Merge3Case(1, 0, 2, < , <=);
471     Merge3Case(0, 2, 1, <=, <=);
472     Merge3Case(2, 1, 0, < , < );
473
474 #undef Merge3Case
475
476   finish:
477     ;
478
479     seqs_begin[0].first = seq0;
480     seqs_begin[1].first = seq1;
481     seqs_begin[2].first = seq2;
482
483     return target;
484   }
485
486 template<
487     typename RandomAccessIteratorIterator,
488     typename RandomAccessIterator3,
489     typename _DifferenceTp,
490     typename Comparator>
491   RandomAccessIterator3
492   multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
493                             RandomAccessIteratorIterator seqs_end,
494                             RandomAccessIterator3 target,
495                             Comparator comp,
496                             _DifferenceTp length, bool stable)
497   {
498     _GLIBCXX_CALL(length);
499
500     typedef _DifferenceTp difference_type;
501     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
502       ::value_type::first_type
503       RandomAccessIterator1;
504     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
505       value_type;
506
507     int min_seq;
508     RandomAccessIterator3 target_end;
509
510     // Stable anyway.
511     difference_type overhang =
512         prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
513
514     difference_type total_length = 0;
515     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
516       total_length += LENGTH(*s);
517
518     if (overhang != -1)
519       {
520         difference_type unguarded_length =
521             std::min(length, total_length - overhang);
522         target_end = multiway_merge_3_variant<unguarded_iterator>
523           (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
524         overhang = length - unguarded_length;
525       }
526     else
527       {
528         // Empty sequence found.
529         overhang = length;
530         target_end = target;
531       }
532
533 #if _GLIBCXX_ASSERTIONS
534     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
535     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
536 #endif
537
538     switch (min_seq)
539       {
540       case 0:
541         // Iterators will be advanced accordingly.
542         target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
543                                   seqs_begin[2].first, seqs_begin[2].second,
544                                   target_end, overhang, comp);
545         break;
546       case 1:
547         target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
548                                   seqs_begin[2].first, seqs_begin[2].second,
549                                   target_end, overhang, comp);
550         break;
551       case 2:
552         target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
553                                   seqs_begin[1].first, seqs_begin[1].second,
554                                   target_end, overhang, comp);
555         break;
556       default:
557         _GLIBCXX_PARALLEL_ASSERT(false);
558       }
559
560 #if _GLIBCXX_ASSERTIONS
561     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
562     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
563 #endif
564
565     return target_end;
566   }
567
568 /** @brief Highly efficient 4-way merging procedure.
569  *  @param seqs_begin Begin iterator of iterator pair input sequence.
570  *  @param seqs_end End iterator of iterator pair input sequence.
571  *  @param target Begin iterator out output sequence.
572  *  @param comp Comparator.
573  *  @param length Maximum length to merge.
574  *  @param stable Unused, stable anyway.
575  *  @return End iterator of output sequence. */
576 template<
577     template<typename RAI, typename C> class iterator,
578     typename RandomAccessIteratorIterator,
579     typename RandomAccessIterator3,
580     typename _DifferenceTp,
581     typename Comparator>
582   RandomAccessIterator3
583   multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
584                            RandomAccessIteratorIterator seqs_end,
585                            RandomAccessIterator3 target,
586                            Comparator comp, _DifferenceTp length, bool stable)
587   {
588     _GLIBCXX_CALL(length);
589     typedef _DifferenceTp difference_type;
590
591     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
592       ::value_type::first_type
593       RandomAccessIterator1;
594     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
595       value_type;
596
597     iterator<RandomAccessIterator1, Comparator>
598       seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
599       seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
600       seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
601       seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
602
603 #define Decision(a,b,c,d) {                                     \
604       if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;      \
605       if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;      \
606       if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;      \
607       goto s ## a ## b ## c ## d;  }
608
609     if (seq0 <= seq1)
610       {
611         if (seq1 <= seq2)
612           Decision(0,1,2,3)
613           else
614             if (seq2 < seq0)
615               Decision(2,0,1,3)
616               else
617                 Decision(0,2,1,3)
618                   }
619     else
620       {
621         if (seq1 <= seq2)
622           {
623             if (seq0 <= seq2)
624               Decision(1,0,2,3)
625               else
626                 Decision(1,2,0,3)
627                   }
628         else
629           Decision(2,1,0,3)
630             }
631
632 #define Merge4Case(a,b,c,d,c0,c1,c2)                            \
633     s ## a ## b ## c ## d:                                      \
634       if (length == 0) goto finish;                             \
635     *target = *seq ## a;                                        \
636     ++target;                                                   \
637     length--;                                                   \
638     ++seq ## a;                                                 \
639     if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;       \
640     if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;       \
641     if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;       \
642     goto s ## b ## c ## d ## a;
643
644     Merge4Case(0, 1, 2, 3, <=, <=, <=);
645     Merge4Case(0, 1, 3, 2, <=, <=, <=);
646     Merge4Case(0, 2, 1, 3, <=, <=, <=);
647     Merge4Case(0, 2, 3, 1, <=, <=, <=);
648     Merge4Case(0, 3, 1, 2, <=, <=, <=);
649     Merge4Case(0, 3, 2, 1, <=, <=, <=);
650     Merge4Case(1, 0, 2, 3, < , <=, <=);
651     Merge4Case(1, 0, 3, 2, < , <=, <=);
652     Merge4Case(1, 2, 0, 3, <=, < , <=);
653     Merge4Case(1, 2, 3, 0, <=, <=, < );
654     Merge4Case(1, 3, 0, 2, <=, < , <=);
655     Merge4Case(1, 3, 2, 0, <=, <=, < );
656     Merge4Case(2, 0, 1, 3, < , < , <=);
657     Merge4Case(2, 0, 3, 1, < , <=, < );
658     Merge4Case(2, 1, 0, 3, < , < , <=);
659     Merge4Case(2, 1, 3, 0, < , <=, < );
660     Merge4Case(2, 3, 0, 1, <=, < , < );
661     Merge4Case(2, 3, 1, 0, <=, < , < );
662     Merge4Case(3, 0, 1, 2, < , < , < );
663     Merge4Case(3, 0, 2, 1, < , < , < );
664     Merge4Case(3, 1, 0, 2, < , < , < );
665     Merge4Case(3, 1, 2, 0, < , < , < );
666     Merge4Case(3, 2, 0, 1, < , < , < );
667     Merge4Case(3, 2, 1, 0, < , < , < );
668
669 #undef Merge4Case
670 #undef Decision
671
672   finish:
673     ;
674
675     seqs_begin[0].first = seq0;
676     seqs_begin[1].first = seq1;
677     seqs_begin[2].first = seq2;
678     seqs_begin[3].first = seq3;
679
680     return target;
681   }
682
683 template<
684     typename RandomAccessIteratorIterator,
685     typename RandomAccessIterator3,
686     typename _DifferenceTp,
687     typename Comparator>
688   RandomAccessIterator3
689   multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
690                             RandomAccessIteratorIterator seqs_end,
691                             RandomAccessIterator3 target,
692                             Comparator comp,
693                             _DifferenceTp length, bool stable)
694   {
695     _GLIBCXX_CALL(length);
696     typedef _DifferenceTp difference_type;
697
698     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
699       ::value_type::first_type
700       RandomAccessIterator1;
701     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
702       value_type;
703
704     int min_seq;
705     RandomAccessIterator3 target_end;
706
707     // Stable anyway.
708     difference_type overhang =
709         prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
710
711     difference_type total_length = 0;
712     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
713       total_length += LENGTH(*s);
714
715     if (overhang != -1)
716       {
717         difference_type unguarded_length =
718             std::min(length, total_length - overhang);
719         target_end = multiway_merge_4_variant<unguarded_iterator>
720           (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
721         overhang = length - unguarded_length;
722       }
723     else
724       {
725         // Empty sequence found.
726         overhang = length;
727         target_end = target;
728       }
729
730 #if _GLIBCXX_ASSERTIONS
731     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
732     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
733 #endif
734
735     std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
736         one_missing(seqs_begin, seqs_end);
737     one_missing.erase(one_missing.begin() + min_seq);   //remove
738
739     target_end = multiway_merge_3_variant<guarded_iterator>(
740         one_missing.begin(), one_missing.end(),
741         target_end, comp, overhang, stable);
742
743     // Insert back again.
744     one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
745     // Write back modified iterators.
746     copy(one_missing.begin(), one_missing.end(), seqs_begin);
747
748 #if _GLIBCXX_ASSERTIONS
749     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
750     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
751 #endif
752
753     return target_end;
754   }
755
756 /** @brief Basic multi-way merging procedure.
757  *
758  *  The head elements are kept in a sorted array, new heads are
759  *  inserted linearly.
760  *  @param seqs_begin Begin iterator of iterator pair input sequence.
761  *  @param seqs_end End iterator of iterator pair input sequence.
762  *  @param target Begin iterator out output sequence.
763  *  @param comp Comparator.
764  *  @param length Maximum length to merge.
765  *  @param stable Stable merging incurs a performance penalty.
766  *  @return End iterator of output sequence.
767  */
768 template<
769     typename RandomAccessIteratorIterator,
770     typename RandomAccessIterator3,
771     typename _DifferenceTp,
772     typename Comparator>
773   RandomAccessIterator3
774   multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
775                         RandomAccessIteratorIterator seqs_end,
776                         RandomAccessIterator3 target,
777                         Comparator comp, _DifferenceTp length, bool stable)
778   {
779     _GLIBCXX_CALL(length)
780
781     typedef _DifferenceTp difference_type;
782     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
783       ::value_type::first_type
784       RandomAccessIterator1;
785     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
786       value_type;
787
788     // Num remaining pieces.
789     int k = static_cast<int>(seqs_end - seqs_begin), nrp;
790
791     value_type* pl = static_cast<value_type*>(
792       ::operator new(sizeof(value_type) * k));
793     int* source = new int[k];
794     difference_type total_length = 0;
795
796 #define POS(i) seqs_begin[(i)].first
797 #define STOPS(i) seqs_begin[(i)].second
798
799     // Write entries into queue.
800     nrp = 0;
801     for (int pi = 0; pi < k; pi++)
802       {
803         if (STOPS(pi) != POS(pi))
804           {
805             pl[nrp] = *(POS(pi));
806             source[nrp] = pi;
807             nrp++;
808             total_length += LENGTH(seqs_begin[pi]);
809           }
810       }
811
812     if (stable)
813       {
814         for (int k = 0; k < nrp - 1; k++)
815           for (int pi = nrp - 1; pi > k; pi--)
816             if (comp(pl[pi], pl[pi - 1]) ||
817                 (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
818               {
819                 std::swap(pl[pi - 1], pl[pi]);
820                 std::swap(source[pi - 1], source[pi]);
821               }
822       }
823     else
824       {
825         for (int k = 0; k < nrp - 1; k++)
826           for (int pi = nrp - 1; pi > k; pi--)
827             if (comp(pl[pi], pl[pi-1]))
828               {
829                 std::swap(pl[pi-1], pl[pi]);
830                 std::swap(source[pi-1], source[pi]);
831               }
832       }
833
834     // Iterate.
835     if (stable)
836       {
837         int j;
838         while (nrp > 0 && length > 0)
839           {
840             if (source[0] < source[1])
841               {
842                 // pl[0] <= pl[1]
843                 while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
844                   {
845                     *target = pl[0];
846                     ++target;
847                     ++POS(source[0]);
848                     length--;
849                     if (POS(source[0]) == STOPS(source[0]))
850                       {
851                         // Move everything to the left.
852                         for (int s = 0; s < nrp - 1; s++)
853                           {
854                             pl[s] = pl[s + 1];
855                             source[s] = source[s + 1];
856                           }
857                         nrp--;
858                         break;
859                       }
860                     else
861                       pl[0] = *(POS(source[0]));
862                   }
863               }
864             else
865               {
866                 // pl[0] < pl[1]
867                 while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
868                   {
869                     *target = pl[0];
870                     ++target;
871                     ++POS(source[0]);
872                     length--;
873                     if (POS(source[0]) == STOPS(source[0]))
874                       {
875                         for (int s = 0; s < nrp - 1; s++)
876                           {
877                             pl[s] = pl[s + 1];
878                             source[s] = source[s + 1];
879                           }
880                         nrp--;
881                         break;
882                       }
883                     else
884                       pl[0] = *(POS(source[0]));
885                   }
886               }
887
888             // Sink down.
889             j = 1;
890             while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
891                                 (!comp(pl[j - 1], pl[j])
892                                     && (source[j] < source[j - 1]))))
893               {
894                 std::swap(pl[j - 1], pl[j]);
895                 std::swap(source[j - 1], source[j]);
896                 j++;
897               }
898           }
899       }
900     else
901       {
902         int j;
903         while (nrp > 0 && length > 0)
904           {
905             // pl[0] <= pl[1]
906             while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
907               {
908                 *target = pl[0];
909                 ++target;
910                 ++POS(source[0]);
911                 length--;
912                 if (POS(source[0]) == STOPS(source[0]))
913                   {
914                     for (int s = 0; s < (nrp - 1); s++)
915                       {
916                         pl[s] = pl[s + 1];
917                         source[s] = source[s + 1];
918                       }
919                     nrp--;
920                     break;
921                   }
922                 else
923                   pl[0] = *(POS(source[0]));
924               }
925
926             // Sink down.
927             j = 1;
928             while ((j < nrp) && comp(pl[j], pl[j - 1]))
929               {
930                 std::swap(pl[j - 1], pl[j]);
931                 std::swap(source[j - 1], source[j]);
932                 j++;
933               }
934           }
935       }
936
937     delete[] pl;
938     delete[] source;
939
940     return target;
941   }
942
943 /** @brief Multi-way merging procedure for a high branching factor,
944  * guarded case.
945  *
946  *  The head elements are kept in a loser tree.
947  *  @param seqs_begin Begin iterator of iterator pair input sequence.
948  *  @param seqs_end End iterator of iterator pair input sequence.
949  *  @param target Begin iterator out output sequence.
950  *  @param comp Comparator.
951  *  @param length Maximum length to merge.
952  *   @param stable Stable merging incurs a performance penalty.
953  *  @return End iterator of output sequence.
954  */
955 template<
956     typename LT,
957     typename RandomAccessIteratorIterator,
958     typename RandomAccessIterator3,
959     typename _DifferenceTp,
960     typename Comparator>
961   RandomAccessIterator3
962   multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
963                             RandomAccessIteratorIterator seqs_end,
964                             RandomAccessIterator3 target,
965                             Comparator comp,
966                             _DifferenceTp length, bool stable)
967   {
968     _GLIBCXX_CALL(length)
969
970     typedef _DifferenceTp difference_type;
971     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
972       ::value_type::first_type
973       RandomAccessIterator1;
974     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
975       value_type;
976
977     int k = static_cast<int>(seqs_end - seqs_begin);
978
979     LT lt(k, comp);
980
981     difference_type total_length = 0;
982
983     // Default value for potentially non-default-constructible types.
984     value_type* arbitrary_element = NULL;
985
986     for (int t = 0; t < k; t++)
987       {
988         if(arbitrary_element == NULL && LENGTH(seqs_begin[t]) > 0)
989           arbitrary_element = &(*seqs_begin[t].first);
990         total_length += LENGTH(seqs_begin[t]);
991       }
992
993     if(total_length == 0)
994       return target;
995
996     for (int t = 0; t < k; t++)
997       {
998         if (stable)
999           {
1000             if (seqs_begin[t].first == seqs_begin[t].second)
1001               lt.insert_start_stable(*arbitrary_element, t, true);
1002             else
1003               lt.insert_start_stable(*seqs_begin[t].first, t, false);
1004           }
1005         else
1006           {
1007             if (seqs_begin[t].first == seqs_begin[t].second)
1008               lt.insert_start(*arbitrary_element, t, true);
1009             else
1010               lt.insert_start(*seqs_begin[t].first, t, false);
1011           }
1012       }
1013
1014     if (stable)
1015       lt.init_stable();
1016     else
1017       lt.init();
1018
1019     total_length = std::min(total_length, length);
1020
1021     int source;
1022
1023     if (stable)
1024       {
1025         for (difference_type i = 0; i < total_length; i++)
1026           {
1027             // Take out.
1028             source = lt.get_min_source();
1029
1030             *(target++) = *(seqs_begin[source].first++);
1031
1032             // Feed.
1033             if (seqs_begin[source].first == seqs_begin[source].second)
1034               lt.delete_min_insert_stable(*arbitrary_element, true);
1035             else
1036               // Replace from same source.
1037               lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1038
1039           }
1040       }
1041     else
1042       {
1043         for (difference_type i = 0; i < total_length; i++)
1044           {
1045             //take out
1046             source = lt.get_min_source();
1047
1048             *(target++) = *(seqs_begin[source].first++);
1049
1050             // Feed.
1051             if (seqs_begin[source].first == seqs_begin[source].second)
1052               lt.delete_min_insert(*arbitrary_element, true);
1053             else
1054               // Replace from same source.
1055               lt.delete_min_insert(*seqs_begin[source].first, false);
1056           }
1057       }
1058
1059     return target;
1060   }
1061
1062 /** @brief Multi-way merging procedure for a high branching factor,
1063  * unguarded case.
1064  *
1065  *  The head elements are kept in a loser tree.
1066  *  @param seqs_begin Begin iterator of iterator pair input sequence.
1067  *  @param seqs_end End iterator of iterator pair input sequence.
1068  *  @param target Begin iterator out output sequence.
1069  *  @param comp Comparator.
1070  *  @param length Maximum length to merge.
1071  *  @param stable Stable merging incurs a performance penalty.
1072  *  @return End iterator of output sequence.
1073  *  @pre No input will run out of elements during the merge.
1074  */
1075 template<
1076     typename LT,
1077     typename RandomAccessIteratorIterator,
1078     typename RandomAccessIterator3,
1079     typename _DifferenceTp, typename Comparator>
1080   RandomAccessIterator3
1081   multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
1082                                       RandomAccessIteratorIterator seqs_end,
1083                                       RandomAccessIterator3 target,
1084                                       Comparator comp,
1085                                       _DifferenceTp length, bool stable)
1086   {
1087     _GLIBCXX_CALL(length)
1088     typedef _DifferenceTp difference_type;
1089
1090     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1091       ::value_type::first_type
1092       RandomAccessIterator1;
1093     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1094       value_type;
1095
1096     int k = seqs_end - seqs_begin;
1097
1098     LT lt(k, comp);
1099
1100     difference_type total_length = 0;
1101
1102     for (int t = 0; t < k; t++)
1103       {
1104 #if _GLIBCXX_ASSERTIONS
1105         _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
1106 #endif
1107         if (stable)
1108           lt.insert_start_stable(*seqs_begin[t].first, t, false);
1109         else
1110           lt.insert_start(*seqs_begin[t].first, t, false);
1111
1112         total_length += LENGTH(seqs_begin[t]);
1113       }
1114
1115     if (stable)
1116       lt.init_stable();
1117     else
1118       lt.init();
1119
1120     // Do not go past end.
1121     length = std::min(total_length, length);
1122
1123     int source;
1124
1125 #if _GLIBCXX_ASSERTIONS
1126     difference_type i = 0;
1127 #endif
1128
1129     if (stable)
1130       {
1131         RandomAccessIterator3 target_end = target + length;
1132         while (target < target_end)
1133           {
1134             // Take out.
1135             source = lt.get_min_source();
1136
1137 #if _GLIBCXX_ASSERTIONS
1138             _GLIBCXX_PARALLEL_ASSERT(i == 0
1139                 || !comp(*(seqs_begin[source].first), *(target - 1)));
1140 #endif
1141
1142             *(target++) = *(seqs_begin[source].first++);
1143
1144 #if _GLIBCXX_ASSERTIONS
1145             _GLIBCXX_PARALLEL_ASSERT(
1146                 (seqs_begin[source].first != seqs_begin[source].second)
1147                 || (i == length - 1));
1148             i++;
1149 #endif
1150             // Feed.
1151             // Replace from same source.
1152             lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1153
1154           }
1155       }
1156     else
1157       {
1158         RandomAccessIterator3 target_end = target + length;
1159         while (target < target_end)
1160           {
1161             // Take out.
1162             source = lt.get_min_source();
1163
1164 #if _GLIBCXX_ASSERTIONS
1165             if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
1166               printf("         %i %i %i\n", length, i, source);
1167             _GLIBCXX_PARALLEL_ASSERT(i == 0
1168                 || !comp(*(seqs_begin[source].first), *(target - 1)));
1169 #endif
1170
1171             *(target++) = *(seqs_begin[source].first++);
1172
1173 #if _GLIBCXX_ASSERTIONS
1174             if (!((seqs_begin[source].first != seqs_begin[source].second)
1175                 || (i >= length - 1)))
1176               printf("         %i %i %i\n", length, i, source);
1177             _GLIBCXX_PARALLEL_ASSERT(
1178                 (seqs_begin[source].first != seqs_begin[source].second)
1179                 || (i >= length - 1));
1180             i++;
1181 #endif
1182             // Feed.
1183             // Replace from same source.
1184             lt.delete_min_insert(*seqs_begin[source].first, false);
1185           }
1186       }
1187
1188     return target;
1189   }
1190
1191 template<
1192     typename RandomAccessIteratorIterator,
1193     typename RandomAccessIterator3,
1194     typename _DifferenceTp,
1195     typename Comparator>
1196   RandomAccessIterator3
1197   multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
1198                                      RandomAccessIteratorIterator seqs_end,
1199                                      RandomAccessIterator3 target,
1200                                      Comparator comp,
1201                                      _DifferenceTp length, bool stable)
1202   {
1203     _GLIBCXX_CALL(length)
1204
1205     typedef _DifferenceTp difference_type;
1206
1207     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1208       ::value_type::first_type
1209       RandomAccessIterator1;
1210     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1211       value_type;
1212
1213     int min_seq;
1214     RandomAccessIterator3 target_end;
1215     difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
1216                                           comp, min_seq, stable);
1217
1218     difference_type total_length = 0;
1219     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1220       total_length += LENGTH(*s);
1221
1222     if (overhang != -1)
1223       {
1224         difference_type unguarded_length =
1225             std::min(length, total_length - overhang);
1226         target_end = multiway_merge_loser_tree_unguarded
1227           <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1228           (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1229         overhang = length - unguarded_length;
1230       }
1231     else
1232       {
1233         // Empty sequence found.
1234         overhang = length;
1235         target_end = target;
1236       }
1237
1238 #if _GLIBCXX_ASSERTIONS
1239     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1240     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1241 #endif
1242
1243     target_end = multiway_merge_loser_tree
1244       <typename loser_tree_traits<value_type, Comparator>::LT>
1245       (seqs_begin, seqs_end, target_end, comp, overhang, stable);
1246
1247 #if _GLIBCXX_ASSERTIONS
1248     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1249     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1250 #endif
1251
1252     return target_end;
1253   }
1254
1255 template<
1256     typename RandomAccessIteratorIterator,
1257     typename RandomAccessIterator3,
1258     typename _DifferenceTp,
1259     typename Comparator>
1260   RandomAccessIterator3
1261   multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
1262                                      RandomAccessIteratorIterator seqs_end,
1263                                       RandomAccessIterator3 target,
1264                                       Comparator comp,
1265                                       _DifferenceTp length, bool stable)
1266   {
1267     _GLIBCXX_CALL(length)
1268
1269     typedef _DifferenceTp difference_type;
1270     typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
1271     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1272       ::value_type::first_type
1273       RandomAccessIterator1;
1274     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1275       value_type;
1276
1277     RandomAccessIterator3 target_end;
1278     difference_type overhang =
1279         prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
1280
1281     difference_type total_length = 0;
1282     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1283       {
1284         total_length += LENGTH(*s);
1285
1286         // Sentinel spot.
1287         (*s).second++;
1288       }
1289
1290     difference_type unguarded_length =
1291         std::min(length, total_length - overhang);
1292     target_end = multiway_merge_loser_tree_unguarded
1293       <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1294       (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1295     overhang = length - unguarded_length;
1296
1297 #if _GLIBCXX_ASSERTIONS
1298     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1299     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1300 #endif
1301
1302     // Copy rest stable.
1303     for (RandomAccessIteratorIterator s = seqs_begin;
1304          s != seqs_end && overhang > 0; s++)
1305       {
1306         // Restore.
1307         (*s).second--;
1308         difference_type local_length =
1309             std::min<difference_type>(overhang, LENGTH(*s));
1310         target_end = std::copy((*s).first, (*s).first + local_length,
1311                                target_end);
1312         (*s).first += local_length;
1313         overhang -= local_length;
1314       }
1315
1316 #if _GLIBCXX_ASSERTIONS
1317     _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
1318     _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1319     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1320 #endif
1321
1322     return target_end;
1323   }
1324
1325 /** @brief Sequential multi-way merging switch.
1326  *
1327  *  The decision if based on the branching factor and runtime settings.
1328  *  @param seqs_begin Begin iterator of iterator pair input sequence.
1329  *  @param seqs_end End iterator of iterator pair input sequence.
1330  *  @param target Begin iterator out output sequence.
1331  *  @param comp Comparator.
1332  *  @param length Maximum length to merge.
1333  *  @param stable Stable merging incurs a performance penalty.
1334  *  @param sentinel The sequences have a sentinel element.
1335  *  @return End iterator of output sequence. */
1336 template<
1337     typename RandomAccessIteratorIterator,
1338     typename RandomAccessIterator3,
1339     typename _DifferenceTp,
1340     typename Comparator>
1341   RandomAccessIterator3
1342   multiway_merge(RandomAccessIteratorIterator seqs_begin,
1343                  RandomAccessIteratorIterator seqs_end,
1344                  RandomAccessIterator3 target,
1345                  Comparator comp, _DifferenceTp length,
1346                  bool stable, bool sentinel,
1347                  sequential_tag)
1348   {
1349     _GLIBCXX_CALL(length)
1350
1351     typedef _DifferenceTp difference_type;
1352     typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1353       ::value_type::first_type
1354       RandomAccessIterator1;
1355     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1356       value_type;
1357
1358 #if _GLIBCXX_ASSERTIONS
1359     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1360       _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
1361 #endif
1362
1363     RandomAccessIterator3 return_target = target;
1364     int k = static_cast<int>(seqs_end - seqs_begin);
1365
1366     Settings::MultiwayMergeAlgorithm mwma =
1367         Settings::multiway_merge_algorithm;
1368
1369     if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
1370       mwma = Settings::LOSER_TREE_COMBINED;
1371
1372     switch (k)
1373       {
1374       case 0:
1375         break;
1376       case 1:
1377         return_target = std::copy(seqs_begin[0].first,
1378                                   seqs_begin[0].first + length,
1379                                   target);
1380         seqs_begin[0].first += length;
1381         break;
1382       case 2:
1383         return_target = merge_advance(seqs_begin[0].first,
1384                                       seqs_begin[0].second,
1385                                       seqs_begin[1].first,
1386                                       seqs_begin[1].second,
1387                                       target, length, comp);
1388         break;
1389       case 3:
1390         switch (mwma)
1391           {
1392           case Settings::LOSER_TREE_COMBINED:
1393             return_target = multiway_merge_3_combined(seqs_begin,
1394                 seqs_end,
1395                 target,
1396                 comp, length, stable);
1397             break;
1398           case Settings::LOSER_TREE_SENTINEL:
1399             return_target = multiway_merge_3_variant<unguarded_iterator>(
1400                 seqs_begin,
1401                 seqs_end,
1402                 target,
1403                 comp, length, stable);
1404             break;
1405           default:
1406             return_target = multiway_merge_3_variant<guarded_iterator>(
1407                 seqs_begin,
1408                 seqs_end,
1409                 target,
1410                 comp, length, stable);
1411             break;
1412           }
1413         break;
1414       case 4:
1415         switch (mwma)
1416           {
1417           case Settings::LOSER_TREE_COMBINED:
1418             return_target = multiway_merge_4_combined(
1419                 seqs_begin,
1420                 seqs_end,
1421                 target,
1422                 comp, length, stable);
1423             break;
1424           case Settings::LOSER_TREE_SENTINEL:
1425             return_target = multiway_merge_4_variant<unguarded_iterator>(
1426                 seqs_begin,
1427                 seqs_end,
1428                 target,
1429                 comp, length, stable);
1430             break;
1431           default:
1432             return_target = multiway_merge_4_variant<guarded_iterator>(
1433                 seqs_begin,
1434                 seqs_end,
1435                 target,
1436                 comp, length, stable);
1437             break;
1438           }
1439         break;
1440       default:
1441         {
1442           switch (mwma)
1443             {
1444             case Settings::BUBBLE:
1445               return_target = multiway_merge_bubble(
1446                   seqs_begin,
1447                   seqs_end,
1448                   target,
1449                   comp, length, stable);
1450               break;
1451 #if _GLIBCXX_LOSER_TREE_EXPLICIT
1452             case Settings::LOSER_TREE_EXPLICIT:
1453               return_target = multiway_merge_loser_tree<
1454                     LoserTreeExplicit<value_type, Comparator> >(
1455                   seqs_begin,
1456                   seqs_end,
1457                   target,
1458                   comp, length, stable);
1459               break;
1460 #endif
1461 #if _GLIBCXX_LOSER_TREE
1462             case Settings::LOSER_TREE:
1463               return_target = multiway_merge_loser_tree<
1464                     LoserTree<value_type, Comparator> >(
1465                   seqs_begin,
1466                   seqs_end,
1467                   target,
1468                   comp, length, stable);
1469               break;
1470 #endif
1471 #if _GLIBCXX_LOSER_TREE_COMBINED
1472             case Settings::LOSER_TREE_COMBINED:
1473               return_target = multiway_merge_loser_tree_combined(
1474                   seqs_begin,
1475                   seqs_end,
1476                   target,
1477                   comp, length, stable);
1478               break;
1479 #endif
1480 #if _GLIBCXX_LOSER_TREE_SENTINEL
1481             case Settings::LOSER_TREE_SENTINEL:
1482               return_target = multiway_merge_loser_tree_sentinel(
1483                   seqs_begin,
1484                   seqs_end,
1485                   target,
1486                   comp, length, stable);
1487               break;
1488 #endif
1489             default:
1490               // multiway_merge algorithm not implemented.
1491               _GLIBCXX_PARALLEL_ASSERT(0);
1492               break;
1493             }
1494         }
1495       }
1496 #if _GLIBCXX_ASSERTIONS
1497     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1498 #endif
1499
1500     return return_target;
1501   }
1502
1503 /** @brief Parallel multi-way merge routine.
1504  *
1505  *  The decision if based on the branching factor and runtime settings.
1506  *  @param seqs_begin Begin iterator of iterator pair input sequence.
1507  *  @param seqs_end End iterator of iterator pair input sequence.
1508  *  @param target Begin iterator out output sequence.
1509  *  @param comp Comparator.
1510  *  @param length Maximum length to merge.
1511  *  @param stable Stable merging incurs a performance penalty.
1512  *  @param sentinel Ignored.
1513  *  @return End iterator of output sequence.
1514  */
1515 template<
1516     typename RandomAccessIteratorIterator,
1517     typename RandomAccessIterator3,
1518     typename _DifferenceTp,
1519     typename Comparator>
1520   RandomAccessIterator3
1521   parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
1522                           RandomAccessIteratorIterator seqs_end,
1523                            RandomAccessIterator3 target,
1524                            Comparator comp,
1525                            _DifferenceTp length, bool stable, bool sentinel)
1526     {
1527       _GLIBCXX_CALL(length)
1528
1529       typedef _DifferenceTp difference_type;
1530       typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1531         ::value_type::first_type
1532         RandomAccessIterator1;
1533       typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1534         value_type;
1535
1536       // k sequences.
1537       int k = static_cast<int>(seqs_end - seqs_begin);
1538
1539       difference_type total_length = 0;
1540       for (RandomAccessIteratorIterator raii = seqs_begin;
1541            raii != seqs_end; raii++)
1542         total_length += LENGTH(*raii);
1543
1544       _GLIBCXX_CALL(total_length)
1545
1546       if (total_length == 0 || k == 0)
1547         return target;
1548
1549       bool tight = (total_length == length);
1550
1551       std::vector<std::pair<difference_type, difference_type> >* pieces;
1552
1553       thread_index_t num_threads = static_cast<thread_index_t>(
1554           std::min<difference_type>(get_max_threads(), total_length));
1555
1556 #     pragma omp parallel num_threads (num_threads)
1557         {
1558 #         pragma omp single
1559             {
1560               num_threads = omp_get_num_threads();
1561               // Thread t will have to merge pieces[iam][0..k - 1]
1562               pieces = new std::vector<
1563                   std::pair<difference_type, difference_type> >[num_threads];
1564               for (int s = 0; s < num_threads; s++)
1565                 pieces[s].resize(k);
1566
1567               difference_type num_samples =
1568                   Settings::merge_oversampling * num_threads;
1569
1570               if (Settings::multiway_merge_splitting == Settings::SAMPLING)
1571                 {
1572                   value_type* samples = static_cast<value_type*>(
1573                     ::operator new(sizeof(value_type) * k * num_samples));
1574                   // Sample.
1575                   for (int s = 0; s < k; s++)
1576                     for (int i = 0; (difference_type)i < num_samples; i++)
1577                       {
1578                         difference_type sample_index =
1579                             static_cast<difference_type>(
1580                                 LENGTH(seqs_begin[s]) * (double(i + 1) /
1581                                 (num_samples + 1)) * (double(length)
1582                                 / total_length));
1583                         samples[s * num_samples + i] =
1584                             seqs_begin[s].first[sample_index];
1585                       }
1586
1587                   if (stable)
1588                     __gnu_sequential::stable_sort(
1589                       samples, samples + (num_samples * k), comp);
1590                   else
1591                     __gnu_sequential::sort(
1592                       samples, samples + (num_samples * k), comp);
1593
1594                   for (int slab = 0; slab < num_threads; slab++)
1595                     // For each slab / processor.
1596                     for (int seq = 0; seq < k; seq++)
1597                       {
1598                         // For each sequence.
1599                         if (slab > 0)
1600                           pieces[slab][seq].first =
1601                               std::upper_bound(
1602                                 seqs_begin[seq].first,
1603                                 seqs_begin[seq].second,
1604                                 samples[num_samples * k * slab / num_threads],
1605                                   comp)
1606                               - seqs_begin[seq].first;
1607                         else
1608                           {
1609                             // Absolute beginning.
1610                             pieces[slab][seq].first = 0;
1611                           }
1612                         if ((slab + 1) < num_threads)
1613                           pieces[slab][seq].second =
1614                               std::upper_bound(
1615                                   seqs_begin[seq].first,
1616                                   seqs_begin[seq].second,
1617                                   samples[num_samples * k * (slab + 1) /
1618                                       num_threads], comp)
1619                               - seqs_begin[seq].first;
1620                         else
1621                         pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
1622                       }
1623                     delete[] samples;
1624                 }
1625               else
1626                 {
1627                   // (Settings::multiway_merge_splitting == Settings::EXACT).
1628                   std::vector<RandomAccessIterator1>* offsets =
1629                       new std::vector<RandomAccessIterator1>[num_threads];
1630                   std::vector<
1631                       std::pair<RandomAccessIterator1, RandomAccessIterator1>
1632                       > se(k);
1633
1634                   copy(seqs_begin, seqs_end, se.begin());
1635
1636                   difference_type* borders =
1637                       new difference_type[num_threads + 1];
1638                   equally_split(length, num_threads, borders);
1639
1640                   for (int s = 0; s < (num_threads - 1); s++)
1641                     {
1642                       offsets[s].resize(k);
1643                       multiseq_partition(
1644                           se.begin(), se.end(), borders[s + 1],
1645                           offsets[s].begin(), comp);
1646
1647                       // Last one also needed and available.
1648                       if (!tight)
1649                         {
1650                           offsets[num_threads - 1].resize(k);
1651                           multiseq_partition(se.begin(), se.end(),
1652                                 difference_type(length),
1653                                 offsets[num_threads - 1].begin(),  comp);
1654                         }
1655                     }
1656
1657
1658                   for (int slab = 0; slab < num_threads; slab++)
1659                     {
1660                       // For each slab / processor.
1661                       for (int seq = 0; seq < k; seq++)
1662                         {
1663                           // For each sequence.
1664                           if (slab == 0)
1665                             {
1666                               // Absolute beginning.
1667                               pieces[slab][seq].first = 0;
1668                             }
1669                           else
1670                             pieces[slab][seq].first =
1671                                 pieces[slab - 1][seq].second;
1672                           if (!tight || slab < (num_threads - 1))
1673                             pieces[slab][seq].second =
1674                                 offsets[slab][seq] - seqs_begin[seq].first;
1675                           else
1676                             {
1677                               // slab == num_threads - 1
1678                               pieces[slab][seq].second =
1679                                   LENGTH(seqs_begin[seq]);
1680                             }
1681                         }
1682                     }
1683                   delete[] offsets;
1684                 }
1685             } //single
1686
1687           thread_index_t iam = omp_get_thread_num();
1688
1689           difference_type target_position = 0;
1690
1691           for (int c = 0; c < k; c++)
1692             target_position += pieces[iam][c].first;
1693
1694           if (k > 2)
1695             {
1696               std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks
1697                 = new
1698                   std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
1699
1700               difference_type local_length = 0;
1701               for (int s = 0; s < k; s++)
1702                 {
1703                   chunks[s] = std::make_pair(
1704                       seqs_begin[s].first + pieces[iam][s].first,
1705                       seqs_begin[s].first + pieces[iam][s].second);
1706                   local_length += LENGTH(chunks[s]);
1707                 }
1708
1709               multiway_merge(
1710                     chunks, chunks + k, target + target_position, comp,
1711                     std::min(local_length, length - target_position),
1712                     stable, false, sequential_tag());
1713
1714               delete[] chunks;
1715             }
1716           else if (k == 2)
1717             {
1718               RandomAccessIterator1
1719                   begin0 = seqs_begin[0].first + pieces[iam][0].first,
1720                   begin1 = seqs_begin[1].first + pieces[iam][1].first;
1721               merge_advance(begin0,
1722                     seqs_begin[0].first + pieces[iam][0].second,
1723                     begin1,
1724                     seqs_begin[1].first + pieces[iam][1].second,
1725                     target + target_position,
1726                     (pieces[iam][0].second - pieces[iam][0].first) +
1727                         (pieces[iam][1].second - pieces[iam][1].first),
1728                     comp);
1729             }
1730         } //parallel
1731
1732 #if _GLIBCXX_ASSERTIONS
1733       _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1734 #endif
1735
1736       // Update ends of sequences.
1737       for (int s = 0; s < k; s++)
1738         seqs_begin[s].first += pieces[num_threads - 1][s].second;
1739
1740       delete[] pieces;
1741
1742       return target + length;
1743     }
1744
1745 /**
1746  *  @brief Multi-way merging front-end.
1747  *  @param seqs_begin Begin iterator of iterator pair input sequence.
1748  *  @param seqs_end End iterator of iterator pair input sequence.
1749  *  @param target Begin iterator out output sequence.
1750  *  @param comp Comparator.
1751  *  @param length Maximum length to merge.
1752  *  @param stable Stable merging incurs a performance penalty.
1753  *  @return End iterator of output sequence.
1754  */
1755 template<
1756     typename RandomAccessIteratorPairIterator,
1757     typename RandomAccessIterator3,
1758     typename _DifferenceTp,
1759     typename Comparator>
1760   RandomAccessIterator3
1761   multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1762                 RandomAccessIteratorPairIterator seqs_end,
1763                 RandomAccessIterator3 target, Comparator comp,
1764                 _DifferenceTp length, bool stable)
1765   {
1766     typedef _DifferenceTp difference_type;
1767     _GLIBCXX_CALL(seqs_end - seqs_begin)
1768
1769     if (seqs_begin == seqs_end)
1770       return target;
1771
1772     RandomAccessIterator3 target_end;
1773     if (_GLIBCXX_PARALLEL_CONDITION(
1774         ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
1775         && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1776       target_end = parallel_multiway_merge(
1777           seqs_begin, seqs_end,
1778           target, comp, static_cast<difference_type>(length), stable, false);
1779     else
1780       target_end = multiway_merge(
1781           seqs_begin, seqs_end,
1782           target, comp, length, stable, false, sequential_tag());
1783
1784     return target_end;
1785   }
1786
1787 /** @brief Multi-way merging front-end.
1788  *  @param seqs_begin Begin iterator of iterator pair input sequence.
1789  *  @param seqs_end End iterator of iterator pair input sequence.
1790  *  @param target Begin iterator out output sequence.
1791  *  @param comp Comparator.
1792  *  @param length Maximum length to merge.
1793  *  @param stable Stable merging incurs a performance penalty.
1794  *  @return End iterator of output sequence.
1795  *  @pre For each @c i, @c seqs_begin[i].second must be the end
1796  *  marker of the sequence, but also reference the one more sentinel
1797  *  element. */
1798 template<
1799     typename RandomAccessIteratorPairIterator,
1800     typename RandomAccessIterator3,
1801     typename _DifferenceTp,
1802     typename Comparator>
1803   RandomAccessIterator3
1804   multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
1805                           RandomAccessIteratorPairIterator seqs_end,
1806                           RandomAccessIterator3 target,
1807                           Comparator comp,
1808                           _DifferenceTp length,
1809                           bool stable)
1810   {
1811     typedef _DifferenceTp difference_type;
1812
1813     if (seqs_begin == seqs_end)
1814       return target;
1815
1816     _GLIBCXX_CALL(seqs_end - seqs_begin)
1817
1818     if (_GLIBCXX_PARALLEL_CONDITION(
1819         ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
1820         && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1821       return parallel_multiway_merge(
1822           seqs_begin, seqs_end,
1823           target, comp, static_cast<difference_type>(length), stable, true);
1824     else
1825       return multiway_merge(
1826           seqs_begin, seqs_end,
1827           target, comp, length, stable, true, sequential_tag());
1828   }
1829 }
1830
1831 #endif