OSDN Git Service

2012-05-02 Paolo Carlini <paolo.carlini@oracle.com>
[pf3gnuchains/gcc-fork.git] / libstdc++-v3 / include / parallel / multiway_merge.h
index cdafacb..a1977da 100644 (file)
@@ -1,11 +1,11 @@
 // -*- C++ -*-
 
-// Copyright (C) 2007 Free Software Foundation, Inc.
+// Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the terms
 // of the GNU General Public License as published by the Free Software
-// Foundation; either version 2, or (at your option) any later
+// Foundation; either version 3, or (at your option) any later
 // version.
 
 // This library is distributed in the hope that it will be useful, but
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // General Public License for more details.
 
-// You should have received a copy of the GNU General Public License
-// along with this library; see the file COPYING.  If not, write to
-// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
-// MA 02111-1307, USA.
-
-// As a special exception, you may use this file as part of a free
-// software library without restriction.  Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to
-// produce an executable, this file does not by itself cause the
-// resulting executable to be covered by the GNU General Public
-// License.  This exception does not however invalidate any other
-// reasons why the executable file might be covered by the GNU General
-// Public License.
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
 
-/** @file parallel/multiway_merge.h
- *  @brief Implementation of sequential and parallel multiway merge.
- *  This file is a GNU parallel extension to the Standard C++ Library.
- */
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+// <http://www.gnu.org/licenses/>.
 
-// Written by Johannes Singler.
+/** @file parallel/multiway_merge.h
+*  @brief Implementation of sequential and parallel multiway merge.
+*
+*  Explanations on the high-speed merging routines in the appendix of
+*
+*  P. Sanders.
+*  Fast priority queues for cached memory.
+*  ACM Journal of Experimental Algorithmics, 5, 2000.
+*
+*  This file is a GNU parallel extension to the Standard C++ Library.
+*/
+
+// Written by Johannes Singler and Manuel Holtgrewe.
 
 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
 #include <bits/stl_algo.h>
 #include <parallel/features.h>
 #include <parallel/parallel.h>
-#include <parallel/merge.h>
 #include <parallel/losertree.h>
-#include <parallel/timing.h>
+#include <parallel/multiseq_selection.h>
 #if _GLIBCXX_ASSERTIONS
 #include <parallel/checkers.h>
 #endif
 
 /** @brief Length of a sequence described by a pair of iterators. */
-#define LENGTH(s) ((s).second - (s).first)
+#define _GLIBCXX_PARALLEL_LENGTH(__s) ((__s).second - (__s).first)
 
-// XXX need iterator typedefs
 namespace __gnu_parallel
 {
-  template<typename RandomAccessIterator, typename Comparator>
-  class guarded_iterator;
-
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           guarded_iterator<RandomAccessIterator, Comparator>& bi2);
-
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            guarded_iterator<RandomAccessIterator, Comparator>& bi2);
-
-  /** @brief Iterator wrapper supporting an implicit supremum at the end
-      of the sequence, dominating all comparisons.
-      *  Deriving from RandomAccessIterator is not possible since
-      *  RandomAccessIterator need not be a class.
-      */
-  template<typename RandomAccessIterator, typename Comparator>
-  class guarded_iterator
-  {
-  private:
-    /** @brief Current iterator position. */
-    RandomAccessIterator current;
-
-    /** @brief End iterator of the sequence. */
-    RandomAccessIterator end;
-
-    /** @brief Comparator. */
-    Comparator& comp;
-
-  public:
-    /** @brief Constructor. Sets iterator to beginning of sequence.
-     *  @param begin Begin iterator of sequence.
-     *  @param end End iterator of sequence.
-     *  @param comp Comparator provided for associated overloaded
-     *  compare operators. */
-    inline guarded_iterator(RandomAccessIterator begin, 
-                           RandomAccessIterator end, Comparator& comp) 
-    : current(begin), end(end), comp(comp)
-    { }
-
-    /** @brief Pre-increment operator.
-     *  @return This. */
-    inline guarded_iterator<RandomAccessIterator, Comparator>&
-    operator++()
-    {
-      ++current;
-      return *this;
-    }
-
-    /** @brief Dereference operator.
-     *  @return Referenced element. */
-    inline typename std::iterator_traits<RandomAccessIterator>::value_type
-    operator*()
-    { return *current; }
-
-    /** @brief Convert to wrapped iterator.
-     *  @return Wrapped iterator. */
-    inline operator RandomAccessIterator()
-    { return current; }
-
-    friend bool
-    operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
-
-    friend bool
-    operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
-  };
-
-  /** @brief Compare two elements referenced by guarded iterators.
-   *  @param bi1 First iterator.
-   *  @param bi2 Second iterator.
-   *  @return @c True if less. */
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           guarded_iterator<RandomAccessIterator, Comparator>& bi2)
-  {
-    if (bi1.current == bi1.end)        //bi1 is sup
-      return bi2.current == bi2.end;   //bi2 is not sup
-    if (bi2.current == bi2.end)        //bi2 is sup
-      return true;
-    return (bi1.comp)(*bi1, *bi2);     //normal compare
-  }
-
-  /** @brief Compare two elements referenced by guarded iterators.
-   *  @param bi1 First iterator.
-   *  @param bi2 Second iterator.
-   *  @return @c True if less equal. */
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            guarded_iterator<RandomAccessIterator, Comparator>& bi2)
-  {
-    if (bi2.current == bi2.end)        //bi1 is sup
-      return bi1.current != bi1.end;   //bi2 is not sup
-    if (bi1.current == bi1.end)        //bi2 is sup
-      return false;
-    return !(bi1.comp)(*bi2, *bi1);    //normal compare
-  }
-
-  template<typename RandomAccessIterator, typename Comparator>
-  class unguarded_iterator;
-
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
-
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
-
-  template<typename RandomAccessIterator, typename Comparator>
-  class unguarded_iterator
-  {
-  private:
-    /** @brief Current iterator position. */
-    RandomAccessIterator& current;
-    /** @brief Comparator. */
-    mutable Comparator& comp;
-
-  public:
-    /** @brief Constructor. Sets iterator to beginning of sequence.
-     *  @param begin Begin iterator of sequence.
-     *  @param end Unused, only for compatibility.
-     *  @param comp Unused, only for compatibility. */
-    inline unguarded_iterator(RandomAccessIterator begin, 
-                             RandomAccessIterator end, Comparator& comp) 
-    : current(begin), comp(comp)
-    { }
-
-    /** @brief Pre-increment operator.
-     *  @return This. */
-    inline  unguarded_iterator<RandomAccessIterator, Comparator>&
-    operator++()
+  template<typename _RAIter1, typename _RAIter2, typename _OutputIterator,
+          typename _DifferenceTp, typename _Compare>
+    _OutputIterator
+    __merge_advance(_RAIter1&, _RAIter1, _RAIter2&, _RAIter2,
+                   _OutputIterator, _DifferenceTp, _Compare);
+
+  /** @brief _Iterator wrapper supporting an implicit supremum at the end
+   *         of the sequence, dominating all comparisons.
+   *
+   * The implicit supremum comes with a performance cost.
+   *
+   * Deriving from _RAIter is not possible since
+   * _RAIter need not be a class.
+   */
+  template<typename _RAIter, typename _Compare>
+    class _GuardedIterator
     {
-      current++;
-      return *this;
-    }
-
-    /** @brief Dereference operator.
-     *  @return Referenced element. */
-    inline typename std::iterator_traits<RandomAccessIterator>::value_type 
-    operator*()
-    { return *current; }
-
-    /** @brief Convert to wrapped iterator.
-     *  @return Wrapped iterator. */
-    inline
-    operator RandomAccessIterator()
-    { return current; }
-
-    friend bool
-    operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
-
-    friend bool
-    operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
-  };
-
-  /** @brief Compare two elements referenced by unguarded iterators.
-   *  @param bi1 First iterator.
-   *  @param bi2 Second iterator.
-   *  @return @c True if less. */
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
-  {
-    // Normal compare.
-    return (bi1.comp)(*bi1, *bi2);
-  }
-
-  /** @brief Compare two elements referenced by unguarded iterators.
-   *  @param bi1 First iterator.
-   *  @param bi2 Second iterator.
-   *  @return @c True if less equal. */
-  template<typename RandomAccessIterator, typename Comparator>
-  inline bool
-  operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
-  {
-    // Normal compare.
-    return !(bi1.comp)(*bi2, *bi1);
-  }
-
-  /** Prepare a set of sequences to be merged without a (end) guard
-   *  @param seqs_begin
-   *  @param seqs_end
-   *  @param comp
-   *  @param min_sequence
-   *  @param stable
-   *  @pre (seqs_end - seqs_begin > 0) */
-  template<typename RandomAccessIteratorIterator, typename Comparator>
-  typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
-  prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
-                   RandomAccessIteratorIterator seqs_end, Comparator comp,
-                   int& min_sequence, bool stable)
-  {
-    _GLIBCXX_CALL(seqs_end - seqs_begin)
-
-      typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
-      difference_type;
-
-    if ((*seqs_begin).first == (*seqs_begin).second)
+    private:
+      /** @brief Current iterator __position. */
+      _RAIter _M_current;
+
+      /** @brief End iterator of the sequence. */
+      _RAIter _M_end;
+
+      /** @brief _Compare. */
+      _Compare& __comp;
+
+    public:
+      /** @brief Constructor. Sets iterator to beginning of sequence.
+      *  @param __begin Begin iterator of sequence.
+      *  @param __end End iterator of sequence.
+      *  @param __comp Comparator provided for associated overloaded
+      *  compare operators. */
+      _GuardedIterator(_RAIter __begin, _RAIter __end, _Compare& __comp)
+      : _M_current(__begin), _M_end(__end), __comp(__comp)
+      { }
+
+      /** @brief Pre-increment operator.
+      *  @return This. */
+      _GuardedIterator<_RAIter, _Compare>&
+      operator++()
       {
-       // Empty sequence found, it's the first one.
-       min_sequence = 0;
-       return -1;
+       ++_M_current;
+       return *this;
       }
 
-    // Last element in sequence.
-    value_type min = *((*seqs_begin).second - 1);
-    min_sequence = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
+      /** @brief Dereference operator.
+      *  @return Referenced element. */
+      typename std::iterator_traits<_RAIter>::value_type&
+      operator*()
+      { return *_M_current; }
+
+      /** @brief Convert to wrapped iterator.
+      *  @return Wrapped iterator. */
+      operator _RAIter()
+      { return _M_current; }
+
+      /** @brief Compare two elements referenced by guarded iterators.
+       *  @param __bi1 First iterator.
+       *  @param __bi2 Second iterator.
+       *  @return @c true if less. */
+      friend bool
+      operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
+               _GuardedIterator<_RAIter, _Compare>& __bi2)
       {
-       if ((*s).first == (*s).second)
-         {
-           // Empty sequence found.
-           min_sequence = static_cast<int>(s - seqs_begin);
-           return -1;
-         }
-
-       // Last element in sequence.
-       const value_type& v = *((*s).second - 1);
-       if (comp(v, min))       //strictly smaller
-         {
-           min = v;
-           min_sequence = static_cast<int>(s - seqs_begin);
-         }
+       if (__bi1._M_current == __bi1._M_end)       // __bi1 is sup
+         return __bi2._M_current == __bi2._M_end;  // __bi2 is not sup
+       if (__bi2._M_current == __bi2._M_end)       // __bi2 is sup
+         return true;
+       return (__bi1.__comp)(*__bi1, *__bi2);      // normal compare
       }
 
-    difference_type overhang_size = 0;
-
-    int s = 0;
-    for (s = 0; s <= min_sequence; s++)
+      /** @brief Compare two elements referenced by guarded iterators.
+       *  @param __bi1 First iterator.
+       *  @param __bi2 Second iterator.
+       *  @return @c True if less equal. */
+      friend bool
+      operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
+                _GuardedIterator<_RAIter, _Compare>& __bi2)
       {
-       RandomAccessIterator1 split;
-       if (stable)
-         split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
-                                  min, comp);
-       else
-         split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
-                                  min, comp);
-
-       overhang_size += seqs_begin[s].second - split;
-      }
-
-    for (; s < (seqs_end - seqs_begin); s++)
+       if (__bi2._M_current == __bi2._M_end)       // __bi1 is sup
+         return __bi1._M_current != __bi1._M_end;  // __bi2 is not sup
+       if (__bi1._M_current == __bi1._M_end)       // __bi2 is sup
+         return false;
+       return !(__bi1.__comp)(*__bi2, *__bi1);     // normal compare
+      } 
+    };
+
+  template<typename _RAIter, typename _Compare>
+    class _UnguardedIterator
+    {
+    private:
+      /** @brief Current iterator __position. */
+      _RAIter _M_current;
+      /** @brief _Compare. */
+      _Compare& __comp;
+
+    public:
+      /** @brief Constructor. Sets iterator to beginning of sequence.
+      *  @param __begin Begin iterator of sequence.
+      *  @param __end Unused, only for compatibility.
+      *  @param __comp Unused, only for compatibility. */
+      _UnguardedIterator(_RAIter __begin,
+                        _RAIter /* __end */, _Compare& __comp)
+      : _M_current(__begin), __comp(__comp)
+      { }
+
+      /** @brief Pre-increment operator.
+      *  @return This. */
+      _UnguardedIterator<_RAIter, _Compare>&
+      operator++()
       {
-       RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
-       overhang_size += seqs_begin[s].second - split;
+       ++_M_current;
+       return *this;
       }
 
-    // So many elements will be left over afterwards.
-    return overhang_size;
-  }
-
-  /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
-   *  @param seqs_begin
-   *  @param seqs_end
-   *  @param comp */
-  template<typename RandomAccessIteratorIterator, typename Comparator>
-  typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
-  prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
-                            RandomAccessIteratorIterator seqs_end,
-                            Comparator comp)
-  {
-    _GLIBCXX_CALL(seqs_end - seqs_begin)
-
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
-      difference_type;
-
-    // Last element in sequence.
-    value_type max;
-    bool max_found = false;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+      /** @brief Dereference operator.
+      *  @return Referenced element. */
+      typename std::iterator_traits<_RAIter>::value_type&
+      operator*()
+      { return *_M_current; }
+
+      /** @brief Convert to wrapped iterator.
+      *  @return Wrapped iterator. */
+      operator _RAIter()
+      { return _M_current; }
+
+      /** @brief Compare two elements referenced by unguarded iterators.
+       *  @param __bi1 First iterator.
+       *  @param __bi2 Second iterator.
+       *  @return @c true if less. */
+      friend bool
+      operator<(_UnguardedIterator<_RAIter, _Compare>& __bi1,
+               _UnguardedIterator<_RAIter, _Compare>& __bi2)
       {
-       if ((*s).first == (*s).second)
-         continue;
-
-       // Last element in sequence.
-       value_type& v = *((*s).second - 1);
-
-       // Strictly greater.
-       if (!max_found || comp(max, v))
-         max = v;
-       max_found = true;
+       // Normal compare.
+       return (__bi1.__comp)(*__bi1, *__bi2);
       }
 
-    difference_type overhang_size = 0;
-
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+      /** @brief Compare two elements referenced by unguarded iterators.
+       *  @param __bi1 First iterator.
+       *  @param __bi2 Second iterator.
+       *  @return @c True if less equal. */
+      friend bool
+      operator<=(_UnguardedIterator<_RAIter, _Compare>& __bi1,
+                _UnguardedIterator<_RAIter, _Compare>& __bi2)
       {
-       RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp);
-       overhang_size += (*s).second - split;
-
-       // Set sentinel.
-       *((*s).second) = max;
+       // Normal compare.
+       return !(__bi1.__comp)(*__bi2, *__bi1);
       }
-
-    // So many elements will be left over afterwards.
-    return overhang_size;
-  }
+    };
 
   /** @brief Highly efficient 3-way merging procedure.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Unused, stable anyway.
-   *  @return End iterator of output sequence. */
-  template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length);
-    
-    typedef _DifferenceTp difference_type;
-
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    if (length == 0)
-      return target;
-
-    iterator<RandomAccessIterator1, Comparator>
-      seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
-      seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
-      seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
-
-    if (seq0 <= seq1)
-      {
-       if (seq1 <= seq2)
-         goto s012;
-       else
-         if (seq2 <  seq0)
-           goto s201;
-         else
-           goto s021;
-      }
-    else
-      {
-       if (seq1 <= seq2)
-         {
-           if (seq0 <= seq2)
-             goto s102;
-           else
-             goto s120;
-         }
-       else
-         goto s210;
-      }
+   *
+   * Merging is done with the algorithm implementation described by Peter
+   * Sanders.  Basically, the idea is to minimize the number of necessary
+   * comparison after merging an element.  The implementation trick
+   * that makes this fast is that the order of the sequences is stored
+   * in the instruction pointer (translated into labels in C++).
+   *
+   * This works well for merging up to 4 sequences.
+   *
+   * Note that making the merging stable does @a not come at a
+   * performance hit.
+   *
+   * Whether the merging is done guarded or unguarded is selected by the
+   * used iterator class.
+   *
+   * @param __seqs_begin Begin iterator of iterator pair input sequence.
+   * @param __seqs_end End iterator of iterator pair input sequence.
+   * @param __target Begin iterator of output sequence.
+   * @param __comp Comparator.
+   * @param __length Maximum length to merge, less equal than the
+   * total number of elements available.
+   *
+   * @return End iterator of output sequence.
+   */
+  template<template<typename RAI, typename C> class iterator,
+           typename _RAIterIterator,
+           typename _RAIter3,
+           typename _DifferenceTp,
+           typename _Compare>
+    _RAIter3
+    multiway_merge_3_variant(_RAIterIterator __seqs_begin,
+                            _RAIterIterator __seqs_end,
+                            _RAIter3 __target,
+                            _DifferenceTp __length, _Compare __comp)
+    {
+      _GLIBCXX_CALL(__length);
 
-#define Merge3Case(a,b,c,c0,c1)                                \
-    s ## a ## b ## c :                                 \
-      *target = *seq ## a;                             \
-    ++target;                                          \
-    length--;                                          \
-    ++seq ## a;                                                \
-    if (length == 0) goto finish;                      \
-    if (seq ## a c0 seq ## b) goto s ## a ## b ## c;   \
-    if (seq ## a c1 seq ## c) goto s ## b ## a ## c;   \
-    goto s ## b ## c ## a;
-
-    Merge3Case(0, 1, 2, <=, <=);
-    Merge3Case(1, 2, 0, <=, < );
-    Merge3Case(2, 0, 1, < , < );
-    Merge3Case(1, 0, 2, < , <=);
-    Merge3Case(0, 2, 1, <=, <=);
-    Merge3Case(2, 1, 0, < , < );
-
-#undef Merge3Case
-
-  finish:
-    ;
-
-    seqs_begin[0].first = seq0;
-    seqs_begin[1].first = seq1;
-    seqs_begin[2].first = seq2;
-
-    return target;
-  }
-
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length);
-    
-    typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    int min_seq;
-    RandomAccessIterator3 target_end;
-
-    // Stable anyway.
-    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
-
-    difference_type total_length = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
-      total_length += LENGTH(*s);
-
-    if (overhang != -1)
-      {
-       difference_type unguarded_length = std::min(length, total_length - overhang);
-       target_end = multiway_merge_3_variant<unguarded_iterator>
-         (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-       overhang = length - unguarded_length;
-      }
-    else
-      {
-       // Empty sequence found.
-       overhang = length;
-       target_end = target;
-      }
+      typedef _DifferenceTp _DifferenceType;
 
-#if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
 
-    switch (min_seq)
-      {
-      case 0:
-       // Iterators will be advanced accordingly.
-       target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
-                                  seqs_begin[2].first, seqs_begin[2].second,
-                                  target_end, overhang, comp);
-       break;
-      case 1:
-       target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
-                                  seqs_begin[2].first, seqs_begin[2].second,
-                                  target_end, overhang, comp);
-       break;
-      case 2:
-       target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
-                                  seqs_begin[1].first, seqs_begin[1].second,
-                                  target_end, overhang, comp);
-       break;
-      default:
-       _GLIBCXX_PARALLEL_ASSERT(false);
-      }
+      if (__length == 0)
+       return __target;
 
 #if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+      _DifferenceTp __orig_length = __length;
 #endif
 
-    return target_end;
-  }
-
-  /** @brief Highly efficient 4-way merging procedure.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Unused, stable anyway.
-   *  @return End iterator of output sequence. */
-  template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length);
-    typedef _DifferenceTp difference_type;
-
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    iterator<RandomAccessIterator1, Comparator>
-      seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
-      seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
-      seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
-      seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
-
-#define Decision(a,b,c,d) {                                    \
-      if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;     \
-      if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;     \
-      if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;     \
-      goto s ## a ## b ## c ## d;  }
-
-    if (seq0 <= seq1)
-      {
-       if (seq1 <= seq2)
-         Decision(0,1,2,3)
-         else
-           if (seq2 < seq0)
-             Decision(2,0,1,3)
-             else
-               Decision(0,2,1,3)
-                 }
-    else
-      {
-       if (seq1 <= seq2)
-         {
-           if (seq0 <= seq2)
-             Decision(1,0,2,3)
-             else
-               Decision(1,2,0,3)
-                 }
-       else
-         Decision(2,1,0,3)
-           }
+      iterator<_RAIter1, _Compare>
+       __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp),
+       __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp),
+       __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp);
 
-#define Merge4Case(a,b,c,d,c0,c1,c2)                           \
-    s ## a ## b ## c ## d:                                     \
-      if (length == 0) goto finish;                            \
-    *target = *seq ## a;                                       \
-    ++target;                                                  \
-    length--;                                                  \
-    ++seq ## a;                                                        \
-    if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;      \
-    if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;      \
-    if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;      \
-    goto s ## b ## c ## d ## a;
-
-    Merge4Case(0, 1, 2, 3, <=, <=, <=);
-    Merge4Case(0, 1, 3, 2, <=, <=, <=);
-    Merge4Case(0, 2, 1, 3, <=, <=, <=);
-    Merge4Case(0, 2, 3, 1, <=, <=, <=);
-    Merge4Case(0, 3, 1, 2, <=, <=, <=);
-    Merge4Case(0, 3, 2, 1, <=, <=, <=);
-    Merge4Case(1, 0, 2, 3, < , <=, <=);
-    Merge4Case(1, 0, 3, 2, < , <=, <=);
-    Merge4Case(1, 2, 0, 3, <=, < , <=);
-    Merge4Case(1, 2, 3, 0, <=, <=, < );
-    Merge4Case(1, 3, 0, 2, <=, < , <=);
-    Merge4Case(1, 3, 2, 0, <=, <=, < );
-    Merge4Case(2, 0, 1, 3, < , < , <=);
-    Merge4Case(2, 0, 3, 1, < , <=, < );
-    Merge4Case(2, 1, 0, 3, < , < , <=);
-    Merge4Case(2, 1, 3, 0, < , <=, < );
-    Merge4Case(2, 3, 0, 1, <=, < , < );
-    Merge4Case(2, 3, 1, 0, <=, < , < );
-    Merge4Case(3, 0, 1, 2, < , < , < );
-    Merge4Case(3, 0, 2, 1, < , < , < );
-    Merge4Case(3, 1, 0, 2, < , < , < );
-    Merge4Case(3, 1, 2, 0, < , < , < );
-    Merge4Case(3, 2, 0, 1, < , < , < );
-    Merge4Case(3, 2, 1, 0, < , < , < );
-
-#undef Merge4Case
-#undef Decision
-
-  finish:
-    ;
-
-    seqs_begin[0].first = seq0;
-    seqs_begin[1].first = seq1;
-    seqs_begin[2].first = seq2;
-    seqs_begin[3].first = seq3;
-
-    return target;
-  }
-
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length);
-    typedef _DifferenceTp difference_type;
-
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    int min_seq;
-    RandomAccessIterator3 target_end;
-
-    // Stable anyway.
-    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
-
-    difference_type total_length = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
-      total_length += LENGTH(*s);
-
-    if (overhang != -1)
-      {
-       difference_type unguarded_length = std::min(length, total_length - overhang);
-       target_end = multiway_merge_4_variant<unguarded_iterator>
-         (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-       overhang = length - unguarded_length;
-      }
-    else
-      {
-       // Empty sequence found.
-       overhang = length;
-       target_end = target;
-      }
+      if (__seq0 <= __seq1)
+       {
+          if (__seq1 <= __seq2)
+            goto __s012;
+          else
+            if (__seq2 <  __seq0)
+              goto __s201;
+            else
+              goto __s021;
+       }
+      else
+       {
+          if (__seq1 <= __seq2)
+            {
+              if (__seq0 <= __seq2)
+               goto __s102;
+              else
+               goto __s120;
+            }
+          else
+            goto __s210;
+       }
+#define _GLIBCXX_PARALLEL_MERGE_3_CASE(__a, __b, __c, __c0, __c1) \
+      __s ## __a ## __b ## __c :                            \
+       *__target = *__seq ## __a;                          \
+       ++__target;                                         \
+       --__length;                                         \
+       ++__seq ## __a;                                     \
+       if (__length == 0) goto __finish;                   \
+       if (__seq ## __a __c0 __seq ## __b) goto __s ## __a ## __b ## __c; \
+       if (__seq ## __a __c1 __seq ## __c) goto __s ## __b ## __a ## __c; \
+       goto __s ## __b ## __c ## __a;
+
+      _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
+      _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
+      _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
+      _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );
+
+#undef _GLIBCXX_PARALLEL_MERGE_3_CASE
+
+    __finish:
+      ;
 
 #if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+    _GLIBCXX_PARALLEL_ASSERT(
+       ((_RAIter1)__seq0 - __seqs_begin[0].first) +
+       ((_RAIter1)__seq1 - __seqs_begin[1].first) +
+       ((_RAIter1)__seq2 - __seqs_begin[2].first)
+       == __orig_length);
 #endif
 
-    std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
-    one_missing.erase(one_missing.begin() + min_seq);  //remove
-
-    target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
+      __seqs_begin[0].first = __seq0;
+      __seqs_begin[1].first = __seq1;
+      __seqs_begin[2].first = __seq2;
 
-    // Insert back again.
-    one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
-    // Write back modified iterators.
-    copy(one_missing.begin(), one_missing.end(), seqs_begin);
-
-#if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
-    return target_end;
-  }
+      return __target;
+    }
 
-  /** @brief Basic multi-way merging procedure.
+  /**
+   * @brief Highly efficient 4-way merging procedure.
+   *
+   * Merging is done with the algorithm implementation described by Peter
+   * Sanders. Basically, the idea is to minimize the number of necessary
+   * comparison after merging an element.  The implementation trick
+   * that makes this fast is that the order of the sequences is stored
+   * in the instruction pointer (translated into goto labels in C++).
    *
-   *  The head elements are kept in a sorted array, new heads are
-   *  inserted linearly.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence. 
+   * This works well for merging up to 4 sequences.
+   *
+   * Note that making the merging stable does @a not come at a
+   * performance hit.
+   *
+   * Whether the merging is done guarded or unguarded is selected by the
+   * used iterator class.
+   *
+   * @param __seqs_begin Begin iterator of iterator pair input sequence.
+   * @param __seqs_end End iterator of iterator pair input sequence.
+   * @param __target Begin iterator of output sequence.
+   * @param __comp Comparator.
+   * @param __length Maximum length to merge, less equal than the
+   * total number of elements available.
+   *
+   * @return End iterator of output sequence.
    */
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length)
-
-    typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    // Num remaining pieces.
-    int k = static_cast<int>(seqs_end - seqs_begin), nrp;
-
-    value_type* pl = new value_type[k];
-    int* source = new int[k];
-    difference_type total_length = 0;
-
-#define POS(i) seqs_begin[(i)].first
-#define STOPS(i) seqs_begin[(i)].second
-
-    // Write entries into queue.
-    nrp = 0;
-    for (int pi = 0; pi < k; pi++)
-      {
-       if (STOPS(pi) != POS(pi))
-         {
-           pl[nrp] = *(POS(pi));
-           source[nrp] = pi;
-           nrp++;
-           total_length += LENGTH(seqs_begin[pi]);
-         }
-      }
-
-    if (stable)
-      {
-       for (int k = 0; k < nrp - 1; k++)
-         for (int pi = nrp - 1; pi > k; pi--)
-           if (comp(pl[pi], pl[pi - 1]) ||
-               (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
-             {
-               std::swap(pl[pi - 1], pl[pi]);
-               std::swap(source[pi - 1], source[pi]);
-             }
-      }
-    else
-      {
-       for (int k = 0; k < nrp - 1; k++)
-         for (int pi = nrp - 1; pi > k; pi--)
-           if (comp(pl[pi], pl[pi-1]))
-             {
-               std::swap(pl[pi-1], pl[pi]);
-               std::swap(source[pi-1], source[pi]);
-             }
-      }
-
-    // Iterate.
-    if (stable)
-      {
-       int j;
-       while (nrp > 0 && length > 0)
-         {
-           if (source[0] < source[1])
-             {
-               // pl[0] <= pl[1]
-               while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
-                 {
-                   *target = pl[0];
-                   ++target;
-                   ++POS(source[0]);
-                   length--;
-                   if (POS(source[0]) == STOPS(source[0]))
-                     {
-                       // Move everything to the left.
-                       for (int s = 0; s < nrp - 1; s++)
-                         {
-                           pl[s] = pl[s + 1];
-                           source[s] = source[s + 1];
-                         }
-                       nrp--;
-                       break;
-                     }
-                   else
-                     pl[0] = *(POS(source[0]));
-                 }
-             }
-           else
-             {
-               // pl[0] < pl[1]
-               while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
-                 {
-                   *target = pl[0];
-                   ++target;
-                   ++POS(source[0]);
-                   length--;
-                   if (POS(source[0]) == STOPS(source[0]))
-                     {
-                       for (int s = 0; s < nrp - 1; s++)
-                         {
-                           pl[s] = pl[s + 1];
-                           source[s] = source[s + 1];
-                         }
-                       nrp--;
-                       break;
-                     }
-                   else
-                     pl[0] = *(POS(source[0]));
-                 }
-             }
-
-           // Sink down.
-           j = 1;
-           while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
-                                (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
-             {
-               std::swap(pl[j - 1], pl[j]);
-               std::swap(source[j - 1], source[j]);
-               j++;
-             }
-         }
-      }
-    else
-      {
-       int j;
-       while (nrp > 0 && length > 0)
-         {
-           // pl[0] <= pl[1]
-           while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
-             {
-               *target = pl[0];
-               ++target;
-               ++POS(source[0]);
-               length--;
-               if (POS(source[0]) == STOPS(source[0]))
-                 {
-                   for (int s = 0; s < (nrp - 1); s++)
-                     {
-                       pl[s] = pl[s + 1];
-                       source[s] = source[s + 1];
-                     }
-                   nrp--;
-                   break;
-                 }
-               else
-                 pl[0] = *(POS(source[0]));
-             }
-
-           // Sink down.
-           j = 1;
-           while ((j < nrp) && comp(pl[j], pl[j - 1]))
-             {
-               std::swap(pl[j - 1], pl[j]);
-               std::swap(source[j - 1], source[j]);
-               j++;
-             }
-         }
-      }
-
-    delete[] pl;
-    delete[] source;
-
-    return target;
-  }
+  template<template<typename RAI, typename C> class iterator,
+           typename _RAIterIterator,
+           typename _RAIter3,
+           typename _DifferenceTp,
+           typename _Compare>
+    _RAIter3
+    multiway_merge_4_variant(_RAIterIterator __seqs_begin,
+                             _RAIterIterator __seqs_end,
+                             _RAIter3 __target,
+                             _DifferenceTp __length, _Compare __comp)
+    {
+      _GLIBCXX_CALL(__length);
+      typedef _DifferenceTp _DifferenceType;
+
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
+
+      iterator<_RAIter1, _Compare>
+       __seq0(__seqs_begin[0].first, __seqs_begin[0].second, __comp),
+       __seq1(__seqs_begin[1].first, __seqs_begin[1].second, __comp),
+       __seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp),
+       __seq3(__seqs_begin[3].first, __seqs_begin[3].second, __comp);
+
+#define _GLIBCXX_PARALLEL_DECISION(__a, __b, __c, __d) {  \
+       if (__seq ## __d < __seq ## __a)                  \
+         goto __s ## __d ## __a ## __b ## __c;           \
+       if (__seq ## __d < __seq ## __b)                  \
+         goto __s ## __a ## __d ## __b ## __c;           \
+       if (__seq ## __d < __seq ## __c)                  \
+         goto __s ## __a ## __b ## __d ## __c;           \
+       goto __s ## __a ## __b ## __c ## __d;  }
+
+      if (__seq0 <= __seq1)
+       {
+          if (__seq1 <= __seq2)
+            _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
+            else
+              if (__seq2 < __seq0)
+               _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
+               else
+                  _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
+                    }
+      else
+       {
+          if (__seq1 <= __seq2)
+            {
+              if (__seq0 <= __seq2)
+               _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
+               else
+                  _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
+                    }
+          else
+            _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
+              }
+
+#define _GLIBCXX_PARALLEL_MERGE_4_CASE(__a, __b, __c, __d,  \
+                                      __c0, __c1, __c2)    \
+      __s ## __a ## __b ## __c ## __d:                      \
+      if (__length == 0) goto __finish;                     \
+      *__target = *__seq ## __a;                            \
+      ++__target;                                           \
+      --__length;                                           \
+      ++__seq ## __a;                                       \
+      if (__seq ## __a __c0 __seq ## __b)      \
+       goto __s ## __a ## __b ## __c ## __d;  \
+      if (__seq ## __a __c1 __seq ## __c)      \
+       goto __s ## __b ## __a ## __c ## __d;  \
+      if (__seq ## __a __c2 __seq ## __d)      \
+       goto __s ## __b ## __c ## __a ## __d;  \
+      goto __s ## __b ## __c ## __d ## __a;
+
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
+      _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );
+
+#undef _GLIBCXX_PARALLEL_MERGE_4_CASE
+#undef _GLIBCXX_PARALLEL_DECISION
+
+    __finish:
+      ;
+
+      __seqs_begin[0].first = __seq0;
+      __seqs_begin[1].first = __seq1;
+      __seqs_begin[2].first = __seq2;
+      __seqs_begin[3].first = __seq3;
+
+      return __target;
+    }
 
   /** @brief Multi-way merging procedure for a high branching factor,
-   * guarded case.
-   *
-   *  The head elements are kept in a loser tree.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence. 
+   *         guarded case.
+   *
+   * This merging variant uses a LoserTree class as selected by <tt>_LT</tt>.
+   *
+   * Stability is selected through the used LoserTree class <tt>_LT</tt>.
+   *
+   * At least one non-empty sequence is required.
+   *
+   * @param __seqs_begin Begin iterator of iterator pair input sequence.
+   * @param __seqs_end End iterator of iterator pair input sequence.
+   * @param __target Begin iterator of output sequence.
+   * @param __comp Comparator.
+   * @param __length Maximum length to merge, less equal than the
+   * total number of elements available.
+   *
+   * @return End iterator of output sequence.
    */
-  template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length)
-
-      typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    int k = static_cast<int>(seqs_end - seqs_begin);
-
-    LT lt(k, comp);
-
-    difference_type total_length = 0;
+  template<typename _LT,
+           typename _RAIterIterator,
+           typename _RAIter3,
+           typename _DifferenceTp,
+           typename _Compare>
+    _RAIter3
+    multiway_merge_loser_tree(_RAIterIterator __seqs_begin,
+                              _RAIterIterator __seqs_end,
+                              _RAIter3 __target,
+                              _DifferenceTp __length, _Compare __comp)
+    {
+      _GLIBCXX_CALL(__length)
 
-    for (int t = 0; t < k; t++)
-      {
-       if (stable)
-         {
-           if (seqs_begin[t].first == seqs_begin[t].second)
-             lt.insert_start_stable(value_type(), t, true);
-           else
-             lt.insert_start_stable(*seqs_begin[t].first, t, false);
-         }
-       else
-         {
-           if (seqs_begin[t].first == seqs_begin[t].second)
-             lt.insert_start(value_type(), t, true);
-           else
-             lt.insert_start(*seqs_begin[t].first, t, false);
-         }
+      typedef _DifferenceTp _DifferenceType;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::difference_type _SeqNumber;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
 
-       total_length += LENGTH(seqs_begin[t]);
-      }
+      _SeqNumber __k = static_cast<_SeqNumber>(__seqs_end - __seqs_begin);
 
-    if (stable)
-      lt.init_stable();
-    else
-      lt.init();
+      _LT __lt(__k, __comp);
 
-    total_length = std::min(total_length, length);
+      // Default value for potentially non-default-constructible types.
+      _ValueType* __arbitrary_element = 0;
 
-    int source;
+      for (_SeqNumber __t = 0; __t < __k; ++__t)
+       {
+          if(!__arbitrary_element
+            && _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__t]) > 0)
+            __arbitrary_element = &(*__seqs_begin[__t].first);
+       }
 
-    if (stable)
-      {
-       for (difference_type i = 0; i < total_length; i++)
-         {
-           // Take out.
-           source = lt.get_min_source();
+      for (_SeqNumber __t = 0; __t < __k; ++__t)
+       {
+          if (__seqs_begin[__t].first == __seqs_begin[__t].second)
+            __lt.__insert_start(*__arbitrary_element, __t, true);
+          else
+            __lt.__insert_start(*__seqs_begin[__t].first, __t, false);
+       }
 
-           *(target++) = *(seqs_begin[source].first++);
+      __lt.__init();
 
-           // Feed.
-           if (seqs_begin[source].first == seqs_begin[source].second)
-             lt.delete_min_insert_stable(value_type(), true);
-           else
-             // Replace from same source.
-             lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+      _SeqNumber __source;
 
-         }
-      }
-    else
-      {
-       for (difference_type i = 0; i < total_length; i++)
-         {
-           //take out
-           source = lt.get_min_source();
+      for (_DifferenceType __i = 0; __i < __length; ++__i)
+       {
+          //take out
+          __source = __lt.__get_min_source();
 
-           *(target++) = *(seqs_begin[source].first++);
+          *(__target++) = *(__seqs_begin[__source].first++);
 
-           // Feed.
-           if (seqs_begin[source].first == seqs_begin[source].second)
-             lt.delete_min_insert(value_type(), true);
-           else
-             // Replace from same source.
-             lt.delete_min_insert(*seqs_begin[source].first, false);
-         }
-      }
+          // Feed.
+          if (__seqs_begin[__source].first == __seqs_begin[__source].second)
+            __lt.__delete_min_insert(*__arbitrary_element, true);
+          else
+            // Replace from same __source.
+            __lt.__delete_min_insert(*__seqs_begin[__source].first, false);
+       }
 
-    return target;
-  }
+      return __target;
+    }
 
   /** @brief Multi-way merging procedure for a high branching factor,
-   * unguarded case.
-   *
-   *  The head elements are kept in a loser tree.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence.
-   *  @pre No input will run out of elements during the merge. 
+   *         unguarded case.
+   *
+   * Merging is done using the LoserTree class <tt>_LT</tt>.
+   *
+   * Stability is selected by the used LoserTrees.
+   *
+   * @pre No input will run out of elements during the merge.
+   *
+   * @param __seqs_begin Begin iterator of iterator pair input sequence.
+   * @param __seqs_end End iterator of iterator pair input sequence.
+   * @param __target Begin iterator of output sequence.
+   * @param __comp Comparator.
+   * @param __length Maximum length to merge, less equal than the
+   * total number of elements available.
+   *
+   * @return End iterator of output sequence.
    */
-  template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length)
-    typedef _DifferenceTp difference_type;
-
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
+  template<typename _LT,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp, typename _Compare>
+    _RAIter3
+    multiway_merge_loser_tree_unguarded(_RAIterIterator __seqs_begin,
+                                       _RAIterIterator __seqs_end,
+                                       _RAIter3 __target,
+       const typename std::iterator_traits<typename std::iterator_traits<
+         _RAIterIterator>::value_type::first_type>::value_type&
+                                       __sentinel,
+                                       _DifferenceTp __length,
+                                       _Compare __comp)
+    {
+      _GLIBCXX_CALL(__length)
+      typedef _DifferenceTp _DifferenceType;
 
-    int k = seqs_end - seqs_begin;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::difference_type _SeqNumber;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
 
-    LT lt(k, comp);
+      _SeqNumber __k = __seqs_end - __seqs_begin;
 
-    difference_type total_length = 0;
+      _LT __lt(__k, __sentinel, __comp);
 
-    for (int t = 0; t < k; t++)
-      {
+      for (_SeqNumber __t = 0; __t < __k; ++__t)
+       {
 #if _GLIBCXX_ASSERTIONS
-       _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
+          _GLIBCXX_PARALLEL_ASSERT(__seqs_begin[__t].first
+                                   != __seqs_begin[__t].second);
 #endif
-       if (stable)
-         lt.insert_start_stable(*seqs_begin[t].first, t, false);
-       else
-         lt.insert_start(*seqs_begin[t].first, t, false);
-
-       total_length += LENGTH(seqs_begin[t]);
-      }
-
-    if (stable)
-      lt.init_stable();
-    else
-      lt.init();
+          __lt.__insert_start(*__seqs_begin[__t].first, __t, false);
+       }
 
-    // Do not go past end.
-    length = std::min(total_length, length);
+      __lt.__init();
 
-    int source;
+      _SeqNumber __source;
 
 #if _GLIBCXX_ASSERTIONS
-    difference_type i = 0;
+      _DifferenceType __i = 0;
 #endif
 
-    if (stable)
-      {
-       RandomAccessIterator3 target_end = target + length;
-       while (target < target_end)
-         {
-           // Take out.
-           source = lt.get_min_source();
+      _RAIter3 __target_end = __target + __length;
+      while (__target < __target_end)
+       {
+          // Take out.
+          __source = __lt.__get_min_source();
 
 #if _GLIBCXX_ASSERTIONS
-           _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+          _GLIBCXX_PARALLEL_ASSERT(0 <= __source && __source < __k);
+          _GLIBCXX_PARALLEL_ASSERT(__i == 0
+              || !__comp(*(__seqs_begin[__source].first), *(__target - 1)));
 #endif
 
-           *(target++) = *(seqs_begin[source].first++);
+          // Feed.
+          *(__target++) = *(__seqs_begin[__source].first++);
 
 #if _GLIBCXX_ASSERTIONS
-           _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
-           i++;
+          ++__i;
 #endif
-           // Feed.
-           // Replace from same source.
-           lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+          // Replace from same __source.
+          __lt.__delete_min_insert(*__seqs_begin[__source].first, false);
+       }
 
-         }
-      }
-    else
-      {
-       RandomAccessIterator3 target_end = target + length;
-       while (target < target_end)
-         {
-           // Take out.
-           source = lt.get_min_source();
+      return __target;
+    }
 
-#if _GLIBCXX_ASSERTIONS
-           if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
-             printf("         %i %i %i\n", length, i, source);
-           _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
-#endif
 
-           *(target++) = *(seqs_begin[source].first++);
+  /** @brief Multi-way merging procedure for a high branching factor,
+   *         requiring sentinels to exist.
+   *
+   * @tparam UnguardedLoserTree _Loser Tree variant to use for the unguarded
+   *   merging.
+   *
+   * @param __seqs_begin Begin iterator of iterator pair input sequence.
+   * @param __seqs_end End iterator of iterator pair input sequence.
+   * @param __target Begin iterator of output sequence.
+   * @param __comp Comparator.
+   * @param __length Maximum length to merge, less equal than the
+   * total number of elements available.
+   *
+   * @return End iterator of output sequence.
+   */
+  template<typename UnguardedLoserTree,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIter3
+    multiway_merge_loser_tree_sentinel(_RAIterIterator __seqs_begin,
+                                      _RAIterIterator __seqs_end,
+                                      _RAIter3 __target,
+      const typename std::iterator_traits<typename std::iterator_traits<
+       _RAIterIterator>::value_type::first_type>::value_type&
+                                      __sentinel,
+                                      _DifferenceTp __length,
+                                      _Compare __comp)
+    {
+      _GLIBCXX_CALL(__length)
 
-#if _GLIBCXX_ASSERTIONS
-           if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
-             printf("         %i %i %i\n", length, i, source);
-           _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
-           i++;
-#endif
-           // Feed.
-           // Replace from same source.
-           lt.delete_min_insert(*seqs_begin[source].first, false);
-         }
-      }
+      typedef _DifferenceTp _DifferenceType;
+      typedef std::iterator_traits<_RAIterIterator> _TraitsType;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
 
-    return target;
-  }
-
-  template<typename _ValueTp, class Comparator>
-  struct loser_tree_traits
-  {
-    typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT;
-  };
-
-
-  /*#define NO_POINTER(T) \
-    template<typename Comparator> \
-    struct loser_tree_traits<T, Comparator> \
-    { \
-    typedef LoserTreePointer<T, Comparator> LT; \
-    };*/
-  //
-  // NO_POINTER(unsigned char)
-  // NO_POINTER(char)
-  // NO_POINTER(unsigned short)
-  // NO_POINTER(short)
-  // NO_POINTER(unsigned int)
-  // NO_POINTER(int)
-  // NO_POINTER(unsigned long)
-  // NO_POINTER(long)
-  // NO_POINTER(unsigned long long)
-  // NO_POINTER(long long)
-  //
-  // #undef NO_POINTER
-
-  template<typename _ValueTp, class Comparator>
-  struct loser_tree_traits_unguarded
-  {
-    typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
-  };
-
-  /*#define NO_POINTER_UNGUARDED(T) \
-    template<typename Comparator> \
-    struct loser_tree_traits_unguarded<T, Comparator> \
-    { \
-    typedef LoserTreePointerUnguarded<T, Comparator> LT; \
-    };*/
-  //
-  // NO_POINTER_UNGUARDED(unsigned char)
-  // NO_POINTER_UNGUARDED(char)
-  // NO_POINTER_UNGUARDED(unsigned short)
-  // NO_POINTER_UNGUARDED(short)
-  // NO_POINTER_UNGUARDED(unsigned int)
-  // NO_POINTER_UNGUARDED(int)
-  // NO_POINTER_UNGUARDED(unsigned long)
-  // NO_POINTER_UNGUARDED(long)
-  // NO_POINTER_UNGUARDED(unsigned long long)
-  // NO_POINTER_UNGUARDED(long long)
-  //
-  // #undef NO_POINTER_UNGUARDED
-
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length)
-
-    typedef _DifferenceTp difference_type;
-
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-    int min_seq;
-    RandomAccessIterator3 target_end;
-    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
-                                         comp, min_seq, stable);
-
-    difference_type total_length = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
-      total_length += LENGTH(*s);
-
-    if (overhang != -1)
-      {
-       difference_type unguarded_length = std::min(length, total_length - overhang);
-       target_end = multiway_merge_loser_tree_unguarded
-         <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
-         (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-       overhang = length - unguarded_length;
-      }
-    else
-      {
-       // Empty sequence found.
-       overhang = length;
-       target_end = target;
-      }
+      _RAIter3 __target_end;
 
-#if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
+      for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
+       // Move the sequence ends to the sentinel.  This has the
+       // effect that the sentinel appears to be within the sequence. Then,
+       // we can use the unguarded variant if we merge out as many
+       // non-sentinel elements as we have.
+       ++((*__s).second);
 
-    target_end = multiway_merge_loser_tree
-      <typename loser_tree_traits<value_type, Comparator>::LT>
-      (seqs_begin, seqs_end, target_end, comp, overhang, stable);
+      __target_end = multiway_merge_loser_tree_unguarded<UnguardedLoserTree>
+       (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
 
 #if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
+      _GLIBCXX_PARALLEL_ASSERT(__target_end == __target + __length);
+      _GLIBCXX_PARALLEL_ASSERT(__is_sorted(__target, __target_end, __comp));
 #endif
 
-    return target_end;
-  }
-
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
-  {
-    _GLIBCXX_CALL(length)
-
-    typedef _DifferenceTp difference_type;
-    typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-
-    RandomAccessIterator3 target_end;
-    difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
-
-    difference_type total_length = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
-      {
-       total_length += LENGTH(*s);
+      // Restore the sequence ends so the sentinels are not contained in the
+      // sequence any more (see comment in loop above).
+      for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
+       --((*__s).second);
 
-       // Sentinel spot.
-       (*s).second++;
-      }
-
-    difference_type unguarded_length = std::min(length, total_length - overhang);
-    target_end = multiway_merge_loser_tree_unguarded
-      <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
-      (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-    overhang = length - unguarded_length;
-
-#if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
+      return __target_end;
+    }
 
-    // Copy rest stable.
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
+  /**
+   * @brief Traits for determining whether the loser tree should
+   *   use pointers or copies.
+   *
+   * The field "_M_use_pointer" is used to determine whether to use pointers
+   * in he loser trees or whether to copy the values into the loser tree.
+   *
+   * The default behavior is to use pointers if the data type is 4 times as
+   * big as the pointer to it.
+   *
+   * Specialize for your data type to customize the behavior.
+   *
+   * Example:
+   *
+   *   template<>
+   *   struct _LoserTreeTraits<int>
+   *   { static const bool _M_use_pointer = false; };
+   *
+   *   template<>
+   *   struct _LoserTreeTraits<heavyweight_type>
+   *   { static const bool _M_use_pointer = true; };
+   *
+   * @param _Tp type to give the loser tree traits for.
+   */
+  template <typename _Tp>
+    struct _LoserTreeTraits
+    {
+      /**
+       * @brief True iff to use pointers instead of values in loser trees.
+       *
+       * The default behavior is to use pointers if the data type is four
+       * times as big as the pointer to it.
+       */
+      static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*));
+    };
+
+  /**
+   * @brief Switch for 3-way merging with __sentinels turned off.
+   *
+   * Note that 3-way merging is always stable!
+   */
+  template<bool __sentinels /*default == false*/,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    struct __multiway_merge_3_variant_sentinel_switch
+    {
+      _RAIter3
+      operator()(_RAIterIterator __seqs_begin,
+                _RAIterIterator __seqs_end,
+                _RAIter3 __target,
+                _DifferenceTp __length, _Compare __comp)
+      { return multiway_merge_3_variant<_GuardedIterator>
+         (__seqs_begin, __seqs_end, __target, __length, __comp); }
+    };
+
+  /**
+   * @brief Switch for 3-way merging with __sentinels turned on.
+   *
+   * Note that 3-way merging is always stable!
+   */
+  template<typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    struct __multiway_merge_3_variant_sentinel_switch<true, _RAIterIterator,
+                                                     _RAIter3, _DifferenceTp,
+                                                     _Compare>
+    {
+      _RAIter3
+      operator()(_RAIterIterator __seqs_begin,
+                _RAIterIterator __seqs_end,
+                _RAIter3 __target,
+                _DifferenceTp __length, _Compare __comp)
+      { return multiway_merge_3_variant<_UnguardedIterator>
+         (__seqs_begin, __seqs_end, __target, __length, __comp); }
+    };
+
+  /**
+   * @brief Switch for 4-way merging with __sentinels turned off.
+   *
+   * Note that 4-way merging is always stable!
+   */
+  template<bool __sentinels /*default == false*/,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    struct __multiway_merge_4_variant_sentinel_switch
+    {
+      _RAIter3
+      operator()(_RAIterIterator __seqs_begin,
+                _RAIterIterator __seqs_end,
+                _RAIter3 __target,
+                _DifferenceTp __length, _Compare __comp)
+      { return multiway_merge_4_variant<_GuardedIterator>
+         (__seqs_begin, __seqs_end, __target, __length, __comp); }
+    };
+
+  /**
+   * @brief Switch for 4-way merging with __sentinels turned on.
+   *
+   * Note that 4-way merging is always stable!
+   */
+  template<typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    struct __multiway_merge_4_variant_sentinel_switch<true, _RAIterIterator,
+                                                     _RAIter3, _DifferenceTp,
+                                                     _Compare>
+    {
+      _RAIter3
+      operator()(_RAIterIterator __seqs_begin,
+                _RAIterIterator __seqs_end,
+                _RAIter3 __target,
+                _DifferenceTp __length, _Compare __comp)
+      { return multiway_merge_4_variant<_UnguardedIterator>
+         (__seqs_begin, __seqs_end, __target, __length, __comp); }
+    };
+
+  /**
+   * @brief Switch for k-way merging with __sentinels turned on.
+   */
+  template<bool __sentinels,
+          bool __stable,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    struct __multiway_merge_k_variant_sentinel_switch
+    {
+      _RAIter3
+      operator()(_RAIterIterator __seqs_begin,
+                _RAIterIterator __seqs_end,
+                _RAIter3 __target,
+      const typename std::iterator_traits<typename std::iterator_traits<
+      _RAIterIterator>::value_type::first_type>::value_type&
+                __sentinel,
+                _DifferenceTp __length, _Compare __comp)
       {
-       // Restore.
-       (*s).second--;
-       difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
-       target_end = std::copy((*s).first, (*s).first + local_length, target_end);
-       (*s).first += local_length;
-       overhang -= local_length;
+       typedef typename std::iterator_traits<_RAIterIterator>
+         ::value_type::first_type
+         _RAIter1;
+       typedef typename std::iterator_traits<_RAIter1>::value_type
+         _ValueType;
+
+       return multiway_merge_loser_tree_sentinel<
+       typename __gnu_cxx::__conditional_type<
+       _LoserTreeTraits<_ValueType>::_M_use_pointer,
+         _LoserTreePointerUnguarded<__stable, _ValueType, _Compare>,
+         _LoserTreeUnguarded<__stable, _ValueType, _Compare>
+          >::__type>
+         (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
       }
+    };
 
-#if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
-    _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
-#endif
-
-    return target_end;
-  }
+  /**
+   * @brief Switch for k-way merging with __sentinels turned off.
+   */
+  template<bool __stable,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    struct __multiway_merge_k_variant_sentinel_switch<false, __stable,
+                                                     _RAIterIterator,
+                                                     _RAIter3, _DifferenceTp,
+                                                     _Compare>
+    {
+      _RAIter3
+      operator()(_RAIterIterator __seqs_begin,
+                _RAIterIterator __seqs_end,
+                _RAIter3 __target,
+       const typename std::iterator_traits<typename std::iterator_traits<
+       _RAIterIterator>::value_type::first_type>::value_type&
+                __sentinel,
+                _DifferenceTp __length, _Compare __comp)
+      {
+       typedef typename std::iterator_traits<_RAIterIterator>
+         ::value_type::first_type
+         _RAIter1;
+       typedef typename std::iterator_traits<_RAIter1>::value_type
+         _ValueType;
+
+       return multiway_merge_loser_tree<
+       typename __gnu_cxx::__conditional_type<
+       _LoserTreeTraits<_ValueType>::_M_use_pointer,
+         _LoserTreePointer<__stable, _ValueType, _Compare>,
+         _LoserTree<__stable, _ValueType, _Compare>
+          >::__type >(__seqs_begin, __seqs_end, __target, __length, __comp);
+      }
+    };
 
   /** @brief Sequential multi-way merging switch.
    *
-   *  The decision if based on the branching factor and runtime settings.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @param sentinel The sequences have a sentinel element.
+   *  The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and
+   *  runtime settings.
+   *  @param __seqs_begin Begin iterator of iterator pair input sequence.
+   *  @param __seqs_end End iterator of iterator pair input sequence.
+   *  @param __target Begin iterator of output sequence.
+   *  @param __comp Comparator.
+   *  @param __length Maximum length to merge, possibly larger than the
+   *  number of elements available.
+   *  @param __sentinel The sequences have __a __sentinel element.
    *  @return End iterator of output sequence. */
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
-  {
-    _GLIBCXX_CALL(length)
-
-    typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
+  template<bool __stable,
+          bool __sentinels,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIter3
+    __sequential_multiway_merge(_RAIterIterator __seqs_begin,
+                               _RAIterIterator __seqs_end,
+                               _RAIter3 __target,
+      const typename std::iterator_traits<typename std::iterator_traits<
+       _RAIterIterator>::value_type::first_type>::value_type&
+                               __sentinel,
+                               _DifferenceTp __length, _Compare __comp)
+    {
+      _GLIBCXX_CALL(__length)
+
+      typedef _DifferenceTp _DifferenceType;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::difference_type _SeqNumber;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
 
 #if _GLIBCXX_ASSERTIONS
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
-      _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
+      for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
+       {
+          _GLIBCXX_PARALLEL_ASSERT(__is_sorted((*__s).first,
+                                              (*__s).second, __comp));
+       }
 #endif
 
-    RandomAccessIterator3 return_target = target;
-    int k = static_cast<int>(seqs_end - seqs_begin);
+      _DifferenceTp __total_length = 0;
+      for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
+       __total_length += _GLIBCXX_PARALLEL_LENGTH(*__s);
 
-    Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
+      __length = std::min<_DifferenceTp>(__length, __total_length);
 
-    if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
-      mwma = Settings::LOSER_TREE_COMBINED;
+      if(__length == 0)
+       return __target;
 
-    switch (k)
-      {
-      case 0:
-       break;
-      case 1:
-       return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
-       seqs_begin[0].first += length;
-       break;
-      case 2:
-       return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
-       break;
-      case 3:
-       switch (mwma)
-         {
-         case Settings::LOSER_TREE_COMBINED:
-           return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         case Settings::LOSER_TREE_SENTINEL:
-           return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         default:
-           return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         }
-       break;
-      case 4:
-       switch (mwma)
-         {
-         case Settings::LOSER_TREE_COMBINED:
-           return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         case Settings::LOSER_TREE_SENTINEL:
-           return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         default:
-           return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         }
-       break;
-      default:
+      _RAIter3 __return_target = __target;
+      _SeqNumber __k = static_cast<_SeqNumber>(__seqs_end - __seqs_begin);
+
+      switch (__k)
        {
-         switch (mwma)
-           {
-           case Settings::BUBBLE:
-             return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
-#if _GLIBCXX_LOSER_TREE_EXPLICIT
-           case Settings::LOSER_TREE_EXPLICIT:
-             return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
-#endif
-#if _GLIBCXX_LOSER_TREE
-           case Settings::LOSER_TREE:
-             return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
-#endif
-#if _GLIBCXX_LOSER_TREE_COMBINED
-           case Settings::LOSER_TREE_COMBINED:
-             return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
-#endif
-#if _GLIBCXX_LOSER_TREE_SENTINEL
-           case Settings::LOSER_TREE_SENTINEL:
-             return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
-#endif
-           default:
-             // multiway_merge algorithm not implemented.
-             _GLIBCXX_PARALLEL_ASSERT(0);
-             break;
-           }
+       case 0:
+          break;
+       case 1:
+          __return_target = std::copy(__seqs_begin[0].first,
+                                     __seqs_begin[0].first + __length,
+                                     __target);
+          __seqs_begin[0].first += __length;
+          break;
+       case 2:
+          __return_target = __merge_advance(__seqs_begin[0].first,
+                                           __seqs_begin[0].second,
+                                           __seqs_begin[1].first,
+                                           __seqs_begin[1].second,
+                                           __target, __length, __comp);
+          break;
+       case 3:
+          __return_target = __multiway_merge_3_variant_sentinel_switch
+           <__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>()
+           (__seqs_begin, __seqs_end, __target, __length, __comp);
+          break;
+       case 4:
+          __return_target = __multiway_merge_4_variant_sentinel_switch
+           <__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>()
+           (__seqs_begin, __seqs_end, __target, __length, __comp);
+          break;
+       default:
+         __return_target = __multiway_merge_k_variant_sentinel_switch
+           <__sentinels, __stable, _RAIterIterator, _RAIter3, _DifferenceTp,
+            _Compare>()
+           (__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
+         break;
        }
-      }
 #if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
+      _GLIBCXX_PARALLEL_ASSERT(
+       __is_sorted(__target, __target + __length, __comp));
 #endif
 
-    return return_target;
-  }
+      return __return_target;
+    }
 
-  /** @brief Parallel multi-way merge routine.
+  /**
+   * @brief Stable sorting functor.
    *
-   *  The decision if based on the branching factor and runtime settings.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @param sentinel Ignored.
-   *  @return End iterator of output sequence. 
+   * Used to reduce code instanciation in multiway_merge_sampling_splitting.
    */
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
-  {
-    _GLIBCXX_CALL(length)
-
-    typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
+  template<bool __stable, class _RAIter, class _StrictWeakOrdering>
+    struct _SamplingSorter
+    {
+      void
+      operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp)
+      { __gnu_sequential::stable_sort(__first, __last, __comp); }
+    };
 
-#if _GLIBCXX_ASSERTIONS
-    for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
-      _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
-#endif
+  /**
+   * @brief Non-__stable sorting functor.
+   *
+   * Used to reduce code instantiation in multiway_merge_sampling_splitting.
+   */
+  template<class _RAIter, class _StrictWeakOrdering>
+    struct _SamplingSorter<false, _RAIter, _StrictWeakOrdering>
+    {
+      void
+      operator()(_RAIter __first, _RAIter __last, _StrictWeakOrdering __comp)
+      { __gnu_sequential::sort(__first, __last, __comp); }
+    };
+
+  /**
+   * @brief Sampling based splitting for parallel multiway-merge routine.
+   */
+  template<bool __stable,
+          typename _RAIterIterator,
+          typename _Compare,
+          typename _DifferenceType>
+    void
+    multiway_merge_sampling_splitting(_RAIterIterator __seqs_begin,
+                                     _RAIterIterator __seqs_end,
+                                     _DifferenceType __length,
+                                     _DifferenceType __total_length,
+                                     _Compare __comp,
+     std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces)
+    {
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::difference_type _SeqNumber;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
+      typedef typename std::iterator_traits<_RAIter1>::value_type
+       _ValueType;
+
+      // __k sequences.
+      const _SeqNumber __k
+       = static_cast<_SeqNumber>(__seqs_end - __seqs_begin);
+
+      const _ThreadIndex __num_threads = omp_get_num_threads();
+
+      const _DifferenceType __num_samples =
+       __gnu_parallel::_Settings::get().merge_oversampling * __num_threads;
+
+      _ValueType* __samples = static_cast<_ValueType*>
+       (::operator new(sizeof(_ValueType) * __k * __num_samples));
+      // Sample.
+      for (_SeqNumber __s = 0; __s < __k; ++__s)
+       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
+         {
+           _DifferenceType sample_index = static_cast<_DifferenceType>
+             (_GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__s])
+              * (double(__i + 1) / (__num_samples + 1))
+              * (double(__length) / __total_length));
+           new(&(__samples[__s * __num_samples + __i]))
+              _ValueType(__seqs_begin[__s].first[sample_index]);
+         }
+
+      // Sort stable or non-stable, depending on value of template parameter
+      // "__stable".
+      _SamplingSorter<__stable, _ValueType*, _Compare>()
+       (__samples, __samples + (__num_samples * __k), __comp);
 
-    // k sequences.
-    int k = static_cast<int>(seqs_end - seqs_begin);
+      for (_ThreadIndex __slab = 0; __slab < __num_threads; ++__slab)
+       // For each slab / processor.
+       for (_SeqNumber __seq = 0; __seq < __k; ++__seq)
+         {
+           // For each sequence.
+           if (__slab > 0)
+             __pieces[__slab][__seq].first = std::upper_bound
+               (__seqs_begin[__seq].first, __seqs_begin[__seq].second,
+                __samples[__num_samples * __k * __slab / __num_threads],
+                __comp)
+               - __seqs_begin[__seq].first;
+           else
+             // Absolute beginning.
+             __pieces[__slab][__seq].first = 0;
+           if ((__slab + 1) < __num_threads)
+             __pieces[__slab][__seq].second = std::upper_bound
+               (__seqs_begin[__seq].first, __seqs_begin[__seq].second,
+                __samples[__num_samples * __k * (__slab + 1) / __num_threads],
+                __comp)
+               - __seqs_begin[__seq].first;
+           else
+              // Absolute end.
+             __pieces[__slab][__seq].second =
+               _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]);
+         }
 
-    difference_type total_length = 0;
-    for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
-      total_length += LENGTH(*raii);
+      for (_SeqNumber __s = 0; __s < __k; ++__s)
+       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
+         __samples[__s * __num_samples + __i].~_ValueType();
+      ::operator delete(__samples);
+    }
 
-    _GLIBCXX_CALL(total_length)
+  /**
+   * @brief Exact splitting for parallel multiway-merge routine.
+   *
+   * None of the passed sequences may be empty.
+   */
+  template<bool __stable,
+          typename _RAIterIterator,
+          typename _Compare,
+          typename _DifferenceType>
+    void
+    multiway_merge_exact_splitting(_RAIterIterator __seqs_begin,
+                                  _RAIterIterator __seqs_end,
+                                  _DifferenceType __length,
+                                  _DifferenceType __total_length,
+                                  _Compare __comp,
+       std::vector<std::pair<_DifferenceType, _DifferenceType> > *__pieces)
+    {
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::difference_type _SeqNumber;
+      typedef typename std::iterator_traits<_RAIterIterator>
+       ::value_type::first_type
+       _RAIter1;
 
-    if (total_length == 0 || k == 0)
-      return target;
+      const bool __tight = (__total_length == __length);
 
-    thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
+      // __k sequences.
+      const _SeqNumber __k = __seqs_end - __seqs_begin;
 
-    Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
+      const _ThreadIndex __num_threads = omp_get_num_threads();
 
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
+      // (Settings::multiway_merge_splitting
+      //  == __gnu_parallel::_Settings::EXACT).
+      std::vector<_RAIter1>* __offsets = 
+       new std::vector<_RAIter1>[__num_threads];
+      std::vector<std::pair<_RAIter1, _RAIter1> > __se(__k);
 
-    bool tight = (total_length == length);
+      copy(__seqs_begin, __seqs_end, __se.begin());
 
-    // Thread t will have to merge pieces[iam][0..k - 1]
-    std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
-    for (int s = 0; s < num_threads; s++)
-      pieces[s].resize(k);
+      _DifferenceType* __borders =
+       new _DifferenceType[__num_threads + 1];
+      __equally_split(__length, __num_threads, __borders);
 
-    difference_type num_samples = Settings::merge_oversampling * num_threads;
+      for (_ThreadIndex __s = 0; __s < (__num_threads - 1); ++__s)
+       {
+         __offsets[__s].resize(__k);
+         multiseq_partition(__se.begin(), __se.end(), __borders[__s + 1],
+                            __offsets[__s].begin(), __comp);
 
-    if (Settings::multiway_merge_splitting == Settings::SAMPLING)
-      {
-       value_type* samples = new value_type[k * num_samples];
-       // Sample.
-       for (int s = 0; s < k; s++)
-         for (int i = 0; (difference_type)i < num_samples; i++)
+         // Last one also needed and available.
+         if (!__tight)
            {
-             difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
-             samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
+             __offsets[__num_threads - 1].resize(__k);
+             multiseq_partition(__se.begin(), __se.end(),
+                                _DifferenceType(__length),
+                                __offsets[__num_threads - 1].begin(),
+                                __comp);
            }
+       }
+      delete[] __borders;
 
-       if (stable)
-         __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
-       else
-         __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
-
-       for (int slab = 0; slab < num_threads; slab++)
+      for (_ThreadIndex __slab = 0; __slab < __num_threads; ++__slab)
+       {
          // For each slab / processor.
-         for (int seq = 0; seq < k; seq++)
+         for (_SeqNumber __seq = 0; __seq < __k; ++__seq)
            {
              // For each sequence.
-             if (slab > 0)
-               pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
-             else
+             if (__slab == 0)
                {
                  // Absolute beginning.
-                 pieces[slab][seq].first = 0;
+                 __pieces[__slab][__seq].first = 0;
                }
-             if ((slab + 1) < num_threads)
-               pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
              else
-               pieces[slab][seq].second = LENGTH(seqs_begin[seq]);     //absolute ending
+               __pieces[__slab][__seq].first =
+                 __pieces[__slab - 1][__seq].second;
+             if (!__tight || __slab < (__num_threads - 1))
+               __pieces[__slab][__seq].second =
+                 __offsets[__slab][__seq] - __seqs_begin[__seq].first;
+             else
+               {
+                 // __slab == __num_threads - 1
+                 __pieces[__slab][__seq].second =
+                    _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]);
+               }
            }
-       delete[] samples;
-      }
-    else
-      {
-       // (Settings::multiway_merge_splitting == Settings::EXACT).
-       std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
-       std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
-
-       copy(seqs_begin, seqs_end, se.begin());
+       }
+      delete[] __offsets;
+    }
 
-       difference_type borders[num_threads + 1];
-       equally_split(length, num_threads, borders);
+  /** @brief Parallel multi-way merge routine.
+   *
+   * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor
+   * and runtime settings.
+   *
+   * Must not be called if the number of sequences is 1.
+   *
+   * @tparam _Splitter functor to split input (either __exact or sampling based)
+   * @tparam __stable Stable merging incurs a performance penalty.
+   * @tparam __sentinel Ignored.
+   *
+   * @param __seqs_begin Begin iterator of iterator pair input sequence.
+   * @param __seqs_end End iterator of iterator pair input sequence.
+   * @param __target Begin iterator of output sequence.
+   * @param __comp Comparator.
+   * @param __length Maximum length to merge, possibly larger than the
+   * number of elements available.
+   * @return End iterator of output sequence.
+   */
+  template<bool __stable,
+          bool __sentinels,
+          typename _RAIterIterator,
+          typename _RAIter3,
+          typename _DifferenceTp,
+          typename _Splitter,
+          typename _Compare>
+    _RAIter3
+    parallel_multiway_merge(_RAIterIterator __seqs_begin,
+                            _RAIterIterator __seqs_end,
+                            _RAIter3 __target,
+                            _Splitter __splitter,
+                            _DifferenceTp __length,
+                            _Compare __comp,
+                            _ThreadIndex __num_threads)
+      {
+#if _GLIBCXX_ASSERTIONS
+       _GLIBCXX_PARALLEL_ASSERT(__seqs_end - __seqs_begin > 1);
+#endif
 
-       for (int s = 0; s < (num_threads - 1); s++)
+       _GLIBCXX_CALL(__length)
+
+       typedef _DifferenceTp _DifferenceType;
+        typedef typename std::iterator_traits<_RAIterIterator>
+         ::difference_type _SeqNumber;
+       typedef typename std::iterator_traits<_RAIterIterator>
+          ::value_type::first_type
+          _RAIter1;
+       typedef typename
+          std::iterator_traits<_RAIter1>::value_type _ValueType;
+
+       // Leave only non-empty sequences.
+       typedef std::pair<_RAIter1, _RAIter1> seq_type;
+       seq_type* __ne_seqs = new seq_type[__seqs_end - __seqs_begin];
+       _SeqNumber __k = 0;
+       _DifferenceType __total_length = 0;
+       for (_RAIterIterator __raii = __seqs_begin;
+             __raii != __seqs_end; ++__raii)
+          {
+            _DifferenceTp __seq_length = _GLIBCXX_PARALLEL_LENGTH(*__raii);
+            if(__seq_length > 0)
+              {
+               __total_length += __seq_length;
+               __ne_seqs[__k++] = *__raii;
+              }
+          }
+
+       _GLIBCXX_CALL(__total_length)
+
+       __length = std::min<_DifferenceTp>(__length, __total_length);
+
+       if (__total_length == 0 || __k == 0)
          {
-           offsets[s].resize(k);
-           multiseq_partition(se.begin(), se.end(), borders[s + 1],
-                              offsets[s].begin(), comp);
-
-           // Last one also needed and available.
-           if (!tight)
-             {
-               offsets[num_threads - 1].resize(k);
-               multiseq_partition(se.begin(), se.end(), (difference_type)length,
-                                  offsets[num_threads - 1].begin(),  comp);
-             }
+           delete[] __ne_seqs;
+           return __target;
          }
 
+       std::vector<std::pair<_DifferenceType, _DifferenceType> >* __pieces;
 
-       for (int slab = 0; slab < num_threads; slab++)
+       __num_threads = static_cast<_ThreadIndex>
+          (std::min<_DifferenceType>(__num_threads, __total_length));
+
+#       pragma omp parallel num_threads (__num_threads)
+       {
+#         pragma omp single
          {
-           // For each slab / processor.
-           for (int seq = 0; seq < k; seq++)
-             {
-               // For each sequence.
-               if (slab == 0)
-                 {
-                   // Absolute beginning.
-                   pieces[slab][seq].first = 0;
-                 }
-               else
-                 pieces[slab][seq].first = pieces[slab - 1][seq].second;
-               if (!tight || slab < (num_threads - 1))
-                 pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
-               else
-                 {
-                   // slab == num_threads - 1
-                   pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
-                 }
-             }
-         }
-       delete[] offsets;
+           __num_threads = omp_get_num_threads();
+           // Thread __t will have to merge pieces[__iam][0..__k - 1]
+           __pieces = new std::vector<
+           std::pair<_DifferenceType, _DifferenceType> >[__num_threads];
+           for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
+             __pieces[__s].resize(__k);
+
+           _DifferenceType __num_samples =
+             __gnu_parallel::_Settings::get().merge_oversampling
+             * __num_threads;
+
+           __splitter(__ne_seqs, __ne_seqs + __k, __length, __total_length,
+                      __comp, __pieces);
+         } //single
+
+         _ThreadIndex __iam = omp_get_thread_num();
+
+         _DifferenceType __target_position = 0;
+
+         for (_SeqNumber __c = 0; __c < __k; ++__c)
+           __target_position += __pieces[__iam][__c].first;
+
+         seq_type* __chunks = new seq_type[__k];
+
+         for (_SeqNumber __s = 0; __s < __k; ++__s)
+           __chunks[__s] = std::make_pair(__ne_seqs[__s].first
+                                          + __pieces[__iam][__s].first,
+                                          __ne_seqs[__s].first
+                                          + __pieces[__iam][__s].second);
+
+         if(__length > __target_position)
+           __sequential_multiway_merge<__stable, __sentinels>
+             (__chunks, __chunks + __k, __target + __target_position,
+              *(__seqs_begin->second), __length - __target_position, __comp);
+
+         delete[] __chunks;
+       } // parallel
+
+#if _GLIBCXX_ASSERTIONS
+       _GLIBCXX_PARALLEL_ASSERT(
+          __is_sorted(__target, __target + __length, __comp));
+#endif
+
+       __k = 0;
+       // Update ends of sequences.
+       for (_RAIterIterator __raii = __seqs_begin;
+             __raii != __seqs_end; ++__raii)
+          {
+            _DifferenceTp __length = _GLIBCXX_PARALLEL_LENGTH(*__raii);
+            if(__length > 0)
+              (*__raii).first += __pieces[__num_threads - 1][__k++].second;
+          }
+
+       delete[] __pieces;
+       delete[] __ne_seqs;
+
+       return __target + __length;
       }
 
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
+  /**
+   * @brief Multiway Merge Frontend.
+   *
+   * Merge the sequences specified by seqs_begin and __seqs_end into
+   * __target.  __seqs_begin and __seqs_end must point to a sequence of
+   * pairs.  These pairs must contain an iterator to the beginning
+   * of a sequence in their first entry and an iterator the _M_end of
+   * the same sequence in their second entry.
+   *
+   * Ties are broken arbitrarily.  See stable_multiway_merge for a variant
+   * that breaks ties by sequence number but is slower.
+   *
+   * The first entries of the pairs (i.e. the begin iterators) will be moved
+   * forward.
+   *
+   * The output sequence has to provide enough space for all elements
+   * that are written to it.
+   *
+   * This function will merge the input sequences:
+   *
+   * - not stable
+   * - parallel, depending on the input size and Settings
+   * - using sampling for splitting
+   * - not using sentinels
+   *
+   * Example:
+   *
+   * <pre>
+   *   int sequences[10][10];
+   *   for (int __i = 0; __i < 10; ++__i)
+   *     for (int __j = 0; __i < 10; ++__j)
+   *       sequences[__i][__j] = __j;
+   *
+   *   int __out[33];
+   *   std::vector<std::pair<int*> > seqs;
+   *   for (int __i = 0; __i < 10; ++__i)
+   *     { seqs.push(std::make_pair<int*>(sequences[__i],
+   *                                      sequences[__i] + 10)) }
+   *
+   *   multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33);
+   * </pre>
+   *
+   * @see stable_multiway_merge
+   *
+   * @pre All input sequences must be sorted.
+   * @pre Target must provide enough space to merge out length elements or
+   *    the number of elements in all sequences, whichever is smaller.
+   *
+   * @post [__target, return __value) contains merged __elements from the
+   *    input sequences.
+   * @post return __value - __target = min(__length, number of elements in all
+   *    sequences).
+   *
+   * @tparam _RAIterPairIterator iterator over sequence
+   *    of pairs of iterators
+   * @tparam _RAIterOut iterator over target sequence
+   * @tparam _DifferenceTp difference type for the sequence
+   * @tparam _Compare strict weak ordering type to compare elements
+   *    in sequences
+   *
+   * @param __seqs_begin  __begin of sequence __sequence
+   * @param __seqs_end    _M_end of sequence __sequence
+   * @param __target      target sequence to merge to.
+   * @param __comp        strict weak ordering to use for element comparison.
+   * @param __length Maximum length to merge, possibly larger than the
+   * number of elements available.
+   *
+   * @return _M_end iterator of output sequence
+   */
+  // multiway_merge
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge(_RAIterPairIterator __seqs_begin,
+                  _RAIterPairIterator __seqs_end,
+                  _RAIterOut __target,
+                  _DifferenceTp __length, _Compare __comp,
+                  __gnu_parallel::sequential_tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute multiway merge *sequentially*.
+      return __sequential_multiway_merge
+       </* __stable = */ false, /* __sentinels = */ false>
+       (__seqs_begin, __seqs_end, __target,
+        *(__seqs_begin->second), __length, __comp);
+    }
 
-#      pragma omp parallel num_threads(num_threads)
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge(_RAIterPairIterator __seqs_begin,
+                  _RAIterPairIterator __seqs_end,
+                  _RAIterOut __target,
+                  _DifferenceTp __length, _Compare __comp,
+                  __gnu_parallel::exact_tag __tag)
     {
-      thread_index_t iam = omp_get_thread_num();
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+               __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+         </* __stable = */ false, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_exact_splitting</* __stable = */ false,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+         </* __stable = */ false, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-      t[iam].tic();
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge(_RAIterPairIterator __seqs_begin,
+                  _RAIterPairIterator __seqs_end,
+                  _RAIterOut __target,
+                  _DifferenceTp __length, _Compare __comp,
+                  __gnu_parallel::sampling_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+               __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+         </* __stable = */ false, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_exact_splitting</* __stable = */ false,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+         </* __stable = */ false, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-      difference_type target_position = 0;
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge(_RAIterPairIterator __seqs_begin,
+                  _RAIterPairIterator __seqs_end,
+                  _RAIterOut __target,
+                  _DifferenceTp __length, _Compare __comp,
+                  parallel_tag __tag = parallel_tag(0))
+    { return multiway_merge(__seqs_begin, __seqs_end, __target, __length,
+                           __comp, exact_tag(__tag.__get_num_threads())); }
+
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge(_RAIterPairIterator __seqs_begin,
+                  _RAIterPairIterator __seqs_end,
+                  _RAIterOut __target,
+                  _DifferenceTp __length, _Compare __comp,
+                  default_parallel_tag __tag)
+    { return multiway_merge(__seqs_begin, __seqs_end, __target, __length,
+                           __comp, exact_tag(__tag.__get_num_threads())); }
+
+  // stable_multiway_merge
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge(_RAIterPairIterator __seqs_begin,
+                         _RAIterPairIterator __seqs_end,
+                         _RAIterOut __target,
+                         _DifferenceTp __length, _Compare __comp,
+                         __gnu_parallel::sequential_tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute multiway merge *sequentially*.
+      return __sequential_multiway_merge
+       </* __stable = */ true, /* __sentinels = */ false>
+          (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-      for (int c = 0; c < k; c++)
-       target_position += pieces[iam][c].first;
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge(_RAIterPairIterator __seqs_begin,
+                         _RAIterPairIterator __seqs_end,
+                         _RAIterOut __target,
+                         _DifferenceTp __length, _Compare __comp,
+                         __gnu_parallel::exact_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_exact_splitting</* __stable = */ true,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+         </* __stable = */ true, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-      if (k > 2)
-       {
-         std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge(_RAIterPairIterator __seqs_begin,
+                         _RAIterPairIterator __seqs_end,
+                         _RAIterOut __target,
+                         _DifferenceTp __length, _Compare __comp,
+                         sampling_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_sampling_splitting</* __stable = */ true,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ false>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-         difference_type local_length = 0;
-         for (int s = 0; s < k; s++)
-           {
-             chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
-             local_length += LENGTH(chunks[s]);
-           }
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge(_RAIterPairIterator __seqs_begin,
+                         _RAIterPairIterator __seqs_end,
+                         _RAIterOut __target,
+                         _DifferenceTp __length, _Compare __comp,
+                         parallel_tag __tag = parallel_tag(0))
+    {
+      return stable_multiway_merge
+       (__seqs_begin, __seqs_end, __target, __length, __comp,
+        exact_tag(__tag.__get_num_threads()));
+    }
 
-         multiway_merge(chunks, chunks + k, target + target_position, comp,
-                        std::min(local_length, length - target_position),
-                        stable, false, sequential_tag());
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge(_RAIterPairIterator __seqs_begin,
+                         _RAIterPairIterator __seqs_end,
+                         _RAIterOut __target,
+                         _DifferenceTp __length, _Compare __comp,
+                         default_parallel_tag __tag)
+    {
+      return stable_multiway_merge
+       (__seqs_begin, __seqs_end, __target, __length, __comp,
+        exact_tag(__tag.__get_num_threads()));
+    }
 
-         delete[] chunks;
-       }
-      else if (k == 2)
-       {
-         RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
-         merge_advance(begin0,
-                       seqs_begin[0].first + pieces[iam][0].second,
-                       begin1,
-                       seqs_begin[1].first + pieces[iam][1].second,
-                       target + target_position,
-                       (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
-                       comp);
-       }
+  /**
+   * @brief Multiway Merge Frontend.
+   *
+   * Merge the sequences specified by seqs_begin and __seqs_end into
+   * __target.  __seqs_begin and __seqs_end must point to a sequence of
+   * pairs.  These pairs must contain an iterator to the beginning
+   * of a sequence in their first entry and an iterator the _M_end of
+   * the same sequence in their second entry.
+   *
+   * Ties are broken arbitrarily.  See stable_multiway_merge for a variant
+   * that breaks ties by sequence number but is slower.
+   *
+   * The first entries of the pairs (i.e. the begin iterators) will be moved
+   * forward accordingly.
+   *
+   * The output sequence has to provide enough space for all elements
+   * that are written to it.
+   *
+   * This function will merge the input sequences:
+   *
+   * - not stable
+   * - parallel, depending on the input size and Settings
+   * - using sampling for splitting
+   * - using sentinels
+   *
+   * You have to take care that the element the _M_end iterator points to is
+   * readable and contains a value that is greater than any other non-sentinel
+   * value in all sequences.
+   *
+   * Example:
+   *
+   * <pre>
+   *   int sequences[10][11];
+   *   for (int __i = 0; __i < 10; ++__i)
+   *     for (int __j = 0; __i < 11; ++__j)
+   *       sequences[__i][__j] = __j; // __last one is sentinel!
+   *
+   *   int __out[33];
+   *   std::vector<std::pair<int*> > seqs;
+   *   for (int __i = 0; __i < 10; ++__i)
+   *     { seqs.push(std::make_pair<int*>(sequences[__i],
+   *                                      sequences[__i] + 10)) }
+   *
+   *   multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33);
+   * </pre>
+   *
+   * @pre All input sequences must be sorted.
+   * @pre Target must provide enough space to merge out length elements or
+   *    the number of elements in all sequences, whichever is smaller.
+   * @pre For each @c __i, @c __seqs_begin[__i].second must be the end
+   *    marker of the sequence, but also reference the one more __sentinel
+   *    element.
+   *
+   * @post [__target, return __value) contains merged __elements from the
+   *    input sequences.
+   * @post return __value - __target = min(__length, number of elements in all
+   *    sequences).
+   *
+   * @see stable_multiway_merge_sentinels
+   *
+   * @tparam _RAIterPairIterator iterator over sequence
+   *    of pairs of iterators
+   * @tparam _RAIterOut iterator over target sequence
+   * @tparam _DifferenceTp difference type for the sequence
+   * @tparam _Compare strict weak ordering type to compare elements
+   *    in sequences
+   *
+   * @param __seqs_begin  __begin of sequence __sequence
+   * @param __seqs_end    _M_end of sequence __sequence
+   * @param __target      target sequence to merge to.
+   * @param __comp        strict weak ordering to use for element comparison.
+   * @param __length Maximum length to merge, possibly larger than the
+   * number of elements available.
+   *
+   * @return _M_end iterator of output sequence
+   */
+  // multiway_merge_sentinels
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                            _RAIterPairIterator __seqs_end,
+                            _RAIterOut __target,
+                            _DifferenceTp __length, _Compare __comp,
+                            __gnu_parallel::sequential_tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute multiway merge *sequentially*.
+      return __sequential_multiway_merge
+       </* __stable = */ false, /* __sentinels = */ true>
+          (__seqs_begin, __seqs_end,
+           __target, *(__seqs_begin->second), __length, __comp);
+    }
+
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                            _RAIterPairIterator __seqs_end,
+                            _RAIterOut __target,
+                            _DifferenceTp __length, _Compare __comp,
+                            __gnu_parallel::exact_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+          </* __stable = */ false, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_exact_splitting</* __stable = */ false,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+          </* __stable = */ false, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-      t[iam].tic();
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                            _RAIterPairIterator __seqs_end,
+                            _RAIterOut __target,
+                            _DifferenceTp __length, _Compare __comp,
+                            sampling_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+          </* __stable = */ false, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_sampling_splitting</* __stable = */ false,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+          </* __stable = */false, /* __sentinels = */ true>(
+            __seqs_begin, __seqs_end, __target,
+           *(__seqs_begin->second), __length, __comp);
+    }
 
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                            _RAIterPairIterator __seqs_end,
+                            _RAIterOut __target,
+                            _DifferenceTp __length, _Compare __comp,
+                            parallel_tag __tag = parallel_tag(0))
+    {
+      return multiway_merge_sentinels
+       (__seqs_begin, __seqs_end, __target, __length, __comp,
+        exact_tag(__tag.__get_num_threads()));
     }
 
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                            _RAIterPairIterator __seqs_end,
+                            _RAIterOut __target,
+                            _DifferenceTp __length, _Compare __comp,
+                            default_parallel_tag __tag)
+    {
+      return multiway_merge_sentinels
+       (__seqs_begin, __seqs_end, __target, __length, __comp,
+        exact_tag(__tag.__get_num_threads()));
+    }
 
-#if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
-#endif
+  // stable_multiway_merge_sentinels
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                                   _RAIterPairIterator __seqs_end,
+                                   _RAIterOut __target,
+                                   _DifferenceTp __length, _Compare __comp,
+                                   __gnu_parallel::sequential_tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute multiway merge *sequentially*.
+      return __sequential_multiway_merge
+       </* __stable = */ true, /* __sentinels = */ true>
+       (__seqs_begin, __seqs_end, __target,
+        *(__seqs_begin->second), __length, __comp);
+    }
 
-    // Update ends of sequences.
-    for (int s = 0; s < k; s++)
-      seqs_begin[s].first += pieces[num_threads - 1][s].second;
-
-    delete[] pieces;
-
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].print();
-    delete[] t;
-
-    return target + length;
-  }
-
-  /** 
-   *  @brief Multi-way merging front-end.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence. 
-   */
-  template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
-                RandomAccessIteratorPairIterator seqs_end,
-                RandomAccessIterator3 target, Comparator comp,
-                _DifferenceTp length, bool stable)
-  {
-    typedef _DifferenceTp difference_type;
-    _GLIBCXX_CALL(seqs_end - seqs_begin)
-
-    if (seqs_begin == seqs_end)
-      return target;
-
-    RandomAccessIterator3 target_end;
-    if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
-      target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
-    else
-      target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
-
-    return target_end;
-  }
-
-  /** @brief Multi-way merging front-end.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence.
-   *  @pre For each @c i, @c seqs_begin[i].second must be the end
-   *  marker of the sequence, but also reference the one more sentinel
-   *  element. */
-  template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
-  RandomAccessIterator3
-  multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
-                         RandomAccessIteratorPairIterator seqs_end,
-                         RandomAccessIterator3 target,
-                         Comparator comp,
-                         _DifferenceTp length,
-                         bool stable)
-  {
-    typedef _DifferenceTp difference_type;
-
-    if (seqs_begin == seqs_end)
-      return target;
-
-    _GLIBCXX_CALL(seqs_end - seqs_begin)
-
-    if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
-      return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
-    else
-      return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
-  }
-}
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                                   _RAIterPairIterator __seqs_end,
+                                   _RAIterOut __target,
+                                   _DifferenceTp __length, _Compare __comp,
+                                   __gnu_parallel::exact_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+            __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+            __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_exact_splitting</* __stable = */ true,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
 
-#endif
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                                   _RAIterPairIterator __seqs_end,
+                                   _RAIterOut __target,
+                                   _DifferenceTp __length,
+                                   _Compare __comp,
+                                   sampling_tag __tag)
+    {
+      typedef _DifferenceTp _DifferenceType;
+      _GLIBCXX_CALL(__seqs_end - __seqs_begin)
+
+      // catch special case: no sequences
+      if (__seqs_begin == __seqs_end)
+       return __target;
+
+      // Execute merge; maybe parallel, depending on the number of merged
+      // elements and the number of sequences and global thresholds in
+      // Settings.
+      if ((__seqs_end - __seqs_begin > 1)
+         && _GLIBCXX_PARALLEL_CONDITION(
+            ((__seqs_end - __seqs_begin) >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
+            && ((_SequenceIndex)__length >=
+              __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
+       return parallel_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          multiway_merge_sampling_splitting</* __stable = */ true,
+          typename std::iterator_traits<_RAIterPairIterator>
+          ::value_type*, _Compare, _DifferenceTp>,
+          static_cast<_DifferenceType>(__length), __comp,
+          __tag.__get_num_threads());
+      else
+       return __sequential_multiway_merge
+          </* __stable = */ true, /* __sentinels = */ true>
+         (__seqs_begin, __seqs_end, __target,
+          *(__seqs_begin->second), __length, __comp);
+    }
+
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                                   _RAIterPairIterator __seqs_end,
+                                   _RAIterOut __target,
+                                   _DifferenceTp __length,
+                                   _Compare __comp,
+                                   parallel_tag __tag = parallel_tag(0))
+    {
+      return stable_multiway_merge_sentinels
+       (__seqs_begin, __seqs_end, __target, __length, __comp,
+        exact_tag(__tag.__get_num_threads()));
+    }
+
+  // public interface
+  template<typename _RAIterPairIterator,
+          typename _RAIterOut,
+          typename _DifferenceTp,
+          typename _Compare>
+    _RAIterOut
+    stable_multiway_merge_sentinels(_RAIterPairIterator __seqs_begin,
+                                   _RAIterPairIterator __seqs_end,
+                                   _RAIterOut __target,
+                                   _DifferenceTp __length, _Compare __comp,
+                                   default_parallel_tag __tag)
+    {
+      return stable_multiway_merge_sentinels
+       (__seqs_begin, __seqs_end, __target, __length, __comp,
+        exact_tag(__tag.__get_num_threads()));
+    }
+}; // namespace __gnu_parallel
+
+#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H */