OSDN Git Service

2007-11-28 Johannes Singler <singler@ira.uka.de>
[pf3gnuchains/gcc-fork.git] / libstdc++-v3 / include / parallel / multiway_merge.h
index cdafacb..c1bf251 100644 (file)
 // Public License.
 
 /** @file parallel/multiway_merge.h
- *  @brief Implementation of sequential and parallel multiway merge.
- *  This file is a GNU parallel extension to the Standard C++ Library.
- */
+*  @brief Implementation of sequential and parallel multiway merge.
+*
+*  Explanations on the high-speed merging routines in the appendix of
+*
+*  P. Sanders.
+*  Fast priority queues for cached memory.
+*  ACM Journal of Experimental Algorithmics, 5, 2000.
+*
+*  This file is a GNU parallel extension to the Standard C++ Library.
+*/
 
 // Written by Johannes Singler.
 
 #include <parallel/parallel.h>
 #include <parallel/merge.h>
 #include <parallel/losertree.h>
-#include <parallel/timing.h>
 #if _GLIBCXX_ASSERTIONS
 #include <parallel/checkers.h>
 #endif
 
 /** @brief Length of a sequence described by a pair of iterators. */
-#define LENGTH(s) ((s).second - (s).first)
+#define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
 
 // XXX need iterator typedefs
 namespace __gnu_parallel
 {
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   class guarded_iterator;
 
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+            guarded_iterator<RandomAccessIterator, Comparator>& bi2);
 
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+            guarded_iterator<RandomAccessIterator, Comparator>& bi2);
 
   /** @brief Iterator wrapper supporting an implicit supremum at the end
       of the sequence, dominating all comparisons.
       *  Deriving from RandomAccessIterator is not possible since
       *  RandomAccessIterator need not be a class.
       */
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   class guarded_iterator
   {
   private:
@@ -89,17 +95,17 @@ namespace __gnu_parallel
 
   public:
     /** @brief Constructor. Sets iterator to beginning of sequence.
-     *  @param begin Begin iterator of sequence.
-     *  @param end End iterator of sequence.
-     *  @param comp Comparator provided for associated overloaded
-     *  compare operators. */
-    inline guarded_iterator(RandomAccessIterator begin, 
-                           RandomAccessIterator end, Comparator& comp) 
+    *  @param begin Begin iterator of sequence.
+    *  @param end End iterator of sequence.
+    *  @param comp Comparator provided for associated overloaded
+    *  compare operators. */
+    inline guarded_iterator(RandomAccessIterator begin,
+                            RandomAccessIterator end, Comparator& comp)
     : current(begin), end(end), comp(comp)
     { }
 
     /** @brief Pre-increment operator.
-     *  @return This. */
+    *  @return This. */
     inline guarded_iterator<RandomAccessIterator, Comparator>&
     operator++()
     {
@@ -108,31 +114,35 @@ namespace __gnu_parallel
     }
 
     /** @brief Dereference operator.
-     *  @return Referenced element. */
+    *  @return Referenced element. */
     inline typename std::iterator_traits<RandomAccessIterator>::value_type
     operator*()
     { return *current; }
 
     /** @brief Convert to wrapped iterator.
-     *  @return Wrapped iterator. */
+    *  @return Wrapped iterator. */
     inline operator RandomAccessIterator()
     { return current; }
 
     friend bool
-    operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+    operator< <RandomAccessIterator, Comparator>(
+        guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+        guarded_iterator<RandomAccessIterator, Comparator>& bi2);
 
     friend bool
-    operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
+    operator<= <RandomAccessIterator, Comparator>(
+        guarded_iterator<RandomAccessIterator, Comparator>& bi1,
+        guarded_iterator<RandomAccessIterator, Comparator>& bi2);
   };
 
-  /** @brief Compare two elements referenced by guarded iterators.
  *  @param bi1 First iterator.
  *  @param bi2 Second iterator.
  *  @return @c True if less. */
-  template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by guarded iterators.
+ *  @param bi1 First iterator.
+ *  @param bi2 Second iterator.
+ *  @return @c True if less. */
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+            guarded_iterator<RandomAccessIterator, Comparator>& bi2)
   {
     if (bi1.current == bi1.end)        //bi1 is sup
       return bi2.current == bi2.end;   //bi2 is not sup
@@ -141,14 +151,14 @@ namespace __gnu_parallel
     return (bi1.comp)(*bi1, *bi2);     //normal compare
   }
 
-  /** @brief Compare two elements referenced by guarded iterators.
  *  @param bi1 First iterator.
  *  @param bi2 Second iterator.
  *  @return @c True if less equal. */
-  template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by guarded iterators.
+ *  @param bi1 First iterator.
+ *  @param bi2 Second iterator.
+ *  @return @c True if less equal. */
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            guarded_iterator<RandomAccessIterator, Comparator>& bi2)
+            guarded_iterator<RandomAccessIterator, Comparator>& bi2)
   {
     if (bi2.current == bi2.end)        //bi1 is sup
       return bi1.current != bi1.end;   //bi2 is not sup
@@ -157,20 +167,20 @@ namespace __gnu_parallel
     return !(bi1.comp)(*bi2, *bi1);    //normal compare
   }
 
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   class unguarded_iterator;
 
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+            unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
 
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+             unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
 
-  template<typename RandomAccessIterator, typename Comparator>
+template<typename RandomAccessIterator, typename Comparator>
   class unguarded_iterator
   {
   private:
@@ -181,212 +191,233 @@ namespace __gnu_parallel
 
   public:
     /** @brief Constructor. Sets iterator to beginning of sequence.
-     *  @param begin Begin iterator of sequence.
-     *  @param end Unused, only for compatibility.
-     *  @param comp Unused, only for compatibility. */
-    inline unguarded_iterator(RandomAccessIterator begin, 
-                             RandomAccessIterator end, Comparator& comp) 
+    *  @param begin Begin iterator of sequence.
+    *  @param end Unused, only for compatibility.
+    *  @param comp Unused, only for compatibility. */
+    inline unguarded_iterator(RandomAccessIterator begin,
+                              RandomAccessIterator end, Comparator& comp)
     : current(begin), comp(comp)
     { }
 
     /** @brief Pre-increment operator.
-     *  @return This. */
+    *  @return This. */
     inline  unguarded_iterator<RandomAccessIterator, Comparator>&
     operator++()
     {
-      current++;
+      ++current;
       return *this;
     }
 
     /** @brief Dereference operator.
-     *  @return Referenced element. */
-    inline typename std::iterator_traits<RandomAccessIterator>::value_type 
+    *  @return Referenced element. */
+    inline typename std::iterator_traits<RandomAccessIterator>::value_type
     operator*()
     { return *current; }
 
     /** @brief Convert to wrapped iterator.
-     *  @return Wrapped iterator. */
+    *  @return Wrapped iterator. */
     inline
     operator RandomAccessIterator()
     { return current; }
 
     friend bool
-    operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+    operator< <RandomAccessIterator, Comparator>(
+        unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+        unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
 
     friend bool
-    operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
+    operator<= <RandomAccessIterator, Comparator>(
+        unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
+        unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
   };
 
-  /** @brief Compare two elements referenced by unguarded iterators.
  *  @param bi1 First iterator.
  *  @param bi2 Second iterator.
  *  @return @c True if less. */
-  template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by unguarded iterators.
+ *  @param bi1 First iterator.
+ *  @param bi2 Second iterator.
+ *  @return @c True if less. */
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-           unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
+            unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
   {
     // Normal compare.
     return (bi1.comp)(*bi1, *bi2);
   }
 
-  /** @brief Compare two elements referenced by unguarded iterators.
  *  @param bi1 First iterator.
  *  @param bi2 Second iterator.
  *  @return @c True if less equal. */
-  template<typename RandomAccessIterator, typename Comparator>
+/** @brief Compare two elements referenced by unguarded iterators.
+ *  @param bi1 First iterator.
+ *  @param bi2 Second iterator.
+ *  @return @c True if less equal. */
+template<typename RandomAccessIterator, typename Comparator>
   inline bool
   operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
-            unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
+            unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
   {
     // Normal compare.
     return !(bi1.comp)(*bi2, *bi1);
   }
 
-  /** Prepare a set of sequences to be merged without a (end) guard
-   *  @param seqs_begin
-   *  @param seqs_end
-   *  @param comp
-   *  @param min_sequence
-   *  @param stable
-   *  @pre (seqs_end - seqs_begin > 0) */
-  template<typename RandomAccessIteratorIterator, typename Comparator>
-  typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
+/** Prepare a set of sequences to be merged without a (end) guard
+ *  @param seqs_begin
+ *  @param seqs_end
+ *  @param comp
+ *  @param min_sequence
+ *  @param stable
+ *  @pre (seqs_end - seqs_begin > 0) */
+template<typename RandomAccessIteratorIterator, typename Comparator>
+  typename std::iterator_traits<
+      typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
+      ::first_type>::difference_type
   prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
-                   RandomAccessIteratorIterator seqs_end, Comparator comp,
-                   int& min_sequence, bool stable)
+                    RandomAccessIteratorIterator seqs_end, Comparator comp,
+                    int& min_sequence, bool stable)
   {
     _GLIBCXX_CALL(seqs_end - seqs_begin)
 
-      typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+        ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
+    typedef typename std::iterator_traits<RandomAccessIterator1>
+      ::difference_type
       difference_type;
 
     if ((*seqs_begin).first == (*seqs_begin).second)
       {
-       // Empty sequence found, it's the first one.
-       min_sequence = 0;
-       return -1;
+        // Empty sequence found, it's the first one.
+        min_sequence = 0;
+        return -1;
       }
 
     // Last element in sequence.
     value_type min = *((*seqs_begin).second - 1);
     min_sequence = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
+    for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
       {
-       if ((*s).first == (*s).second)
-         {
-           // Empty sequence found.
-           min_sequence = static_cast<int>(s - seqs_begin);
-           return -1;
-         }
-
-       // Last element in sequence.
-       const value_type& v = *((*s).second - 1);
-       if (comp(v, min))       //strictly smaller
-         {
-           min = v;
-           min_sequence = static_cast<int>(s - seqs_begin);
-         }
+        if ((*s).first == (*s).second)
+          {
+            // Empty sequence found.
+            min_sequence = static_cast<int>(s - seqs_begin);
+            return -1;
+          }
+
+        // Last element in sequence.
+        const value_type& v = *((*s).second - 1);
+        if (comp(v, min))      //strictly smaller
+          {
+            min = v;
+            min_sequence = static_cast<int>(s - seqs_begin);
+          }
       }
 
     difference_type overhang_size = 0;
 
     int s = 0;
-    for (s = 0; s <= min_sequence; s++)
+    for (s = 0; s <= min_sequence; ++s)
       {
-       RandomAccessIterator1 split;
-       if (stable)
-         split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
-                                  min, comp);
-       else
-         split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
-                                  min, comp);
-
-       overhang_size += seqs_begin[s].second - split;
+        RandomAccessIterator1 split;
+        if (stable)
+          split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
+                                  min, comp);
+        else
+          split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
+                                  min, comp);
+
+        overhang_size += seqs_begin[s].second - split;
       }
 
-    for (; s < (seqs_end - seqs_begin); s++)
+    for (; s < (seqs_end - seqs_begin); ++s)
       {
-       RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
-       overhang_size += seqs_begin[s].second - split;
+        RandomAccessIterator1 split = std::lower_bound(
+            seqs_begin[s].first, seqs_begin[s].second, min, comp);
+        overhang_size += seqs_begin[s].second - split;
       }
 
     // So many elements will be left over afterwards.
     return overhang_size;
   }
 
-  /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
-   *  @param seqs_begin
-   *  @param seqs_end
-   *  @param comp */
-  template<typename RandomAccessIteratorIterator, typename Comparator>
-  typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
+/** Prepare a set of sequences to be merged with a (end) guard (sentinel)
+ *  @param seqs_begin
+ *  @param seqs_end
+ *  @param comp */
+template<typename RandomAccessIteratorIterator, typename Comparator>
+  typename std::iterator_traits<typename std::iterator_traits<
+      RandomAccessIteratorIterator>::value_type::first_type>::difference_type
   prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
-                            RandomAccessIteratorIterator seqs_end,
-                            Comparator comp)
+                            RandomAccessIteratorIterator seqs_end,
+                            Comparator comp)
   {
     _GLIBCXX_CALL(seqs_end - seqs_begin)
 
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+    typedef typename std::iterator_traits<RandomAccessIterator1>
+      ::value_type
       value_type;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
+    typedef typename std::iterator_traits<RandomAccessIterator1>
+      ::difference_type
       difference_type;
 
     // Last element in sequence.
-    value_type max;
-    bool max_found = false;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+    value_type* max = NULL;
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
       {
-       if ((*s).first == (*s).second)
-         continue;
+        if ((*s).first == (*s).second)
+          continue;
 
-       // Last element in sequence.
-       value_type& v = *((*s).second - 1);
+        // Last element in sequence.
+        value_type& v = *((*s).second - 1);
 
-       // Strictly greater.
-       if (!max_found || comp(max, v))
-         max = v;
-       max_found = true;
+        // Strictly greater.
+        if (!max || comp(*max, v))
+          max = &v;
       }
 
     difference_type overhang_size = 0;
-
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
       {
-       RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp);
-       overhang_size += (*s).second - split;
+        RandomAccessIterator1 split =
+            std::lower_bound((*s).first, (*s).second, *max, comp);
+        overhang_size += (*s).second - split;
 
-       // Set sentinel.
-       *((*s).second) = max;
+        // Set sentinel.
+        *((*s).second) = *max;
       }
 
     // So many elements will be left over afterwards.
     return overhang_size;
   }
 
-  /** @brief Highly efficient 3-way merging procedure.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Unused, stable anyway.
-   *  @return End iterator of output sequence. */
-  template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Highly efficient 3-way merging procedure.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Unused, stable anyway.
+ *  @return End iterator of output sequence. */
+template<
+    template<typename RAI, typename C> class iterator,
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_3_variant(
+      RandomAccessIteratorIterator seqs_begin,
+      RandomAccessIteratorIterator seqs_end,
+      RandomAccessIterator3 target,
+      Comparator comp, _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length);
-    
+
     typedef _DifferenceTp difference_type;
 
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -401,46 +432,46 @@ namespace __gnu_parallel
 
     if (seq0 <= seq1)
       {
-       if (seq1 <= seq2)
-         goto s012;
-       else
-         if (seq2 <  seq0)
-           goto s201;
-         else
-           goto s021;
+        if (seq1 <= seq2)
+          goto s012;
+        else
+          if (seq2 <  seq0)
+            goto s201;
+          else
+            goto s021;
       }
     else
       {
-       if (seq1 <= seq2)
-         {
-           if (seq0 <= seq2)
-             goto s102;
-           else
-             goto s120;
-         }
-       else
-         goto s210;
+        if (seq1 <= seq2)
+          {
+            if (seq0 <= seq2)
+              goto s102;
+            else
+              goto s120;
+          }
+        else
+          goto s210;
       }
 
-#define Merge3Case(a,b,c,c0,c1)                                \
-    s ## a ## b ## c :                                 \
-      *target = *seq ## a;                             \
-    ++target;                                          \
-    length--;                                          \
-    ++seq ## a;                                                \
-    if (length == 0) goto finish;                      \
-    if (seq ## a c0 seq ## b) goto s ## a ## b ## c;   \
-    if (seq ## a c1 seq ## c) goto s ## b ## a ## c;   \
+#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
+    s ## a ## b ## c :                                  \
+      *target = *seq ## a;                              \
+    ++target;                                           \
+    --length;                                           \
+    ++seq ## a;                                         \
+    if (length == 0) goto finish;                       \
+    if (seq ## a c0 seq ## b) goto s ## a ## b ## c;    \
+    if (seq ## a c1 seq ## c) goto s ## b ## a ## c;    \
     goto s ## b ## c ## a;
 
-    Merge3Case(0, 1, 2, <=, <=);
-    Merge3Case(1, 2, 0, <=, < );
-    Merge3Case(2, 0, 1, < , < );
-    Merge3Case(1, 0, 2, < , <=);
-    Merge3Case(0, 2, 1, <=, <=);
-    Merge3Case(2, 1, 0, < , < );
+    _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
+    _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
+    _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
+    _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );
 
-#undef Merge3Case
+#undef _GLIBCXX_PARALLEL_MERGE_3_CASE
 
   finish:
     ;
@@ -452,14 +483,23 @@ namespace __gnu_parallel
     return target;
   }
 
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
+                            RandomAccessIteratorIterator seqs_end,
+                            RandomAccessIterator3 target,
+                            Comparator comp,
+                            _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length);
-    
+
     typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -468,24 +508,26 @@ namespace __gnu_parallel
     RandomAccessIterator3 target_end;
 
     // Stable anyway.
-    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+    difference_type overhang =
+        prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
 
     difference_type total_length = 0;
     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
-      total_length += LENGTH(*s);
+      total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
 
     if (overhang != -1)
       {
-       difference_type unguarded_length = std::min(length, total_length - overhang);
-       target_end = multiway_merge_3_variant<unguarded_iterator>
-         (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-       overhang = length - unguarded_length;
+        difference_type unguarded_length =
+            std::min(length, total_length - overhang);
+        target_end = multiway_merge_3_variant<unguarded_iterator>
+          (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+        overhang = length - unguarded_length;
       }
     else
       {
-       // Empty sequence found.
-       overhang = length;
-       target_end = target;
+        // Empty sequence found.
+        overhang = length;
+        target_end = target;
       }
 
 #if _GLIBCXX_ASSERTIONS
@@ -496,23 +538,23 @@ namespace __gnu_parallel
     switch (min_seq)
       {
       case 0:
-       // Iterators will be advanced accordingly.
-       target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
-                                  seqs_begin[2].first, seqs_begin[2].second,
-                                  target_end, overhang, comp);
-       break;
+        // Iterators will be advanced accordingly.
+        target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
+                                  seqs_begin[2].first, seqs_begin[2].second,
+                                  target_end, overhang, comp);
+        break;
       case 1:
-       target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
-                                  seqs_begin[2].first, seqs_begin[2].second,
-                                  target_end, overhang, comp);
-       break;
+        target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
+                                  seqs_begin[2].first, seqs_begin[2].second,
+                                  target_end, overhang, comp);
+        break;
       case 2:
-       target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
-                                  seqs_begin[1].first, seqs_begin[1].second,
-                                  target_end, overhang, comp);
-       break;
+        target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
+                                  seqs_begin[1].first, seqs_begin[1].second,
+                                  target_end, overhang, comp);
+        break;
       default:
-       _GLIBCXX_PARALLEL_ASSERT(false);
+        _GLIBCXX_PARALLEL_ASSERT(false);
       }
 
 #if _GLIBCXX_ASSERTIONS
@@ -523,22 +565,31 @@ namespace __gnu_parallel
     return target_end;
   }
 
-  /** @brief Highly efficient 4-way merging procedure.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Unused, stable anyway.
-   *  @return End iterator of output sequence. */
-  template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Highly efficient 4-way merging procedure.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Unused, stable anyway.
+ *  @return End iterator of output sequence. */
+template<
+    template<typename RAI, typename C> class iterator,
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
+                           RandomAccessIteratorIterator seqs_end,
+                           RandomAccessIterator3 target,
+                           Comparator comp, _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length);
     typedef _DifferenceTp difference_type;
 
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -549,7 +600,7 @@ namespace __gnu_parallel
       seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
       seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
 
-#define Decision(a,b,c,d) {                                    \
+#define _GLIBCXX_PARALLEL_DECISION(a,b,c,d) {                   \
       if (seq ## d < seq ## a) goto s ## d ## a ## b ## c;     \
       if (seq ## d < seq ## b) goto s ## a ## d ## b ## c;     \
       if (seq ## d < seq ## c) goto s ## a ## b ## d ## c;     \
@@ -557,66 +608,66 @@ namespace __gnu_parallel
 
     if (seq0 <= seq1)
       {
-       if (seq1 <= seq2)
-         Decision(0,1,2,3)
-         else
-           if (seq2 < seq0)
-             Decision(2,0,1,3)
-             else
-               Decision(0,2,1,3)
-                 }
+        if (seq1 <= seq2)
+          _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
+          else
+            if (seq2 < seq0)
+              _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
+              else
+                _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
+                  }
     else
       {
-       if (seq1 <= seq2)
-         {
-           if (seq0 <= seq2)
-             Decision(1,0,2,3)
-             else
-               Decision(1,2,0,3)
-                 }
-       else
-         Decision(2,1,0,3)
-           }
-
-#define Merge4Case(a,b,c,d,c0,c1,c2)                           \
-    s ## a ## b ## c ## d:                                     \
-      if (length == 0) goto finish;                            \
-    *target = *seq ## a;                                       \
-    ++target;                                                  \
-    length--;                                                  \
-    ++seq ## a;                                                        \
-    if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;      \
-    if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;      \
-    if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;      \
+        if (seq1 <= seq2)
+          {
+            if (seq0 <= seq2)
+              _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
+              else
+                _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
+                  }
+        else
+          _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
+            }
+
+#define _GLIBCXX_PARALLEL_MERGE_4_CASE(a,b,c,d,c0,c1,c2)        \
+    s ## a ## b ## c ## d:                                      \
+      if (length == 0) goto finish;                             \
+    *target = *seq ## a;                                        \
+    ++target;                                                   \
+    --length;                                                   \
+    ++seq ## a;                                                 \
+    if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d;       \
+    if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d;       \
+    if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d;       \
     goto s ## b ## c ## d ## a;
 
-    Merge4Case(0, 1, 2, 3, <=, <=, <=);
-    Merge4Case(0, 1, 3, 2, <=, <=, <=);
-    Merge4Case(0, 2, 1, 3, <=, <=, <=);
-    Merge4Case(0, 2, 3, 1, <=, <=, <=);
-    Merge4Case(0, 3, 1, 2, <=, <=, <=);
-    Merge4Case(0, 3, 2, 1, <=, <=, <=);
-    Merge4Case(1, 0, 2, 3, < , <=, <=);
-    Merge4Case(1, 0, 3, 2, < , <=, <=);
-    Merge4Case(1, 2, 0, 3, <=, < , <=);
-    Merge4Case(1, 2, 3, 0, <=, <=, < );
-    Merge4Case(1, 3, 0, 2, <=, < , <=);
-    Merge4Case(1, 3, 2, 0, <=, <=, < );
-    Merge4Case(2, 0, 1, 3, < , < , <=);
-    Merge4Case(2, 0, 3, 1, < , <=, < );
-    Merge4Case(2, 1, 0, 3, < , < , <=);
-    Merge4Case(2, 1, 3, 0, < , <=, < );
-    Merge4Case(2, 3, 0, 1, <=, < , < );
-    Merge4Case(2, 3, 1, 0, <=, < , < );
-    Merge4Case(3, 0, 1, 2, < , < , < );
-    Merge4Case(3, 0, 2, 1, < , < , < );
-    Merge4Case(3, 1, 0, 2, < , < , < );
-    Merge4Case(3, 1, 2, 0, < , < , < );
-    Merge4Case(3, 2, 0, 1, < , < , < );
-    Merge4Case(3, 2, 1, 0, < , < , < );
-
-#undef Merge4Case
-#undef Decision
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
+    _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );
+
+#undef _GLIBCXX_PARALLEL_MERGE_4_CASE
+#undef _GLIBCXX_PARALLEL_DECISION
 
   finish:
     ;
@@ -629,14 +680,23 @@ namespace __gnu_parallel
     return target;
   }
 
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
+                            RandomAccessIteratorIterator seqs_end,
+                            RandomAccessIterator3 target,
+                            Comparator comp,
+                            _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length);
     typedef _DifferenceTp difference_type;
 
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -645,24 +705,26 @@ namespace __gnu_parallel
     RandomAccessIterator3 target_end;
 
     // Stable anyway.
-    difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
+    difference_type overhang =
+        prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
 
     difference_type total_length = 0;
     for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
-      total_length += LENGTH(*s);
+      total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
 
     if (overhang != -1)
       {
-       difference_type unguarded_length = std::min(length, total_length - overhang);
-       target_end = multiway_merge_4_variant<unguarded_iterator>
-         (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-       overhang = length - unguarded_length;
+        difference_type unguarded_length =
+            std::min(length, total_length - overhang);
+        target_end = multiway_merge_4_variant<unguarded_iterator>
+          (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+        overhang = length - unguarded_length;
       }
     else
       {
-       // Empty sequence found.
-       overhang = length;
-       target_end = target;
+        // Empty sequence found.
+        overhang = length;
+        target_end = target;
       }
 
 #if _GLIBCXX_ASSERTIONS
@@ -670,10 +732,13 @@ namespace __gnu_parallel
     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
 #endif
 
-    std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
+    std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
+        one_missing(seqs_begin, seqs_end);
     one_missing.erase(one_missing.begin() + min_seq);  //remove
 
-    target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
+    target_end = multiway_merge_3_variant<guarded_iterator>(
+        one_missing.begin(), one_missing.end(),
+        target_end, comp, overhang, stable);
 
     // Insert back again.
     one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
@@ -688,203 +753,225 @@ namespace __gnu_parallel
     return target_end;
   }
 
-  /** @brief Basic multi-way merging procedure.
-   *
-   *  The head elements are kept in a sorted array, new heads are
-   *  inserted linearly.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence. 
-   */
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Basic multi-way merging procedure.
+ *
+ *  The head elements are kept in a sorted array, new heads are
+ *  inserted linearly.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Stable merging incurs a performance penalty.
+ *  @return End iterator of output sequence.
+ */
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
+                        RandomAccessIteratorIterator seqs_end,
+                        RandomAccessIterator3 target,
+                        Comparator comp, _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length)
 
     typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
 
-    // Num remaining pieces.
-    int k = static_cast<int>(seqs_end - seqs_begin), nrp;
+    int k = static_cast<int>(seqs_end - seqs_begin);
+    int nrs;  // Number of remaining sequences.
 
-    value_type* pl = new value_type[k];
+    // Avoid default constructor.
+    value_type* fe = static_cast<value_type*>(
+      ::operator new(sizeof(value_type) * k));  // Front elements.
     int* source = new int[k];
     difference_type total_length = 0;
 
-#define POS(i) seqs_begin[(i)].first
-#define STOPS(i) seqs_begin[(i)].second
-
     // Write entries into queue.
-    nrp = 0;
-    for (int pi = 0; pi < k; pi++)
+    nrs = 0;
+    for (int pi = 0; pi < k; ++pi)
       {
-       if (STOPS(pi) != POS(pi))
-         {
-           pl[nrp] = *(POS(pi));
-           source[nrp] = pi;
-           nrp++;
-           total_length += LENGTH(seqs_begin[pi]);
-         }
+        if (seqs_begin[pi].first != seqs_begin[pi].second)
+          {
+            new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
+            source[nrs] = pi;
+            ++nrs;
+            total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
+          }
       }
 
     if (stable)
       {
-       for (int k = 0; k < nrp - 1; k++)
-         for (int pi = nrp - 1; pi > k; pi--)
-           if (comp(pl[pi], pl[pi - 1]) ||
-               (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
-             {
-               std::swap(pl[pi - 1], pl[pi]);
-               std::swap(source[pi - 1], source[pi]);
-             }
+        // Bubble sort fe and source by fe.
+        for (int k = 0; k < nrs - 1; ++k)
+          for (int pi = nrs - 1; pi > k; --pi)
+            if (comp(fe[pi], fe[pi - 1]) ||
+                (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
+              {
+                std::swap(fe[pi - 1], fe[pi]);
+                std::swap(source[pi - 1], source[pi]);
+              }
       }
     else
       {
-       for (int k = 0; k < nrp - 1; k++)
-         for (int pi = nrp - 1; pi > k; pi--)
-           if (comp(pl[pi], pl[pi-1]))
-             {
-               std::swap(pl[pi-1], pl[pi]);
-               std::swap(source[pi-1], source[pi]);
-             }
+        for (int k = 0; k < nrs - 1; ++k)
+          for (int pi = nrs - 1; pi > k; --pi)
+            if (comp(fe[pi], fe[pi-1]))
+              {
+                std::swap(fe[pi-1], fe[pi]);
+                std::swap(source[pi-1], source[pi]);
+              }
       }
 
     // Iterate.
     if (stable)
       {
-       int j;
-       while (nrp > 0 && length > 0)
-         {
-           if (source[0] < source[1])
-             {
-               // pl[0] <= pl[1]
-               while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
-                 {
-                   *target = pl[0];
-                   ++target;
-                   ++POS(source[0]);
-                   length--;
-                   if (POS(source[0]) == STOPS(source[0]))
-                     {
-                       // Move everything to the left.
-                       for (int s = 0; s < nrp - 1; s++)
-                         {
-                           pl[s] = pl[s + 1];
-                           source[s] = source[s + 1];
-                         }
-                       nrp--;
-                       break;
-                     }
-                   else
-                     pl[0] = *(POS(source[0]));
-                 }
-             }
-           else
-             {
-               // pl[0] < pl[1]
-               while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
-                 {
-                   *target = pl[0];
-                   ++target;
-                   ++POS(source[0]);
-                   length--;
-                   if (POS(source[0]) == STOPS(source[0]))
-                     {
-                       for (int s = 0; s < nrp - 1; s++)
-                         {
-                           pl[s] = pl[s + 1];
-                           source[s] = source[s + 1];
-                         }
-                       nrp--;
-                       break;
-                     }
-                   else
-                     pl[0] = *(POS(source[0]));
-                 }
-             }
-
-           // Sink down.
-           j = 1;
-           while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
-                                (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
-             {
-               std::swap(pl[j - 1], pl[j]);
-               std::swap(source[j - 1], source[j]);
-               j++;
-             }
-         }
+        int j;
+        while (nrs > 0 && length > 0)
+          {
+            if (source[0] < source[1])
+              {
+                // fe[0] <= fe[1]
+                while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
+                  {
+                    *target = fe[0];
+                    ++target;
+                    ++(seqs_begin[source[0]].first);
+                    --length;
+                    if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
+                      {
+                        // Move everything to the left.
+                        for (int s = 0; s < nrs - 1; ++s)
+                          {
+                            fe[s] = fe[s + 1];
+                            source[s] = source[s + 1];
+                          }
+                        fe[nrs - 1].~value_type();  //Destruct explicitly.
+                        --nrs;
+                        break;
+                      }
+                    else
+                      fe[0] = *(seqs_begin[source[0]].first);
+                  }
+              }
+            else
+              {
+                // fe[0] < fe[1]
+                while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
+                  {
+                    *target = fe[0];
+                    ++target;
+                    ++(seqs_begin[source[0]].first);
+                    --length;
+                    if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
+                      {
+                        for (int s = 0; s < nrs - 1; ++s)
+                          {
+                            fe[s] = fe[s + 1];
+                            source[s] = source[s + 1];
+                          }
+                        fe[nrs - 1].~value_type();  //Destruct explicitly.
+                        --nrs;
+                        break;
+                      }
+                    else
+                      fe[0] = *(seqs_begin[source[0]].first);
+                  }
+              }
+
+            // Sink down.
+            j = 1;
+            while ((j < nrs) && (comp(fe[j], fe[j - 1]) ||
+                                (!comp(fe[j - 1], fe[j])
+                                    && (source[j] < source[j - 1]))))
+              {
+                std::swap(fe[j - 1], fe[j]);
+                std::swap(source[j - 1], source[j]);
+                ++j;
+              }
+          }
       }
     else
       {
-       int j;
-       while (nrp > 0 && length > 0)
-         {
-           // pl[0] <= pl[1]
-           while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
-             {
-               *target = pl[0];
-               ++target;
-               ++POS(source[0]);
-               length--;
-               if (POS(source[0]) == STOPS(source[0]))
-                 {
-                   for (int s = 0; s < (nrp - 1); s++)
-                     {
-                       pl[s] = pl[s + 1];
-                       source[s] = source[s + 1];
-                     }
-                   nrp--;
-                   break;
-                 }
-               else
-                 pl[0] = *(POS(source[0]));
-             }
-
-           // Sink down.
-           j = 1;
-           while ((j < nrp) && comp(pl[j], pl[j - 1]))
-             {
-               std::swap(pl[j - 1], pl[j]);
-               std::swap(source[j - 1], source[j]);
-               j++;
-             }
-         }
+        int j;
+        while (nrs > 0 && length > 0)
+          {
+            // fe[0] <= fe[1]
+            while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
+              {
+                *target = fe[0];
+                ++target;
+                ++seqs_begin[source[0]].first;
+                --length;
+                if (seqs_begin[source[0]].first == seqs_begin[source[0]].second)
+                  {
+                    for (int s = 0; s < (nrs - 1); ++s)
+                      {
+                        fe[s] = fe[s + 1];
+                        source[s] = source[s + 1];
+                      }
+                    fe[nrs - 1].~value_type();  //Destruct explicitly.
+                    --nrs;
+                    break;
+                  }
+                else
+                  fe[0] = *(seqs_begin[source[0]].first);
+              }
+
+            // Sink down.
+            j = 1;
+            while ((j < nrs) && comp(fe[j], fe[j - 1]))
+              {
+                std::swap(fe[j - 1], fe[j]);
+                std::swap(source[j - 1], source[j]);
+                ++j;
+              }
+          }
       }
 
-    delete[] pl;
+    delete fe;  //Destructors already called.
     delete[] source;
 
     return target;
   }
 
-  /** @brief Multi-way merging procedure for a high branching factor,
-   * guarded case.
-   *
-   *  The head elements are kept in a loser tree.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence. 
-   */
-  template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Multi-way merging procedure for a high branching factor,
+ * guarded case.
+ *
+ *  The head elements are kept in a loser tree.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *   @param stable Stable merging incurs a performance penalty.
+ *  @return End iterator of output sequence.
+ */
+template<
+    typename LT,
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
+                            RandomAccessIteratorIterator seqs_end,
+                            RandomAccessIterator3 target,
+                            Comparator comp,
+                            _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length)
 
-      typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef _DifferenceTp difference_type;
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -895,24 +982,35 @@ namespace __gnu_parallel
 
     difference_type total_length = 0;
 
-    for (int t = 0; t < k; t++)
+    // Default value for potentially non-default-constructible types.
+    value_type* arbitrary_element = NULL;
+
+    for (int t = 0; t < k; ++t)
       {
-       if (stable)
-         {
-           if (seqs_begin[t].first == seqs_begin[t].second)
-             lt.insert_start_stable(value_type(), t, true);
-           else
-             lt.insert_start_stable(*seqs_begin[t].first, t, false);
-         }
-       else
-         {
-           if (seqs_begin[t].first == seqs_begin[t].second)
-             lt.insert_start(value_type(), t, true);
-           else
-             lt.insert_start(*seqs_begin[t].first, t, false);
-         }
-
-       total_length += LENGTH(seqs_begin[t]);
+        if(arbitrary_element == NULL && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
+          arbitrary_element = &(*seqs_begin[t].first);
+        total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
+      }
+
+    if(total_length == 0)
+      return target;
+
+    for (int t = 0; t < k; ++t)
+      {
+        if (stable)
+          {
+            if (seqs_begin[t].first == seqs_begin[t].second)
+              lt.insert_start_stable(*arbitrary_element, t, true);
+            else
+              lt.insert_start_stable(*seqs_begin[t].first, t, false);
+          }
+        else
+          {
+            if (seqs_begin[t].first == seqs_begin[t].second)
+              lt.insert_start(*arbitrary_element, t, true);
+            else
+              lt.insert_start(*seqs_begin[t].first, t, false);
+          }
       }
 
     if (stable)
@@ -926,64 +1024,73 @@ namespace __gnu_parallel
 
     if (stable)
       {
-       for (difference_type i = 0; i < total_length; i++)
-         {
-           // Take out.
-           source = lt.get_min_source();
+        for (difference_type i = 0; i < total_length; ++i)
+          {
+            // Take out.
+            source = lt.get_min_source();
 
-           *(target++) = *(seqs_begin[source].first++);
+            *(target++) = *(seqs_begin[source].first++);
 
-           // Feed.
-           if (seqs_begin[source].first == seqs_begin[source].second)
-             lt.delete_min_insert_stable(value_type(), true);
-           else
-             // Replace from same source.
-             lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+            // Feed.
+            if (seqs_begin[source].first == seqs_begin[source].second)
+              lt.delete_min_insert_stable(*arbitrary_element, true);
+            else
+              // Replace from same source.
+              lt.delete_min_insert_stable(*seqs_begin[source].first, false);
 
-         }
+          }
       }
     else
       {
-       for (difference_type i = 0; i < total_length; i++)
-         {
-           //take out
-           source = lt.get_min_source();
-
-           *(target++) = *(seqs_begin[source].first++);
-
-           // Feed.
-           if (seqs_begin[source].first == seqs_begin[source].second)
-             lt.delete_min_insert(value_type(), true);
-           else
-             // Replace from same source.
-             lt.delete_min_insert(*seqs_begin[source].first, false);
-         }
+        for (difference_type i = 0; i < total_length; ++i)
+          {
+            //take out
+            source = lt.get_min_source();
+
+            *(target++) = *(seqs_begin[source].first++);
+
+            // Feed.
+            if (seqs_begin[source].first == seqs_begin[source].second)
+              lt.delete_min_insert(*arbitrary_element, true);
+            else
+              // Replace from same source.
+              lt.delete_min_insert(*seqs_begin[source].first, false);
+          }
       }
 
     return target;
   }
 
-  /** @brief Multi-way merging procedure for a high branching factor,
-   * unguarded case.
-   *
-   *  The head elements are kept in a loser tree.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence.
-   *  @pre No input will run out of elements during the merge. 
-   */
-  template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Multi-way merging procedure for a high branching factor,
+ * unguarded case.
+ *
+ *  The head elements are kept in a loser tree.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Stable merging incurs a performance penalty.
+ *  @return End iterator of output sequence.
+ *  @pre No input will run out of elements during the merge.
+ */
+template<
+    typename LT,
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp, typename Comparator>
   RandomAccessIterator3
-  multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
+                                      RandomAccessIteratorIterator seqs_end,
+                                      RandomAccessIterator3 target,
+                                      Comparator comp,
+                                      _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length)
     typedef _DifferenceTp difference_type;
 
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -994,17 +1101,17 @@ namespace __gnu_parallel
 
     difference_type total_length = 0;
 
-    for (int t = 0; t < k; t++)
+    for (int t = 0; t < k; ++t)
       {
 #if _GLIBCXX_ASSERTIONS
-       _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
+        _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
 #endif
-       if (stable)
-         lt.insert_start_stable(*seqs_begin[t].first, t, false);
-       else
-         lt.insert_start(*seqs_begin[t].first, t, false);
+        if (stable)
+          lt.insert_start_stable(*seqs_begin[t].first, t, false);
+        else
+          lt.insert_start(*seqs_begin[t].first, t, false);
 
-       total_length += LENGTH(seqs_begin[t]);
+        total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
       }
 
     if (stable)
@@ -1023,121 +1130,84 @@ namespace __gnu_parallel
 
     if (stable)
       {
-       RandomAccessIterator3 target_end = target + length;
-       while (target < target_end)
-         {
-           // Take out.
-           source = lt.get_min_source();
+        RandomAccessIterator3 target_end = target + length;
+        while (target < target_end)
+          {
+            // Take out.
+            source = lt.get_min_source();
 
 #if _GLIBCXX_ASSERTIONS
-           _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+            _GLIBCXX_PARALLEL_ASSERT(i == 0
+                || !comp(*(seqs_begin[source].first), *(target - 1)));
 #endif
 
-           *(target++) = *(seqs_begin[source].first++);
+            *(target++) = *(seqs_begin[source].first++);
 
 #if _GLIBCXX_ASSERTIONS
-           _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
-           i++;
+            _GLIBCXX_PARALLEL_ASSERT(
+                (seqs_begin[source].first != seqs_begin[source].second)
+                || (i == length - 1));
+            ++i;
 #endif
-           // Feed.
-           // Replace from same source.
-           lt.delete_min_insert_stable(*seqs_begin[source].first, false);
+            // Feed.
+            // Replace from same source.
+            lt.delete_min_insert_stable(*seqs_begin[source].first, false);
 
-         }
+          }
       }
     else
       {
-       RandomAccessIterator3 target_end = target + length;
-       while (target < target_end)
-         {
-           // Take out.
-           source = lt.get_min_source();
+        RandomAccessIterator3 target_end = target + length;
+        while (target < target_end)
+          {
+            // Take out.
+            source = lt.get_min_source();
 
 #if _GLIBCXX_ASSERTIONS
-           if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
-             printf("         %i %i %i\n", length, i, source);
-           _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
+            if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
+              printf("         %i %i %i\n", length, i, source);
+            _GLIBCXX_PARALLEL_ASSERT(i == 0
+                || !comp(*(seqs_begin[source].first), *(target - 1)));
 #endif
 
-           *(target++) = *(seqs_begin[source].first++);
+            *(target++) = *(seqs_begin[source].first++);
 
 #if _GLIBCXX_ASSERTIONS
-           if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
-             printf("         %i %i %i\n", length, i, source);
-           _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
-           i++;
+            if (!((seqs_begin[source].first != seqs_begin[source].second)
+                || (i >= length - 1)))
+              printf("         %i %i %i\n", length, i, source);
+            _GLIBCXX_PARALLEL_ASSERT(
+                (seqs_begin[source].first != seqs_begin[source].second)
+                || (i >= length - 1));
+            ++i;
 #endif
-           // Feed.
-           // Replace from same source.
-           lt.delete_min_insert(*seqs_begin[source].first, false);
-         }
+            // Feed.
+            // Replace from same source.
+            lt.delete_min_insert(*seqs_begin[source].first, false);
+          }
       }
 
     return target;
   }
 
-  template<typename _ValueTp, class Comparator>
-  struct loser_tree_traits
-  {
-    typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT;
-  };
-
-
-  /*#define NO_POINTER(T) \
-    template<typename Comparator> \
-    struct loser_tree_traits<T, Comparator> \
-    { \
-    typedef LoserTreePointer<T, Comparator> LT; \
-    };*/
-  //
-  // NO_POINTER(unsigned char)
-  // NO_POINTER(char)
-  // NO_POINTER(unsigned short)
-  // NO_POINTER(short)
-  // NO_POINTER(unsigned int)
-  // NO_POINTER(int)
-  // NO_POINTER(unsigned long)
-  // NO_POINTER(long)
-  // NO_POINTER(unsigned long long)
-  // NO_POINTER(long long)
-  //
-  // #undef NO_POINTER
-
-  template<typename _ValueTp, class Comparator>
-  struct loser_tree_traits_unguarded
-  {
-    typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
-  };
-
-  /*#define NO_POINTER_UNGUARDED(T) \
-    template<typename Comparator> \
-    struct loser_tree_traits_unguarded<T, Comparator> \
-    { \
-    typedef LoserTreePointerUnguarded<T, Comparator> LT; \
-    };*/
-  //
-  // NO_POINTER_UNGUARDED(unsigned char)
-  // NO_POINTER_UNGUARDED(char)
-  // NO_POINTER_UNGUARDED(unsigned short)
-  // NO_POINTER_UNGUARDED(short)
-  // NO_POINTER_UNGUARDED(unsigned int)
-  // NO_POINTER_UNGUARDED(int)
-  // NO_POINTER_UNGUARDED(unsigned long)
-  // NO_POINTER_UNGUARDED(long)
-  // NO_POINTER_UNGUARDED(unsigned long long)
-  // NO_POINTER_UNGUARDED(long long)
-  //
-  // #undef NO_POINTER_UNGUARDED
-
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
+                                     RandomAccessIteratorIterator seqs_end,
+                                     RandomAccessIterator3 target,
+                                     Comparator comp,
+                                     _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length)
 
     typedef _DifferenceTp difference_type;
 
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
@@ -1145,25 +1215,26 @@ namespace __gnu_parallel
     int min_seq;
     RandomAccessIterator3 target_end;
     difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
-                                         comp, min_seq, stable);
+                                          comp, min_seq, stable);
 
     difference_type total_length = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
-      total_length += LENGTH(*s);
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
+      total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
 
     if (overhang != -1)
       {
-       difference_type unguarded_length = std::min(length, total_length - overhang);
-       target_end = multiway_merge_loser_tree_unguarded
-         <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
-         (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
-       overhang = length - unguarded_length;
+        difference_type unguarded_length =
+            std::min(length, total_length - overhang);
+        target_end = multiway_merge_loser_tree_unguarded
+          <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
+          (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
+        overhang = length - unguarded_length;
       }
     else
       {
-       // Empty sequence found.
-       overhang = length;
-       target_end = target;
+        // Empty sequence found.
+        overhang = length;
+        target_end = target;
       }
 
 #if _GLIBCXX_ASSERTIONS
@@ -1183,36 +1254,45 @@ namespace __gnu_parallel
     return target_end;
   }
 
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
+  multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
+                                     RandomAccessIteratorIterator seqs_end,
+                                      RandomAccessIterator3 target,
+                                      Comparator comp,
+                                      _DifferenceTp length, bool stable)
   {
     _GLIBCXX_CALL(length)
 
     typedef _DifferenceTp difference_type;
     typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
 
     RandomAccessIterator3 target_end;
-    difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
+    difference_type overhang =
+        prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
 
     difference_type total_length = 0;
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
       {
-       total_length += LENGTH(*s);
+        total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
 
-       // Sentinel spot.
-       (*s).second++;
+        // Sentinel spot.
+        ++((*s).second);
       }
 
-    difference_type unguarded_length = std::min(length, total_length - overhang);
+    difference_type unguarded_length =
+        std::min(length, total_length - overhang);
     target_end = multiway_merge_loser_tree_unguarded
-      <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
+      <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
       (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
     overhang = length - unguarded_length;
 
@@ -1222,14 +1302,17 @@ namespace __gnu_parallel
 #endif
 
     // Copy rest stable.
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
+    for (RandomAccessIteratorIterator s = seqs_begin;
+         s != seqs_end && overhang > 0; ++s)
       {
-       // Restore.
-       (*s).second--;
-       difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
-       target_end = std::copy((*s).first, (*s).first + local_length, target_end);
-       (*s).first += local_length;
-       overhang -= local_length;
+        // Restore.
+        --((*s).second);
+        difference_type local_length =
+            std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
+        target_end = std::copy((*s).first, (*s).first + local_length,
+                               target_end);
+        (*s).first += local_length;
+        overhang -= local_length;
       }
 
 #if _GLIBCXX_ASSERTIONS
@@ -1241,38 +1324,49 @@ namespace __gnu_parallel
     return target_end;
   }
 
-  /** @brief Sequential multi-way merging switch.
-   *
-   *  The decision if based on the branching factor and runtime settings.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @param sentinel The sequences have a sentinel element.
-   *  @return End iterator of output sequence. */
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Sequential multi-way merging switch.
+ *
+ *  The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and runtime settings.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Stable merging incurs a performance penalty.
+ *  @param sentinel The sequences have a sentinel element.
+ *  @return End iterator of output sequence. */
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
+  multiway_merge(RandomAccessIteratorIterator seqs_begin,
+                 RandomAccessIteratorIterator seqs_end,
+                 RandomAccessIterator3 target,
+                 Comparator comp, _DifferenceTp length,
+                 bool stable, bool sentinel,
+                 sequential_tag)
   {
     _GLIBCXX_CALL(length)
 
     typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
+    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+      ::value_type::first_type
       RandomAccessIterator1;
     typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
       value_type;
 
 #if _GLIBCXX_ASSERTIONS
-    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
+    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
       _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
 #endif
 
     RandomAccessIterator3 return_target = target;
     int k = static_cast<int>(seqs_end - seqs_begin);
 
-    Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
+    Settings::MultiwayMergeAlgorithm mwma =
+        Settings::multiway_merge_algorithm;
 
     if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
       mwma = Settings::LOSER_TREE_COMBINED;
@@ -1280,75 +1374,126 @@ namespace __gnu_parallel
     switch (k)
       {
       case 0:
-       break;
+        break;
       case 1:
-       return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
-       seqs_begin[0].first += length;
-       break;
+        return_target = std::copy(seqs_begin[0].first,
+                                  seqs_begin[0].first + length,
+                                  target);
+        seqs_begin[0].first += length;
+        break;
       case 2:
-       return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
-       break;
+        return_target = merge_advance(seqs_begin[0].first,
+                                      seqs_begin[0].second,
+                                      seqs_begin[1].first,
+                                      seqs_begin[1].second,
+                                      target, length, comp);
+        break;
       case 3:
-       switch (mwma)
-         {
-         case Settings::LOSER_TREE_COMBINED:
-           return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         case Settings::LOSER_TREE_SENTINEL:
-           return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         default:
-           return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         }
-       break;
+        switch (mwma)
+          {
+          case Settings::LOSER_TREE_COMBINED:
+            return_target = multiway_merge_3_combined(seqs_begin,
+                seqs_end,
+                target,
+                comp, length, stable);
+            break;
+          case Settings::LOSER_TREE_SENTINEL:
+            return_target = multiway_merge_3_variant<unguarded_iterator>(
+                seqs_begin,
+                seqs_end,
+                target,
+                comp, length, stable);
+            break;
+          default:
+            return_target = multiway_merge_3_variant<guarded_iterator>(
+                seqs_begin,
+                seqs_end,
+                target,
+                comp, length, stable);
+            break;
+          }
+        break;
       case 4:
-       switch (mwma)
-         {
-         case Settings::LOSER_TREE_COMBINED:
-           return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         case Settings::LOSER_TREE_SENTINEL:
-           return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         default:
-           return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
-           break;
-         }
-       break;
+        switch (mwma)
+          {
+          case Settings::LOSER_TREE_COMBINED:
+            return_target = multiway_merge_4_combined(
+                seqs_begin,
+                seqs_end,
+                target,
+                comp, length, stable);
+            break;
+          case Settings::LOSER_TREE_SENTINEL:
+            return_target = multiway_merge_4_variant<unguarded_iterator>(
+                seqs_begin,
+                seqs_end,
+                target,
+                comp, length, stable);
+            break;
+          default:
+            return_target = multiway_merge_4_variant<guarded_iterator>(
+                seqs_begin,
+                seqs_end,
+                target,
+                comp, length, stable);
+            break;
+          }
+        break;
       default:
-       {
-         switch (mwma)
-           {
-           case Settings::BUBBLE:
-             return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
+        {
+          switch (mwma)
+            {
+            case Settings::BUBBLE:
+              return_target = multiway_merge_bubble(
+                  seqs_begin,
+                  seqs_end,
+                  target,
+                  comp, length, stable);
+              break;
 #if _GLIBCXX_LOSER_TREE_EXPLICIT
-           case Settings::LOSER_TREE_EXPLICIT:
-             return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
+            case Settings::LOSER_TREE_EXPLICIT:
+              return_target = multiway_merge_loser_tree<
+                    LoserTreeExplicit<value_type, Comparator> >(
+                  seqs_begin,
+                  seqs_end,
+                  target,
+                  comp, length, stable);
+              break;
 #endif
 #if _GLIBCXX_LOSER_TREE
-           case Settings::LOSER_TREE:
-             return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
+            case Settings::LOSER_TREE:
+              return_target = multiway_merge_loser_tree<
+                    LoserTree<value_type, Comparator> >(
+                  seqs_begin,
+                  seqs_end,
+                  target,
+                  comp, length, stable);
+              break;
 #endif
 #if _GLIBCXX_LOSER_TREE_COMBINED
-           case Settings::LOSER_TREE_COMBINED:
-             return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
+            case Settings::LOSER_TREE_COMBINED:
+              return_target = multiway_merge_loser_tree_combined(
+                  seqs_begin,
+                  seqs_end,
+                  target,
+                  comp, length, stable);
+              break;
 #endif
 #if _GLIBCXX_LOSER_TREE_SENTINEL
-           case Settings::LOSER_TREE_SENTINEL:
-             return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
-             break;
+            case Settings::LOSER_TREE_SENTINEL:
+              return_target = multiway_merge_loser_tree_sentinel(
+                  seqs_begin,
+                  seqs_end,
+                  target,
+                  comp, length, stable);
+              break;
 #endif
-           default:
-             // multiway_merge algorithm not implemented.
-             _GLIBCXX_PARALLEL_ASSERT(0);
-             break;
-           }
-       }
+            default:
+              // multiway_merge algorithm not implemented.
+              _GLIBCXX_PARALLEL_ASSERT(0);
+              break;
+            }
+        }
       }
 #if _GLIBCXX_ASSERTIONS
     _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
@@ -1357,235 +1502,268 @@ namespace __gnu_parallel
     return return_target;
   }
 
-  /** @brief Parallel multi-way merge routine.
-   *
-   *  The decision if based on the branching factor and runtime settings.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @param sentinel Ignored.
-   *  @return End iterator of output sequence. 
-   */
-  template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Parallel multi-way merge routine.
+ *
+ *  The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and runtime settings.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Stable merging incurs a performance penalty.
+ *  @param sentinel Ignored.
+ *  @return End iterator of output sequence.
+ */
+template<
+    typename RandomAccessIteratorIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
-  parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
-  {
-    _GLIBCXX_CALL(length)
-
-    typedef _DifferenceTp difference_type;
-    typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
-      RandomAccessIterator1;
-    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
-      value_type;
-
-#if _GLIBCXX_ASSERTIONS
-    for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
-      _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
-#endif
-
-    // k sequences.
-    int k = static_cast<int>(seqs_end - seqs_begin);
-
-    difference_type total_length = 0;
-    for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
-      total_length += LENGTH(*raii);
-
-    _GLIBCXX_CALL(total_length)
-
-    if (total_length == 0 || k == 0)
-      return target;
-
-    thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
-
-    Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
-
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
-
-    bool tight = (total_length == length);
-
-    // Thread t will have to merge pieces[iam][0..k - 1]
-    std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
-    for (int s = 0; s < num_threads; s++)
-      pieces[s].resize(k);
-
-    difference_type num_samples = Settings::merge_oversampling * num_threads;
-
-    if (Settings::multiway_merge_splitting == Settings::SAMPLING)
-      {
-       value_type* samples = new value_type[k * num_samples];
-       // Sample.
-       for (int s = 0; s < k; s++)
-         for (int i = 0; (difference_type)i < num_samples; i++)
-           {
-             difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
-             samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
-           }
-
-       if (stable)
-         __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
-       else
-         __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
-
-       for (int slab = 0; slab < num_threads; slab++)
-         // For each slab / processor.
-         for (int seq = 0; seq < k; seq++)
-           {
-             // For each sequence.
-             if (slab > 0)
-               pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
-             else
-               {
-                 // Absolute beginning.
-                 pieces[slab][seq].first = 0;
-               }
-             if ((slab + 1) < num_threads)
-               pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
-             else
-               pieces[slab][seq].second = LENGTH(seqs_begin[seq]);     //absolute ending
-           }
-       delete[] samples;
-      }
-    else
-      {
-       // (Settings::multiway_merge_splitting == Settings::EXACT).
-       std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
-       std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
-
-       copy(seqs_begin, seqs_end, se.begin());
-
-       difference_type borders[num_threads + 1];
-       equally_split(length, num_threads, borders);
-
-       for (int s = 0; s < (num_threads - 1); s++)
-         {
-           offsets[s].resize(k);
-           multiseq_partition(se.begin(), se.end(), borders[s + 1],
-                              offsets[s].begin(), comp);
-
-           // Last one also needed and available.
-           if (!tight)
-             {
-               offsets[num_threads - 1].resize(k);
-               multiseq_partition(se.begin(), se.end(), (difference_type)length,
-                                  offsets[num_threads - 1].begin(),  comp);
-             }
-         }
-
-
-       for (int slab = 0; slab < num_threads; slab++)
-         {
-           // For each slab / processor.
-           for (int seq = 0; seq < k; seq++)
-             {
-               // For each sequence.
-               if (slab == 0)
-                 {
-                   // Absolute beginning.
-                   pieces[slab][seq].first = 0;
-                 }
-               else
-                 pieces[slab][seq].first = pieces[slab - 1][seq].second;
-               if (!tight || slab < (num_threads - 1))
-                 pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
-               else
-                 {
-                   // slab == num_threads - 1
-                   pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
-                 }
-             }
-         }
-       delete[] offsets;
-      }
-
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
-
-#      pragma omp parallel num_threads(num_threads)
+  parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
+                          RandomAccessIteratorIterator seqs_end,
+                           RandomAccessIterator3 target,
+                           Comparator comp,
+                           _DifferenceTp length, bool stable, bool sentinel)
     {
-      thread_index_t iam = omp_get_thread_num();
-
-      t[iam].tic();
-
-      difference_type target_position = 0;
-
-      for (int c = 0; c < k; c++)
-       target_position += pieces[iam][c].first;
-
-      if (k > 2)
-       {
-         std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
-
-         difference_type local_length = 0;
-         for (int s = 0; s < k; s++)
-           {
-             chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
-             local_length += LENGTH(chunks[s]);
-           }
-
-         multiway_merge(chunks, chunks + k, target + target_position, comp,
-                        std::min(local_length, length - target_position),
-                        stable, false, sequential_tag());
-
-         delete[] chunks;
-       }
-      else if (k == 2)
-       {
-         RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
-         merge_advance(begin0,
-                       seqs_begin[0].first + pieces[iam][0].second,
-                       begin1,
-                       seqs_begin[1].first + pieces[iam][1].second,
-                       target + target_position,
-                       (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
-                       comp);
-       }
-
-      t[iam].tic();
-
-    }
+      _GLIBCXX_CALL(length)
 
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
+      typedef _DifferenceTp difference_type;
+      typedef typename std::iterator_traits<RandomAccessIteratorIterator>
+        ::value_type::first_type
+        RandomAccessIterator1;
+      typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
+        value_type;
+
+      // k sequences.
+      int k = static_cast<int>(seqs_end - seqs_begin);
+
+      difference_type total_length = 0;
+      for (RandomAccessIteratorIterator raii = seqs_begin;
+           raii != seqs_end; ++raii)
+        total_length += _GLIBCXX_PARALLEL_LENGTH(*raii);
+
+      _GLIBCXX_CALL(total_length)
+
+      if (total_length == 0 || k == 0)
+        return target;
+
+      bool tight = (total_length == length);
+
+      std::vector<std::pair<difference_type, difference_type> >* pieces;
+
+      thread_index_t num_threads = static_cast<thread_index_t>(
+          std::min<difference_type>(get_max_threads(), total_length));
+
+#     pragma omp parallel num_threads (num_threads)
+        {
+#         pragma omp single
+            {
+              num_threads = omp_get_num_threads();
+              // Thread t will have to merge pieces[iam][0..k - 1]
+              pieces = new std::vector<
+                  std::pair<difference_type, difference_type> >[num_threads];
+              for (int s = 0; s < num_threads; ++s)
+                pieces[s].resize(k);
+
+              difference_type num_samples =
+                  Settings::merge_oversampling * num_threads;
+
+              if (Settings::multiway_merge_splitting == Settings::SAMPLING)
+                {
+                  value_type* samples = static_cast<value_type*>(
+                    ::operator new(sizeof(value_type) * k * num_samples));
+                  // Sample.
+                  for (int s = 0; s < k; ++s)
+                    for (difference_type i = 0; i < num_samples; ++i)
+                      {
+                        difference_type sample_index =
+                            static_cast<difference_type>(
+                                _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) /
+                                (num_samples + 1)) * (double(length)
+                                / total_length));
+                        new(&(samples[s * num_samples + i])) value_type(
+                            seqs_begin[s].first[sample_index]);
+                      }
+
+                  if (stable)
+                    __gnu_sequential::stable_sort(
+                      samples, samples + (num_samples * k), comp);
+                  else
+                    __gnu_sequential::sort(
+                      samples, samples + (num_samples * k), comp);
+
+                  for (int slab = 0; slab < num_threads; ++slab)
+                    // For each slab / processor.
+                    for (int seq = 0; seq < k; ++seq)
+                      {
+                        // For each sequence.
+                        if (slab > 0)
+                          pieces[slab][seq].first =
+                              std::upper_bound(
+                                seqs_begin[seq].first,
+                                seqs_begin[seq].second,
+                                samples[num_samples * k * slab / num_threads],
+                                  comp)
+                              - seqs_begin[seq].first;
+                        else
+                          {
+                            // Absolute beginning.
+                            pieces[slab][seq].first = 0;
+                          }
+                        if ((slab + 1) < num_threads)
+                          pieces[slab][seq].second =
+                              std::upper_bound(
+                                  seqs_begin[seq].first,
+                                  seqs_begin[seq].second,
+                                  samples[num_samples * k * (slab + 1) /
+                                      num_threads], comp)
+                              - seqs_begin[seq].first;
+                        else
+                        pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
+                      }
+                    delete[] samples;
+                }
+              else
+                {
+                  // (Settings::multiway_merge_splitting == Settings::EXACT).
+                  std::vector<RandomAccessIterator1>* offsets =
+                      new std::vector<RandomAccessIterator1>[num_threads];
+                  std::vector<
+                      std::pair<RandomAccessIterator1, RandomAccessIterator1>
+                      > se(k);
+
+                  copy(seqs_begin, seqs_end, se.begin());
+
+                  difference_type* borders =
+                      new difference_type[num_threads + 1];
+                  equally_split(length, num_threads, borders);
+
+                  for (int s = 0; s < (num_threads - 1); ++s)
+                    {
+                      offsets[s].resize(k);
+                      multiseq_partition(
+                          se.begin(), se.end(), borders[s + 1],
+                          offsets[s].begin(), comp);
+
+                      // Last one also needed and available.
+                      if (!tight)
+                        {
+                          offsets[num_threads - 1].resize(k);
+                          multiseq_partition(se.begin(), se.end(),
+                                difference_type(length),
+                                offsets[num_threads - 1].begin(),  comp);
+                        }
+                    }
+
+
+                  for (int slab = 0; slab < num_threads; ++slab)
+                    {
+                      // For each slab / processor.
+                      for (int seq = 0; seq < k; ++seq)
+                        {
+                          // For each sequence.
+                          if (slab == 0)
+                            {
+                              // Absolute beginning.
+                              pieces[slab][seq].first = 0;
+                            }
+                          else
+                            pieces[slab][seq].first =
+                                pieces[slab - 1][seq].second;
+                          if (!tight || slab < (num_threads - 1))
+                            pieces[slab][seq].second =
+                                offsets[slab][seq] - seqs_begin[seq].first;
+                          else
+                            {
+                              // slab == num_threads - 1
+                              pieces[slab][seq].second =
+                                  _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
+                            }
+                        }
+                    }
+                  delete[] offsets;
+                }
+            } //single
+
+          thread_index_t iam = omp_get_thread_num();
+
+          difference_type target_position = 0;
+
+          for (int c = 0; c < k; ++c)
+            target_position += pieces[iam][c].first;
+
+          if (k > 2)
+            {
+              std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks
+                = new
+                  std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
+
+              difference_type local_length = 0;
+              for (int s = 0; s < k; ++s)
+                {
+                  chunks[s] = std::make_pair(
+                      seqs_begin[s].first + pieces[iam][s].first,
+                      seqs_begin[s].first + pieces[iam][s].second);
+                  local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
+                }
+
+              multiway_merge(
+                    chunks, chunks + k, target + target_position, comp,
+                    std::min(local_length, length - target_position),
+                    stable, false, sequential_tag());
+
+              delete[] chunks;
+            }
+          else if (k == 2)
+            {
+              RandomAccessIterator1
+                  begin0 = seqs_begin[0].first + pieces[iam][0].first,
+                  begin1 = seqs_begin[1].first + pieces[iam][1].first;
+              merge_advance(begin0,
+                    seqs_begin[0].first + pieces[iam][0].second,
+                    begin1,
+                    seqs_begin[1].first + pieces[iam][1].second,
+                    target + target_position,
+                    (pieces[iam][0].second - pieces[iam][0].first) +
+                        (pieces[iam][1].second - pieces[iam][1].first),
+                    comp);
+            }
+        } //parallel
 
 #if _GLIBCXX_ASSERTIONS
-    _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
+      _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
 #endif
 
-    // Update ends of sequences.
-    for (int s = 0; s < k; s++)
-      seqs_begin[s].first += pieces[num_threads - 1][s].second;
+      // Update ends of sequences.
+      for (int s = 0; s < k; ++s)
+        seqs_begin[s].first += pieces[num_threads - 1][s].second;
 
-    delete[] pieces;
+      delete[] pieces;
 
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].tic();
-    for (int pr = 0; pr < num_threads; pr++)
-      t[pr].print();
-    delete[] t;
-
-    return target + length;
-  }
+      return target + length;
+    }
 
-  /** 
-   *  @brief Multi-way merging front-end.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence. 
-   */
-  template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/**
+ *  @brief Multi-way merging front-end.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Stable merging incurs a performance penalty.
+ *  @return End iterator of output sequence.
+ */
+template<
+    typename RandomAccessIteratorPairIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
   multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
-                RandomAccessIteratorPairIterator seqs_end,
-                RandomAccessIterator3 target, Comparator comp,
-                _DifferenceTp length, bool stable)
+                RandomAccessIteratorPairIterator seqs_end,
+                RandomAccessIterator3 target, Comparator comp,
+                _DifferenceTp length, bool stable)
   {
     typedef _DifferenceTp difference_type;
     _GLIBCXX_CALL(seqs_end - seqs_begin)
@@ -1594,33 +1772,43 @@ namespace __gnu_parallel
       return target;
 
     RandomAccessIterator3 target_end;
-    if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
-      target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
+    if (_GLIBCXX_PARALLEL_CONDITION(
+        ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
+        && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
+      target_end = parallel_multiway_merge(
+          seqs_begin, seqs_end,
+          target, comp, static_cast<difference_type>(length), stable, false);
     else
-      target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
+      target_end = multiway_merge(
+          seqs_begin, seqs_end,
+          target, comp, length, stable, false, sequential_tag());
 
     return target_end;
   }
 
-  /** @brief Multi-way merging front-end.
-   *  @param seqs_begin Begin iterator of iterator pair input sequence.
-   *  @param seqs_end End iterator of iterator pair input sequence.
-   *  @param target Begin iterator out output sequence.
-   *  @param comp Comparator.
-   *  @param length Maximum length to merge.
-   *  @param stable Stable merging incurs a performance penalty.
-   *  @return End iterator of output sequence.
-   *  @pre For each @c i, @c seqs_begin[i].second must be the end
-   *  marker of the sequence, but also reference the one more sentinel
-   *  element. */
-  template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
+/** @brief Multi-way merging front-end.
+ *  @param seqs_begin Begin iterator of iterator pair input sequence.
+ *  @param seqs_end End iterator of iterator pair input sequence.
+ *  @param target Begin iterator out output sequence.
+ *  @param comp Comparator.
+ *  @param length Maximum length to merge.
+ *  @param stable Stable merging incurs a performance penalty.
+ *  @return End iterator of output sequence.
+ *  @pre For each @c i, @c seqs_begin[i].second must be the end
+ *  marker of the sequence, but also reference the one more sentinel
+ *  element. */
+template<
+    typename RandomAccessIteratorPairIterator,
+    typename RandomAccessIterator3,
+    typename _DifferenceTp,
+    typename Comparator>
   RandomAccessIterator3
   multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
-                         RandomAccessIteratorPairIterator seqs_end,
-                         RandomAccessIterator3 target,
-                         Comparator comp,
-                         _DifferenceTp length,
-                         bool stable)
+                          RandomAccessIteratorPairIterator seqs_end,
+                          RandomAccessIterator3 target,
+                          Comparator comp,
+                          _DifferenceTp length,
+                          bool stable)
   {
     typedef _DifferenceTp difference_type;
 
@@ -1629,10 +1817,16 @@ namespace __gnu_parallel
 
     _GLIBCXX_CALL(seqs_end - seqs_begin)
 
-    if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
-      return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
+    if (_GLIBCXX_PARALLEL_CONDITION(
+        ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
+        && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
+      return parallel_multiway_merge(
+          seqs_begin, seqs_end,
+          target, comp, static_cast<difference_type>(length), stable, true);
     else
-      return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
+      return multiway_merge(
+          seqs_begin, seqs_end,
+          target, comp, length, stable, true, sequential_tag());
   }
 }