3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/balanced_quicksort.h
26 * @brief Implementation of a dynamically load-balanced parallel quicksort.
28 * It works in-place and needs only logarithmic extra memory.
29 * The algorithm is similar to the one proposed in
31 * P. Tsigas and Y. Zhang.
32 * A simple, fast parallel implementation of quicksort and
33 * its performance evaluation on SUN enterprise 10000.
34 * In 11th Euromicro Conference on Parallel, Distributed and
35 * Network-Based Processing, page 372, 2003.
37 * This file is a GNU parallel extension to the Standard C++ Library.
40 // Written by Johannes Singler.
42 #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
43 #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
45 #include <parallel/basic_iterator.h>
46 #include <bits/stl_algo.h>
48 #include <parallel/settings.h>
49 #include <parallel/partition.h>
50 #include <parallel/random_number.h>
51 #include <parallel/queue.h>
54 #if _GLIBCXX_ASSERTIONS
55 #include <parallel/checkers.h>
58 namespace __gnu_parallel
60 /** @brief Information local to one thread in the parallel quicksort run. */
61 template<typename RandomAccessIterator>
64 typedef std::iterator_traits<RandomAccessIterator> traits_type;
65 typedef typename traits_type::difference_type difference_type;
67 /** @brief Continuous part of the sequence, described by an
69 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
71 /** @brief Initial piece to work on. */
74 /** @brief Work-stealing queue. */
75 RestrictedBoundedConcurrentQueue<Piece> leftover_parts;
77 /** @brief Number of threads involved in this algorithm. */
78 thread_index_t num_threads;
80 /** @brief Pointer to a counter of elements left over to sort. */
81 volatile difference_type* elements_leftover;
83 /** @brief The complete sequence to sort. */
86 /** @brief Constructor.
87 * @param queue_size Size of the work-stealing queue. */
88 QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { }
91 /** @brief Balanced quicksort divide step.
92 * @param begin Begin iterator of subsequence.
93 * @param end End iterator of subsequence.
94 * @param comp Comparator.
95 * @param num_threads Number of threads that are allowed to work on
97 * @pre @c (end-begin)>=1 */
98 template<typename RandomAccessIterator, typename Comparator>
99 typename std::iterator_traits<RandomAccessIterator>::difference_type
100 qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
101 Comparator comp, thread_index_t num_threads)
103 _GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
105 typedef std::iterator_traits<RandomAccessIterator> traits_type;
106 typedef typename traits_type::value_type value_type;
107 typedef typename traits_type::difference_type difference_type;
109 RandomAccessIterator pivot_pos =
110 median_of_three_iterators(begin, begin + (end - begin) / 2,
113 #if defined(_GLIBCXX_ASSERTIONS)
114 // Must be in between somewhere.
115 difference_type n = end - begin;
117 _GLIBCXX_PARALLEL_ASSERT(
118 (!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
119 || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
120 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
121 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
122 || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
123 || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
126 // Swap pivot value to end.
127 if (pivot_pos != (end - 1))
128 std::swap(*pivot_pos, *(end - 1));
131 __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
132 pred(comp, *pivot_pos);
134 // Divide, returning end - begin - 1 in the worst case.
135 difference_type split_pos = parallel_partition(
136 begin, end - 1, pred, num_threads);
138 // Swap back pivot to middle.
139 std::swap(*(begin + split_pos), *pivot_pos);
140 pivot_pos = begin + split_pos;
142 #if _GLIBCXX_ASSERTIONS
143 RandomAccessIterator r;
144 for (r = begin; r != pivot_pos; ++r)
145 _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
146 for (; r != end; ++r)
147 _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
153 /** @brief Quicksort conquer step.
154 * @param tls Array of thread-local storages.
155 * @param begin Begin iterator of subsequence.
156 * @param end End iterator of subsequence.
157 * @param comp Comparator.
158 * @param iam Number of the thread processing this function.
160 * Number of threads that are allowed to work on this part. */
161 template<typename RandomAccessIterator, typename Comparator>
163 qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
164 RandomAccessIterator begin, RandomAccessIterator end,
166 thread_index_t iam, thread_index_t num_threads,
169 typedef std::iterator_traits<RandomAccessIterator> traits_type;
170 typedef typename traits_type::value_type value_type;
171 typedef typename traits_type::difference_type difference_type;
173 difference_type n = end - begin;
175 if (num_threads <= 1 || n <= 1)
177 tls[iam]->initial.first = begin;
178 tls[iam]->initial.second = end;
180 qsb_local_sort_with_helping(tls, comp, iam, parent_wait);
186 difference_type split_pos = qsb_divide(begin, end, comp, num_threads);
188 #if _GLIBCXX_ASSERTIONS
189 _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
192 thread_index_t num_threads_leftside =
193 std::max<thread_index_t>(1, std::min<thread_index_t>(
194 num_threads - 1, split_pos * num_threads / n));
197 *tls[iam]->elements_leftover -= (difference_type)1;
200 # pragma omp parallel num_threads(2)
203 if(omp_get_num_threads() < 2)
208 # pragma omp sections
212 qsb_conquer(tls, begin, begin + split_pos, comp,
214 num_threads_leftside,
218 // The pivot_pos is left in place, to ensure termination.
221 qsb_conquer(tls, begin + split_pos + 1, end, comp,
222 iam + num_threads_leftside,
223 num_threads - num_threads_leftside,
232 * @brief Quicksort step doing load-balanced local sort.
233 * @param tls Array of thread-local storages.
234 * @param comp Comparator.
235 * @param iam Number of the thread processing this function.
237 template<typename RandomAccessIterator, typename Comparator>
239 qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
240 Comparator& comp, int iam, bool wait)
242 typedef std::iterator_traits<RandomAccessIterator> traits_type;
243 typedef typename traits_type::value_type value_type;
244 typedef typename traits_type::difference_type difference_type;
245 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
247 QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
249 difference_type base_case_n =
250 _Settings::get().sort_qsb_base_case_maximal_n;
253 thread_index_t num_threads = tl.num_threads;
255 // Every thread has its own random number generator.
256 random_number rng(iam + 1);
258 Piece current = tl.initial;
260 difference_type elements_done = 0;
261 #if _GLIBCXX_ASSERTIONS
262 difference_type total_elements_done = 0;
267 // Invariant: current must be a valid (maybe empty) range.
268 RandomAccessIterator begin = current.first, end = current.second;
269 difference_type n = end - begin;
274 RandomAccessIterator pivot_pos = begin + rng(n);
276 // Swap pivot_pos value to end.
277 if (pivot_pos != (end - 1))
278 std::swap(*pivot_pos, *(end - 1));
281 __gnu_parallel::binder2nd
282 <Comparator, value_type, value_type, bool>
283 pred(comp, *pivot_pos);
285 // Divide, leave pivot unchanged in last place.
286 RandomAccessIterator split_pos1, split_pos2;
287 split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
289 // Left side: < pivot_pos; right side: >= pivot_pos.
290 #if _GLIBCXX_ASSERTIONS
291 _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
293 // Swap pivot back to middle.
294 if (split_pos1 != pivot_pos)
295 std::swap(*split_pos1, *pivot_pos);
296 pivot_pos = split_pos1;
298 // In case all elements are equal, split_pos1 == 0.
299 if ((split_pos1 + 1 - begin) < (n >> 7)
300 || (end - split_pos1) < (n >> 7))
302 // Very unequal split, one part smaller than one 128th
303 // elements not strictly larger than the pivot.
304 __gnu_parallel::unary_negate<__gnu_parallel::binder1st
305 <Comparator, value_type, value_type, bool>, value_type>
306 pred(__gnu_parallel::binder1st
307 <Comparator, value_type, value_type, bool>(comp,
310 // Find other end of pivot-equal range.
311 split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
315 // Only skip the pivot.
316 split_pos2 = split_pos1 + 1;
318 // Elements equal to pivot are done.
319 elements_done += (split_pos2 - split_pos1);
320 #if _GLIBCXX_ASSERTIONS
321 total_elements_done += (split_pos2 - split_pos1);
323 // Always push larger part onto stack.
324 if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
326 // Right side larger.
327 if ((split_pos2) != end)
328 tl.leftover_parts.push_front(std::make_pair(split_pos2,
331 //current.first = begin; //already set anyway
332 current.second = split_pos1;
338 if (begin != split_pos1)
339 tl.leftover_parts.push_front(std::make_pair(begin,
342 current.first = split_pos2;
343 //current.second = end; //already set anyway
349 __gnu_sequential::sort(begin, end, comp);
351 #if _GLIBCXX_ASSERTIONS
352 total_elements_done += n;
355 // Prefer own stack, small pieces.
356 if (tl.leftover_parts.pop_front(current))
360 *tl.elements_leftover -= elements_done;
364 #if _GLIBCXX_ASSERTIONS
365 double search_start = omp_get_wtime();
368 // Look for new work.
369 bool successfully_stolen = false;
370 while (wait && *tl.elements_leftover > 0 && !successfully_stolen
371 #if _GLIBCXX_ASSERTIONS
372 // Possible dead-lock.
373 && (omp_get_wtime() < (search_start + 1.0))
377 thread_index_t victim;
378 victim = rng(num_threads);
381 successfully_stolen = (victim != iam)
382 && tls[victim]->leftover_parts.pop_back(current);
383 if (!successfully_stolen)
385 #if !defined(__ICC) && !defined(__ECC)
390 #if _GLIBCXX_ASSERTIONS
391 if (omp_get_wtime() >= (search_start + 1.0))
394 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
395 < (search_start + 1.0));
398 if (!successfully_stolen)
400 #if _GLIBCXX_ASSERTIONS
401 _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
409 /** @brief Top-level quicksort routine.
410 * @param begin Begin iterator of sequence.
411 * @param end End iterator of sequence.
412 * @param comp Comparator.
413 * @param num_threads Number of threads that are allowed to work on
416 template<typename RandomAccessIterator, typename Comparator>
418 parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
420 thread_index_t num_threads)
422 _GLIBCXX_CALL(end - begin)
424 typedef std::iterator_traits<RandomAccessIterator> traits_type;
425 typedef typename traits_type::value_type value_type;
426 typedef typename traits_type::difference_type difference_type;
427 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
429 typedef QSBThreadLocal<RandomAccessIterator> tls_type;
431 difference_type n = end - begin;
436 // At least one element per processor.
438 num_threads = static_cast<thread_index_t>(n);
440 // Initialize thread local storage
441 tls_type** tls = new tls_type*[num_threads];
442 difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1);
443 for (thread_index_t t = 0; t < num_threads; ++t)
444 tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
446 // There can never be more than ceil(log2(n)) ranges on the stack, because
447 // 1. Only one processor pushes onto the stack
448 // 2. The largest range has at most length n
449 // 3. Each range is larger than half of the range remaining
450 volatile difference_type elements_leftover = n;
451 for (int i = 0; i < num_threads; ++i)
453 tls[i]->elements_leftover = &elements_leftover;
454 tls[i]->num_threads = num_threads;
455 tls[i]->global = std::make_pair(begin, end);
457 // Just in case nothing is left to assign.
458 tls[i]->initial = std::make_pair(end, end);
461 // Main recursion call.
462 qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true);
464 #if _GLIBCXX_ASSERTIONS
465 // All stack must be empty.
467 for (int i = 1; i < num_threads; ++i)
468 _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
471 for (int i = 0; i < num_threads; ++i)
475 } // namespace __gnu_parallel
477 #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */