3 // Copyright (C) 2007 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
31 /** @file parallel/multiseq_selection.h
32 * @brief Functions to find elements of a certain global rank in
33 * multiple sorted sequences. Also serves for splitting such
36 * The algorithm description can be found in
38 * P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard.
39 * Merging Multiple Lists on Hierarchical-Memory Multiprocessors.
40 * Journal of Parallel and Distributed Computing, 12(2):171–177, 1991.
42 * This file is a GNU parallel extension to the Standard C++ Library.
45 // Written by Johannes Singler.
47 #ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
48 #define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
53 #include <bits/stl_algo.h>
55 #include <parallel/sort.h>
57 namespace __gnu_parallel
59 /** @brief Compare a pair of types lexicographically, ascending. */
60 template<typename T1, typename T2, typename Comparator>
61 class lexicographic : public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
67 lexicographic(Comparator& _comp) : comp(_comp) { }
71 operator()(const std::pair<T1, T2>& p1, const std::pair<T1, T2>& p2) const
73 if (comp(p1.first, p2.first))
76 if (comp(p2.first, p1.first))
80 return p1.second < p2.second;
84 /** @brief Compare a pair of types lexicographically, descending. */
85 template<typename T1, typename T2, typename Comparator>
86 class lexicographic_reverse : public std::binary_function<T1, T2, bool>
92 lexicographic_reverse(Comparator& _comp) : comp(_comp) { }
95 operator()(const std::pair<T1, T2>& p1, const std::pair<T1, T2>& p2) const
97 if (comp(p2.first, p1.first))
100 if (comp(p1.first, p2.first))
104 return p2.second < p1.second;
109 * @brief Splits several sorted sequences at a certain global rank,
110 * resulting in a splitting point for each sequence.
111 * The sequences are passed via a sequence of random-access
112 * iterator pairs, none of the sequences may be empty. If there
113 * are several equal elements across the split, the ones on the
114 * left side will be chosen from sequences with smaller number.
115 * @param begin_seqs Begin of the sequence of iterator pairs.
116 * @param end_seqs End of the sequence of iterator pairs.
117 * @param rank The global rank to partition at.
118 * @param begin_offsets A random-access sequence begin where the
119 * result will be stored in. Each element of the sequence is an
120 * iterator that points to the first element on the greater part of
121 * the respective sequence.
122 * @param comp The ordering functor, defaults to std::less<T>.
124 template<typename RanSeqs, typename RankType, typename RankIterator, typename Comparator>
126 multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
127 RankIterator begin_offsets,
128 Comparator comp = std::less<
129 typename std::iterator_traits<typename std::iterator_traits<RanSeqs>::value_type::first_type>::value_type>()) // std::less<T>
131 _GLIBCXX_CALL(end_seqs - begin_seqs)
133 typedef typename std::iterator_traits<RanSeqs>::value_type::first_type It;
134 typedef typename std::iterator_traits<It>::difference_type difference_type;
135 typedef typename std::iterator_traits<It>::value_type T;
137 lexicographic<T, int, Comparator> lcomp(comp);
138 lexicographic_reverse<T, int, Comparator> lrcomp(comp);
140 // Number of sequences, number of elements in total (possibly
141 // including padding).
142 difference_type m = std::distance(begin_seqs, end_seqs), N = 0, nmax, n, r;
144 for (int i = 0; i < m; i++)
145 N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
149 for (int i = 0; i < m; i++)
150 begin_offsets[i] = begin_seqs[i].second; // Very end.
154 _GLIBCXX_PARALLEL_ASSERT(m != 0 && N != 0 && rank >= 0 && rank < N);
156 difference_type* ns = new difference_type[m];
157 difference_type* a = new difference_type[m];
158 difference_type* b = new difference_type[m];
161 ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
163 for (int i = 0; i < m; i++)
165 ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
166 nmax = std::max(nmax, ns[i]);
171 // Pad all lists to this length, at least as long as any ns[i],
172 // equality iff nmax = 2^k - 1.
175 // From now on, including padding.
178 for (int i = 0; i < m; i++)
186 // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
188 #define S(i) (begin_seqs[i].first)
190 // Initial partition.
191 std::vector<std::pair<T, int> > sample;
193 for (int i = 0; i < m; i++)
194 if (n < ns[i]) //sequence long enough
195 sample.push_back(std::make_pair(S(i)[n], i));
196 __gnu_sequential::sort(sample.begin(), sample.end(), lcomp);
198 for (int i = 0; i < m; i++) //conceptual infinity
199 if (n >= ns[i]) //sequence too short, conceptual infinity
200 sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
202 difference_type localrank = rank * m / N ;
205 for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++)
206 a[sample[j].second] += n + 1;
208 b[sample[j].second] -= n + 1;
210 // Further refinement.
215 int lmax_seq = -1; // to avoid warning
216 const T* lmax = NULL; // impossible to avoid the warning?
217 for (int i = 0; i < m; i++)
223 lmax = &(S(i)[a[i] - 1]);
228 // Max, favor rear sequences.
229 if (!comp(S(i)[a[i] - 1], *lmax))
231 lmax = &(S(i)[a[i] - 1]);
239 for (i = 0; i < m; i++)
241 difference_type middle = (b[i] + a[i]) / 2;
242 if (lmax && middle < ns[i] &&
243 lcomp(std::make_pair(S(i)[middle], i), std::make_pair(*lmax, lmax_seq)))
244 a[i] = std::min(a[i] + n + 1, ns[i]);
249 difference_type leftsize = 0, total = 0;
250 for (int i = 0; i < m; i++)
252 leftsize += a[i] / (n + 1);
253 total += l / (n + 1);
256 difference_type skew = static_cast<difference_type>(static_cast<uint64>(total) * rank / N - leftsize);
260 // Move to the left, find smallest.
261 std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
263 for (int i = 0; i < m; i++)
265 pq.push(std::make_pair(S(i)[b[i]], i));
267 for (; skew != 0 && !pq.empty(); skew--)
269 int source = pq.top().second;
272 a[source] = std::min(a[source] + n + 1, ns[source]);
275 if (b[source] < ns[source])
276 pq.push(std::make_pair(S(source)[b[source]], source));
281 // Move to the right, find greatest.
282 std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic<T, int, Comparator> > pq(lcomp);
284 for (int i = 0; i < m; i++)
286 pq.push(std::make_pair(S(i)[a[i] - 1], i));
288 for (; skew != 0; skew++)
290 int source = pq.top().second;
297 pq.push(std::make_pair(S(source)[a[source] - 1], source));
303 // a[i] == b[i] in most cases, except when a[i] has been clamped
304 // because of having reached the boundary
306 // Now return the result, calculate the offset.
308 // Compare the keys on both edges of the border.
310 // Maximum of left edge, minimum of right edge.
311 bool maxleftset = false, minrightset = false;
312 T maxleft, minright; // Impossible to avoid the warning?
313 for (int i = 0; i < m; i++)
319 maxleft = S(i)[a[i] - 1];
324 // Max, favor rear sequences.
325 if (!comp(S(i)[a[i] - 1], maxleft))
326 maxleft = S(i)[a[i] - 1];
333 minright = S(i)[b[i]];
338 // Min, favor fore sequences.
339 if (comp(S(i)[b[i]], minright))
340 minright = S(i)[b[i]];
346 for (int i = 0; i < m; i++)
347 begin_offsets[i] = S(i) + a[i];
357 * @brief Selects the element at a certain global rank from several
360 * The sequences are passed via a sequence of random-access
361 * iterator pairs, none of the sequences may be empty.
362 * @param begin_seqs Begin of the sequence of iterator pairs.
363 * @param end_seqs End of the sequence of iterator pairs.
364 * @param rank The global rank to partition at.
365 * @param offset The rank of the selected element in the global
366 * subsequence of elements equal to the selected element. If the
367 * selected element is unique, this number is 0.
368 * @param comp The ordering functor, defaults to std::less.
370 template<typename T, typename RanSeqs, typename RankType, typename Comparator>
372 multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
373 RankType& offset, Comparator comp = std::less<T>())
375 _GLIBCXX_CALL(end_seqs - begin_seqs)
377 typedef typename std::iterator_traits<RanSeqs>::value_type::first_type It;
378 typedef typename std::iterator_traits<It>::difference_type difference_type;
380 lexicographic<T, int, Comparator> lcomp(comp);
381 lexicographic_reverse<T, int, Comparator> lrcomp(comp);
383 // Number of sequences, number of elements in total (possibly
384 // including padding).
385 difference_type m = std::distance(begin_seqs, end_seqs);
386 difference_type N = 0;
387 difference_type nmax, n, r;
389 for (int i = 0; i < m; i++)
390 N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
392 if (m == 0 || N == 0 || rank < 0 || rank >= N)
394 // Result undefined when there is no data or rank is outside bounds.
395 throw std::exception();
399 difference_type* ns = new difference_type[m];
400 difference_type* a = new difference_type[m];
401 difference_type* b = new difference_type[m];
404 ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
406 for (int i = 0; i < m; i++)
408 ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
409 nmax = std::max(nmax, ns[i]);
414 // Pad all lists to this length, at least as long as any ns[i],
415 // equality iff nmax = 2^k - 1
418 // From now on, including padding.
421 for (int i = 0; i < m; i++)
429 // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
431 #define S(i) (begin_seqs[i].first)
433 // Initial partition.
434 std::vector<std::pair<T, int> > sample;
436 for (int i = 0; i < m; i++)
438 sample.push_back(std::make_pair(S(i)[n], i));
439 __gnu_sequential::sort(sample.begin(), sample.end(), lcomp, sequential_tag());
441 // Conceptual infinity.
442 for (int i = 0; i < m; i++)
444 sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
446 difference_type localrank = rank * m / N ;
449 for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++)
450 a[sample[j].second] += n + 1;
452 b[sample[j].second] -= n + 1;
454 // Further refinement.
459 const T* lmax = NULL;
460 for (int i = 0; i < m; i++)
466 lmax = &(S(i)[a[i] - 1]);
470 if (comp(*lmax, S(i)[a[i] - 1])) //max
471 lmax = &(S(i)[a[i] - 1]);
477 for (i = 0; i < m; i++)
479 difference_type middle = (b[i] + a[i]) / 2;
480 if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax))
481 a[i] = std::min(a[i] + n + 1, ns[i]);
486 difference_type leftsize = 0, total = 0;
487 for (int i = 0; i < m; i++)
489 leftsize += a[i] / (n + 1);
490 total += l / (n + 1);
493 difference_type skew = (unsigned long long)total * rank / N - leftsize;
497 // Move to the left, find smallest.
498 std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
500 for (int i = 0; i < m; i++)
502 pq.push(std::make_pair(S(i)[b[i]], i));
504 for (; skew != 0 && !pq.empty(); skew--)
506 int source = pq.top().second;
509 a[source] = std::min(a[source] + n + 1, ns[source]);
512 if (b[source] < ns[source])
513 pq.push(std::make_pair(S(source)[b[source]], source));
518 // Move to the right, find greatest.
519 std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic<T, int, Comparator> > pq(lcomp);
521 for (int i = 0; i < m; i++)
523 pq.push(std::make_pair(S(i)[a[i] - 1], i));
525 for (; skew != 0; skew++)
527 int source = pq.top().second;
534 pq.push(std::make_pair(S(source)[a[source] - 1], source));
540 // a[i] == b[i] in most cases, except when a[i] has been clamped
541 // because of having reached the boundary
543 // Now return the result, calculate the offset.
545 // Compare the keys on both edges of the border.
547 // Maximum of left edge, minimum of right edge.
548 bool maxleftset = false, minrightset = false;
550 // Impossible to avoid the warning?
552 for (int i = 0; i < m; i++)
558 maxleft = S(i)[a[i] - 1];
564 if (comp(maxleft, S(i)[a[i] - 1]))
565 maxleft = S(i)[a[i] - 1];
572 minright = S(i)[b[i]];
578 if (comp(S(i)[b[i]], minright))
579 minright = S(i)[b[i]];
584 // Minright is the splitter, in any case.
586 if (!maxleftset || comp(minright, maxleft))
588 // Good luck, everything is split unambigiously.
593 // We have to calculate an offset.
596 for (int i = 0; i < m; i++)
598 difference_type lb = std::lower_bound(S(i), S(i) + ns[i], minright,