libstdc++
balanced_quicksort.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 
3 // Copyright (C) 2007-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10 
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file parallel/balanced_quicksort.h
26  * @brief Implementation of a dynamically load-balanced parallel quicksort.
27  *
28  * It works in-place and needs only logarithmic extra memory.
29  * The algorithm is similar to the one proposed in
30  *
31  * P. Tsigas and Y. Zhang.
32  * A simple, fast parallel implementation of quicksort and
33  * its performance evaluation on SUN enterprise 10000.
34  * In 11th Euromicro Conference on Parallel, Distributed and
35  * Network-Based Processing, page 372, 2003.
36  *
37  * This file is a GNU parallel extension to the Standard C++ Library.
38  */
39 
40 // Written by Johannes Singler.
41 
42 #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
43 #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
44 
46 #include <bits/stl_algo.h>
47 #include <bits/stl_function.h>
48 
49 #include <parallel/settings.h>
50 #include <parallel/partition.h>
51 #include <parallel/random_number.h>
52 #include <parallel/queue.h>
53 
54 #if _GLIBCXX_ASSERTIONS
55 #include <parallel/checkers.h>
56 #endif
57 
58 namespace __gnu_parallel
59 {
60  /** @brief Information local to one thread in the parallel quicksort run. */
61  template<typename _RAIter>
63  {
64  typedef std::iterator_traits<_RAIter> _TraitsType;
65  typedef typename _TraitsType::difference_type _DifferenceType;
66 
67  /** @brief Continuous part of the sequence, described by an
68  iterator pair. */
70 
71  /** @brief Initial piece to work on. */
72  _Piece _M_initial;
73 
74  /** @brief Work-stealing queue. */
76 
77  /** @brief Number of threads involved in this algorithm. */
79 
80  /** @brief Pointer to a counter of elements left over to sort. */
81  volatile _DifferenceType* _M_elements_leftover;
82 
83  /** @brief The complete sequence to sort. */
84  _Piece _M_global;
85 
86  /** @brief Constructor.
87  * @param __queue_size size of the work-stealing queue. */
88  _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
89  };
90 
91  /** @brief Balanced quicksort divide step.
92  * @param __begin Begin iterator of subsequence.
93  * @param __end End iterator of subsequence.
94  * @param __comp Comparator.
95  * @param __num_threads Number of threads that are allowed to work on
96  * this part.
97  * @pre @c (__end-__begin)>=1 */
98  template<typename _RAIter, typename _Compare>
99  typename std::iterator_traits<_RAIter>::difference_type
100  __qsb_divide(_RAIter __begin, _RAIter __end,
101  _Compare __comp, _ThreadIndex __num_threads)
102  {
103  _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
104 
105  typedef std::iterator_traits<_RAIter> _TraitsType;
106  typedef typename _TraitsType::value_type _ValueType;
107  typedef typename _TraitsType::difference_type _DifferenceType;
108 
109  _RAIter __pivot_pos =
110  __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
111  __end - 1, __comp);
112 
113 #if defined(_GLIBCXX_ASSERTIONS)
114  // Must be in between somewhere.
115  _DifferenceType __n = __end - __begin;
116 
117  _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
118  && !__comp(*(__begin + __n / 2),
119  *__pivot_pos))
120  || (!__comp(*__pivot_pos, *__begin)
121  && !__comp(*(__end - 1), *__pivot_pos))
122  || (!__comp(*__pivot_pos, *(__begin + __n / 2))
123  && !__comp(*__begin, *__pivot_pos))
124  || (!__comp(*__pivot_pos, *(__begin + __n / 2))
125  && !__comp(*(__end - 1), *__pivot_pos))
126  || (!__comp(*__pivot_pos, *(__end - 1))
127  && !__comp(*__begin, *__pivot_pos))
128  || (!__comp(*__pivot_pos, *(__end - 1))
129  && !__comp(*(__begin + __n / 2),
130  *__pivot_pos)));
131 #endif
132 
133  // Swap pivot value to end.
134  if (__pivot_pos != (__end - 1))
135  std::iter_swap(__pivot_pos, __end - 1);
136  __pivot_pos = __end - 1;
137 
139  __pred(__comp, *__pivot_pos);
140 
141  // Divide, returning __end - __begin - 1 in the worst case.
142  _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
143  __pred,
144  __num_threads);
145 
146  // Swap back pivot to middle.
147  std::iter_swap(__begin + __split_pos, __pivot_pos);
148  __pivot_pos = __begin + __split_pos;
149 
150 #if _GLIBCXX_ASSERTIONS
151  _RAIter __r;
152  for (__r = __begin; __r != __pivot_pos; ++__r)
153  _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
154  for (; __r != __end; ++__r)
155  _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
156 #endif
157 
158  return __split_pos;
159  }
160 
161  /** @brief Quicksort conquer step.
162  * @param __tls Array of thread-local storages.
163  * @param __begin Begin iterator of subsequence.
164  * @param __end End iterator of subsequence.
165  * @param __comp Comparator.
166  * @param __iam Number of the thread processing this function.
167  * @param __num_threads
168  * Number of threads that are allowed to work on this part. */
169  template<typename _RAIter, typename _Compare>
170  void
172  _RAIter __begin, _RAIter __end,
173  _Compare __comp,
174  _ThreadIndex __iam, _ThreadIndex __num_threads,
175  bool __parent_wait)
176  {
177  typedef std::iterator_traits<_RAIter> _TraitsType;
178  typedef typename _TraitsType::value_type _ValueType;
179  typedef typename _TraitsType::difference_type _DifferenceType;
180 
181  _DifferenceType __n = __end - __begin;
182 
183  if (__num_threads <= 1 || __n <= 1)
184  {
185  __tls[__iam]->_M_initial.first = __begin;
186  __tls[__iam]->_M_initial.second = __end;
187 
188  __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
189 
190  return;
191  }
192 
193  // Divide step.
194  _DifferenceType __split_pos =
195  __qsb_divide(__begin, __end, __comp, __num_threads);
196 
197 #if _GLIBCXX_ASSERTIONS
198  _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
199  __split_pos < (__end - __begin));
200 #endif
201 
203  __num_threads_leftside = std::max<_ThreadIndex>
204  (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
205  * __num_threads / __n));
206 
207 # pragma omp atomic
208  *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
209 
210  // Conquer step.
211 # pragma omp parallel num_threads(2)
212  {
213  bool __wait;
214  if(omp_get_num_threads() < 2)
215  __wait = false;
216  else
217  __wait = __parent_wait;
218 
219 # pragma omp sections
220  {
221 # pragma omp section
222  {
223  __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
224  __iam, __num_threads_leftside, __wait);
225  __wait = __parent_wait;
226  }
227  // The pivot_pos is left in place, to ensure termination.
228 # pragma omp section
229  {
230  __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
231  __iam + __num_threads_leftside,
232  __num_threads - __num_threads_leftside, __wait);
233  __wait = __parent_wait;
234  }
235  }
236  }
237  }
238 
239  /**
240  * @brief Quicksort step doing load-balanced local sort.
241  * @param __tls Array of thread-local storages.
242  * @param __comp Comparator.
243  * @param __iam Number of the thread processing this function.
244  */
245  template<typename _RAIter, typename _Compare>
246  void
248  _Compare& __comp, _ThreadIndex __iam,
249  bool __wait)
250  {
251  typedef std::iterator_traits<_RAIter> _TraitsType;
252  typedef typename _TraitsType::value_type _ValueType;
253  typedef typename _TraitsType::difference_type _DifferenceType;
255 
256  _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
257 
258  _DifferenceType
260  if (__base_case_n < 2)
261  __base_case_n = 2;
262  _ThreadIndex __num_threads = __tl._M_num_threads;
263 
264  // Every thread has its own random number generator.
265  _RandomNumber __rng(__iam + 1);
266 
267  _Piece __current = __tl._M_initial;
268 
269  _DifferenceType __elements_done = 0;
270 #if _GLIBCXX_ASSERTIONS
271  _DifferenceType __total_elements_done = 0;
272 #endif
273 
274  for (;;)
275  {
276  // Invariant: __current must be a valid (maybe empty) range.
277  _RAIter __begin = __current.first, __end = __current.second;
278  _DifferenceType __n = __end - __begin;
279 
280  if (__n > __base_case_n)
281  {
282  // Divide.
283  _RAIter __pivot_pos = __begin + __rng(__n);
284 
285  // Swap __pivot_pos value to end.
286  if (__pivot_pos != (__end - 1))
287  std::iter_swap(__pivot_pos, __end - 1);
288  __pivot_pos = __end - 1;
289 
291  <_Compare, _ValueType, _ValueType, bool>
292  __pred(__comp, *__pivot_pos);
293 
294  // Divide, leave pivot unchanged in last place.
295  _RAIter __split_pos1, __split_pos2;
296  __split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
297  __pred);
298 
299  // Left side: < __pivot_pos; __right side: >= __pivot_pos.
300 #if _GLIBCXX_ASSERTIONS
301  _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
302  && __split_pos1 < __end);
303 #endif
304  // Swap pivot back to middle.
305  if (__split_pos1 != __pivot_pos)
306  std::iter_swap(__split_pos1, __pivot_pos);
307  __pivot_pos = __split_pos1;
308 
309  // In case all elements are equal, __split_pos1 == 0.
310  if ((__split_pos1 + 1 - __begin) < (__n >> 7)
311  || (__end - __split_pos1) < (__n >> 7))
312  {
313  // Very unequal split, one part smaller than one 128th
314  // elements not strictly larger than the pivot.
316  <_Compare, _ValueType, _ValueType, bool>, _ValueType>
318  <_Compare, _ValueType, _ValueType, bool>
319  (__comp, *__pivot_pos));
320 
321  // Find other end of pivot-equal range.
322  __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
323  __end, __pred);
324  }
325  else
326  // Only skip the pivot.
327  __split_pos2 = __split_pos1 + 1;
328 
329  // Elements equal to pivot are done.
330  __elements_done += (__split_pos2 - __split_pos1);
331 #if _GLIBCXX_ASSERTIONS
332  __total_elements_done += (__split_pos2 - __split_pos1);
333 #endif
334  // Always push larger part onto stack.
335  if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
336  {
337  // Right side larger.
338  if ((__split_pos2) != __end)
339  __tl._M_leftover_parts.push_front
340  (std::make_pair(__split_pos2, __end));
341 
342  //__current.first = __begin; //already set anyway
343  __current.second = __split_pos1;
344  continue;
345  }
346  else
347  {
348  // Left side larger.
349  if (__begin != __split_pos1)
350  __tl._M_leftover_parts.push_front(std::make_pair
351  (__begin, __split_pos1));
352 
353  __current.first = __split_pos2;
354  //__current.second = __end; //already set anyway
355  continue;
356  }
357  }
358  else
359  {
360  __gnu_sequential::sort(__begin, __end, __comp);
361  __elements_done += __n;
362 #if _GLIBCXX_ASSERTIONS
363  __total_elements_done += __n;
364 #endif
365 
366  // Prefer own stack, small pieces.
367  if (__tl._M_leftover_parts.pop_front(__current))
368  continue;
369 
370 # pragma omp atomic
371  *__tl._M_elements_leftover -= __elements_done;
372 
373  __elements_done = 0;
374 
375 #if _GLIBCXX_ASSERTIONS
376  double __search_start = omp_get_wtime();
377 #endif
378 
379  // Look for new work.
380  bool __successfully_stolen = false;
381  while (__wait && *__tl._M_elements_leftover > 0
382  && !__successfully_stolen
384  // Possible dead-lock.
385  && (omp_get_wtime() < (__search_start + 1.0))
386 #endif
387  )
388  {
389  _ThreadIndex __victim;
390  __victim = __rng(__num_threads);
391 
392  // Large pieces.
393  __successfully_stolen = (__victim != __iam)
394  && __tls[__victim]->_M_leftover_parts.pop_back(__current);
395  if (!__successfully_stolen)
396  __yield();
397 #if !defined(__ICC) && !defined(__ECC)
398 # pragma omp flush
399 #endif
400  }
401 
402 #if _GLIBCXX_ASSERTIONS
403  if (omp_get_wtime() >= (__search_start + 1.0))
404  {
405  sleep(1);
406  _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
407  < (__search_start + 1.0));
408  }
409 #endif
410  if (!__successfully_stolen)
411  {
412 #if _GLIBCXX_ASSERTIONS
413  _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
414 #endif
415  return;
416  }
417  }
418  }
419  }
420 
421  /** @brief Top-level quicksort routine.
422  * @param __begin Begin iterator of sequence.
423  * @param __end End iterator of sequence.
424  * @param __comp Comparator.
425  * @param __num_threads Number of threads that are allowed to work on
426  * this part.
427  */
428  template<typename _RAIter, typename _Compare>
429  void
430  __parallel_sort_qsb(_RAIter __begin, _RAIter __end,
431  _Compare __comp, _ThreadIndex __num_threads)
432  {
433  _GLIBCXX_CALL(__end - __begin)
434 
435  typedef std::iterator_traits<_RAIter> _TraitsType;
436  typedef typename _TraitsType::value_type _ValueType;
437  typedef typename _TraitsType::difference_type _DifferenceType;
439 
440  typedef _QSBThreadLocal<_RAIter> _TLSType;
441 
442  _DifferenceType __n = __end - __begin;
443 
444  if (__n <= 1)
445  return;
446 
447  // At least one element per processor.
448  if (__num_threads > __n)
449  __num_threads = static_cast<_ThreadIndex>(__n);
450 
451  // Initialize thread local storage
452  _TLSType** __tls = new _TLSType*[__num_threads];
453  _DifferenceType __queue_size = (__num_threads
454  * (_ThreadIndex)(__rd_log2(__n) + 1));
455  for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
456  __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
457 
458  // There can never be more than ceil(__rd_log2(__n)) ranges on the
459  // stack, because
460  // 1. Only one processor pushes onto the stack
461  // 2. The largest range has at most length __n
462  // 3. Each range is larger than half of the range remaining
463  volatile _DifferenceType __elements_leftover = __n;
464  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
465  {
466  __tls[__i]->_M_elements_leftover = &__elements_leftover;
467  __tls[__i]->_M_num_threads = __num_threads;
468  __tls[__i]->_M_global = std::make_pair(__begin, __end);
469 
470  // Just in case nothing is left to assign.
471  __tls[__i]->_M_initial = std::make_pair(__end, __end);
472  }
473 
474  // Main recursion call.
475  __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
476  __num_threads, true);
477 
478 #if _GLIBCXX_ASSERTIONS
479  // All stack must be empty.
480  _Piece __dummy;
481  for (_ThreadIndex __i = 1; __i < __num_threads; ++__i)
482  _GLIBCXX_PARALLEL_ASSERT(
483  !__tls[__i]->_M_leftover_parts.pop_back(__dummy));
484 #endif
485 
486  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
487  delete __tls[__i];
488  delete[] __tls;
489  }
490 } // namespace __gnu_parallel
491 
492 #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */
GNU parallel code for public use.
void __qsb_local_sort_with_helping(_QSBThreadLocal< _RAIter > **__tls, _Compare &__comp, _ThreadIndex __iam, bool __wait)
Quicksort step doing load-balanced local sort.
Parallel implementation of std::partition(), std::nth_element(), and std::partial_sort(). This file is a GNU parallel extension to the Standard C++ Library.
_ThreadIndex _M_num_threads
Number of threads involved in this algorithm.
Information local to one thread in the parallel quicksort run.
Includes the original header files concerned with iterators except for stream iterators. This file is a GNU parallel extension to the Standard C++ Library.
_Piece _M_global
The complete sequence to sort.
_Size __rd_log2(_Size __n)
Calculates the rounded-down logarithm of __n for base 2.
Runtime settings and tuning parameters, heuristics to decide whether to use parallelized algorithms...
Similar to std::binder2nd, but giving the argument types explicitly.
Lock-free double-ended queue. This file is a GNU parallel extension to the Standard C++ Library...
Routines for checking the correctness of algorithm results. This file is a GNU parallel extension to ...
_RestrictedBoundedConcurrentQueue< _Piece > _M_leftover_parts
Work-stealing queue.
uint16_t _ThreadIndex
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
Definition: types.h:123
_SequenceIndex sort_qsb_base_case_maximal_n
Maximal subsequence __length to switch to unbalanced __base case. Applies to std::sort with dynamical...
Definition: settings.h:241
#define _GLIBCXX_CALL(__n)
Macro to produce log message when entering a function.
void __qsb_conquer(_QSBThreadLocal< _RAIter > **__tls, _RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __iam, _ThreadIndex __num_threads, bool __parent_wait)
Quicksort conquer step.
Double-ended queue of bounded size, allowing lock-free atomic access. push_front() and pop_front() mu...
Definition: queue.h:52
Random number generator based on the Mersenne twister. This file is a GNU parallel extension to the S...
void __yield()
Yield control to another thread, without waiting for the end of the time slice.
Subsequence description.
constexpr pair< typename __decay_and_strip< _T1 >::__type, typename __decay_and_strip< _T2 >::__type > make_pair(_T1 &&__x, _T2 &&__y)
A convenience wrapper for creating a pair from two objects.
Definition: stl_pair.h:276
Similar to std::binder1st, but giving the argument types explicitly.
Random number generator, based on the Mersenne twister.
Definition: random_number.h:42
static const _Settings & get()
Get the global settings.
#define _GLIBCXX_ASSERTIONS
Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. Should be switched on only locally...
std::iterator_traits< _RAIter >::difference_type __parallel_partition(_RAIter __begin, _RAIter __end, _Predicate __pred, _ThreadIndex __num_threads)
Parallel implementation of std::partition.
Definition: partition.h:56
_Piece _M_initial
Initial piece to work on.
volatile _DifferenceType * _M_elements_leftover
Pointer to a counter of elements left over to sort.
_RAIter __median_of_three_iterators(_RAIter __a, _RAIter __b, _RAIter __c, _Compare __comp)
Compute the median of three referenced elements, according to __comp.
void __parallel_sort_qsb(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
Top-level quicksort routine.
std::iterator_traits< _RAIter >::difference_type __qsb_divide(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
Balanced quicksort divide step.
Similar to std::unary_negate, but giving the argument types explicitly.
std::pair< _RAIter, _RAIter > _Piece
Continuous part of the sequence, described by an iterator pair.
_QSBThreadLocal(int __queue_size)
Constructor.