SDL  2.0
SDL_qsort.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 
22 #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
23 #define SDL_DISABLE_ANALYZE_MACROS 1
24 #endif
25 
26 #include "../SDL_internal.h"
27 
28 #include "SDL_stdinc.h"
29 #include "SDL_assert.h"
30 
31 #if defined(HAVE_QSORT)
32 void
33 SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
34 {
35  qsort(base, nmemb, size, compare);
36 }
37 
38 #else
39 
40 #ifdef assert
41 #undef assert
42 #endif
43 #define assert SDL_assert
44 #ifdef malloc
45 #undef malloc
46 #endif
47 #define malloc SDL_malloc
48 #ifdef free
49 #undef free
50 #endif
51 #define free SDL_free
52 #ifdef memcpy
53 #undef memcpy
54 #endif
55 #define memcpy SDL_memcpy
56 #ifdef memmove
57 #undef memmove
58 #endif
59 #define memmove SDL_memmove
60 #ifdef qsortG
61 #undef qsortG
62 #endif
63 #define qsortG SDL_qsort
64 
65 /*
66 This code came from Gareth McCaughan, under the zlib license.
67 Specifically this: https://www.mccaughan.org.uk/software/qsort.c-1.14
68 
69 Everything below this comment until the HAVE_QSORT #endif was from Gareth
70 (any minor changes will be noted inline).
71 
72 Thank you to Gareth for relicensing this code under the zlib license for our
73 benefit!
74 
75 --ryan.
76 */
77 
78 /* This is a drop-in replacement for the C library's |qsort()| routine.
79  *
80  * It is intended for use where you know or suspect that your
81  * platform's qsort is bad. If that isn't the case, then you
82  * should probably use the qsort your system gives you in preference
83  * to mine -- it will likely have been tested and tuned better.
84  *
85  * Features:
86  * - Median-of-three pivoting (and more)
87  * - Truncation and final polishing by a single insertion sort
88  * - Early truncation when no swaps needed in pivoting step
89  * - Explicit recursion, guaranteed not to overflow
90  * - A few little wrinkles stolen from the GNU |qsort()|.
91  * (For the avoidance of doubt, no code was stolen, only
92  * broad ideas.)
93  * - separate code for non-aligned / aligned / word-size objects
94  *
95  * Earlier releases of this code used an idiosyncratic licence
96  * I wrote myself, because I'm an idiot. The code is now released
97  * under the "zlib/libpng licence"; you will find the actual
98  * terms in the next comment. I request (but do not require)
99  * that if you make any changes beyond the name of the exported
100  * routine and reasonable tweaks to the TRUNC_* and
101  * PIVOT_THRESHOLD values, you modify the _ID string so as
102  * to make it clear that you have changed the code.
103  *
104  * If you find problems with this code, or find ways of
105  * making it significantly faster, please let me know!
106  * My e-mail address, valid as of early 2016 and for the
107  * foreseeable future, is
108  * gareth.mccaughan@pobox.com
109  * Thanks!
110  *
111  * Gareth McCaughan
112  */
113 
114 /* Copyright (c) 1998-2016 Gareth McCaughan
115  *
116  * This software is provided 'as-is', without any express or implied
117  * warranty. In no event will the authors be held liable for any
118  * damages arising from the use of this software.
119  *
120  * Permission is granted to anyone to use this software for any purpose,
121  * including commercial applications, and to alter it and redistribute it
122  * freely, subject to the following restrictions:
123  *
124  * 1. The origin of this software must not be misrepresented;
125  * you must not claim that you wrote the original software.
126  * If you use this software in a product, an acknowledgment
127  * in the product documentation would be appreciated but
128  * is not required.
129  *
130  * 2. Altered source versions must be plainly marked as such,
131  * and must not be misrepresented as being the original software.
132  *
133  * 3. This notice may not be removed or altered from any source
134  * distribution.
135  */
136 
137 /* Revision history since release:
138  * 1998-03-19 v1.12 First release I have any records of.
139  * 2007-09-02 v1.13 Fix bug kindly reported by Dan Bodoh
140  * (premature termination of recursion).
141  * Add a few clarifying comments.
142  * Minor improvements to debug output.
143  * 2016-02-21 v1.14 Replace licence with 2-clause BSD,
144  * and clarify a couple of things in
145  * comments. No code changes.
146  */
147 
148 /* BEGIN SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
149 #if 0
150 #include <assert.h>
151 #include <stdlib.h>
152 #include <string.h>
153 
154 #define DEBUG_QSORT
155 
156 static char _ID[]="<qsort.c gjm 1.14 2016-02-21>";
157 #endif
158 /* END SDL CHANGE ... commented this out with an #if 0 block. --ryan. */
159 
160 /* How many bytes are there per word? (Must be a power of 2,
161  * and must in fact equal sizeof(int).)
162  */
163 #define WORD_BYTES sizeof(int)
164 
165 /* How big does our stack need to be? Answer: one entry per
166  * bit in a |size_t|.
167  */
168 #define STACK_SIZE (8*sizeof(size_t))
169 
170 /* Different situations have slightly different requirements,
171  * and we make life epsilon easier by using different truncation
172  * points for the three different cases.
173  * So far, I have tuned TRUNC_words and guessed that the same
174  * value might work well for the other two cases. Of course
175  * what works well on my machine might work badly on yours.
176  */
177 #define TRUNC_nonaligned 12
178 #define TRUNC_aligned 12
179 #define TRUNC_words 12*WORD_BYTES /* nb different meaning */
180 
181 /* We use a simple pivoting algorithm for shortish sub-arrays
182  * and a more complicated one for larger ones. The threshold
183  * is PIVOT_THRESHOLD.
184  */
185 #define PIVOT_THRESHOLD 40
186 
187 typedef struct { char * first; char * last; } stack_entry;
188 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
189 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
190 #define doLeft {first=ffirst;llast=last;continue;}
191 #define doRight {ffirst=first;last=llast;continue;}
192 #define pop {if (--stacktop<0) break;\
193  first=ffirst=stack[stacktop].first;\
194  last=llast=stack[stacktop].last;\
195  continue;}
196 
197 /* Some comments on the implementation.
198  * 1. When we finish partitioning the array into "low"
199  * and "high", we forget entirely about short subarrays,
200  * because they'll be done later by insertion sort.
201  * Doing lots of little insertion sorts might be a win
202  * on large datasets for locality-of-reference reasons,
203  * but it makes the code much nastier and increases
204  * bookkeeping overhead.
205  * 2. We always save the shorter and get to work on the
206  * longer. This guarantees that every time we push
207  * an item onto the stack its size is <= 1/2 of that
208  * of its parent; so the stack can't need more than
209  * log_2(max-array-size) entries.
210  * 3. We choose a pivot by looking at the first, last
211  * and middle elements. We arrange them into order
212  * because it's easy to do that in conjunction with
213  * choosing the pivot, and it makes things a little
214  * easier in the partitioning step. Anyway, the pivot
215  * is the middle of these three. It's still possible
216  * to construct datasets where the algorithm takes
217  * time of order n^2, but it simply never happens in
218  * practice.
219  * 3' Newsflash: On further investigation I find that
220  * it's easy to construct datasets where median-of-3
221  * simply isn't good enough. So on large-ish subarrays
222  * we do a more sophisticated pivoting: we take three
223  * sets of 3 elements, find their medians, and then
224  * take the median of those.
225  * 4. We copy the pivot element to a separate place
226  * because that way we can always do our comparisons
227  * directly against a pointer to that separate place,
228  * and don't have to wonder "did we move the pivot
229  * element?". This makes the inner loop better.
230  * 5. It's possible to make the pivoting even more
231  * reliable by looking at more candidates when n
232  * is larger. (Taking this to its logical conclusion
233  * results in a variant of quicksort that doesn't
234  * have that n^2 worst case.) However, the overhead
235  * from the extra bookkeeping means that it's just
236  * not worth while.
237  * 6. This is pretty clean and portable code. Here are
238  * all the potential portability pitfalls and problems
239  * I know of:
240  * - In one place (the insertion sort) I construct
241  * a pointer that points just past the end of the
242  * supplied array, and assume that (a) it won't
243  * compare equal to any pointer within the array,
244  * and (b) it will compare equal to a pointer
245  * obtained by stepping off the end of the array.
246  * These might fail on some segmented architectures.
247  * - I assume that there are 8 bits in a |char| when
248  * computing the size of stack needed. This would
249  * fail on machines with 9-bit or 16-bit bytes.
250  * - I assume that if |((int)base&(sizeof(int)-1))==0|
251  * and |(size&(sizeof(int)-1))==0| then it's safe to
252  * get at array elements via |int*|s, and that if
253  * actually |size==sizeof(int)| as well then it's
254  * safe to treat the elements as |int|s. This might
255  * fail on systems that convert pointers to integers
256  * in non-standard ways.
257  * - I assume that |8*sizeof(size_t)<=INT_MAX|. This
258  * would be false on a machine with 8-bit |char|s,
259  * 16-bit |int|s and 4096-bit |size_t|s. :-)
260  */
261 
262 /* The recursion logic is the same in each case.
263  * We keep chopping up until we reach subarrays of size
264  * strictly less than Trunc; we leave these unsorted. */
265 #define Recurse(Trunc) \
266  { size_t l=last-ffirst,r=llast-first; \
267  if (l<Trunc) { \
268  if (r>=Trunc) doRight \
269  else pop \
270  } \
271  else if (l<=r) { pushLeft; doRight } \
272  else if (r>=Trunc) { pushRight; doLeft }\
273  else doLeft \
274  }
275 
276 /* and so is the pivoting logic (note: last is inclusive): */
277 #define Pivot(swapper,sz) \
278  if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
279  else { \
280  if (compare(first,mid)<0) { \
281  if (compare(mid,last)>0) { \
282  swapper(mid,last); \
283  if (compare(first,mid)>0) swapper(first,mid);\
284  } \
285  } \
286  else { \
287  if (compare(mid,last)>0) swapper(first,last)\
288  else { \
289  swapper(first,mid); \
290  if (compare(mid,last)>0) swapper(mid,last);\
291  } \
292  } \
293  first+=sz; last-=sz; \
294  }
295 
296 #ifdef DEBUG_QSORT
297 #include <stdio.h>
298 #endif
299 
300 /* and so is the partitioning logic: */
301 #define Partition(swapper,sz) { \
302  do { \
303  while (compare(first,pivot)<0) first+=sz; \
304  while (compare(pivot,last)<0) last-=sz; \
305  if (first<last) { \
306  swapper(first,last); \
307  first+=sz; last-=sz; } \
308  else if (first==last) { first+=sz; last-=sz; break; }\
309  } while (first<=last); \
310 }
311 
312 /* and so is the pre-insertion-sort operation of putting
313  * the smallest element into place as a sentinel.
314  * Doing this makes the inner loop nicer. I got this
315  * idea from the GNU implementation of qsort().
316  * We find the smallest element from the first |nmemb|,
317  * or the first |limit|, whichever is smaller;
318  * therefore we must have ensured that the globally smallest
319  * element is in the first |limit|.
320  */
321 #define PreInsertion(swapper,limit,sz) \
322  first=base; \
323  last=first + ((nmemb>limit ? limit : nmemb)-1)*sz;\
324  while (last!=base) { \
325  if (compare(first,last)>0) first=last; \
326  last-=sz; } \
327  if (first!=base) swapper(first,(char*)base);
328 
329 /* and so is the insertion sort, in the first two cases: */
330 #define Insertion(swapper) \
331  last=((char*)base)+nmemb*size; \
332  for (first=((char*)base)+size;first!=last;first+=size) { \
333  char *test; \
334  /* Find the right place for |first|. \
335  * My apologies for var reuse. */ \
336  for (test=first-size;compare(test,first)>0;test-=size) ; \
337  test+=size; \
338  if (test!=first) { \
339  /* Shift everything in [test,first) \
340  * up by one, and place |first| \
341  * where |test| is. */ \
342  memcpy(pivot,first,size); \
343  memmove(test+size,test,first-test); \
344  memcpy(test,pivot,size); \
345  } \
346  }
347 
348 #define SWAP_nonaligned(a,b) { \
349  register char *aa=(a),*bb=(b); \
350  register size_t sz=size; \
351  do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
352 
353 #define SWAP_aligned(a,b) { \
354  register int *aa=(int*)(a),*bb=(int*)(b); \
355  register size_t sz=size; \
356  do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
357 
358 #define SWAP_words(a,b) { \
359  register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
360 
361 /* ---------------------------------------------------------------------- */
362 
363 static char * pivot_big(char *first, char *mid, char *last, size_t size,
364  int compare(const void *, const void *)) {
365  size_t d=(((last-first)/size)>>3)*size;
366 #ifdef DEBUG_QSORT
367 fprintf(stderr, "pivot_big: first=%p last=%p size=%lu n=%lu\n", first, (unsigned long)last, size, (unsigned long)((last-first+1)/size));
368 #endif
369  char *m1,*m2,*m3;
370  { char *a=first, *b=first+d, *c=first+2*d;
371 #ifdef DEBUG_QSORT
372 fprintf(stderr,"< %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
373 #endif
374  m1 = compare(a,b)<0 ?
375  (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
376  : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
377  }
378  { char *a=mid-d, *b=mid, *c=mid+d;
379 #ifdef DEBUG_QSORT
380 fprintf(stderr,". %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
381 #endif
382  m2 = compare(a,b)<0 ?
383  (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
384  : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
385  }
386  { char *a=last-2*d, *b=last-d, *c=last;
387 #ifdef DEBUG_QSORT
388 fprintf(stderr,"> %d %d %d @ %p %p %p\n",*(int*)a,*(int*)b,*(int*)c, a,b,c);
389 #endif
390  m3 = compare(a,b)<0 ?
391  (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
392  : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
393  }
394 #ifdef DEBUG_QSORT
395 fprintf(stderr,"-> %d %d %d @ %p %p %p\n",*(int*)m1,*(int*)m2,*(int*)m3, m1,m2,m3);
396 #endif
397  return compare(m1,m2)<0 ?
398  (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
399  : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
400 }
401 
402 /* ---------------------------------------------------------------------- */
403 
404 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
405  int (*compare)(const void *, const void *)) {
406 
407  stack_entry stack[STACK_SIZE];
408  int stacktop=0;
409  char *first,*last;
410  char *pivot=malloc(size);
411  size_t trunc=TRUNC_nonaligned*size;
412  assert(pivot!=0);
413 
414  first=(char*)base; last=first+(nmemb-1)*size;
415 
416  if ((size_t)(last-first)>=trunc) {
417  char *ffirst=first, *llast=last;
418  while (1) {
419  /* Select pivot */
420  { char * mid=first+size*((last-first)/size >> 1);
421  Pivot(SWAP_nonaligned,size);
422  memcpy(pivot,mid,size);
423  }
424  /* Partition. */
426  /* Prepare to recurse/iterate. */
427  Recurse(trunc)
428  }
429  }
432  free(pivot);
433 }
434 
435 static void qsort_aligned(void *base, size_t nmemb, size_t size,
436  int (*compare)(const void *, const void *)) {
437 
438  stack_entry stack[STACK_SIZE];
439  int stacktop=0;
440  char *first,*last;
441  char *pivot=malloc(size);
442  size_t trunc=TRUNC_aligned*size;
443  assert(pivot!=0);
444 
445  first=(char*)base; last=first+(nmemb-1)*size;
446 
447  if ((size_t)(last-first)>=trunc) {
448  char *ffirst=first,*llast=last;
449  while (1) {
450  /* Select pivot */
451  { char * mid=first+size*((last-first)/size >> 1);
452  Pivot(SWAP_aligned,size);
453  memcpy(pivot,mid,size);
454  }
455  /* Partition. */
456  Partition(SWAP_aligned,size);
457  /* Prepare to recurse/iterate. */
458  Recurse(trunc)
459  }
460  }
463  free(pivot);
464 }
465 
466 static void qsort_words(void *base, size_t nmemb,
467  int (*compare)(const void *, const void *)) {
468 
469  stack_entry stack[STACK_SIZE];
470  int stacktop=0;
471  char *first,*last;
472  char *pivot=malloc(WORD_BYTES);
473  assert(pivot!=0);
474 
475  first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
476 
477  if (last-first>=TRUNC_words) {
478  char *ffirst=first, *llast=last;
479  while (1) {
480 #ifdef DEBUG_QSORT
481 fprintf(stderr,"Doing %d:%d: ",
482  (first-(char*)base)/WORD_BYTES,
483  (last-(char*)base)/WORD_BYTES);
484 #endif
485  /* Select pivot */
486  { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
488  *(int*)pivot=*(int*)mid;
489 #ifdef DEBUG_QSORT
490 fprintf(stderr,"pivot = %p = #%lu = %d\n", mid, (unsigned long)(((int*)mid)-((int*)base)), *(int*)mid);
491 #endif
492  }
493  /* Partition. */
495 #ifdef DEBUG_QSORT
496 fprintf(stderr, "after partitioning first=#%lu last=#%lu\n", (first-(char*)base)/4lu, (last-(char*)base)/4lu);
497 #endif
498  /* Prepare to recurse/iterate. */
500  }
501  }
503  /* Now do insertion sort. */
504  last=((char*)base)+nmemb*WORD_BYTES;
505  for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
506  /* Find the right place for |first|. My apologies for var reuse */
507  int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
508  *(int*)pivot=*(int*)first;
509  for (;compare(pl,pivot)>0;pr=pl,--pl) {
510  *pr=*pl; }
511  if (pr!=(int*)first) *pr=*(int*)pivot;
512  }
513  free(pivot);
514 }
515 
516 /* ---------------------------------------------------------------------- */
517 
518 extern void qsortG(void *base, size_t nmemb, size_t size,
519  int (*compare)(const void *, const void *)) {
520 
521  if (nmemb<=1) return;
522  if (((size_t)base|size)&(WORD_BYTES-1))
523  qsort_nonaligned(base,nmemb,size,compare);
524  else if (size!=WORD_BYTES)
525  qsort_aligned(base,nmemb,size,compare);
526  else
527  qsort_words(base,nmemb,compare);
528 }
529 
530 
531 #endif /* HAVE_QSORT */
532 
533 /* vi: set ts=4 sw=4 expandtab: */
534 
#define SWAP_aligned(a, b)
Definition: SDL_qsort.c:353
#define WORD_BYTES
Definition: SDL_qsort.c:163
#define TRUNC_aligned
Definition: SDL_qsort.c:178
Definition: SDL_qsort.c:187
#define SDL_qsort
const GLint * first
#define PreInsertion(swapper, limit, sz)
Definition: SDL_qsort.c:321
#define memcpy
Definition: SDL_qsort.c:55
#define Pivot(swapper, sz)
Definition: SDL_qsort.c:277
#define qsortG
Definition: SDL_qsort.c:63
#define SWAP_words(a, b)
Definition: SDL_qsort.c:358
#define STACK_SIZE
Definition: SDL_qsort.c:168
static char * pivot_big(char *first, char *mid, char *last, size_t size, int compare(const void *, const void *))
Definition: SDL_qsort.c:363
#define Recurse(Trunc)
Definition: SDL_qsort.c:265
SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char int SDL_PRINTF_FORMAT_STRING const char const char SDL_SCANF_FORMAT_STRING const char return SDL_ThreadFunction const char void return Uint32 return Uint32 SDL_AssertionHandler void SDL_SpinLock SDL_atomic_t int int return SDL_atomic_t return void void void return void return int return SDL_AudioSpec SDL_AudioSpec return int int return return int SDL_RWops int SDL_AudioSpec Uint8 ** d
static void qsort_nonaligned(void *base, size_t nmemb, size_t size, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:404
#define TRUNC_nonaligned
Definition: SDL_qsort.c:177
const GLubyte * c
#define SWAP_nonaligned(a, b)
Definition: SDL_qsort.c:348
static void qsort_words(void *base, size_t nmemb, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:466
GLsizeiptr size
char * last
Definition: SDL_qsort.c:187
#define TRUNC_words
Definition: SDL_qsort.c:179
#define Insertion(swapper)
Definition: SDL_qsort.c:330
#define free
Definition: SDL_qsort.c:51
#define Partition(swapper, sz)
Definition: SDL_qsort.c:301
GLboolean GLboolean GLboolean GLboolean a
GLboolean GLboolean GLboolean b
#define malloc
Definition: SDL_qsort.c:47
static void qsort_aligned(void *base, size_t nmemb, size_t size, int(*compare)(const void *, const void *))
Definition: SDL_qsort.c:435
#define assert
Definition: SDL_qsort.c:43