From edcffb7d0655507d3126704c71661e8c546122ef Mon Sep 17 00:00:00 2001
From: Julien Schueller
Date: Thu, 6 Dec 2018 10:14:42 +0100
Subject: [PATCH] Use BSD qsort_r implementation

Fixes #239
---
Review notes (text in this section is not part of the commit):
- Line breaks were restored by hand.  Indentation and inter-sentence spacing
  of the context and removed lines are reconstructed, so applying may need
  `git apply --ignore-whitespace`.
- The added code no longer matches blob bf98b6b named on the index line:
  the header name after #include was missing and is assumed to be
  <stdlib.h>; MIN and vecswap were made macro-safe; qsort_r_fallback returns
  early for n < 2 or es == 0; explanatory comments were added.

 src/util/nlopt-util.h |   2 +-
 src/util/qsort_r.c    | 247 +++++++++++++++++++++++++++++++-----------
 2 files changed, 176 insertions(+), 73 deletions(-)

diff --git a/src/util/nlopt-util.h b/src/util/nlopt-util.h
index b7f8ab6..fe89a46 100644
--- a/src/util/nlopt-util.h
+++ b/src/util/nlopt-util.h
@@ -50,7 +50,7 @@ extern "C" {
     int nlopt_istiny(double x);
     int nlopt_isnan(double x);
 
-/* re-entrant qsort */
+/* re-entrant qsort, uses the BSD convention */
     extern void nlopt_qsort_r(void *base_, size_t nmemb, size_t size, void *thunk, int (*compar) (void *, const void *, const void *));
 
 /* seconds timer */
diff --git a/src/util/qsort_r.c b/src/util/qsort_r.c
index 2eb56d5..bf98b6b 100644
--- a/src/util/qsort_r.c
+++ b/src/util/qsort_r.c
@@ -19,87 +19,190 @@
  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
-
 #include "nlopt-util.h"
+#include <stdlib.h>
+
+typedef int cmp_t(void *, const void *, const void *);
+
+static inline char *med3(char *, char *, char *, cmp_t *, void *);
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+
+/* Exchange the es bytes at a with the es bytes at b; es must be non-zero. */
+static inline void
+swapfunc(char *a, char *b, size_t es)
+{
+    char t;
+
+    do {
+        t = *a;
+        *a++ = *b;
+        *b++ = t;
+    } while (--es > 0);
+}
+
+/* Swap n bytes between a and b; wrapped so it expands to one statement. */
+#define vecswap(a, b, n) \
+    do { if ((n) > 0) swapfunc(a, b, n); } while (0)
 
-/* Simple replacement for the BSD qsort_r function (re-entrant sorting),
-   if it is not available.
+#define CMP(t, x, y) (cmp((t), (x), (y)))
 
-   (glibc 2.8 included a qsort_r function as well, but totally
-   *%&$#-ed things up by gratuitously changing the argument order, in
-   such a way as to allow code using the BSD ordering to compile but
-   die a flaming death at runtime. Damn them all to Hell, I'll just
-   use my own implementation.)
+/* Return whichever of a, b, c holds the median value according to cmp. */
+static inline char *
+med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk)
+{
+    return CMP(thunk, a, b) < 0 ?
+        (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
+        :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
+}
+
+/*
+ * Sort the n elements of es bytes each starting at a, ordered by
+ * cmp(thunk, x, y).  Inputs of fewer than 7 elements, and inputs whose
+ * partitioning pass performed no exchange, are finished by insertion sort.
+ * The smaller partition is sorted by a recursive call and the larger one by
+ * jumping back to the top, which saves stack space.
+ */
+void qsort_r_fallback(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
+{
+    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+    size_t d1, d2;
+    int cmp_result;
+    int swap_cnt;
+
+    /* Fewer than two elements are already sorted, and es == 0 would wrap
+       the byte counter in swapfunc and divide by zero further down. */
+    if (n < 2 || es == 0)
+        return;
+
+loop:
+    swap_cnt = 0;
+    if (n < 7) {
+        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+            for (pl = pm;
+                 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+                 pl -= es)
+                swapfunc(pl, pl - es, es);
+        return;
+    }
+    pm = (char *)a + (n / 2) * es;
+    if (n > 7) {
+        pl = a;
+        pn = (char *)a + (n - 1) * es;
+        if (n > 40) {
+            size_t d = (n / 8) * es;
+
+            pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
+            pm = med3(pm - d, pm, pm + d, cmp, thunk);
+            pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
+        }
+        pm = med3(pl, pm, pn, cmp, thunk);
+    }
+    swapfunc(a, pm, es);
+    pa = pb = (char *)a + es;
+
+    pc = pd = (char *)a + (n - 1) * es;
+    for (;;) {
+        while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
+            if (cmp_result == 0) {
+                swap_cnt = 1;
+                swapfunc(pa, pb, es);
+                pa += es;
+            }
+            pb += es;
+        }
+        while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
+            if (cmp_result == 0) {
+                swap_cnt = 1;
+                swapfunc(pc, pd, es);
+                pd -= es;
+            }
+            pc -= es;
+        }
+        if (pb > pc)
+            break;
+        swapfunc(pb, pc, es);
+        swap_cnt = 1;
+        pb += es;
+        pc -= es;
+    }
+    if (swap_cnt == 0) { /* Switch to insertion sort */
+        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+            for (pl = pm;
+                 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+                 pl -= es)
+                swapfunc(pl, pl - es, es);
+        return;
+    }
+
+    pn = (char *)a + n * es;
+    d1 = MIN(pa - (char *)a, pb - pa);
+    vecswap(a, pb - d1, d1);
+    d1 = MIN(pd - pc, pn - pd - es);
+    vecswap(pb, pn - d1, d1);
+
+    d1 = pb - pa;
+    d2 = pd - pc;
+    if (d1 <= d2) {
+        /* Recurse on left partition, then iterate on right partition */
+        if (d1 > es) {
+            qsort_r_fallback(a, d1 / es, es, thunk, cmp);
+        }
+        if (d2 > es) {
+            /* Iterate rather than recurse to save stack space */
+            /* qsort(pn - d2, d2 / es, es, cmp); */
+            a = pn - d2;
+            n = d2 / es;
+            goto loop;
+        }
+    } else {
+        /* Recurse on right partition, then iterate on left partition */
+        if (d2 > es) {
+            qsort_r_fallback(pn - d2, d2 / es, es, thunk, cmp);
+        }
+        if (d1 > es) {
+            /* Iterate rather than recurse to save stack space */
+            /* qsort(a, d1 / es, es, cmp); */
+            n = d1 / es;
+            goto loop;
+        }
+    }
+}
 
-   (Actually, with glibc 2.3.6 on my Intel Core Duo, my implementation
-   below seems to be significantly faster than qsort. Go figure.)
-*/
+/* these are required for GNU api compatibility as nlopt uses the BSD arguments ordering */
+typedef struct {
+    cmp_t* compar;
+    void *thunk;
+} qsort_wrapper;
 
-#ifndef HAVE_QSORT_R_damn_it_use_my_own
-/* swap size bytes between a_ and b_ */
-static void swap(void *a_, void *b_, size_t size)
+/* Adapts the GNU comparator order (a, b, arg) to the BSD-order callback. */
+static int qsort_cmp_wrap(const void *a, const void *b, void *thunk)
 {
-    if (a_ == b_)
-        return;
-    {
-        size_t i, nlong = size / sizeof(long);
-        long *a = (long *) a_, *b = (long *) b_;
-        for (i = 0; i < nlong; ++i) {
-            long c = a[i];
-            a[i] = b[i];
-            b[i] = c;
-        }
-        a_ = (void *) (a + nlong);
-        b_ = (void *) (b + nlong);
-    }
-    {
-        size_t i;
-        char *a = (char *) a_, *b = (char *) b_;
-        size = size % sizeof(long);
-        for (i = 0; i < size; ++i) {
-            char c = a[i];
-            a[i] = b[i];
-            b[i] = c;
-        }
-    }
+    qsort_wrapper *wrap = (qsort_wrapper *) thunk;
+    return (*wrap->compar)(wrap->thunk, a, b);
 }
-#endif /* HAVE_QSORT_R */
 
-void nlopt_qsort_r(void *base_, size_t nmemb, size_t size, void *thunk, int (*compar) (void *, const void *, const void *))
+/*
+ * Re-entrant sort using the BSD convention: compar is called as
+ * compar(thunk, x, y).  Dispatches to the C library's qsort_r/qsort_s when
+ * the preprocessor tests below select one, otherwise to qsort_r_fallback.
+ */
+void nlopt_qsort_r(void *base_, size_t nmemb, size_t size, void *thunk, cmp_t* compar)
 {
-#ifdef HAVE_QSORT_R_damn_it_use_my_own
-    /* Even if we could detect glibc vs. BSD by appropriate
-       macrology, there is no way to make the calls compatible
-       without writing a wrapper for the compar function...screw
-       this. */
+#if defined(HAVE_QSORT_R) && (defined(__APPLE__) || defined(__FreeBSD__))
     qsort_r(base_, nmemb, size, thunk, compar);
+#elif defined(HAVE_QSORT_R) && defined(__linux__)
+    qsort_wrapper wrapper;
+    wrapper.compar = compar;
+    wrapper.thunk = thunk;
+    qsort_r(base_, nmemb, size, qsort_cmp_wrap, &wrapper);
+#elif defined(_WIN32)
+    qsort_s(base_, nmemb, size, compar, thunk);
 #else
-    char *base = (char *) base_;
-    if (nmemb < 10) {           /* use O(nmemb^2) algorithm for small enough nmemb */
-        size_t i, j;
-        for (i = 0; i + 1 < nmemb; ++i)
-            for (j = i + 1; j < nmemb; ++j)
-                if (compar(thunk, base + i * size, base + j * size) > 0)
-                    swap(base + i * size, base + j * size, size);
-    } else {
-        size_t i, pivot, npart;
-        /* pick median of first/middle/last elements as pivot */
-        {
-            const char *a = base, *b = base + (nmemb / 2) * size, *c = base + (nmemb - 1) * size;
-            pivot = compar(thunk, a, b) < 0 ? (compar(thunk, b, c) < 0 ? nmemb / 2 : (compar(thunk, a, c) < 0 ? nmemb - 1 : 0))
-                : (compar(thunk, a, c) < 0 ? 0 : (compar(thunk, b, c) < 0 ? nmemb - 1 : nmemb / 2));
-        }
-        /* partition array */
-        swap(base + pivot * size, base + (nmemb - 1) * size, size);
-        pivot = (nmemb - 1) * size;
-        for (i = npart = 0; i < nmemb - 1; ++i)
-            if (compar(thunk, base + i * size, base + pivot) <= 0)
-                swap(base + i * size, base + (npart++) * size, size);
-        swap(base + npart * size, base + pivot, size);
-        /* recursive sort of two partitions */
-        nlopt_qsort_r(base, npart, size, thunk, compar);
-        npart++;                /* don't need to sort pivot */
-        nlopt_qsort_r(base + npart * size, nmemb - npart, size, thunk, compar);
-    }
-#endif /* !HAVE_QSORT_R */
+    qsort_r_fallback(base_, nmemb, size, thunk, compar);
+#endif
 }
-- 
2.30.2