* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-
#include "nlopt-util.h"
+#include <stdlib.h>
+
+/*
+ * Comparison callback used throughout this file.  The first argument is the
+ * caller-supplied thunk; the remaining two point at the elements to compare.
+ * Callers here test the result against zero (< 0, == 0, > 0).
+ */
+typedef int cmp_t(void *, const void *, const void *);
+
+static inline char *med3(char *, char *, char *, cmp_t *, void *);
+
+/*
+ * Smaller of a and b.  Both the operands and the selected result are
+ * parenthesized so that arguments containing lower-precedence operators
+ * expand correctly.  Each argument may be evaluated more than once.
+ */
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
+ */
+
+/*
+ * Exchange the es bytes starting at a with the es bytes starting at b,
+ * one byte at a time.
+ *
+ * es == 0 is a no-op.  The length is tested before each byte is touched;
+ * a test-after-decrement loop would swap one byte and then wrap the
+ * unsigned counter around, running far past both buffers.
+ */
+static inline void
+swapfunc(char *a, char *b, size_t es)
+{
+    while (es-- > 0) {
+        char t = *a;
+
+        *a++ = *b;
+        *b++ = t;
+    }
+}
+
+#define vecswap(a, b, n) \
+ if ((n) > 0) swapfunc(a, b, n)
-/* Simple replacement for the BSD qsort_r function (re-entrant sorting),
- if it is not available.
+#define CMP(t, x, y) (cmp((t), (x), (y))) /* calls whatever `cmp` is in scope at the use site, passing thunk t first */
- (glibc 2.8 included a qsort_r function as well, but totally
- *%&$#-ed things up by gratuitously changing the argument order, in
- such a way as to allow code using the BSD ordering to compile but
- die a flaming death at runtime. Damn them all to Hell, I'll just
- use my own implementation.)
+/*
+ * Pick the median of the three elements a, b and c as ordered by cmp and
+ * return the pointer to it.  thunk is forwarded to cmp as its first argument.
+ */
+static inline char *
+med3(char *a, char *b, char *c, cmp_t *cmp, void *thunk)
+{
+    if (CMP(thunk, a, b) < 0) {
+        /* a sorts before b */
+        if (CMP(thunk, b, c) < 0)
+            return b;
+        return CMP(thunk, a, c) < 0 ? c : a;
+    }
+
+    /* a does not sort before b */
+    if (CMP(thunk, b, c) > 0)
+        return b;
+    return CMP(thunk, a, c) < 0 ? a : c;
+}
+
+
+/*
+ * Sort, in place, the n elements of es bytes each that start at a.
+ *
+ * a      first byte of the array
+ * n      number of elements
+ * es     size of one element in bytes
+ * thunk  opaque pointer handed to cmp unchanged as its first argument
+ * cmp    comparator, called as cmp(thunk, x, y); its result is tested
+ *        against zero (< 0, == 0, > 0)
+ *
+ * Arrays of fewer than 7 elements are insertion-sorted.  Larger ones are
+ * partitioned around a pivot into "less", "equal" and "greater" runs; the
+ * smaller outer run is sorted by a recursive call and the larger one by
+ * jumping back to the top of the function.
+ *
+ * Zero-sized elements are rejected up front: with es == 0 none of the
+ * pointer steps below make progress, so the partition loop cannot be
+ * relied on to terminate, and swapfunc() would be reached with length 0.
+ */
+void qsort_r_fallback(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
+{
+    char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+    size_t d1, d2;
+    int cmp_result;
+    int swap_cnt;
+
+    if (es == 0)
+        return;
+
+loop:
+    swap_cnt = 0;
+    if (n < 7) {
+        /* Small array: plain insertion sort. */
+        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+            for (pl = pm;
+                 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+                 pl -= es)
+                swapfunc(pl, pl - es, es);
+        return;
+    }
+
+    /* Pivot choice: middle element, median of 3, or median of 3 medians. */
+    pm = (char *)a + (n / 2) * es;
+    if (n > 7) {
+        pl = a;
+        pn = (char *)a + (n - 1) * es;
+        if (n > 40) {
+            size_t d = (n / 8) * es;
+
+            pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
+            pm = med3(pm - d, pm, pm + d, cmp, thunk);
+            pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
+        }
+        pm = med3(pl, pm, pn, cmp, thunk);
+    }
+
+    /* Park the pivot in the first slot; every comparison below is against a. */
+    swapfunc(a, pm, es);
+    pa = pb = (char *)a + es;
+
+    pc = pd = (char *)a + (n - 1) * es;
+    for (;;) {
+        /* Scan up over elements <= pivot, moving equal ones down to pa. */
+        while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
+            if (cmp_result == 0) {
+                swap_cnt = 1;
+                swapfunc(pa, pb, es);
+                pa += es;
+            }
+            pb += es;
+        }
+        /* Scan down over elements >= pivot, moving equal ones up to pd. */
+        while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
+            if (cmp_result == 0) {
+                swap_cnt = 1;
+                swapfunc(pc, pd, es);
+                pd -= es;
+            }
+            pc -= es;
+        }
+        if (pb > pc)
+            break;
+        /* pb is on the wrong side and so is pc: exchange them. */
+        swapfunc(pb, pc, es);
+        swap_cnt = 1;
+        pb += es;
+        pc -= es;
+    }
+    if (swap_cnt == 0) { /* Switch to insertion sort */
+        for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
+            for (pl = pm;
+                 pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
+                 pl -= es)
+                swapfunc(pl, pl - es, es);
+        return;
+    }
+
+    /* Move the equal-to-pivot runs from both ends into the middle. */
+    pn = (char *)a + n * es;
+    d1 = MIN(pa - (char *)a, pb - pa);
+    vecswap(a, pb - d1, d1);
+    d1 = MIN(pd - pc, pn - pd - es);
+    vecswap(pb, pn - d1, d1);
+
+    /* Byte lengths of the "less" run (at a) and "greater" run (ending at pn). */
+    d1 = pb - pa;
+    d2 = pd - pc;
+    if (d1 <= d2) {
+        /* Recurse on left partition, then iterate on right partition */
+        if (d1 > es) {
+            qsort_r_fallback(a, d1 / es, es, thunk, cmp);
+        }
+        if (d2 > es) {
+            /* Iterate rather than recurse to save stack space */
+            a = pn - d2;
+            n = d2 / es;
+            goto loop;
+        }
+    } else {
+        /* Recurse on right partition, then iterate on left partition */
+        if (d2 > es) {
+            qsort_r_fallback(pn - d2, d2 / es, es, thunk, cmp);
+        }
+        if (d1 > es) {
+            /* Iterate rather than recurse to save stack space */
+            n = d1 / es;
+            goto loop;
+        }
+    }
+}
- (Actually, with glibc 2.3.6 on my Intel Core Duo, my implementation
- below seems to be significantly faster than qsort. Go figure.)
-*/
+/*
+ * Required for GNU API compatibility, because nlopt uses the BSD argument
+ * ordering.  Bundles a thunk-first comparator with its thunk so that both
+ * can be reached through the single context pointer given to
+ * qsort_cmp_wrap.
+ */
+typedef struct {
+ cmp_t* compar; /* comparator invoked as compar(thunk, a, b) */
+ void *thunk; /* caller context, forwarded as compar's first argument */
+} qsort_wrapper;
-#ifndef HAVE_QSORT_R_damn_it_use_my_own
-/* swap size bytes between a_ and b_ */
-static void swap(void *a_, void *b_, size_t size)
+static int qsort_cmp_wrap(const void *a, const void *b, void *thunk) /* context-last comparator that forwards to the thunk-first comparator stored in a qsort_wrapper */
{
- if (a_ == b_)
- return;
- {
- size_t i, nlong = size / sizeof(long);
- long *a = (long *) a_, *b = (long *) b_;
- for (i = 0; i < nlong; ++i) {
- long c = a[i];
- a[i] = b[i];
- b[i] = c;
- }
- a_ = (void *) (a + nlong);
- b_ = (void *) (b + nlong);
- }
- {
- size_t i;
- char *a = (char *) a_, *b = (char *) b_;
- size = size % sizeof(long);
- for (i = 0; i < size; ++i) {
- char c = a[i];
- a[i] = b[i];
- b[i] = c;
- }
- }
+ qsort_wrapper *wrap = (qsort_wrapper *) thunk; /* thunk is treated as a pointer to a qsort_wrapper */
+ return (*wrap->compar)(wrap->thunk, a, b); /* reorder the arguments: the stored thunk goes first */
}
-#endif /* HAVE_QSORT_R */
-void nlopt_qsort_r(void *base_, size_t nmemb, size_t size, void *thunk, int (*compar) (void *, const void *, const void *))
+void nlopt_qsort_r(void *base_, size_t nmemb, size_t size, void *thunk, cmp_t* compar) /* Sort nmemb elements of size bytes at base_ using compar(thunk, a, b).
+ * Forwards to a library routine chosen by the preprocessor branches below,
+ * or to qsort_r_fallback when none of them is selected. */
{
-#ifdef HAVE_QSORT_R_damn_it_use_my_own
- /* Even if we could detect glibc vs. BSD by appropriate
- macrology, there is no way to make the calls compatible
- without writing a wrapper for the compar function...screw
- this. */
+#if defined(HAVE_QSORT_R) && (defined(__APPLE__) || defined(__FreeBSD__)) /* qsort_r is called with thunk before the comparator, so compar is passed straight through */
qsort_r(base_, nmemb, size, thunk, compar);
+#elif defined(HAVE_QSORT_R) && defined(__linux__) /* qsort_r is called with the comparator before the context pointer, so go through qsort_cmp_wrap */
+ qsort_wrapper wrapper;
+ wrapper.compar = compar;
+ wrapper.thunk = thunk;
+ qsort_r(base_, nmemb, size, qsort_cmp_wrap, &wrapper); /* wrapper only needs to live for the duration of this call */
+#elif defined(_WIN32) /* qsort_s is called with the comparator before the context pointer; compar is passed unwrapped */
+ qsort_s(base_, nmemb, size, compar, thunk);
#else
- char *base = (char *) base_;
- if (nmemb < 10) { /* use O(nmemb^2) algorithm for small enough nmemb */
- size_t i, j;
- for (i = 0; i + 1 < nmemb; ++i)
- for (j = i + 1; j < nmemb; ++j)
- if (compar(thunk, base + i * size, base + j * size) > 0)
- swap(base + i * size, base + j * size, size);
- } else {
- size_t i, pivot, npart;
- /* pick median of first/middle/last elements as pivot */
- {
- const char *a = base, *b = base + (nmemb / 2) * size, *c = base + (nmemb - 1) * size;
- pivot = compar(thunk, a, b) < 0 ? (compar(thunk, b, c) < 0 ? nmemb / 2 : (compar(thunk, a, c) < 0 ? nmemb - 1 : 0))
- : (compar(thunk, a, c) < 0 ? 0 : (compar(thunk, b, c) < 0 ? nmemb - 1 : nmemb / 2));
- }
- /* partition array */
- swap(base + pivot * size, base + (nmemb - 1) * size, size);
- pivot = (nmemb - 1) * size;
- for (i = npart = 0; i < nmemb - 1; ++i)
- if (compar(thunk, base + i * size, base + pivot) <= 0)
- swap(base + i * size, base + (npart++) * size, size);
- swap(base + npart * size, base + pivot, size);
- /* recursive sort of two partitions */
- nlopt_qsort_r(base, npart, size, thunk, compar);
- npart++; /* don't need to sort pivot */
- nlopt_qsort_r(base + npart * size, nmemb - npart, size, thunk, compar);
- }
-#endif /* !HAVE_QSORT_R */
+ qsort_r_fallback(base_, nmemb, size, thunk, compar); /* no library routine selected above: use the implementation in this file */
+#endif
}