From 435f3202410d2116b3e1e58bf797b4a05f5a71eb Mon Sep 17 00:00:00 2001 From: stevenj Date: Thu, 6 Sep 2007 11:43:35 -0400 Subject: [PATCH] use clever O(1) algorithm from Knuth to find rightmost zero bit (I doubt it's significantly faster, but it sure is cooler) darcs-hash:20070906154335-c8de0-7d3f930d12b00a5d71a0406605cace694195e620.gz --- util/sobolseq.c | 64 ++++++++++++++++--------------------------------- 1 file changed, 21 insertions(+), 43 deletions(-) diff --git a/util/sobolseq.c b/util/sobolseq.c index 895152b..1948277 100644 --- a/util/sobolseq.c +++ b/util/sobolseq.c @@ -75,49 +75,23 @@ typedef struct nlopt_soboldata_s { uint32_t n; /* number of x's generated so far */ } soboldata; -/* return position (0, 1, ...) of rightmost zero bit in n, - via binary search on the (32) bits of n. */ +/* Return position (0, 1, ...) of rightmost zero bit in n, + * via binary search on the (32) bits of n. + * + * This code uses a 32-bit version of algorithm to find the rightmost + * one bit in Knuth, _The Art of Computer Programming_, volume 4A + * (draft fascicle), section 7.1.3, "Bitwise tricks and + * techniques." + * + * Assumes n has a zero bit, i.e. n < 2^32 - 1. + * + */ static unsigned rightzero32(uint32_t n) { -#if 0 /* use 8-bit lookup table ... seems slightly slower */ - static const unsigned rightone8[256] = { - 8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0 - }; -#endif - unsigned z = 0; - n = ~n; /* change to rightmost-one bit problem */ - if (n & 0xffffU) { /* 1 in lower 16 bits */ - lower16: - if (n & 0xffU) { /* 1 in lower 8 bits */ - lower8: -#if 0 /* use 8-bit lookup table ... seems slightly slower */ - return z + rightone8[n & 0xffU]; -#else - if (n & 0xfU) { /* 1 in lower 4 bits */ - lower4: - if (n & 3U) /* 1 in lower 2 bits */ - return (z + !(n & 1)); - else /* 1 in upper 2 bits */ - return (z + 2 + !(n & 4)); - } - else { /* 1 in upper 4 bits */ - n >>= 4; - z += 4; - goto lower4; - } -#endif - } - else { /* 1 in upper 8 bits */ - n >>= 8; - z += 8; - goto lower8; - } - } - else if (n) { /* 1 in upper 16 bits */ - n >>= (z = 16); - goto lower16; - } - return 32; /* no 1 */ + const uint32_t a = 0x05f66a47; /* magic number, found by brute force */ + static const unsigned decode[32] = {0,1,2,26,23,3,15,27,24,21,19,4,12,16,28,6,31,25,22,14,20,18,11,5,30,13,17,10,29,9,8,7}; + n = ~n; /* change to rightmost-one problem */ + return decode[(a * (n & (-n))) >> 27]; } /* generate the next term x_{n+1} in the Sobol sequence, as an array @@ -125,9 +99,13 @@ static unsigned rightzero32(uint32_t n) (if too many #'s generated) */ static int sobol_gen(soboldata *sd, double *x) { - unsigned c = rightzero32(sd->n++), b, i, sdim = sd->sdim; + unsigned c, b, i, sdim; - if (c >= 32) return 0; + if (c == 4294967295U) return 0; /* c == 2^32 - 1 ... we would + need to switch to a 64-bit version + to generate more terms. */ + c = rightzero32(sd->n++); + sdim = sd->sdim; for (i = 0; i < sdim; ++i) { b = sd->b[i]; if (b >= c) { -- 2.30.2