src/algs/mlsl/mlsl.c

   1 /* Copyright (c) 2007-2014 Massachusetts Institute of Technology
   2  *
   3  * Permission is hereby granted, free of charge, to any person obtaining
   4  * a copy of this software and associated documentation files (the
   5  * "Software"), to deal in the Software without restriction, including
   6  * without limitation the rights to use, copy, modify, merge, publish,
   7  * distribute, sublicense, and/or sell copies of the Software, and to
   8  * permit persons to whom the Software is furnished to do so, subject to
   9  * the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be
  12  * included in all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  18  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  19  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  */
  22
  23 /* Multi-Level Single-Linkage (MLSL) algorithm for
  24    global optimization by random local optimizations (a multistart algorithm
  25    with "clustering" to avoid repeated detection of the same local minimum),
  26    modified to optionally use a Sobol' low-discrepancy sequence (LDS) instead
  27    of pseudorandom numbers.  See:
  28
  29    A. H. G. Rinnooy Kan and G. T. Timmer, "Stochastic global optimization
  30    methods," Mathematical Programming, vol. 39, p. 27-78 (1987).
  31        [ actually 2 papers -- part I: clustering methods (p. 27), then
  32                               part II: multilevel methods (p. 57) ]
  33
  34    and also:
  35
  36    Sergei Kucherenko and Yury Sytsko, "Application of deterministic
  37    low-discrepancy sequences in global optimization," Computational
  38    Optimization and Applications, vol. 30, p. 297-318 (2005).
  39
  40    Compared to the above papers, I made a couple other modifications
  41    to the algorithm besides the use of a LDS.
  42
  43    1) first, the original algorithm suggests discarding points
  44       within a *fixed* distance of the boundaries or of previous
  45       local minima; I changed this to a distance that decreases with,
  46       iteration, proportional to the same threshold radius used
  47       for clustering.  (In the case of the boundaries, I make
  48       the proportionality constant rather small as well.)
  49
  50    2) Kan and Timmer suggest using fancy "spiral-search" algorithms
  51       to quickly locate nearest-neighbor points, reducing the
  52       overall time for N sample points from O(N^2) to O(N log N)
  53       However, recent papers seem to show that such schemes (kd trees,
  54       etcetera) become inefficient for high-dimensional spaces (d > 20),
  55       and finding a better approach seems to be an open question.  Therefore,
  56       since I am mainly interested in MLSL for high-dimensional problems
  57       (in low dimensions we have DIRECT etc.), I punted on this question
  58       for now.  In practice, O(N^2) (which does not count the #points
  59       evaluated in local searches) does not seem too bad if the objective
  60       function is expensive.
  61
  62 */
  63
  64 #include <stdlib.h>
  65 #include <math.h>
  66 #include <string.h>
  67
  68 #include "redblack.h"
  69 #include "mlsl.h"
  70
  71 /* data structure for each random/quasirandom point in the population */
  72 typedef struct {
  73      double f; /* function value at x */
  74      int minimized; /* if we have already minimized starting from x */
  75      double closest_pt_d; /* distance^2 to closest pt with smaller f */
  76      double closest_lm_d; /* distance^2 to closest lm with smaller f*/
  77      double x[1]; /* array of length n (K&R struct hack) */
  78 } pt;
  79
  80 /* all of the data used by the various mlsl routines...it's
  81    not clear in hindsight that we need to put all of this in a data
  82    structure since most of the work occurs in a single routine,
  83    but it doesn't hurt us */
  84 typedef struct {
  85      int n; /* # dimensions */
  86      const double *lb, *ub;
  87      nlopt_stopping *stop; /* stopping criteria */
  88      nlopt_func f; void *f_data;
  89
  90      rb_tree pts; /* tree of points (k == pt), sorted by f */
  91      rb_tree lms; /* tree of local minimizers, sorted by function value
  92                      (k = array of length d+1, [0] = f, [1..d] = x) */
  93
  94      nlopt_sobol s; /* sobol data for LDS point generation, or NULL
  95                        to use pseudo-random numbers */
  96
  97      double R_prefactor, dlm, dbound, gamma; /* parameters of MLSL */
  98      int N; /* number of pts to add per iteration */
  99 } mlsl_data;
 100
 101 /* comparison routines to sort the red-black trees by function value */
 102 static int pt_compare(rb_key p1_, rb_key p2_)
 103 {
 104      pt *p1 = (pt *) p1_;
 105      pt *p2 = (pt *) p2_;
 106      if (p1->f < p2->f) return -1;
 107      if (p1->f > p2->f) return +1;
 108      return 0;
 109 }
 110 static int lm_compare(double *k1, double *k2)
 111 {
 112      if (*k1 < *k2) return -1;
 113      if (*k1 > *k2) return +1;
 114      return 0;
 115 }
 116
 117 /* Euclidean distance |x1 - x2|^2 */
 118 static double distance2(int n, const double *x1, const double *x2)
 119 {
 120      int i;
 121      double d = 0.;
 122      for (i = 0; i < n; ++i) {
 123           double dx = x1[i] - x2[i];
 124           d += dx * dx;
 125      }
 126      return d;
 127 }
 128
 129 /* find the closest pt to p with a smaller function value;
 130    this function is called when p is first added to our tree */
 131 static void find_closest_pt(int n, rb_tree *pts, pt *p)
 132 {
 133      rb_node *node = rb_tree_find_lt(pts, (rb_key) p);
 134      double closest_d = HUGE_VAL;
 135      while (node) {
 136           double d = distance2(n, p->x, ((pt *) node->k)->x);
 137           if (d < closest_d) closest_d = d;
 138           node = rb_tree_pred(node);
 139      }
 140      p->closest_pt_d = closest_d;
 141 }
 142
 143 /* find the closest local minimizer (lm) to p with a smaller function value;
 144    this function is called when p is first added to our tree */
 145 static void find_closest_lm(int n, rb_tree *lms, pt *p)
 146 {
 147      rb_node *node = rb_tree_find_lt(lms, &p->f);
 148      double closest_d = HUGE_VAL;
 149      while (node) {
 150           double d = distance2(n, p->x, node->k+1);
 151           if (d < closest_d) closest_d = d;
 152           node = rb_tree_pred(node);
 153      }
 154      p->closest_lm_d = closest_d;
 155 }
 156
 157 /* given newpt, which presumably has just been added to our
 158    tree, update all pts with a greater function value in case
 159    newpt is closer to them than their previous closest_pt ...
 160    we can ignore already-minimized points since we never do
 161    local minimization from the same point twice */
 162 static void pts_update_newpt(int n, rb_tree *pts, pt *newpt)
 163 {
 164      rb_node *node = rb_tree_find_gt(pts, (rb_key) newpt);
 165      while (node) {
 166           pt *p = (pt *) node->k;
 167           if (!p->minimized) {
 168                double d = distance2(n, newpt->x, p->x);
 169                if (d < p->closest_pt_d) p->closest_pt_d = d;
 170           }
 171           node = rb_tree_succ(node);
 172      }
 173 }
 174
 175 /* given newlm, which presumably has just been added to our
 176    tree, update all pts with a greater function value in case
 177    newlm is closer to them than their previous closest_lm ...
 178    we can ignore already-minimized points since we never do
 179    local minimization from the same point twice */
 180 static void pts_update_newlm(int n, rb_tree *pts, double *newlm)
 181 {
 182      pt tmp_pt;
 183      rb_node *node;
 184      tmp_pt.f = newlm[0];
 185      node = rb_tree_find_gt(pts, (rb_key) &tmp_pt);
 186      while (node) {
 187           pt *p = (pt *) node->k;
 188           if (!p->minimized) {
 189                double d = distance2(n, newlm+1, p->x);
 190                if (d < p->closest_lm_d) p->closest_lm_d = d;
 191           }
 192           node = rb_tree_succ(node);
 193      }
 194 }
 195
 196 static int is_potential_minimizer(mlsl_data *mlsl, pt *p,
 197                                   double dpt_min,
 198                                   double dlm_min,
 199                                   double dbound_min)
 200 {
 201      int i, n = mlsl->n;
 202      const double *lb = mlsl->lb;
 203      const double *ub = mlsl->ub;
 204      const double *x = p->x;
 205
 206      if (p->minimized)
 207           return 0;
 208
 209      if (p->closest_pt_d <= dpt_min*dpt_min)
 210           return 0;
 211
 212      if (p->closest_lm_d <= dlm_min*dlm_min)
 213           return 0;
 214
 215      for (i = 0; i < n; ++i)
 216           if ((x[i] - lb[i] <= dbound_min || ub[i] - x[i] <= dbound_min)
 217               && ub[i] - lb[i] > dbound_min)
 218                return 0;
 219
 220      return 1;
 221 }
 222
 223 #define K2PI (6.2831853071795864769252867665590057683943388)
 224
 225 /* compute Gamma(1 + n/2)^{1/n} ... this is a constant factor
 226    used in MLSL as part of the minimum-distance cutoff*/
 227 static double gam(int n)
 228 {
 229      /* use Stirling's approximation:
 230         Gamma(1 + z) ~ sqrt(2*pi*z) * z^z / exp(z)
 231         ... this is more than accurate enough for our purposes
 232             (error in gam < 15% for d=1, < 4% for d=2, < 2% for d=3, ...)
 233             and avoids overflow problems because we can combine it with
 234             the ^{1/n} exponent */
 235      double z = n/2;
 236      return sqrt(pow(K2PI * z, 1.0/n) * z) * exp(-0.5);
 237 }
 238
 239 static pt *alloc_pt(int n)
 240 {
 241      pt *p = (pt *) malloc(sizeof(pt) + (n-1) * sizeof(double));
 242      if (p) {
 243           p->minimized = 0;
 244           p->closest_pt_d = HUGE_VAL;
 245           p->closest_lm_d = HUGE_VAL;
 246      }
 247      return p;
 248 }
 249
 250 /* wrapper around objective function that increments evaluation count */
 251 static double fcount(unsigned n, const double *x, double *grad, void *p_)
 252 {
 253      mlsl_data *p = (mlsl_data *) p_;
 254      ++ *(p->stop->nevals_p);
 255      return p->f(n, x, grad, p->f_data);
 256 }
 257
 258 static void get_minf(mlsl_data *d, double *minf, double *x)
 259 {
 260      rb_node *node = rb_tree_min(&d->pts);
 261      if (node) {
 262           *minf = ((pt *) node->k)->f;
 263           memcpy(x, ((pt *) node->k)->x, sizeof(double) * d->n);
 264      }
 265      node = rb_tree_min(&d->lms);
 266      if (node && node->k[0] < *minf) {
 267           *minf = node->k[0];
 268           memcpy(x, node->k + 1, sizeof(double) * d->n);
 269      }
 270 }
 271
 272 #define MIN(a,b) ((a) < (b) ? (a) : (b))
 273
 274 #define MLSL_SIGMA 2. /* MLSL sigma parameter, using value from the papers */
 275 #define MLSL_GAMMA 0.3 /* MLSL gamma parameter (FIXME: what should it be?) */
 276
 277 nlopt_result mlsl_minimize(int n, nlopt_func f, void *f_data,
 278                            const double *lb, const double *ub, /* bounds */
 279                            double *x, /* in: initial guess, out: minimizer */
 280                            double *minf,
 281                            nlopt_stopping *stop,
 282                            nlopt_opt local_opt,
 283                            int Nsamples, /* #samples/iteration (0=default) */
 284                            int lds) /* random or low-discrepancy seq. (lds) */
 285 {
 286      nlopt_result ret = NLOPT_SUCCESS;
 287      mlsl_data d;
 288      int i;
 289      pt *p;
 290
 291      if (!Nsamples)
 292           d.N = 4; /* FIXME: what is good number of samples per iteration? */
 293      else
 294           d.N = Nsamples;
 295      if (d.N < 1) {
 296          nlopt_stop_msg(stop, "population %d is too small", d.N);
 297          return NLOPT_INVALID_ARGS;
 298      }
 299
 300      d.n = n;
 301      d.lb = lb; d.ub = ub;
 302      d.stop = stop;
 303      d.f = f; d.f_data = f_data;
 304      rb_tree_init(&d.pts, pt_compare);
 305      rb_tree_init(&d.lms, lm_compare);
 306      d.s = lds ? nlopt_sobol_create((unsigned) n) : NULL;
 307
 308      nlopt_set_min_objective(local_opt, fcount, &d);
 309      nlopt_set_lower_bounds(local_opt, lb);
 310      nlopt_set_upper_bounds(local_opt, ub);
 311      nlopt_set_stopval(local_opt, stop->minf_max);
 312
 313      d.gamma = MLSL_GAMMA;
 314
 315      d.R_prefactor = sqrt(2./K2PI) * pow(gam(n) * MLSL_SIGMA, 1.0/n);
 316      for (i = 0; i < n; ++i)
 317           d.R_prefactor *= pow(ub[i] - lb[i], 1.0/n);
 318
 319      /* MLSL also suggests setting some minimum distance from points
 320         to previous local minimiza and to the boundaries; I don't know
 321         how to choose this as a fixed number, so I set it proportional
 322         to R; see also the comments at top.  dlm and dbound are the
 323         proportionality constants. */
 324      d.dlm = 1.0; /* min distance/R to local minima (FIXME: good value?) */
 325      d.dbound = 1e-6; /* min distance/R to ub/lb boundaries (good value?) */
 326
 327
 328      p = alloc_pt(n);
 329      if (!p) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
 330
 331      /* FIXME: how many sobol points to skip, if any? */
 332      nlopt_sobol_skip(d.s, (unsigned) (10*n+d.N), p->x);
 333
 334      memcpy(p->x, x, n * sizeof(double));
 335      p->f = f(n, x, NULL, f_data);
 336      ++ *(stop->nevals_p);
 337      if (!rb_tree_insert(&d.pts, (rb_key) p)) {
 338           free(p); ret = NLOPT_OUT_OF_MEMORY;
 339      }
 340      if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
 341      else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
 342      else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
 343      else if (p->f < stop->minf_max) ret = NLOPT_MINF_MAX_REACHED;
 344
 345      while (ret == NLOPT_SUCCESS) {
 346           rb_node *node;
 347           double R;
 348
 349           get_minf(&d, minf, x);
 350
 351           /* sampling phase: add random/quasi-random points */
 352           for (i = 0; i < d.N && ret == NLOPT_SUCCESS; ++i) {
 353                p = alloc_pt(n);
 354                if (!p) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
 355                if (d.s) nlopt_sobol_next(d.s, p->x, lb, ub);
 356                else { /* use random points instead of LDS */
 357                     int j;
 358                     for (j = 0; j < n; ++j) p->x[j] = nlopt_urand(lb[j],ub[j]);
 359                }
 360                p->f = f(n, p->x, NULL, f_data);
 361                ++ *(stop->nevals_p);
 362                if (!rb_tree_insert(&d.pts, (rb_key) p)) {
 363                     free(p); ret = NLOPT_OUT_OF_MEMORY;
 364                }
 365                if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
 366                else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
 367                else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
 368                else if (p->f < stop->minf_max) ret = NLOPT_MINF_MAX_REACHED;
 369                else {
 370                     find_closest_pt(n, &d.pts, p);
 371                     find_closest_lm(n, &d.lms, p);
 372                     pts_update_newpt(n, &d.pts, p);
 373                }
 374           }
 375
 376           /* distance threshhold parameter R in MLSL */
 377           R = d.R_prefactor
 378                * pow(log((double) d.pts.N) / d.pts.N, 1.0 / n);
 379
 380           /* local search phase: do local opt. for promising points */
 381           node = rb_tree_min(&d.pts);
 382           for (i = (int) (ceil(d.gamma * d.pts.N) + 0.5);
 383                node && i > 0 && ret == NLOPT_SUCCESS; --i) {
 384                p = (pt *) node->k;
 385                if (is_potential_minimizer(&d, p,
 386                                           R, d.dlm*R, d.dbound*R)) {
 387                     nlopt_result lret;
 388                     double *lm;
 389                     double t = nlopt_seconds();
 390
 391                     if (nlopt_stop_forced(stop)) {
 392                          ret = NLOPT_FORCED_STOP; break;
 393                     }
 394                     if (nlopt_stop_evals(stop)) {
 395                          ret = NLOPT_MAXEVAL_REACHED; break;
 396                     }
 397                     if (stop->maxtime > 0 &&
 398                         t - stop->start >= stop->maxtime) {
 399                          ret = NLOPT_MAXTIME_REACHED; break;
 400                     }
 401                     lm = (double *) malloc(sizeof(double) * (n+1));
 402                     if (!lm) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
 403                     memcpy(lm+1, p->x, sizeof(double) * n);
 404                     lret = nlopt_optimize_limited(local_opt, lm+1, lm,
 405                                                   stop->maxeval - *(stop->nevals_p),
 406                                                   stop->maxtime -
 407                                                   (t - stop->start));
 408                     p->minimized = 1;
 409                     if (lret < 0) { free(lm); ret = lret; goto done; }
 410                     if (!rb_tree_insert(&d.lms, lm)) {
 411                          free(lm); ret = NLOPT_OUT_OF_MEMORY;
 412                     }
 413                     else if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
 414                     else if (*lm < stop->minf_max)
 415                          ret = NLOPT_MINF_MAX_REACHED;
 416                     else if (nlopt_stop_evals(stop))
 417                          ret = NLOPT_MAXEVAL_REACHED;
 418                     else if (nlopt_stop_time(stop))
 419                          ret = NLOPT_MAXTIME_REACHED;
 420                     else
 421                          pts_update_newlm(n, &d.pts, lm);
 422                }
 423
 424                /* TODO: additional stopping criteria based
 425                   e.g. on improvement in function values, etc? */
 426
 427                node = rb_tree_succ(node);
 428           }
 429      }
 430
 431      get_minf(&d, minf, x);
 432
 433  done:
 434      nlopt_sobol_destroy(d.s);
 435      rb_tree_destroy_with_keys(&d.lms);
 436      rb_tree_destroy_with_keys(&d.pts);
 437      return ret;
 438 }