mlsl/mlsl.c

   1 /* Copyright (c) 2007-2008 Massachusetts Institute of Technology
   2  *
   3  * Permission is hereby granted, free of charge, to any person obtaining
   4  * a copy of this software and associated documentation files (the
   5  * "Software"), to deal in the Software without restriction, including
   6  * without limitation the rights to use, copy, modify, merge, publish,
   7  * distribute, sublicense, and/or sell copies of the Software, and to
   8  * permit persons to whom the Software is furnished to do so, subject to
   9  * the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be
  12  * included in all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  18  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  19  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  20  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  */
  22
  23 /* Multi-Level Single-Linkage (MLSL) algorithm for
  24    global optimization by random local optimizations (a multistart algorithm
  25    with "clustering" to avoid repeated detection of the same local minimum),
  26    modified to optionally use a Sobol' low-discrepancy sequence (LDS) instead
  27    of pseudorandom numbers.  See:
  28
  29    A. H. G. Rinnooy Kan and G. T. Timmer, "Stochastic global optimization
  30    methods," Mathematical Programming, vol. 39, p. 27-78 (1987).
  31        [ actually 2 papers -- part I: clustering methods (p. 27), then
  32                               part II: multilevel methods (p. 57) ]
  33
  34    and also:
  35
  36    Sergei Kucherenko and Yury Sytsko, "Application of deterministic
  37    low-discrepancy sequences in global optimization," Computational
  38    Optimization and Applications, vol. 30, p. 297-318 (2005).
  39
  40    Compared to the above papers, I made a couple other modifications
  41    to the algorithm besides the use of a LDS.
  42
  43    1) first, the original algorithm suggests discarding points
  44       within a *fixed* distance of the boundaries or of previous
  45       local minima; I changed this to a distance that decreases with,
  46       iteration, proportional to the same threshold radius used
  47       for clustering.  (In the case of the boundaries, I make
  48       the proportionality constant rather small as well.)
  49
  50    2) Kan and Timmer suggest using fancy "spiral-search" algorithms
  51       to quickly locate nearest-neighbor points, reducing the
  52       overall time for N sample points from O(N^2) to O(N log N)
  53       However, recent papers seem to show that such schemes (kd trees,
  54       etcetera) become inefficient for high-dimensional spaces (d > 20),
  55       and finding a better approach seems to be an open question.  Therefore,
  56       since I am mainly interested in MLSL for high-dimensional problems
  57       (in low dimensions we have DIRECT etc.), I punted on this question
  58       for now.  In practice, O(N^2) (which does not count the #points
  59       evaluated in local searches) does not seem too bad if the objective
  60       function is expensive.
  61
  62 */
  63
  64 #include <stdlib.h>
  65 #include <math.h>
  66 #include <string.h>
  67
  68 #include "redblack.h"
  69 #include "mlsl.h"
  70
  71 /* data structure for each random/quasirandom point in the population */
  72 typedef struct {
  73      double f; /* function value at x */
  74      int minimized; /* if we have already minimized starting from x */
  75      double closest_pt_d; /* distance^2 to closest pt with smaller f */
  76      double closest_lm_d; /* distance^2 to closest lm with smaller f*/
  77      double x[1]; /* array of length n (K&R struct hack) */
  78 } pt;
  79
  80 /* all of the data used by the various mlsl routines...it's
  81    not clear in hindsight that we need to put all of this in a data
  82    structure since most of the work occurs in a single routine,
  83    but it doesn't hurt us */
  84 typedef struct {
  85      int n; /* # dimensions */
  86      const double *lb, *ub;
  87      nlopt_stopping *stop; /* stopping criteria */
  88      nlopt_func f; void *f_data;
  89
  90      rb_tree pts; /* tree of points (k == pt), sorted by f */
  91      rb_tree lms; /* tree of local minimizers, sorted by function value
  92                      (k = array of length d+1, [0] = f, [1..d] = x) */
  93
  94      nlopt_sobol s; /* sobol data for LDS point generation, or NULL
  95                        to use pseudo-random numbers */
  96
  97      nlopt_algorithm local_alg; /* local search algorithm */
  98      int local_maxeval; /* max # local iterations (0 if unlimited) */
  99
 100      double R_prefactor, dlm, dbound, gamma; /* parameters of MLSL */
 101      int N; /* number of pts to add per iteration */
 102 } mlsl_data;
 103
 104 /* comparison routines to sort the red-black trees by function value */
 105 static int pt_compare(rb_key p1_, rb_key p2_)
 106 {
 107      pt *p1 = (pt *) p1_;
 108      pt *p2 = (pt *) p2_;
 109      if (p1->f < p2->f) return -1;
 110      if (p1->f > p2->f) return +1;
 111      return 0;
 112 }
 113 static int lm_compare(double *k1, double *k2)
 114 {
 115      if (*k1 < *k2) return -1;
 116      if (*k1 > *k2) return +1;
 117      return 0;
 118 }
 119
 120 /* Euclidean distance |x1 - x2|^2 */
 121 static double distance2(int n, const double *x1, const double *x2)
 122 {
 123      int i;
 124      double d = 0.;
 125      for (i = 0; i < n; ++i) {
 126           double dx = x1[i] - x2[i];
 127           d += dx * dx;
 128      }
 129      return d;
 130 }
 131
 132 /* find the closest pt to p with a smaller function value;
 133    this function is called when p is first added to our tree */
 134 static void find_closest_pt(int n, rb_tree *pts, pt *p)
 135 {
 136      rb_node *node = rb_tree_find_lt(pts, (rb_key) p);
 137      double closest_d = HUGE_VAL;
 138      while (node) {
 139           double d = distance2(n, p->x, ((pt *) node->k)->x);
 140           if (d < closest_d) closest_d = d;
 141           node = rb_tree_pred(node);
 142      }
 143      p->closest_pt_d = closest_d;
 144 }
 145
 146 /* find the closest local minimizer (lm) to p with a smaller function value;
 147    this function is called when p is first added to our tree */
 148 static void find_closest_lm(int n, rb_tree *lms, pt *p)
 149 {
 150      rb_node *node = rb_tree_find_lt(lms, &p->f);
 151      double closest_d = HUGE_VAL;
 152      while (node) {
 153           double d = distance2(n, p->x, node->k+1);
 154           if (d < closest_d) closest_d = d;
 155           node = rb_tree_pred(node);
 156      }
 157      p->closest_lm_d = closest_d;
 158 }
 159
 160 /* given newpt, which presumably has just been added to our
 161    tree, update all pts with a greater function value in case
 162    newpt is closer to them than their previous closest_pt ...
 163    we can ignore already-minimized points since we never do
 164    local minimization from the same point twice */
 165 static void pts_update_newpt(int n, rb_tree *pts, pt *newpt)
 166 {
 167      rb_node *node = rb_tree_find_gt(pts, (rb_key) newpt);
 168      while (node) {
 169           pt *p = (pt *) node->k;
 170           if (!p->minimized) {
 171                double d = distance2(n, newpt->x, p->x);
 172                if (d < p->closest_pt_d) p->closest_pt_d = d;
 173           }
 174           node = rb_tree_succ(node);
 175      }
 176 }
 177
 178 /* given newlm, which presumably has just been added to our
 179    tree, update all pts with a greater function value in case
 180    newlm is closer to them than their previous closest_lm ...
 181    we can ignore already-minimized points since we never do
 182    local minimization from the same point twice */
 183 static void pts_update_newlm(int n, rb_tree *pts, double *newlm)
 184 {
 185      pt tmp_pt;
 186      rb_node *node;
 187      tmp_pt.f = newlm[0];
 188      node = rb_tree_find_gt(pts, (rb_key) &tmp_pt);
 189      while (node) {
 190           pt *p = (pt *) node->k;
 191           if (!p->minimized) {
 192                double d = distance2(n, newlm+1, p->x);
 193                if (d < p->closest_lm_d) p->closest_lm_d = d;
 194           }
 195           node = rb_tree_succ(node);
 196      }
 197 }
 198
 199 static int is_potential_minimizer(mlsl_data *mlsl, pt *p,
 200                                   double dpt_min,
 201                                   double dlm_min,
 202                                   double dbound_min)
 203 {
 204      int i, n = mlsl->n;
 205      const double *lb = mlsl->lb;
 206      const double *ub = mlsl->ub;
 207      const double *x = p->x;
 208
 209      if (p->minimized)
 210           return 0;
 211
 212      if (p->closest_pt_d <= dpt_min*dpt_min)
 213           return 0;
 214
 215      if (p->closest_lm_d <= dlm_min*dlm_min)
 216           return 0;
 217
 218      for (i = 0; i < n; ++i)
 219           if (x[i] - lb[i] <= dbound_min || ub[i] - x[i] <= dbound_min)
 220                return 0;
 221
 222      return 1;
 223 }
 224
 225 #define K2PI (6.2831853071795864769252867665590057683943388)
 226
 227 /* compute Gamma(1 + n/2)^{1/n} ... this is a constant factor
 228    used in MLSL as part of the minimum-distance cutoff*/
 229 static double gam(int n)
 230 {
 231      /* use Stirling's approximation:
 232         Gamma(1 + z) ~ sqrt(2*pi*z) * z^z / exp(z)
 233         ... this is more than accurate enough for our purposes
 234             (error in gam < 15% for d=1, < 4% for d=2, < 2% for d=3, ...)
 235             and avoids overflow problems because we can combine it with
 236             the ^{1/n} exponent */
 237      double z = n/2;
 238      return sqrt(pow(K2PI * z, 1.0/n) * z) * exp(-0.5);
 239 }
 240
 241 static pt *alloc_pt(int n)
 242 {
 243      pt *p = (pt *) malloc(sizeof(pt) + (n-1) * sizeof(double));
 244      if (p) {
 245           p->minimized = 0;
 246           p->closest_pt_d = HUGE_VAL;
 247           p->closest_lm_d = HUGE_VAL;
 248      }
 249      return p;
 250 }
 251
 252 /* wrapper around objective function that increments evaluation count */
 253 static double fcount(int n, const double *x, double *grad, void *p_)
 254 {
 255      mlsl_data *p = (mlsl_data *) p_;
 256      p->stop->nevals++;
 257      return p->f(n, x, grad, p->f_data);
 258 }
 259
 260 static void get_minf(mlsl_data *d, double *minf, double *x)
 261 {
 262      rb_node *node = rb_tree_min(&d->pts);
 263      if (node) {
 264           *minf = ((pt *) node->k)->f;
 265           memcpy(x, ((pt *) node->k)->x, sizeof(double) * d->n);
 266      }
 267      node = rb_tree_min(&d->lms);
 268      if (node && node->k[0] < *minf) {
 269           *minf = node->k[0];
 270           memcpy(x, node->k + 1, sizeof(double) * d->n);
 271      }
 272 }
 273
 274 #define MIN(a,b) ((a) < (b) ? (a) : (b))
 275
 276 #define MLSL_SIGMA 2. /* MLSL sigma parameter, using value from the papers */
 277 #define MLSL_GAMMA 0.3 /* MLSL gamma parameter (FIXME: what should it be?) */
 278
 279 nlopt_result mlsl_minimize(int n, nlopt_func f, void *f_data,
 280                            const double *lb, const double *ub, /* bounds */
 281                            double *x, /* in: initial guess, out: minimizer */
 282                            double *minf,
 283                            nlopt_stopping *stop,
 284                            nlopt_algorithm local_alg,
 285                            int local_maxeval,
 286                            int Nsamples, /* #samples/iteration (0=default) */
 287                            int lds) /* random or low-discrepancy seq. (lds) */
 288 {
 289      nlopt_result ret = NLOPT_SUCCESS;
 290      mlsl_data d;
 291      int i;
 292      pt *p;
 293
 294      if (!Nsamples)
 295           d.N = 4; /* FIXME: what is good number of samples per iteration? */
 296      else
 297           d.N = Nsamples;
 298      if (d.N < 1) return NLOPT_INVALID_ARGS;
 299
 300      d.n = n;
 301      d.lb = lb; d.ub = ub;
 302      d.stop = stop;
 303      d.f = f; d.f_data = f_data;
 304      rb_tree_init(&d.pts, pt_compare);
 305      rb_tree_init(&d.lms, lm_compare);
 306      d.s = lds ? nlopt_sobol_create((unsigned) n) : NULL;
 307      d.local_alg = local_alg; d.local_maxeval = local_maxeval;
 308
 309      d.gamma = MLSL_GAMMA;
 310
 311      d.R_prefactor = sqrt(2./K2PI) * pow(gam(n) * MLSL_SIGMA, 1.0/n);
 312      for (i = 0; i < n; ++i)
 313           d.R_prefactor *= pow(ub[i] - lb[i], 1.0/n);
 314
 315      /* MLSL also suggests setting some minimum distance from points
 316         to previous local minimiza and to the boundaries; I don't know
 317         how to choose this as a fixed number, so I set it proportional
 318         to R; see also the comments at top.  dlm and dbound are the
 319         proportionality constants. */
 320      d.dlm = 1.0; /* min distance/R to local minima (FIXME: good value?) */
 321      d.dbound = 1e-6; /* min distance/R to ub/lb boundaries (good value?) */
 322
 323
 324      p = alloc_pt(n);
 325      if (!p) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
 326
 327      /* FIXME: how many sobol points to skip, if any? */
 328      nlopt_sobol_skip(d.s, (unsigned) (10*n+d.N), p->x);
 329
 330      memcpy(p->x, x, n * sizeof(double));
 331      p->f = f(n, x, NULL, f_data);
 332      stop->nevals++;
 333      if (!rb_tree_insert(&d.pts, (rb_key) p)) {
 334           free(p); ret = NLOPT_OUT_OF_MEMORY;
 335      }
 336      if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
 337      else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
 338      else if (p->f < stop->minf_max) ret = NLOPT_MINF_MAX_REACHED;
 339
 340      while (ret == NLOPT_SUCCESS) {
 341           rb_node *node;
 342           double R;
 343
 344           get_minf(&d, minf, x);
 345
 346           /* sampling phase: add random/quasi-random points */
 347           for (i = 0; i < d.N && ret == NLOPT_SUCCESS; ++i) {
 348                p = alloc_pt(n);
 349                if (!p) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
 350                if (d.s) nlopt_sobol_next(d.s, p->x, lb, ub);
 351                else { /* use random points instead of LDS */
 352                     int j;
 353                     for (j = 0; j < n; ++j) p->x[j] = nlopt_urand(lb[j],ub[j]);
 354                }
 355                p->f = f(n, p->x, NULL, f_data);
 356                stop->nevals++;
 357                if (!rb_tree_insert(&d.pts, (rb_key) p)) {
 358                     free(p); ret = NLOPT_OUT_OF_MEMORY;
 359                }
 360                if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
 361                else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
 362                else if (p->f < stop->minf_max) ret = NLOPT_MINF_MAX_REACHED;
 363                else {
 364                     find_closest_pt(n, &d.pts, p);
 365                     find_closest_lm(n, &d.lms, p);
 366                     pts_update_newpt(n, &d.pts, p);
 367                }
 368           }
 369
 370           /* distance threshhold parameter R in MLSL */
 371           R = d.R_prefactor
 372                * pow(log((double) d.pts.N) / d.pts.N, 1.0 / n);
 373
 374           /* local search phase: do local opt. for promising points */
 375           node = rb_tree_min(&d.pts);
 376           for (i = (int) (ceil(d.gamma * d.pts.N) + 0.5);
 377                node && i > 0 && ret == NLOPT_SUCCESS; --i) {
 378                p = (pt *) node->k;
 379                if (is_potential_minimizer(&d, p,
 380                                           R, d.dlm*R, d.dbound*R)) {
 381                     nlopt_result lret;
 382                     double *lm;
 383                     double t = nlopt_seconds();
 384
 385                     if (nlopt_stop_evals(stop)) {
 386                          ret = NLOPT_MAXEVAL_REACHED; break;
 387                     }
 388                     if (stop->maxtime > 0 &&
 389                         t - stop->start >= stop->maxtime) {
 390                          ret = NLOPT_MAXTIME_REACHED; break;
 391                     }
 392                     lm = (double *) malloc(sizeof(double) * (n+1));
 393                     if (!lm) { ret = NLOPT_OUT_OF_MEMORY; goto done; }
 394                     memcpy(lm+1, p->x, sizeof(double) * n);
 395                     lret = nlopt_minimize(local_alg, n, fcount, &d,
 396                                           lb, ub, lm+1, lm,
 397                                           stop->minf_max,
 398                                           stop->ftol_rel, stop->ftol_abs,
 399                                           stop->xtol_rel, stop->xtol_abs,
 400                                           local_maxeval > 0 ?
 401                                           MIN(local_maxeval,
 402                                               stop->maxeval - stop->nevals)
 403                                           : stop->maxeval - stop->nevals,
 404                                           stop->maxtime - (t - stop->start));
 405                     p->minimized = 1;
 406                     if (lret < 0) { free(lm); ret = lret; goto done; }
 407                     if (!rb_tree_insert(&d.lms, lm)) {
 408                          free(lm); ret = NLOPT_OUT_OF_MEMORY;
 409                     }
 410                     else if (*lm < stop->minf_max)
 411                          ret = NLOPT_MINF_MAX_REACHED;
 412                     else if (nlopt_stop_evals(stop))
 413                          ret = NLOPT_MAXEVAL_REACHED;
 414                     else if (nlopt_stop_time(stop))
 415                          ret = NLOPT_MAXTIME_REACHED;
 416                     else
 417                          pts_update_newlm(n, &d.pts, lm);
 418                }
 419
 420                /* TODO: additional stopping criteria based
 421                   e.g. on improvement in function values, etc? */
 422
 423                node = rb_tree_succ(node);
 424           }
 425      }
 426
 427      get_minf(&d, minf, x);
 428
 429  done:
 430      nlopt_sobol_destroy(d.s);
 431      rb_tree_destroy_with_keys(&d.lms);
 432      rb_tree_destroy_with_keys(&d.pts);
 433      return ret;
 434 }