1 /* Copyright (c) 2007-2010 Massachusetts Institute of Technology
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "nlopt-util.h"
31 unsigned mma_verbose = 0; /* > 0 for verbose output */
33 #define MIN(a,b) ((a) < (b) ? (a) : (b))
34 #define MAX(a,b) ((a) > (b) ? (a) : (b))
37 static int my_isnan(double x) { return x != x; }
38 # define isnan my_isnan
41 /* magic minimum value for rho in MMA ... the 2002 paper says it should
42 be a "fixed, strictly positive `small' number, e.g. 1e-5"
43 ... grrr, I hate these magic numbers, which seem like they
44 should depend on the objective function in some way ... in particular,
45 note that rho is dimensionful (= dimensions of objective function) */
46 #define MMA_RHOMIN 1e-5
48 /***********************************************************************/
49 /* function for MMA's dual solution of the approximate problem */
52 int count; /* evaluation count, incremented each call */
53 unsigned n; /* must be set on input to dimension of x */
54 const double *x, *lb, *ub, *sigma, *dfdx; /* arrays of length n */
55 const double *dfcdx; /* m-by-n array of fc gradients */
56 double fval, rho; /* must be set on input */
57 const double *fcval, *rhoc; /* arrays of length m */
58 double *xcur; /* array of length n, output each time */
59 double gval, wval, *gcval; /* output each time (array length m) */
62 static double sqr(double x) { return x * x; }
64 static double dual_func(unsigned m, const double *y, double *grad, void *d_)
66 dual_data *d = (dual_data *) d_;
68 const double *x = d->x, *lb = d->lb, *ub = d->ub, *sigma = d->sigma,
70 const double *dfcdx = d->dfcdx;
71 double rho = d->rho, fval = d->fval;
72 const double *rhoc = d->rhoc, *fcval = d->fcval;
73 double *xcur = d->xcur;
74 double *gcval = d->gcval;
82 for (i = 0; i < m; ++i)
83 val += y[i] * (gcval[i] = isnan(fcval[i]) ? 0 : fcval[i]);
85 for (j = 0; j < n; ++j) {
86 double u, v, dx, denominv, c, sigma2, dx2;
88 /* first, compute xcur[j] for y. Because this objective is
89 separable, we can minimize over x analytically, and the minimum
90 dx is given by the solution of a quadratic equation:
91 u dx^2 + 2 v sigma^2 dx + u sigma^2 = 0
92 where u and v are defined by the sums below. Because of
93 the definitions, it is guaranteed that |u/v| <= sigma,
94 and it follows that the only dx solution with |dx| <= sigma
96 (v/u) sigma^2 (-1 + sqrt(1 - (u / v sigma)^2))
97 (which goes to zero as u -> 0). */
100 v = fabs(dfdx[j]) * sigma[j] + 0.5 * rho;
101 for (i = 0; i < m; ++i) if (!isnan(fcval[i])) {
102 u += dfcdx[i*n + j] * y[i];
103 v += (fabs(dfcdx[i*n + j]) * sigma[j] + 0.5 * rhoc[i]) * y[i];
105 u *= (sigma2 = sqr(sigma[j]));
106 if (fabs(u) < 1e-3 * (v*sigma[j])) { /* Taylor exp. for small u */
107 double a = u / (v*sigma[j]);
108 dx = -sigma[j] * (0.5 * a + 0.125 * a*a*a);
111 dx = (v/u)*sigma2 * (-1 + sqrt(fabs(1 - sqr(u/(v*sigma[j])))));
113 if (xcur[j] > ub[j]) xcur[j] = ub[j];
114 else if (xcur[j] < lb[j]) xcur[j] = lb[j];
115 if (xcur[j] > x[j]+0.9*sigma[j]) xcur[j] = x[j]+0.9*sigma[j];
116 else if (xcur[j] < x[j]-0.9*sigma[j]) xcur[j] = x[j]-0.9*sigma[j];
119 /* function value: */
121 denominv = 1.0 / (sigma2 - dx2);
122 val += (u * dx + v * dx2) * denominv;
124 /* update gval, wval, gcval (approximant functions) */
126 d->gval += (dfdx[j] * c + (fabs(dfdx[j])*sigma[j] + 0.5*rho) * dx2)
128 d->wval += 0.5 * dx2 * denominv;
129 for (i = 0; i < m; ++i) if (!isnan(fcval[i]))
130 gcval[i] += (dfcdx[i*n+j] * c + (fabs(dfcdx[i*n+j])*sigma[j]
131 + 0.5*rhoc[i]) * dx2)
135 /* gradient is easy to compute: since we are at a minimum x (dval/dx=0),
136 we only need the partial derivative with respect to y, and
137 we negate because we are maximizing: */
138 if (grad) for (i = 0; i < m; ++i) grad[i] = -gcval[i];
142 /***********************************************************************/
144 /* note that we implement a hidden feature not in the standard
145 nlopt_minimize_constrained interface: whenever the constraint
146 function returns NaN, that constraint becomes inactive. */
148 nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
149 unsigned m, nlopt_constraint *fc,
150 const double *lb, const double *ub, /* bounds */
151 double *x, /* in: initial guess, out: minimizer */
153 nlopt_stopping *stop,
156 nlopt_result ret = NLOPT_SUCCESS;
157 double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
158 double *dfcdx, *dfcdx_cur;
159 double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
160 unsigned i, j, k = 0;
163 double infeasibility;
165 sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
166 if (!sigma) return NLOPT_OUT_OF_MEMORY;
171 xprevprev = xprev + n;
172 fcval = xprevprev + n;
173 fcval_cur = fcval + m;
174 rhoc = fcval_cur + m;
177 dual_ub = dual_lb + m;
180 dfcdx_cur = dfcdx + m*n;
194 for (j = 0; j < n; ++j) {
195 if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
196 sigma[j] = 1.0; /* arbitrary default */
198 sigma[j] = 0.5 * (ub[j] - lb[j]);
201 for (i = 0; i < m; ++i) {
203 dual_lb[i] = y[i] = 0.0;
204 dual_ub[i] = HUGE_VAL;
207 dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
209 memcpy(xcur, x, sizeof(double) * n);
211 feasible = 1; infeasibility = 0;
212 for (i = 0; i < m; ++i) {
213 fcval[i] = fc[i].f(n, x, dfcdx + i*n, fc[i].f_data);
214 feasible = feasible && (fcval[i] <= 0 || isnan(fcval[i]));
215 if (fcval[i] > infeasibility) infeasibility = fcval[i];
217 /* For non-feasible initial points, set a finite (large)
218 upper-bound on the dual variables. What this means is that,
219 if no feasible solution is found from the dual problem, it
220 will minimize the dual objective with the unfeasible
221 constraint weighted by 1e40 -- basically, minimizing the
222 unfeasible constraint until it becomes feasible or until we at
223 least obtain a step towards a feasible point.
225 Svanberg suggested a different approach in his 1987 paper, basically
226 introducing additional penalty variables for unfeasible constraints,
227 but this is easier to implement and at least as efficient. */
229 for (i = 0; i < m; ++i) dual_ub[i] = 1e40;
231 nlopt_set_min_objective(dual_opt, dual_func, &dd);
232 nlopt_set_lower_bounds(dual_opt, dual_lb);
233 nlopt_set_upper_bounds(dual_opt, dual_ub);
234 nlopt_set_stopval(dual_opt, -HUGE_VAL);
236 while (1) { /* outer iterations */
238 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCE_STOP;
239 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
240 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
241 else if (feasible && *minf < stop->minf_max)
242 ret = NLOPT_MINF_MAX_REACHED;
243 if (ret != NLOPT_SUCCESS) goto done;
244 if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
245 memcpy(xprev, xcur, sizeof(double) * n);
247 while (1) { /* inner iterations */
248 double min_dual, infeasibility_cur;
249 int feasible_cur, inner_done;
250 unsigned save_verbose;
251 int new_infeasible_constraint;
254 /* solve dual problem */
255 dd.rho = rho; dd.count = 0;
256 save_verbose = mma_verbose;
257 mma_verbose = 0; /* no recursive verbosity */
258 reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
260 stop->maxtime - (nlopt_seconds()
262 mma_verbose = save_verbose;
263 if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
268 dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
270 printf("MMA dual converged in %d iterations to g=%g:\n",
272 for (i = 0; i < MIN(mma_verbose, m); ++i)
273 printf(" MMA y[%d]=%g, gc[%d]=%g\n",
274 i, y[i], i, dd.gcval[i]);
277 fcur = f(n, xcur, dfdx_cur, f_data);
279 feasible_cur = 1; infeasibility_cur = 0;
280 new_infeasible_constraint = 0;
281 inner_done = dd.gval >= fcur;
282 for (i = 0; i < m; ++i) {
283 fcval_cur[i] = fc[i].f(n, xcur, dfcdx_cur + i*n,
285 if (!isnan(fcval_cur[i])) {
286 feasible_cur = feasible_cur
287 && (fcval_cur[i] <= fc[i].tol);
288 if (!isnan(fcval[i]))
289 inner_done = inner_done &&
290 (dd.gcval[i] >= fcval_cur[i]);
291 else if (fcval_cur[i] > 0)
292 new_infeasible_constraint = 1;
293 if (fcval_cur[i] > infeasibility_cur)
294 infeasibility_cur = fcval_cur[i];
298 if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
299 || (!feasible && infeasibility_cur < infeasibility)) {
300 if (mma_verbose && !feasible_cur)
301 printf("MMA - using infeasible point?\n");
302 dd.fval = *minf = fcur;
303 infeasibility = infeasibility_cur;
304 memcpy(fcval, fcval_cur, sizeof(double)*m);
305 memcpy(x, xcur, sizeof(double)*n);
306 memcpy(dfdx, dfdx_cur, sizeof(double)*n);
307 memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
309 /* once we have reached a feasible solution, the
310 algorithm should never make the solution infeasible
311 again (if inner_done), although the constraints may
312 be violated slightly by rounding errors etc. so we
313 must be a little careful about checking feasibility */
314 if (infeasibility_cur == 0) {
315 if (!feasible) /* reset upper bounds to infinity */
316 for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
319 else if (new_infeasible_constraint) feasible = 0;
322 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCE_STOP;
323 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
324 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
325 else if (feasible && *minf < stop->minf_max)
326 ret = NLOPT_MINF_MAX_REACHED;
327 if (ret != NLOPT_SUCCESS) goto done;
329 if (inner_done) break;
332 rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
333 for (i = 0; i < m; ++i)
334 if (!isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
337 1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i])
341 printf("MMA inner iteration: rho -> %g\n", rho);
342 for (i = 0; i < MIN(mma_verbose, m); ++i)
343 printf(" MMA rhoc[%d] -> %g\n", i,rhoc[i]);
346 if (nlopt_stop_ftol(stop, fcur, fprev))
347 ret = NLOPT_FTOL_REACHED;
348 if (nlopt_stop_x(stop, xcur, xprev))
349 ret = NLOPT_XTOL_REACHED;
350 if (ret != NLOPT_SUCCESS) goto done;
352 /* update rho and sigma for iteration k+1 */
353 rho = MAX(0.1 * rho, MMA_RHOMIN);
355 printf("MMA outer iteration: rho -> %g\n", rho);
356 for (i = 0; i < m; ++i)
357 rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
358 for (i = 0; i < MIN(mma_verbose, m); ++i)
359 printf(" MMA rhoc[%d] -> %g\n", i, rhoc[i]);
361 for (j = 0; j < n; ++j) {
362 double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
363 double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
365 if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
366 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
367 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
370 for (j = 0; j < MIN(mma_verbose, n); ++j)
371 printf(" MMA sigma[%d] -> %g\n",