1 /* Copyright (c) 2007-2010 Massachusetts Institute of Technology
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "nlopt-util.h"
31 unsigned mma_verbose = 0; /* > 0 for verbose output */
33 #define MIN(a,b) ((a) < (b) ? (a) : (b))
34 #define MAX(a,b) ((a) > (b) ? (a) : (b))
37 static int my_isnan(double x) { return x != x; }
38 # define isnan my_isnan
41 /* magic minimum value for rho in MMA ... the 2002 paper says it should
42 be a "fixed, strictly positive `small' number, e.g. 1e-5"
43 ... grrr, I hate these magic numbers, which seem like they
44 should depend on the objective function in some way ... in particular,
45 note that rho is dimensionful (= dimensions of objective function) */
46 #define MMA_RHOMIN 1e-5
48 /***********************************************************************/
49 /* function for MMA's dual solution of the approximate problem */
52 int count; /* evaluation count, incremented each call */
53 unsigned n; /* must be set on input to dimension of x */
54 const double *x, *lb, *ub, *sigma, *dfdx; /* arrays of length n */
55 const double *dfcdx; /* m-by-n array of fc gradients */
56 double fval, rho; /* must be set on input */
57 const double *fcval, *rhoc; /* arrays of length m */
58 double *xcur; /* array of length n, output each time */
59 double gval, wval, *gcval; /* output each time (array length m) */
62 static double sqr(double x) { return x * x; }
64 static double dual_func(unsigned m, const double *y, double *grad, void *d_)
66 dual_data *d = (dual_data *) d_;
68 const double *x = d->x, *lb = d->lb, *ub = d->ub, *sigma = d->sigma,
70 const double *dfcdx = d->dfcdx;
71 double rho = d->rho, fval = d->fval;
72 const double *rhoc = d->rhoc, *fcval = d->fcval;
73 double *xcur = d->xcur;
74 double *gcval = d->gcval;
82 for (i = 0; i < m; ++i)
83 val += y[i] * (gcval[i] = isnan(fcval[i]) ? 0 : fcval[i]);
85 for (j = 0; j < n; ++j) {
86 double u, v, dx, denominv, c, sigma2, dx2;
88 /* first, compute xcur[j] for y. Because this objective is
89 separable, we can minimize over x analytically, and the minimum
90 dx is given by the solution of a quadratic equation:
91 u dx^2 + 2 v sigma^2 dx + u sigma^2 = 0
92 where u and v are defined by the sums below. Because of
93 the definitions, it is guaranteed that |u/v| <= sigma,
94 and it follows that the only dx solution with |dx| <= sigma
96 (v/u) sigma^2 (-1 + sqrt(1 - (u / v sigma)^2))
97 (which goes to zero as u -> 0). */
99 if (sigma[j] == 0) { /* special case for lb[i] == ub[i] dims, dx=0 */
105 v = fabs(dfdx[j]) * sigma[j] + 0.5 * rho;
106 for (i = 0; i < m; ++i) if (!isnan(fcval[i])) {
107 u += dfcdx[i*n + j] * y[i];
108 v += (fabs(dfcdx[i*n + j]) * sigma[j] + 0.5 * rhoc[i]) * y[i];
110 u *= (sigma2 = sqr(sigma[j]));
111 if (fabs(u) < 1e-3 * (v*sigma[j])) { /* Taylor exp. for small u */
112 double a = u / (v*sigma[j]);
113 dx = -sigma[j] * (0.5 * a + 0.125 * a*a*a);
116 dx = (v/u)*sigma2 * (-1 + sqrt(fabs(1 - sqr(u/(v*sigma[j])))));
118 if (xcur[j] > ub[j]) xcur[j] = ub[j];
119 else if (xcur[j] < lb[j]) xcur[j] = lb[j];
120 if (xcur[j] > x[j]+0.9*sigma[j]) xcur[j] = x[j]+0.9*sigma[j];
121 else if (xcur[j] < x[j]-0.9*sigma[j]) xcur[j] = x[j]-0.9*sigma[j];
124 /* function value: */
126 denominv = 1.0 / (sigma2 - dx2);
127 val += (u * dx + v * dx2) * denominv;
129 /* update gval, wval, gcval (approximant functions) */
131 d->gval += (dfdx[j] * c + (fabs(dfdx[j])*sigma[j] + 0.5*rho) * dx2)
133 d->wval += 0.5 * dx2 * denominv;
134 for (i = 0; i < m; ++i) if (!isnan(fcval[i]))
135 gcval[i] += (dfcdx[i*n+j] * c + (fabs(dfcdx[i*n+j])*sigma[j]
136 + 0.5*rhoc[i]) * dx2)
140 /* gradient is easy to compute: since we are at a minimum x (dval/dx=0),
141 we only need the partial derivative with respect to y, and
142 we negate because we are maximizing: */
143 if (grad) for (i = 0; i < m; ++i) grad[i] = -gcval[i];
147 /***********************************************************************/
149 /* note that we implement a hidden feature not in the standard
150 nlopt_minimize_constrained interface: whenever the constraint
151 function returns NaN, that constraint becomes inactive. */
153 nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
154 unsigned m, nlopt_constraint *fc,
155 const double *lb, const double *ub, /* bounds */
156 double *x, /* in: initial guess, out: minimizer */
158 nlopt_stopping *stop,
161 nlopt_result ret = NLOPT_SUCCESS;
162 double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
163 double *dfcdx, *dfcdx_cur;
164 double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
165 unsigned i, ifc, j, k = 0;
168 double infeasibility;
171 m = nlopt_count_constraints(mfc = m, fc);
172 if (nlopt_get_dimension(dual_opt) != m) return NLOPT_INVALID_ARGS;
173 sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
174 if (!sigma) return NLOPT_OUT_OF_MEMORY;
179 xprevprev = xprev + n;
180 fcval = xprevprev + n;
181 fcval_cur = fcval + m;
182 rhoc = fcval_cur + m;
185 dual_ub = dual_lb + m;
188 dfcdx_cur = dfcdx + m*n;
202 for (j = 0; j < n; ++j) {
203 if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
204 sigma[j] = 1.0; /* arbitrary default */
206 sigma[j] = 0.5 * (ub[j] - lb[j]);
209 for (i = 0; i < m; ++i) {
211 dual_lb[i] = y[i] = 0.0;
212 dual_ub[i] = HUGE_VAL;
215 dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
217 memcpy(xcur, x, sizeof(double) * n);
218 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
220 feasible = 1; infeasibility = 0;
221 for (i = ifc = 0; ifc < mfc; ++ifc) {
222 nlopt_eval_constraint(fcval + i, dfcdx + i*n,
225 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
227 for (i = 0; i < m; ++i) {
228 feasible = feasible && (fcval[i] <= 0 || isnan(fcval[i]));
229 if (fcval[i] > infeasibility) infeasibility = fcval[i];
231 /* For non-feasible initial points, set a finite (large)
232 upper-bound on the dual variables. What this means is that,
233 if no feasible solution is found from the dual problem, it
234 will minimize the dual objective with the unfeasible
235 constraint weighted by 1e40 -- basically, minimizing the
236 unfeasible constraint until it becomes feasible or until we at
237 least obtain a step towards a feasible point.
239 Svanberg suggested a different approach in his 1987 paper, basically
240 introducing additional penalty variables for unfeasible constraints,
241 but this is easier to implement and at least as efficient. */
243 for (i = 0; i < m; ++i) dual_ub[i] = 1e40;
245 nlopt_set_min_objective(dual_opt, dual_func, &dd);
246 nlopt_set_lower_bounds(dual_opt, dual_lb);
247 nlopt_set_upper_bounds(dual_opt, dual_ub);
248 nlopt_set_stopval(dual_opt, -HUGE_VAL);
249 nlopt_remove_inequality_constraints(dual_opt);
250 nlopt_remove_equality_constraints(dual_opt);
252 while (1) { /* outer iterations */
254 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
255 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
256 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
257 else if (feasible && *minf < stop->minf_max)
258 ret = NLOPT_MINF_MAX_REACHED;
259 if (ret != NLOPT_SUCCESS) goto done;
260 if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
261 memcpy(xprev, xcur, sizeof(double) * n);
263 while (1) { /* inner iterations */
264 double min_dual, infeasibility_cur;
265 int feasible_cur, inner_done;
266 unsigned save_verbose;
267 int new_infeasible_constraint;
270 /* solve dual problem */
271 dd.rho = rho; dd.count = 0;
272 save_verbose = mma_verbose;
273 mma_verbose = 0; /* no recursive verbosity */
274 reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
276 stop->maxtime - (nlopt_seconds()
278 mma_verbose = save_verbose;
279 if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
284 dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
286 printf("MMA dual converged in %d iterations to g=%g:\n",
288 for (i = 0; i < MIN(mma_verbose, m); ++i)
289 printf(" MMA y[%d]=%g, gc[%d]=%g\n",
290 i, y[i], i, dd.gcval[i]);
293 fcur = f(n, xcur, dfdx_cur, f_data);
295 if (nlopt_stop_forced(stop)) {
296 ret = NLOPT_FORCED_STOP; goto done; }
297 feasible_cur = 1; infeasibility_cur = 0;
298 new_infeasible_constraint = 0;
299 inner_done = dd.gval >= fcur;
300 for (i = ifc = 0; ifc < mfc; ++ifc) {
301 nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n,
304 if (nlopt_stop_forced(stop)) {
305 ret = NLOPT_FORCED_STOP; goto done; }
307 for (i = ifc = 0; ifc < mfc; ++ifc) {
308 unsigned i0 = i, inext = i + fc[ifc].m;
309 for (; i < inext; ++i)
310 if (!isnan(fcval_cur[i])) {
311 feasible_cur = feasible_cur
312 && (fcval_cur[i] <= fc[ifc].tol[i-i0]);
313 if (!isnan(fcval[i]))
314 inner_done = inner_done &&
315 (dd.gcval[i] >= fcval_cur[i]);
316 else if (fcval_cur[i] > 0)
317 new_infeasible_constraint = 1;
318 if (fcval_cur[i] > infeasibility_cur)
319 infeasibility_cur = fcval_cur[i];
323 if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
324 || (!feasible && infeasibility_cur < infeasibility)) {
325 if (mma_verbose && !feasible_cur)
326 printf("MMA - using infeasible point?\n");
327 dd.fval = *minf = fcur;
328 infeasibility = infeasibility_cur;
329 memcpy(fcval, fcval_cur, sizeof(double)*m);
330 memcpy(x, xcur, sizeof(double)*n);
331 memcpy(dfdx, dfdx_cur, sizeof(double)*n);
332 memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
334 /* once we have reached a feasible solution, the
335 algorithm should never make the solution infeasible
336 again (if inner_done), although the constraints may
337 be violated slightly by rounding errors etc. so we
338 must be a little careful about checking feasibility */
339 if (infeasibility_cur == 0) {
340 if (!feasible) { /* reset upper bounds to infin. */
341 for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
342 nlopt_set_upper_bounds(dual_opt, dual_ub);
346 else if (new_infeasible_constraint) feasible = 0;
349 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
350 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
351 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
352 else if (feasible && *minf < stop->minf_max)
353 ret = NLOPT_MINF_MAX_REACHED;
354 if (ret != NLOPT_SUCCESS) goto done;
356 if (inner_done) break;
359 rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
360 for (i = 0; i < m; ++i)
361 if (!isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
364 1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i])
368 printf("MMA inner iteration: rho -> %g\n", rho);
369 for (i = 0; i < MIN(mma_verbose, m); ++i)
370 printf(" MMA rhoc[%d] -> %g\n", i,rhoc[i]);
373 if (nlopt_stop_ftol(stop, fcur, fprev))
374 ret = NLOPT_FTOL_REACHED;
375 if (nlopt_stop_x(stop, xcur, xprev))
376 ret = NLOPT_XTOL_REACHED;
377 if (ret != NLOPT_SUCCESS) goto done;
379 /* update rho and sigma for iteration k+1 */
380 rho = MAX(0.1 * rho, MMA_RHOMIN);
382 printf("MMA outer iteration: rho -> %g\n", rho);
383 for (i = 0; i < m; ++i)
384 rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
385 for (i = 0; i < MIN(mma_verbose, m); ++i)
386 printf(" MMA rhoc[%d] -> %g\n", i, rhoc[i]);
388 for (j = 0; j < n; ++j) {
389 double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
390 double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
392 if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
393 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
394 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
397 for (j = 0; j < MIN(mma_verbose, n); ++j)
398 printf(" MMA sigma[%d] -> %g\n",