1 /* Copyright (c) 2007-2010 Massachusetts Institute of Technology
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "nlopt-util.h"
31 unsigned mma_verbose = 0; /* > 0 for verbose output */
33 #define MIN(a,b) ((a) < (b) ? (a) : (b))
34 #define MAX(a,b) ((a) > (b) ? (a) : (b))
37 static int my_isnan(double x) { return x != x; }
38 # define isnan my_isnan
41 /* magic minimum value for rho in MMA ... the 2002 paper says it should
42 be a "fixed, strictly positive `small' number, e.g. 1e-5"
43 ... grrr, I hate these magic numbers, which seem like they
44 should depend on the objective function in some way ... in particular,
45 note that rho is dimensionful (= dimensions of objective function) */
46 #define MMA_RHOMIN 1e-5
48 /***********************************************************************/
49 /* function for MMA's dual solution of the approximate problem */
52 int count; /* evaluation count, incremented each call */
53 unsigned n; /* must be set on input to dimension of x */
54 const double *x, *lb, *ub, *sigma, *dfdx; /* arrays of length n */
55 const double *dfcdx; /* m-by-n array of fc gradients */
56 double fval, rho; /* must be set on input */
57 const double *fcval, *rhoc; /* arrays of length m */
58 double *xcur; /* array of length n, output each time */
59 double gval, wval, *gcval; /* output each time (array length m) */
62 static double sqr(double x) { return x * x; }
64 static double dual_func(unsigned m, const double *y, double *grad, void *d_)
66 dual_data *d = (dual_data *) d_;
68 const double *x = d->x, *lb = d->lb, *ub = d->ub, *sigma = d->sigma,
70 const double *dfcdx = d->dfcdx;
71 double rho = d->rho, fval = d->fval;
72 const double *rhoc = d->rhoc, *fcval = d->fcval;
73 double *xcur = d->xcur;
74 double *gcval = d->gcval;
82 for (i = 0; i < m; ++i)
83 val += y[i] * (gcval[i] = isnan(fcval[i]) ? 0 : fcval[i]);
85 for (j = 0; j < n; ++j) {
86 double u, v, dx, denominv, c, sigma2, dx2;
88 /* first, compute xcur[j] for y. Because this objective is
89 separable, we can minimize over x analytically, and the minimum
90 dx is given by the solution of a quadratic equation:
91 u dx^2 + 2 v sigma^2 dx + u sigma^2 = 0
92 where u and v are defined by the sums below. Because of
93 the definitions, it is guaranteed that |u/v| <= sigma,
94 and it follows that the only dx solution with |dx| <= sigma
96 (v/u) sigma^2 (-1 + sqrt(1 - (u / v sigma)^2))
97 (which goes to zero as u -> 0). */
100 v = fabs(dfdx[j]) * sigma[j] + 0.5 * rho;
101 for (i = 0; i < m; ++i) if (!isnan(fcval[i])) {
102 u += dfcdx[i*n + j] * y[i];
103 v += (fabs(dfcdx[i*n + j]) * sigma[j] + 0.5 * rhoc[i]) * y[i];
105 u *= (sigma2 = sqr(sigma[j]));
106 if (fabs(u) < 1e-3 * (v*sigma[j])) { /* Taylor exp. for small u */
107 double a = u / (v*sigma[j]);
108 dx = -sigma[j] * (0.5 * a + 0.125 * a*a*a);
111 dx = (v/u)*sigma2 * (-1 + sqrt(fabs(1 - sqr(u/(v*sigma[j])))));
113 if (xcur[j] > ub[j]) xcur[j] = ub[j];
114 else if (xcur[j] < lb[j]) xcur[j] = lb[j];
115 if (xcur[j] > x[j]+0.9*sigma[j]) xcur[j] = x[j]+0.9*sigma[j];
116 else if (xcur[j] < x[j]-0.9*sigma[j]) xcur[j] = x[j]-0.9*sigma[j];
119 /* function value: */
121 denominv = 1.0 / (sigma2 - dx2);
122 val += (u * dx + v * dx2) * denominv;
124 /* update gval, wval, gcval (approximant functions) */
126 d->gval += (dfdx[j] * c + (fabs(dfdx[j])*sigma[j] + 0.5*rho) * dx2)
128 d->wval += 0.5 * dx2 * denominv;
129 for (i = 0; i < m; ++i) if (!isnan(fcval[i]))
130 gcval[i] += (dfcdx[i*n+j] * c + (fabs(dfcdx[i*n+j])*sigma[j]
131 + 0.5*rhoc[i]) * dx2)
135 /* gradient is easy to compute: since we are at a minimum x (dval/dx=0),
136 we only need the partial derivative with respect to y, and
137 we negate because we are maximizing: */
138 if (grad) for (i = 0; i < m; ++i) grad[i] = -gcval[i];
142 /***********************************************************************/
144 /* note that we implement a hidden feature not in the standard
145 nlopt_minimize_constrained interface: whenever the constraint
146 function returns NaN, that constraint becomes inactive. */
148 nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
149 unsigned m, nlopt_constraint *fc,
150 const double *lb, const double *ub, /* bounds */
151 double *x, /* in: initial guess, out: minimizer */
153 nlopt_stopping *stop,
156 nlopt_result ret = NLOPT_SUCCESS;
157 double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
158 double *dfcdx, *dfcdx_cur;
159 double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
160 unsigned i, ifc, j, k = 0;
163 double infeasibility;
166 m = nlopt_count_constraints(mfc = m, fc);
167 if (nlopt_get_dimension(dual_opt) != m) return NLOPT_INVALID_ARGS;
168 sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
169 if (!sigma) return NLOPT_OUT_OF_MEMORY;
174 xprevprev = xprev + n;
175 fcval = xprevprev + n;
176 fcval_cur = fcval + m;
177 rhoc = fcval_cur + m;
180 dual_ub = dual_lb + m;
183 dfcdx_cur = dfcdx + m*n;
197 for (j = 0; j < n; ++j) {
198 if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
199 sigma[j] = 1.0; /* arbitrary default */
201 sigma[j] = 0.5 * (ub[j] - lb[j]);
204 for (i = 0; i < m; ++i) {
206 dual_lb[i] = y[i] = 0.0;
207 dual_ub[i] = HUGE_VAL;
210 dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
212 memcpy(xcur, x, sizeof(double) * n);
213 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
215 feasible = 1; infeasibility = 0;
216 for (i = ifc = 0; ifc < mfc; ++ifc) {
217 nlopt_eval_constraint(fcval + i, dfcdx + i*n,
220 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
222 for (i = 0; i < m; ++i) {
223 feasible = feasible && (fcval[i] <= 0 || isnan(fcval[i]));
224 if (fcval[i] > infeasibility) infeasibility = fcval[i];
226 /* For non-feasible initial points, set a finite (large)
227 upper-bound on the dual variables. What this means is that,
228 if no feasible solution is found from the dual problem, it
229 will minimize the dual objective with the unfeasible
230 constraint weighted by 1e40 -- basically, minimizing the
231 unfeasible constraint until it becomes feasible or until we at
232 least obtain a step towards a feasible point.
234 Svanberg suggested a different approach in his 1987 paper, basically
235 introducing additional penalty variables for unfeasible constraints,
236 but this is easier to implement and at least as efficient. */
238 for (i = 0; i < m; ++i) dual_ub[i] = 1e40;
240 nlopt_set_min_objective(dual_opt, dual_func, &dd);
241 nlopt_set_lower_bounds(dual_opt, dual_lb);
242 nlopt_set_upper_bounds(dual_opt, dual_ub);
243 nlopt_set_stopval(dual_opt, -HUGE_VAL);
244 nlopt_remove_inequality_constraints(dual_opt);
245 nlopt_remove_equality_constraints(dual_opt);
247 while (1) { /* outer iterations */
249 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
250 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
251 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
252 else if (feasible && *minf < stop->minf_max)
253 ret = NLOPT_MINF_MAX_REACHED;
254 if (ret != NLOPT_SUCCESS) goto done;
255 if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
256 memcpy(xprev, xcur, sizeof(double) * n);
258 while (1) { /* inner iterations */
259 double min_dual, infeasibility_cur;
260 int feasible_cur, inner_done;
261 unsigned save_verbose;
262 int new_infeasible_constraint;
265 /* solve dual problem */
266 dd.rho = rho; dd.count = 0;
267 save_verbose = mma_verbose;
268 mma_verbose = 0; /* no recursive verbosity */
269 reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
271 stop->maxtime - (nlopt_seconds()
273 mma_verbose = save_verbose;
274 if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
279 dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
281 printf("MMA dual converged in %d iterations to g=%g:\n",
283 for (i = 0; i < MIN(mma_verbose, m); ++i)
284 printf(" MMA y[%d]=%g, gc[%d]=%g\n",
285 i, y[i], i, dd.gcval[i]);
288 fcur = f(n, xcur, dfdx_cur, f_data);
290 if (nlopt_stop_forced(stop)) {
291 ret = NLOPT_FORCED_STOP; goto done; }
292 feasible_cur = 1; infeasibility_cur = 0;
293 new_infeasible_constraint = 0;
294 inner_done = dd.gval >= fcur;
295 for (i = ifc = 0; ifc < mfc; ++ifc) {
296 nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n,
299 if (nlopt_stop_forced(stop)) {
300 ret = NLOPT_FORCED_STOP; goto done; }
302 for (i = ifc = 0; ifc < mfc; ++ifc) {
303 unsigned i0 = i, inext = i + fc[ifc].m;
304 for (; i < inext; ++i)
305 if (!isnan(fcval_cur[i])) {
306 feasible_cur = feasible_cur
307 && (fcval_cur[i] <= fc[ifc].tol[i-i0]);
308 if (!isnan(fcval[i]))
309 inner_done = inner_done &&
310 (dd.gcval[i] >= fcval_cur[i]);
311 else if (fcval_cur[i] > 0)
312 new_infeasible_constraint = 1;
313 if (fcval_cur[i] > infeasibility_cur)
314 infeasibility_cur = fcval_cur[i];
318 if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
319 || (!feasible && infeasibility_cur < infeasibility)) {
320 if (mma_verbose && !feasible_cur)
321 printf("MMA - using infeasible point?\n");
322 dd.fval = *minf = fcur;
323 infeasibility = infeasibility_cur;
324 memcpy(fcval, fcval_cur, sizeof(double)*m);
325 memcpy(x, xcur, sizeof(double)*n);
326 memcpy(dfdx, dfdx_cur, sizeof(double)*n);
327 memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
329 /* once we have reached a feasible solution, the
330 algorithm should never make the solution infeasible
331 again (if inner_done), although the constraints may
332 be violated slightly by rounding errors etc. so we
333 must be a little careful about checking feasibility */
334 if (infeasibility_cur == 0) {
335 if (!feasible) { /* reset upper bounds to infin. */
336 for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
337 nlopt_set_upper_bounds(dual_opt, dual_ub);
341 else if (new_infeasible_constraint) feasible = 0;
344 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
345 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
346 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
347 else if (feasible && *minf < stop->minf_max)
348 ret = NLOPT_MINF_MAX_REACHED;
349 if (ret != NLOPT_SUCCESS) goto done;
351 if (inner_done) break;
354 rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
355 for (i = 0; i < m; ++i)
356 if (!isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
359 1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i])
363 printf("MMA inner iteration: rho -> %g\n", rho);
364 for (i = 0; i < MIN(mma_verbose, m); ++i)
365 printf(" MMA rhoc[%d] -> %g\n", i,rhoc[i]);
368 if (nlopt_stop_ftol(stop, fcur, fprev))
369 ret = NLOPT_FTOL_REACHED;
370 if (nlopt_stop_x(stop, xcur, xprev))
371 ret = NLOPT_XTOL_REACHED;
372 if (ret != NLOPT_SUCCESS) goto done;
374 /* update rho and sigma for iteration k+1 */
375 rho = MAX(0.1 * rho, MMA_RHOMIN);
377 printf("MMA outer iteration: rho -> %g\n", rho);
378 for (i = 0; i < m; ++i)
379 rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
380 for (i = 0; i < MIN(mma_verbose, m); ++i)
381 printf(" MMA rhoc[%d] -> %g\n", i, rhoc[i]);
383 for (j = 0; j < n; ++j) {
384 double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
385 double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
387 if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
388 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
389 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
392 for (j = 0; j < MIN(mma_verbose, n); ++j)
393 printf(" MMA sigma[%d] -> %g\n",