1 /* Copyright (c) 2007-2010 Massachusetts Institute of Technology
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "nlopt-util.h"
31 unsigned mma_verbose = 0; /* > 0 for verbose output */
33 #define MIN(a,b) ((a) < (b) ? (a) : (b))
34 #define MAX(a,b) ((a) > (b) ? (a) : (b))
37 static int my_isnan(double x) { return x != x; }
38 # define isnan my_isnan
41 /* magic minimum value for rho in MMA ... the 2002 paper says it should
42 be a "fixed, strictly positive `small' number, e.g. 1e-5"
43 ... grrr, I hate these magic numbers, which seem like they
44 should depend on the objective function in some way ... in particular,
45 note that rho is dimensionful (= dimensions of objective function) */
46 #define MMA_RHOMIN 1e-5
48 /***********************************************************************/
49 /* function for MMA's dual solution of the approximate problem */
52 int count; /* evaluation count, incremented each call */
53 unsigned n; /* must be set on input to dimension of x */
54 const double *x, *lb, *ub, *sigma, *dfdx; /* arrays of length n */
55 const double *dfcdx; /* m-by-n array of fc gradients */
56 double fval, rho; /* must be set on input */
57 const double *fcval, *rhoc; /* arrays of length m */
58 double *xcur; /* array of length n, output each time */
59 double gval, wval, *gcval; /* output each time (array length m) */
62 static double sqr(double x) { return x * x; }
64 static double dual_func(unsigned m, const double *y, double *grad, void *d_)
66 dual_data *d = (dual_data *) d_;
68 const double *x = d->x, *lb = d->lb, *ub = d->ub, *sigma = d->sigma,
70 const double *dfcdx = d->dfcdx;
71 double rho = d->rho, fval = d->fval;
72 const double *rhoc = d->rhoc, *fcval = d->fcval;
73 double *xcur = d->xcur;
74 double *gcval = d->gcval;
82 for (i = 0; i < m; ++i)
83 val += y[i] * (gcval[i] = isnan(fcval[i]) ? 0 : fcval[i]);
85 for (j = 0; j < n; ++j) {
86 double u, v, dx, denominv, c, sigma2, dx2;
88 /* first, compute xcur[j] for y. Because this objective is
89 separable, we can minimize over x analytically, and the minimum
90 dx is given by the solution of a quadratic equation:
91 u dx^2 + 2 v sigma^2 dx + u sigma^2 = 0
92 where u and v are defined by the sums below. Because of
93 the definitions, it is guaranteed that |u/v| <= sigma,
94 and it follows that the only dx solution with |dx| <= sigma
96 (v/u) sigma^2 (-1 + sqrt(1 - (u / v sigma)^2))
97 (which goes to zero as u -> 0). */
100 v = fabs(dfdx[j]) * sigma[j] + 0.5 * rho;
101 for (i = 0; i < m; ++i) if (!isnan(fcval[i])) {
102 u += dfcdx[i*n + j] * y[i];
103 v += (fabs(dfcdx[i*n + j]) * sigma[j] + 0.5 * rhoc[i]) * y[i];
105 u *= (sigma2 = sqr(sigma[j]));
106 if (fabs(u) < 1e-3 * (v*sigma[j])) { /* Taylor exp. for small u */
107 double a = u / (v*sigma[j]);
108 dx = -sigma[j] * (0.5 * a + 0.125 * a*a*a);
111 dx = (v/u)*sigma2 * (-1 + sqrt(fabs(1 - sqr(u/(v*sigma[j])))));
113 if (xcur[j] > ub[j]) xcur[j] = ub[j];
114 else if (xcur[j] < lb[j]) xcur[j] = lb[j];
115 if (xcur[j] > x[j]+0.9*sigma[j]) xcur[j] = x[j]+0.9*sigma[j];
116 else if (xcur[j] < x[j]-0.9*sigma[j]) xcur[j] = x[j]-0.9*sigma[j];
119 /* function value: */
121 denominv = 1.0 / (sigma2 - dx2);
122 val += (u * dx + v * dx2) * denominv;
124 /* update gval, wval, gcval (approximant functions) */
126 d->gval += (dfdx[j] * c + (fabs(dfdx[j])*sigma[j] + 0.5*rho) * dx2)
128 d->wval += 0.5 * dx2 * denominv;
129 for (i = 0; i < m; ++i) if (!isnan(fcval[i]))
130 gcval[i] += (dfcdx[i*n+j] * c + (fabs(dfcdx[i*n+j])*sigma[j]
131 + 0.5*rhoc[i]) * dx2)
135 /* gradient is easy to compute: since we are at a minimum x (dval/dx=0),
136 we only need the partial derivative with respect to y, and
137 we negate because we are maximizing: */
138 if (grad) for (i = 0; i < m; ++i) grad[i] = -gcval[i];
142 /***********************************************************************/
144 /* note that we implement a hidden feature not in the standard
145 nlopt_minimize_constrained interface: whenever the constraint
146 function returns NaN, that constraint becomes inactive. */
148 nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
149 unsigned m, nlopt_constraint *fc,
150 const double *lb, const double *ub, /* bounds */
151 double *x, /* in: initial guess, out: minimizer */
153 nlopt_stopping *stop,
156 nlopt_result ret = NLOPT_SUCCESS;
157 double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
158 double *dfcdx, *dfcdx_cur;
159 double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
160 unsigned i, ifc, j, k = 0;
163 double infeasibility;
166 m = nlopt_count_constraints(mfc = m, fc);
167 if (nlopt_get_dimension(dual_opt) != m) return NLOPT_INVALID_ARGS;
168 sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
169 if (!sigma) return NLOPT_OUT_OF_MEMORY;
174 xprevprev = xprev + n;
175 fcval = xprevprev + n;
176 fcval_cur = fcval + m;
177 rhoc = fcval_cur + m;
180 dual_ub = dual_lb + m;
183 dfcdx_cur = dfcdx + m*n;
197 for (j = 0; j < n; ++j) {
198 if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
199 sigma[j] = 1.0; /* arbitrary default */
201 sigma[j] = 0.5 * (ub[j] - lb[j]);
204 for (i = 0; i < m; ++i) {
206 dual_lb[i] = y[i] = 0.0;
207 dual_ub[i] = HUGE_VAL;
210 dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
212 memcpy(xcur, x, sizeof(double) * n);
214 feasible = 1; infeasibility = 0;
215 for (i = ifc = 0; ifc < mfc; ++ifc) {
216 nlopt_eval_constraint(fcval + i, dfcdx + i*n,
220 for (i = 0; i < m; ++i) {
221 feasible = feasible && (fcval[i] <= 0 || isnan(fcval[i]));
222 if (fcval[i] > infeasibility) infeasibility = fcval[i];
224 /* For non-feasible initial points, set a finite (large)
225 upper-bound on the dual variables. What this means is that,
226 if no feasible solution is found from the dual problem, it
227 will minimize the dual objective with the unfeasible
228 constraint weighted by 1e40 -- basically, minimizing the
229 unfeasible constraint until it becomes feasible or until we at
230 least obtain a step towards a feasible point.
232 Svanberg suggested a different approach in his 1987 paper, basically
233 introducing additional penalty variables for unfeasible constraints,
234 but this is easier to implement and at least as efficient. */
236 for (i = 0; i < m; ++i) dual_ub[i] = 1e40;
238 nlopt_set_min_objective(dual_opt, dual_func, &dd);
239 nlopt_set_lower_bounds(dual_opt, dual_lb);
240 nlopt_set_upper_bounds(dual_opt, dual_ub);
241 nlopt_set_stopval(dual_opt, -HUGE_VAL);
242 nlopt_remove_inequality_constraints(dual_opt);
243 nlopt_remove_equality_constraints(dual_opt);
245 while (1) { /* outer iterations */
247 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
248 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
249 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
250 else if (feasible && *minf < stop->minf_max)
251 ret = NLOPT_MINF_MAX_REACHED;
252 if (ret != NLOPT_SUCCESS) goto done;
253 if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
254 memcpy(xprev, xcur, sizeof(double) * n);
256 while (1) { /* inner iterations */
257 double min_dual, infeasibility_cur;
258 int feasible_cur, inner_done;
259 unsigned save_verbose;
260 int new_infeasible_constraint;
263 /* solve dual problem */
264 dd.rho = rho; dd.count = 0;
265 save_verbose = mma_verbose;
266 mma_verbose = 0; /* no recursive verbosity */
267 reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
269 stop->maxtime - (nlopt_seconds()
271 mma_verbose = save_verbose;
272 if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
277 dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
279 printf("MMA dual converged in %d iterations to g=%g:\n",
281 for (i = 0; i < MIN(mma_verbose, m); ++i)
282 printf(" MMA y[%d]=%g, gc[%d]=%g\n",
283 i, y[i], i, dd.gcval[i]);
286 fcur = f(n, xcur, dfdx_cur, f_data);
288 feasible_cur = 1; infeasibility_cur = 0;
289 new_infeasible_constraint = 0;
290 inner_done = dd.gval >= fcur;
291 for (i = ifc = 0; ifc < mfc; ++ifc) {
292 nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n,
296 for (i = ifc = 0; ifc < mfc; ++ifc) {
297 unsigned i0 = i, inext = i + fc[ifc].m;
298 for (; i < inext; ++i)
299 if (!isnan(fcval_cur[i])) {
300 feasible_cur = feasible_cur
301 && (fcval_cur[i] <= fc[ifc].tol[i-i0]);
302 if (!isnan(fcval[i]))
303 inner_done = inner_done &&
304 (dd.gcval[i] >= fcval_cur[i]);
305 else if (fcval_cur[i] > 0)
306 new_infeasible_constraint = 1;
307 if (fcval_cur[i] > infeasibility_cur)
308 infeasibility_cur = fcval_cur[i];
312 if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
313 || (!feasible && infeasibility_cur < infeasibility)) {
314 if (mma_verbose && !feasible_cur)
315 printf("MMA - using infeasible point?\n");
316 dd.fval = *minf = fcur;
317 infeasibility = infeasibility_cur;
318 memcpy(fcval, fcval_cur, sizeof(double)*m);
319 memcpy(x, xcur, sizeof(double)*n);
320 memcpy(dfdx, dfdx_cur, sizeof(double)*n);
321 memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
323 /* once we have reached a feasible solution, the
324 algorithm should never make the solution infeasible
325 again (if inner_done), although the constraints may
326 be violated slightly by rounding errors etc. so we
327 must be a little careful about checking feasibility */
328 if (infeasibility_cur == 0) {
329 if (!feasible) { /* reset upper bounds to infin. */
330 for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
331 nlopt_set_upper_bounds(dual_opt, dual_ub);
335 else if (new_infeasible_constraint) feasible = 0;
338 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
339 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
340 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
341 else if (feasible && *minf < stop->minf_max)
342 ret = NLOPT_MINF_MAX_REACHED;
343 if (ret != NLOPT_SUCCESS) goto done;
345 if (inner_done) break;
348 rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
349 for (i = 0; i < m; ++i)
350 if (!isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
353 1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i])
357 printf("MMA inner iteration: rho -> %g\n", rho);
358 for (i = 0; i < MIN(mma_verbose, m); ++i)
359 printf(" MMA rhoc[%d] -> %g\n", i,rhoc[i]);
362 if (nlopt_stop_ftol(stop, fcur, fprev))
363 ret = NLOPT_FTOL_REACHED;
364 if (nlopt_stop_x(stop, xcur, xprev))
365 ret = NLOPT_XTOL_REACHED;
366 if (ret != NLOPT_SUCCESS) goto done;
368 /* update rho and sigma for iteration k+1 */
369 rho = MAX(0.1 * rho, MMA_RHOMIN);
371 printf("MMA outer iteration: rho -> %g\n", rho);
372 for (i = 0; i < m; ++i)
373 rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
374 for (i = 0; i < MIN(mma_verbose, m); ++i)
375 printf(" MMA rhoc[%d] -> %g\n", i, rhoc[i]);
377 for (j = 0; j < n; ++j) {
378 double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
379 double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
381 if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
382 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
383 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
386 for (j = 0; j < MIN(mma_verbose, n); ++j)
387 printf(" MMA sigma[%d] -> %g\n",