1 /* Copyright (c) 2007-2014 Massachusetts Institute of Technology
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "nlopt-util.h"
31 unsigned mma_verbose = 0; /* > 0 for verbose output */
33 #define MIN(a,b) ((a) < (b) ? (a) : (b))
34 #define MAX(a,b) ((a) > (b) ? (a) : (b))
36 /* magic minimum value for rho in MMA ... the 2002 paper says it should
37 be a "fixed, strictly positive `small' number, e.g. 1e-5"
38 ... grrr, I hate these magic numbers, which seem like they
39 should depend on the objective function in some way ... in particular,
40 note that rho is dimensionful (= dimensions of objective function) */
41 #define MMA_RHOMIN 1e-5
43 /***********************************************************************/
44 /* function for MMA's dual solution of the approximate problem */
47 int count; /* evaluation count, incremented each call */
48 unsigned n; /* must be set on input to dimension of x */
49 const double *x, *lb, *ub, *sigma, *dfdx; /* arrays of length n */
50 const double *dfcdx; /* m-by-n array of fc gradients */
51 double fval, rho; /* must be set on input */
52 const double *fcval, *rhoc; /* arrays of length m */
53 double *xcur; /* array of length n, output each time */
54 double gval, wval, *gcval; /* output each time (array length m) */
57 static double sqr(double x) { return x * x; }
59 static double dual_func(unsigned m, const double *y, double *grad, void *d_)
61 dual_data *d = (dual_data *) d_;
63 const double *x = d->x, *lb = d->lb, *ub = d->ub, *sigma = d->sigma,
65 const double *dfcdx = d->dfcdx;
66 double rho = d->rho, fval = d->fval;
67 const double *rhoc = d->rhoc, *fcval = d->fcval;
68 double *xcur = d->xcur;
69 double *gcval = d->gcval;
77 for (i = 0; i < m; ++i)
78 val += y[i] * (gcval[i] = nlopt_isnan(fcval[i]) ? 0 : fcval[i]);
80 for (j = 0; j < n; ++j) {
81 double u, v, dx, denominv, c, sigma2, dx2;
83 /* first, compute xcur[j] for y. Because this objective is
84 separable, we can minimize over x analytically, and the minimum
85 dx is given by the solution of a quadratic equation:
86 u dx^2 + 2 v sigma^2 dx + u sigma^2 = 0
87 where u and v are defined by the sums below. Because of
88 the definitions, it is guaranteed that |u/v| <= sigma,
89 and it follows that the only dx solution with |dx| <= sigma
91 (v/u) sigma^2 (-1 + sqrt(1 - (u / v sigma)^2))
92 = (u/v) / (-1 - sqrt(1 - (u / v sigma)^2))
93 (which goes to zero as u -> 0). The latter expression
94 is less susceptible to roundoff error. */
96 if (sigma[j] == 0) { /* special case for lb[i] == ub[i] dims, dx=0 */
102 v = fabs(dfdx[j]) * sigma[j] + 0.5 * rho;
103 for (i = 0; i < m; ++i) if (!nlopt_isnan(fcval[i])) {
104 u += dfcdx[i*n + j] * y[i];
105 v += (fabs(dfcdx[i*n + j]) * sigma[j] + 0.5 * rhoc[i]) * y[i];
107 u *= (sigma2 = sqr(sigma[j]));
108 dx = (u/v) / (-1 - sqrt(fabs(1 - sqr(u/(v*sigma[j])))));
110 if (xcur[j] > ub[j]) xcur[j] = ub[j];
111 else if (xcur[j] < lb[j]) xcur[j] = lb[j];
112 if (xcur[j] > x[j]+0.9*sigma[j]) xcur[j] = x[j]+0.9*sigma[j];
113 else if (xcur[j] < x[j]-0.9*sigma[j]) xcur[j] = x[j]-0.9*sigma[j];
116 /* function value: */
118 denominv = 1.0 / (sigma2 - dx2);
119 val += (u * dx + v * dx2) * denominv;
121 /* update gval, wval, gcval (approximant functions) */
123 d->gval += (dfdx[j] * c + (fabs(dfdx[j])*sigma[j] + 0.5*rho) * dx2)
125 d->wval += 0.5 * dx2 * denominv;
126 for (i = 0; i < m; ++i) if (!nlopt_isnan(fcval[i]))
127 gcval[i] += (dfcdx[i*n+j] * c + (fabs(dfcdx[i*n+j])*sigma[j]
128 + 0.5*rhoc[i]) * dx2)
132 /* gradient is easy to compute: since we are at a minimum x (dval/dx=0),
133 we only need the partial derivative with respect to y, and
134 we negate because we are maximizing: */
135 if (grad) for (i = 0; i < m; ++i) grad[i] = -gcval[i];
139 /***********************************************************************/
141 /* note that we implement a hidden feature not in the standard
142 nlopt_minimize_constrained interface: whenever the constraint
143 function returns NaN, that constraint becomes inactive. */
145 nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
146 unsigned m, nlopt_constraint *fc,
147 const double *lb, const double *ub, /* bounds */
148 double *x, /* in: initial guess, out: minimizer */
150 nlopt_stopping *stop,
153 nlopt_result ret = NLOPT_SUCCESS;
154 double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
155 double *dfcdx, *dfcdx_cur;
156 double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
157 unsigned i, ifc, j, k = 0;
160 double infeasibility;
163 m = nlopt_count_constraints(mfc = m, fc);
164 if (nlopt_get_dimension(dual_opt) != m) {
165 nlopt_stop_msg(stop, "dual optimizer has wrong dimension %d != %d",
166 nlopt_get_dimension(dual_opt), m);
167 return NLOPT_INVALID_ARGS;
169 sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
170 if (!sigma) return NLOPT_OUT_OF_MEMORY;
175 xprevprev = xprev + n;
176 fcval = xprevprev + n;
177 fcval_cur = fcval + m;
178 rhoc = fcval_cur + m;
181 dual_ub = dual_lb + m;
184 dfcdx_cur = dfcdx + m*n;
198 for (j = 0; j < n; ++j) {
199 if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
200 sigma[j] = 1.0; /* arbitrary default */
202 sigma[j] = 0.5 * (ub[j] - lb[j]);
205 for (i = 0; i < m; ++i) {
207 dual_lb[i] = y[i] = 0.0;
208 dual_ub[i] = HUGE_VAL;
211 dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
213 memcpy(xcur, x, sizeof(double) * n);
214 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
216 feasible = 1; infeasibility = 0;
217 for (i = ifc = 0; ifc < mfc; ++ifc) {
218 nlopt_eval_constraint(fcval + i, dfcdx + i*n,
221 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
223 for (i = 0; i < m; ++i) {
224 feasible = feasible && (fcval[i] <= 0 || nlopt_isnan(fcval[i]));
225 if (fcval[i] > infeasibility) infeasibility = fcval[i];
227 /* For non-feasible initial points, set a finite (large)
228 upper-bound on the dual variables. What this means is that,
229 if no feasible solution is found from the dual problem, it
230 will minimize the dual objective with the unfeasible
231 constraint weighted by 1e40 -- basically, minimizing the
232 unfeasible constraint until it becomes feasible or until we at
233 least obtain a step towards a feasible point.
235 Svanberg suggested a different approach in his 1987 paper, basically
236 introducing additional penalty variables for unfeasible constraints,
237 but this is easier to implement and at least as efficient. */
239 for (i = 0; i < m; ++i) dual_ub[i] = 1e40;
241 nlopt_set_min_objective(dual_opt, dual_func, &dd);
242 nlopt_set_lower_bounds(dual_opt, dual_lb);
243 nlopt_set_upper_bounds(dual_opt, dual_ub);
244 nlopt_set_stopval(dual_opt, -HUGE_VAL);
245 nlopt_remove_inequality_constraints(dual_opt);
246 nlopt_remove_equality_constraints(dual_opt);
248 while (1) { /* outer iterations */
250 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
251 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
252 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
253 else if (feasible && *minf < stop->minf_max)
254 ret = NLOPT_MINF_MAX_REACHED;
255 if (ret != NLOPT_SUCCESS) goto done;
256 if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
257 memcpy(xprev, xcur, sizeof(double) * n);
259 while (1) { /* inner iterations */
260 double min_dual, infeasibility_cur;
261 int feasible_cur, inner_done;
262 unsigned save_verbose;
263 int new_infeasible_constraint;
266 /* solve dual problem */
267 dd.rho = rho; dd.count = 0;
268 save_verbose = mma_verbose;
269 mma_verbose = 0; /* no recursive verbosity */
270 reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
272 stop->maxtime - (nlopt_seconds()
274 mma_verbose = save_verbose;
275 if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
280 dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
282 printf("MMA dual converged in %d iterations to g=%g:\n",
284 for (i = 0; i < MIN(mma_verbose, m); ++i)
285 printf(" MMA y[%u]=%g, gc[%u]=%g\n",
286 i, y[i], i, dd.gcval[i]);
289 fcur = f(n, xcur, dfdx_cur, f_data);
291 if (nlopt_stop_forced(stop)) {
292 ret = NLOPT_FORCED_STOP; goto done; }
293 feasible_cur = 1; infeasibility_cur = 0;
294 new_infeasible_constraint = 0;
295 inner_done = dd.gval >= fcur;
296 for (i = ifc = 0; ifc < mfc; ++ifc) {
297 nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n,
300 if (nlopt_stop_forced(stop)) {
301 ret = NLOPT_FORCED_STOP; goto done; }
303 for (i = ifc = 0; ifc < mfc; ++ifc) {
304 unsigned i0 = i, inext = i + fc[ifc].m;
305 for (; i < inext; ++i)
306 if (!nlopt_isnan(fcval_cur[i])) {
307 feasible_cur = feasible_cur
308 && (fcval_cur[i] <= fc[ifc].tol[i-i0]);
309 if (!nlopt_isnan(fcval[i]))
310 inner_done = inner_done &&
311 (dd.gcval[i] >= fcval_cur[i]);
312 else if (fcval_cur[i] > 0)
313 new_infeasible_constraint = 1;
314 if (fcval_cur[i] > infeasibility_cur)
315 infeasibility_cur = fcval_cur[i];
319 if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
320 || (!feasible && infeasibility_cur < infeasibility)) {
321 if (mma_verbose && !feasible_cur)
322 printf("MMA - using infeasible point?\n");
323 dd.fval = *minf = fcur;
324 infeasibility = infeasibility_cur;
325 memcpy(fcval, fcval_cur, sizeof(double)*m);
326 memcpy(x, xcur, sizeof(double)*n);
327 memcpy(dfdx, dfdx_cur, sizeof(double)*n);
328 memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
330 /* once we have reached a feasible solution, the
331 algorithm should never make the solution infeasible
332 again (if inner_done), although the constraints may
333 be violated slightly by rounding errors etc. so we
334 must be a little careful about checking feasibility */
335 if (infeasibility_cur == 0) {
336 if (!feasible) { /* reset upper bounds to infin. */
337 for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
338 nlopt_set_upper_bounds(dual_opt, dual_ub);
342 else if (new_infeasible_constraint) feasible = 0;
345 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
346 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
347 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
348 else if (feasible && *minf < stop->minf_max)
349 ret = NLOPT_MINF_MAX_REACHED;
350 if (ret != NLOPT_SUCCESS) goto done;
352 if (inner_done) break;
355 rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
356 for (i = 0; i < m; ++i)
357 if (!nlopt_isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
360 1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i])
364 printf("MMA inner iteration: rho -> %g\n", rho);
365 for (i = 0; i < MIN(mma_verbose, m); ++i)
366 printf(" MMA rhoc[%u] -> %g\n", i,rhoc[i]);
369 if (nlopt_stop_ftol(stop, fcur, fprev))
370 ret = NLOPT_FTOL_REACHED;
371 if (nlopt_stop_x(stop, xcur, xprev))
372 ret = NLOPT_XTOL_REACHED;
373 if (ret != NLOPT_SUCCESS) goto done;
375 /* update rho and sigma for iteration k+1 */
376 rho = MAX(0.1 * rho, MMA_RHOMIN);
378 printf("MMA outer iteration: rho -> %g\n", rho);
379 for (i = 0; i < m; ++i)
380 rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
381 for (i = 0; i < MIN(mma_verbose, m); ++i)
382 printf(" MMA rhoc[%u] -> %g\n", i, rhoc[i]);
384 for (j = 0; j < n; ++j) {
385 double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
386 double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
388 if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
389 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
390 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
393 for (j = 0; j < MIN(mma_verbose, n); ++j)
394 printf(" MMA sigma[%u] -> %g\n",