1 /* Copyright (c) 2007-2014 Massachusetts Institute of Technology
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 #include "nlopt-util.h"
31 unsigned mma_verbose = 0; /* > 0 for verbose output */
33 #define MIN(a,b) ((a) < (b) ? (a) : (b))
34 #define MAX(a,b) ((a) > (b) ? (a) : (b))
37 static int my_isnan(double x) { return x != x; }
38 # define isnan my_isnan
41 /* magic minimum value for rho in MMA ... the 2002 paper says it should
42 be a "fixed, strictly positive `small' number, e.g. 1e-5"
43 ... grrr, I hate these magic numbers, which seem like they
44 should depend on the objective function in some way ... in particular,
45 note that rho is dimensionful (= dimensions of objective function) */
46 #define MMA_RHOMIN 1e-5
48 /***********************************************************************/
49 /* function for MMA's dual solution of the approximate problem */
52 int count; /* evaluation count, incremented each call */
53 unsigned n; /* must be set on input to dimension of x */
54 const double *x, *lb, *ub, *sigma, *dfdx; /* arrays of length n */
55 const double *dfcdx; /* m-by-n array of fc gradients */
56 double fval, rho; /* must be set on input */
57 const double *fcval, *rhoc; /* arrays of length m */
58 double *xcur; /* array of length n, output each time */
59 double gval, wval, *gcval; /* output each time (array length m) */
62 static double sqr(double x) { return x * x; }
64 static double dual_func(unsigned m, const double *y, double *grad, void *d_)
66 dual_data *d = (dual_data *) d_;
68 const double *x = d->x, *lb = d->lb, *ub = d->ub, *sigma = d->sigma,
70 const double *dfcdx = d->dfcdx;
71 double rho = d->rho, fval = d->fval;
72 const double *rhoc = d->rhoc, *fcval = d->fcval;
73 double *xcur = d->xcur;
74 double *gcval = d->gcval;
82 for (i = 0; i < m; ++i)
83 val += y[i] * (gcval[i] = isnan(fcval[i]) ? 0 : fcval[i]);
85 for (j = 0; j < n; ++j) {
86 double u, v, dx, denominv, c, sigma2, dx2;
88 /* first, compute xcur[j] for y. Because this objective is
89 separable, we can minimize over x analytically, and the minimum
90 dx is given by the solution of a quadratic equation:
91 u dx^2 + 2 v sigma^2 dx + u sigma^2 = 0
92 where u and v are defined by the sums below. Because of
93 the definitions, it is guaranteed that |u/v| <= sigma,
94 and it follows that the only dx solution with |dx| <= sigma
96 (v/u) sigma^2 (-1 + sqrt(1 - (u / v sigma)^2))
97 = (u/v) / (-1 - sqrt(1 - (u / v sigma)^2))
98 (which goes to zero as u -> 0). The latter expression
99 is less susceptible to roundoff error. */
101 if (sigma[j] == 0) { /* special case for lb[i] == ub[i] dims, dx=0 */
107 v = fabs(dfdx[j]) * sigma[j] + 0.5 * rho;
108 for (i = 0; i < m; ++i) if (!isnan(fcval[i])) {
109 u += dfcdx[i*n + j] * y[i];
110 v += (fabs(dfcdx[i*n + j]) * sigma[j] + 0.5 * rhoc[i]) * y[i];
112 u *= (sigma2 = sqr(sigma[j]));
113 dx = (u/v) / (-1 - sqrt(fabs(1 - sqr(u/(v*sigma[j])))));
115 if (xcur[j] > ub[j]) xcur[j] = ub[j];
116 else if (xcur[j] < lb[j]) xcur[j] = lb[j];
117 if (xcur[j] > x[j]+0.9*sigma[j]) xcur[j] = x[j]+0.9*sigma[j];
118 else if (xcur[j] < x[j]-0.9*sigma[j]) xcur[j] = x[j]-0.9*sigma[j];
121 /* function value: */
123 denominv = 1.0 / (sigma2 - dx2);
124 val += (u * dx + v * dx2) * denominv;
126 /* update gval, wval, gcval (approximant functions) */
128 d->gval += (dfdx[j] * c + (fabs(dfdx[j])*sigma[j] + 0.5*rho) * dx2)
130 d->wval += 0.5 * dx2 * denominv;
131 for (i = 0; i < m; ++i) if (!isnan(fcval[i]))
132 gcval[i] += (dfcdx[i*n+j] * c + (fabs(dfcdx[i*n+j])*sigma[j]
133 + 0.5*rhoc[i]) * dx2)
137 /* gradient is easy to compute: since we are at a minimum x (dval/dx=0),
138 we only need the partial derivative with respect to y, and
139 we negate because we are maximizing: */
140 if (grad) for (i = 0; i < m; ++i) grad[i] = -gcval[i];
144 /***********************************************************************/
146 /* note that we implement a hidden feature not in the standard
147 nlopt_minimize_constrained interface: whenever the constraint
148 function returns NaN, that constraint becomes inactive. */
150 nlopt_result mma_minimize(unsigned n, nlopt_func f, void *f_data,
151 unsigned m, nlopt_constraint *fc,
152 const double *lb, const double *ub, /* bounds */
153 double *x, /* in: initial guess, out: minimizer */
155 nlopt_stopping *stop,
158 nlopt_result ret = NLOPT_SUCCESS;
159 double *xcur, rho, *sigma, *dfdx, *dfdx_cur, *xprev, *xprevprev, fcur;
160 double *dfcdx, *dfcdx_cur;
161 double *fcval, *fcval_cur, *rhoc, *gcval, *y, *dual_lb, *dual_ub;
162 unsigned i, ifc, j, k = 0;
165 double infeasibility;
168 m = nlopt_count_constraints(mfc = m, fc);
169 if (nlopt_get_dimension(dual_opt) != m) return NLOPT_INVALID_ARGS;
170 sigma = (double *) malloc(sizeof(double) * (6*n + 2*m*n + m*7));
171 if (!sigma) return NLOPT_OUT_OF_MEMORY;
176 xprevprev = xprev + n;
177 fcval = xprevprev + n;
178 fcval_cur = fcval + m;
179 rhoc = fcval_cur + m;
182 dual_ub = dual_lb + m;
185 dfcdx_cur = dfcdx + m*n;
199 for (j = 0; j < n; ++j) {
200 if (nlopt_isinf(ub[j]) || nlopt_isinf(lb[j]))
201 sigma[j] = 1.0; /* arbitrary default */
203 sigma[j] = 0.5 * (ub[j] - lb[j]);
206 for (i = 0; i < m; ++i) {
208 dual_lb[i] = y[i] = 0.0;
209 dual_ub[i] = HUGE_VAL;
212 dd.fval = fcur = *minf = f(n, x, dfdx, f_data);
214 memcpy(xcur, x, sizeof(double) * n);
215 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
217 feasible = 1; infeasibility = 0;
218 for (i = ifc = 0; ifc < mfc; ++ifc) {
219 nlopt_eval_constraint(fcval + i, dfcdx + i*n,
222 if (nlopt_stop_forced(stop)) { ret = NLOPT_FORCED_STOP; goto done; }
224 for (i = 0; i < m; ++i) {
225 feasible = feasible && (fcval[i] <= 0 || isnan(fcval[i]));
226 if (fcval[i] > infeasibility) infeasibility = fcval[i];
228 /* For non-feasible initial points, set a finite (large)
229 upper-bound on the dual variables. What this means is that,
230 if no feasible solution is found from the dual problem, it
231 will minimize the dual objective with the unfeasible
232 constraint weighted by 1e40 -- basically, minimizing the
233 unfeasible constraint until it becomes feasible or until we at
234 least obtain a step towards a feasible point.
236 Svanberg suggested a different approach in his 1987 paper, basically
237 introducing additional penalty variables for unfeasible constraints,
238 but this is easier to implement and at least as efficient. */
240 for (i = 0; i < m; ++i) dual_ub[i] = 1e40;
242 nlopt_set_min_objective(dual_opt, dual_func, &dd);
243 nlopt_set_lower_bounds(dual_opt, dual_lb);
244 nlopt_set_upper_bounds(dual_opt, dual_ub);
245 nlopt_set_stopval(dual_opt, -HUGE_VAL);
246 nlopt_remove_inequality_constraints(dual_opt);
247 nlopt_remove_equality_constraints(dual_opt);
249 while (1) { /* outer iterations */
251 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
252 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
253 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
254 else if (feasible && *minf < stop->minf_max)
255 ret = NLOPT_MINF_MAX_REACHED;
256 if (ret != NLOPT_SUCCESS) goto done;
257 if (++k > 1) memcpy(xprevprev, xprev, sizeof(double) * n);
258 memcpy(xprev, xcur, sizeof(double) * n);
260 while (1) { /* inner iterations */
261 double min_dual, infeasibility_cur;
262 int feasible_cur, inner_done;
263 unsigned save_verbose;
264 int new_infeasible_constraint;
267 /* solve dual problem */
268 dd.rho = rho; dd.count = 0;
269 save_verbose = mma_verbose;
270 mma_verbose = 0; /* no recursive verbosity */
271 reti = nlopt_optimize_limited(dual_opt, y, &min_dual,
273 stop->maxtime - (nlopt_seconds()
275 mma_verbose = save_verbose;
276 if (reti < 0 || reti == NLOPT_MAXTIME_REACHED) {
281 dual_func(m, y, NULL, &dd); /* evaluate final xcur etc. */
283 printf("MMA dual converged in %d iterations to g=%g:\n",
285 for (i = 0; i < MIN(mma_verbose, m); ++i)
286 printf(" MMA y[%d]=%g, gc[%d]=%g\n",
287 i, y[i], i, dd.gcval[i]);
290 fcur = f(n, xcur, dfdx_cur, f_data);
292 if (nlopt_stop_forced(stop)) {
293 ret = NLOPT_FORCED_STOP; goto done; }
294 feasible_cur = 1; infeasibility_cur = 0;
295 new_infeasible_constraint = 0;
296 inner_done = dd.gval >= fcur;
297 for (i = ifc = 0; ifc < mfc; ++ifc) {
298 nlopt_eval_constraint(fcval_cur + i, dfcdx_cur + i*n,
301 if (nlopt_stop_forced(stop)) {
302 ret = NLOPT_FORCED_STOP; goto done; }
304 for (i = ifc = 0; ifc < mfc; ++ifc) {
305 unsigned i0 = i, inext = i + fc[ifc].m;
306 for (; i < inext; ++i)
307 if (!isnan(fcval_cur[i])) {
308 feasible_cur = feasible_cur
309 && (fcval_cur[i] <= fc[ifc].tol[i-i0]);
310 if (!isnan(fcval[i]))
311 inner_done = inner_done &&
312 (dd.gcval[i] >= fcval_cur[i]);
313 else if (fcval_cur[i] > 0)
314 new_infeasible_constraint = 1;
315 if (fcval_cur[i] > infeasibility_cur)
316 infeasibility_cur = fcval_cur[i];
320 if ((fcur < *minf && (inner_done || feasible_cur || !feasible))
321 || (!feasible && infeasibility_cur < infeasibility)) {
322 if (mma_verbose && !feasible_cur)
323 printf("MMA - using infeasible point?\n");
324 dd.fval = *minf = fcur;
325 infeasibility = infeasibility_cur;
326 memcpy(fcval, fcval_cur, sizeof(double)*m);
327 memcpy(x, xcur, sizeof(double)*n);
328 memcpy(dfdx, dfdx_cur, sizeof(double)*n);
329 memcpy(dfcdx, dfcdx_cur, sizeof(double)*n*m);
331 /* once we have reached a feasible solution, the
332 algorithm should never make the solution infeasible
333 again (if inner_done), although the constraints may
334 be violated slightly by rounding errors etc. so we
335 must be a little careful about checking feasibility */
336 if (infeasibility_cur == 0) {
337 if (!feasible) { /* reset upper bounds to infin. */
338 for (i = 0; i < m; ++i) dual_ub[i] = HUGE_VAL;
339 nlopt_set_upper_bounds(dual_opt, dual_ub);
343 else if (new_infeasible_constraint) feasible = 0;
346 if (nlopt_stop_forced(stop)) ret = NLOPT_FORCED_STOP;
347 else if (nlopt_stop_evals(stop)) ret = NLOPT_MAXEVAL_REACHED;
348 else if (nlopt_stop_time(stop)) ret = NLOPT_MAXTIME_REACHED;
349 else if (feasible && *minf < stop->minf_max)
350 ret = NLOPT_MINF_MAX_REACHED;
351 if (ret != NLOPT_SUCCESS) goto done;
353 if (inner_done) break;
356 rho = MIN(10*rho, 1.1 * (rho + (fcur-dd.gval) / dd.wval));
357 for (i = 0; i < m; ++i)
358 if (!isnan(fcval_cur[i]) && fcval_cur[i] > dd.gcval[i])
361 1.1 * (rhoc[i] + (fcval_cur[i]-dd.gcval[i])
365 printf("MMA inner iteration: rho -> %g\n", rho);
366 for (i = 0; i < MIN(mma_verbose, m); ++i)
367 printf(" MMA rhoc[%d] -> %g\n", i,rhoc[i]);
370 if (nlopt_stop_ftol(stop, fcur, fprev))
371 ret = NLOPT_FTOL_REACHED;
372 if (nlopt_stop_x(stop, xcur, xprev))
373 ret = NLOPT_XTOL_REACHED;
374 if (ret != NLOPT_SUCCESS) goto done;
376 /* update rho and sigma for iteration k+1 */
377 rho = MAX(0.1 * rho, MMA_RHOMIN);
379 printf("MMA outer iteration: rho -> %g\n", rho);
380 for (i = 0; i < m; ++i)
381 rhoc[i] = MAX(0.1 * rhoc[i], MMA_RHOMIN);
382 for (i = 0; i < MIN(mma_verbose, m); ++i)
383 printf(" MMA rhoc[%d] -> %g\n", i, rhoc[i]);
385 for (j = 0; j < n; ++j) {
386 double dx2 = (xcur[j]-xprev[j]) * (xprev[j]-xprevprev[j]);
387 double gam = dx2 < 0 ? 0.7 : (dx2 > 0 ? 1.2 : 1);
389 if (!nlopt_isinf(ub[j]) && !nlopt_isinf(lb[j])) {
390 sigma[j] = MIN(sigma[j], 10*(ub[j]-lb[j]));
391 sigma[j] = MAX(sigma[j], 0.01*(ub[j]-lb[j]));
394 for (j = 0; j < MIN(mma_verbose, n); ++j)
395 printf(" MMA sigma[%d] -> %g\n",