3 * $Id: mdwopt.c,v 1.9 2000/10/08 09:57:31 mdw Exp $
5 * Options parsing, similar to GNU @getopt_long@
7 * (c) 1996 Straylight/Edgeware
10 /*----- Licensing notice --------------------------------------------------*
12 * This file is part of many programs.
14 * `mdwopt' is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU Library General Public License as
16 * published by the Free Software Foundation; either version 2 of the
17 * License, or (at your option) any later version.
19 * `mdwopt' is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Library General Public License for more details.
24 * You should have received a copy of the GNU Library General Public
25 * License along with `mdwopt'; if not, write to the Free
26 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
30 /*----- Revision history --------------------------------------------------*
33 * Revision 1.9 2000/10/08 09:57:31 mdw
34 * Use a copy of @str_qword@ for word splitting, to handle quotes
35 * properly. If building in mLib, use the real thing directly rather than
38 * Revision 1.8 1999/08/19 18:35:27 mdw
39 * Lots of reformatting. Spurious wing comments expunged.
41 * Revision 1.7 1999/06/18 21:59:46 mdw
42 * Fix stupid bug which only read one word from environment variables.
44 * Revision 1.6 1999/05/20 23:00:42 mdw
45 * Little formatting things.
47 * Revision 1.5 1999/05/19 20:23:59 mdw
48 * Change naming to match newer mLib conventions.
50 * Revision 1.4 1999/05/15 10:25:38 mdw
51 * Fix copyright information.
53 * Revision 1.3 1999/05/14 18:51:42 mdw
54 * Reformat the LGPL notice slightly.
56 * Revision 1.2 1999/05/13 22:57:23 mdw
57 * Change `-ise' to `-ize' throughout.
59 * Revision 1.1.1.1 1999/05/05 19:23:47 mdw
60 * New import. The old CVS repository was lost in a disk disaster.
62 * --- Previous lives ---
65 * Revision 1.7 1997/09/11 09:19:11 mdw
66 * (mo__nextWord): Arrrgh. Don't free the environment variable buffer!
67 * People are still using it!
69 * Revision 1.6 1997/09/11 09:05:54 mdw
70 * (mo__nextWord): Fix bug which returns too many words from environment
73 * Revision 1.5 1997/08/09 20:27:59 mdw
74 * Fix spelling of `Licensing'.
76 * Revision 1.4 1997/07/29 21:11:35 mdw
77 * Reformatted. Fixed buffer overflow when dealing with environment
78 * variables. Included NT in list of daft operating systems with `\' as a
79 * path separator. Fixed address of the FSF.
81 * Revision 1.3 1997/02/26 00:41:10 mdw
82 * Added GPL notice to the top. Slight formatting changes.
84 * Revision 1.2 1996/10/28 13:12:13 mdw
85 * Fixed calls to ctype.h routines. Arguments are cast to unsigned char
86 * to avoid invoking undefined behaviour caused by signedness of chars.
88 * Revision 1.1 1996/09/24 18:01:28 mdw
93 /*----- External dependencies ---------------------------------------------*/
102 /*----- Configuration things ----------------------------------------------*/
104 #if defined(__riscos)
106 #elif defined(__OS2__) || defined(__MSDOS__) || defined(__WINNT__)
107 # define PATHSEP '\\'
108 #else /* Assume a sane filing system */
112 /*----- Global variables --------------------------------------------------*/
114 mdwopt_data mdwopt_global = {0, 0, 0, 1, 0, 0, 0, 0, 0};
117 ORD_PERMUTE = 0, /* Permute the options (default) */
118 ORD_RETURN = 1, /* Return non-option things */
119 ORD_POSIX = 2, /* Do POSIX-type hacking */
120 ORD_NEGATE = 4 /* Magic negate-next-thing flag */
123 /*----- Word splitting ----------------------------------------------------*/
127 # define qword str_qword
132 * Arguments: @char **pp@ = address of pointer into string
133 * @unsigned f@ = various flags
135 * Returns: Pointer to the next space-separated possibly-quoted word from
136 * the string, or null.
138 * Use: Fetches the next word from a string. If the flag
139 * @STRF_QUOTE@ is set, the `\' character acts as an escape, and
140 * single and double quotes protect whitespace.
143 #define STRF_QUOTE 1u
145 static char *qword(char **pp, unsigned f)
147 char *p = *pp, *q, *qq;
150 /* --- Preliminaries --- */
154 while (isspace((unsigned char)*p))
161 /* --- Main work --- */
163 for (q = qq = p; *q; q++) {
179 if (isspace((unsigned char)*q)) {
180 do q++; while (*q && isspace((unsigned char)*q));
182 } else if (!(f & STRF_QUOTE))
200 /* --- Finished --- */
210 /*----- Main code ---------------------------------------------------------*/
212 /* --- @nextword@ --- *
214 * Arguments: @int argc@ = number of command line arguments
215 * @char *argv[]@ = pointer to command line arguments
216 * @mdwopt_data *data@ = pointer to persistent state
218 * Returns: Pointer to the next word to handle, or 0
220 * Use: Extracts the next word from the command line or environment
224 static char *nextword(int argc, char *const *argv, mdwopt_data *data)
226 if (data->ind == -1) {
228 if ((p = qword(&data->env, STRF_QUOTE)) != 0)
233 if (data->next == argc)
235 return (argv[data->next++]);
238 /* --- @permute@ --- *
240 * Arguments: @char *argv[]@ = pointer to command line arguments
241 * @mdwopt_data *data@ = pointer to persistent data
245 * Use: Moves a command line option into the right place.
248 static void permute(char *const *argv, mdwopt_data *data)
250 char **v = (char **)argv;
251 if (data->ind != -1) {
252 int i = data->next - 1;
254 while (i > data->ind) {
263 /* --- @findOpt@ --- *
265 * Arguments: @int o@ = which option to search for
266 * @const char *shortopt@ = short options string to search
267 * @mdwopt_data *data@ = pointer to persistent state
269 * Returns: Pointer to rest of short options string (including magic characters)
272 * Use: Looks up a short option in the given string.
275 static const char *findOpt(int o, const char *shortopt,
278 const char *p = shortopt;
283 if (o != *p || (p[1] != '+' && data->order & ORD_NEGATE)) {
295 /* --- @mdwopt@ --- *
297 * Arguments: @int argc@ = number of command line arguments
298 * @char * const *argv@ = pointer to command line arguments
299 * @const char *shortopt@ = pointer to short options information
300 * @const struct option *longopts@ = pointer to long opts info
301 * @int *longind@ = where to store matched longopt
302 * @mdwopt_data *data@ = persistent state for the parser
303 * @int flags@ = various useful flags
305 * Returns: Value of option found next, or an error character, or
306 * @EOF@ for the last thing.
308 * Use: Reads options. The routine should be more-or-less compatible
309 * with standard getopts, although it provides many more
310 * features even than the standard GNU implementation.
312 * The precise manner of options parsing is determined by
313 * various flag settings, which are described below. By setting
314 * flag values appropriately, you can achieve behaviour very
315 * similar to most other getopt routines.
318 * How options parsing appears to users
320 * A command line consists of a number of `words' (which may
321 * contain spaces, according to various shell quoting
322 * conventions). A word may be an option, an argument to an
323 * option, or a non-option. An option begins with a special
324 * character, usually `%|-|%', although `%|+|%' is also used
325 * sometimes. As special exceptions, the word containing only a
326 * `%|-|%' is considered to be a non-option, since it usually
327 * represents standard input or output as a filename, and the
328 * word containing a double-dash `%|--|%' is used to mark all
329 * following words as being non-options regardless of their initial character.
332 * Traditionally, all words after the first non-option have been
333 * considered to be non-options automatically, so that options
334 * must be specified before filenames. However, this
335 * implementation can extract all the options from the command
336 * line regardless of their position. This can usually be
337 * disabled by setting one of the environment variables
338 * `%|POSIXLY_CORRECT|%' or `%|_POSIX_OPTION_ORDER|%'.
340 * There are two different styles of options: `short' and `long'.
343 * Short options are the sort which Unix has known for ages: an
344 * option is a single letter, preceded by a `%|-|%'. Short
345 * options can be joined together to save space (and possibly to
346 * make silly words): e.g., instead of giving options
347 * `%|-x -y|%', a user could write `%|-xy|%'. Some short
348 * options can have arguments, which appear after the option
349 * letter, either immediately following, or in the next `word'
350 * (so an option with an argument could be written as
351 * `%|-o foo|%' or as `%|-ofoo|%'). Note that options with
352 * optional arguments must be written in the second style.
354 * When a short option controls a flag setting, it is sometimes
355 * possible to explicitly turn the flag off, as well as turning
356 * it on, (usually to override default options). This is
357 * usually done by using a `%|+|%' instead of a `%|-|%' to
358 * introduce the option.
360 * Long options, as popularized by the GNU utilities, are given
361 * long-ish memorable names, preceded by a double-dash `%|--|%'.
362 * Since their names are more than a single character, long
363 * options can't be combined in the same way as short options.
364 * Arguments to long options may be given either in the same
365 * `word', separated from the option name by an equals sign, or
366 * in the following `word'.
368 * Long option names can be abbreviated if necessary, as long
369 * as the abbreviation is unique. This means that options can
370 * have sensible and memorable names but still not require much
371 * typing from an experienced user.
373 * Like short options, long options can control flag settings.
374 * The options to manipulate these settings come in pairs: an
375 * option of the form `%|--set-flag|%' might set the flag, while
376 * an option of the form `%|--no-set-flag|%' might clear it.
378 * It is usual for applications to provide both short and long
379 * options with identical behaviour. Some applications with
380 * lots of options may only provide long options (although they
381 * will often be only two or three characters long). In this
382 * case, long options can be preceded with a single `%|-|%'
383 * character, and negated by a `%|+|%' character.
385 * Finally, some (older) programs accept arguments of the form
386 * `%%@.{"-"<number>}%%', to set some numerical parameter,
387 * typically a line count of some kind.
390 * How programs parse options
392 * An application parses its options by calling mdwopt
393 * repeatedly. Each time it is called, mdwopt returns a value
394 * describing the option just read, and stores information about
395 * the option in a data block. The value %$-1$% is returned
396 * when there are no more options to be read. The `%|?|%'
397 * character is returned when an error is encountered.
399 * Before starting to parse options, the value @data->ind@ must
400 * be set to 0 or 1. The value of @data->err@ can also be set,
401 * to choose whether errors are reported by mdwopt.
403 * The program's `@argc@' and `@argv@' arguments are passed to
404 * the options parser, so that it can read the command line. A
405 * flags word is also passed, allowing the program fine control
406 * over parsing. The flags are described above.
408 * Short options are described by a string, which once upon a
409 * time just contained the permitted option characters. Now the
410 * options string begins with a collection of flag characters,
411 * and various flag characters can be put after options
412 * characters to change their properties.
414 * If the first character of the short options string is
415 * `%|+|%', `%|-|%' or `%|!|%', the order in which options are
416 * read is modified, as follows:
418 * `%|+|%' forces the POSIX order to be used. As soon as a non-
419 * option is found, mdwopt returns %$-1$%.
421 * `%|-|%' makes mdwopt treat non-options as being `special'
422 * sorts of option. When a non-option word is found, the
423 * value 0 is returned, and the actual text of the word
424 * is stored as being the option's argument.
426 * `%|!|%' forces the default order to be used. The entire
427 * command line is scanned for options, which are
428 * returned in order. However, during this process,
429 * the options are moved in the @argv@ array, so that
430 * they appear before the non-options.
432 * A `%|:|%' character may be placed after the ordering flag (or
433 * at the very beginning if no ordering flag is given) which
434 * indicates that the character `%|:|%', rather than `%|?|%',
435 * should be returned if a missing argument error is detected.
437 * Each option in the string can be followed by a `%|+|%' sign,
438 * indicating that it can be negated, a `%|:|%' sign indicating
439 * that it requires an argument, or a `%|::|%' string,
440 * indicating an optional argument. Both `%|+|%' and `%|:|%' or
441 * `%|::|%' may be given, although the `%|+|%' must come first.
443 * If an option is found, the option character is returned to
444 * the caller. A pointer to an argument is stored in
445 * @data->arg@, or @NULL@ is stored if there was no argument.
446 * If a negated option was found, the option character is
447 * returned ORred with @OPTF_NEGATED@ (bit 8 set).
449 * Long options are described in a table. Each entry in the
450 * table is of type @struct option@, and the table is terminated
451 * by an entry whose @name@ field is null. Each option has
452 * a flags word which, due to historical reasons, is called
453 * @has_arg@. This describes various properties of the option,
454 * such as what sort of argument it takes, and whether it can be negated.
457 * When mdwopt finds a long option, it looks the name up in the
458 * table. The index of the matching entry is stored in the
459 * @longind@ variable, passed to mdwopt (unless @longind@ is 0):
460 * a value of %$-1$% indicates that no long option was
461 * found. The behaviour is then dependent on the values in the
462 * table entry. If @flag@ is nonzero, it points to an integer
463 * to be modified by mdwopt. Usually the value in the @val@
464 * field is simply stored in the @flag@ variable. If the flag
465 * @OPTF_SWITCH@ is set, however, the value is combined with
466 * the existing value of the flags using a bitwise OR. If
467 * @OPTF_NEGATE@ is set, then the flag bit will be cleared if a
468 * matching negated long option is found. The value 0 is returned.
471 * If @flag@ is zero, the value in @val@ is returned by mdwopt,
472 * possibly with bit 8 set if the option was negated.
474 * Arguments for long options are stored in @data->arg@, as for short options.
477 * Numeric options, if enabled, cause the value `%|#|%' to be
478 * returned, and the numeric value to be stored in @data->opt@.
480 * If the flag @OPTF_ENVVAR@ is set on entry, options will be
481 * extracted from an environment variable whose name is built by
482 * capitalizing all the letters of the program's name. (This
483 * allows a user to have different default settings for a
484 * program, by calling it through different symbolic links.)
487 int mdwopt(int argc, char *const *argv,
488 const char *shortopt,
489 const struct option *longopts, int *longind,
490 mdwopt_data *data, int flags)
492 /* --- Local variables --- */
499 /* --- Sort out our data --- */
502 data = &mdwopt_global;
504 /* --- See if this is the first time --- */
506 if (data->ind == 0 || (data->ind == 1 && ~flags & OPTF_NOPROGNAME)) {
508 /* --- Sort out default returning order --- */
510 if (getenv("_POSIX_OPTION_ORDER") ||
511 getenv("POSIXLY_CORRECT"))
512 data->order = ORD_POSIX;
514 data->order = ORD_PERMUTE;
516 /* --- Now see what the caller actually wants --- */
518 switch (shortopt[0]) {
520 data->order = ORD_RETURN;
523 data->order = ORD_POSIX;
526 data->order = ORD_PERMUTE;
530 /* --- Now decide on the program's name --- */
532 if (~flags & OPTF_NOPROGNAME) {
533 p = q = (char *)argv[0];
540 data->ind = data->next = 1;
543 /* --- See about environment variables --- *
545 * Be careful. The program may be setuid, and an attacker might have
546 * given us a long name in @argv[0]@. If the name is very long, don't
547 * support this option.
550 if (flags & OPTF_ENVVAR && strlen(data->prog) < 48) {
554 /* --- For RISC OS, support a different format --- *
556 * Acorn's RISC OS tends to put settings in variables named
557 * `App$Options' rather than `APP'. Under RISC OS, I'll support
558 * both methods, just to avoid confuddlement.
562 sprintf(buf, "%s$Options", data->prog);
570 *p++ = toupper(*q++);
578 /* --- Copy the options string into a buffer --- */
581 q = malloc(strlen(p) + 1);
584 "%s: Not enough memory to read settings in "
585 "environment variable\n",
590 data->env = data->estart = q;
597 data->ind = data->next = 0;
600 /* --- Do some initial bodgery --- *
602 * The @shortopt@ string can have some interesting characters at the
603 * beginning. We'll skip past them.
606 switch (shortopt[0]) {
614 if (shortopt[0] == ':') {
624 /* --- Now go off and search for an option --- */
626 if (!data->list || !*data->list) {
627 data->order &= 3; /* Clear negation flag */
629 /* --- Now we need to find the next option --- *
631 * Exactly how we do this depends on the settings of the order variable.
632 * We identify options as being things starting with `%|-|%', and which
633 * aren't equal to `%|-|%' or `%|--|%'. We'll look for options until:
635 * * We find something which isn't an option AND @order == ORD_POSIX@
636 * * We find a `%|--|%'
637 * * We reach the end of the list
639 * There are some added little wrinkles, which we'll meet as we go.
643 p = nextword(argc, argv, data);
647 /* --- See if we've found an option --- */
649 if ((p[0] == '-' || (p[0] == '+' && flags & OPTF_NEGATION)) &&
651 if (strcmp(p, "--") == 0) {
658 /* --- Figure out how to proceed --- */
660 switch (data->order & 3) {
673 /* --- We found an option --- */
677 /* --- Check for a numeric option --- *
679 * We only check the first character (or the second if the first is a
680 * sign). This ought to be enough.
683 if (flags & OPTF_NUMBERS && (p[0] == '-' || flags & OPTF_NEGNUMBER)) {
684 if (((p[1] == '+' || p[1] == '-') && isdigit((unsigned char)p[2])) ||
685 isdigit((unsigned char)p[1])) {
686 data->opt = strtol(p + 1, &data->arg, 10);
687 while (isspace((unsigned char)data->arg[0]))
691 return (p[0] == '-' ? '#' : '#' | OPTF_NEGATED);
695 /* --- Check for a long option --- */
698 data->order |= ORD_NEGATE;
700 if (((p[0] == '-' && p[1] == '-') ||
701 (flags & OPTF_NOSHORTS && !findOpt(p[1], shortopt, data))) &&
702 (~flags & OPTF_NOLONGS))
707 data->order |= ORD_NEGATE;
710 } else if (p[1] == '-') {
711 if ((flags & OPTF_NEGATION) && strncmp(p + 2, "no-", 3) == 0) {
714 data->order |= ORD_NEGATE;
720 if ((flags & OPTF_NEGATION) && strncmp(p + 1, "no-", 3) == 0) {
723 data->order |= ORD_NEGATE;
730 for (i = 0; longopts[i].name; i++) {
731 if ((data->order & ORD_NEGATE) &&
732 (~longopts[i].has_arg & OPTF_NEGATE))
735 r = (char *) longopts[i].name;
738 if (*q == 0 || *q == '=') {
760 fprintf(stderr, "%s: unrecognized option `%s%s'\n",
770 /* --- Handle argument behaviour --- */
772 while (*p != 0 && *p != '=')
774 p = (*p ? p + 1 : 0);
775 q = (char *) longopts[match].name;
777 switch (longopts[match].has_arg & OPTF_ARG) {
782 "%s: option `%s%s' does not accept arguments\n",
792 p = nextword(argc, argv, data);
796 fprintf(stderr, "%s: option `%s%s' requires an argument\n",
813 /* --- Do correct things now we have a match --- */
815 if (longopts[match].flag) {
816 if (longopts[match].has_arg & OPTF_SWITCH) {
817 if (data->order & ORD_NEGATE)
818 *longopts[match].flag &= ~longopts[match].val;
820 *longopts[match].flag |= longopts[match].val;
822 if (data->order & ORD_NEGATE)
823 *longopts[match].flag = 0;
825 *longopts[match].flag = longopts[match].val;
829 if (data->order & ORD_NEGATE)
830 return (longopts[match].val | OPTF_NEGATED);
832 return (longopts[match].val);
836 /* --- Do short options things --- */
840 data->order |= ORD_NEGATE;
845 /* --- Now process the short options --- */
850 p = (char *) findOpt(i, shortopt, data);
853 fprintf(stderr, "%s: unknown option `%c%c'\n",
855 data->order & ORD_NEGATE ? '+' : '-',
863 /* --- Sort out an argument, if we expect one --- */
866 q = (data->list[0] ? data->list : 0);
868 if (p[1] != ':' && !q) {
870 /* --- Same code as before --- */
872 q = nextword(argc, argv, data);
875 fprintf(stderr, "%s: option `%c%c' requires an argument\n",
877 data->order & ORD_NEGATE ? '+' : '-',
887 return ((data->order & ORD_NEGATE) ? i | OPTF_NEGATED : i);
890 /*----- That's all, folks -------------------------------------------------*/