.\" -*-nroff-*- .\" .\" Keyword argument support .\" .\" (c) 2015 Straylight/Edgeware .\" . .\"----- Licensing notice --------------------------------------------------- .\" .\" This file is part of the Sensible Object Design, an object system for C. .\" .\" SOD is free software; you can redistribute it and/or modify .\" it under the terms of the GNU Library General Public License as .\" published by the Free Software Foundation; either version 2 of the .\" License, or (at your option) any later version. .\" .\" SOD is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU Library General Public License for more details. .\" .\" You should have received a copy of the GNU Library General Public .\" License along with SOD; if not, write to the Free .\" Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, .\" MA 02111-1307, USA. . .\" Highlight using terminal escapes, rather than overstriking. .\"\X'tty: sgr 1' . .\" String definitions and font selection. .ie t \{\ . ds o \(bu . if \n(.g .fam P .\} .el \{\ . ds o o .\} . .\" .hP TEXT -- start an indented paragraph with TEXT hanging off to the left .de hP .IP \h'-\w'\fB\\$1\ \fP'u'\fB\\$1\ \fP\c .. . .de t( 'in +\\n(.ku .. .de t) .in .. . .\"-------------------------------------------------------------------------- .TH keyword 3 "16 December 2015" "Straylight/Edgeware" "Sensible Object Design" . .SH NAME keyword \- keyword argument support library . .\"-------------------------------------------------------------------------- .SH SYNOPSIS .B #include <sod/keyword.h> .PP .B "struct kwval { const char *kw; const void *val; };" .br .B "struct kwtab { const struct kwval *v; size_t n; };" .br .BI "typedef void kw_unkhookfn(const char *" set ", const char *" kw ");" .PP .BI "#define " set "_KWSET(_) \e" .in +4m .BI "_(" type ", " name ", " default ") \e" .br \&... 
.in .IB declaration-specifiers " KWSET_STRUCT(" set ");" .br .IB declaration-specifiers " KWSET_PARSEFN(" set ")" .PP .B KWCALL .IB type0 " " func "(" type1 " " arg1 , .RB ... , .IB typen " " argn , .B "KWTAIL);" .br .BI "KWDECL(" set ", " kw ");" .br .BI "KW_PARSE(" set ", " kw ", " kwfirst ");" .br .BI "KW_PARSE_EMPTY(" set ", " kwfirst ");" .br .BI "KWPARSE(" set ");" .br .BI "KWPARSE_EMPTY(" set ");" .PP .I val .B = .IB func "(" arg1 , .RB ... , .IB argn , .BI "KWARGS(" \c .t( .BI "K(" name ", " value ")" .br .BI "K_VALIST(" ap ")" .br .BI "K_TAB(" v ", " n ")" .br .RB ... ); .t) .br .I val .B = .IB func "(" arg1 , .RB ... , .IB argn , .B "NO_KWARGS);" .PP .B unsigned .BI "KW_COUNT(" set ");" .br .B void .BI "KW_COPY(" \c .t( .IB fromset ", " toset "," .br .BI "const struct " fromset "_kwargs *" kw "," .br .BI "struct kwval *" v ", size_t " n ");" .t) .PP .BI "void kw_unknown(const char *" set ", const char *" kw ); .br .BI "void kw_parseempty(\fP" \c .t( .BI "const char *" set , .BI "const char *" kwfirst , .BI "va_list *" ap , .br .BI "const struct kwval *" v , .BI "size_t " n ); .t) .PP .B "kw_unkhookfn *kw_unkhook;" .br .B "kw_unkhookfn kw_defunknown;" . .\"-------------------------------------------------------------------------- .SH DESCRIPTION . .SS Theory In standard C, the actual arguments provided to a function are matched up with the formal arguments given in the function definition according to their ordering in a list. Unless the (rather cumbersome) machinery for dealing with variable-length argument tails .RB ( <stdarg.h> ) is used, exactly the correct number of arguments must be supplied, and in the correct order. .PP A .I keyword argument is matched by its distinctive .IR name , rather than by its position in a list. Keyword arguments may be .IR omitted , causing some default behaviour by the function. 
A function can detect whether a particular keyword argument was supplied: so the default behaviour need not be the same as that caused by any specific value of the argument. .PP Keyword arguments can be provided in three ways. .hP 1. Directly, as a variable-length argument tail, consisting (for the most part \(en see below) of alternating keyword names, as pointers to null-terminated strings, and argument values, and terminated by a null pointer. This is somewhat error-prone, and the support library defines some macros which help ensure that keyword argument lists are well formed. .hP 2. Indirectly, through a .B va_list object capturing a variable-length argument tail passed to some other function. Such indirect argument tails have the same structure as the direct argument tails described above. Because .B va_list objects are hard to copy, the keyword-argument support library consistently passes .B va_list objects .I by reference throughout its programming interface. .hP 3. Indirectly, through a vector of .B struct kwval objects, each of which contains a keyword name, as a pointer to a null-terminated string, and the .I address of a corresponding argument value. (This indirection is necessary so that the items in the vector can be of uniform size.) Argument vectors are rather inconvenient to use, but are the only practical way in which a caller can decide at runtime which arguments to include in a call, which is useful when writing wrapper functions. . .SS Type definitions The header file defines two simple structure types. .PP .IP .nf .ft B struct kwval { const char *kw; const void *val; }; .fi .PP The .B kwval structure describes a keyword argument name/value pair. The .B kw member points to the name, as a null-terminated string. The .B val member always contains the .I address of the value. (This somewhat inconvenient arrangement makes the size of a .B kwval object independent of the actual argument type.) 
.PP .IP .nf .ft B struct kwtab { const struct kwval *v; size_t n; }; .fi .PP The .B kwtab structure describes a list of keyword arguments, represented as a vector of .B kwval structures. The .B v member points to the start of the vector; the .B n member contains the number of elements in the vector. .PP The .B kw_unkhookfn type is the type of unknown-keyword handler functions. See the descriptions of .B kw_unknown and .B kw_unkhook below. . .SS Calling functions with keyword arguments Functions which accept keyword arguments are ordinary C functions with variable-length argument tails. Hence, they can be called using ordinary C (of the right kind) and all will be well. However, argument lists must follow certain rules (which will be described in full below); failure to do this will result in .IR "undefined behaviour" . The header file provides integration with some C compilers in the form of macros which can be used to help the compiler diagnose errors in calls to keyword-accepting functions; but such support is rather limited at the moment. Some additional macros are provided for use in calls to such functions, and it is recommended that, where possible, these are used. In particular, it's all too easy to forget the trailing null terminator which marks the end of a list of keyword arguments. .PP That said, the underlying machinery is presented first, and the convenience macros are described later. .PP The argument tail, following the mandatory arguments, consists of a sequence of zero or more alternating keyword names, as pointers to null-terminated strings (with type .BR "const char *" ), and their argument values. This sequence is finally terminated by a null pointer (again with type .BR "const char *" ) in place of a keyword name. .PP Each function may define for itself which keyword names it accepts, and what types the corresponding argument values should have. There are also (currently) three special keyword names. 
.TP .B kw.valist This special keyword is followed by a pointer to a variable-length argument tail cursor object, of type .BR "va_list *" . This cursor object will be modified as the function extracts successive arguments from the tail. The argument tail should consist of alternating keyword names and argument values, as described above, including the first keyword name. (This is therefore different from the convention used when calling keyword argument parser functions: see the description of the .B KWSET_PARSEFN macro below for more details about these.) The argument tail may itself contain the special keywords. .TP .B kw.tab This special keyword is followed by .I two argument values: a pointer to the base of a vector of .B kwval structures, and the number of elements in this vector (as a .BR size_t ). Each element of the vector describes a single keyword argument: the .B kw member points to the keyword's name, and the .B val member points to the value. The vector may contain special keywords. The .B val pointer for a .B kw.valist argument should contain the address of an object of type .B "va_list *" (and not point directly to the cursor object, since .B val has type .B "const void *" but the cursor will be modified as its argument tail is traversed). The .B val pointer for a .B kw.tab argument should contain the address of a .B kwtab structure which itself contains the base address and length of the argument vector to be processed. .TP .B kw.unknown This keyword is never accepted by any function. If it is encountered, the .B kw_unknown function is called to report the situation as an error; see below. .PP It is possible to construct a circular structure of indirect argument lists (in a number of ways). Don't try to pass such a structure to a function: the result will be unbounded recursion or some other bad outcome. .PP The macro .BI "KWARGS(" body ")" wraps up a sequence of keyword arguments. 
The single .I body argument consists of a sequence of calls to the keyword-argument macros described below, one after another without any separation. .PP In C89, macro actual arguments are not permitted to be empty; if there are no keyword arguments to provide, then the argument-less macro .B NO_KWARGS should be used instead. If you're using C99 or later, it's fine to just write .B KWARGS() instead. .PP The following keyword-argument macros can be used within .BR KWARGS 's .I body argument. .TP .BI "K(" name ", " value ")" Passes a keyword name and its corresponding value, as a pair of arguments. The .I name should be a single identifier (not a quoted string). The .I value may be any C expression of the appropriate type. .TP .BI "K_VALIST(" ap ")" Passes an indirect variable-length argument tail. The argument .I ap should be an lvalue of type .B va_list which will be passed by reference. .TP .BI "K_TAB(" v ", " n ")" Passes a vector of keyword arguments. The argument .I v should be the base address of the vector, and .I n should be the number of elements in the vector. . .SS Defining functions with keyword arguments A .I "keyword set" defines the collection of keyword arguments accepted by a particular function. The same keyword set may be used by several functions. (If your function currently accepts no keyword arguments, but you plan to add some later, do not define a keyword set, and use the .B KWPARSE_EMPTY macro described below.) .PP Each keyword set has a name, which is a C identifier. It's good to choose meaningful and distinctive names for keyword sets. Keyword set names are meaningful at runtime: they are used as part of the .B kw_unknown protocol (described below), and may be examined by handler functions, or reported to a user in error messages. For a keyword set which is used only by a single function, it is recommended that the set be given the same name as the function. 
.PP The keyword arguments for a keyword set named .I set are described by a `list macro' named .IB set _KWSET \fR. This macro takes a single argument, conventionally named .RB ` _ '. It should expand to a sequence of one or more list items of the form .IP .BI "_(" type ", " name ", " default ")" .PP with no separation between them. .PP For example: .IP .nf .ft B #define example_KWSET(_) \e .in +4m _(int, x, 0) \e _(const char *, y, NULL) .fi .ft P .PP Each .I name should be a distinct C identifier; they will be used to name structure members. An argument .I name should not end with the suffix .RB ` _suppliedp ' (for reasons which will soon become apparent). .PP Each .I type should be a C .I type-name such that .IP .IB type " " name ; .PP is a valid declaration: so it may consist of declaration specifiers and (possibly qualified) pointer declarator markers, but not array or function markers (since they must be placed after the .IR name ). This is the same requirement made by the standard .BR va_arg (3) macro. .PP Each .I default should be an initializer expression or brace-enclosed list, suitable for use in an aggregate initializer for a variable with automatic storage duration. (In C89, aggregate initializers may contain only constant expressions; this restriction was lifted in C99.) .PP The macro .B KWTAIL is expected to be used at the end of a function parameter type list to indicate that the function accepts keyword arguments; if there are preceding mandatory arguments then the .B KWTAIL marker should be separated from them with a comma .RB ` , '. (It is permitted for a function parameter type list to contain only a .B KWTAIL marker.) .PP Specifically, the macro declares a mandatory argument .B const char *kwfirst_ (to collect the first keyword name), and a variable-length argument tail. .PP The macro .B KWPARSE (described below) assumes that the enclosing function's argument list ends with a .B KWTAIL marker. 
The marker should be included both in the function's definition and in any declarations, e.g., in the corresponding header file. .PP The .B KWCALL macro acts as a declaration specifier for functions which accept keyword arguments. Its effect is to arrange for the compiler to check, as far as is possible, that calls to the function are well-formed according to the keyword-argument rules. The exact checking performed depends on the compiler's abilities (and how well supported the compiler is): it may check that every other argument is a string; it may check that the list is terminated with a null pointer; it may not do anything at all. Again, this marker should be included in a function's definition and in any declarations. .PP The .B KWSET_STRUCT macro defines a .IR "keyword structure" . If .I set is a keyword-set name then .IP .BI "KWSET_STRUCT(" set ");" .PP declares a structure .B struct .IB set _kwargs \fR. For each argument defined in the keyword set, this structure contains two members: one has exactly the .I name and .I type listed in the keyword set definition; the other is a 1-bit-wide bitfield of type .B "unsigned int" named .IB name _suppliedp \fR. .PP The macro .B KWDECL declares and initializes a keyword argument structure variable. If .I set is a keyword-set name then .IP .I declaration-specifiers .BI "KWDECL(" set ", " kw ");" .PP declares a variable of type .B struct .IB set _kwargs named .IR kw . The optional .I declaration-specifiers may provide additional storage-class, qualifiers, or other declaration specifiers. The .RB ` _suppliedp ' flags are initialized to zero; the other members are initialized with the corresponding defaults from the keyword-set definition. .PP The macro .B KWSET_PARSEFN defines a keyword argument .IR "parser function" . If .I set is a keyword-set name then .IP .I declaration-specifiers .BI "KWSET_PARSEFN(" set ")" .PP (no trailing semicolon!) 
defines a function .IP .B void .IB set _kwparse( \c .t( .BI "struct " set "_kwargs *" kw "," .br .BI "const char *" kwfirst ", va_list *" ap "," .br .BI "const struct kwval *" v ", size_t " n ");" .t) .PP The macro call can (and usually will) be preceded by storage class specifiers such as .BR static , for example to adjust the linkage of the name. (I don't recommend declaring parser functions .BR inline : parser functions are somewhat large, and modern compilers are pretty good at figuring out whether to inline static functions.) .PP The function's behaviour is as follows. It parses keyword arguments from a variable-length argument tail, and/or a vector of .B kwval structures. When a keyword argument is recognized, for some keyword .IR name , the keyword argument structure pointed to by .I kw is updated: the flag .IB name _suppliedp is set to 1; and the argument value is stored (by simple assignment) in the .I name member. Hence, if the .RB ` _suppliedp ' members are initialized to zero, the caller can determine which keyword arguments were supplied. It is not possible to discover whether two or more arguments have the same keyword: in this case, the value from the last such argument is left in the keyword argument structure, and any values from earlier arguments are lost. (For this purpose, the argument vector .I v is scanned .I after the variable-length argument tail captured in .IR ap .) .PP The variable-argument tail is read from the list described by .BI * ap \fR. 
The argument tail is expected to consist of alternating keyword strings (as ordinary null-terminated strings) and the corresponding values, terminated by a null pointer of type .B "const char *" in place of a keyword; except that the first keyword (or terminating null pointer, if no arguments are provided) is expected to have been extracted already and provided as the .I kwfirst argument; the first argument retrieved using the .B va_list cursor object should then be the value corresponding to the keyword named by .IR kwfirst . (This slightly unusual convention makes it possible for a function to collect the first keyword as a separate mandatory argument, which is essential if there are no other mandatory arguments. It also means that the compiler will emit a diagnostic if you attempt to call a function which expects keyword arguments, but don't supply any and forget the null pointer which terminates the (empty) list.) If .I kwfirst is a null pointer, then .I ap need not be a valid pointer; otherwise, the cursor object .BI * ap will be modified as the function extracts successive arguments from the tail. .PP The keyword vector is read from the vector of .B kwval structures starting at address .I v and containing the following .I n items. If .I n is zero then .I v need not be a valid pointer. .PP The function also handles the special .B kw.valist and .B kw.tab arguments described above. If an unrecognized keyword argument is encountered, then .B kw_unknown is called: see below for details. .PP The .B KW_PARSE macro invokes a keyword argument parsing function. 
If .I set is a keyword-set name, .I kw names a keyword argument structure variable of type .B struct .IB set _kwargs \fR, and .I kwfirst is the name of the enclosing function's last mandatory argument, which must have type .BR "const char *" , then .IP .BI "KW_PARSE(" set ", " kw ", " kwfirst ");" .PP calls the function .IB set _kwparse with five arguments: the address of the keyword argument structure .IR kw ; the string pointer .IR kwfirst ; the address of a temporary argument-tail cursor object of type .BR va_list , constructed on the assumption that .I kwfirst is the enclosing function's final mandatory argument; a null pointer; and the value zero (signifying an empty keyword-argument vector). If the variable .I kw was declared using .B KWDECL and the function .IB set _kwparse has been defined using .B KWSET_PARSEFN then the effect is to parse the keyword arguments passed to the function and set the members of .I kw appropriately. .PP The macro .B KWPARSE (note the lack of underscore) combines .B KWDECL and .BR KW_PARSE . If .I set is a keyword-set name then .IP .BI "KWPARSE(" set ");" .PP declares and initializes a keyword argument structure variable with the fixed name .BR kw , and parses the keyword arguments provided to the enclosing function, storing the results in .BR kw . It assumes that the first keyword name is in an argument named .BR kwfirst_ , as set up by the .B KWTAIL marker described above. .PP The macro expands both to a variable declaration and a statement: in C89, declarations must precede statements, so under C89 rules this macro must appear exactly between the declarations at the head of a brace-enclosed block (typically the function body) and the statements at the end. This restriction was lifted in C99, so the macro may appear anywhere in the function body. 
However, it is recommended that callers avoid taking actions which might require cleanup before attempting to parse their keyword arguments, since keyword argument parsing functions invoke the .B kw_unknown handler if they encounter an unknown keyword, and the calling function will not get a chance to tidy up after itself if this happens. .PP As mentioned above, it is not permitted to define an empty keyword set. (Specifically, invoking .B KWSET_STRUCT for an empty keyword set would result in attempting to define a structure with no members, which C doesn't allow.) On the other hand, keyword arguments are a useful extension mechanism, and it's useful to be able to define a function which doesn't currently accept any keywords, but which might in the future be extended to allow keyword arguments. The macros .B KW_PARSE_EMPTY and .B KWPARSE_EMPTY are analogues of .B KW_PARSE and .B KWPARSE respectively, and handle this case. These macros take a keyword-set name as an argument, but this name is used only in diagnostic messages (e.g., if an unknown keyword name is encountered) and need not (and probably should not) correspond to a defined keyword set. .PP If .I set is an identifier then .IP .BI "KW_PARSE_EMPTY(" set ", " kwfirst ");" .PP calls the function .B kw_parseempty with five arguments: the .I set name, as a string; the string pointer .IR kwfirst ; the address of a temporary argument-tail cursor object of type .BR va_list , constructed on the assumption that .I kwfirst is the enclosing function's final mandatory argument; a null pointer; and the value zero (signifying an empty keyword-argument vector). The effect is to check that the argument tail contains no keyword arguments other than the special predefined ones. .PP If .I set is an identifier then .IP .BI "KWPARSE_EMPTY(" set ");" .PP (note the lack of underscore) checks that the enclosing function has been passed no keyword arguments other than the special predefined ones. 
It assumes that the function's parameter type list ends with the .B KWTAIL marker described above. .PP The .B kw_parseempty function checks a keyword argument list to make sure that it contains no keyword arguments (other than the special ones described above). .PP The .I set argument should point to a null-terminated string: this will be reported as the keyword set name to .BR kw_unknown , though it need not (and likely will not) refer to any defined keyword set. The remaining arguments are as for the keyword parsing functions defined by the .B KWSET_PARSEFN macro. . .SS "Wrapper functions" Most users will not need the hairy machinery involving argument vectors. Their main use is in defining .IR "wrapper functions" . Suppose there is a function .I f which accepts some keyword arguments, and we want to write a function .I g which accepts the same keywords recognized by .I f and some additional ones. Unfortunately .I f may behave differently depending on whether or not a particular keyword argument is supplied at all, but it's not possible to synthesize a valid .B va_list other than by simply capturing a live argument tail, and it's not possible to decide at runtime whether or not to include some arguments in a function call. It's still possible to write .IR g , by building a vector of keyword arguments, collected one-by-one depending on the corresponding .RB ` _suppliedp ' flags (see below). A few macros are provided to make this task easier. .PP The macro .B KW_COUNT returns the number of keywords defined in a keyword set. If .I set is a keyword-set name, then .IP .BI "KW_COUNT(" set ")" .PP returns the number of keywords defined by .IR set , as a constant expression of type .BR "unsigned int" . .PP The macro .B KW_COPY populates a vector of .B kwval structures from a keyword-argument structure. 
If .I fromset and .I toset are two keyword-set names then .IP .BI "KW_COPY(" fromset ", " toset ", " kw ", " v ", " n ");" .PP will populate the vector .IR v , taking argument values from .IR kw . The .I toset must be a subset of .IR fromset : i.e., for every keyword defined in .I toset there is a keyword defined in .I fromset with the same name and type. The remaining arguments are as follows: .I kw is a pointer to a .BI "struct " fromset "_kwargs" keyword-argument structure which has been filled in, e.g., by the keyword-argument parsing function .IB fromset _kwparse \fR; .I v is a pointer to a sufficiently large vector of .B "struct kwval" objects; and .I n is an lvalue designating an object of integer type. Successive elements of .IR v , starting at index .IR n , are filled in to refer to the keyword arguments defined in .I toset whose .RB ` _suppliedp ' flag is set in the argument structure pointed to by .IR kw ; for each such argument, a pointer to the keyword name is stored in the corresponding vector element's .B kw member, and a pointer to the argument value, held in the keyword argument structure, is stored in the vector element's .B val member. At the end of this, the index .I n is advanced so as to contain the index of the first unused element of .IR v . Hence, at most .BI KW_COUNT( toset ) elements of .I v will be used. . .SS Handling unknown-keyword errors When parsing a variable-length argument tail, it is not possible to continue after encountering an unknown keyword name. This is because it is necessary to know the (promoted) type of the following argument value in order to skip past it; but the only clue provided as to the type is the keyword name, which in this case is meaningless. .PP In this situation, the parser functions generated by .B KWSET_PARSEFN (and the .B kw_parseempty function) call .BR kw_unknown . 
This is a function of two arguments: .I set points to the name of the keyword set expected by the caller, as a null-terminated string; and .I kw is the unknown keyword which was encountered. All that .B kw_unknown does is invoke the function whose address is stored in the global variable .B kw_unkhook with the same arguments. The .B kw_unknown function never returns to its caller: if the .B kw_unkhook function returns (which it shouldn't) then .B kw_unknown writes a fatal error message to standard error and calls .BR abort (3). .PP By default .B kw_unkhook points to the function .BR kw_defunknown , which just writes an error message quoting the keyword set name and offending keyword to standard error and calls .BR abort (3). .PP (In freestanding environments, the behaviour may be somewhat different: porting the library to such environments involves choosing appropriate behaviour for the target platform.) .PP As an example of the kind of special effect which can be achieved using this hook, the following hack determines whether a function recognizes a particular keyword argument. .IP .nf .ft B #define KWARGS_TEST(k, val) KWARGS(K(k, val) K(kw.unknown, 0)) static jmp_buf kw_test_jmp; static void kw_test_unknown(const char *set, const char *kw) { if (strcmp(kw, "kw.unknown") == 0) longjmp(kw_test_jmp, 1); else longjmp(kw_test_jmp, 2); } #define KW_TEST(flag, call) do { \e kw_unkhookfn *oldunk = kw_unkhook; \e kw_unkhook = kw_test_unknown; \e switch (setjmp(kw_test_jmp)) { \e case 0: call; abort(); \e case 1: flag = 1; break; \e case 2: flag = 0; break; \e default: abort(); \e } \e kw_unkhook = oldunk; \e } while (0) /* Example of use */ int f; KW_TEST(f, somefunc(1, "two", 3, KWARGS_TEST(shiny, 68.7))); /* now f is nonzero if `somefunc' accepts the `shiny' keyword * (which we hope wants a double argument) */ .ft P .fi . .\"-------------------------------------------------------------------------- .SH BUGS . 
The unknown-keyword hook is inadequate for a modern library, but dealing with multiple threads isn't currently possible without writing (moderately complex) system-specific code. The author's intention is that the hook variable .B kw_unkhook be `owned' by some external library which can make its functionality available to client programs in a safer and more convenient way. On Unix-like platforms (including Cygwin) that library will be (a later version of) .BR mLib ; other platforms will likely need different arrangements. The author is willing to coordinate any such efforts. .PP The whole interface is rather clunky. Working with keyword-argument vectors is especially unpleasant. The remarkable thing is not that it's done well, but that it can be done at all. . .\"-------------------------------------------------------------------------- .SH SEE ALSO . .BR va_start (3), .BR va_arg (3), .BR va_end (3). . .\"-------------------------------------------------------------------------- .SH AUTHOR . Mark Wooding, . .\"----- That's all, folks --------------------------------------------------