From: Richard Kettlewell Date: Wed, 7 May 2008 20:04:23 +0000 (+0100) Subject: Expansion syntax rewrite. Not documented yet, but then nor was the X-Git-Tag: 4.0~76^2~46 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/disorder/commitdiff_plain/f5fdc06f8777fabca1225b989cd540520968ca9a?hp=15091f766f1e8c380d2a7f752c387a2cecaf5bb1 Expansion syntax rewrite. Not documented yet, but then nor was the previous rewrite. --- diff --git a/lib/macros-builtin.c b/lib/macros-builtin.c index 17ec99c..0859695 100644 --- a/lib/macros-builtin.c +++ b/lib/macros-builtin.c @@ -122,7 +122,7 @@ static int exp_include(int attribute((unused)) nargs, struct stat sb; if(!(path = mx_find(args[0]))) { - if(sink_printf(output, "[[cannot find '%s']]", name) < 0) + if(sink_printf(output, "[[cannot find '%s']]", args[0]) < 0) return 0; return 0; } diff --git a/lib/macros.c b/lib/macros.c index 4e92236..fca59be 100644 --- a/lib/macros.c +++ b/lib/macros.c @@ -107,6 +107,13 @@ static int mx__expand_macro(const struct expansion *e, /* Parsing ------------------------------------------------------------------ */ +static int next_non_whitespace(const char *input, + const char *end) { + while(input < end && isspace((unsigned char)*input)) + ++input; + return input < end ? 
*input : -1; +} + /** @brief Parse a template * @param filename Input filename (for diagnostics) * @param line Line number (use 1 on initial call) @@ -126,18 +133,21 @@ const struct mx_node *mx_parse(const char *filename, int line, const char *input, const char *end) { - int braces, expansion_start_line, argument_start_line; - const char *argument_start, *argument_end, *p; + int braces, argument_start_line, obracket, cbracket; + const char *argument_start, *argument_end; struct mx_node_vector v[1]; struct dynstr d[1]; struct mx_node *head = 0, **tailp = &head, *e; - int omitted_terminator; if(!end) end = input + strlen(input); while(input < end) { if(*input != '@') { - expansion_start_line = line; + e = xmalloc(sizeof *e); + e->next = 0; + e->filename = filename; + e->line = line; + e->type = MX_TEXT; dynstr_init(d); /* Gather up text without any expansions in. */ while(input < end && *input != '@') { @@ -146,126 +156,115 @@ const struct mx_node *mx_parse(const char *filename, dynstr_append(d, *input++); } dynstr_terminate(d); - e = xmalloc(sizeof *e); - e->next = 0; - e->filename = filename; - e->line = expansion_start_line; - e->type = MX_TEXT; e->text = d->vec; *tailp = e; tailp = &e->next; continue; } - mx_node_vector_init(v); - braces = 0; - p = input; - ++input; - expansion_start_line = line; - omitted_terminator = 0; - while(!omitted_terminator && input < end && *input != '@') { - /* Skip whitespace */ - if(isspace((unsigned char)*input)) { - if(*input == '\n') - ++line; - ++input; - continue; + if(input + 1 < end) + switch(input[1]) { + case '@': + /* '@@' expands to '@' */ + e = xmalloc(sizeof *e); + e->next = 0; + e->filename = filename; + e->line = line; + e->type = MX_TEXT; + e->text = "@"; + *tailp = e; + tailp = &e->next; + input += 2; + continue; + case '#': + /* '@#' starts a (newline-eating comment), like dnl */ + input += 2; + while(input < end && *input != '\n') + ++input; + if(*input == '\n') { + ++line; + ++input; + } + continue; + case '_': + 
/* '@_' expands to nothing. It's there to allow dump to terminate + * expansions without having to know what follows. */ + input += 2; + continue; } - if(*input == '{') { - /* This is a bracketed argument. We'll walk over it counting - * braces to figure out where the end is. */ - ++input; - argument_start = input; - argument_start_line = line; - while(input < end && (*input != '}' || braces > 0)) { - switch(*input++) { - case '{': ++braces; break; - case '}': --braces; break; - case '\n': ++line; break; - } - } - /* If we run out of input without seeing a '}' that's an error */ - if(input >= end) - fatal(0, "%s:%d: unterminated expansion '%.*s'", + /* It's a full expansion */ + ++input; + e = xmalloc(sizeof *e); + e->next = 0; + e->filename = filename; + e->line = line; + e->type = MX_EXPANSION; + /* Collect the expansion name. Expansion names start with an alnum and + * consist of alnums and '-'. We don't permit whitespace between the '@' + * and the name. */ + dynstr_init(d); + if(input == end || !isalnum((unsigned char)*input)) + fatal(0, "%s:%d: invalid expansion", filename, e->line); + while(input < end && (isalnum((unsigned char)*input) || *input == '-')) + dynstr_append(d, *input++); + dynstr_terminate(d); + e->name = d->vec; + /* See what the bracket character is */ + obracket = next_non_whitespace(input, end); + switch(obracket) { + case '(': cbracket = ')'; break; + case '[': cbracket = ']'; break; + case '{': cbracket = '}'; break; + default: obracket = -1; break; /* no arguments */ + } + mx_node_vector_init(v); + if(obracket >= 0) { + /* Gather up arguments */ + while(next_non_whitespace(input, end) == obracket) { + while(isspace((unsigned char)*input)) { + if(*input == '\n') + ++line; + ++input; + } + ++input; /* the bracket */ + braces = 0; + /* Find the end of the argument */ + argument_start = input; + argument_start_line = line; + while(input < end && (*input != cbracket || braces > 0)) { + const int c = *input++; + + if(c == obracket) + ++braces; 
+ else if(c == cbracket) + --braces; + else if(c == '\n') + ++line; + } + if(input >= end) { + /* We ran out of input without encountering a balanced cbracket */ + fatal(0, "%s:%d: unterminated expansion argument '%.*s'", filename, argument_start_line, (int)(input - argument_start), argument_start); + } /* Consistency check */ - assert(*input == '}'); + assert(*input == cbracket); /* Record the end of the argument */ argument_end = input; - /* Step over the '}' */ + /* Step over the cbracket */ ++input; - if(input < end && isspace((unsigned char)*input)) { - /* There is at least some whitespace after the '}'. Look - * ahead and see what is after all the whitespace. */ - for(p = input; p < end && isspace((unsigned char)*p); ++p) - ; - /* Now we are looking after the whitespace. If it's - * anything other than '{', including the end of the input, - * then we infer that this expansion finished at the '}' we - * just saw. (NB that we don't move input forward to p - - * the whitespace is NOT part of the expansion.) */ - if(p == end || *p != '{') - omitted_terminator = 1; - } - } else { - /* We are looking at an unbracketed argument. (A common example would - * be the expansion or macro name.) This is terminated by an '@' - * (indicating the end of the expansion), a ':' (allowing a subsequent - * unbracketed argument) or a '{' (allowing a bracketed argument). The - * end of the input will also do. */ - argument_start = input; - argument_start_line = line; - while(input < end - && *input != '@' && *input != '{' && *input != ':') { - if(*input == '\n') ++line; - ++input; - } - argument_end = input; - /* Trailing whitespace is not significant in unquoted arguments (and - * leading whitespace is eliminated by the whitespace skip above). 
*/ - while(argument_end > argument_start - && isspace((unsigned char)argument_end[-1])) - --argument_end; - /* Step over the ':' if that's what we see */ - if(input < end && *input == ':') - ++input; + /* Now we have an argument in [argument_start, argument_end), and we + * know its filename and initial line number. This is sufficient to + * parse it. */ + mx_node_vector_append(v, mx_parse(filename, argument_start_line, + argument_start, argument_end)); } - /* Now we have an argument in [argument_start, argument_end), and we know - * its filename and initial line number. This is sufficient to parse - * it. */ - mx_node_vector_append(v, mx_parse(filename, argument_start_line, - argument_start, argument_end)); } - /* We're at the end of an expansion. We might have hit the end of the - * input, we might have hit an '@' or we might have matched the - * omitted_terminator criteria. */ - if(input < end) { - if(!omitted_terminator) { - assert(*input == '@'); - ++input; - } - } - /* @@ terminates this file */ - if(v->nvec == 0) - break; - /* Currently we require that the first element, the expansion name, is - * always plain text. Removing this restriction would raise some - * interesting possibilities but for the time being it is considered an - * error. */ - if(v->vec[0]->type != MX_TEXT) - fatal(0, "%s:%d: expansion names may not themselves contain expansions", - v->vec[0]->filename, v->vec[0]->line); /* Guarantee a NULL terminator (for the case where there's more than one * argument) */ mx_node_vector_terminate(v); - e = xmalloc(sizeof *e); - e->next = 0; - e->filename = filename; - e->line = expansion_start_line; - e->type = MX_EXPANSION; - e->name = v->vec[0]->text; - e->nargs = v->nvec - 1; - e->args = v->nvec > 1 ? 
&v->vec[1] : 0; + /* Fill in the remains of the node */ + e->nargs = v->nvec; + e->args = v->vec; *tailp = e; tailp = &e->next; } @@ -274,11 +273,14 @@ const struct mx_node *mx_parse(const char *filename, static void mx__dump(struct dynstr *d, const struct mx_node *m) { int n; - + const struct mx_node *mm; + if(!m) return; switch(m->type) { case MX_TEXT: + if(m->text[0] == '@') + dynstr_append(d, '@'); dynstr_append_string(d, m->text); break; case MX_EXPANSION: @@ -289,7 +291,22 @@ static void mx__dump(struct dynstr *d, const struct mx_node *m) { mx__dump(d, m->args[n]); dynstr_append(d, '}'); } - dynstr_append(d, '@'); + /* If the next non-whitespace is '{', add @_ to stop it being + * misinterpreted */ + mm = m->next; + while(mm && mm->type == MX_TEXT) { + switch(next_non_whitespace(mm->text, mm->text + strlen(mm->text))) { + case -1: + mm = mm->next; + continue; + case '{': + dynstr_append_string(d, "@_"); + break; + default: + break; + } + break; + } break; default: assert(!"invalid m->type"); @@ -297,7 +314,10 @@ static void mx__dump(struct dynstr *d, const struct mx_node *m) { mx__dump(d, m->next); } -/** @brief Dump a parse macro expansion to a string */ +/** @brief Dump a parsed macro expansion to a string + * + * Not of production quality! Only intended for testing! 
+ */ char *mx_dump(const struct mx_node *m) { struct dynstr d[1]; diff --git a/lib/macros.h b/lib/macros.h index 49c6834..a311102 100644 --- a/lib/macros.h +++ b/lib/macros.h @@ -42,10 +42,10 @@ struct mx_node { int line; /** @brief Plain text (if @p type is @ref MX_TEXT) */ - char *text; + const char *text; /** @brief Expansion name (if @p type is @ref MX_EXPANSION) */ - char *name; + const char *name; /** @brief Argument count (if @p type is @ref MX_EXPANSION) */ int nargs; diff --git a/lib/t-macros.c b/lib/t-macros.c index a34ff47..19bc14a 100644 --- a/lib/t-macros.c +++ b/lib/t-macros.c @@ -55,124 +55,53 @@ static void test_macros(void) { /* Simple macro parsing --------------------------------------------------- */ /* The simplest possible expansion */ - m = mx_parse("macro1", 1, "@macro@", NULL); + m = mx_parse("macro1", 1, "@macro", NULL); check_integer(m->type, MX_EXPANSION); check_string(m->filename, "macro1"); check_integer(m->line, 1); check_string(m->name, "macro"); check_integer(m->nargs, 0); - insist(m->args == 0); insist(m->next == 0); - check_string(mx_dump(m), "@macro@"); + check_string(mx_dump(m), "@macro"); - /* Spacing variants of the above */ - m = mx_parse("macro2", 1, "@ macro@", NULL); + m = mx_parse("macro2", 1, "@macro ", NULL); check_integer(m->type, MX_EXPANSION); check_string(m->filename, "macro2"); check_integer(m->line, 1); check_string(m->name, "macro"); check_integer(m->nargs, 0); - insist(m->args == 0); - insist(m->next == 0); - check_string(mx_dump(m), "@macro@"); - m = mx_parse("macro3", 1, "@macro @", NULL); - check_integer(m->type, MX_EXPANSION); - check_string(m->filename, "macro3"); - check_integer(m->line, 1); - check_string(m->name, "macro"); - check_integer(m->nargs, 0); - insist(m->args == 0); - insist(m->next == 0); - check_string(mx_dump(m), "@macro@"); - - /* Unterminated variants */ - m = mx_parse("macro4", 1, "@macro", NULL); - check_integer(m->type, MX_EXPANSION); - check_string(m->filename, "macro4"); - 
check_integer(m->line, 1); - check_string(m->name, "macro"); - check_integer(m->nargs, 0); - insist(m->args == 0); - insist(m->next == 0); - check_string(mx_dump(m), "@macro@"); - m = mx_parse("macro5", 1, "@macro ", NULL); - check_integer(m->type, MX_EXPANSION); - check_string(m->filename, "macro5"); - check_integer(m->line, 1); - check_string(m->name, "macro"); - check_integer(m->nargs, 0); - insist(m->args == 0); - insist(m->next == 0); - check_string(mx_dump(m), "@macro@"); - - /* Macros with a :-separated argument */ - m = mx_parse("macro5", 1, "@macro:arg@", NULL); - check_integer(m->type, MX_EXPANSION); - check_string(m->filename, "macro5"); - check_integer(m->line, 1); - check_string(m->name, "macro"); - check_integer(m->nargs, 1); - insist(m->next == 0); - - check_integer(m->args[0]->type, MX_TEXT); - check_string(m->args[0]->filename, "macro5"); - check_integer(m->args[0]->line, 1); - check_string(m->args[0]->text, "arg"); - insist(m->args[0]->next == 0); - - check_string(mx_dump(m), "@macro{arg}@"); - - /* Multiple :-separated arguments, and spacing, and newlines */ - m = mx_parse("macro6", 1, "@macro : \n arg1 : \n arg2@", NULL); - check_integer(m->type, MX_EXPANSION); - check_string(m->filename, "macro6"); - check_integer(m->line, 1); - check_string(m->name, "macro"); - check_integer(m->nargs, 2); - insist(m->next == 0); - - check_integer(m->args[0]->type, MX_TEXT); - check_string(m->args[0]->filename, "macro6"); - check_integer(m->args[0]->line, 2); - check_string(m->args[0]->text, "arg1"); - insist(m->args[0]->next == 0); - - check_integer(m->args[1]->type, MX_TEXT); - check_string(m->args[1]->filename, "macro6"); - check_integer(m->args[1]->line, 3); - check_string(m->args[1]->text, "arg2"); - insist(m->args[1]->next == 0); - - check_string(mx_dump(m), "@macro{arg1}{arg2}@"); + insist(m->next != 0); + check_integer(m->next->type, MX_TEXT); + check_string(mx_dump(m), "@macro "); /* Multiple bracketed arguments */ - m = mx_parse("macro7", 1, 
"@macro{arg1}{arg2}@", NULL); - check_string(mx_dump(m), "@macro{arg1}{arg2}@"); + m = mx_parse("macro7", 1, "@macro{arg1}{arg2}", NULL); + check_string(mx_dump(m), "@macro{arg1}{arg2}"); - m = mx_parse("macro8", 1, "@macro{\narg1}{\narg2}@", NULL); - check_string(mx_dump(m), "@macro{\narg1}{\narg2}@"); + m = mx_parse("macro8", 1, "@macro{\narg1}{\narg2}", NULL); + check_string(mx_dump(m), "@macro{\narg1}{\narg2}"); check_integer(m->args[0]->line, 1); check_integer(m->args[1]->line, 2); /* ...yes, lines 1 and 2: the first character of the first arg is * the \n at the end of line 1. Compare with macro9: */ - m = mx_parse("macro9", 1, "@macro\n{arg1}\n{arg2}@", NULL); - check_string(mx_dump(m), "@macro{arg1}{arg2}@"); + m = mx_parse("macro9", 1, "@macro\n{arg1}\n{arg2}", NULL); + check_string(mx_dump(m), "@macro{arg1}{arg2}"); check_integer(m->args[0]->line, 2); check_integer(m->args[1]->line, 3); /* Arguments that themselves contain expansions */ - m = mx_parse("macro10", 1, "@macro{@macro2{arg1}{arg2}@}@", NULL); - check_string(mx_dump(m), "@macro{@macro2{arg1}{arg2}@}@"); + m = mx_parse("macro10", 1, "@macro{@macro2{arg1}{arg2}}", NULL); + check_string(mx_dump(m), "@macro{@macro2{arg1}{arg2}}"); /* ...and with omitted trailing @ */ m = mx_parse("macro11", 1, "@macro{@macro2{arg1}{arg2}}", NULL); - check_string(mx_dump(m), "@macro{@macro2{arg1}{arg2}@}@"); + check_string(mx_dump(m), "@macro{@macro2{arg1}{arg2}}"); /* Similarly but with more whitespace; NB that the whitespace is * preserved. 
*/ m = mx_parse("macro12", 1, "@macro {@macro2 {arg1} {arg2} }\n", NULL); - check_string(mx_dump(m), "@macro{@macro2{arg1}{arg2}@ }@\n"); + check_string(mx_dump(m), "@macro{@macro2{arg1}{arg2} }\n"); /* Simple expansions ------------------------------------------------------ */ @@ -197,13 +126,21 @@ static void test_macros(void) { check_macro("empty", "", "", 0); check_macro("plain", plain, plain, 0); + check_macro("quote1", "@@", "@", 0); + check_macro("quote2", "@@@@", "@@", 0); + check_macro("nothing1", "@_", "", 0); + check_macro("nothing2", "<@_>", "<>", 0); check_macro("if1", "@if{true}{yes}{no}", "yes", 0); check_macro("if2", "@if{true}{yes}", "yes", 0); check_macro("if3", "@if{false}{yes}{no}", "no", 0); check_macro("if4", "@if{false}{yes}", "", 0); check_macro("if5", "@if{ true}{yes}", "", 0); + check_macro("if6", "@if{true}{yes}@_{wible}t", "yes{wible}t", 0); + check_macro("br1", "@if(true)(yes)(no)", "yes", 0); + check_macro("br1", "@if[true][yes]{no}", "yes{no}", 0); + check_macro("and1", "@and", "true", 0); check_macro("and2", "@and{true}", "true", 0); check_macro("and3", "@and{false}", "false", 0); @@ -224,8 +161,8 @@ static void test_macros(void) { check_macro("not2", "@not{false}", "true", 0); check_macro("not3", "@not{wibble}", "true", 0); - check_macro("comment1", "@#{wibble}", "", 0); - check_macro("comment2", "@#{comment with a\nnewline in}", "", 0); + check_macro("comment1", "@# wibble\n", "", 0); + check_macro("comment2", "@# comment\nplus a line", "plus a line", 0); check_macro("discard1", "@discard{wibble}", "", 0); check_macro("discard2", "@discard{comment with a\nnewline in}", "", 0); @@ -265,21 +202,21 @@ static void test_macros(void) { check_macro("include2", "@include{t-macros-2}", "wibble\n", 0); fprintf(stderr, ">>> expect error message about t-macros-nonesuch:\n"); - check_macro("include3", "<@include{t-macros-nonesuch}@>", - "<[[cannot find template 't-macros-nonesuch']]>", 0); + check_macro("include3", 
"<@include{t-macros-nonesuch}>", + "<[[cannot find 't-macros-nonesuch']]>", 0); fprintf(stderr, ">>> expect error message about 'wibble':\n"); - check_macro("badex1", "<@wibble@>", + check_macro("badex1", "<@wibble>", "<[['wibble' unknown]]>", 0); fprintf(stderr, ">>> expect error message about 'if':\n"); - check_macro("badex2", "<@if@>", + check_macro("badex2", "<@if>", "<[['if' too few args]]>", 0); fprintf(stderr, ">>> expect error message about 'if':\n"); - check_macro("badex3", "<@if:1:2:3:4:5@>", + check_macro("badex3", "<@if{1}{2}{3}{4}{5}>", "<[['if' too many args]]>", 0); /* Macro definitions ------------------------------------------------------ */ - check_macro("macro1", "@define{m}{a b c}{@c@ @b@ @a@}@" + check_macro("macro1", "@define{m}{a b c}{@c @b @a}@#\n" "@m{1}{2}{3}", "3 2 1", 0); check_macro("macro2", "@m{b}{c}{a}", @@ -289,8 +226,8 @@ static void test_macros(void) { check_macro("macro4", "@discard{\n" " @define{n}{a b c}\n" - " {@if{@eq{@a@}{@b@}} {@c@} {no}}\n" - "}@" + " {@if{@eq{@a}{@b}} {@c} {no}}\n" + "}@#\n" "@n{x}{y}{z}", "no", 0); check_macro("macro5",