chiark - git - mdw - disorder/blob - lib/macros.c

   1 /*
   2  * This file is part of DisOrder
   3  * Copyright (C) 2008 Richard Kettlewell
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  18  * USA
  19  */
  20
  21 /** @file lib/macros.c
  22  * @brief Macro expansion
  23  */
  24
  25 #include <config.h>
  26 #include "types.h"
  27
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <assert.h>
  31
  32 #include "macros.h"
  33 #include "mem.h"
  34 #include "vector.h"
  35 #include "log.h"
  36
  37 VECTOR_TYPE(mx_node_vector, const struct mx_node *, xrealloc);
  38
  39 /** @brief Parse a template
  40  * @param filename Input filename (for diagnostics)
  41  * @param line Line number (use 1 on initial call)
  42  * @param input Start of text to parse
  43  * @param end End of text to parse or NULL
  44  * @return Pointer to parse tree root node
  45  *
  46  * Parses the text in [start, end) and returns an (immutable) parse
  47  * tree representing it.
  48  *
  49  * If @p end is NULL then the whole string is parsed.
  50  *
  51  * Note that the @p filename value stored in the parse tree is @p filename,
  52  * i.e. it is not copied.
  53  */
  54 const struct mx_node *mx_parse(const char *filename,
  55                                int line,
  56                                const char *input,
  57                                const char *end) {
  58   int braces, expansion_start_line, argument_start_line;
  59   const char *argument_start, *argument_end, *p;
  60   struct mx_node_vector v[1];
  61   struct dynstr d[1];
  62   struct mx_node *head = 0, **tailp = &head, *e;
  63   int omitted_terminator;
  64
  65   if(!end)
  66     end = input + strlen(input);
  67   while(input < end) {
  68     if(*input != '@') {
  69       expansion_start_line = line;
  70       dynstr_init(d);
  71       /* Gather up text without any expansions in. */
  72       while(input < end && *input != '@') {
  73         if(*input == '\n')
  74           ++line;
  75         dynstr_append(d, *input++);
  76       }
  77       dynstr_terminate(d);
  78       e = xmalloc(sizeof *e);
  79       e->next = 0;
  80       e->filename = filename;
  81       e->line = expansion_start_line;
  82       e->type = MX_TEXT;
  83       e->text = d->vec;
  84       *tailp = e;
  85       tailp = &e->next;
  86       continue;
  87     }
  88     mx_node_vector_init(v);
  89     braces = 0;
  90     p = input;
  91     ++input;
  92     expansion_start_line = line;
  93     omitted_terminator = 0;
  94     while(!omitted_terminator && input < end && *input != '@') {
  95       /* Skip whitespace */
  96       if(isspace((unsigned char)*input)) {
  97         if(*input == '\n')
  98           ++line;
  99         ++input;
 100         continue;
 101       }
 102       if(*input == '{') {
 103         /* This is a bracketed argument.  We'll walk over it counting
 104          * braces to figure out where the end is. */
 105         ++input;
 106         argument_start = input;
 107         argument_start_line = line;
 108         while(input < end && (*input != '}' || braces > 0)) {
 109           switch(*input++) {
 110           case '{': ++braces; break;
 111           case '}': --braces; break;
 112           case '\n': ++line; break;
 113           }
 114         }
 115         /* If we run out of input without seeing a '}' that's an error */
 116         if(input >= end)
 117           fatal(0, "%s:%d: unterminated expansion '%.*s'",
 118                 filename, argument_start_line,
 119                 (int)(input - argument_start), argument_start);
 120         /* Consistency check */
 121         assert(*input == '}');
 122         /* Record the end of the argument */
 123         argument_end = input;
 124         /* Step over the '}' */
 125         ++input;
 126         if(input < end && isspace((unsigned char)*input)) {
 127           /* There is at least some whitespace after the '}'.  Look
 128            * ahead and see what is after all the whitespace. */
 129           for(p = input; p < end && isspace((unsigned char)*p); ++p)
 130             ;
 131           /* Now we are looking after the whitespace.  If it's
 132            * anything other than '{', including the end of the input,
 133            * then we infer that this expansion finished at the '}' we
 134            * just saw.  (NB that we don't move input forward to p -
 135            * the whitespace is NOT part of the expansion.) */
 136           if(p == end || *p != '{')
 137             omitted_terminator = 1;
 138         }
 139       } else {
 140         /* We are looking at an unbracketed argument.  (A common example would
 141          * be the expansion or macro name.)  This is terminated by an '@'
 142          * (indicating the end of the expansion), a ':' (allowing a subsequent
 143          * unbracketed argument) or a '{' (allowing a bracketed argument).  The
 144          * end of the input will also do. */
 145         argument_start = input;
 146         argument_start_line = line;
 147         while(input < end
 148               && *input != '@' && *input != '{' && *input != ':') {
 149           if(*input == '\n') ++line;
 150           ++input;
 151         }
 152         argument_end = input;
 153         /* Trailing whitespace is not significant in unquoted arguments (and
 154          * leading whitespace is eliminated by the whitespace skip above). */
 155         while(argument_end > argument_start
 156               && isspace((unsigned char)argument_end[-1]))
 157           --argument_end;
 158         /* Step over the ':' if that's what we see */
 159         if(input < end && *input == ':')
 160           ++input;
 161       }
 162       /* Now we have an argument in [argument_start, argument_end), and we know
 163        * its filename and initial line number.  This is sufficient to parse
 164        * it. */
 165       mx_node_vector_append(v, mx_parse(filename, argument_start_line,
 166                                         argument_start, argument_end));
 167     }
 168     /* We're at the end of an expansion.  We might have hit the end of the
 169      * input, we might have hit an '@' or we might have matched the
 170      * omitted_terminator criteria. */
 171     if(input < end) {
 172       if(!omitted_terminator) {
 173         assert(*input == '@');
 174         ++input;
 175       }
 176     }
 177     /* @@ terminates this file */
 178     if(v->nvec == 0)
 179       break;
 180     /* Currently we require that the first element, the expansion name, is
 181      * always plain text.  Removing this restriction would raise some
 182      * interesting possibilities but for the time being it is considered an
 183      * error. */
 184     if(v->vec[0]->type != MX_TEXT)
 185       fatal(0, "%s:%d: expansion names may not themselves contain expansions",
 186             v->vec[0]->filename, v->vec[0]->line);
 187     /* Guarantee a NULL terminator (for the case where there's more than one
 188      * argument) */
 189     mx_node_vector_terminate(v);
 190     e = xmalloc(sizeof *e);
 191     e->next = 0;
 192     e->filename = filename;
 193     e->line = expansion_start_line;
 194     e->type = MX_EXPANSION;
 195     e->name = v->vec[0]->text;
 196     e->nargs = v->nvec - 1;
 197     e->args = v->nvec > 1 ? &v->vec[1] : 0;
 198     *tailp = e;
 199     tailp = &e->next;
 200   }
 201   return head;
 202 }
 203
 204 /*
 205 Local Variables:
 206 c-basic-offset:2
 207 comment-column:40
 208 fill-column:79
 209 indent-tabs-mode:nil
 210 End:
 211 */