chiark - git - mdw - disorder/blob - lib/macros.c

   1 /*
   2  * This file is part of DisOrder
   3  * Copyright (C) 2008 Richard Kettlewell
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  18  * USA
  19  */
  20
  21 /** @file lib/macros.c
  22  * @brief Macro expansion
  23  */
  24
  25 #include <config.h>
  26 #include "types.h"
  27
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <assert.h>
  31 #include <stdio.h>
  32 #include <sys/stat.h>
  33 #include <fcntl.h>
  34 #include <unistd.h>
  35 #include <errno.h>
  36
  37 #include "macros.h"
  38 #include "mem.h"
  39 #include "vector.h"
  40 #include "log.h"
  41 #include "hash.h"
  42 #include "sink.h"
  43 #include "syscalls.h"
  44 #include "printf.h"
  45
/* Declare the mx_node_vector type and its mx_node_vector_*() helpers.
 * mx_parse() uses it to collect the parse trees of an expansion's name and
 * arguments.
 * NOTE(review): xrealloc is presumably the allocator used to grow the vector -
 * confirm against vector.h. */
VECTOR_TYPE(mx_node_vector, const struct mx_node *, xrealloc);
  47
/** @brief Definition of an expansion
 *
 * One of these is stored (by value) in the @ref expansions hash for every
 * registered expansion name; see mx__register().
 */
struct expansion {
  /** @brief Minimum permitted arguments
   *
   * mx_expand() rejects uses with fewer arguments than this. */
  int min;

  /** @brief Maximum permitted arguments
   *
   * mx_expand() rejects uses with more arguments than this.  For macros this
   * equals @ref min (see mx_register_macro()). */
  int max;

  /** @brief Flags
   *
   * See:
   * - @ref EXP_SIMPLE
   * - @ref EXP_MAGIC
   * - @ref EXP_MACRO
   * - @ref EXP_TYPE_MASK
   */
  unsigned flags;

  /** @brief Macro argument names
   *
   * Only for @ref EXP_MACRO expansions; mx_register() and mx_register_magic()
   * store a null pointer here. */
  char **args;

  /** @brief Callback (cast to appropriate type)
   *
   * Cast to @ref mx_simple_callback or @ref mx_magic_callback as required.
   * mx_register_macro() stores a null pointer here. */
  void (*callback)();

  /** @brief Macro definition
   *
   * Only for @ref EXP_MACRO expansions; a null pointer for the other types. */
  const struct mx_node *definition;
};
  79
/** @brief Expansion takes pre-expanded strings
 *
 * @p callback is cast to @ref mx_simple_callback.  mx_expand() expands each
 * argument to a string before invoking the callback. */
#define EXP_SIMPLE 0x0000

/** @brief Expansion takes parsed templates, not strings
 *
 * @p callback is cast to @ref mx_magic_callback.  The callback must do its own
 * expansion e.g. via mx_expandstr() where necessary. */
#define EXP_MAGIC 0x0001

/** @brief Expansion is a macro
 *
 * The expansion has no callback; mx_expand() hands it to mx__expand_macro(),
 * which uses the stored definition and argument names. */
#define EXP_MACRO 0x0002

/** @brief Mask of types
 *
 * Applied to the @c flags field to extract one of the EXP_ type values
 * above. */
#define EXP_TYPE_MASK 0x0003
  96
/** @brief Hash of all expansions
 *
 * Created by mx_register(), mx_register_macro() or mx_register_magic().
 * It is a null pointer until the first registration; mx__register() creates it
 * on demand.  Keys are expansion names, values are @ref expansion structures.
 */
static hash *expansions;

/* Forward declaration: mx_expand() calls this before it is defined. */
static int mx__expand_macro(const struct expansion *e,
                            const struct mx_node *m,
                            struct sink *output,
                            void *u);
 107
 108 /* Parsing ------------------------------------------------------------------ */
 109
 110 /** @brief Parse a template
 111  * @param filename Input filename (for diagnostics)
 112  * @param line Line number (use 1 on initial call)
 113  * @param input Start of text to parse
 114  * @param end End of text to parse or NULL
 115  * @return Pointer to parse tree root node
 116  *
 117  * Parses the text in [start, end) and returns an (immutable) parse
 118  * tree representing it.
 119  *
 120  * If @p end is NULL then the whole string is parsed.
 121  *
 122  * Note that the @p filename value stored in the parse tree is @p filename,
 123  * i.e. it is not copied.
 124  */
 125 const struct mx_node *mx_parse(const char *filename,
 126                                int line,
 127                                const char *input,
 128                                const char *end) {
 129   int braces, expansion_start_line, argument_start_line;
 130   const char *argument_start, *argument_end, *p;
 131   struct mx_node_vector v[1];
 132   struct dynstr d[1];
 133   struct mx_node *head = 0, **tailp = &head, *e;
 134   int omitted_terminator;
 135
 136   if(!end)
 137     end = input + strlen(input);
 138   while(input < end) {
 139     if(*input != '@') {
 140       expansion_start_line = line;
 141       dynstr_init(d);
 142       /* Gather up text without any expansions in. */
 143       while(input < end && *input != '@') {
 144         if(*input == '\n')
 145           ++line;
 146         dynstr_append(d, *input++);
 147       }
 148       dynstr_terminate(d);
 149       e = xmalloc(sizeof *e);
 150       e->next = 0;
 151       e->filename = filename;
 152       e->line = expansion_start_line;
 153       e->type = MX_TEXT;
 154       e->text = d->vec;
 155       *tailp = e;
 156       tailp = &e->next;
 157       continue;
 158     }
 159     mx_node_vector_init(v);
 160     braces = 0;
 161     p = input;
 162     ++input;
 163     expansion_start_line = line;
 164     omitted_terminator = 0;
 165     while(!omitted_terminator && input < end && *input != '@') {
 166       /* Skip whitespace */
 167       if(isspace((unsigned char)*input)) {
 168         if(*input == '\n')
 169           ++line;
 170         ++input;
 171         continue;
 172       }
 173       if(*input == '{') {
 174         /* This is a bracketed argument.  We'll walk over it counting
 175          * braces to figure out where the end is. */
 176         ++input;
 177         argument_start = input;
 178         argument_start_line = line;
 179         while(input < end && (*input != '}' || braces > 0)) {
 180           switch(*input++) {
 181           case '{': ++braces; break;
 182           case '}': --braces; break;
 183           case '\n': ++line; break;
 184           }
 185         }
 186         /* If we run out of input without seeing a '}' that's an error */
 187         if(input >= end)
 188           fatal(0, "%s:%d: unterminated expansion '%.*s'",
 189                 filename, argument_start_line,
 190                 (int)(input - argument_start), argument_start);
 191         /* Consistency check */
 192         assert(*input == '}');
 193         /* Record the end of the argument */
 194         argument_end = input;
 195         /* Step over the '}' */
 196         ++input;
 197         if(input < end && isspace((unsigned char)*input)) {
 198           /* There is at least some whitespace after the '}'.  Look
 199            * ahead and see what is after all the whitespace. */
 200           for(p = input; p < end && isspace((unsigned char)*p); ++p)
 201             ;
 202           /* Now we are looking after the whitespace.  If it's
 203            * anything other than '{', including the end of the input,
 204            * then we infer that this expansion finished at the '}' we
 205            * just saw.  (NB that we don't move input forward to p -
 206            * the whitespace is NOT part of the expansion.) */
 207           if(p == end || *p != '{')
 208             omitted_terminator = 1;
 209         }
 210       } else {
 211         /* We are looking at an unbracketed argument.  (A common example would
 212          * be the expansion or macro name.)  This is terminated by an '@'
 213          * (indicating the end of the expansion), a ':' (allowing a subsequent
 214          * unbracketed argument) or a '{' (allowing a bracketed argument).  The
 215          * end of the input will also do. */
 216         argument_start = input;
 217         argument_start_line = line;
 218         while(input < end
 219               && *input != '@' && *input != '{' && *input != ':') {
 220           if(*input == '\n') ++line;
 221           ++input;
 222         }
 223         argument_end = input;
 224         /* Trailing whitespace is not significant in unquoted arguments (and
 225          * leading whitespace is eliminated by the whitespace skip above). */
 226         while(argument_end > argument_start
 227               && isspace((unsigned char)argument_end[-1]))
 228           --argument_end;
 229         /* Step over the ':' if that's what we see */
 230         if(input < end && *input == ':')
 231           ++input;
 232       }
 233       /* Now we have an argument in [argument_start, argument_end), and we know
 234        * its filename and initial line number.  This is sufficient to parse
 235        * it. */
 236       mx_node_vector_append(v, mx_parse(filename, argument_start_line,
 237                                         argument_start, argument_end));
 238     }
 239     /* We're at the end of an expansion.  We might have hit the end of the
 240      * input, we might have hit an '@' or we might have matched the
 241      * omitted_terminator criteria. */
 242     if(input < end) {
 243       if(!omitted_terminator) {
 244         assert(*input == '@');
 245         ++input;
 246       }
 247     }
 248     /* @@ terminates this file */
 249     if(v->nvec == 0)
 250       break;
 251     /* Currently we require that the first element, the expansion name, is
 252      * always plain text.  Removing this restriction would raise some
 253      * interesting possibilities but for the time being it is considered an
 254      * error. */
 255     if(v->vec[0]->type != MX_TEXT)
 256       fatal(0, "%s:%d: expansion names may not themselves contain expansions",
 257             v->vec[0]->filename, v->vec[0]->line);
 258     /* Guarantee a NULL terminator (for the case where there's more than one
 259      * argument) */
 260     mx_node_vector_terminate(v);
 261     e = xmalloc(sizeof *e);
 262     e->next = 0;
 263     e->filename = filename;
 264     e->line = expansion_start_line;
 265     e->type = MX_EXPANSION;
 266     e->name = v->vec[0]->text;
 267     e->nargs = v->nvec - 1;
 268     e->args = v->nvec > 1 ? &v->vec[1] : 0;
 269     *tailp = e;
 270     tailp = &e->next;
 271   }
 272   return head;
 273 }
 274
 275 static void mx__dump(struct dynstr *d, const struct mx_node *m) {
 276   int n;
 277
 278   if(!m)
 279     return;
 280   switch(m->type) {
 281   case MX_TEXT:
 282     dynstr_append_string(d, m->text);
 283     break;
 284   case MX_EXPANSION:
 285     dynstr_append(d, '@');
 286     dynstr_append_string(d, m->name);
 287     for(n = 0; n < m->nargs; ++n) {
 288       dynstr_append(d, '{');
 289       mx__dump(d, m->args[n]);
 290       dynstr_append(d, '}');
 291     }
 292     dynstr_append(d, '@');
 293     break;
 294   default:
 295     assert(!"invalid m->type");
 296   }
 297   mx__dump(d, m->next);
 298 }
 299
 300 /** @brief Dump a parse macro expansion to a string */
 301 char *mx_dump(const struct mx_node *m) {
 302   struct dynstr d[1];
 303
 304   dynstr_init(d);
 305   mx__dump(d, m);
 306   dynstr_terminate(d);
 307   return d->vec;
 308 }
 309
 310 /* Expansion registration --------------------------------------------------- */
 311
 312 static int mx__register(unsigned flags,
 313                         const char *name,
 314                         int min,
 315                         int max,
 316                         char **args,
 317                         void (*callback)(),
 318                         const struct mx_node *definition) {
 319   struct expansion e[1];
 320
 321   if(!expansions)
 322     expansions = hash_new(sizeof(struct expansion));
 323   e->min = min;
 324   e->max = max;
 325   e->flags = flags;
 326   e->args = args;
 327   e->callback = callback;
 328   e->definition = definition;
 329   return hash_add(expansions, name, &e,
 330                   ((flags & EXP_TYPE_MASK) == EXP_MACRO)
 331                       ? HASH_INSERT : HASH_INSERT_OR_REPLACE);
 332 }
 333
 334 /** @brief Register a simple expansion rule
 335  * @param name Name
 336  * @param min Minimum number of arguments
 337  * @param max Maximum number of arguments
 338  * @param callback Callback to write output
 339  */
 340 void mx_register(const char *name,
 341                  int min,
 342                  int max,
 343                  mx_simple_callback *callback) {
 344   mx__register(EXP_SIMPLE,  name, min, max, 0, (void (*)())callback, 0);
 345 }
 346
 347 /** @brief Register a magic expansion rule
 348  * @param name Name
 349  * @param min Minimum number of arguments
 350  * @param max Maximum number of arguments
 351  * @param callback Callback to write output
 352  */
 353 void mx_register_magic(const char *name,
 354                        int min,
 355                        int max,
 356                        mx_magic_callback *callback) {
 357   mx__register(EXP_MAGIC, name, min, max, 0, (void (*)())callback, 0);
 358 }
 359
 360 /** @brief Register a macro
 361  * @param name Name
 362  * @param nargs Number of arguments
 363  * @param args Argument names
 364  * @param definition Macro definition
 365  * @return 0 on success, negative on error
 366  */
 367 int mx_register_macro(const char *name,
 368                       int nargs,
 369                       char **args,
 370                       const struct mx_node *definition) {
 371   if(mx__register(EXP_MACRO, name, nargs, nargs, args,  0/*callback*/,
 372                   definition)) {
 373     /* This locates the error to the definition, which may be a line or two
 374      * beyond the @define command itself.  The backtrace generated by
 375      * mx_expand() may help more. */
 376     error(0, "%s:%d: duplicate definition of '%s'",
 377           definition->filename, definition->line, name);
 378     return -2;
 379   }
 380   return 0;
 381 }
 382
 383 /* Expansion ---------------------------------------------------------------- */
 384
 385 /** @brief Expand a template
 386  * @param m Where to start
 387  * @param output Where to send output
 388  * @param u User data
 389  * @return 0 on success, non-0 on error
 390  *
 391  * Interpretation of return values:
 392  * - 0 means success
 393  * - -1 means an error writing to the sink.
 394  * - other negative values mean errors generated from with the macro
 395  *   expansion system
 396  * - positive values are reserved for the application
 397  *
 398  * If any callback returns non-zero then that value is returned, abandoning
 399  * further expansion.
 400  */
 401 int mx_expand(const struct mx_node *m,
 402               struct sink *output,
 403               void *u) {
 404   const struct expansion *e;
 405   int rc;
 406
 407   if(!m)
 408     return 0;
 409   switch(m->type) {
 410   case MX_TEXT:
 411     if(sink_writes(output, m->text) < 0)
 412       return -1;
 413     break;
 414   case MX_EXPANSION:
 415     rc = 0;
 416     if(!(e = hash_find(expansions, m->name))) {
 417       error(0, "%s:%d: unknown expansion name '%s'",
 418             m->filename, m->line, m->name);
 419       if(sink_printf(output, "[[%s unknown]]", m->name) < 0)
 420         return -1;
 421     } else if(m->nargs < e->min) {
 422       error(0, "%s:%d: expansion '%s' requires %d args, only %d given",
 423             m->filename, m->line, m->name, e->min, m->nargs);
 424       if(sink_printf(output, "[[%s too few args]]", m->name) < 0)
 425         return -1;
 426     } else if(m->nargs > e->max) {
 427       error(0, "%s:%d: expansion '%s' takes at most %d args, but %d given",
 428             m->filename, m->line, m->name, e->max, m->nargs);
 429       if(sink_printf(output, "[[%s too many args]]", m->name) < 0)
 430         return -1;
 431     } else switch(e->flags & EXP_TYPE_MASK) {
 432       case EXP_MAGIC: {
 433         /* Magic callbacks we can call directly */
 434         rc = ((mx_magic_callback *)e->callback)(m->nargs,
 435                                                 m->args,
 436                                                 output,
 437                                                 u);
 438         break;
 439       }
 440       case EXP_SIMPLE: {
 441         /* For simple callbacks we expand their arguments for them. */
 442         char **args = xcalloc(1 + m->nargs, sizeof (char *)), *argname;
 443         int n;
 444
 445         for(n = 0; n < m->nargs; ++n) {
 446           /* Argument numbers are at least clear from looking at the text;
 447            * adding names as well would be nice.  TODO */
 448           byte_xasprintf(&argname, "argument #%d", n);
 449           if((rc = mx_expandstr(m->args[n], &args[n], u, argname)))
 450             break;
 451         }
 452         if(!rc) {
 453           args[n] = NULL;
 454           rc = ((mx_simple_callback *)e->callback)(m->nargs,
 455                                                    args,
 456                                                    output,
 457                                                    u);
 458         }
 459         break;
 460       }
 461       case EXP_MACRO: {
 462         /* Macros we expand by rewriting their definition with argument values
 463          * substituted and then expanding that. */
 464         rc = mx__expand_macro(e, m, output, u);
 465         break;
 466       }
 467       default:
 468         assert(!"impossible EXP_TYPE_MASK value");
 469     }
 470     if(rc) {
 471       /* For non-IO errors we generate some backtrace */
 472       if(rc != -1)
 473         error(0,  "  ...in '%s' at %s:%d",
 474               m->name, m->filename, m->line);
 475       return rc;
 476     }
 477     break;
 478   default:
 479     assert(!"invalid m->type");
 480   }
 481   return mx_expand(m->next, output, u);
 482 }
 483
 484 /** @brief Expand a template storing the result in a string
 485  * @param m Where to start
 486  * @param sp Where to store string
 487  * @param u User data
 488  * @param what Token for backtrace, or NULL
 489  * @return 0 on success, non-0 on error
 490  *
 491  * Same return conventions as mx_expand().  This wrapper is slightly more
 492  * convenient to use from 'magic' expansions.
 493  */
 494 int mx_expandstr(const struct mx_node *m,
 495                  char **sp,
 496                  void *u,
 497                  const char *what) {
 498   struct dynstr d[1];
 499   int rc;
 500
 501   dynstr_init(d);
 502   if(!(rc = mx_expand(m, sink_dynstr(d), u))) {
 503     dynstr_terminate(d);
 504     *sp = d->vec;
 505   } else
 506     *sp = 0;
 507   if(rc && rc != -1 && what)
 508     error(0, "  ...in %s at %s:%d", what, m->filename, m->line);
 509   return rc;
 510 }
 511
 512 /** @brief Expand a template file
 513  * @param path Filename
 514  * @param output Where to send output
 515  * @param u User data
 516  * @return 0 on success, non-0 on error
 517  *
 518  * Same return conventions as mx_expand().
 519  */
 520 int mx_expand_file(const char *path,
 521                    struct sink *output,
 522                    void *u) {
 523   int fd, n, rc;
 524   struct stat sb;
 525   char *b;
 526   off_t sofar;
 527   const struct mx_node *m;
 528
 529   if((fd = open(path, O_RDONLY)) < 0)
 530     fatal(errno, "error opening %s", path);
 531   if(fstat(fd, &sb) < 0)
 532     fatal(errno, "error statting %s", path);
 533   if(!S_ISREG(sb.st_mode))
 534     fatal(0, "%s: not a regular file", path);
 535   sofar = 0;
 536   b = xmalloc_noptr(sb.st_size);
 537   while(sofar < sb.st_size) {
 538     n = read(fd, b + sofar, sb.st_size - sofar);
 539     if(n > 0)
 540       sofar += n;
 541     else if(n == 0)
 542       fatal(0, "unexpected EOF reading %s", path);
 543     else if(errno != EINTR)
 544       fatal(errno, "error reading %s", path);
 545   }
 546   xclose(fd);
 547   m = mx_parse(path, 1, b, b + sb.st_size);
 548   rc = mx_expand(m, output, u);
 549   if(rc && rc != -1)
 550     /* Mention inclusion in backtrace */
 551     error(0, "  ...in inclusion of file '%s'", path);
 552   return rc;
 553 }
 554
 555 /** @brief Rewrite a parse tree substituting in macro arguments
 556  * @param m Parse tree to rewrite (from macro definition)
 557  * @param h Hash mapping argument names to argument values
 558  * @return Rewritten parse tree
 559  */
 560 static const struct mx_node *mx__rewrite(const struct mx_node *m,
 561                                          hash *h) {
 562   const struct mx_node *head = 0, **tailp = &head, *arg, *mm;
 563   struct mx_node *nm;
 564   int n;
 565
 566   for(; m; m = m->next) {
 567     switch(m->type) {
 568     case MX_TEXT:
 569       nm = xmalloc(sizeof *nm);
 570       *nm = *m;                          /* Dumb copy of text node fields */
 571       nm->next = 0;                      /* Maintain list structure */
 572       *tailp = nm;
 573       tailp = (const struct mx_node **)&nm->next;
 574       break;
 575     case MX_EXPANSION:
 576       if(m->nargs == 0
 577          && (arg = hash_find(h, m->name))) {
 578         /* This expansion has no arguments and its name matches one of the
 579          * macro arguments.  (Even if it's a valid expansion name we override
 580          * it.)  We insert its value at this point.  We do NOT recursively
 581          * rewrite the argument's value - it is outside the lexical scope of
 582          * the argument name.
 583          *
 584          * We need to recreate the list structure but a shallow copy will
 585          * suffice here.
 586          */
 587         for(mm = arg; mm; mm = mm->next) {
 588           nm = xmalloc(sizeof *nm);
 589           *nm = *mm;
 590           nm->next = 0;
 591           *tailp = nm;
 592           tailp = (const struct mx_node **)&nm->next;
 593         }
 594       } else {
 595         /* This is some other expansion.  We recursively rewrite its argument
 596          * values according to h. */
 597         nm = xmalloc(sizeof *nm);
 598         *nm = *mm;
 599         for(n = 0; n < nm->nargs; ++n)
 600           nm->args[n] = mx__rewrite(m->args[n], h);
 601         nm->next = 0;
 602         *tailp = nm;
 603         tailp = (const struct mx_node **)&nm->next;
 604       }
 605       break;
 606     default:
 607       assert(!"invalid m->type");
 608     }
 609   }
 610   *tailp = 0;                           /* Mark end of list */
 611   return head;
 612 }
 613
 614 /** @brief Expand a macro
 615  * @param e Macro definition
 616  * @param m Macro expansion
 617  * @param output Where to send output
 618  * @param u User data
 619  * @return 0 on success, non-0 on error
 620  */
 621 static int mx__expand_macro(const struct expansion *e,
 622                             const struct mx_node *m,
 623                             struct sink *output,
 624                             void *u) {
 625   hash *h = hash_new(sizeof (struct mx_node *));
 626   int n;
 627
 628   /* We store the macro arguments in a hash.  Currently there is no check for
 629    * duplicate argument names (and this would be the wrong place for it
 630    * anyway); if you do that you just lose in some undefined way. */
 631   for(n = 0; n < m->nargs; ++n)
 632     hash_add(h, e->args[n], m->args[n], HASH_INSERT);
 633   /* Generate a rewritten parse tree */
 634   m = mx__rewrite(e->definition, h);
 635   /* Expand the result */
 636   return mx_expand(m, output, u);
 637   /* mx_expand() will update the backtrace */
 638 }
 639
 640 /*
 641 Local Variables:
 642 c-basic-offset:2
 643 comment-column:40
 644 fill-column:79
 645 indent-tabs-mode:nil
 646 End:
 647 */