chiark - git - mdw - disorder/blob - lib/macros.c

   1 /*
   2  * This file is part of DisOrder
   3  * Copyright (C) 2008 Richard Kettlewell
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  18  * USA
  19  */
  20
  21 /** @file lib/macros.c
  22  * @brief Macro expansion
  23  */
  24
  25 #include "common.h"
  26
  27 #include <ctype.h>
  28 #include <sys/stat.h>
  29 #include <fcntl.h>
  30 #include <unistd.h>
  31 #include <errno.h>
  32
  33 #include "hash.h"
  34 #include "macros.h"
  35 #include "mem.h"
  36 #include "vector.h"
  37 #include "log.h"
  38 #include "sink.h"
  39 #include "syscalls.h"
  40 #include "printf.h"
  41
/* Instantiate the mx_node_vector type: a vector whose elements are pointers
 * to (const) parse tree nodes.  mx_parse() uses it, through the generated
 * mx_node_vector_init()/_append()/_terminate() helpers, to collect the
 * arguments of an expansion.  xrealloc is handed to the VECTOR_TYPE template,
 * presumably as the (re)allocation function - see vector.h to confirm. */
VECTOR_TYPE(mx_node_vector, const struct mx_node *, xrealloc);
  43
/** @brief Definition of an expansion
 *
 * One of these is stored in the @ref expansions hash for every registered
 * name.  It is filled in by mx__register() and consulted by mx_expand().
 */
struct expansion {
  /** @brief Minimum permitted arguments
   *
   * mx_expand() reports "too few args" if a use supplies fewer than this. */
  int min;

  /** @brief Maximum permitted arguments
   *
   * mx_expand() reports "too many args" if a use supplies more than this.
   * For macros this equals @ref min (see mx_register_macro()). */
  int max;

  /** @brief Flags
   *
   * See:
   * - @ref EXP_SIMPLE
   * - @ref EXP_MAGIC
   * - @ref EXP_MACRO
   * - @ref EXP_TYPE_MASK
   */
  unsigned flags;

  /** @brief Macro argument names
   *
   * Only set for @ref EXP_MACRO expansions; mx_register() and
   * mx_register_magic() pass a null pointer here. */
  char **args;

  /** @brief Callback (cast to appropriate type)
   *
   * Cast to @ref mx_simple_callback or @ref mx_magic_callback as required.
   * Null for @ref EXP_MACRO expansions. */
  void (*callback)();

  /** @brief Macro definition
   *
   * Only for @ref EXP_MACRO expansions; null otherwise. */
  const struct mx_node *definition;
};
  75
/** @brief Expansion takes pre-expanded strings
 *
 * @p callback is cast to @ref mx_simple_callback.  mx_expand() expands each
 * argument to a string (via mx_expandstr()) before invoking the callback. */
#define EXP_SIMPLE 0x0000

/** @brief Expansion takes parsed templates, not strings
 *
 * @p callback is cast to @ref mx_magic_callback.  The callback must do its own
 * expansion e.g. via mx_expandstr() where necessary. */
#define EXP_MAGIC 0x0001

/** @brief Expansion is a macro
 *
 * There is no callback; mx_expand() rewrites the stored definition with the
 * actual arguments substituted and expands the result. */
#define EXP_MACRO 0x0002

/** @brief Mask of types
 *
 * mx_expand() dispatches on <code>flags & EXP_TYPE_MASK</code>. */
#define EXP_TYPE_MASK 0x0003
  92
/** @brief Hash of all expansions
 *
 * Maps expansion names to @ref expansion records.  It starts out null and is
 * created on demand by mx__register(), i.e. on the first call to
 * mx_register(), mx_register_macro() or mx_register_magic().
 */
static hash *expansions;

/* Forward declaration: mx_expand() needs this, but it is defined in the
 * "Macros" section below, after mx_rewrite(). */
static int mx__expand_macro(const struct expansion *e,
                            const struct mx_node *m,
                            struct sink *output,
                            void *u);
 103
 104 /* Parsing ------------------------------------------------------------------ */
 105
 106 static int next_non_whitespace(const char *input,
 107                                const char *end) {
 108   while(input < end && isspace((unsigned char)*input))
 109     ++input;
 110   return input < end ? *input : -1;
 111 }
 112
 113 /** @brief Parse a template
 114  * @param filename Input filename (for diagnostics)
 115  * @param line Line number (use 1 on initial call)
 116  * @param input Start of text to parse
 117  * @param end End of text to parse or NULL
 118  * @return Pointer to parse tree root node
 119  *
 120  * Parses the text in [start, end) and returns an (immutable) parse
 121  * tree representing it.
 122  *
 123  * If @p end is NULL then the whole string is parsed.
 124  *
 125  * Note that the @p filename value stored in the parse tree is @p filename,
 126  * i.e. it is not copied.
 127  */
 128 const struct mx_node *mx_parse(const char *filename,
 129                                int line,
 130                                const char *input,
 131                                const char *end) {
 132   int braces, argument_start_line, obracket, cbracket;
 133   const char *argument_start, *argument_end;
 134   struct mx_node_vector v[1];
 135   struct dynstr d[1];
 136   struct mx_node *head = 0, **tailp = &head, *e;
 137
 138   if(!end)
 139     end = input + strlen(input);
 140   while(input < end) {
 141     if(*input != '@') {
 142       e = xmalloc(sizeof *e);
 143       e->next = 0;
 144       e->filename = filename;
 145       e->line = line;
 146       e->type = MX_TEXT;
 147       dynstr_init(d);
 148       /* Gather up text without any expansions in. */
 149       while(input < end && *input != '@') {
 150         if(*input == '\n')
 151           ++line;
 152         dynstr_append(d, *input++);
 153       }
 154       dynstr_terminate(d);
 155       e->text = d->vec;
 156       *tailp = e;
 157       tailp = &e->next;
 158       continue;
 159     }
 160     if(input + 1 < end)
 161       switch(input[1]) {
 162       case '@':
 163         /* '@@' expands to '@' */
 164         e = xmalloc(sizeof *e);
 165         e->next = 0;
 166         e->filename = filename;
 167         e->line = line;
 168         e->type = MX_TEXT;
 169         e->text = "@";
 170         *tailp = e;
 171         tailp = &e->next;
 172         input += 2;
 173         continue;
 174       case '#':
 175         /* '@#' starts a (newline-eating comment), like dnl */
 176         input += 2;
 177         while(input < end && *input != '\n')
 178           ++input;
 179         if(*input == '\n') {
 180           ++line;
 181           ++input;
 182         }
 183         continue;
 184       case '_':
 185         /* '@_' expands to nothing.  It's there to allow dump to terminate
 186          * expansions without having to know what follows. */
 187         input += 2;
 188         continue;
 189       }
 190     /* It's a full expansion */
 191     ++input;
 192     e = xmalloc(sizeof *e);
 193     e->next = 0;
 194     e->filename = filename;
 195     e->line = line;
 196     e->type = MX_EXPANSION;
 197     /* Collect the expansion name.  Expansion names start with an alnum and
 198      * consist of alnums and '-'.  We don't permit whitespace between the '@'
 199      * and the name. */
 200     dynstr_init(d);
 201     if(input == end)
 202       fatal(0, "%s:%d: invalid expansion syntax (truncated)",
 203             filename, e->line);
 204     if(!isalnum((unsigned char)*input))
 205       fatal(0, "%s:%d: invalid expansion syntax (unexpected %#x)",
 206             filename, e->line, (unsigned char)*input);
 207     while(input < end && (isalnum((unsigned char)*input) || *input == '-'))
 208       dynstr_append(d, *input++);
 209     dynstr_terminate(d);
 210     e->name = d->vec;
 211     /* See what the bracket character is */
 212     obracket = next_non_whitespace(input, end);
 213     switch(obracket) {
 214     case '(': cbracket = ')'; break;
 215     case '[': cbracket = ']'; break;
 216     case '{': cbracket = '}'; break;
 217     default: cbracket = obracket = -1; break;      /* no arguments */
 218     }
 219     mx_node_vector_init(v);
 220     if(obracket >= 0) {
 221       /* Gather up arguments */
 222       while(next_non_whitespace(input, end) == obracket) {
 223         while(isspace((unsigned char)*input)) {
 224           if(*input == '\n')
 225             ++line;
 226           ++input;
 227         }
 228         ++input;                        /* the bracket */
 229         braces = 0;
 230         /* Find the end of the argument */
 231         argument_start = input;
 232         argument_start_line = line;
 233         while(input < end && (*input != cbracket || braces > 0)) {
 234           const int c = *input++;
 235
 236           if(c == obracket)
 237             ++braces;
 238           else if(c == cbracket)
 239             --braces;
 240           else if(c == '\n')
 241             ++line;
 242         }
 243         if(input >= end) {
 244           /* We ran out of input without encountering a balanced cbracket */
 245           fatal(0, "%s:%d: unterminated expansion argument '%.*s'",
 246                 filename, argument_start_line,
 247                 (int)(input - argument_start), argument_start);
 248         }
 249         /* Consistency check */
 250         assert(*input == cbracket);
 251         /* Record the end of the argument */
 252         argument_end = input;
 253         /* Step over the cbracket */
 254         ++input;
 255         /* Now we have an argument in [argument_start, argument_end), and we
 256          * know its filename and initial line number.  This is sufficient to
 257          * parse it. */
 258         mx_node_vector_append(v, mx_parse(filename, argument_start_line,
 259                                           argument_start, argument_end));
 260       }
 261     }
 262     /* Guarantee a NULL terminator (for the case where there's more than one
 263      * argument) */
 264     mx_node_vector_terminate(v);
 265     /* Fill in the remains of the node */
 266     e->nargs = v->nvec;
 267     e->args = v->vec;
 268     *tailp = e;
 269     tailp = &e->next;
 270   }
 271   return head;
 272 }
 273
 274 static void mx__dump(struct dynstr *d, const struct mx_node *m) {
 275   int n;
 276   const struct mx_node *mm;
 277
 278   if(!m)
 279     return;
 280   switch(m->type) {
 281   case MX_TEXT:
 282     if(m->text[0] == '@')
 283       dynstr_append(d, '@');
 284     dynstr_append_string(d, m->text);
 285     break;
 286   case MX_EXPANSION:
 287     dynstr_append(d, '@');
 288     dynstr_append_string(d, m->name);
 289     for(n = 0; n < m->nargs; ++n) {
 290       dynstr_append(d, '{');
 291       mx__dump(d, m->args[n]);
 292       dynstr_append(d, '}');
 293     }
 294     /* If the next non-whitespace is '{', add @_ to stop it being
 295      * misinterpreted */
 296     mm = m->next;
 297     while(mm && mm->type == MX_TEXT) {
 298       switch(next_non_whitespace(mm->text, mm->text + strlen(mm->text))) {
 299       case -1:
 300         mm = mm->next;
 301         continue;
 302       case '{':
 303         dynstr_append_string(d, "@_");
 304         break;
 305       default:
 306         break;
 307       }
 308       break;
 309     }
 310     break;
 311   default:
 312     assert(!"invalid m->type");
 313   }
 314   mx__dump(d, m->next);
 315 }
 316
 317 /** @brief Dump a parse macro expansion to a string
 318  *
 319  * Not of production quality!  Only intended for testing!
 320  */
 321 char *mx_dump(const struct mx_node *m) {
 322   struct dynstr d[1];
 323
 324   dynstr_init(d);
 325   mx__dump(d, m);
 326   dynstr_terminate(d);
 327   return d->vec;
 328 }
 329
 330 /* Expansion registration --------------------------------------------------- */
 331
 332 static int mx__register(unsigned flags,
 333                         const char *name,
 334                         int min,
 335                         int max,
 336                         char **args,
 337                         void (*callback)(),
 338                         const struct mx_node *definition) {
 339   struct expansion e[1];
 340
 341   if(!expansions)
 342     expansions = hash_new(sizeof(struct expansion));
 343   e->min = min;
 344   e->max = max;
 345   e->flags = flags;
 346   e->args = args;
 347   e->callback = callback;
 348   e->definition = definition;
 349   return hash_add(expansions, name, &e, HASH_INSERT_OR_REPLACE);
 350 }
 351
 352 /** @brief Register a simple expansion rule
 353  * @param name Name
 354  * @param min Minimum number of arguments
 355  * @param max Maximum number of arguments
 356  * @param callback Callback to write output
 357  */
 358 void mx_register(const char *name,
 359                  int min,
 360                  int max,
 361                  mx_simple_callback *callback) {
 362   mx__register(EXP_SIMPLE,  name, min, max, 0, (void (*)())callback, 0);
 363 }
 364
 365 /** @brief Register a magic expansion rule
 366  * @param name Name
 367  * @param min Minimum number of arguments
 368  * @param max Maximum number of arguments
 369  * @param callback Callback to write output
 370  */
 371 void mx_register_magic(const char *name,
 372                        int min,
 373                        int max,
 374                        mx_magic_callback *callback) {
 375   mx__register(EXP_MAGIC, name, min, max, 0, (void (*)())callback, 0);
 376 }
 377
 378 /** @brief Register a macro
 379  * @param name Name
 380  * @param nargs Number of arguments
 381  * @param args Argument names
 382  * @param definition Macro definition
 383  * @return 0 on success, negative on error
 384  */
 385 int mx_register_macro(const char *name,
 386                       int nargs,
 387                       char **args,
 388                       const struct mx_node *definition) {
 389   if(mx__register(EXP_MACRO, name, nargs, nargs, args,  0/*callback*/,
 390                   definition)) {
 391 #if 0
 392     /* This locates the error to the definition, which may be a line or two
 393      * beyond the @define command itself.  The backtrace generated by
 394      * mx_expand() may help more. */
 395     error(0, "%s:%d: duplicate definition of '%s'",
 396           definition->filename, definition->line, name);
 397 #endif
 398     return -2;
 399   }
 400   return 0;
 401 }
 402
 403 /* Expansion ---------------------------------------------------------------- */
 404
 405 /** @brief Expand a template
 406  * @param m Where to start
 407  * @param output Where to send output
 408  * @param u User data
 409  * @return 0 on success, non-0 on error
 410  *
 411  * Interpretation of return values:
 412  * - 0 means success
 413  * - -1 means an error writing to the sink.
 414  * - other negative values mean errors generated from with the macro
 415  *   expansion system
 416  * - positive values are reserved for the application
 417  *
 418  * If any callback returns non-zero then that value is returned, abandoning
 419  * further expansion.
 420  */
 421 int mx_expand(const struct mx_node *m,
 422               struct sink *output,
 423               void *u) {
 424   const struct expansion *e;
 425   int rc;
 426
 427   if(!m)
 428     return 0;
 429   switch(m->type) {
 430   case MX_TEXT:
 431     if(sink_writes(output, m->text) < 0)
 432       return -1;
 433     break;
 434   case MX_EXPANSION:
 435     rc = 0;
 436     if(!(e = hash_find(expansions, m->name))) {
 437       error(0, "%s:%d: unknown expansion name '%s'",
 438             m->filename, m->line, m->name);
 439       if(sink_printf(output, "[['%s' unknown]]", m->name) < 0)
 440         return -1;
 441     } else if(m->nargs < e->min) {
 442       error(0, "%s:%d: expansion '%s' requires %d args, only %d given",
 443             m->filename, m->line, m->name, e->min, m->nargs);
 444       if(sink_printf(output, "[['%s' too few args]]", m->name) < 0)
 445         return -1;
 446     } else if(m->nargs > e->max) {
 447       error(0, "%s:%d: expansion '%s' takes at most %d args, but %d given",
 448             m->filename, m->line, m->name, e->max, m->nargs);
 449       if(sink_printf(output, "[['%s' too many args]]", m->name) < 0)
 450         return -1;
 451     } else switch(e->flags & EXP_TYPE_MASK) {
 452       case EXP_MAGIC: {
 453         /* Magic callbacks we can call directly */
 454         rc = ((mx_magic_callback *)e->callback)(m->nargs,
 455                                                 m->args,
 456                                                 output,
 457                                                 u);
 458         break;
 459       }
 460       case EXP_SIMPLE: {
 461         /* For simple callbacks we expand their arguments for them. */
 462         char **args = xcalloc(1 + m->nargs, sizeof (char *)), *argname;
 463         int n;
 464
 465         for(n = 0; n < m->nargs; ++n) {
 466           /* Argument numbers are at least clear from looking at the text;
 467            * adding names as well would be nice.  TODO */
 468           byte_xasprintf(&argname, "argument #%d", n);
 469           if((rc = mx_expandstr(m->args[n], &args[n], u, argname)))
 470             break;
 471         }
 472         if(!rc) {
 473           args[n] = NULL;
 474           rc = ((mx_simple_callback *)e->callback)(m->nargs,
 475                                                    args,
 476                                                    output,
 477                                                    u);
 478         }
 479         break;
 480       }
 481       case EXP_MACRO: {
 482         /* Macros we expand by rewriting their definition with argument values
 483          * substituted and then expanding that. */
 484         rc = mx__expand_macro(e, m, output, u);
 485         break;
 486       }
 487       default:
 488         assert(!"impossible EXP_TYPE_MASK value");
 489     }
 490     if(rc) {
 491       /* For non-IO errors we generate some backtrace */
 492       if(rc != -1)
 493         error(0,  "  ...in @%s at %s:%d",
 494               m->name, m->filename, m->line);
 495       return rc;
 496     }
 497     break;
 498   default:
 499     assert(!"invalid m->type");
 500   }
 501   return mx_expand(m->next, output, u);
 502 }
 503
 504 /** @brief Expand a template storing the result in a string
 505  * @param m Where to start
 506  * @param sp Where to store string
 507  * @param u User data
 508  * @param what Token for backtrace, or NULL
 509  * @return 0 on success, non-0 on error
 510  *
 511  * Same return conventions as mx_expand().  This wrapper is slightly more
 512  * convenient to use from 'magic' expansions.
 513  */
 514 int mx_expandstr(const struct mx_node *m,
 515                  char **sp,
 516                  void *u,
 517                  const char *what) {
 518   struct dynstr d[1];
 519   int rc;
 520
 521   dynstr_init(d);
 522   if(!(rc = mx_expand(m, sink_dynstr(d), u))) {
 523     dynstr_terminate(d);
 524     *sp = d->vec;
 525   } else
 526     *sp = 0;
 527   if(rc && rc != -1 && what)
 528     error(0, "  ...in %s at %s:%d", what, m->filename, m->line);
 529   return rc;
 530 }
 531
 532 /** @brief Expand a template file
 533  * @param path Filename
 534  * @param output Where to send output
 535  * @param u User data
 536  * @return 0 on success, non-0 on error
 537  *
 538  * Same return conventions as mx_expand().
 539  */
 540 int mx_expand_file(const char *path,
 541                    struct sink *output,
 542                    void *u) {
 543   int fd, n, rc;
 544   struct stat sb;
 545   char *b;
 546   off_t sofar;
 547   const struct mx_node *m;
 548
 549   if((fd = open(path, O_RDONLY)) < 0)
 550     fatal(errno, "error opening %s", path);
 551   if(fstat(fd, &sb) < 0)
 552     fatal(errno, "error statting %s", path);
 553   if(!S_ISREG(sb.st_mode))
 554     fatal(0, "%s: not a regular file", path);
 555   sofar = 0;
 556   b = xmalloc_noptr(sb.st_size);
 557   while(sofar < sb.st_size) {
 558     n = read(fd, b + sofar, sb.st_size - sofar);
 559     if(n > 0)
 560       sofar += n;
 561     else if(n == 0)
 562       fatal(0, "unexpected EOF reading %s", path);
 563     else if(errno != EINTR)
 564       fatal(errno, "error reading %s", path);
 565   }
 566   xclose(fd);
 567   m = mx_parse(path, 1, b, b + sb.st_size);
 568   rc = mx_expand(m, output, u);
 569   if(rc && rc != -1)
 570     /* Mention inclusion in backtrace */
 571     error(0, "  ...in inclusion of file '%s'", path);
 572   return rc;
 573 }
 574
 575 /* Macros ------------------------------------------------------------------- */
 576
 577 /** @brief Rewrite a parse tree substituting sub-expansions
 578  * @param m Parse tree to rewrite (from macro definition)
 579  * @param ... Name/value pairs to rewrite
 580  * @return Rewritten parse tree
 581  *
 582  * The name/value pair list consists of pairs of strings and is terminated by
 583  * (char *)0.  Names and values are both copied so need not survive the call.
 584  */
 585 const struct mx_node *mx_rewritel(const struct mx_node *m,
 586                                   ...) {
 587   va_list ap;
 588   hash *h = hash_new(sizeof (struct mx_node *));
 589   const char *n, *v;
 590   struct mx_node *e;
 591
 592   va_start(ap, m);
 593   while((n = va_arg(ap, const char *))) {
 594     v = va_arg(ap, const char *);
 595     e = xmalloc(sizeof *e);
 596     e->next = 0;
 597     e->filename = m->filename;
 598     e->line = m->line;
 599     e->type = MX_TEXT;
 600     e->text = xstrdup(v);
 601     hash_add(h, n, &e, HASH_INSERT);
 602     /* hash_add() copies n */
 603   }
 604   return mx_rewrite(m, h);
 605 }
 606
 607 /** @brief Rewrite a parse tree substituting in macro arguments
 608  * @param definition Parse tree to rewrite (from macro definition)
 609  * @param h Hash mapping argument names to argument values
 610  * @return Rewritten parse tree
 611  */
 612 const struct mx_node *mx_rewrite(const struct mx_node *definition,
 613                                  hash *h) {
 614   const struct mx_node *head = 0, **tailp = &head, *argvalue, *m, *mm, **ap;
 615   struct mx_node *nm;
 616   int n;
 617
 618   for(m = definition; m; m = m->next) {
 619     switch(m->type) {
 620     case MX_TEXT:
 621       nm = xmalloc(sizeof *nm);
 622       *nm = *m;                          /* Dumb copy of text node fields */
 623       nm->next = 0;                      /* Maintain list structure */
 624       *tailp = nm;
 625       tailp = (const struct mx_node **)&nm->next;
 626       break;
 627     case MX_EXPANSION:
 628       if(m->nargs == 0
 629          && (ap = hash_find(h, m->name))) {
 630         /* This expansion has no arguments and its name matches one of the
 631          * macro arguments.  (Even if it's a valid expansion name we override
 632          * it.)  We insert its value at this point.  We do NOT recursively
 633          * rewrite the argument's value - it is outside the lexical scope of
 634          * the argument name.
 635          *
 636          * We need to recreate the list structure but a shallow copy will
 637          * suffice here.
 638          */
 639         argvalue = *ap;
 640         for(mm = argvalue; mm; mm = mm->next) {
 641           nm = xmalloc(sizeof *nm);
 642           *nm = *mm;
 643           nm->next = 0;
 644           *tailp = nm;
 645           tailp = (const struct mx_node **)&nm->next;
 646         }
 647       } else {
 648         /* This is some other expansion.  We recursively rewrite its argument
 649          * values according to h. */
 650         nm = xmalloc(sizeof *nm);
 651         *nm = *m;
 652         nm->args = xcalloc(nm->nargs, sizeof (struct mx_node *));
 653         for(n = 0; n < nm->nargs; ++n)
 654           nm->args[n] = mx_rewrite(m->args[n], h);
 655         nm->next = 0;
 656         *tailp = nm;
 657         tailp = (const struct mx_node **)&nm->next;
 658       }
 659       break;
 660     default:
 661       assert(!"invalid m->type");
 662     }
 663   }
 664   *tailp = 0;                           /* Mark end of list */
 665   return head;
 666 }
 667
 668 /** @brief Expand a macro
 669  * @param e Macro definition
 670  * @param m Macro expansion
 671  * @param output Where to send output
 672  * @param u User data
 673  * @return 0 on success, non-0 on error
 674  */
 675 static int mx__expand_macro(const struct expansion *e,
 676                             const struct mx_node *m,
 677                             struct sink *output,
 678                             void *u) {
 679   hash *h = hash_new(sizeof (struct mx_node *));
 680   int n;
 681
 682   /* We store the macro arguments in a hash.  Currently there is no check for
 683    * duplicate argument names (and this would be the wrong place for it
 684    * anyway); if you do that you just lose in some undefined way. */
 685   for(n = 0; n < m->nargs; ++n)
 686     hash_add(h, e->args[n], &m->args[n], HASH_INSERT);
 687   /* Generate a rewritten parse tree */
 688   m = mx_rewrite(e->definition, h);
 689   /* Expand the result */
 690   return mx_expand(m, output, u);
 691   /* mx_expand() will update the backtrace */
 692 }
 693
 694 /*
 695 Local Variables:
 696 c-basic-offset:2
 697 comment-column:40
 698 fill-column:79
 699 indent-tabs-mode:nil
 700 End:
 701 */