chiark - git - mdw - disorder/blob - lib/macros.c

   1 /*
   2  * This file is part of DisOrder
   3  * Copyright (C) 2008 Richard Kettlewell
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
  18  * USA
  19  */
  20
  21 /** @file lib/macros.c
  22  * @brief Macro expansion
  23  */
  24
  25 #include <config.h>
  26 #include "types.h"
  27
  28 #include <string.h>
  29 #include <ctype.h>
  30 #include <assert.h>
  31 #include <stdio.h>
  32 #include <sys/stat.h>
  33 #include <fcntl.h>
  34 #include <unistd.h>
  35 #include <errno.h>
  36
  37 #include "hash.h"
  38 #include "macros.h"
  39 #include "mem.h"
  40 #include "vector.h"
  41 #include "log.h"
  42 #include "sink.h"
  43 #include "syscalls.h"
  44 #include "printf.h"
  45
/* Declares struct mx_node_vector, a growable array of parse tree node
 * pointers.  mx_parse() uses it (through mx_node_vector_init(),
 * mx_node_vector_append() and mx_node_vector_terminate()) to collect the
 * arguments of an expansion.  Presumably xrealloc is the allocator used to
 * grow the array - see vector.h to confirm. */
VECTOR_TYPE(mx_node_vector, const struct mx_node *, xrealloc);
  47
/** @brief Definition of an expansion
 *
 * One of these is stored in the @ref expansions hash for each registered
 * name.  All fields are filled in by mx__register().
 */
struct expansion {
  /** @brief Minimum permitted arguments */
  int min;

  /** @brief Maximum permitted arguments
   *
   * mx_register_macro() sets both limits to the macro's argument count. */
  int max;

  /** @brief Flags
   *
   * See:
   * - @ref EXP_SIMPLE
   * - @ref EXP_MAGIC
   * - @ref EXP_MACRO
   * - @ref EXP_TYPE_MASK
   */
  unsigned flags;

  /** @brief Macro argument names
   *
   * Only for @ref EXP_MACRO expansions; mx_register() and mx_register_magic()
   * store a null pointer here. */
  char **args;

  /** @brief Callback (cast to appropriate type)
   *
   * Cast to @ref mx_simple_callback or @ref mx_magic_callback as required.
   * Deliberately declared without a prototype so that either callback type
   * can be stored.  mx_register_macro() stores a null pointer here. */
  void (*callback)();

  /** @brief Macro definition
   *
   * Only for @ref EXP_MACRO expansions; mx_register() and mx_register_magic()
   * store a null pointer here. */
  const struct mx_node *definition;
};
  79
/** @brief Expansion takes pre-expanded strings
 *
 * @p callback is cast to @ref mx_simple_callback.
 *
 * This value is 0, so it cannot be detected by testing a single bit: compare
 * (flags & @ref EXP_TYPE_MASK) against it, as mx_expand() does. */
#define EXP_SIMPLE 0x0000

/** @brief Expansion takes parsed templates, not strings
 *
 * @p callback is cast to @ref mx_magic_callback.  The callback must do its own
 * expansion e.g. via mx_expandstr() where necessary. */
#define EXP_MAGIC 0x0001

/** @brief Expansion is a macro
 *
 * Expanded by mx__expand_macro() rather than through a callback. */
#define EXP_MACRO 0x0002

/** @brief Mask of types
 *
 * Covers the bits of @ref EXP_SIMPLE, @ref EXP_MAGIC and @ref EXP_MACRO. */
#define EXP_TYPE_MASK 0x0003

/** @brief Hash of all expansions
 *
 * Created by mx_register(), mx_register_macro() or mx_register_magic().
 *
 * Maps expansion names to struct expansion values.  It is allocated lazily by
 * mx__register() on the first registration and is a null pointer until then.
 */
static hash *expansions;

/* Forward declaration: mx_expand() calls this for EXP_MACRO expansions but
 * the definition appears later in the file. */
static int mx__expand_macro(const struct expansion *e,
                            const struct mx_node *m,
                            struct sink *output,
                            void *u);
 107
 108 /* Parsing ------------------------------------------------------------------ */
 109
 110 static int next_non_whitespace(const char *input,
 111                                const char *end) {
 112   while(input < end && isspace((unsigned char)*input))
 113     ++input;
 114   return input < end ? *input : -1;
 115 }
 116
 117 /** @brief Parse a template
 118  * @param filename Input filename (for diagnostics)
 119  * @param line Line number (use 1 on initial call)
 120  * @param input Start of text to parse
 121  * @param end End of text to parse or NULL
 122  * @return Pointer to parse tree root node
 123  *
 124  * Parses the text in [start, end) and returns an (immutable) parse
 125  * tree representing it.
 126  *
 127  * If @p end is NULL then the whole string is parsed.
 128  *
 129  * Note that the @p filename value stored in the parse tree is @p filename,
 130  * i.e. it is not copied.
 131  */
 132 const struct mx_node *mx_parse(const char *filename,
 133                                int line,
 134                                const char *input,
 135                                const char *end) {
 136   int braces, argument_start_line, obracket, cbracket;
 137   const char *argument_start, *argument_end;
 138   struct mx_node_vector v[1];
 139   struct dynstr d[1];
 140   struct mx_node *head = 0, **tailp = &head, *e;
 141
 142   if(!end)
 143     end = input + strlen(input);
 144   while(input < end) {
 145     if(*input != '@') {
 146       e = xmalloc(sizeof *e);
 147       e->next = 0;
 148       e->filename = filename;
 149       e->line = line;
 150       e->type = MX_TEXT;
 151       dynstr_init(d);
 152       /* Gather up text without any expansions in. */
 153       while(input < end && *input != '@') {
 154         if(*input == '\n')
 155           ++line;
 156         dynstr_append(d, *input++);
 157       }
 158       dynstr_terminate(d);
 159       e->text = d->vec;
 160       *tailp = e;
 161       tailp = &e->next;
 162       continue;
 163     }
 164     if(input + 1 < end)
 165       switch(input[1]) {
 166       case '@':
 167         /* '@@' expands to '@' */
 168         e = xmalloc(sizeof *e);
 169         e->next = 0;
 170         e->filename = filename;
 171         e->line = line;
 172         e->type = MX_TEXT;
 173         e->text = "@";
 174         *tailp = e;
 175         tailp = &e->next;
 176         input += 2;
 177         continue;
 178       case '#':
 179         /* '@#' starts a (newline-eating comment), like dnl */
 180         input += 2;
 181         while(input < end && *input != '\n')
 182           ++input;
 183         if(*input == '\n') {
 184           ++line;
 185           ++input;
 186         }
 187         continue;
 188       case '_':
 189         /* '@_' expands to nothing.  It's there to allow dump to terminate
 190          * expansions without having to know what follows. */
 191         input += 2;
 192         continue;
 193       }
 194     /* It's a full expansion */
 195     ++input;
 196     e = xmalloc(sizeof *e);
 197     e->next = 0;
 198     e->filename = filename;
 199     e->line = line;
 200     e->type = MX_EXPANSION;
 201     /* Collect the expansion name.  Expansion names start with an alnum and
 202      * consist of alnums and '-'.  We don't permit whitespace between the '@'
 203      * and the name. */
 204     dynstr_init(d);
 205     if(input == end)
 206       fatal(0, "%s:%d: invalid expansion syntax (truncated)",
 207             filename, e->line);
 208     if(!isalnum((unsigned char)*input))
 209       fatal(0, "%s:%d: invalid expansion syntax (unexpected %#x)",
 210             filename, e->line, (unsigned char)*input);
 211     while(input < end && (isalnum((unsigned char)*input) || *input == '-'))
 212       dynstr_append(d, *input++);
 213     dynstr_terminate(d);
 214     e->name = d->vec;
 215     /* See what the bracket character is */
 216     obracket = next_non_whitespace(input, end);
 217     switch(obracket) {
 218     case '(': cbracket = ')'; break;
 219     case '[': cbracket = ']'; break;
 220     case '{': cbracket = '}'; break;
 221     default: cbracket = obracket = -1; break;      /* no arguments */
 222     }
 223     mx_node_vector_init(v);
 224     if(obracket >= 0) {
 225       /* Gather up arguments */
 226       while(next_non_whitespace(input, end) == obracket) {
 227         while(isspace((unsigned char)*input)) {
 228           if(*input == '\n')
 229             ++line;
 230           ++input;
 231         }
 232         ++input;                        /* the bracket */
 233         braces = 0;
 234         /* Find the end of the argument */
 235         argument_start = input;
 236         argument_start_line = line;
 237         while(input < end && (*input != cbracket || braces > 0)) {
 238           const int c = *input++;
 239
 240           if(c == obracket)
 241             ++braces;
 242           else if(c == cbracket)
 243             --braces;
 244           else if(c == '\n')
 245             ++line;
 246         }
 247         if(input >= end) {
 248           /* We ran out of input without encountering a balanced cbracket */
 249           fatal(0, "%s:%d: unterminated expansion argument '%.*s'",
 250                 filename, argument_start_line,
 251                 (int)(input - argument_start), argument_start);
 252         }
 253         /* Consistency check */
 254         assert(*input == cbracket);
 255         /* Record the end of the argument */
 256         argument_end = input;
 257         /* Step over the cbracket */
 258         ++input;
 259         /* Now we have an argument in [argument_start, argument_end), and we
 260          * know its filename and initial line number.  This is sufficient to
 261          * parse it. */
 262         mx_node_vector_append(v, mx_parse(filename, argument_start_line,
 263                                           argument_start, argument_end));
 264       }
 265     }
 266     /* Guarantee a NULL terminator (for the case where there's more than one
 267      * argument) */
 268     mx_node_vector_terminate(v);
 269     /* Fill in the remains of the node */
 270     e->nargs = v->nvec;
 271     e->args = v->vec;
 272     *tailp = e;
 273     tailp = &e->next;
 274   }
 275   return head;
 276 }
 277
 278 static void mx__dump(struct dynstr *d, const struct mx_node *m) {
 279   int n;
 280   const struct mx_node *mm;
 281
 282   if(!m)
 283     return;
 284   switch(m->type) {
 285   case MX_TEXT:
 286     if(m->text[0] == '@')
 287       dynstr_append(d, '@');
 288     dynstr_append_string(d, m->text);
 289     break;
 290   case MX_EXPANSION:
 291     dynstr_append(d, '@');
 292     dynstr_append_string(d, m->name);
 293     for(n = 0; n < m->nargs; ++n) {
 294       dynstr_append(d, '{');
 295       mx__dump(d, m->args[n]);
 296       dynstr_append(d, '}');
 297     }
 298     /* If the next non-whitespace is '{', add @_ to stop it being
 299      * misinterpreted */
 300     mm = m->next;
 301     while(mm && mm->type == MX_TEXT) {
 302       switch(next_non_whitespace(mm->text, mm->text + strlen(mm->text))) {
 303       case -1:
 304         mm = mm->next;
 305         continue;
 306       case '{':
 307         dynstr_append_string(d, "@_");
 308         break;
 309       default:
 310         break;
 311       }
 312       break;
 313     }
 314     break;
 315   default:
 316     assert(!"invalid m->type");
 317   }
 318   mx__dump(d, m->next);
 319 }
 320
 321 /** @brief Dump a parse macro expansion to a string
 322  *
 323  * Not of production quality!  Only intended for testing!
 324  */
 325 char *mx_dump(const struct mx_node *m) {
 326   struct dynstr d[1];
 327
 328   dynstr_init(d);
 329   mx__dump(d, m);
 330   dynstr_terminate(d);
 331   return d->vec;
 332 }
 333
 334 /* Expansion registration --------------------------------------------------- */
 335
 336 static int mx__register(unsigned flags,
 337                         const char *name,
 338                         int min,
 339                         int max,
 340                         char **args,
 341                         void (*callback)(),
 342                         const struct mx_node *definition) {
 343   struct expansion e[1];
 344
 345   if(!expansions)
 346     expansions = hash_new(sizeof(struct expansion));
 347   e->min = min;
 348   e->max = max;
 349   e->flags = flags;
 350   e->args = args;
 351   e->callback = callback;
 352   e->definition = definition;
 353   return hash_add(expansions, name, &e, HASH_INSERT_OR_REPLACE);
 354 }
 355
 356 /** @brief Register a simple expansion rule
 357  * @param name Name
 358  * @param min Minimum number of arguments
 359  * @param max Maximum number of arguments
 360  * @param callback Callback to write output
 361  */
 362 void mx_register(const char *name,
 363                  int min,
 364                  int max,
 365                  mx_simple_callback *callback) {
 366   mx__register(EXP_SIMPLE,  name, min, max, 0, (void (*)())callback, 0);
 367 }
 368
 369 /** @brief Register a magic expansion rule
 370  * @param name Name
 371  * @param min Minimum number of arguments
 372  * @param max Maximum number of arguments
 373  * @param callback Callback to write output
 374  */
 375 void mx_register_magic(const char *name,
 376                        int min,
 377                        int max,
 378                        mx_magic_callback *callback) {
 379   mx__register(EXP_MAGIC, name, min, max, 0, (void (*)())callback, 0);
 380 }
 381
 382 /** @brief Register a macro
 383  * @param name Name
 384  * @param nargs Number of arguments
 385  * @param args Argument names
 386  * @param definition Macro definition
 387  * @return 0 on success, negative on error
 388  */
 389 int mx_register_macro(const char *name,
 390                       int nargs,
 391                       char **args,
 392                       const struct mx_node *definition) {
 393   if(mx__register(EXP_MACRO, name, nargs, nargs, args,  0/*callback*/,
 394                   definition)) {
 395 #if 0
 396     /* This locates the error to the definition, which may be a line or two
 397      * beyond the @define command itself.  The backtrace generated by
 398      * mx_expand() may help more. */
 399     error(0, "%s:%d: duplicate definition of '%s'",
 400           definition->filename, definition->line, name);
 401 #endif
 402     return -2;
 403   }
 404   return 0;
 405 }
 406
 407 /* Expansion ---------------------------------------------------------------- */
 408
 409 /** @brief Expand a template
 410  * @param m Where to start
 411  * @param output Where to send output
 412  * @param u User data
 413  * @return 0 on success, non-0 on error
 414  *
 415  * Interpretation of return values:
 416  * - 0 means success
 417  * - -1 means an error writing to the sink.
 418  * - other negative values mean errors generated from with the macro
 419  *   expansion system
 420  * - positive values are reserved for the application
 421  *
 422  * If any callback returns non-zero then that value is returned, abandoning
 423  * further expansion.
 424  */
 425 int mx_expand(const struct mx_node *m,
 426               struct sink *output,
 427               void *u) {
 428   const struct expansion *e;
 429   int rc;
 430
 431   if(!m)
 432     return 0;
 433   switch(m->type) {
 434   case MX_TEXT:
 435     if(sink_writes(output, m->text) < 0)
 436       return -1;
 437     break;
 438   case MX_EXPANSION:
 439     rc = 0;
 440     if(!(e = hash_find(expansions, m->name))) {
 441       error(0, "%s:%d: unknown expansion name '%s'",
 442             m->filename, m->line, m->name);
 443       if(sink_printf(output, "[['%s' unknown]]", m->name) < 0)
 444         return -1;
 445     } else if(m->nargs < e->min) {
 446       error(0, "%s:%d: expansion '%s' requires %d args, only %d given",
 447             m->filename, m->line, m->name, e->min, m->nargs);
 448       if(sink_printf(output, "[['%s' too few args]]", m->name) < 0)
 449         return -1;
 450     } else if(m->nargs > e->max) {
 451       error(0, "%s:%d: expansion '%s' takes at most %d args, but %d given",
 452             m->filename, m->line, m->name, e->max, m->nargs);
 453       if(sink_printf(output, "[['%s' too many args]]", m->name) < 0)
 454         return -1;
 455     } else switch(e->flags & EXP_TYPE_MASK) {
 456       case EXP_MAGIC: {
 457         /* Magic callbacks we can call directly */
 458         rc = ((mx_magic_callback *)e->callback)(m->nargs,
 459                                                 m->args,
 460                                                 output,
 461                                                 u);
 462         break;
 463       }
 464       case EXP_SIMPLE: {
 465         /* For simple callbacks we expand their arguments for them. */
 466         char **args = xcalloc(1 + m->nargs, sizeof (char *)), *argname;
 467         int n;
 468
 469         for(n = 0; n < m->nargs; ++n) {
 470           /* Argument numbers are at least clear from looking at the text;
 471            * adding names as well would be nice.  TODO */
 472           byte_xasprintf(&argname, "argument #%d", n);
 473           if((rc = mx_expandstr(m->args[n], &args[n], u, argname)))
 474             break;
 475         }
 476         if(!rc) {
 477           args[n] = NULL;
 478           rc = ((mx_simple_callback *)e->callback)(m->nargs,
 479                                                    args,
 480                                                    output,
 481                                                    u);
 482         }
 483         break;
 484       }
 485       case EXP_MACRO: {
 486         /* Macros we expand by rewriting their definition with argument values
 487          * substituted and then expanding that. */
 488         rc = mx__expand_macro(e, m, output, u);
 489         break;
 490       }
 491       default:
 492         assert(!"impossible EXP_TYPE_MASK value");
 493     }
 494     if(rc) {
 495       /* For non-IO errors we generate some backtrace */
 496       if(rc != -1)
 497         error(0,  "  ...in @%s at %s:%d",
 498               m->name, m->filename, m->line);
 499       return rc;
 500     }
 501     break;
 502   default:
 503     assert(!"invalid m->type");
 504   }
 505   return mx_expand(m->next, output, u);
 506 }
 507
 508 /** @brief Expand a template storing the result in a string
 509  * @param m Where to start
 510  * @param sp Where to store string
 511  * @param u User data
 512  * @param what Token for backtrace, or NULL
 513  * @return 0 on success, non-0 on error
 514  *
 515  * Same return conventions as mx_expand().  This wrapper is slightly more
 516  * convenient to use from 'magic' expansions.
 517  */
 518 int mx_expandstr(const struct mx_node *m,
 519                  char **sp,
 520                  void *u,
 521                  const char *what) {
 522   struct dynstr d[1];
 523   int rc;
 524
 525   dynstr_init(d);
 526   if(!(rc = mx_expand(m, sink_dynstr(d), u))) {
 527     dynstr_terminate(d);
 528     *sp = d->vec;
 529   } else
 530     *sp = 0;
 531   if(rc && rc != -1 && what)
 532     error(0, "  ...in %s at %s:%d", what, m->filename, m->line);
 533   return rc;
 534 }
 535
 536 /** @brief Expand a template file
 537  * @param path Filename
 538  * @param output Where to send output
 539  * @param u User data
 540  * @return 0 on success, non-0 on error
 541  *
 542  * Same return conventions as mx_expand().
 543  */
 544 int mx_expand_file(const char *path,
 545                    struct sink *output,
 546                    void *u) {
 547   int fd, n, rc;
 548   struct stat sb;
 549   char *b;
 550   off_t sofar;
 551   const struct mx_node *m;
 552
 553   if((fd = open(path, O_RDONLY)) < 0)
 554     fatal(errno, "error opening %s", path);
 555   if(fstat(fd, &sb) < 0)
 556     fatal(errno, "error statting %s", path);
 557   if(!S_ISREG(sb.st_mode))
 558     fatal(0, "%s: not a regular file", path);
 559   sofar = 0;
 560   b = xmalloc_noptr(sb.st_size);
 561   while(sofar < sb.st_size) {
 562     n = read(fd, b + sofar, sb.st_size - sofar);
 563     if(n > 0)
 564       sofar += n;
 565     else if(n == 0)
 566       fatal(0, "unexpected EOF reading %s", path);
 567     else if(errno != EINTR)
 568       fatal(errno, "error reading %s", path);
 569   }
 570   xclose(fd);
 571   m = mx_parse(path, 1, b, b + sb.st_size);
 572   rc = mx_expand(m, output, u);
 573   if(rc && rc != -1)
 574     /* Mention inclusion in backtrace */
 575     error(0, "  ...in inclusion of file '%s'", path);
 576   return rc;
 577 }
 578
 579 /* Macros ------------------------------------------------------------------- */
 580
 581 /** @brief Rewrite a parse tree substituting sub-expansions
 582  * @param m Parse tree to rewrite (from macro definition)
 583  * @param ... Name/value pairs to rewrite
 584  * @return Rewritten parse tree
 585  *
 586  * The name/value pair list consists of pairs of strings and is terminated by
 587  * (char *)0.  Names and values are both copied so need not survive the call.
 588  */
 589 const struct mx_node *mx_rewritel(const struct mx_node *m,
 590                                   ...) {
 591   va_list ap;
 592   hash *h = hash_new(sizeof (struct mx_node *));
 593   const char *n, *v;
 594   struct mx_node *e;
 595
 596   va_start(ap, m);
 597   while((n = va_arg(ap, const char *))) {
 598     v = va_arg(ap, const char *);
 599     e = xmalloc(sizeof *e);
 600     e->next = 0;
 601     e->filename = m->filename;
 602     e->line = m->line;
 603     e->type = MX_TEXT;
 604     e->text = xstrdup(v);
 605     hash_add(h, n, &e, HASH_INSERT);
 606     /* hash_add() copies n */
 607   }
 608   return mx_rewrite(m, h);
 609 }
 610
 611 /** @brief Rewrite a parse tree substituting in macro arguments
 612  * @param definition Parse tree to rewrite (from macro definition)
 613  * @param h Hash mapping argument names to argument values
 614  * @return Rewritten parse tree
 615  */
 616 const struct mx_node *mx_rewrite(const struct mx_node *definition,
 617                                  hash *h) {
 618   const struct mx_node *head = 0, **tailp = &head, *argvalue, *m, *mm, **ap;
 619   struct mx_node *nm;
 620   int n;
 621
 622   for(m = definition; m; m = m->next) {
 623     switch(m->type) {
 624     case MX_TEXT:
 625       nm = xmalloc(sizeof *nm);
 626       *nm = *m;                          /* Dumb copy of text node fields */
 627       nm->next = 0;                      /* Maintain list structure */
 628       *tailp = nm;
 629       tailp = (const struct mx_node **)&nm->next;
 630       break;
 631     case MX_EXPANSION:
 632       if(m->nargs == 0
 633          && (ap = hash_find(h, m->name))) {
 634         /* This expansion has no arguments and its name matches one of the
 635          * macro arguments.  (Even if it's a valid expansion name we override
 636          * it.)  We insert its value at this point.  We do NOT recursively
 637          * rewrite the argument's value - it is outside the lexical scope of
 638          * the argument name.
 639          *
 640          * We need to recreate the list structure but a shallow copy will
 641          * suffice here.
 642          */
 643         argvalue = *ap;
 644         for(mm = argvalue; mm; mm = mm->next) {
 645           nm = xmalloc(sizeof *nm);
 646           *nm = *mm;
 647           nm->next = 0;
 648           *tailp = nm;
 649           tailp = (const struct mx_node **)&nm->next;
 650         }
 651       } else {
 652         /* This is some other expansion.  We recursively rewrite its argument
 653          * values according to h. */
 654         nm = xmalloc(sizeof *nm);
 655         *nm = *m;
 656         nm->args = xcalloc(nm->nargs, sizeof (struct mx_node *));
 657         for(n = 0; n < nm->nargs; ++n)
 658           nm->args[n] = mx_rewrite(m->args[n], h);
 659         nm->next = 0;
 660         *tailp = nm;
 661         tailp = (const struct mx_node **)&nm->next;
 662       }
 663       break;
 664     default:
 665       assert(!"invalid m->type");
 666     }
 667   }
 668   *tailp = 0;                           /* Mark end of list */
 669   return head;
 670 }
 671
 672 /** @brief Expand a macro
 673  * @param e Macro definition
 674  * @param m Macro expansion
 675  * @param output Where to send output
 676  * @param u User data
 677  * @return 0 on success, non-0 on error
 678  */
 679 static int mx__expand_macro(const struct expansion *e,
 680                             const struct mx_node *m,
 681                             struct sink *output,
 682                             void *u) {
 683   hash *h = hash_new(sizeof (struct mx_node *));
 684   int n;
 685
 686   /* We store the macro arguments in a hash.  Currently there is no check for
 687    * duplicate argument names (and this would be the wrong place for it
 688    * anyway); if you do that you just lose in some undefined way. */
 689   for(n = 0; n < m->nargs; ++n)
 690     hash_add(h, e->args[n], &m->args[n], HASH_INSERT);
 691   /* Generate a rewritten parse tree */
 692   m = mx_rewrite(e->definition, h);
 693   /* Expand the result */
 694   return mx_expand(m, output, u);
 695   /* mx_expand() will update the backtrace */
 696 }
 697
 698 /*
 699 Local Variables:
 700 c-basic-offset:2
 701 comment-column:40
 702 fill-column:79
 703 indent-tabs-mode:nil
 704 End:
 705 */