lib/confparse.c

   1 /*  $Id: confparse.c 6135 2003-01-19 01:15:40Z rra $
   2 **
   3 **  Parse a standard block-structured configuration file syntax.
   4 **
   5 **  Herein are all the parsing and access functions for the configuration
   6 **  syntax used by INN.  See doc/config-* for additional documentation.
   7 **
   8 **  All entry point functions begin with config_*.  config_parse_file is
   9 **  the entry point for most of the work done in this file; all other
  10 **  functions access the parse tree that config_parse_file generates.
  11 **
  12 **  Functions are named by the structure or basic task they work on:
  13 **
  14 **      parameter_*     config_parameter structs.
  15 **      group_*         config_group structs.
  16 **      file_*          config_file structs (including all I/O).
  17 **      token_*         The guts of the lexer.
  18 **      parse_*         The guts of the parser.
  19 **      error_*         Error reporting functions.
  20 **      convert_*       Converting raw parameter values.
  21 **
  22 **  Each currently open file is represented by a config_file struct, which
  23 **  contains the current parse state for that file, including the internal
  24 **  buffer, a pointer to where in the buffer the next token starts, and the
  25 **  current token.  Inclusion of additional files is handled by maintaining a
  26 **  stack of config_file structs, so when one file is finished, the top struct
  27 **  is popped off the stack and parsing continues where it left off.
  28 **
  29 **  Since config_file structs contain the parse state, they're passed as an
  30 **  argument to most functions.
  31 **
  32 **  A config_file struct contains a token struct, representing the current
  33 **  token.  The configuration file syntax is specifically designed to never
  34 **  require lookahead to parse; all parse decisions can be made on the basis
  35 **  of the current state and a single token.  A token consists of a type and
  36 **  an optional attached string.  Note that strings are allocated by the lexer
  37 **  but are never freed by the lexer!  Any token with an associated string
  38 **  should have that string copied into permanent storage (like the params
  39 **  hash of a config_group) or freed.  error_unexpected_token will do the
  40 **  latter.
  41 **
  42 **  Errors in the lexer are indicated by setting the token to TOKEN_ERROR.
  43 **  All parsing errors are indicated by setting the error flag in the current
  44 **  config_file struct.  Error recovery is *not* implemented by the current
  45 **  algorithm; it would add a lot of complexity to the parsing algorithm and
  46 **  the results still probably shouldn't be used by the calling program, so it
  47 **  would only be useful to catch more than one syntax error per invocation
  48 **  and it isn't expected that syntax errors will be that common.  Instead, if
  49 **  something fails to parse, the whole parser unwinds and returns failure.
  50 **
  51 **  The config_param_* functions are used to retrieve the values of
  52 **  parameters; each uses a convert_* function to convert the raw parameter
  53 **  value to the type specified by the user.  group_parameter_get can
  54 **  therefore be the same for all parameter types, with all of the variations
  55 **  encapsulated in the convert_* functions.
  56 */
  57
  58 #include "config.h"
  59 #include "clibrary.h"
  60 #include <errno.h>
  61 #include <fcntl.h>
  62
  63 #include "inn/confparse.h"
  64 #include "inn/hashtab.h"
  65 #include "inn/messages.h"
  66 #include "inn/vector.h"
  67 #include "libinn.h"
  68
  69
  70 /* The types of tokens seen in configuration files. */
  71 enum token_type {
  72     TOKEN_CRLF,                 /* End of line, set by token_newline. */
  73     TOKEN_STRING,               /* Unquoted string. */
  74     TOKEN_QSTRING,              /* Quoted string, quotes included. */
  75     TOKEN_PARAM,                /* Unquoted string ending in a colon. */
  76     TOKEN_LBRACE,               /* '{' */
  77     TOKEN_RBRACE,               /* '}' */
  78     TOKEN_LANGLE,               /* '<' */
  79     TOKEN_RANGLE,               /* '>' */
  80     TOKEN_LBRACKET,             /* '[' */
  81     TOKEN_RBRACKET,             /* ']' */
  82     TOKEN_SEMICOLON,            /* ';' */
  83     TOKEN_EOF,                  /* End of file, set by file_read. */
  84     TOKEN_ERROR                 /* Lexer, parser, or read failure. */
  85 };
  86
  87 /* The parse status of a file.  Variables marked internal are only used by
  88    file_* functions; other functions don't need to look at them.  Other
  89    variables are marked by what functions are responsible for maintaining
  90    them. */
  91 struct config_file {
  92     int fd;                     /* Internal: descriptor from open. */
  93     char *buffer;               /* Internal: nul-terminated read buffer. */
  94     size_t bufsize;             /* Internal: allocated size of buffer. */
  95     const char *filename;       /* file_open; pointer stored, not copied. */
  96     unsigned int line;          /* Starts at 1; advanced by token_*. */
  97     bool error;                 /* Everyone */
  98
  99     /* Set by file_* and token_*.  current == NULL indicates we've not yet
 100        read from the file. */
 101     char *current;
 102
 103     /* Normally set by token_*, but file_read and file_read_more may set token
 104        to TOKEN_ERROR or TOKEN_EOF when those conditions are encountered.  In
 105        that situation, they also return false. */
 106     struct {
 107         enum token_type type;
 108         char *string;           /* Allocated by the lexer, or NULL. */
 109     } token;
 110 };
 111
 112 /* The types of parameters, used to distinguish the values of the union in the
 113    config_parameter_s struct. */
 114 enum value_type {
 115     VALUE_UNKNOWN,              /* Initial type given by parse_parameter. */
 116     VALUE_BOOL,                 /* Presumably value.boolean; confirm. */
 117     VALUE_INTEGER,              /* Presumably value.integer; confirm. */
 118     VALUE_NUMBER,               /* Presumably value.number; confirm. */
 119     VALUE_STRING,               /* value.string; parameter_free frees it. */
 120     VALUE_LIST,                 /* value.list; parameter_free frees it. */
 121     VALUE_INVALID               /* NOTE(review): use not visible here. */
 122 };
 123
 124 /* Each setting is represented by one of these structs, stored in the params
 125    hash of a config group.  Since all of a config_group must be in the same
 126    file (either group->file for regular groups or group->included for groups
 127    whose definition is in an included file), we don't have to stash a file
 128    name here for error reporting but can instead get that from the enclosing
 129    group. */
 130 struct config_parameter {
 131     char *key;                  /* Hash key; freed by parameter_free. */
 132     char *raw_value;            /* Value token exactly as lexed. */
 133     unsigned int line;          /* For error reporting. */
 134     enum value_type type;       /* Says which member of value is set. */
 135     union {
 136         bool boolean;
 137         long integer;
 138         double number;
 139         char *string;           /* Freed when type is VALUE_STRING. */
 140         struct vector *list;    /* Freed when type is VALUE_LIST. */
 141     } value;
 142 };
 143
 144 /* The type of a function that converts a raw parameter value to some other
 145    data type, storing the result in its third argument and returning true on
 146    success or false on failure. */
 147 typedef bool (*convert_func)(struct config_parameter *, const char *, void *);
 148
 149 /* The basic element of configuration data, a group of parameters.  This is
 150    the only struct that is exposed to callers, and then only as an opaque
 151    data structure. */
 152 struct config_group {
 153     char *type;                 /* Copy owned by the group. */
 154     char *tag;                  /* Owned copy, or NULL if no tag given. */
 155     char *file;                 /* File in which the group starts. */
 156     unsigned int line;          /* Line number where the group starts. */
 157     char *included;             /* For group <file>, the included file. */
 158     struct hash *params;        /* config_parameter structs, by key. */
 159
 160     struct config_group *parent;    /* These three links are all NULL in */
 161     struct config_group *child;     /* a group fresh from group_new and  */
 162     struct config_group *next;      /* are not followed by group_free.   */
 163 };
 164
 165
 166 /* Parameter handling, used by the hash table stored in a config_group. */
 167 static const void *parameter_key(const void *p);
 168 static bool parameter_equal(const void *k, const void *p);
 169 static void parameter_free(void *p);
 170
 171 /* Hash traversal function to collect parameters into a vector. */
 172 static void parameter_collect(void *, void *);
 173
 174 /* Group handling. */
 175 static struct config_group *group_new(const char *file, unsigned int line,
 176                                       const char *type, const char *tag);
 177 static void group_free(struct config_group *);
 178 static bool group_parameter_get(struct config_group *group, const char *key,
 179                                 void *result, convert_func convert);
 180
 181 /* Parameter type conversion functions.  All take the parameter, the file, and
 182    a pointer to where the result can be placed. */
 183 static bool convert_boolean(struct config_parameter *, const char *, void *);
 184 static bool convert_integer(struct config_parameter *, const char *, void *);
 185 static bool convert_string(struct config_parameter *, const char *, void *);
 186
 187 /* File I/O.  Many other functions also manipulate config_file structs; see
 188    the struct definition for notes on who's responsible for what. */
 189 static struct config_file *file_open(const char *filename);
 190 static bool file_read(struct config_file *);
 191 static bool file_read_more(struct config_file *, ptrdiff_t offset);
 192 static void file_close(struct config_file *);
 193
 194 /* The basic lexer function.  The token is stashed in file; the return value
 195    is just for convenience and duplicates that information. */
 196 static enum token_type token_next(struct config_file *);
 197
 198 /* Handler functions for specific types of tokens.  These should only be
 199    called by token_next. */
 200 static void token_simple(struct config_file *, enum token_type type);
 201 static void token_newline(struct config_file *);
 202 static void token_string(struct config_file *);
 203 static void token_quoted_string(struct config_file *);
 204
 205 /* Handles whitespace for the rest of the lexer. */
 206 static bool token_skip_whitespace(struct config_file *);
 207
 208 /* Handles comments for the rest of the lexer. */
 209 static bool token_skip_comment(struct config_file *);
 210
 211 /* Parser functions to parse the named syntactic element.  parse_parameter
        takes ownership of key:  it ends up in the group's hash or is freed. */
 212 static bool parse_group_contents(struct config_group *, struct config_file *);
 213 static enum token_type parse_parameter(struct config_group *,
 214                                        struct config_file *, char *key);
 215
 216 /* Error reporting functions.  Both leave the token set to TOKEN_ERROR and
        set the error flag of the file. */
 217 static void error_bad_unquoted_char(struct config_file *, char bad);
 218 static void error_unexpected_token(struct config_file *,
 219                                    const char *expecting);
 220
 221
 222 /*
 223 **  Return the key from a parameter struct, used by the hash table.
 224 */
 225 static const void *
 226 parameter_key(const void *p)
 227 {
 228     const struct config_parameter *param = p;
 229
 230     return param->key;
 231 }
 232
 233
 234 /*
 235 **  Check to see if a provided key matches the key of a parameter struct,
 236 **  used by the hash table.
 237 */
 238 static bool
 239 parameter_equal(const void *k, const void *p)
 240 {
 241     const char *key = k;
 242     const struct config_parameter *param = p;
 243
 244     return strcmp(key, param->key) == 0;
 245 }
 246
 247
 248 /*
 249 **  Free a parameter, used by the hash table.
 250 */
 251 static void
 252 parameter_free(void *p)
 253 {
 254     struct config_parameter *param = p;
 255
 256     free(param->key);
 257     free(param->raw_value);
 258     if (param->type == VALUE_STRING) {
 259         free(param->value.string);
 260     } else if (param->type == VALUE_LIST) {
 261         vector_free(param->value.list);
 262     }
 263     free(param);
 264 }
 265
 266
 267 /*
 268 **  Report an unexpected character while parsing a regular string and set the
 269 **  current token type to TOKEN_ERROR.
 270 */
 271 static void
 272 error_bad_unquoted_char(struct config_file *file, char bad)
 273 {
 274     warn("%s:%u: invalid character '%c' in unquoted string", file->filename,
 275          file->line, bad);
 276     file->token.type = TOKEN_ERROR;
 277     file->error = true;
 278 }
 279
 280
 281 /*
 282 **  Report an unexpected token.  If the token is TOKEN_ERROR, don't print an
 283 **  additional error message.  Takes a string saying what token was expected.
 284 **  Sets the token to TOKEN_ERROR and frees the associated string if the
 285 **  current token type is TOKEN_STRING, TOKEN_QSTRING, or TOKEN_PARAM.
 286 */
 287 static void
 288 error_unexpected_token(struct config_file *file, const char *expecting)
 289 {
 290     const char *name;
 291     bool string = false;
 292
 293     /* If the bad token type is a string, param, or quoted string, free the
 294        string associated with the token to avoid a memory leak. */
 295     if (file->token.type != TOKEN_ERROR) {
 296         switch (file->token.type) {
 297         case TOKEN_STRING:      name = "string";        string = true; break;
 298         case TOKEN_QSTRING:     name = "quoted string"; string = true; break;
 299         case TOKEN_PARAM:       name = "parameter";     string = true; break;
 300         case TOKEN_CRLF:        name = "end of line";   break;
 301         case TOKEN_LBRACE:      name = "'{'";           break;
 302         case TOKEN_RBRACE:      name = "'}'";           break;
 303         case TOKEN_LANGLE:      name = "'<'";           break;
 304         case TOKEN_RANGLE:      name = "'>'";           break;
 305         case TOKEN_LBRACKET:    name = "'['";           break;
 306         case TOKEN_RBRACKET:    name = "']'";           break;
 307         case TOKEN_SEMICOLON:   name = "';'";           break;
 308         case TOKEN_EOF:         name = "end of file";   break;
 309         default:                name = "unknown token"; break;
 310         }
 311         warn("%s:%u: parse error: saw %s, expecting %s", file->filename,
 312              file->line, name, expecting);
 313     }
 314     if (string) {
 315         free(file->token.string);
 316         file->token.string = NULL;
 317     }
 318     file->token.type = TOKEN_ERROR;
 319     file->error = true;
 320 }
 321
 322
 323 /*
 324 **  Handle a simple token (a single character), advancing the file->current
 325 **  pointer past it and setting file->token as appropriate.
 326 */
 327 static void
 328 token_simple(struct config_file *file, enum token_type type)
 329 {
 330     file->current++;
 331     file->token.type = type;
 332     file->token.string = NULL;
 333 }
 334
 335
 336 /*
 337 **  Handle a newline.  Skip any number of comments after the newline,
 338 **  including reading more data from the file if necessary, and update
 339 **  file->line as needed.
 340 */
 341 static void
 342 token_newline(struct config_file *file)
 343 {
 344     /* If we're actually positioned on a newline, update file->line and skip
 345        over it.  Try to handle CRLF correctly, as a single line terminator
 346        that only increments the line count once, while still treating either
 347        CR or LF alone as line terminators in their own regard. */
 348     if (*file->current == '\n') {
 349         file->current++;
 350         file->line++;
 351     } else if (*file->current == '\r') {
 352         if (file->current[1] == '\n')
 353             file->current += 2;
 354         else if (file->current[1] != '\0')
 355             file->current++;
 356         else {
 357             if (!file_read(file)) {
 358                 file->current++;
 359                 return;
 360             }
 361             if (*file->current == '\n')
 362                 file->current++;
 363         }
 364         file->line++;
 365     }
 366
 367     if (!token_skip_whitespace(file))
 368         return;
 369     while (*file->current == '#') {
 370         if (!token_skip_comment(file))
 371             return;
 372         if (!token_skip_whitespace(file))
 373             return;
 374     }
 375     file->token.type = TOKEN_CRLF;
 376     file->token.string = NULL;
 377 }
 378
 379
 380 /*
 381 **  Handle a string.  Only some characters are allowed in an unquoted string;
 382 **  check that, since otherwise it could hide syntax errors.  Any whitespace
 383 **  ends the token.  We have to distinguish between TOKEN_PARAM and
 384 **  TOKEN_STRING; the former ends in a colon, unlike the latter.
 385 */
 386 static void
 387 token_string(struct config_file *file)
 388 {
 389     int i;
 390     bool status;
 391     ptrdiff_t offset;
 392     bool done = false;
 393     bool colon = false;
 394
 395     /* Use an offset from file->current rather than a pointer that moves
 396        through the buffer, since the base of file->current can change during a
 397        file_read_more() call and we don't want to have to readjust a
 398        pointer.  If we have to read more, adjust our counter back one
 399        character, since the nul was replaced by a new, valid character. */
 400     i = 0;
 401     while (!done) {
 402         switch (file->current[i]) {
 403         case '\t':  case '\r':  case '\n':  case ' ':   case ';':
 404             done = true;
 405             break;
 406         case '"':   case '<':   case '>':   case '[':
 407         case '\\':  case ']':   case '{':   case '}':
 408             error_bad_unquoted_char(file, file->current[i]);
 409             return;
 410         case ':':
 411             if (colon) {
 412                 error_bad_unquoted_char(file, file->current[i]);
 413                 return;
 414             }
 415             colon = true;
 416             break;
 417         case '\0':
 418             offset = file->current - file->buffer;
 419             status = file_read_more(file, offset);
 420             if (status)
 421                 i--;
 422             else
 423                 done = true;
 424             break;
 425         default:
 426             if (colon) {
 427                 error_bad_unquoted_char(file, ':');
 428                 return;
 429             }
 430         }
 431         if (!done)
 432             i++;
 433     }
 434     file->token.type = colon ? TOKEN_PARAM : TOKEN_STRING;
 435     file->token.string = xstrndup(file->current, i - colon);
 436     file->current += i;
 437 }
 438
 439
 440 /*
 441 **  Handle a quoted string.  This token is unique as the only token that can
 442 **  contain whitespace, even newlines if they're escaped, so we also have to
 443 **  update file->line as we go.  Note that the quotes *are* included in the
 444 **  string we stash in file->token, since they should be part of the raw_value
 445 **  of a parameter.
 446 */
 447 static void
 448 token_quoted_string(struct config_file *file)
 449 {
 450     int i;
 451     ptrdiff_t offset;
 452     bool status;
 453     bool done = false;
 454
 455     /* Use an offset from file->current rather than a pointer that moves
 456        through the buffer, since the base of file->current can change during a
 457        file_read_more() call and we don't want to have to readjust a pointer.
 458        If we have to read more, adjust our counter back one character, since
 459        the nul was replaced by a new, valid character. */
 460     for (i = 1; !done; i++) {
 461         switch (file->current[i]) {
 462         case '"':
 463             done = true;
 464             break;
 465         case '\r':
 466         case '\n':
 467             warn("%s:%u: no close quote seen for quoted string",
 468                  file->filename, file->line);
 469             file->token.type = TOKEN_ERROR;
 470             file->error = true;
 471             return;
 472         case '\\':
 473             i++;
 474             if (file->current[i] == '\n')
 475                 file->line++;
 476
 477             /* CRLF should count as one line terminator.  Handle most cases of
 478                that here, but the case where CR is at the end of one buffer
 479                and LF at the beginning of the next has to be handled in the \0
 480                case below. */
 481             if (file->current[i] == '\r') {
 482                 file->line++;
 483                 if (file->current[i + 1] == '\n')
 484                     i++;
 485             }
 486             break;
 487         case '\0':
 488             offset = file->current - file->buffer;
 489             status = file_read_more(file, offset);
 490             if (status)
 491                 i--;
 492             else {
 493                 warn("%s:%u: end of file encountered while parsing quoted"
 494                      " string", file->filename, file->line);
 495                 file->token.type = TOKEN_ERROR;
 496                 file->error = true;
 497                 return;
 498             }
 499
 500             /* If the last character of the previous buffer was CR and the
 501                first character that we just read was LF, the CR must have been
 502                escaped which means that the LF is part of it, forming a CRLF
 503                line terminator.  Skip over the LF. */
 504             if (file->current[i] == '\r' && file->current[i + 1] == '\n')
 505                 i++;
 506
 507             break;
 508         default:
 509             break;
 510         }
 511     }
 512     file->token.type = TOKEN_QSTRING;
 513     file->token.string = xstrndup(file->current, i);
 514     file->current += i;
 515 }
 516
 517
 518 /*
 519 **  Skip over a comment line at file->current, reading more data as necessary.
 520 **  Stop when an end of line is encountered, positioning file->current
 521 **  directly after the end of line.  Returns false on end of file or a read
 522 **  error, true otherwise.
 523 */
 524 static bool
 525 token_skip_comment(struct config_file *file)
 526 {
 527     char *p = file->current;
 528
 529     while (*p != '\0' && *p != '\n' && *p != '\r')
 530         p++;
 531     while (*p == '\0') {
 532         if (!file_read(file))
 533             return false;
 534         p = file->current;
 535         while (*p != '\0' && *p != '\n' && *p != '\r')
 536             p++;
 537     }
 538
 539     /* CRLF should count as a single line terminator, but it may be split
 540        across a read boundary.  Try to handle that case correctly. */
 541     if (*p == '\n')
 542         p++;
 543     else if (*p == '\r') {
 544         p++;
 545         if (*p == '\n')
 546             p++;
 547         else if (*p == '\0') {
 548             if (!file_read(file))
 549                 return false;
 550             p = file->current;
 551             if (*p == '\n')
 552                 p++;
 553         }
 554     }
 555     file->current = p;
 556     file->line++;
 557     return true;
 558 }
 559
 560 /*
 561 **  Skip over all whitespace at file->current, reading more data as
 562 **  necessary.  Stop when the first non-whitespace character is encountered or
 563 **  at end of file, leaving file->current pointing appropriately.  Returns
 564 **  true if non-whitespace is found and false on end of file or a read error.
 565 */
 566 static bool
 567 token_skip_whitespace(struct config_file *file)
 568 {
 569     char *p = file->current;
 570
 571     while (*p == ' ' || *p == '\t')
 572         p++;
 573     while (*p == '\0') {
 574         if (!file_read(file))
 575             return false;
 576         p = file->current;
 577         while (*p == ' ' || *p == '\t')
 578             p++;
 579     }
 580     file->current = p;
 581     return true;
 582 }
 583
 584
 585 /*
 586 **  The basic lexer function.  Read the next token from a configuration file.
 587 **  Returns the token, which is also stored in file.  Lexer failures set the
 588 **  token to TOKEN_ERROR.
 589 */
 590 static enum token_type
 591 token_next(struct config_file *file)
 592 {
 593     /* If file->current is NULL, we've never read from the file.  There is
 594        special handling for a comment at the very beginning of a file, since
 595        normally we only look for comments after newline tokens.
 596
 597        If we do see a # at the beginning of the first line, let token_newline
 598        deal with it.  That function can cope with file->current not pointing
 599        at a newline.  We then return the newline token as the first token in
 600        the file. */
 601     if (file->current == NULL) {
 602         if (!file_read(file))
 603             return file->token.type;
 604         if (!token_skip_whitespace(file))
 605             return file->token.type;
 606         if (*file->current == '#') {
 607             token_newline(file);
 608             return file->token.type;
 609         }
 610     } else {
 611         if (!token_skip_whitespace(file))
 612             return file->token.type;
 613     }
 614
 615     /* Almost all of our tokens can be recognized by the first character; the
 616        only exception is telling strings from parameters.  token_string
 617        handles both of those and sets file->token.type appropriately.
 618        Comments are handled by token_newline. */
 619     switch (*file->current) {
 620     case '{':   token_simple(file, TOKEN_LBRACE);       break;
 621     case '}':   token_simple(file, TOKEN_RBRACE);       break;
 622     case '<':   token_simple(file, TOKEN_LANGLE);       break;
 623     case '>':   token_simple(file, TOKEN_RANGLE);       break;
 624     case '[':   token_simple(file, TOKEN_LBRACKET);     break;
 625     case ']':   token_simple(file, TOKEN_RBRACKET);     break;
 626     case ';':   token_simple(file, TOKEN_SEMICOLON);    break;
 627     case '\r':  token_newline(file);                    break;
 628     case '\n':  token_newline(file);                    break;
 629     case '"':   token_quoted_string(file);              break;
 630     default:    token_string(file);                     break;
 631     }
 632
 633     return file->token.type;
 634 }
 635
 636
 637 /*
 638 **  Open a new configuration file and return config_file representing the
 639 **  parse state of that file.  We assume that we don't have to make a copy of
 640 **  the filename argument.  Default to stdio BUFSIZ for our buffer size, since
 641 **  it's generally reasonably chosen with respect to disk block sizes, memory
 642 **  consumption, and the like.
 643 */
 644 static struct config_file *
 645 file_open(const char *filename)
 646 {
 647     struct config_file *file;
 648
 649     file = xmalloc(sizeof(*file));
 650     file->filename = filename;
 651     file->fd = open(filename, O_RDONLY);
 652     if (file->fd < 0) {
 653         free(file);
 654         return NULL;
 655     }
 656     file->buffer = xmalloc(BUFSIZ);
 657     file->bufsize = BUFSIZ;
 658     file->current = NULL;
 659     file->line = 1;
 660     file->token.type = TOKEN_ERROR;
 661     file->error = false;
 662     return file;
 663 }
 664
 665
 666 /*
 667 **  Read some data from a configuration file, handling errors (by reporting
 668 **  them with warn) and returning true if there's data left and false on EOF
 669 **  or a read error.
 670 */
 671 static bool
 672 file_read(struct config_file *file)
 673 {
 674     ssize_t status;
 675
 676     status = read(file->fd, file->buffer, file->bufsize - 1);
 677     if (status < 0) {
 678         syswarn("%s: read error", file->filename);
 679         file->token.type = TOKEN_ERROR;
 680         file->error = true;
 681     } else if (status == 0) {
 682         file->token.type = TOKEN_EOF;
 683     }
 684     if (status <= 0)
 685         return false;
 686     file->buffer[status] = '\0';
 687     file->current = file->buffer;
 688
 689     /* Reject nuls, since otherwise they would cause strange problems. */
 690     if (strlen(file->buffer) != (size_t) status) {
 691         warn("%s: invalid NUL character found in file", file->filename);
 692         return false;
 693     }
 694     return true;
 695 }
 696
 697
 698 /*
 699 **  Read additional data from a configuration file when there's some partial
 700 **  data in the buffer already that we want to save.  Takes the config_file
 701 **  struct and an offset from file->buffer specifying the start of the data
 702 **  that we want to preserve.  Resizes the buffer if offset is 0.  Returns
 703 **  false on EOF or a read error, true otherwise.
 704 */
 705 static bool
 706 file_read_more(struct config_file *file, ptrdiff_t offset)
 707 {
 708     char *start;
 709     size_t amount;
 710     ssize_t status;
 711
 712     if (offset > 0) {
 713         size_t left;
 714
 715         left = file->bufsize - offset - 1;
 716         memmove(file->buffer, file->buffer + offset, left);
 717         file->current -= offset;
 718         start = file->buffer + left;
 719         amount = offset;
 720     } else {
 721         file->buffer = xrealloc(file->buffer, file->bufsize + BUFSIZ);
 722         file->current = file->buffer;
 723         start = file->buffer + file->bufsize - 1;
 724         amount = BUFSIZ;
 725         file->bufsize += BUFSIZ;
 726     }
 727     status = read(file->fd, start, amount);
 728     if (status < 0)
 729         syswarn("%s: read error", file->filename);
 730     if (status <= 0)
 731         return false;
 732     start[status] = '\0';
 733
 734     /* Reject nuls, since otherwise they would cause strange problems. */
 735     if (strlen(start) != (size_t) status) {
 736         warn("%s: invalid NUL character found in file", file->filename);
 737         return false;
 738     }
 739     return true;
 740 }
 741
 742
 743 /*
 744 **  Close a file and free the resources associated with it.
 745 */
 746 static void
 747 file_close(struct config_file *file)
 748 {
 749     close(file->fd);
 750     free(file->buffer);
 751     free(file);
 752 }
 753
 754
 755 /*
 756 **  Given a config_group with the type and tag already filled in and a
 757 **  config_file with the buffer positioned after the opening brace of the
 758 **  group, read and add parameters to the group until encountering a close
 759 **  brace.  Returns true on a successful parse, false on an error that
 760 **  indicates the group should be discarded.
 761 */
 762 static bool
 763 parse_group_contents(struct config_group *group, struct config_file *file)
 764 {
 765     enum token_type token;
 766
 767     token = token_next(file);
 768     while (!file->error) {
 769         switch (token) {
 770         case TOKEN_PARAM:
 771             token = parse_parameter(group, file, file->token.string);
 772             while (token == TOKEN_CRLF || token == TOKEN_SEMICOLON)
 773                 token = token_next(file);
 774             break;
 775         case TOKEN_CRLF:
 776             token = token_next(file);
 777             break;
 778         case TOKEN_EOF:
 779             return true;
 780         default:
 781             error_unexpected_token(file, "parameter");
 782             break;
 783         }
 784     }
 785     return false;
 786 }
 787
 788
 789 /*
 790 **  Parse a parameter.  Takes the group we're currently inside, the
 791 **  config_file parse state, and the key of the parameter.  Returns the next
 792 **  token after the parameter, and also checks to make sure that it's
 793 **  something legal (end of line, end of file, or a semicolon).
 794 */
 795 static enum token_type
 796 parse_parameter(struct config_group *group, struct config_file *file,
 797                 char *key)
 798 {
 799     enum token_type token;
 800
 801     token = token_next(file);
 802     if (token == TOKEN_STRING || token == TOKEN_QSTRING) {
 803         struct config_parameter *param;
 804         unsigned int line;
 805         char *value;
 806
 807         /* Before storing the parameter, check to make sure that the next
 808            token is valid.  If it isn't, chances are high that the user has
 809            tried to set a parameter to a value containing spaces without
 810            quoting the value. */
 811         value = file->token.string;
 812         line = file->line;
 813         token = token_next(file);
 814         switch (token) {
 815         default:
 816             error_unexpected_token(file, "semicolon or newline");
 817             free(value);
 818             break;
 819         case TOKEN_CRLF:
 820         case TOKEN_SEMICOLON:
 821         case TOKEN_EOF:
 822             param = xmalloc(sizeof(*param));
 823             param->key = key;
 824             param->raw_value = value;
 825             param->type = VALUE_UNKNOWN;
 826             param->line = line;
 827             if (!hash_insert(group->params, key, param)) {
 828                 warn("%s:%u: duplicate parameter %s", file->filename, line,
 829                      key);
 830                 free(param->raw_value);
 831                 free(param->key);
 832                 free(param);
 833             }
 834             return token;
 835         }
 836     } else {
 837         error_unexpected_token(file, "parameter value");
 838     }
 839
 840     /* If we fell through, we encountered some sort of error.  Free allocated
 841        memory and return an error token. */
 842     free(key);
 843     return TOKEN_ERROR;
 844 }
 845
 846
 847 /*
 848 **  Allocate a new config_group and set the initial values of all of the
 849 **  struct members.
 850 */
 851 static struct config_group *
 852 group_new(const char *file, unsigned int line, const char *type,
 853           const char *tag)
 854 {
 855     struct config_group *group;
 856
 857     group = xmalloc(sizeof(*group));
 858     group->type = xstrdup(type);
 859     group->tag = (tag == NULL) ? NULL : xstrdup(tag);
 860     group->file = xstrdup(file);
 861     group->included = NULL;
 862     group->line = line;
 863     group->params = hash_create(4, hash_string, parameter_key,
 864                                 parameter_equal, parameter_free);
 865     group->parent = NULL;
 866     group->child = NULL;
 867     group->next = NULL;
 868     return group;
 869 }
 870
 871
 872 /*
 873 **  Free a config_group and all associated storage.
 874 */
 875 static void
 876 group_free(struct config_group *group)
 877 {
 878     free(group->type);
 879     if (group->tag != NULL)
 880         free(group->tag);
 881     free(group->file);
 882     if (group->included != NULL)
 883         free(group->included);
 884     hash_free(group->params);
 885     free(group);
 886 }
 887
 888
 889 /*
 890 **  Accessor function for the group type.
 891 */
 892 const char *
 893 config_group_type(struct config_group *group)
 894 {
 895     return group->type;
 896 }
 897
 898
 899 /*
 900 **  Accessor function for the group tag.
 901 */
 902 const char *
 903 config_group_tag(struct config_group *group)
 904 {
 905     return group->tag;
 906 }
 907
 908
 909 /*
 910 **  Parse a configuration file, returning the config_group that's the root of
 911 **  the tree represented by that file (and any other files that it includes).
 912 **  Returns NULL on a parse failure.
 913 */
 914 struct config_group *
 915 config_parse_file(const char *filename, ...)
 916 {
 917     struct config_group *group;
 918     struct config_file *file;
 919     bool success;
 920
 921     file = file_open(filename);
 922     if (file == NULL) {
 923         syswarn("open of %s failed", filename);
 924         return NULL;
 925     }
 926     group = group_new(filename, 1, "GLOBAL", NULL);
 927     success = parse_group_contents(group, file);
 928     file_close(file);
 929     return success ? group : NULL;
 930 }
 931
 932
 933 /*
 934 **  Given a config_group representing the root of a configuration structure,
 935 **  recursively free the entire structure.
 936 */
 937 void
 938 config_free(struct config_group *group)
 939 {
 940     group_free(group);
 941 }
 942
 943
 944 /*
 945 **  Convert a given parameter value to a boolean, returning true if successful
 946 **  and false otherwise.
 947 */
 948 static bool
 949 convert_boolean(struct config_parameter *param, const char *file,
 950                 void *result)
 951 {
 952     static const char *const truevals[] = { "yes", "on", "true", NULL };
 953     static const char *const falsevals[] = { "no", "off", "false", NULL };
 954     bool *value = result;
 955     int i;
 956
 957     if (param->type == VALUE_BOOL) {
 958         *value = param->value.boolean;
 959         return true;
 960     } else if (param->type != VALUE_UNKNOWN) {
 961         warn("%s:%u: %s is not a boolean", file, param->line, param->key);
 962         return false;
 963     }
 964     param->type = VALUE_BOOL;
 965     for (i = 0; truevals[i] != NULL; i++)
 966         if (strcmp(param->raw_value, truevals[i]) == 0) {
 967             param->value.boolean = true;
 968             *value = true;
 969             return true;
 970         }
 971     for (i = 0; falsevals[i] != NULL; i++)
 972         if (strcmp(param->raw_value, falsevals[i]) == 0) {
 973             param->value.boolean = false;
 974             *value = false;
 975             return true;
 976         }
 977     param->type = VALUE_INVALID;
 978     warn("%s:%u: %s is not a boolean", file, param->line, param->key);
 979     return false;
 980 }
 981
 982
 983 /*
 984 **  Convert a given parameter value to an integer, returning true if
 985 **  successful and false otherwise.
 986 */
 987 static bool
 988 convert_integer(struct config_parameter *param, const char *file,
 989                 void *result)
 990 {
 991     long *value = result;
 992     char *p;
 993
 994     if (param->type == VALUE_INTEGER) {
 995         *value = param->value.integer;
 996         return true;
 997     } else if (param->type != VALUE_UNKNOWN) {
 998         warn("%s:%u: %s is not an integer", file, param->line, param->key);
 999         return false;
1000     }
1001
1002     /* Do a syntax check even though strtol would do some of this for us,
1003        since otherwise some syntax errors may go silently undetected. */
1004     p = param->raw_value;
1005     if (*p == '-')
1006         p++;
1007     for (; *p != '\0'; p++)
1008         if (*p < '0' || *p > '9')
1009             break;
1010     if (*p != '\0') {
1011         warn("%s:%u: %s is not an integer", file, param->line, param->key);
1012         return false;
1013     }
1014
1015     /* Do the actual conversion with strtol. */
1016     errno = 0;
1017     param->value.integer = strtol(param->raw_value, NULL, 10);
1018     if (errno != 0) {
1019         warn("%s:%u: %s doesn't convert to an integer", file, param->line,
1020              param->key);
1021         return false;
1022     }
1023     *value = param->value.integer;
1024     param->type = VALUE_INTEGER;
1025     return true;
1026 }
1027
1028
1029 /*
1030 **  Convert a parameter value to a string, interpreting it as a quoted string,
1031 **  and returning true if successful and false otherwise.  Does none of the
1032 **  initial type checking, since convert_string should have already done that.
1033 */
1034 static bool
1035 convert_string_quoted(struct config_parameter *param, const char *file,
1036                       void *result)
1037 {
1038     const char **value = result;
1039     size_t length;
1040     char *src, *dest;
1041
1042     length = strlen(param->raw_value) - 2;
1043     param->value.string = xmalloc(length + 1);
1044     src = param->raw_value + 1;
1045     dest = param->value.string;
1046     for (; *src != '"' && *src != '\0'; src++) {
1047         if (*src != '\\') {
1048             *dest++ = *src;
1049         } else {
1050             src++;
1051
1052             /* This should implement precisely the semantics of backslash
1053                escapes in quoted strings in C. */
1054             switch (*src) {
1055             case 'a':   *dest++ = '\a'; break;
1056             case 'b':   *dest++ = '\b'; break;
1057             case 'f':   *dest++ = '\f'; break;
1058             case 'n':   *dest++ = '\n'; break;
1059             case 'r':   *dest++ = '\r'; break;
1060             case 't':   *dest++ = '\t'; break;
1061             case 'v':   *dest++ = '\v'; break;
1062
1063             case '\n':  break;  /* Escaped newlines disappear. */
1064
1065             case '\\':
1066             case '\'':
1067             case '"':
1068             case '?':
1069                 *dest++ = *src;
1070                 break;
1071
1072             case '\0':
1073                 /* Should never happen; the tokenizer should catch this. */
1074                 warn("%s:%u: unterminated string", file, param->line);
1075                 goto fail;
1076
1077             default:
1078                 /* FIXME: \<octal>, \x, \u, and \U not yet implemented; the
1079                    last three could use the same basic code.  Think about
1080                    whether the escape should generate a single 8-bit character
1081                    or a UTF-8 encoded character; maybe the first two generate
1082                    the former and \u and \U generate the latter? */
1083                 warn("%s:%u: unrecognized escape '\\%c'", file, param->line,
1084                      *src);
1085                 goto fail;
1086             }
1087         }
1088     }
1089     *dest = '\0';
1090
1091     /* The tokenizer already checked this for most cases but could miss the
1092        case where the final quote mark is escaped with a backslash. */
1093     if (*src != '"') {
1094         warn("%s:%u: unterminated string (no closing quote)", file,
1095              param->line);
1096         goto fail;
1097     }
1098
1099     param->type = VALUE_STRING;
1100     *value = param->value.string;
1101     return true;
1102
1103  fail:
1104     free(param->value.string);
1105     return false;
1106 }
1107
1108
1109 /*
1110 **  Convert a given parameter value to a string, returning true if successful
1111 **  and false otherwise.
1112 */
1113 static bool
1114 convert_string(struct config_parameter *param, const char *file, void *result)
1115 {
1116     const char **value = result;
1117
1118     if (param->type == VALUE_STRING) {
1119         *value = param->value.string;
1120         return true;
1121     } else if (param->type != VALUE_UNKNOWN) {
1122         warn("%s:%u: %s is not an string", file, param->line, param->key);
1123         return false;
1124     }
1125
1126     if (*param->raw_value == '"') {
1127         return convert_string_quoted(param, file, result);
1128     } else {
1129         param->value.string = xstrdup(param->raw_value);
1130         param->type = VALUE_STRING;
1131         *value = param->value.string;
1132         return true;
1133     }
1134 }
1135
1136
1137 /*
1138 **  Given a group, query it for the given parameter and then when the
1139 **  parameter is found, check to see if it's already marked invalid.  If so,
1140 **  fail quietly; otherwise, hand it off to the conversion function to do
1141 **  type-specific work, returning the result.  Returns true if the parameter
1142 **  is found in the group or one of its parents and convert can successfully
1143 **  convert the raw value and put it in result, false otherwise (either for
1144 **  the parameter not being found or for it being the wrong type).
1145 */
1146 static bool
1147 group_parameter_get(struct config_group *group, const char *key, void *result,
1148                     convert_func convert)
1149 {
1150     struct config_group *current = group;
1151
1152     while (current != NULL) {
1153         struct config_parameter *param;
1154
1155         param = hash_lookup(group->params, key);
1156         if (param != NULL) {
1157             if (param->type == VALUE_INVALID)
1158                 return false;
1159             else
1160                 return (*convert)(param, group->file, result);
1161         }
1162         current = group->parent;
1163     }
1164     return false;
1165 }
1166
1167
1168 /*
1169 **  All of the config_param_* functions do the following:
1170 **
1171 **  Given a group, query it for the given parameter, interpreting its value as
1172 **  the appropriate type and returning it in the third argument.  Returns true
1173 **  on success, false on failure (such as the parameter not being set or an
1174 **  error), and report errors via warn.
1175 */
1176 bool
1177 config_param_boolean(struct config_group *group, const char *key,
1178                      bool *result)
1179 {
1180     return group_parameter_get(group, key, result, convert_boolean);
1181 }
1182
1183 bool
1184 config_param_integer(struct config_group *group, const char *key,
1185                      long *result)
1186 {
1187     return group_parameter_get(group, key, result, convert_integer);
1188 }
1189
1190 bool
1191 config_param_string(struct config_group *group, const char *key,
1192                     const char **result)
1193 {
1194     return group_parameter_get(group, key, result, convert_string);
1195 }
1196
1197
1198 /*
1199 **  A hash traversal function to add all parameter keys to the vector provided
1200 **  as the second argument.
1201 */
1202 static void
1203 parameter_collect(void *element, void *cookie)
1204 {
1205     struct config_parameter *param = element;
1206     struct vector *params = cookie;
1207
1208     vector_add(params, param->key);
1209 }
1210
1211
1212 /*
1213 **  Returns a newly allocated vector of all of the config parameters in a
1214 **  group, including the inherited ones (not implemented yet).
1215 */
1216 struct vector *
1217 config_params(struct config_group *group)
1218 {
1219     struct vector *params;
1220     size_t size;
1221
1222     /* Size the vector, which we can do accurately for now. */
1223     params = vector_new();
1224     size = hash_count(group->params);
1225     vector_resize(params, size);
1226
1227     /* Now, walk the hash to build the vector of params. */
1228     hash_traverse(group->params, parameter_collect, params);
1229     return params;
1230 }
1231
1232
1233 /*
1234 **  Report an error in a given parameter.  Used so that the file and line
1235 **  number can be included in the error message.
1236 */
1237 void
1238 config_error_param(struct config_group *group, const char *key,
1239                    const char *fmt, ...)
1240 {
1241     va_list args;
1242     ssize_t length;
1243     char *message, *file;
1244     struct config_parameter *param;
1245
1246     va_start(args, fmt);
1247     length = vsnprintf(NULL, 0, fmt, args);
1248     va_end(args);
1249     if (length < 0)
1250         return;
1251     message = xmalloc(length + 1);
1252     va_start(args, fmt);
1253     vsnprintf(message, length + 1, fmt, args);
1254     va_end(args);
1255
1256     param = hash_lookup(group->params, key);
1257     if (param == NULL)
1258         warn("%s", message);
1259     else {
1260         file = (group->included != NULL ? group->included : group->file);
1261         warn("%s:%u: %s", file, param->line, message);
1262     }
1263
1264     free(message);
1265 }
1266
1267
1268 /*
1269 **  Stubs for functions not yet implemented.
1270 */
1271 struct config_group *
1272 config_find_group(struct config_group *group UNUSED, const char *type UNUSED)
1273 {
1274     return NULL;
1275 }
1276
1277 struct config_group *
1278 config_next_group(struct config_group *group UNUSED)
1279 {
1280     return NULL;
1281 }
1282
1283 bool
1284 config_param_real(struct config_group *group UNUSED, const char *key UNUSED,
1285                   double *result UNUSED)
1286 {
1287     return false;
1288 }
1289
1290 bool
1291 config_param_list(struct config_group *group UNUSED, const char *key UNUSED,
1292                   struct vector *result UNUSED)
1293 {
1294     return false;
1295 }
1296
1297 void
1298 config_error_group(struct config_group *group UNUSED, const char *fmt UNUSED,
1299                    ...)
1300 {
1301 }