locale/programs/linereader.c

   1 /* Copyright (C) 1996-2005, 2006 Free Software Foundation, Inc.
   2    This file is part of the GNU C Library.
   3    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published
   7    by the Free Software Foundation; version 2 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22
  23 #include <assert.h>
  24 #include <ctype.h>
  25 #include <errno.h>
  26 #include <libintl.h>
  27 #include <stdarg.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30
  31 #include "localedef.h"
  32 #include "charmap.h"
  33 #include "error.h"
  34 #include "linereader.h"
  35 #include "locfile.h"
  36
  37 /* Prototypes for local functions.  */
  38 static struct token *get_toplvl_escape (struct linereader *lr);
  39 static struct token *get_symname (struct linereader *lr);
  40 static struct token *get_ident (struct linereader *lr);
  41 static struct token *get_string (struct linereader *lr,
  42                                  const struct charmap_t *charmap,
  43                                  struct localedef_t *locale,
  44                                  const struct repertoire_t *repertoire,
  45                                  int verbose);
  46
  47
  48 struct linereader *
  49 lr_open (const char *fname, kw_hash_fct_t hf)
  50 {
  51   FILE *fp;
  52
  53   if (fname == NULL || strcmp (fname, "-") == 0
  54       || strcmp (fname, "/dev/stdin") == 0)
  55     return lr_create (stdin, "<stdin>", hf);
  56   else
  57     {
  58       fp = fopen (fname, "rm");
  59       if (fp == NULL)
  60         return NULL;
  61       return lr_create (fp, fname, hf);
  62     }
  63 }
  64
  65 struct linereader *
  66 lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf)
  67 {
  68   struct linereader *result;
  69   int n;
  70
  71   result = (struct linereader *) xmalloc (sizeof (*result));
  72
  73   result->fp = fp;
  74   result->fname = xstrdup (fname);
  75   result->buf = NULL;
  76   result->bufsize = 0;
  77   result->lineno = 1;
  78   result->idx = 0;
  79   result->comment_char = '#';
  80   result->escape_char = '\\';
  81   result->translate_strings = 1;
  82   result->return_widestr = 0;
  83
  84   n = getdelim (&result->buf, &result->bufsize, '\n', result->fp);
  85   if (n < 0)
  86     {
  87       int save = errno;
  88       fclose (result->fp);
  89       free ((char *) result->fname);
  90       free (result);
  91       errno = save;
  92       return NULL;
  93     }
  94
  95   if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n')
  96     n -= 2;
  97
  98   result->buf[n] = '\0';
  99   result->bufact = n;
 100   result->hash_fct = hf;
 101
 102   return result;
 103 }
 104
 105
 106 int
 107 lr_eof (struct linereader *lr)
 108 {
 109   return lr->bufact = 0;
 110 }
 111
 112
 113 void
 114 lr_ignore_rest (struct linereader *lr, int verbose)
 115 {
 116   if (verbose)
 117     {
 118       while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n'
 119              && lr->buf[lr->idx] != lr->comment_char)
 120         if (lr->buf[lr->idx] == '\0')
 121           {
 122             if (lr_next (lr) < 0)
 123               return;
 124           }
 125         else
 126           ++lr->idx;
 127
 128       if (lr->buf[lr->idx] != '\n' && ! feof (lr->fp)
 129           && lr->buf[lr->idx] != lr->comment_char)
 130         lr_error (lr, _("trailing garbage at end of line"));
 131     }
 132
 133   /* Ignore continued line.  */
 134   while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n')
 135     if (lr_next (lr) < 0)
 136       break;
 137
 138   lr->idx = lr->bufact;
 139 }
 140
 141
 142 void
 143 lr_close (struct linereader *lr)
 144 {
 145   fclose (lr->fp);
 146   free (lr->buf);
 147   free (lr);
 148 }
 149
 150
 151 int
 152 lr_next (struct linereader *lr)
 153 {
 154   int n;
 155
 156   n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp);
 157   if (n < 0)
 158     return -1;
 159
 160   ++lr->lineno;
 161
 162   if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n')
 163     {
 164 #if 0
 165       /* XXX Is this correct?  */
 166       /* An escaped newline character is substituted with a single <SP>.  */
 167       --n;
 168       lr->buf[n - 1] = ' ';
 169 #else
 170       n -= 2;
 171 #endif
 172     }
 173
 174   lr->buf[n] = '\0';
 175   lr->bufact = n;
 176   lr->idx = 0;
 177
 178   return 0;
 179 }
 180
 181
 182 /* Defined in error.c.  */
 183 /* This variable is incremented each time `error' is called.  */
 184 extern unsigned int error_message_count;
 185
 186 /* The calling program should define program_name and set it to the
 187    name of the executing program.  */
 188 extern char *program_name;
 189
 190
 191 struct token *
 192 lr_token (struct linereader *lr, const struct charmap_t *charmap,
 193           struct localedef_t *locale, const struct repertoire_t *repertoire,
 194           int verbose)
 195 {
 196   int ch;
 197
 198   while (1)
 199     {
 200       do
 201         {
 202           ch = lr_getc (lr);
 203
 204           if (ch == EOF)
 205             {
 206               lr->token.tok = tok_eof;
 207               return &lr->token;
 208             };
 209
 210           if (ch == '\n')
 211             {
 212               lr->token.tok = tok_eol;
 213               return &lr->token;
 214             }
 215         }
 216       while (isspace (ch));
 217
 218       if (ch != lr->comment_char)
 219         break;
 220
 221       /* Is there an newline at the end of the buffer?  */
 222       if (lr->buf[lr->bufact - 1] != '\n')
 223         {
 224           /* No.  Some people want this to mean that only the line in
 225              the file not the logical, concatenated line is ignored.
 226              Let's try this.  */
 227           lr->idx = lr->bufact;
 228           continue;
 229         }
 230
 231       /* Ignore rest of line.  */
 232       lr_ignore_rest (lr, 0);
 233       lr->token.tok = tok_eol;
 234       return &lr->token;
 235     }
 236
 237   /* Match escape sequences.  */
 238   if (ch == lr->escape_char)
 239     return get_toplvl_escape (lr);
 240
 241   /* Match ellipsis.  */
 242   if (ch == '.')
 243     {
 244       if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0)
 245         {
 246           int cnt;
 247           for (cnt = 0; cnt < 10; ++cnt)
 248             lr_getc (lr);
 249           lr->token.tok = tok_ellipsis4_2;
 250           return &lr->token;
 251         }
 252       if (strncmp (&lr->buf[lr->idx], "...", 3) == 0)
 253         {
 254           lr_getc (lr);
 255           lr_getc (lr);
 256           lr_getc (lr);
 257           lr->token.tok = tok_ellipsis4;
 258           return &lr->token;
 259         }
 260       if (strncmp (&lr->buf[lr->idx], "..", 2) == 0)
 261         {
 262           lr_getc (lr);
 263           lr_getc (lr);
 264           lr->token.tok = tok_ellipsis3;
 265           return &lr->token;
 266         }
 267       if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0)
 268         {
 269           int cnt;
 270           for (cnt = 0; cnt < 6; ++cnt)
 271             lr_getc (lr);
 272           lr->token.tok = tok_ellipsis2_2;
 273           return &lr->token;
 274         }
 275       if (lr->buf[lr->idx] == '.')
 276         {
 277           lr_getc (lr);
 278           lr->token.tok = tok_ellipsis2;
 279           return &lr->token;
 280         }
 281     }
 282
 283   switch (ch)
 284     {
 285     case '<':
 286       return get_symname (lr);
 287
 288     case '0' ... '9':
 289       lr->token.tok = tok_number;
 290       lr->token.val.num = ch - '0';
 291
 292       while (isdigit (ch = lr_getc (lr)))
 293         {
 294           lr->token.val.num *= 10;
 295           lr->token.val.num += ch - '0';
 296         }
 297       if (isalpha (ch))
 298         lr_error (lr, _("garbage at end of number"));
 299       lr_ungetn (lr, 1);
 300
 301       return &lr->token;
 302
 303     case ';':
 304       lr->token.tok = tok_semicolon;
 305       return &lr->token;
 306
 307     case ',':
 308       lr->token.tok = tok_comma;
 309       return &lr->token;
 310
 311     case '(':
 312       lr->token.tok = tok_open_brace;
 313       return &lr->token;
 314
 315     case ')':
 316       lr->token.tok = tok_close_brace;
 317       return &lr->token;
 318
 319     case '"':
 320       return get_string (lr, charmap, locale, repertoire, verbose);
 321
 322     case '-':
 323       ch = lr_getc (lr);
 324       if (ch == '1')
 325         {
 326           lr->token.tok = tok_minus1;
 327           return &lr->token;
 328         }
 329       lr_ungetn (lr, 2);
 330       break;
 331     }
 332
 333   return get_ident (lr);
 334 }
 335
 336
 337 static struct token *
 338 get_toplvl_escape (struct linereader *lr)
 339 {
 340   /* This is supposed to be a numeric value.  We return the
 341      numerical value and the number of bytes.  */
 342   size_t start_idx = lr->idx - 1;
 343   unsigned char *bytes = lr->token.val.charcode.bytes;
 344   size_t nbytes = 0;
 345   int ch;
 346
 347   do
 348     {
 349       unsigned int byte = 0;
 350       unsigned int base = 8;
 351
 352       ch = lr_getc (lr);
 353
 354       if (ch == 'd')
 355         {
 356           base = 10;
 357           ch = lr_getc (lr);
 358         }
 359       else if (ch == 'x')
 360         {
 361           base = 16;
 362           ch = lr_getc (lr);
 363         }
 364
 365       if ((base == 16 && !isxdigit (ch))
 366           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 367         {
 368         esc_error:
 369           lr->token.val.str.startmb = &lr->buf[start_idx];
 370
 371           while (ch != EOF && !isspace (ch))
 372             ch = lr_getc (lr);
 373           lr->token.val.str.lenmb = lr->idx - start_idx;
 374
 375           lr->token.tok = tok_error;
 376           return &lr->token;
 377         }
 378
 379       if (isdigit (ch))
 380         byte = ch - '0';
 381       else
 382         byte = tolower (ch) - 'a' + 10;
 383
 384       ch = lr_getc (lr);
 385       if ((base == 16 && !isxdigit (ch))
 386           || (base != 16 && (ch < '0' || ch >= (int) ('0' + base))))
 387         goto esc_error;
 388
 389       byte *= base;
 390       if (isdigit (ch))
 391         byte += ch - '0';
 392       else
 393         byte += tolower (ch) - 'a' + 10;
 394
 395       ch = lr_getc (lr);
 396       if (base != 16 && isdigit (ch))
 397         {
 398           byte *= base;
 399           byte += ch - '0';
 400
 401           ch = lr_getc (lr);
 402         }
 403
 404       bytes[nbytes++] = byte;
 405     }
 406   while (ch == lr->escape_char
 407          && nbytes < (int) sizeof (lr->token.val.charcode.bytes));
 408
 409   if (!isspace (ch))
 410     lr_error (lr, _("garbage at end of character code specification"));
 411
 412   lr_ungetn (lr, 1);
 413
 414   lr->token.tok = tok_charcode;
 415   lr->token.val.charcode.nbytes = nbytes;
 416
 417   return &lr->token;
 418 }
 419
 420
 421 #define ADDC(ch) \
 422   do                                                                          \
 423     {                                                                         \
 424       if (bufact == bufmax)                                                   \
 425         {                                                                     \
 426           bufmax *= 2;                                                        \
 427           buf = xrealloc (buf, bufmax);                                       \
 428         }                                                                     \
 429       buf[bufact++] = (ch);                                                   \
 430     }                                                                         \
 431   while (0)
 432
 433
 434 #define ADDS(s, l) \
 435   do                                                                          \
 436     {                                                                         \
 437       size_t _l = (l);                                                        \
 438       if (bufact + _l > bufmax)                                               \
 439         {                                                                     \
 440           if (bufact < _l)                                                    \
 441             bufact = _l;                                                      \
 442           bufmax *= 2;                                                        \
 443           buf = xrealloc (buf, bufmax);                                       \
 444         }                                                                     \
 445       memcpy (&buf[bufact], s, _l);                                           \
 446       bufact += _l;                                                           \
 447     }                                                                         \
 448   while (0)
 449
 450
 451 #define ADDWC(ch) \
 452   do                                                                          \
 453     {                                                                         \
 454       if (buf2act == buf2max)                                                 \
 455         {                                                                     \
 456           buf2max *= 2;                                                       \
 457           buf2 = xrealloc (buf2, buf2max * 4);                                \
 458         }                                                                     \
 459       buf2[buf2act++] = (ch);                                                 \
 460     }                                                                         \
 461   while (0)
 462
 463
 464 static struct token *
 465 get_symname (struct linereader *lr)
 466 {
 467   /* Symbol in brackets.  We must distinguish three kinds:
 468      1. reserved words
 469      2. ISO 10646 position values
 470      3. all other.  */
 471   char *buf;
 472   size_t bufact = 0;
 473   size_t bufmax = 56;
 474   const struct keyword_t *kw;
 475   int ch;
 476
 477   buf = (char *) xmalloc (bufmax);
 478
 479   do
 480     {
 481       ch = lr_getc (lr);
 482       if (ch == lr->escape_char)
 483         {
 484           int c2 = lr_getc (lr);
 485           ADDC (c2);
 486
 487           if (c2 == '\n')
 488             ch = '\n';
 489         }
 490       else
 491         ADDC (ch);
 492     }
 493   while (ch != '>' && ch != '\n');
 494
 495   if (ch == '\n')
 496     lr_error (lr, _("unterminated symbolic name"));
 497
 498   /* Test for ISO 10646 position value.  */
 499   if (buf[0] == 'U' && (bufact == 6 || bufact == 10))
 500     {
 501       char *cp = buf + 1;
 502       while (cp < &buf[bufact - 1] && isxdigit (*cp))
 503         ++cp;
 504
 505       if (cp == &buf[bufact - 1])
 506         {
 507           /* Yes, it is.  */
 508           lr->token.tok = tok_ucs4;
 509           lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16);
 510
 511           return &lr->token;
 512         }
 513     }
 514
 515   /* It is a symbolic name.  Test for reserved words.  */
 516   kw = lr->hash_fct (buf, bufact - 1);
 517
 518   if (kw != NULL && kw->symname_or_ident == 1)
 519     {
 520       lr->token.tok = kw->token;
 521       free (buf);
 522     }
 523   else
 524     {
 525       lr->token.tok = tok_bsymbol;
 526
 527       buf = xrealloc (buf, bufact + 1);
 528       buf[bufact] = '\0';
 529
 530       lr->token.val.str.startmb = buf;
 531       lr->token.val.str.lenmb = bufact - 1;
 532     }
 533
 534   return &lr->token;
 535 }
 536
 537
 538 static struct token *
 539 get_ident (struct linereader *lr)
 540 {
 541   char *buf;
 542   size_t bufact;
 543   size_t bufmax = 56;
 544   const struct keyword_t *kw;
 545   int ch;
 546
 547   buf = xmalloc (bufmax);
 548   bufact = 0;
 549
 550   ADDC (lr->buf[lr->idx - 1]);
 551
 552   while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';'
 553          && ch != '<' && ch != ',' && ch != EOF)
 554     {
 555       if (ch == lr->escape_char)
 556         {
 557           ch = lr_getc (lr);
 558           if (ch == '\n' || ch == EOF)
 559             {
 560               lr_error (lr, _("invalid escape sequence"));
 561               break;
 562             }
 563         }
 564       ADDC (ch);
 565     }
 566
 567   lr_ungetc (lr, ch);
 568
 569   kw = lr->hash_fct (buf, bufact);
 570
 571   if (kw != NULL && kw->symname_or_ident == 0)
 572     {
 573       lr->token.tok = kw->token;
 574       free (buf);
 575     }
 576   else
 577     {
 578       lr->token.tok = tok_ident;
 579
 580       buf = xrealloc (buf, bufact + 1);
 581       buf[bufact] = '\0';
 582
 583       lr->token.val.str.startmb = buf;
 584       lr->token.val.str.lenmb = bufact;
 585     }
 586
 587   return &lr->token;
 588 }
 589
 590
 591 static struct token *
 592 get_string (struct linereader *lr, const struct charmap_t *charmap,
 593             struct localedef_t *locale, const struct repertoire_t *repertoire,
 594             int verbose)
 595 {
 596   int return_widestr = lr->return_widestr;
 597   char *buf;
 598   uint32_t *buf2 = NULL;
 599   size_t bufact;
 600   size_t bufmax = 56;
 601
 602   /* We must return two different strings.  */
 603   buf = xmalloc (bufmax);
 604   bufact = 0;
 605
 606   /* We know it'll be a string.  */
 607   lr->token.tok = tok_string;
 608
 609   /* If we need not translate the strings (i.e., expand <...> parts)
 610      we can run a simple loop.  */
 611   if (!lr->translate_strings)
 612     {
 613       int ch;
 614
 615       buf2 = NULL;
 616       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 617         ADDC (ch);
 618
 619       /* Catch errors with trailing escape character.  */
 620       if (bufact > 0 && buf[bufact - 1] == lr->escape_char
 621           && (bufact == 1 || buf[bufact - 2] != lr->escape_char))
 622         {
 623           lr_error (lr, _("illegal escape sequence at end of string"));
 624           --bufact;
 625         }
 626       else if (ch == '\n' || ch == EOF)
 627         lr_error (lr, _("unterminated string"));
 628
 629       ADDC ('\0');
 630     }
 631   else
 632     {
 633       int illegal_string = 0;
 634       size_t buf2act = 0;
 635       size_t buf2max = 56 * sizeof (uint32_t);
 636       int ch;
 637       int warned = 0;
 638
 639       /* We have to provide the wide character result as well.  */
 640       if (return_widestr)
 641         buf2 = xmalloc (buf2max);
 642
 643       /* Read until the end of the string (or end of the line or file).  */
 644       while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF)
 645         {
 646           size_t startidx;
 647           uint32_t wch;
 648           struct charseq *seq;
 649
 650           if (ch != '<')
 651             {
 652               /* The standards leave it up to the implementation to decide
 653                  what to do with character which stand for themself.  We
 654                  could jump through hoops to find out the value relative to
 655                  the charmap and the repertoire map, but instead we leave
 656                  it up to the locale definition author to write a better
 657                  definition.  We assume here that every character which
 658                  stands for itself is encoded using ISO 8859-1.  Using the
 659                  escape character is allowed.  */
 660               if (ch == lr->escape_char)
 661                 {
 662                   ch = lr_getc (lr);
 663                   if (ch == '\n' || ch == EOF)
 664                     break;
 665                 }
 666
 667               if (verbose && !warned)
 668                 {
 669                   lr_error (lr, _("\
 670 non-symbolic character value should not be used"));
 671                   warned = 1;
 672                 }
 673
 674               ADDC (ch);
 675               if (return_widestr)
 676                 ADDWC ((uint32_t) ch);
 677
 678               continue;
 679             }
 680
 681           /* Now we have to search for the end of the symbolic name, i.e.,
 682              the closing '>'.  */
 683           startidx = bufact;
 684           while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF)
 685             {
 686               if (ch == lr->escape_char)
 687                 {
 688                   ch = lr_getc (lr);
 689                   if (ch == '\n' || ch == EOF)
 690                     break;
 691                 }
 692               ADDC (ch);
 693             }
 694           if (ch == '\n' || ch == EOF)
 695             /* Not a correct string.  */
 696             break;
 697           if (bufact == startidx)
 698             {
 699               /* <> is no correct name.  Ignore it and also signal an
 700                  error.  */
 701               illegal_string = 1;
 702               continue;
 703             }
 704
 705           /* It might be a Uxxxx symbol.  */
 706           if (buf[startidx] == 'U'
 707               && (bufact - startidx == 5 || bufact - startidx == 9))
 708             {
 709               char *cp = buf + startidx + 1;
 710               while (cp < &buf[bufact] && isxdigit (*cp))
 711                 ++cp;
 712
 713               if (cp == &buf[bufact])
 714                 {
 715                   char utmp[10];
 716
 717                   /* Yes, it is.  */
 718                   ADDC ('\0');
 719                   wch = strtoul (buf + startidx + 1, NULL, 16);
 720
 721                   /* Now forget about the name we just added.  */
 722                   bufact = startidx;
 723
 724                   if (return_widestr)
 725                     ADDWC (wch);
 726
 727                   /* See whether the charmap contains the Uxxxxxxxx names.  */
 728                   snprintf (utmp, sizeof (utmp), "U%08X", wch);
 729                   seq = charmap_find_value (charmap, utmp, 9);
 730
 731                   if (seq == NULL)
 732                     {
 733                      /* No, this isn't the case.  Now determine from
 734                         the repertoire the name of the character and
 735                         find it in the charmap.  */
 736                       if (repertoire != NULL)
 737                         {
 738                           const char *symbol;
 739
 740                           symbol = repertoire_find_symbol (repertoire, wch);
 741
 742                           if (symbol != NULL)
 743                             seq = charmap_find_value (charmap, symbol,
 744                                                       strlen (symbol));
 745                         }
 746
 747                       if (seq == NULL)
 748                         {
 749 #ifndef NO_TRANSLITERATION
 750                           /* Transliterate if possible.  */
 751                           if (locale != NULL)
 752                             {
 753                               uint32_t *translit;
 754
 755                               if ((locale->avail & CTYPE_LOCALE) == 0)
 756                                 {
 757                                   /* Load the CTYPE data now.  */
 758                                   int old_needed = locale->needed;
 759
 760                                   locale->needed = 0;
 761                                   locale = load_locale (LC_CTYPE,
 762                                                         locale->name,
 763                                                         locale->repertoire_name,
 764                                                         charmap, locale);
 765                                   locale->needed = old_needed;
 766                                 }
 767
 768                               if ((locale->avail & CTYPE_LOCALE) != 0
 769                                   && ((translit = find_translit (locale,
 770                                                                  charmap, wch))
 771                                       != NULL))
 772                                 /* The CTYPE data contains a matching
 773                                    transliteration.  */
 774                                 {
 775                                   int i;
 776
 777                                   for (i = 0; translit[i] != 0; ++i)
 778                                     {
 779                                       char utmp[10];
 780
 781                                       snprintf (utmp, sizeof (utmp), "U%08X",
 782                                                 translit[i]);
 783                                       seq = charmap_find_value (charmap, utmp,
 784                                                                 9);
 785                                       assert (seq != NULL);
 786                                       ADDS (seq->bytes, seq->nbytes);
 787                                     }
 788
 789                                   continue;
 790                                 }
 791                             }
 792 #endif  /* NO_TRANSLITERATION */
 793
 794                           /* Not a known name.  */
 795                           illegal_string = 1;
 796                         }
 797                     }
 798
 799                   if (seq != NULL)
 800                     ADDS (seq->bytes, seq->nbytes);
 801
 802                   continue;
 803                 }
 804             }
 805
 806           /* We now have the symbolic name in buf[startidx] to
 807              buf[bufact-1].  Now find out the value for this character
 808              in the charmap as well as in the repertoire map (in this
 809              order).  */
 810           seq = charmap_find_value (charmap, &buf[startidx],
 811                                     bufact - startidx);
 812
 813           if (seq == NULL)
 814             {
 815               /* This name is not in the charmap.  */
 816               lr_error (lr, _("symbol `%.*s' not in charmap"),
 817                         (int) (bufact - startidx), &buf[startidx]);
 818               illegal_string = 1;
 819             }
 820
 821           if (return_widestr)
 822             {
 823               /* Now the same for the multibyte representation.  */
 824               if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
 825                 wch = seq->ucs4;
 826               else
 827                 {
 828                   wch = repertoire_find_value (repertoire, &buf[startidx],
 829                                                bufact - startidx);
 830                   if (seq != NULL)
 831                     seq->ucs4 = wch;
 832                 }
 833
 834               if (wch == ILLEGAL_CHAR_VALUE)
 835                 {
 836                   /* This name is not in the repertoire map.  */
 837                   lr_error (lr, _("symbol `%.*s' not in repertoire map"),
 838                             (int) (bufact - startidx), &buf[startidx]);
 839                   illegal_string = 1;
 840                 }
 841               else
 842                 ADDWC (wch);
 843             }
 844
 845           /* Now forget about the name we just added.  */
 846           bufact = startidx;
 847
 848           /* And copy the bytes.  */
 849           if (seq != NULL)
 850             ADDS (seq->bytes, seq->nbytes);
 851         }
 852
 853       if (ch == '\n' || ch == EOF)
 854         {
 855           lr_error (lr, _("unterminated string"));
 856           illegal_string = 1;
 857         }
 858
 859       if (illegal_string)
 860         {
 861           free (buf);
 862           free (buf2);
 863           lr->token.val.str.startmb = NULL;
 864           lr->token.val.str.lenmb = 0;
 865           lr->token.val.str.startwc = NULL;
 866           lr->token.val.str.lenwc = 0;
 867
 868           return &lr->token;
 869         }
 870
 871       ADDC ('\0');
 872
 873       if (return_widestr)
 874         {
 875           ADDWC (0);
 876           lr->token.val.str.startwc = xrealloc (buf2,
 877                                                 buf2act * sizeof (uint32_t));
 878           lr->token.val.str.lenwc = buf2act;
 879         }
 880     }
 881
 882   lr->token.val.str.startmb = xrealloc (buf, bufact);
 883   lr->token.val.str.lenmb = bufact;
 884
 885   return &lr->token;
 886 }