gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 #ifdef MULTIBYTE_CHARS
  29 #include "mbchar.h"
  30 #include <locale.h>
  31 #endif
  32
/* Categories describing how a token type is spelled; one of these is
   recorded for every token type in token_spellings.

   Tokens with SPELL_STRING store their spelling in the token list,
   and its length in the token->val.name.len.
   NOTE(review): the code visible in this file stores string lengths
   in token->val.str.len -- confirm which field name is current.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_NUMBER,
  SPELL_STRING,
  SPELL_NONE
};
  44
/* One entry of the token_spellings table: the spelling category of a
   token type, plus the string recorded for it (see the OP and TK
   macros used to build the table).  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
  50
/* Spellings of the digraph forms of some punctuators.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };

/* Build token_spellings from TTYPE_TABLE.  OP entries are always
   SPELL_OPERATOR and record the literal spelling S; TK entries take
   their category from S and record the stringified enumerator name
   E.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category / recorded name of TOKEN by indexing
   token_spellings with its type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read pointer to the last recorded backup position.
   Expects a local variable named `buffer' to be in scope.  */
#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  63
/* Forward declarations of the file-local helpers.  PARAMS wraps the
   parameter lists, and the definitions below use old-style (K&R)
   parameter declarations.  */

/* Newline, trigraph and escaped-newline handling.  */
static void handle_newline PARAMS ((cpp_reader *));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
static cppchar_t get_effective_char PARAMS ((cpp_reader *));

/* Comment and whitespace skipping; identifier, number and string
   lexing; token run management.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
                                   unsigned int *));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static bool trigraph_p PARAMS ((cpp_reader *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
                                  cppchar_t));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
                                   const unsigned char *, unsigned int *));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));

static unsigned int hex_digit_value PARAMS ((unsigned int));
static _cpp_buff *new_buff PARAMS ((size_t));
  88
  89 /* Utility routine:
  90
  91    Compares, the token TOKEN to the NUL-terminated string STRING.
  92    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  93 int
  94 cpp_ideq (token, string)
  95      const cpp_token *token;
  96      const char *string;
  97 {
  98   if (token->type != CPP_NAME)
  99     return 0;
 100
 101   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 102 }
 103
 104 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 105    Returns with buffer->cur pointing to the character immediately
 106    following the newline (combination).  */
 107 static void
 108 handle_newline (pfile)
 109      cpp_reader *pfile;
 110 {
 111   cpp_buffer *buffer = pfile->buffer;
 112
 113   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 114      only accept CR-LF; maybe we should fall back to that behaviour?  */
 115   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 116     buffer->cur++;
 117
 118   buffer->line_base = buffer->cur;
 119   buffer->col_adjust = 0;
 120   pfile->line++;
 121 }
 122
 123 /* Subroutine of skip_escaped_newlines; called when a 3-character
 124    sequence beginning with "??" is encountered.  buffer->cur points to
 125    the second '?'.
 126
 127    Warn if necessary, and returns true if the sequence forms a
 128    trigraph and the trigraph should be honoured.  */
 129 static bool
 130 trigraph_p (pfile)
 131      cpp_reader *pfile;
 132 {
 133   cpp_buffer *buffer = pfile->buffer;
 134   cppchar_t from_char = buffer->cur[1];
 135   bool accept;
 136
 137   if (!_cpp_trigraph_map[from_char])
 138     return false;
 139
 140   accept = CPP_OPTION (pfile, trigraphs);
 141
 142   /* Don't warn about trigraphs in comments.  */
 143   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 144     {
 145       if (accept)
 146         cpp_error_with_line (pfile, DL_WARNING,
 147                              pfile->line, CPP_BUF_COL (buffer) - 1,
 148                              "trigraph ??%c converted to %c",
 149                              (int) from_char,
 150                              (int) _cpp_trigraph_map[from_char]);
 151       else if (buffer->cur != buffer->last_Wtrigraphs)
 152         {
 153           buffer->last_Wtrigraphs = buffer->cur;
 154           cpp_error_with_line (pfile, DL_WARNING,
 155                                pfile->line, CPP_BUF_COL (buffer) - 1,
 156                                "trigraph ??%c ignored", (int) from_char);
 157         }
 158     }
 159
 160   return accept;
 161 }
 162
/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
   lie in buffer->cur[-1].  Returns the next byte, which will be in
   buffer->cur[-1].  This routine performs preprocessing stages 1 and
   2 of the ISO C standard.

   If the buffer is marked from_stage3 nothing is translated and the
   character in buffer->cur[-1] is returned unchanged.  If a
   backslash-newline is the last thing in the buffer, a pedwarn is
   issued and EOF is returned with buffer->cur left at
   buffer->rlimit.  Each time an escaped newline is skipped,
   buffer->backup_to is moved to just after it.  */
static cppchar_t
skip_escaped_newlines (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  cppchar_t next = buffer->cur[-1];

  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      const unsigned char *saved_cur;
      cppchar_t next1;

      do
        {
          if (next == '?')
            {
              /* A lone '?', or a "??x" that is not an accepted
                 trigraph, is returned as an ordinary character.  */
              if (buffer->cur[0] != '?' || !trigraph_p (pfile))
                break;

              /* Translate the trigraph.  */
              next = _cpp_trigraph_map[buffer->cur[1]];
              buffer->cur += 2;
              /* Only a trigraph that yields a backslash can start an
                 escaped newline.  */
              if (next != '\\')
                break;
            }

          /* A backslash that is the very last character cannot escape
             anything.  */
          if (buffer->cur == buffer->rlimit)
            break;

          /* We have a backslash, and room for at least one more
             character.  Skip horizontal whitespace.  */
          saved_cur = buffer->cur;
          do
            next1 = *buffer->cur++;
          while (is_nvspace (next1) && buffer->cur < buffer->rlimit);

          /* Not followed by a newline: undo the whitespace scan and
             return the backslash itself.  */
          if (!is_vspace (next1))
            {
              buffer->cur = saved_cur;
              break;
            }

          /* More than just the newline was consumed, so whitespace
             separated it from the backslash.  */
          if (saved_cur != buffer->cur - 1
              && !pfile->state.lexing_comment)
            cpp_error (pfile, DL_WARNING,
                       "backslash and newline separated by space");

          handle_newline (pfile);
          buffer->backup_to = buffer->cur;
          if (buffer->cur == buffer->rlimit)
            {
              cpp_error (pfile, DL_PEDWARN,
                         "backslash-newline at end of file");
              next = EOF;
            }
          else
            next = *buffer->cur++;
        }
      /* The character after an escaped newline may itself begin
         another trigraph or escaped newline.  */
      while (next == '\\' || next == '?');
    }

  return next;
}
 231
 232 /* Obtain the next character, after trigraph conversion and skipping
 233    an arbitrarily long string of escaped newlines.  The common case of
 234    no trigraphs or escaped newlines falls through quickly.  On return,
 235    buffer->backup_to points to where to return to if the character is
 236    not to be processed.  */
 237 static cppchar_t
 238 get_effective_char (pfile)
 239      cpp_reader *pfile;
 240 {
 241   cppchar_t next;
 242   cpp_buffer *buffer = pfile->buffer;
 243
 244   buffer->backup_to = buffer->cur;
 245   next = *buffer->cur++;
 246   if (__builtin_expect (next == '?' || next == '\\', 0))
 247     next = skip_escaped_newlines (pfile);
 248
 249    return next;
 250 }
 251
 252 /* Skip a C-style block comment.  We find the end of the comment by
 253    seeing if an asterisk is before every '/' we encounter.  Returns
 254    non-zero if comment terminated by EOF, zero otherwise.  */
 255 static int
 256 skip_block_comment (pfile)
 257      cpp_reader *pfile;
 258 {
 259   cpp_buffer *buffer = pfile->buffer;
 260   cppchar_t c = EOF, prevc = EOF;
 261
 262   pfile->state.lexing_comment = 1;
 263   while (buffer->cur != buffer->rlimit)
 264     {
 265       prevc = c, c = *buffer->cur++;
 266
 267       /* FIXME: For speed, create a new character class of characters
 268          of interest inside block comments.  */
 269       if (c == '?' || c == '\\')
 270         c = skip_escaped_newlines (pfile);
 271
 272       /* People like decorating comments with '*', so check for '/'
 273          instead for efficiency.  */
 274       if (c == '/')
 275         {
 276           if (prevc == '*')
 277             break;
 278
 279           /* Warn about potential nested comments, but not if the '/'
 280              comes immediately before the true comment delimiter.
 281              Don't bother to get it right across escaped newlines.  */
 282           if (CPP_OPTION (pfile, warn_comments)
 283               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 284             cpp_error_with_line (pfile, DL_WARNING,
 285                                  pfile->line, CPP_BUF_COL (buffer),
 286                                  "\"/*\" within comment");
 287         }
 288       else if (is_vspace (c))
 289         handle_newline (pfile);
 290       else if (c == '\t')
 291         adjust_column (pfile);
 292     }
 293
 294   pfile->state.lexing_comment = 0;
 295   return c != '/' || prevc != '*';
 296 }
 297
 298 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 299    terminating newline.  Handles escaped newlines.  Returns non-zero
 300    if a multiline comment.  */
 301 static int
 302 skip_line_comment (pfile)
 303      cpp_reader *pfile;
 304 {
 305   cpp_buffer *buffer = pfile->buffer;
 306   unsigned int orig_line = pfile->line;
 307   cppchar_t c;
 308 #ifdef MULTIBYTE_CHARS
 309   wchar_t wc;
 310   int char_len;
 311 #endif
 312
 313   pfile->state.lexing_comment = 1;
 314 #ifdef MULTIBYTE_CHARS
 315   /* Reset multibyte conversion state.  */
 316   (void) local_mbtowc (NULL, NULL, 0);
 317 #endif
 318   do
 319     {
 320       if (buffer->cur == buffer->rlimit)
 321         goto at_eof;
 322
 323 #ifdef MULTIBYTE_CHARS
 324       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 325                                buffer->rlimit - buffer->cur);
 326       if (char_len == -1)
 327         {
 328           cpp_error (pfile, DL_WARNING,
 329                      "ignoring invalid multibyte character");
 330           char_len = 1;
 331           c = *buffer->cur++;
 332         }
 333       else
 334         {
 335           buffer->cur += char_len;
 336           c = wc;
 337         }
 338 #else
 339       c = *buffer->cur++;
 340 #endif
 341       if (c == '?' || c == '\\')
 342         c = skip_escaped_newlines (pfile);
 343     }
 344   while (!is_vspace (c));
 345
 346   /* Step back over the newline, except at EOF.  */
 347   buffer->cur--;
 348  at_eof:
 349
 350   pfile->state.lexing_comment = 0;
 351   return orig_line != pfile->line;
 352 }
 353
 354 /* pfile->buffer->cur is one beyond the \t character.  Update
 355    col_adjust so we track the column correctly.  */
 356 static void
 357 adjust_column (pfile)
 358      cpp_reader *pfile;
 359 {
 360   cpp_buffer *buffer = pfile->buffer;
 361   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 362
 363   /* Round it up to multiple of the tabstop, but subtract 1 since the
 364      tab itself occupies a character position.  */
 365   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 366                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 367 }
 368
 369 /* Skips whitespace, saving the next non-whitespace character.
 370    Adjusts pfile->col_adjust to account for tabs.  Without this,
 371    tokens might be assigned an incorrect column.  */
 372 static int
 373 skip_whitespace (pfile, c)
 374      cpp_reader *pfile;
 375      cppchar_t c;
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int warned = 0;
 379
 380   do
 381     {
 382       /* Horizontal space always OK.  */
 383       if (c == ' ')
 384         ;
 385       else if (c == '\t')
 386         adjust_column (pfile);
 387       /* Just \f \v or \0 left.  */
 388       else if (c == '\0')
 389         {
 390           if (buffer->cur - 1 == buffer->rlimit)
 391             return 0;
 392           if (!warned)
 393             {
 394               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 395               warned = 1;
 396             }
 397         }
 398       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 399         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 400                              CPP_BUF_COL (buffer),
 401                              "%s in preprocessing directive",
 402                              c == '\f' ? "form feed" : "vertical tab");
 403
 404       c = *buffer->cur++;
 405     }
 406   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 407   while (is_nvspace (c));
 408
 409   buffer->cur--;
 410   return 1;
 411 }
 412
 413 /* See if the characters of a number token are valid in a name (no
 414    '.', '+' or '-').  */
 415 static int
 416 name_p (pfile, string)
 417      cpp_reader *pfile;
 418      const cpp_string *string;
 419 {
 420   unsigned int i;
 421
 422   for (i = 0; i < string->len; i++)
 423     if (!is_idchar (string->text[i]))
 424       return 0;
 425
 426   return 1;
 427 }
 428
 429 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 430    a critical inner loop.  The common case is an identifier which has
 431    not been split by backslash-newline, does not contain a dollar
 432    sign, and has already been scanned (roughly 10:1 ratio of
 433    seen:unseen identifiers in normal code; the distribution is
 434    Poisson-like).  Second most common case is a new identifier, not
 435    split and no dollar sign.  The other possibilities are rare and
 436    have been relegated to parse_slow.  */
 437 static cpp_hashnode *
 438 parse_identifier (pfile)
 439      cpp_reader *pfile;
 440 {
 441   cpp_hashnode *result;
 442   const uchar *cur, *base;
 443
 444   /* Fast-path loop.  Skim over a normal identifier.
 445      N.B. ISIDNUM does not include $.  */
 446   cur = pfile->buffer->cur;
 447   while (ISIDNUM (*cur))
 448     cur++;
 449
 450   /* Check for slow-path cases.  */
 451   if (*cur == '?' || *cur == '\\' || *cur == '$')
 452     {
 453       unsigned int len;
 454
 455       base = parse_slow (pfile, cur, 0, &len);
 456       result = (cpp_hashnode *)
 457         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 458     }
 459   else
 460     {
 461       base = pfile->buffer->cur - 1;
 462       pfile->buffer->cur = cur;
 463       result = (cpp_hashnode *)
 464         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 465     }
 466
 467   /* Rarely, identifiers require diagnostics when lexed.
 468      XXX Has to be forced out of the fast path.  */
 469   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 470                         && !pfile->state.skipping, 0))
 471     {
 472       /* It is allowed to poison the same identifier twice.  */
 473       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 474         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 475                    NODE_NAME (result));
 476
 477       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 478          replacement list of a variadic macro.  */
 479       if (result == pfile->spec_nodes.n__VA_ARGS__
 480           && !pfile->state.va_args_ok)
 481         cpp_error (pfile, DL_PEDWARN,
 482         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 483     }
 484
 485   return result;
 486 }
 487
/* Slow path.  This handles numbers and identifiers which have been
   split, or contain dollar signs.  The part of the token from
   PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
   1 if it's a number, and 2 if it has a leading period.  Returns a
   pointer to the token's NUL-terminated spelling in permanent
   storage, and sets PLEN to its length.

   The spelling is accumulated on the hash table's obstack.  On
   return buffer->cur has been reset, via BACKUP, to the first
   character that is not part of the token.  A pedwarn is issued for
   '$' characters when pedantic and not skipping.  */
static uchar *
parse_slow (pfile, cur, number_p, plen)
     cpp_reader *pfile;
     const uchar *cur;
     int number_p;
     unsigned int *plen;
{
  cpp_buffer *buffer = pfile->buffer;
  const uchar *base = buffer->cur - 1;
  struct obstack *stack = &pfile->hash_table->stack;
  unsigned int c, prevc, saw_dollar = 0;

  /* Place any leading period.  */
  if (number_p == 2)
    obstack_1grow (stack, '.');

  /* Copy the part of the token which is known to be okay.  */
  obstack_grow (stack, base, cur - base);

  /* Now process the part which isn't.  We are looking at one of
     '$', '\\', or '?' on entry to this loop.  */
  prevc = cur[-1];
  c = *cur++;
  buffer->cur = cur;
  for (;;)
    {
      /* Potential escaped newline?  */
      /* Record where C came from first, so BACKUP can return to it if
         C turns out not to belong to the token.  */
      buffer->backup_to = buffer->cur - 1;
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (pfile);

      /* Identifiers stop at the first non-identifier character;
         numbers additionally accept '.' and a sign that is valid
         after PREVC.  */
      if (!is_idchar (c))
        {
          if (!number_p)
            break;
          if (c != '.' && !VALID_SIGN (c, prevc))
            break;
        }

      /* Handle normal identifier characters in this loop.  */
      do
        {
          prevc = c;
          obstack_1grow (stack, c);

          if (c == '$')
            saw_dollar++;

          c = *buffer->cur++;
        }
      while (is_idchar (c));
    }

  /* Step back over the unwanted char.  */
  BACKUP ();

  /* $ is not an identifier character in the standard, but is commonly
     accepted as an extension.  Don't warn about it in skipped
     conditional blocks.  */
  if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
    cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");

  /* Identifiers and numbers are null-terminated.  */
  /* The length is taken before the NUL is appended, so it does not
     count the terminator.  */
  *plen = obstack_object_size (stack);
  obstack_1grow (stack, '\0');
  return obstack_finish (stack);
}
 561
 562 /* Parse a number, beginning with character C, skipping embedded
 563    backslash-newlines.  LEADING_PERIOD is non-zero if there was a "."
 564    before C.  Place the result in NUMBER.  */
 565 static void
 566 parse_number (pfile, number, leading_period)
 567      cpp_reader *pfile;
 568      cpp_string *number;
 569      int leading_period;
 570 {
 571   const uchar *cur;
 572
 573   /* Fast-path loop.  Skim over a normal number.
 574      N.B. ISIDNUM does not include $.  */
 575   cur = pfile->buffer->cur;
 576   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 577     cur++;
 578
 579   /* Check for slow-path cases.  */
 580   if (*cur == '?' || *cur == '\\' || *cur == '$')
 581     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 582   else
 583     {
 584       const uchar *base = pfile->buffer->cur - 1;
 585       uchar *dest;
 586
 587       number->len = cur - base + leading_period;
 588       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 589       dest[number->len] = '\0';
 590       number->text = dest;
 591
 592       if (leading_period)
 593         *dest++ = '.';
 594       memcpy (dest, base, cur - base);
 595       pfile->buffer->cur = cur;
 596     }
 597 }
 598
 599 /* Subroutine of parse_string.  */
 600 static int
 601 unescaped_terminator_p (pfile, dest)
 602      cpp_reader *pfile;
 603      const unsigned char *dest;
 604 {
 605   const unsigned char *start, *temp;
 606
 607   /* In #include-style directives, terminators are not escapeable.  */
 608   if (pfile->state.angled_headers)
 609     return 1;
 610
 611   start = BUFF_FRONT (pfile->u_buff);
 612
 613   /* An odd number of consecutive backslashes represents an escaped
 614      terminator.  */
 615   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 616     ;
 617
 618   return ((dest - temp) & 1) == 0;
 619 }
 620
 621 /* Parses a string, character constant, or angle-bracketed header file
 622    name.  Handles embedded trigraphs and escaped newlines.  The stored
 623    string is guaranteed NUL-terminated, but it is not guaranteed that
 624    this is the first NUL since embedded NULs are preserved.
 625
 626    When this function returns, buffer->cur points to the next
 627    character to be processed.  */
 628 static void
 629 parse_string (pfile, token, terminator)
 630      cpp_reader *pfile;
 631      cpp_token *token;
 632      cppchar_t terminator;
 633 {
 634   cpp_buffer *buffer = pfile->buffer;
 635   unsigned char *dest, *limit;
 636   cppchar_t c;
 637   bool warned_nulls = false;
 638 #ifdef MULTIBYTE_CHARS
 639   wchar_t wc;
 640   int char_len;
 641 #endif
 642
 643   dest = BUFF_FRONT (pfile->u_buff);
 644   limit = BUFF_LIMIT (pfile->u_buff);
 645
 646 #ifdef MULTIBYTE_CHARS
 647   /* Reset multibyte conversion state.  */
 648   (void) local_mbtowc (NULL, NULL, 0);
 649 #endif
 650   for (;;)
 651     {
 652       /* We need room for another char, possibly the terminating NUL.  */
 653       if ((size_t) (limit - dest) < 1)
 654         {
 655           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 656           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 657           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 658           limit = BUFF_LIMIT (pfile->u_buff);
 659         }
 660
 661 #ifdef MULTIBYTE_CHARS
 662       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 663                                buffer->rlimit - buffer->cur);
 664       if (char_len == -1)
 665         {
 666           cpp_error (pfile, DL_WARNING,
 667                        "ignoring invalid multibyte character");
 668           char_len = 1;
 669           c = *buffer->cur++;
 670         }
 671       else
 672         {
 673           buffer->cur += char_len;
 674           c = wc;
 675         }
 676 #else
 677       c = *buffer->cur++;
 678 #endif
 679
 680       /* Handle trigraphs, escaped newlines etc.  */
 681       if (c == '?' || c == '\\')
 682         c = skip_escaped_newlines (pfile);
 683
 684       if (c == terminator)
 685         {
 686           if (unescaped_terminator_p (pfile, dest))
 687             break;
 688         }
 689       else if (is_vspace (c))
 690         {
 691           /* No string literal may extend over multiple lines.  In
 692              assembly language, suppress the error except for <>
 693              includes.  This is a kludge around not knowing where
 694              comments are.  */
 695         unterminated:
 696           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 697             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 698                        terminator);
 699           buffer->cur--;
 700           break;
 701         }
 702       else if (c == '\0')
 703         {
 704           if (buffer->cur - 1 == buffer->rlimit)
 705             goto unterminated;
 706           if (!warned_nulls)
 707             {
 708               warned_nulls = true;
 709               cpp_error (pfile, DL_WARNING,
 710                          "null character(s) preserved in literal");
 711             }
 712         }
 713 #ifdef MULTIBYTE_CHARS
 714       if (char_len > 1)
 715         {
 716           for ( ; char_len > 0; --char_len)
 717             *dest++ = (*buffer->cur - char_len);
 718         }
 719       else
 720 #endif
 721         *dest++ = c;
 722     }
 723
 724   *dest = '\0';
 725
 726   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 727   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 728   BUFF_FRONT (pfile->u_buff) = dest + 1;
 729 }
 730
 731 /* The stored comment includes the comment start and any terminator.  */
 732 static void
 733 save_comment (pfile, token, from, type)
 734      cpp_reader *pfile;
 735      cpp_token *token;
 736      const unsigned char *from;
 737      cppchar_t type;
 738 {
 739   unsigned char *buffer;
 740   unsigned int len, clen;
 741
 742   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 743
 744   /* C++ comments probably (not definitely) have moved past a new
 745      line, which we don't want to save in the comment.  */
 746   if (is_vspace (pfile->buffer->cur[-1]))
 747     len--;
 748
 749   /* If we are currently in a directive, then we need to store all
 750      C++ comments as C comments internally, and so we need to
 751      allocate a little extra space in that case.
 752
 753      Note that the only time we encounter a directive here is
 754      when we are saving comments in a "#define".  */
 755   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 756
 757   buffer = _cpp_unaligned_alloc (pfile, clen);
 758
 759   token->type = CPP_COMMENT;
 760   token->val.str.len = clen;
 761   token->val.str.text = buffer;
 762
 763   buffer[0] = '/';
 764   memcpy (buffer + 1, from, len - 1);
 765
 766   /* Finish conversion to a C comment, if necessary.  */
 767   if (pfile->state.in_directive && type == '/')
 768     {
 769       buffer[1] = '*';
 770       buffer[clen - 2] = '*';
 771       buffer[clen - 1] = '/';
 772     }
 773 }
 774
 775 /* Allocate COUNT tokens for RUN.  */
 776 void
 777 _cpp_init_tokenrun (run, count)
 778      tokenrun *run;
 779      unsigned int count;
 780 {
 781   run->base = xnewvec (cpp_token, count);
 782   run->limit = run->base + count;
 783   run->next = NULL;
 784 }
 785
 786 /* Returns the next tokenrun, or creates one if there is none.  */
 787 static tokenrun *
 788 next_tokenrun (run)
 789      tokenrun *run;
 790 {
 791   if (run->next == NULL)
 792     {
 793       run->next = xnew (tokenrun);
 794       run->next->prev = run;
 795       _cpp_init_tokenrun (run->next, 250);
 796     }
 797
 798   return run->next;
 799 }
 800
 801 /* Allocate a single token that is invalidated at the same time as the
 802    rest of the tokens on the line.  Has its line and col set to the
 803    same as the last lexed token, so that diagnostics appear in the
 804    right place.  */
 805 cpp_token *
 806 _cpp_temp_token (pfile)
 807      cpp_reader *pfile;
 808 {
 809   cpp_token *old, *result;
 810
 811   old = pfile->cur_token - 1;
 812   if (pfile->cur_token == pfile->cur_run->limit)
 813     {
 814       pfile->cur_run = next_tokenrun (pfile->cur_run);
 815       pfile->cur_token = pfile->cur_run->base;
 816     }
 817
 818   result = pfile->cur_token++;
 819   result->line = old->line;
 820   result->col = old->col;
 821   return result;
 822 }
 823
 824 /* Lex a token into RESULT (external interface).  Takes care of issues
 825    like directive handling, token lookahead, multiple include
 826    optimization and skipping.  */
 827 const cpp_token *
 828 _cpp_lex_token (pfile)
 829      cpp_reader *pfile;
 830 {
 831   cpp_token *result;
 832
 833   for (;;)
 834     {
 835       if (pfile->cur_token == pfile->cur_run->limit)
 836         {
 837           pfile->cur_run = next_tokenrun (pfile->cur_run);
 838           pfile->cur_token = pfile->cur_run->base;
 839         }
 840
 841       if (pfile->lookaheads)
 842         {
 843           pfile->lookaheads--;
 844           result = pfile->cur_token++;
 845         }
 846       else
 847         result = _cpp_lex_direct (pfile);
 848
 849       if (result->flags & BOL)
 850         {
 851           /* Is this a directive.  If _cpp_handle_directive returns
 852              false, it is an assembler #.  */
 853           if (result->type == CPP_HASH
 854               /* 6.10.3 p 11: Directives in a list of macro arguments
 855                  gives undefined behavior.  This implementation
 856                  handles the directive as normal.  */
 857               && pfile->state.parsing_args != 1
 858               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 859             continue;
 860           if (pfile->cb.line_change && !pfile->state.skipping)
 861             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 862         }
 863
 864       /* We don't skip tokens in directives.  */
 865       if (pfile->state.in_directive)
 866         break;
 867
 868       /* Outside a directive, invalidate controlling macros.  At file
 869          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 870          get here and MI optimisation works.  */
 871       pfile->mi_valid = false;
 872
 873       if (!pfile->state.skipping || result->type == CPP_EOF)
 874         break;
 875     }
 876
 877   return result;
 878 }
 879
/* Helper for lexing two-character tokens.  Fetches the next effective
   character; if it is CHAR the token type becomes THEN_TYPE,
   otherwise the character is pushed back with BACKUP and the type
   becomes ELSE_TYPE.  Expects `pfile', `buffer' and `result' to be in
   scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do {                                          \
    if (get_effective_char (pfile) == CHAR)     \
      result->type = THEN_TYPE;                 \
    else                                        \
      {                                         \
        BACKUP ();                              \
        result->type = ELSE_TYPE;               \
      }                                         \
  } while (0)
 890
 891 /* Lex a token into pfile->cur_token, which is also incremented, to
 892    get diagnostics pointing to the correct location.
 893
 894    Does not handle issues such as token lookahead, multiple-include
 895    optimisation, directives, skipping etc.  This function is only
 896    suitable for use by _cpp_lex_token, and in special cases like
 897    lex_expansion_token which doesn't care for any of these issues.
 898
 899    When meeting a newline, returns CPP_EOF if parsing a directive,
 900    otherwise returns to the start of the token buffer if permissible.
 901    Returns the location of the lexed token.

        The body is a single switch on the first character of the token.
        It is re-entered through labels rather than loops:
          fresh_line          reload pfile->buffer and move the flags kept
                              in buffer->saved_flags into the token;
          update_tokens_line  re-stamp the token with pfile->line;
          skipped_white       read the next character and record its column;
          trigraph            dispatch again on a character produced by
                              skip_escaped_newlines.  */
 902 cpp_token *
 903 _cpp_lex_direct (pfile)
 904      cpp_reader *pfile;
 905 {
 906   cppchar_t c;
 907   cpp_buffer *buffer;
 908   const unsigned char *comment_start;
 909   cpp_token *result = pfile->cur_token++;
 910
 911  fresh_line:
       /* The buffer may have changed (it is popped at EOF below), so it
          is re-read from pfile each time we get here.  */
 912   buffer = pfile->buffer;
 913   result->flags = buffer->saved_flags;
 914   buffer->saved_flags = 0;
 915  update_tokens_line:
 916   result->line = pfile->line;
 917
 918  skipped_white:
 919   c = *buffer->cur++;
 920   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 921
 922  trigraph:
 923   switch (c)
 924     {
 925     case ' ': case '\t': case '\f': case '\v': case '\0':
 926       result->flags |= PREV_WHITE;
           /* A nonzero return means whitespace was skipped and lexing
              continues; zero is handled as end of buffer.  */
 927       if (skip_whitespace (pfile, c))
 928         goto skipped_white;
 929
 930       /* EOF.  */
 931       buffer->cur--;
 932       buffer->saved_flags = BOL;
 933       if (!pfile->state.parsing_args && !pfile->state.in_directive)
 934         {
 935           if (buffer->cur != buffer->line_base)
 936             {
 937               /* Non-empty files should end in a newline.  Don't warn
 938                  for command line and _Pragma buffers.  */
 939               if (!buffer->from_stage3)
 940                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 941               handle_newline (pfile);
 942             }
 943
 944           /* Don't pop the last buffer.  */
 945           if (buffer->prev)
 946             {
                   /* Read return_at_eof before the pop; BUFFER is not
                      consulted again after _cpp_pop_buffer.  */
 947               unsigned char stop = buffer->return_at_eof;
 948
 949               _cpp_pop_buffer (pfile);
 950               if (!stop)
 951                 goto fresh_line;
 952             }
 953         }
 954       result->type = CPP_EOF;
 955       break;
 956
 957     case '\n': case '\r':
 958       handle_newline (pfile);
 959       buffer->saved_flags = BOL;
 960       if (! pfile->state.in_directive)
 961         {
               /* Outside a directive a newline yields no token.  The next
                  token is marked BOL, and also PREV_WHITE when
                  parsing_args is 2.  */
 962           if (pfile->state.parsing_args == 2)
 963             buffer->saved_flags |= PREV_WHITE;
 964           if (!pfile->keep_tokens)
 965             {
                   /* Restart at the base of the first token run and lex
                      into its first slot.  */
 966               pfile->cur_run = &pfile->base_run;
 967               result = pfile->base_run.base;
 968               pfile->cur_token = result + 1;
 969             }
 970           goto fresh_line;
 971         }
 972       result->type = CPP_EOF;
 973       break;
 974
 975     case '?':
 976     case '\\':
 977       /* These could start an escaped newline, or '?' a trigraph.  Let
 978          skip_escaped_newlines do all the work.  */
 979       {
 980         unsigned int line = pfile->line;
 981
 982         c = skip_escaped_newlines (pfile);
 983         if (line != pfile->line)
 984           {
 985             buffer->cur--;
 986             /* We had at least one escaped newline of some sort.
 987                Update the token's line and column.  */
 988             goto update_tokens_line;
 989           }
 990       }
 991
 992       /* We are either the original '?' or '\\', or a trigraph.  */
 993       if (c == '?')
 994         result->type = CPP_QUERY;
 995       else if (c == '\\')
 996         goto random_char;
 997       else
 998         goto trigraph;
 999       break;
1000
1001     case '0': case '1': case '2': case '3': case '4':
1002     case '5': case '6': case '7': case '8': case '9':
1003       result->type = CPP_NUMBER;
1004       parse_number (pfile, &result->val.str, 0);
1005       break;
1006
1007     case 'L':
1008       /* 'L' may introduce wide characters or strings.  */
1009         {
1010           const unsigned char *pos = buffer->cur;
1011
1012           c = get_effective_char (pfile);
1013           if (c == '\'' || c == '"')
1014             {
1015               result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1016               parse_string (pfile, result, c);
1017               break;
1018             }
               /* Not a wide literal: rewind to just after the 'L' and lex
                  it as the start of an identifier.  */
1019           buffer->cur = pos;
1020         }
1021         /* Fall through.  */
1022
1023     start_ident:
1024     case '_':
1025     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1026     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1027     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1028     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1029     case 'y': case 'z':
1030     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1031     case 'G': case 'H': case 'I': case 'J': case 'K':
1032     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1033     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1034     case 'Y': case 'Z':
1035       result->type = CPP_NAME;
1036       result->val.node = parse_identifier (pfile);
1037
1038       /* Convert named operators to their proper types.  */
           /* val.node is left pointing at the identifier, so the token
              can still be spelled the way it was written.  */
1039       if (result->val.node->flags & NODE_OPERATOR)
1040         {
1041           result->flags |= NAMED_OP;
1042           result->type = result->val.node->value.operator;
1043         }
1044       break;
1045
1046     case '\'':
1047     case '"':
1048       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1049       parse_string (pfile, result, c);
1050       break;
1051
1052     case '/':
1053       /* A potential block or line comment.  */
           /* comment_start is the position just after the '/'; it is
              handed to save_comment if the comment is kept.  */
1054       comment_start = buffer->cur;
1055       c = get_effective_char (pfile);
1056
1057       if (c == '*')
1058         {
1059           if (skip_block_comment (pfile))
1060             cpp_error (pfile, DL_ERROR, "unterminated comment");
1061         }
1062       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1063                             || CPP_IN_SYSTEM_HEADER (pfile)))
1064         {
1065           /* Warn about comments only if pedantically GNUC89, and not
1066              in system headers.  */
1067           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1068               && ! buffer->warned_cplusplus_comments)
1069             {
1070               cpp_error (pfile, DL_PEDWARN,
1071                            "C++ style comments are not allowed in ISO C89");
1072               cpp_error (pfile, DL_PEDWARN,
1073                          "(this will be reported only once per input file)");
1074               buffer->warned_cplusplus_comments = 1;
1075             }
1076
1077           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1078             cpp_error (pfile, DL_WARNING, "multi-line comment");
1079         }
1080       else if (c == '=')
1081         {
1082           result->type = CPP_DIV_EQ;
1083           break;
1084         }
1085       else
1086         {
1087           BACKUP ();
1088           result->type = CPP_DIV;
1089           break;
1090         }
1091
           /* Only the two comment branches reach this point.  A dropped
              comment is treated as whitespace before the next token.  */
1092       if (!pfile->state.save_comments)
1093         {
1094           result->flags |= PREV_WHITE;
1095           goto update_tokens_line;
1096         }
1097
1098       /* Save the comment as a token in its own right.  */
1099       save_comment (pfile, result, comment_start, c);
1100       break;
1101
1102     case '<':
           /* When state.angled_headers is set, '<' opens a header name
              that runs up to the closing '>'.  */
1103       if (pfile->state.angled_headers)
1104         {
1105           result->type = CPP_HEADER_NAME;
1106           parse_string (pfile, result, '>');
1107           break;
1108         }
1109
1110       c = get_effective_char (pfile);
1111       if (c == '=')
1112         result->type = CPP_LESS_EQ;
1113       else if (c == '<')
1114         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1115       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1116         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1117       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1118         {
1119           result->type = CPP_OPEN_SQUARE;
1120           result->flags |= DIGRAPH;
1121         }
1122       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1123         {
1124           result->type = CPP_OPEN_BRACE;
1125           result->flags |= DIGRAPH;
1126         }
1127       else
1128         {
1129           BACKUP ();
1130           result->type = CPP_LESS;
1131         }
1132       break;
1133
1134     case '>':
1135       c = get_effective_char (pfile);
1136       if (c == '=')
1137         result->type = CPP_GREATER_EQ;
1138       else if (c == '>')
1139         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1140       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1141         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1142       else
1143         {
1144           BACKUP ();
1145           result->type = CPP_GREATER;
1146         }
1147       break;
1148
1149     case '%':
1150       c = get_effective_char (pfile);
1151       if (c == '=')
1152         result->type = CPP_MOD_EQ;
1153       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1154         {
               /* "%:" is the digraph for '#'; "%:%:" is the digraph for
                  "##".  */
1155           result->flags |= DIGRAPH;
1156           result->type = CPP_HASH;
1157           if (get_effective_char (pfile) == '%')
1158             {
1159               const unsigned char *pos = buffer->cur;
1160
1161               if (get_effective_char (pfile) == ':')
1162                 result->type = CPP_PASTE;
1163               else
                     /* Give back the second '%' too, so it starts the
                        next token.  */
1164                 buffer->cur = pos - 1;
1165             }
1166           else
1167             BACKUP ();
1168         }
1169       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1170         {
1171           result->flags |= DIGRAPH;
1172           result->type = CPP_CLOSE_BRACE;
1173         }
1174       else
1175         {
1176           BACKUP ();
1177           result->type = CPP_MOD;
1178         }
1179       break;
1180
1181     case '.':
1182       result->type = CPP_DOT;
1183       c = get_effective_char (pfile);
1184       if (c == '.')
1185         {
1186           const unsigned char *pos = buffer->cur;
1187
1188           if (get_effective_char (pfile) == '.')
1189             result->type = CPP_ELLIPSIS;
1190           else
                 /* Only two dots: give back the second one and return a
                    single CPP_DOT.  */
1191             buffer->cur = pos - 1;
1192         }
1193       /* All known character sets have 0...9 contiguous.  */
1194       else if (ISDIGIT (c))
1195         {
1196           result->type = CPP_NUMBER;
1197           parse_number (pfile, &result->val.str, 1);
1198         }
1199       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1200         result->type = CPP_DOT_STAR;
1201       else
1202         BACKUP ();
1203       break;
1204
1205     case '+':
1206       c = get_effective_char (pfile);
1207       if (c == '+')
1208         result->type = CPP_PLUS_PLUS;
1209       else if (c == '=')
1210         result->type = CPP_PLUS_EQ;
1211       else
1212         {
1213           BACKUP ();
1214           result->type = CPP_PLUS;
1215         }
1216       break;
1217
1218     case '-':
1219       c = get_effective_char (pfile);
1220       if (c == '>')
1221         {
1222           result->type = CPP_DEREF;
               /* "->*" is recognised only when the cplusplus option is
                  set.  */
1223           if (CPP_OPTION (pfile, cplusplus))
1224             {
1225               if (get_effective_char (pfile) == '*')
1226                 result->type = CPP_DEREF_STAR;
1227               else
1228                 BACKUP ();
1229             }
1230         }
1231       else if (c == '-')
1232         result->type = CPP_MINUS_MINUS;
1233       else if (c == '=')
1234         result->type = CPP_MINUS_EQ;
1235       else
1236         {
1237           BACKUP ();
1238           result->type = CPP_MINUS;
1239         }
1240       break;
1241
1242     case '&':
1243       c = get_effective_char (pfile);
1244       if (c == '&')
1245         result->type = CPP_AND_AND;
1246       else if (c == '=')
1247         result->type = CPP_AND_EQ;
1248       else
1249         {
1250           BACKUP ();
1251           result->type = CPP_AND;
1252         }
1253       break;
1254
1255     case '|':
1256       c = get_effective_char (pfile);
1257       if (c == '|')
1258         result->type = CPP_OR_OR;
1259       else if (c == '=')
1260         result->type = CPP_OR_EQ;
1261       else
1262         {
1263           BACKUP ();
1264           result->type = CPP_OR;
1265         }
1266       break;
1267
1268     case ':':
1269       c = get_effective_char (pfile);
1270       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1271         result->type = CPP_SCOPE;
1272       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1273         {
1274           result->flags |= DIGRAPH;
1275           result->type = CPP_CLOSE_SQUARE;
1276         }
1277       else
1278         {
1279           BACKUP ();
1280           result->type = CPP_COLON;
1281         }
1282       break;
1283
1284     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1285     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1286     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1287     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1288     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1289
1290     case '~': result->type = CPP_COMPL; break;
1291     case ',': result->type = CPP_COMMA; break;
1292     case '(': result->type = CPP_OPEN_PAREN; break;
1293     case ')': result->type = CPP_CLOSE_PAREN; break;
1294     case '[': result->type = CPP_OPEN_SQUARE; break;
1295     case ']': result->type = CPP_CLOSE_SQUARE; break;
1296     case '{': result->type = CPP_OPEN_BRACE; break;
1297     case '}': result->type = CPP_CLOSE_BRACE; break;
1298     case ';': result->type = CPP_SEMICOLON; break;
1299
1300       /* @ is a punctuator in Objective C.  */
1301     case '@': result->type = CPP_ATSIGN; break;
1302
1303     case '$':
1304       if (CPP_OPTION (pfile, dollars_in_ident))
1305         goto start_ident;
1306       /* Fall through...  */
1307
1308     random_char:
1309     default:
           /* Anything unrecognised becomes a CPP_OTHER token carrying the
              character itself.  */
1310       result->type = CPP_OTHER;
1311       result->val.c = c;
1312       break;
1313     }
1314
1315   return result;
1316 }
1317
1318 /* An upper bound on the number of bytes needed to spell TOKEN,
1319    including preceding whitespace.

        The bound is the length of the token's stored text plus five
        bytes of slack.  Operators and other fixed spellings have no
        stored text and rely on the slack alone.  The exception is a
        named operator (NAMED_OP set, DIGRAPH clear): cpp_spell_token
        writes it using the identifier kept in val.node, so that
        identifier's length has to be counted here, or the bound is too
        small for spellings longer than the slack.  */
1320 unsigned int
1321 cpp_token_len (token)
1322      const cpp_token *token;
1323 {
1324   unsigned int len;
1325
1326   switch (TOKEN_SPELL (token))
1327     {
1328     default:            len = 0;                                break;
         case SPELL_OPERATOR:
           /* Same precedence as cpp_spell_token: the digraph spelling
              wins, then the named-operator spelling.  */
           if ((token->flags & (DIGRAPH | NAMED_OP)) == NAMED_OP)
             len = NODE_LEN (token->val.node);
           else
             len = 0;
           break;
1329     case SPELL_NUMBER:
1330     case SPELL_STRING:  len = token->val.str.len;               break;
1331     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1332     }
1333   /* 1 for whitespace, 4 for comment delimiters.  */
1334   return len + 5;
1335 }
1336
1337 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1338    already contain enough space to hold the token's spelling.
1339    Returns a pointer to the character after the last character
1340    written.

        No terminating NUL is stored.  PFILE is used only to report
        internal errors (an unspellable token, or an unexpected type in
        the string category); in those cases nothing is written for the
        token.  */
1341 unsigned char *
1342 cpp_spell_token (pfile, token, buffer)
1343      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1344      const cpp_token *token;
1345      unsigned char *buffer;
1346 {
1347   switch (TOKEN_SPELL (token))
1348     {
1349     case SPELL_OPERATOR:
1350       {
1351         const unsigned char *spelling;
1352         unsigned char c;
1353
             /* A digraph is spelled as it was written.  A named operator
                is spelled through its identifier node, by jumping into
                the SPELL_IDENT case below.  */
1354         if (token->flags & DIGRAPH)
1355           spelling
1356             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1357         else if (token->flags & NAMED_OP)
1358           goto spell_ident;
1359         else
1360           spelling = TOKEN_NAME (token);
1361
1362         while ((c = *spelling++) != '\0')
1363           *buffer++ = c;
1364       }
1365       break;
1366
1367     case SPELL_CHAR:
1368       *buffer++ = token->val.c;
1369       break;
1370
1371     spell_ident:
1372     case SPELL_IDENT:
1373       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1374       buffer += NODE_LEN (token->val.node);
1375       break;
1376
1377     case SPELL_NUMBER:
1378       memcpy (buffer, token->val.str.text, token->val.str.len);
1379       buffer += token->val.str.len;
1380       break;
1381
1382     case SPELL_STRING:
1383       {
             /* LEFT and RIGHT are the delimiters; TAG is the 'L' prefix
                of wide literals, or NUL when there is none.  */
1384         int left, right, tag;
1385         switch (token->type)
1386           {
1387           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1388           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1389           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1390           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1391           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1392           default:
                 /* NOTE(review): this is the only diagnostic in this
                    function whose format ends in a newline; confirm
                    whether cpp_error expects one.  */
1393             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1394                        TOKEN_NAME (token));
1395             return buffer;
1396           }
1397         if (tag) *buffer++ = tag;
1398         *buffer++ = left;
1399         memcpy (buffer, token->val.str.text, token->val.str.len);
1400         buffer += token->val.str.len;
1401         *buffer++ = right;
1402       }
1403       break;
1404
1405     case SPELL_NONE:
1406       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1407       break;
1408     }
1409
1410   return buffer;
1411 }
1412
1413 /* Spell TOKEN into freshly allocated storage and NUL-terminate it.
1414    The string is freed when the reader is destroyed; handy for
        diagnostics.  The storage is sized by cpp_token_len and filled by
        cpp_spell_token.  */
1415 unsigned char *
1416 cpp_token_as_text (pfile, token)
1417      cpp_reader *pfile;
1418      const cpp_token *token;
1419 {
1420   unsigned char *text, *tail;
1421
1422   text = _cpp_unaligned_alloc (pfile, cpp_token_len (token));
1423   tail = cpp_spell_token (pfile, token, text);
1424   *tail = '\0';
1425
1426   return text;
1427 }
1428
1429 /* Return the name recorded in token_spellings for token type TYPE.
1430    Used by the C front ends, which ought to use cpp_token_as_text.  */
1431 const char *
1432 cpp_type2name (type)
1433      enum cpp_ttype type;
1434 {
1435   return (const char *) (&token_spellings[type])->name;
1436 }
1437
1438 /* Writes the spelling of token to FP, without any preceding space.
1439    Separated from cpp_spell_token for efficiency - to avoid stdio
1440    double-buffering.

        The cases mirror cpp_spell_token.  No cpp_reader is available
        here, so an unexpected type in the string category is reported
        directly on stderr, and a SPELL_NONE token produces no output.  */
1441 void
1442 cpp_output_token (token, fp)
1443      const cpp_token *token;
1444      FILE *fp;
1445 {
1446   switch (TOKEN_SPELL (token))
1447     {
1448     case SPELL_OPERATOR:
1449       {
1450         const unsigned char *spelling;
1451         int c;
1452
             /* Digraph spelling first, then the identifier spelling of a
                named operator (by jumping into the SPELL_IDENT case),
                else the table spelling.  */
1453         if (token->flags & DIGRAPH)
1454           spelling
1455             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1456         else if (token->flags & NAMED_OP)
1457           goto spell_ident;
1458         else
1459           spelling = TOKEN_NAME (token);
1460
             /* The first character is written unconditionally; this
                presumably relies on no operator spelling being empty.
                TODO confirm.  */
1461         c = *spelling;
1462         do
1463           putc (c, fp);
1464         while ((c = *++spelling) != '\0');
1465       }
1466       break;
1467
1468     case SPELL_CHAR:
1469       putc (token->val.c, fp);
1470       break;
1471
1472     spell_ident:
1473     case SPELL_IDENT:
1474       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1475     break;
1476
1477     case SPELL_NUMBER:
1478       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1479       break;
1480
1481     case SPELL_STRING:
1482       {
             /* LEFT and RIGHT are the delimiters; TAG is the 'L' prefix
                of wide literals, or NUL when there is none.  */
1483         int left, right, tag;
1484         switch (token->type)
1485           {
1486           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1487           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1488           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1489           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1490           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1491           default:
1492             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1493             return;
1494           }
1495         if (tag) putc (tag, fp);
1496         putc (left, fp);
1497         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1498         putc (right, fp);
1499       }
1500       break;
1501
1502     case SPELL_NONE:
1503       /* An error, most probably.  */
1504       break;
1505     }
1506 }
1507
1508 /* Return 1 if tokens A and B are equivalent, 0 otherwise.  They must
        agree in type and flags, and then in whatever value their spelling
        category carries.  */
1509 int
1510 _cpp_equiv_tokens (a, b)
1511      const cpp_token *a, *b;
1512 {
1513   if (a->type != b->type || a->flags != b->flags)
1514     return 0;
1515
1516   switch (TOKEN_SPELL (a))
1517     {
1518     case SPELL_CHAR:
1519       return a->val.c == b->val.c;
1520     case SPELL_NONE:
           /* Only macro arguments carry a value here: the argument
              number.  */
1521       return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1522     case SPELL_IDENT:
1523       return a->val.node == b->val.node;
1524     case SPELL_NUMBER:
1525     case SPELL_STRING:
1526       return (a->val.str.len == b->val.str.len
1527               && !memcmp (a->val.str.text, b->val.str.text,
1528                           a->val.str.len));
1529     case SPELL_OPERATOR:
1530     default:                    /* Keep compiler happy.  */
1531       return 1;
1532     }
1533 }
1534
1535 /* Decide whether TOKEN1 and TOKEN2, output next to each other, need
1536    a space between them so that they are not accidentally pasted into
1537    a different token.  Returns nonzero if so.  The test is deliberately
1538    conservative and sometimes asks for a space that is not needed,
        e.g. between "." and ".2".  */
1539 int
1540 cpp_avoid_paste (pfile, token1, token2)
1541      cpp_reader *pfile;
1542      const cpp_token *token1, *token2;
1543 {
1544   enum cpp_ttype lhs
1545     = (token1->flags & NAMED_OP) ? CPP_NAME : token1->type;
1546   enum cpp_ttype rhs
1547     = (token2->flags & NAMED_OP) ? CPP_NAME : token2->type;
1548   cppchar_t ch = EOF;
1549
1550   /* Named operators are spelled like identifiers, hence CPP_NAME
1551      above.  CH is the first character of the right-hand token when
1552      it is an operator or digraph, and stays EOF otherwise.  */
1553   if (token2->flags & DIGRAPH)
1554     ch = digraph_spellings[(int) rhs - (int) CPP_FIRST_DIGRAPH][0];
1555   else if (token_spellings[rhs].category == SPELL_OPERATOR)
1556     ch = token_spellings[rhs].name[0];
1557
1558   /* Every type up to CPP_LAST_EQ can paste with a following '='.  */
1559   if (ch == '=' && (int) lhs <= (int) CPP_LAST_EQ)
1560     return 1;
1561
1562   switch (lhs)
1563     {
1564     case CPP_GREATER:   return ch == '>' || ch == '?';
1565     case CPP_LESS:      return ch == '<' || ch == '?' || ch == '%' || ch == ':';
1566     case CPP_PLUS:      return ch == '+';
1567     case CPP_MINUS:     return ch == '-' || ch == '>';
1568     case CPP_DIV:       return ch == '/' || ch == '*'; /* Comments.  */
1569     case CPP_MOD:       return ch == ':' || ch == '>';
1570     case CPP_AND:       return ch == '&';
1571     case CPP_OR:        return ch == '|';
1572     case CPP_COLON:     return ch == ':' || ch == '>';
1573     case CPP_DEREF:     return ch == '*';
1574     case CPP_DOT:       return ch == '.' || ch == '%' || rhs == CPP_NUMBER;
1575     case CPP_HASH:      return ch == '#' || ch == '%'; /* Digraph form.  */
1576     case CPP_NAME:      return ((rhs == CPP_NUMBER
1577                                  && name_p (pfile, &token2->val.str))
1578                                 || rhs == CPP_NAME
1579                                 || rhs == CPP_CHAR || rhs == CPP_STRING); /* L */
1580     case CPP_NUMBER:    return (rhs == CPP_NUMBER || rhs == CPP_NAME
1581                                 || ch == '.' || ch == '+' || ch == '-');
1582     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1583                                 && token1->val.c == '@'
1584                                 && (rhs == CPP_NAME || rhs == CPP_STRING));
1585     default:            break;
1586     }
1587
1588   return 0;
1589 }
1590
1591 /* Write every token left on the current line to FP, followed by a
1592    newline character.  Whitespace before the first token is dropped.
1593    If there are macros, special token padding is not performed.  */
1594 void
1595 cpp_output_line (pfile, fp)
1596      cpp_reader *pfile;
1597      FILE *fp;
1598 {
1599   const cpp_token *tok;
1600
1601   for (tok = cpp_get_token (pfile); tok->type != CPP_EOF; )
1602     {
1603       cpp_output_token (tok, fp);
1604
           /* The separating space is decided by the token that follows,
              so the next token is fetched before the loop test.  */
1605       tok = cpp_get_token (pfile);
1606       if (tok->flags & PREV_WHITE)
1607         putc (' ', fp);
1608     }
1609
1610   putc ('\n', fp);
1611 }
1612
1613 /* Return the numeric value of the hexadecimal digit C; abort if C is
        not a hexadecimal digit.  */
1614 static unsigned int
1615 hex_digit_value (c)
1616      unsigned int c;
1617 {
1618   if (!hex_p (c))
1619     abort ();
1620
1621   return hex_value (c);
1622 }
1623
1624 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1625    failure if cpplib is not parsing C++ or C99.  Such failure is
1626    silent, and no variables are updated.  Otherwise returns 0, and
1627    warns if -Wtraditional.
1628
1629    [lex.charset]: The character designated by the universal character
1630    name \UNNNNNNNN is that character whose character short name in
1631    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1632    universal character name \uNNNN is that character whose character
1633    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1634    for a universal character name is less than 0x20 or in the range
1635    0x7F-0x9F (inclusive), or if the universal character name
1636    designates a character in the basic source character set, then the
1637    program is ill-formed.
1638
1639    We assume that wchar_t is Unicode, so we don't need to do any
1640    mapping.  Is this ever wrong?
1641
1642    *PC holds the 'u' or 'U', *PSTR points to the byte after it, and
1643    LIMIT is the end of the string or charconst.  *PSTR is updated to
1644    point after the UCS on return, and the UCS is written into *PC.  */
1645
1646 static int
1647 maybe_read_ucs (pfile, pstr, limit, pc)
1648      cpp_reader *pfile;
1649      const unsigned char **pstr;
1650      const unsigned char *limit;
1651      unsigned int *pc;
1652 {
1653   const unsigned char *p = *pstr;
1654   unsigned int code = 0;
1655   unsigned int c = *pc, length;
1656
1657   /* Only attempt to interpret a UCS for C++ and C99.  */
1658   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1659     return 1;
1660
1661   if (CPP_WTRADITIONAL (pfile))
1662     cpp_error (pfile, DL_WARNING,
1663                "the meaning of '\\%c' is different in traditional C", c);
1664
       /* '\u' takes four hex digits; '\U' takes eight.  */
1665   length = (c == 'u' ? 4: 8);
1666
1667   if ((size_t) (limit - p) < length)
1668     {
1669       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1670       /* Skip to the end to avoid more diagnostics.  */
1671       p = limit;
1672     }
1673   else
1674     {
           /* LENGTH counts down to zero only if every digit was valid;
              the range check below uses that to know whether an error
              was already given.  */
1675       for (; length; length--, p++)
1676         {
1677           c = *p;
1678           if (ISXDIGIT (c))
1679             code = (code << 4) + hex_digit_value (c);
1680           else
1681             {
1682               cpp_error (pfile, DL_ERROR,
1683                          "non-hex digit '%c' in universal-character-name", c);
1684               /* We shouldn't skip in case there are multibyte chars.  */
1685               break;
1686             }
1687         }
1688     }
1689
1690 #ifdef TARGET_EBCDIC
1691   cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1692   code = 0x3f;  /* EBCDIC invalid character */
1693 #else
1694  /* True extended characters are OK.  */
       /* That is, 0xa0 and above, excluding values with the top bit set
          and the range 0xD800-0xDFFF (the UTF-16 surrogate range).  */
1695   if (code >= 0xa0
1696       && !(code & 0x80000000)
1697       && !(code >= 0xD800 && code <= 0xDFFF))
1698     ;
1699   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1700      hex escapes so that this also works with EBCDIC hosts.  */
1701   else if (code == 0x24 || code == 0x40 || code == 0x60)
1702     ;
1703   /* Don't give another error if one occurred above.  */
1704   else if (length == 0)
1705     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1706 #endif
1707
       /* Both outputs are stored even after an error has been
          reported.  */
1708   *pstr = p;
1709   *pc = code;
1710   return 0;
1711 }
1712
1713 /* Interpret an escape sequence, and return its value.  PSTR points to
1714    the input pointer, which is just after the backslash.  LIMIT is how
1715    much text we have.  MASK is a bitmask for the precision for the
1716    destination type (char or wchar_t).
1717
1718    Handles all relevant diagnostics.

        On return *PSTR has been advanced past the characters consumed.
        The first character after the backslash is read without testing
        it against LIMIT, so the caller must supply at least one.  */
1719 unsigned int
1720 cpp_parse_escape (pfile, pstr, limit, mask)
1721      cpp_reader *pfile;
1722      const unsigned char **pstr;
1723      const unsigned char *limit;
1724      unsigned HOST_WIDE_INT mask;
1725 {
1726   int unknown = 0;
1727   const unsigned char *str = *pstr;
1728   unsigned int c = *str++;
1729
1730   switch (c)
1731     {
         /* These escapes stand for the character itself.  */
1732     case '\\': case '\'': case '"': case '?': break;
1733     case 'b': c = TARGET_BS;      break;
1734     case 'f': c = TARGET_FF;      break;
1735     case 'n': c = TARGET_NEWLINE; break;
1736     case 'r': c = TARGET_CR;      break;
1737     case 't': c = TARGET_TAB;     break;
1738     case 'v': c = TARGET_VT;      break;
1739
1740     case '(': case '{': case '[': case '%':
1741       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1742          '\%' is used to prevent SCCS from getting confused.  */
           /* Reported as unknown only when pedantic; the value is the
              character itself either way.  */
1743       unknown = CPP_PEDANTIC (pfile);
1744       break;
1745
1746     case 'a':
1747       if (CPP_WTRADITIONAL (pfile))
1748         cpp_error (pfile, DL_WARNING,
1749                    "the meaning of '\\a' is different in traditional C");
1750       c = TARGET_BELL;
1751       break;
1752
1753     case 'e': case 'E':
1754       if (CPP_PEDANTIC (pfile))
1755         cpp_error (pfile, DL_PEDWARN,
1756                    "non-ISO-standard escape sequence, '\\%c'", c);
1757       c = TARGET_ESC;
1758       break;
1759
1760     case 'u': case 'U':
           /* maybe_read_ucs returns 1, leaving STR and C untouched, when
              UCNs are not enabled; the escape is then reported as
              unknown below.  */
1761       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1762       break;
1763
1764     case 'x':
1765       if (CPP_WTRADITIONAL (pfile))
1766         cpp_error (pfile, DL_WARNING,
1767                    "the meaning of '\\x' is different in traditional C");
1768
1769         {
1770           unsigned int i = 0, overflow = 0;
1771           int digits_found = 0;
1772
               /* A hex escape takes every hex digit that follows.  */
1773           while (str < limit)
1774             {
1775               c = *str;
1776               if (! ISXDIGIT (c))
1777                 break;
1778               str++;
                   /* Nonzero when the top four bits of I are set, i.e.
                      the shift on the next line is about to lose
                      them.  */
1779               overflow |= i ^ (i << 4 >> 4);
1780               i = (i << 4) + hex_digit_value (c);
1781               digits_found = 1;
1782             }
1783
1784           if (!digits_found)
1785             cpp_error (pfile, DL_ERROR,
1786                        "\\x used with no following hex digits");
1787
1788           if (overflow | (i != (i & mask)))
1789             {
1790               cpp_error (pfile, DL_PEDWARN,
1791                          "hex escape sequence out of range");
1792               i &= mask;
1793             }
1794           c = i;
1795         }
1796       break;
1797
1798     case '0':  case '1':  case '2':  case '3':
1799     case '4':  case '5':  case '6':  case '7':
1800       {
1801         unsigned int i = c - '0';
1802         int count = 0;
1803
             /* The first digit is already in I; accept at most two more,
                for three octal digits in total.  */
1804         while (str < limit && ++count < 3)
1805           {
1806             c = *str;
1807             if (c < '0' || c > '7')
1808               break;
1809             str++;
1810             i = (i << 3) + c - '0';
1811           }
1812
1813         if (i != (i & mask))
1814           {
1815             cpp_error (pfile, DL_PEDWARN,
1816                        "octal escape sequence out of range");
1817             i &= mask;
1818           }
1819         c = i;
1820       }
1821       break;
1822
1823     default:
1824       unknown = 1;
1825       break;
1826     }
1827
1828   if (unknown)
1829     {
           /* Show the character when it is printable, otherwise its
              octal code.  */
1830       if (ISGRAPH (c))
1831         cpp_error (pfile, DL_PEDWARN, "unknown escape sequence '\\%c'", c);
1832       else
1833         cpp_error (pfile, DL_PEDWARN, "unknown escape sequence: '\\%03o'", c);
1834     }
1835
       /* Hex and octal values were already masked above, so this can
          only fire for the other escapes.  The value is returned
          unmasked in that case.  */
1836   if (c > mask)
1837     cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
1838
1839   *pstr = str;
1840   return c;
1841 }
1842
1843 #ifndef MAX_CHAR_TYPE_SIZE
     /* Fall back to CHAR_TYPE_SIZE when no MAX_CHAR_TYPE_SIZE has been
        defined.  cpp_interpret_charconst uses it as the width in bits of
        a narrow character constant.  */
1844 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1845 #endif
1846
1847 #ifndef MAX_WCHAR_TYPE_SIZE
     /* Likewise for wide character constants, falling back to
        WCHAR_TYPE_SIZE.  */
1848 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1849 #endif
1850
1851 /* Interpret a (possibly wide) character constant in TOKEN.
1852    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN points
1853    to a variable that is filled in with the number of characters seen.  */
1854 HOST_WIDE_INT
1855 cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
1856      cpp_reader *pfile;
1857      const cpp_token *token;
1858      int warn_multi;
1859      unsigned int *pchars_seen;
1860 {
1861   const unsigned char *str = token->val.str.text;
1862   const unsigned char *limit = str + token->val.str.len;
1863   unsigned int chars_seen = 0;
1864   unsigned int width, max_chars, c;
1865   unsigned HOST_WIDE_INT mask;
1866   HOST_WIDE_INT result = 0;
1867   bool unsigned_p;
1868
1869 #ifdef MULTIBYTE_CHARS
1870   (void) local_mbtowc (NULL, NULL, 0);
1871 #endif
1872
1873   /* Width in bits.  */
1874   if (token->type == CPP_CHAR)
1875     {
1876       width = MAX_CHAR_TYPE_SIZE;
1877       unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1878     }
1879   else
1880     {
1881       width = MAX_WCHAR_TYPE_SIZE;
1882       unsigned_p = WCHAR_UNSIGNED;
1883     }
1884
1885   if (width < HOST_BITS_PER_WIDE_INT)
1886     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1887   else
1888     mask = ~0;
1889   max_chars = HOST_BITS_PER_WIDE_INT / width;
1890
1891   while (str < limit)
1892     {
1893 #ifdef MULTIBYTE_CHARS
1894       wchar_t wc;
1895       int char_len;
1896
1897       char_len = local_mbtowc (&wc, str, limit - str);
1898       if (char_len == -1)
1899         {
1900           cpp_error (pfile, DL_WARNING,
1901                      "ignoring invalid multibyte character");
1902           c = *str++;
1903         }
1904       else
1905         {
1906           str += char_len;
1907           c = wc;
1908         }
1909 #else
1910       c = *str++;
1911 #endif
1912
1913       if (c == '\\')
1914         c = cpp_parse_escape (pfile, &str, limit, mask);
1915
1916 #ifdef MAP_CHARACTER
1917       if (ISPRINT (c))
1918         c = MAP_CHARACTER (c);
1919 #endif
1920
1921       /* Merge character into result; ignore excess chars.  */
1922       if (++chars_seen <= max_chars)
1923         {
1924           if (width < HOST_BITS_PER_WIDE_INT)
1925             result = (result << width) | (c & mask);
1926           else
1927             result = c;
1928         }
1929     }
1930
1931   if (chars_seen == 0)
1932     cpp_error (pfile, DL_ERROR, "empty character constant");
1933   else if (chars_seen > max_chars)
1934     {
1935       chars_seen = max_chars;
1936       cpp_error (pfile, DL_WARNING, "character constant too long");
1937     }
1938   else if (chars_seen > 1 && warn_multi)
1939     cpp_error (pfile, DL_WARNING, "multi-character character constant");
1940
1941   /* If relevant type is signed, sign-extend the constant.  */
1942   if (chars_seen)
1943     {
1944       unsigned int nbits = chars_seen * width;
1945
1946       mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
1947       if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
1948         result &= mask;
1949       else
1950         result |= ~mask;
1951     }
1952
1953   *pchars_seen = chars_seen;
1954   return result;
1955 }
1956
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   growing BUFF: MIN_EXTRA plus twice BUFF's remaining room.  Every
   macro argument is parenthesized so that arbitrary expressions may be
   passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1971
/* Alignment probe.  The union member U follows a single char, so its
   offset within the struct is the alignment the compiler requires for
   the stricter of a pointer and a double.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment applied to buffer sizes by new_buff.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit-mask form is only
   correct when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1984
1985 /* Create a new allocation buffer.  Place the control block at the end
1986    of the buffer, so that buffer overflows will cause immediate chaos.  */
1987 static _cpp_buff *
1988 new_buff (len)
1989      size_t len;
1990 {
1991   _cpp_buff *result;
1992   unsigned char *base;
1993
1994   if (len < MIN_BUFF_SIZE)
1995     len = MIN_BUFF_SIZE;
1996   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1997
1998   base = xmalloc (len + sizeof (_cpp_buff));
1999   result = (_cpp_buff *) (base + len);
2000   result->base = base;
2001   result->cur = base;
2002   result->limit = base + len;
2003   result->next = NULL;
2004   return result;
2005 }
2006
2007 /* Place a chain of unwanted allocation buffers on the free list.  */
2008 void
2009 _cpp_release_buff (pfile, buff)
2010      cpp_reader *pfile;
2011      _cpp_buff *buff;
2012 {
2013   _cpp_buff *end = buff;
2014
2015   while (end->next)
2016     end = end->next;
2017   end->next = pfile->free_buffs;
2018   pfile->free_buffs = buff;
2019 }
2020
2021 /* Return a free buffer of size at least MIN_SIZE.  */
2022 _cpp_buff *
2023 _cpp_get_buff (pfile, min_size)
2024      cpp_reader *pfile;
2025      size_t min_size;
2026 {
2027   _cpp_buff *result, **p;
2028
2029   for (p = &pfile->free_buffs;; p = &(*p)->next)
2030     {
2031       size_t size;
2032
2033       if (*p == NULL)
2034         return new_buff (min_size);
2035       result = *p;
2036       size = result->limit - result->base;
2037       /* Return a buffer that's big enough, but don't waste one that's
2038          way too big.  */
2039       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2040         break;
2041     }
2042
2043   *p = result->next;
2044   result->next = NULL;
2045   result->cur = result->base;
2046   return result;
2047 }
2048
2049 /* Creates a new buffer with enough space to hold the uncommitted
2050    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2051    the excess bytes to the new buffer.  Chains the new buffer after
2052    BUFF, and returns the new buffer.  */
2053 _cpp_buff *
2054 _cpp_append_extend_buff (pfile, buff, min_extra)
2055      cpp_reader *pfile;
2056      _cpp_buff *buff;
2057      size_t min_extra;
2058 {
2059   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2060   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2061
2062   buff->next = new_buff;
2063   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2064   return new_buff;
2065 }
2066
2067 /* Creates a new buffer with enough space to hold the uncommitted
2068    remaining bytes of the buffer pointed to by BUFF, and at least
2069    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2070    Chains the new buffer before the buffer pointed to by BUFF, and
2071    updates the pointer to point to the new buffer.  */
2072 void
2073 _cpp_extend_buff (pfile, pbuff, min_extra)
2074      cpp_reader *pfile;
2075      _cpp_buff **pbuff;
2076      size_t min_extra;
2077 {
2078   _cpp_buff *new_buff, *old_buff = *pbuff;
2079   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2080
2081   new_buff = _cpp_get_buff (pfile, size);
2082   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2083   new_buff->next = old_buff;
2084   *pbuff = new_buff;
2085 }
2086
2087 /* Free a chain of buffers starting at BUFF.  */
2088 void
2089 _cpp_free_buff (buff)
2090      _cpp_buff *buff;
2091 {
2092   _cpp_buff *next;
2093
2094   for (; buff; buff = next)
2095     {
2096       next = buff->next;
2097       free (buff->base);
2098     }
2099 }
2100
2101 /* Allocate permanent, unaligned storage of length LEN.  */
2102 unsigned char *
2103 _cpp_unaligned_alloc (pfile, len)
2104      cpp_reader *pfile;
2105      size_t len;
2106 {
2107   _cpp_buff *buff = pfile->u_buff;
2108   unsigned char *result = buff->cur;
2109
2110   if (len > (size_t) (buff->limit - result))
2111     {
2112       buff = _cpp_get_buff (pfile, len);
2113       buff->next = pfile->u_buff;
2114       pfile->u_buff = buff;
2115       result = buff->cur;
2116     }
2117
2118   buff->cur = result + len;
2119   return result;
2120 }
2121
2122 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2123    That buffer is used for growing allocations when saving macro
2124    replacement lists in a #define, and when parsing an answer to an
2125    assertion in #assert, #unassert or #if (and therefore possibly
2126    whilst expanding macros).  It therefore must not be used by any
2127    code that they might call: specifically the lexer and the guts of
2128    the macro expander.
2129
2130    All existing other uses clearly fit this restriction: storing
2131    registered pragmas during initialization.  */
2132 unsigned char *
2133 _cpp_aligned_alloc (pfile, len)
2134      cpp_reader *pfile;
2135      size_t len;
2136 {
2137   _cpp_buff *buff = pfile->a_buff;
2138   unsigned char *result = buff->cur;
2139
2140   if (len > (size_t) (buff->limit - result))
2141     {
2142       buff = _cpp_get_buff (pfile, len);
2143       buff->next = pfile->a_buff;
2144       pfile->a_buff = buff;
2145       result = buff->cur;
2146     }
2147
2148   buff->cur = result + len;
2149   return result;
2150 }