gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "coretypes.h"
  26 #include "tm.h"
  27 #include "cpplib.h"
  28 #include "cpphash.h"
  29
  30 #ifdef MULTIBYTE_CHARS
  31 #include "mbchar.h"
  32 #include <locale.h>
  33 #endif
  34
  35 /* Tokens with SPELL_STRING store their spelling in the token list,
  36    and it's length in the token->val.name.len.  */
  37 enum spell_type
  38 {
  39   SPELL_OPERATOR = 0,
  40   SPELL_CHAR,
  41   SPELL_IDENT,
  42   SPELL_NUMBER,
  43   SPELL_STRING,
  44   SPELL_NONE
  45 };
  46
  47 struct token_spelling
  48 {
  49   enum spell_type category;
  50   const unsigned char *name;
  51 };
  52
  53 static const unsigned char *const digraph_spellings[] =
  54 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  55
  56 #define OP(e, s) { SPELL_OPERATOR, U s           },
  57 #define TK(e, s) { s,              U STRINGX (e) },
  58 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  59 #undef OP
  60 #undef TK
  61
  62 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  63 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  64 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  65
  66 static void handle_newline PARAMS ((cpp_reader *));
  67 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  68 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  69
  70 static int skip_block_comment PARAMS ((cpp_reader *));
  71 static int skip_line_comment PARAMS ((cpp_reader *));
  72 static void adjust_column PARAMS ((cpp_reader *));
  73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  76                                   unsigned int *));
  77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  80 static bool trigraph_p PARAMS ((cpp_reader *));
  81 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  82                                   cppchar_t));
  83 static bool continue_after_nul PARAMS ((cpp_reader *));
  84 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  85 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  86                                    const unsigned char *, cppchar_t *));
  87 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  88
  89 static unsigned int hex_digit_value PARAMS ((unsigned int));
  90 static _cpp_buff *new_buff PARAMS ((size_t));
  91
  92 /* Change to the native locale for multibyte conversions.  */
  93 void
  94 _cpp_init_mbchar ()
  95 {
  96 #ifdef MULTIBYTE_CHARS
  97   setlocale (LC_CTYPE, "");
  98   GET_ENVIRONMENT (literal_codeset, "LANG");
  99 #endif
 100 }
 101
 102 /* Utility routine:
 103
 104    Compares, the token TOKEN to the NUL-terminated string STRING.
 105    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 106 int
 107 cpp_ideq (token, string)
 108      const cpp_token *token;
 109      const char *string;
 110 {
 111   if (token->type != CPP_NAME)
 112     return 0;
 113
 114   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
 115 }
 116
 117 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 118    Returns with buffer->cur pointing to the character immediately
 119    following the newline (combination).  */
 120 static void
 121 handle_newline (pfile)
 122      cpp_reader *pfile;
 123 {
 124   cpp_buffer *buffer = pfile->buffer;
 125
 126   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 127      only accept CR-LF; maybe we should fall back to that behavior?  */
 128   if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 129     buffer->cur++;
 130
 131   buffer->line_base = buffer->cur;
 132   buffer->col_adjust = 0;
 133   pfile->line++;
 134 }
 135
 136 /* Subroutine of skip_escaped_newlines; called when a 3-character
 137    sequence beginning with "??" is encountered.  buffer->cur points to
 138    the second '?'.
 139
 140    Warn if necessary, and returns true if the sequence forms a
 141    trigraph and the trigraph should be honored.  */
 142 static bool
 143 trigraph_p (pfile)
 144      cpp_reader *pfile;
 145 {
 146   cpp_buffer *buffer = pfile->buffer;
 147   cppchar_t from_char = buffer->cur[1];
 148   bool accept;
 149
 150   if (!_cpp_trigraph_map[from_char])
 151     return false;
 152
 153   accept = CPP_OPTION (pfile, trigraphs);
 154
 155   /* Don't warn about trigraphs in comments.  */
 156   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 157     {
 158       if (accept)
 159         cpp_error_with_line (pfile, DL_WARNING,
 160                              pfile->line, CPP_BUF_COL (buffer) - 1,
 161                              "trigraph ??%c converted to %c",
 162                              (int) from_char,
 163                              (int) _cpp_trigraph_map[from_char]);
 164       else if (buffer->cur != buffer->last_Wtrigraphs)
 165         {
 166           buffer->last_Wtrigraphs = buffer->cur;
 167           cpp_error_with_line (pfile, DL_WARNING,
 168                                pfile->line, CPP_BUF_COL (buffer) - 1,
 169                                "trigraph ??%c ignored", (int) from_char);
 170         }
 171     }
 172
 173   return accept;
 174 }
 175
 176 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 177    lie in buffer->cur[-1].  Returns the next byte, which will be in
 178    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 179    2 of the ISO C standard.  */
 180 static cppchar_t
 181 skip_escaped_newlines (pfile)
 182      cpp_reader *pfile;
 183 {
 184   cpp_buffer *buffer = pfile->buffer;
 185   cppchar_t next = buffer->cur[-1];
 186
 187   /* Only do this if we apply stages 1 and 2.  */
 188   if (!buffer->from_stage3)
 189     {
 190       const unsigned char *saved_cur;
 191       cppchar_t next1;
 192
 193       do
 194         {
 195           if (next == '?')
 196             {
 197               if (buffer->cur[0] != '?' || !trigraph_p (pfile))
 198                 break;
 199
 200               /* Translate the trigraph.  */
 201               next = _cpp_trigraph_map[buffer->cur[1]];
 202               buffer->cur += 2;
 203               if (next != '\\')
 204                 break;
 205             }
 206
 207           if (buffer->cur == buffer->rlimit)
 208             break;
 209
 210           /* We have a backslash, and room for at least one more
 211              character.  Skip horizontal whitespace.  */
 212           saved_cur = buffer->cur;
 213           do
 214             next1 = *buffer->cur++;
 215           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 216
 217           if (!is_vspace (next1))
 218             {
 219               buffer->cur = saved_cur;
 220               break;
 221             }
 222
 223           if (saved_cur != buffer->cur - 1
 224               && !pfile->state.lexing_comment)
 225             cpp_error (pfile, DL_WARNING,
 226                        "backslash and newline separated by space");
 227
 228           handle_newline (pfile);
 229           buffer->backup_to = buffer->cur;
 230           if (buffer->cur == buffer->rlimit)
 231             {
 232               cpp_error (pfile, DL_PEDWARN,
 233                          "backslash-newline at end of file");
 234               next = EOF;
 235             }
 236           else
 237             next = *buffer->cur++;
 238         }
 239       while (next == '\\' || next == '?');
 240     }
 241
 242   return next;
 243 }
 244
 245 /* Obtain the next character, after trigraph conversion and skipping
 246    an arbitrarily long string of escaped newlines.  The common case of
 247    no trigraphs or escaped newlines falls through quickly.  On return,
 248    buffer->backup_to points to where to return to if the character is
 249    not to be processed.  */
 250 static cppchar_t
 251 get_effective_char (pfile)
 252      cpp_reader *pfile;
 253 {
 254   cppchar_t next;
 255   cpp_buffer *buffer = pfile->buffer;
 256
 257   buffer->backup_to = buffer->cur;
 258   next = *buffer->cur++;
 259   if (__builtin_expect (next == '?' || next == '\\', 0))
 260     next = skip_escaped_newlines (pfile);
 261
 262   return next;
 263 }
 264
 265 /* Skip a C-style block comment.  We find the end of the comment by
 266    seeing if an asterisk is before every '/' we encounter.  Returns
 267    nonzero if comment terminated by EOF, zero otherwise.  */
 268 static int
 269 skip_block_comment (pfile)
 270      cpp_reader *pfile;
 271 {
 272   cpp_buffer *buffer = pfile->buffer;
 273   cppchar_t c = EOF, prevc = EOF;
 274
 275   pfile->state.lexing_comment = 1;
 276   while (buffer->cur != buffer->rlimit)
 277     {
 278       prevc = c, c = *buffer->cur++;
 279
 280       /* FIXME: For speed, create a new character class of characters
 281          of interest inside block comments.  */
 282       if (c == '?' || c == '\\')
 283         c = skip_escaped_newlines (pfile);
 284
 285       /* People like decorating comments with '*', so check for '/'
 286          instead for efficiency.  */
 287       if (c == '/')
 288         {
 289           if (prevc == '*')
 290             break;
 291
 292           /* Warn about potential nested comments, but not if the '/'
 293              comes immediately before the true comment delimiter.
 294              Don't bother to get it right across escaped newlines.  */
 295           if (CPP_OPTION (pfile, warn_comments)
 296               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 297             cpp_error_with_line (pfile, DL_WARNING,
 298                                  pfile->line, CPP_BUF_COL (buffer),
 299                                  "\"/*\" within comment");
 300         }
 301       else if (is_vspace (c))
 302         handle_newline (pfile);
 303       else if (c == '\t')
 304         adjust_column (pfile);
 305     }
 306
 307   pfile->state.lexing_comment = 0;
 308   return c != '/' || prevc != '*';
 309 }
 310
 311 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 312    terminating newline.  Handles escaped newlines.  Returns nonzero
 313    if a multiline comment.  */
 314 static int
 315 skip_line_comment (pfile)
 316      cpp_reader *pfile;
 317 {
 318   cpp_buffer *buffer = pfile->buffer;
 319   unsigned int orig_line = pfile->line;
 320   cppchar_t c;
 321 #ifdef MULTIBYTE_CHARS
 322   wchar_t wc;
 323   int char_len;
 324 #endif
 325
 326   pfile->state.lexing_comment = 1;
 327 #ifdef MULTIBYTE_CHARS
 328   /* Reset multibyte conversion state.  */
 329   (void) local_mbtowc (NULL, NULL, 0);
 330 #endif
 331   do
 332     {
 333       if (buffer->cur == buffer->rlimit)
 334         goto at_eof;
 335
 336 #ifdef MULTIBYTE_CHARS
 337       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 338                                buffer->rlimit - buffer->cur);
 339       if (char_len == -1)
 340         {
 341           cpp_error (pfile, DL_WARNING,
 342                      "ignoring invalid multibyte character");
 343           char_len = 1;
 344           c = *buffer->cur++;
 345         }
 346       else
 347         {
 348           buffer->cur += char_len;
 349           c = wc;
 350         }
 351 #else
 352       c = *buffer->cur++;
 353 #endif
 354       if (c == '?' || c == '\\')
 355         c = skip_escaped_newlines (pfile);
 356     }
 357   while (!is_vspace (c));
 358
 359   /* Step back over the newline, except at EOF.  */
 360   buffer->cur--;
 361  at_eof:
 362
 363   pfile->state.lexing_comment = 0;
 364   return orig_line != pfile->line;
 365 }
 366
 367 /* pfile->buffer->cur is one beyond the \t character.  Update
 368    col_adjust so we track the column correctly.  */
 369 static void
 370 adjust_column (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 375
 376   /* Round it up to multiple of the tabstop, but subtract 1 since the
 377      tab itself occupies a character position.  */
 378   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 379                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 380 }
 381
 382 /* Skips whitespace, saving the next non-whitespace character.
 383    Adjusts pfile->col_adjust to account for tabs.  Without this,
 384    tokens might be assigned an incorrect column.  */
 385 static int
 386 skip_whitespace (pfile, c)
 387      cpp_reader *pfile;
 388      cppchar_t c;
 389 {
 390   cpp_buffer *buffer = pfile->buffer;
 391   unsigned int warned = 0;
 392
 393   do
 394     {
 395       /* Horizontal space always OK.  */
 396       if (c == ' ')
 397         ;
 398       else if (c == '\t')
 399         adjust_column (pfile);
 400       /* Just \f \v or \0 left.  */
 401       else if (c == '\0')
 402         {
 403           if (buffer->cur - 1 == buffer->rlimit)
 404             return 0;
 405           if (!warned)
 406             {
 407               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 408               warned = 1;
 409             }
 410         }
 411       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 412         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 413                              CPP_BUF_COL (buffer),
 414                              "%s in preprocessing directive",
 415                              c == '\f' ? "form feed" : "vertical tab");
 416
 417       c = *buffer->cur++;
 418     }
 419   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 420   while (is_nvspace (c));
 421
 422   buffer->cur--;
 423   return 1;
 424 }
 425
 426 /* See if the characters of a number token are valid in a name (no
 427    '.', '+' or '-').  */
 428 static int
 429 name_p (pfile, string)
 430      cpp_reader *pfile;
 431      const cpp_string *string;
 432 {
 433   unsigned int i;
 434
 435   for (i = 0; i < string->len; i++)
 436     if (!is_idchar (string->text[i]))
 437       return 0;
 438
 439   return 1;
 440 }
 441
 442 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 443    a critical inner loop.  The common case is an identifier which has
 444    not been split by backslash-newline, does not contain a dollar
 445    sign, and has already been scanned (roughly 10:1 ratio of
 446    seen:unseen identifiers in normal code; the distribution is
 447    Poisson-like).  Second most common case is a new identifier, not
 448    split and no dollar sign.  The other possibilities are rare and
 449    have been relegated to parse_slow.  */
 450 static cpp_hashnode *
 451 parse_identifier (pfile)
 452      cpp_reader *pfile;
 453 {
 454   cpp_hashnode *result;
 455   const uchar *cur, *base;
 456
 457   /* Fast-path loop.  Skim over a normal identifier.
 458      N.B. ISIDNUM does not include $.  */
 459   cur = pfile->buffer->cur;
 460   while (ISIDNUM (*cur))
 461     cur++;
 462
 463   /* Check for slow-path cases.  */
 464   if (*cur == '?' || *cur == '\\' || *cur == '$')
 465     {
 466       unsigned int len;
 467
 468       base = parse_slow (pfile, cur, 0, &len);
 469       result = (cpp_hashnode *)
 470         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 471     }
 472   else
 473     {
 474       base = pfile->buffer->cur - 1;
 475       pfile->buffer->cur = cur;
 476       result = (cpp_hashnode *)
 477         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 478     }
 479
 480   /* Rarely, identifiers require diagnostics when lexed.
 481      XXX Has to be forced out of the fast path.  */
 482   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 483                         && !pfile->state.skipping, 0))
 484     {
 485       /* It is allowed to poison the same identifier twice.  */
 486       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 487         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 488                    NODE_NAME (result));
 489
 490       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 491          replacement list of a variadic macro.  */
 492       if (result == pfile->spec_nodes.n__VA_ARGS__
 493           && !pfile->state.va_args_ok)
 494         cpp_error (pfile, DL_PEDWARN,
 495         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 496     }
 497
 498   return result;
 499 }
 500
 501 /* Slow path.  This handles numbers and identifiers which have been
 502    split, or contain dollar signs.  The part of the token from
 503    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 504    1 if it's a number, and 2 if it has a leading period.  Returns a
 505    pointer to the token's NUL-terminated spelling in permanent
 506    storage, and sets PLEN to its length.  */
 507 static uchar *
 508 parse_slow (pfile, cur, number_p, plen)
 509      cpp_reader *pfile;
 510      const uchar *cur;
 511      int number_p;
 512      unsigned int *plen;
 513 {
 514   cpp_buffer *buffer = pfile->buffer;
 515   const uchar *base = buffer->cur - 1;
 516   struct obstack *stack = &pfile->hash_table->stack;
 517   unsigned int c, prevc, saw_dollar = 0;
 518
 519   /* Place any leading period.  */
 520   if (number_p == 2)
 521     obstack_1grow (stack, '.');
 522
 523   /* Copy the part of the token which is known to be okay.  */
 524   obstack_grow (stack, base, cur - base);
 525
 526   /* Now process the part which isn't.  We are looking at one of
 527      '$', '\\', or '?' on entry to this loop.  */
 528   prevc = cur[-1];
 529   c = *cur++;
 530   buffer->cur = cur;
 531   for (;;)
 532     {
 533       /* Potential escaped newline?  */
 534       buffer->backup_to = buffer->cur - 1;
 535       if (c == '?' || c == '\\')
 536         c = skip_escaped_newlines (pfile);
 537
 538       if (!is_idchar (c))
 539         {
 540           if (!number_p)
 541             break;
 542           if (c != '.' && !VALID_SIGN (c, prevc))
 543             break;
 544         }
 545
 546       /* Handle normal identifier characters in this loop.  */
 547       do
 548         {
 549           prevc = c;
 550           obstack_1grow (stack, c);
 551
 552           if (c == '$')
 553             saw_dollar++;
 554
 555           c = *buffer->cur++;
 556         }
 557       while (is_idchar (c));
 558     }
 559
 560   /* Step back over the unwanted char.  */
 561   BACKUP ();
 562
 563   /* $ is not an identifier character in the standard, but is commonly
 564      accepted as an extension.  Don't warn about it in skipped
 565      conditional blocks.  */
 566   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 567     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 568
 569   /* Identifiers and numbers are null-terminated.  */
 570   *plen = obstack_object_size (stack);
 571   obstack_1grow (stack, '\0');
 572   return obstack_finish (stack);
 573 }
 574
 575 /* Parse a number, beginning with character C, skipping embedded
 576    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 577    before C.  Place the result in NUMBER.  */
 578 static void
 579 parse_number (pfile, number, leading_period)
 580      cpp_reader *pfile;
 581      cpp_string *number;
 582      int leading_period;
 583 {
 584   const uchar *cur;
 585
 586   /* Fast-path loop.  Skim over a normal number.
 587      N.B. ISIDNUM does not include $.  */
 588   cur = pfile->buffer->cur;
 589   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 590     cur++;
 591
 592   /* Check for slow-path cases.  */
 593   if (*cur == '?' || *cur == '\\' || *cur == '$')
 594     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 595   else
 596     {
 597       const uchar *base = pfile->buffer->cur - 1;
 598       uchar *dest;
 599
 600       number->len = cur - base + leading_period;
 601       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 602       dest[number->len] = '\0';
 603       number->text = dest;
 604
 605       if (leading_period)
 606         *dest++ = '.';
 607       memcpy (dest, base, cur - base);
 608       pfile->buffer->cur = cur;
 609     }
 610 }
 611
 612 /* Subroutine of parse_string.  */
 613 static int
 614 unescaped_terminator_p (pfile, dest)
 615      cpp_reader *pfile;
 616      const unsigned char *dest;
 617 {
 618   const unsigned char *start, *temp;
 619
 620   /* In #include-style directives, terminators are not escapable.  */
 621   if (pfile->state.angled_headers)
 622     return 1;
 623
 624   start = BUFF_FRONT (pfile->u_buff);
 625
 626   /* An odd number of consecutive backslashes represents an escaped
 627      terminator.  */
 628   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 629     ;
 630
 631   return ((dest - temp) & 1) == 0;
 632 }
 633
 634 /* Parses a string, character constant, or angle-bracketed header file
 635    name.  Handles embedded trigraphs and escaped newlines.  The stored
 636    string is guaranteed NUL-terminated, but it is not guaranteed that
 637    this is the first NUL since embedded NULs are preserved.
 638
 639    When this function returns, buffer->cur points to the next
 640    character to be processed.  */
 641 static void
 642 parse_string (pfile, token, terminator)
 643      cpp_reader *pfile;
 644      cpp_token *token;
 645      cppchar_t terminator;
 646 {
 647   cpp_buffer *buffer = pfile->buffer;
 648   unsigned char *dest, *limit;
 649   cppchar_t c;
 650   bool warned_nulls = false;
 651 #ifdef MULTIBYTE_CHARS
 652   wchar_t wc;
 653   int char_len;
 654 #endif
 655
 656   dest = BUFF_FRONT (pfile->u_buff);
 657   limit = BUFF_LIMIT (pfile->u_buff);
 658
 659 #ifdef MULTIBYTE_CHARS
 660   /* Reset multibyte conversion state.  */
 661   (void) local_mbtowc (NULL, NULL, 0);
 662 #endif
 663   for (;;)
 664     {
 665       /* We need room for another char, possibly the terminating NUL.  */
 666       if ((size_t) (limit - dest) < 1)
 667         {
 668           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 669           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 670           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 671           limit = BUFF_LIMIT (pfile->u_buff);
 672         }
 673
 674 #ifdef MULTIBYTE_CHARS
 675       char_len = local_mbtowc (&wc, (const char *) buffer->cur,
 676                                buffer->rlimit - buffer->cur);
 677       if (char_len == -1)
 678         {
 679           cpp_error (pfile, DL_WARNING,
 680                      "ignoring invalid multibyte character");
 681           char_len = 1;
 682           c = *buffer->cur++;
 683         }
 684       else
 685         {
 686           buffer->cur += char_len;
 687           c = wc;
 688         }
 689 #else
 690       c = *buffer->cur++;
 691 #endif
 692
 693       /* Handle trigraphs, escaped newlines etc.  */
 694       if (c == '?' || c == '\\')
 695         c = skip_escaped_newlines (pfile);
 696
 697       if (c == terminator)
 698         {
 699           if (unescaped_terminator_p (pfile, dest))
 700             break;
 701         }
 702       else if (is_vspace (c))
 703         {
 704           /* No string literal may extend over multiple lines.  In
 705              assembly language, suppress the error except for <>
 706              includes.  This is a kludge around not knowing where
 707              comments are.  */
 708         unterminated:
 709           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 710             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 711                        (int) terminator);
 712           buffer->cur--;
 713           break;
 714         }
 715       else if (c == '\0')
 716         {
 717           if (buffer->cur - 1 == buffer->rlimit)
 718             goto unterminated;
 719           if (!warned_nulls)
 720             {
 721               warned_nulls = true;
 722               cpp_error (pfile, DL_WARNING,
 723                          "null character(s) preserved in literal");
 724             }
 725         }
 726 #ifdef MULTIBYTE_CHARS
 727       if (char_len > 1)
 728         {
 729           for ( ; char_len > 0; --char_len)
 730             *dest++ = (*buffer->cur - char_len);
 731         }
 732       else
 733 #endif
 734         *dest++ = c;
 735     }
 736
 737   *dest = '\0';
 738
 739   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 740   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 741   BUFF_FRONT (pfile->u_buff) = dest + 1;
 742 }
 743
 744 /* The stored comment includes the comment start and any terminator.  */
 745 static void
 746 save_comment (pfile, token, from, type)
 747      cpp_reader *pfile;
 748      cpp_token *token;
 749      const unsigned char *from;
 750      cppchar_t type;
 751 {
 752   unsigned char *buffer;
 753   unsigned int len, clen;
 754
 755   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 756
 757   /* C++ comments probably (not definitely) have moved past a new
 758      line, which we don't want to save in the comment.  */
 759   if (is_vspace (pfile->buffer->cur[-1]))
 760     len--;
 761
 762   /* If we are currently in a directive, then we need to store all
 763      C++ comments as C comments internally, and so we need to
 764      allocate a little extra space in that case.
 765
 766      Note that the only time we encounter a directive here is
 767      when we are saving comments in a "#define".  */
 768   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 769
 770   buffer = _cpp_unaligned_alloc (pfile, clen);
 771
 772   token->type = CPP_COMMENT;
 773   token->val.str.len = clen;
 774   token->val.str.text = buffer;
 775
 776   buffer[0] = '/';
 777   memcpy (buffer + 1, from, len - 1);
 778
 779   /* Finish conversion to a C comment, if necessary.  */
 780   if (pfile->state.in_directive && type == '/')
 781     {
 782       buffer[1] = '*';
 783       buffer[clen - 2] = '*';
 784       buffer[clen - 1] = '/';
 785     }
 786 }
 787
 788 /* Allocate COUNT tokens for RUN.  */
 789 void
 790 _cpp_init_tokenrun (run, count)
 791      tokenrun *run;
 792      unsigned int count;
 793 {
 794   run->base = xnewvec (cpp_token, count);
 795   run->limit = run->base + count;
 796   run->next = NULL;
 797 }
 798
 799 /* Returns the next tokenrun, or creates one if there is none.  */
 800 static tokenrun *
 801 next_tokenrun (run)
 802      tokenrun *run;
 803 {
 804   if (run->next == NULL)
 805     {
 806       run->next = xnew (tokenrun);
 807       run->next->prev = run;
 808       _cpp_init_tokenrun (run->next, 250);
 809     }
 810
 811   return run->next;
 812 }
 813
 814 /* Allocate a single token that is invalidated at the same time as the
 815    rest of the tokens on the line.  Has its line and col set to the
 816    same as the last lexed token, so that diagnostics appear in the
 817    right place.  */
 818 cpp_token *
 819 _cpp_temp_token (pfile)
 820      cpp_reader *pfile;
 821 {
 822   cpp_token *old, *result;
 823
 824   old = pfile->cur_token - 1;
 825   if (pfile->cur_token == pfile->cur_run->limit)
 826     {
 827       pfile->cur_run = next_tokenrun (pfile->cur_run);
 828       pfile->cur_token = pfile->cur_run->base;
 829     }
 830
 831   result = pfile->cur_token++;
 832   result->line = old->line;
 833   result->col = old->col;
 834   return result;
 835 }
 836
 837 /* Lex a token into RESULT (external interface).  Takes care of issues
 838    like directive handling, token lookahead, multiple include
 839    optimization and skipping.  */
 840 const cpp_token *
 841 _cpp_lex_token (pfile)
 842      cpp_reader *pfile;
 843 {
 844   cpp_token *result;
 845
 846   for (;;)
 847     {
 848       if (pfile->cur_token == pfile->cur_run->limit)
 849         {
 850           pfile->cur_run = next_tokenrun (pfile->cur_run);
 851           pfile->cur_token = pfile->cur_run->base;
 852         }
 853
 854       if (pfile->lookaheads)
 855         {
 856           pfile->lookaheads--;
 857           result = pfile->cur_token++;
 858         }
 859       else
 860         result = _cpp_lex_direct (pfile);
 861
 862       if (result->flags & BOL)
 863         {
 864           /* Is this a directive.  If _cpp_handle_directive returns
 865              false, it is an assembler #.  */
 866           if (result->type == CPP_HASH
 867               /* 6.10.3 p 11: Directives in a list of macro arguments
 868                  gives undefined behavior.  This implementation
 869                  handles the directive as normal.  */
 870               && pfile->state.parsing_args != 1
 871               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 872             continue;
 873           if (pfile->cb.line_change && !pfile->state.skipping)
 874             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 875         }
 876
 877       /* We don't skip tokens in directives.  */
 878       if (pfile->state.in_directive)
 879         break;
 880
 881       /* Outside a directive, invalidate controlling macros.  At file
 882          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 883          get here and MI optimisation works.  */
 884       pfile->mi_valid = false;
 885
 886       if (!pfile->state.skipping || result->type == CPP_EOF)
 887         break;
 888     }
 889
 890   return result;
 891 }
 892
 893 /* A NUL terminates the current buffer.  For ISO preprocessing this is
 894    EOF, but for traditional preprocessing it indicates we need a line
 895    refill.  Returns TRUE to continue preprocessing a new buffer, FALSE
 896    to return a CPP_EOF to the caller.  */
 897 static bool
 898 continue_after_nul (pfile)
 899      cpp_reader *pfile;
 900 {
 901   cpp_buffer *buffer = pfile->buffer;
 902   bool more = false;
 903
 904   buffer->saved_flags = BOL;
 905   if (CPP_OPTION (pfile, traditional))
 906     {
 907       if (pfile->state.in_directive)
 908         return false;
 909
 910       _cpp_remove_overlay (pfile);
 911       more = _cpp_read_logical_line_trad (pfile);
 912       _cpp_overlay_buffer (pfile, pfile->out.base,
 913                            pfile->out.cur - pfile->out.base);
 914       pfile->line = pfile->out.first_line;
 915     }
 916   else
 917     {
 918       /* Stop parsing arguments with a CPP_EOF.  When we finally come
 919          back here, do the work of popping the buffer.  */
 920       if (!pfile->state.parsing_args)
 921         {
 922           if (buffer->cur != buffer->line_base)
 923             {
 924               /* Non-empty files should end in a newline.  Don't warn
 925                  for command line and _Pragma buffers.  */
 926               if (!buffer->from_stage3)
 927                 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 928               handle_newline (pfile);
 929             }
 930
 931           /* Similarly, finish an in-progress directive with CPP_EOF
 932              before popping the buffer.  */
 933           if (!pfile->state.in_directive && buffer->prev)
 934             {
 935               more = !buffer->return_at_eof;
 936               _cpp_pop_buffer (pfile);
 937             }
 938         }
 939     }
 940
 941   return more;
 942 }
 943
 944 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 945   do {                                          \
 946     if (get_effective_char (pfile) == CHAR)     \
 947       result->type = THEN_TYPE;                 \
 948     else                                        \
 949       {                                         \
 950         BACKUP ();                              \
 951         result->type = ELSE_TYPE;               \
 952       }                                         \
 953   } while (0)
 954
 955 /* Lex a token into pfile->cur_token, which is also incremented, to
 956    get diagnostics pointing to the correct location.
 957
 958    Does not handle issues such as token lookahead, multiple-include
 959    optimisation, directives, skipping etc.  This function is only
 960    suitable for use by _cpp_lex_token, and in special cases like
 961    lex_expansion_token which doesn't care for any of these issues.
 962
 963    When meeting a newline, returns CPP_EOF if parsing a directive,
 964    otherwise returns to the start of the token buffer if permissible.
 965    Returns the location of the lexed token.

        The body is a single switch on the current character C, which is
        entered and re-entered through four labels:

          fresh_line          reloads BUFFER from pfile->buffer, moves the
                              buffer's saved_flags into the token's flags
                              and clears them;
          update_tokens_line  stamps the token with pfile->line;
          skipped_white       fetches the next character into C and
                              records the token's column;
          trigraph            the switch itself; the '?' / '\\' case jumps
                              back here when skip_escaped_newlines hands
                              back some character other than those two.

        A comment either becomes a token of its own via save_comment
        (when pfile->state.save_comments is set) or is flagged on the
        token as PREV_WHITE, after which lexing resumes at
        update_tokens_line.  A '<' seen while pfile->state.angled_headers
        is set is lexed as a CPP_HEADER_NAME terminated by '>'.  Any
        character without a case of its own yields a CPP_OTHER token
        carrying that character in val.c.  */
 966 cpp_token *
 967 _cpp_lex_direct (pfile)
 968      cpp_reader *pfile;
 969 {
 970   cppchar_t c;
 971   cpp_buffer *buffer;
 972   const unsigned char *comment_start;
 973   cpp_token *result = pfile->cur_token++;
 974
 975  fresh_line:  /* Reload the buffer and pick up its saved flags.  */
 976   buffer = pfile->buffer;
 977   result->flags = buffer->saved_flags;
 978   buffer->saved_flags = 0;
 979  update_tokens_line:  /* Stamp the token with the current line.  */
 980   result->line = pfile->line;
 981
 982  skipped_white:  /* Fetch the next character and note its column.  */
 983   c = *buffer->cur++;
 984   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 985
 986  trigraph:  /* Dispatch on C.  */
 987   switch (c)
 988     {
 989     case ' ': case '\t': case '\f': case '\v': case '\0':
 990       result->flags |= PREV_WHITE;
 991       if (skip_whitespace (pfile, c))
 992         goto skipped_white;
 993
 994       /* End of buffer.  */
 995       buffer->cur--;
 996       if (continue_after_nul (pfile))
 997         goto fresh_line;
 998       result->type = CPP_EOF;
 999       break;
1000
1001     case '\n': case '\r':
1002       handle_newline (pfile);
1003       buffer->saved_flags = BOL;
1004       if (! pfile->state.in_directive)
1005         {
1006           if (pfile->state.parsing_args == 2)
1007             buffer->saved_flags |= PREV_WHITE;
1008           if (!pfile->keep_tokens)  /* Restart from the first slot of the base run.  */
1009             {
1010               pfile->cur_run = &pfile->base_run;
1011               result = pfile->base_run.base;
1012               pfile->cur_token = result + 1;
1013             }
1014           goto fresh_line;
1015         }
1016       result->type = CPP_EOF;
1017       break;
1018
1019     case '?':
1020     case '\\':
1021       /* These could start an escaped newline, or '?' a trigraph.  Let
1022          skip_escaped_newlines do all the work.  */
1023       {
1024         unsigned int line = pfile->line;
1025
1026         c = skip_escaped_newlines (pfile);
1027         if (line != pfile->line)
1028           {
1029             buffer->cur--;
1030             /* We had at least one escaped newline of some sort.
1031                Update the token's line and column.  */
1032             goto update_tokens_line;
1033           }
1034       }
1035
1036       /* We are either the original '?' or '\\', or a trigraph.  */
1037       if (c == '?')
1038         result->type = CPP_QUERY;
1039       else if (c == '\\')
1040         goto random_char;
1041       else
1042         goto trigraph;
1043       break;
1044
1045     case '0': case '1': case '2': case '3': case '4':
1046     case '5': case '6': case '7': case '8': case '9':
1047       result->type = CPP_NUMBER;
1048       parse_number (pfile, &result->val.str, 0);
1049       break;
1050
1051     case 'L':
1052       /* 'L' may introduce wide characters or strings.  */
1053       {
1054         const unsigned char *pos = buffer->cur;
1055
1056         c = get_effective_char (pfile);
1057         if (c == '\'' || c == '"')
1058           {
1059             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1060             parse_string (pfile, result, c);
1061             break;
1062           }
1063         buffer->cur = pos;  /* Not a wide literal; rewind and lex an identifier.  */
1064       }
1065       /* Fall through.  */
1066
1067     start_ident:
1068     case '_':
1069     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1070     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1071     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1072     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1073     case 'y': case 'z':
1074     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1075     case 'G': case 'H': case 'I': case 'J': case 'K':
1076     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1077     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1078     case 'Y': case 'Z':
1079       result->type = CPP_NAME;
1080       result->val.node = parse_identifier (pfile);
1081
1082       /* Convert named operators to their proper types.  */
1083       if (result->val.node->flags & NODE_OPERATOR)
1084         {
1085           result->flags |= NAMED_OP;
1086           result->type = result->val.node->directive_index;
1087         }
1088       break;
1089
1090     case '\'':
1091     case '"':
1092       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1093       parse_string (pfile, result, c);
1094       break;
1095
1096     case '/':
1097       /* A potential block or line comment.  */
1098       comment_start = buffer->cur;
1099       c = get_effective_char (pfile);
1100
1101       if (c == '*')
1102         {
1103           if (skip_block_comment (pfile))
1104             cpp_error (pfile, DL_ERROR, "unterminated comment");
1105         }
1106       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1107                             || CPP_IN_SYSTEM_HEADER (pfile)))
1108         {
1109           /* Warn about comments only if pedantically GNUC89, and not
1110              in system headers.  */
1111           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1112               && ! buffer->warned_cplusplus_comments)
1113             {
1114               cpp_error (pfile, DL_PEDWARN,
1115                          "C++ style comments are not allowed in ISO C90");
1116               cpp_error (pfile, DL_PEDWARN,
1117                          "(this will be reported only once per input file)");
1118               buffer->warned_cplusplus_comments = 1;
1119             }
1120
1121           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1122             cpp_error (pfile, DL_WARNING, "multi-line comment");
1123         }
1124       else if (c == '=')
1125         {
1126           result->type = CPP_DIV_EQ;
1127           break;
1128         }
1129       else
1130         {
1131           BACKUP ();
1132           result->type = CPP_DIV;
1133           break;
1134         }
1135
1136       if (!pfile->state.save_comments)  /* The comment counts as whitespace.  */
1137         {
1138           result->flags |= PREV_WHITE;
1139           goto update_tokens_line;
1140         }
1141
1142       /* Save the comment as a token in its own right.  */
1143       save_comment (pfile, result, comment_start, c);
1144       break;
1145
1146     case '<':
1147       if (pfile->state.angled_headers)
1148         {
1149           result->type = CPP_HEADER_NAME;
1150           parse_string (pfile, result, '>');
1151           break;
1152         }
1153
1154       c = get_effective_char (pfile);
1155       if (c == '=')
1156         result->type = CPP_LESS_EQ;
1157       else if (c == '<')
1158         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1159       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1160         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1161       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1162         {
1163           result->type = CPP_OPEN_SQUARE;
1164           result->flags |= DIGRAPH;
1165         }
1166       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1167         {
1168           result->type = CPP_OPEN_BRACE;
1169           result->flags |= DIGRAPH;
1170         }
1171       else
1172         {
1173           BACKUP ();
1174           result->type = CPP_LESS;
1175         }
1176       break;
1177
1178     case '>':
1179       c = get_effective_char (pfile);
1180       if (c == '=')
1181         result->type = CPP_GREATER_EQ;
1182       else if (c == '>')
1183         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1184       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1185         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1186       else
1187         {
1188           BACKUP ();
1189           result->type = CPP_GREATER;
1190         }
1191       break;
1192
1193     case '%':
1194       c = get_effective_char (pfile);
1195       if (c == '=')
1196         result->type = CPP_MOD_EQ;
1197       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1198         {
1199           result->flags |= DIGRAPH;
1200           result->type = CPP_HASH;
1201           if (get_effective_char (pfile) == '%')
1202             {
1203               const unsigned char *pos = buffer->cur;
1204
1205               if (get_effective_char (pfile) == ':')
1206                 result->type = CPP_PASTE;
1207               else
1208                 buffer->cur = pos - 1;  /* Not '%:%:'; rewind to one before POS.  */
1209             }
1210           else
1211             BACKUP ();
1212         }
1213       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1214         {
1215           result->flags |= DIGRAPH;
1216           result->type = CPP_CLOSE_BRACE;
1217         }
1218       else
1219         {
1220           BACKUP ();
1221           result->type = CPP_MOD;
1222         }
1223       break;
1224
1225     case '.':
1226       result->type = CPP_DOT;
1227       c = get_effective_char (pfile);
1228       if (c == '.')
1229         {
1230           const unsigned char *pos = buffer->cur;
1231
1232           if (get_effective_char (pfile) == '.')
1233             result->type = CPP_ELLIPSIS;
1234           else
1235             buffer->cur = pos - 1;  /* Not '...'; rewind to one before POS.  */
1236         }
1237       /* All known character sets have 0...9 contiguous.  */
1238       else if (ISDIGIT (c))
1239         {
1240           result->type = CPP_NUMBER;
1241           parse_number (pfile, &result->val.str, 1);
1242         }
1243       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1244         result->type = CPP_DOT_STAR;
1245       else
1246         BACKUP ();
1247       break;
1248
1249     case '+':
1250       c = get_effective_char (pfile);
1251       if (c == '+')
1252         result->type = CPP_PLUS_PLUS;
1253       else if (c == '=')
1254         result->type = CPP_PLUS_EQ;
1255       else
1256         {
1257           BACKUP ();
1258           result->type = CPP_PLUS;
1259         }
1260       break;
1261
1262     case '-':
1263       c = get_effective_char (pfile);
1264       if (c == '>')
1265         {
1266           result->type = CPP_DEREF;
1267           if (CPP_OPTION (pfile, cplusplus))
1268             {
1269               if (get_effective_char (pfile) == '*')
1270                 result->type = CPP_DEREF_STAR;
1271               else
1272                 BACKUP ();
1273             }
1274         }
1275       else if (c == '-')
1276         result->type = CPP_MINUS_MINUS;
1277       else if (c == '=')
1278         result->type = CPP_MINUS_EQ;
1279       else
1280         {
1281           BACKUP ();
1282           result->type = CPP_MINUS;
1283         }
1284       break;
1285
1286     case '&':
1287       c = get_effective_char (pfile);
1288       if (c == '&')
1289         result->type = CPP_AND_AND;
1290       else if (c == '=')
1291         result->type = CPP_AND_EQ;
1292       else
1293         {
1294           BACKUP ();
1295           result->type = CPP_AND;
1296         }
1297       break;
1298
1299     case '|':
1300       c = get_effective_char (pfile);
1301       if (c == '|')
1302         result->type = CPP_OR_OR;
1303       else if (c == '=')
1304         result->type = CPP_OR_EQ;
1305       else
1306         {
1307           BACKUP ();
1308           result->type = CPP_OR;
1309         }
1310       break;
1311
1312     case ':':
1313       c = get_effective_char (pfile);
1314       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1315         result->type = CPP_SCOPE;
1316       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1317         {
1318           result->flags |= DIGRAPH;
1319           result->type = CPP_CLOSE_SQUARE;
1320         }
1321       else
1322         {
1323           BACKUP ();
1324           result->type = CPP_COLON;
1325         }
1326       break;
1327
1328     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1329     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1330     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1331     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1332     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1333
1334     case '~': result->type = CPP_COMPL; break;
1335     case ',': result->type = CPP_COMMA; break;
1336     case '(': result->type = CPP_OPEN_PAREN; break;
1337     case ')': result->type = CPP_CLOSE_PAREN; break;
1338     case '[': result->type = CPP_OPEN_SQUARE; break;
1339     case ']': result->type = CPP_CLOSE_SQUARE; break;
1340     case '{': result->type = CPP_OPEN_BRACE; break;
1341     case '}': result->type = CPP_CLOSE_BRACE; break;
1342     case ';': result->type = CPP_SEMICOLON; break;
1343
1344       /* @ is a punctuator in Objective-C.  */
1345     case '@': result->type = CPP_ATSIGN; break;
1346
1347     case '$':
1348       if (CPP_OPTION (pfile, dollars_in_ident))
1349         goto start_ident;
1350       /* Fall through...  */
1351
1352     random_char:
1353     default:
1354       result->type = CPP_OTHER;
1355       result->val.c = c;
1356       break;
1357     }
1358
1359   return result;
1360 }
1361
1362 /* An upper bound on the number of bytes needed to spell TOKEN,
1363    including preceding whitespace.  */
1364 unsigned int
1365 cpp_token_len (token)
1366      const cpp_token *token;
1367 {
1368   unsigned int len;
1369
1370   switch (TOKEN_SPELL (token))
1371     {
1372     default:            len = 0;                                break;
1373     case SPELL_NUMBER:
1374     case SPELL_STRING:  len = token->val.str.len;               break;
1375     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1376     }
1377   /* 1 for whitespace, 4 for comment delimiters.  */
1378   return len + 5;
1379 }
1380
1381 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1382    already contain the enough space to hold the token's spelling.
1383    Returns a pointer to the character after the last character
1384    written.  */
1385 unsigned char *
1386 cpp_spell_token (pfile, token, buffer)
1387      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1388      const cpp_token *token;
1389      unsigned char *buffer;
1390 {
1391   switch (TOKEN_SPELL (token))
1392     {
1393     case SPELL_OPERATOR:
1394       {
1395         const unsigned char *spelling;
1396         unsigned char c;
1397
1398         if (token->flags & DIGRAPH)
1399           spelling
1400             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1401         else if (token->flags & NAMED_OP)
1402           goto spell_ident;
1403         else
1404           spelling = TOKEN_NAME (token);
1405
1406         while ((c = *spelling++) != '\0')
1407           *buffer++ = c;
1408       }
1409       break;
1410
1411     case SPELL_CHAR:
1412       *buffer++ = token->val.c;
1413       break;
1414
1415     spell_ident:
1416     case SPELL_IDENT:
1417       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1418       buffer += NODE_LEN (token->val.node);
1419       break;
1420
1421     case SPELL_NUMBER:
1422       memcpy (buffer, token->val.str.text, token->val.str.len);
1423       buffer += token->val.str.len;
1424       break;
1425
1426     case SPELL_STRING:
1427       {
1428         int left, right, tag;
1429         switch (token->type)
1430           {
1431           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1432           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1433           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1434           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1435           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1436           default:
1437             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1438                        TOKEN_NAME (token));
1439             return buffer;
1440           }
1441         if (tag) *buffer++ = tag;
1442         *buffer++ = left;
1443         memcpy (buffer, token->val.str.text, token->val.str.len);
1444         buffer += token->val.str.len;
1445         *buffer++ = right;
1446       }
1447       break;
1448
1449     case SPELL_NONE:
1450       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1451       break;
1452     }
1453
1454   return buffer;
1455 }
1456
1457 /* Returns TOKEN spelt as a null-terminated string.  The string is
1458    freed when the reader is destroyed.  Useful for diagnostics.  */
1459 unsigned char *
1460 cpp_token_as_text (pfile, token)
1461      cpp_reader *pfile;
1462      const cpp_token *token;
1463 {
1464   unsigned int len = cpp_token_len (token);
1465   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1466
1467   end = cpp_spell_token (pfile, token, start);
1468   end[0] = '\0';
1469
1470   return start;
1471 }
1472
1473 /* Used by C front ends, which really should move to using
1474    cpp_token_as_text.  */
1475 const char *
1476 cpp_type2name (type)
1477      enum cpp_ttype type;
1478 {
1479   return (const char *) token_spellings[type].name;
1480 }
1481
1482 /* Writes the spelling of token to FP, without any preceding space.
1483    Separated from cpp_spell_token for efficiency - to avoid stdio
1484    double-buffering.  */
1485 void
1486 cpp_output_token (token, fp)
1487      const cpp_token *token;
1488      FILE *fp;
1489 {
1490   switch (TOKEN_SPELL (token))
1491     {
1492     case SPELL_OPERATOR:
1493       {
1494         const unsigned char *spelling;
1495         int c;
1496
1497         if (token->flags & DIGRAPH)
1498           spelling
1499             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1500         else if (token->flags & NAMED_OP)
1501           goto spell_ident;
1502         else
1503           spelling = TOKEN_NAME (token);
1504
1505         c = *spelling;
1506         do
1507           putc (c, fp);
1508         while ((c = *++spelling) != '\0');
1509       }
1510       break;
1511
1512     case SPELL_CHAR:
1513       putc (token->val.c, fp);
1514       break;
1515
1516     spell_ident:
1517     case SPELL_IDENT:
1518       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1519     break;
1520
1521     case SPELL_NUMBER:
1522       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1523       break;
1524
1525     case SPELL_STRING:
1526       {
1527         int left, right, tag;
1528         switch (token->type)
1529           {
1530           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1531           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1532           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1533           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1534           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1535           default:
1536             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1537             return;
1538           }
1539         if (tag) putc (tag, fp);
1540         putc (left, fp);
1541         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1542         putc (right, fp);
1543       }
1544       break;
1545
1546     case SPELL_NONE:
1547       /* An error, most probably.  */
1548       break;
1549     }
1550 }
1551
1552 /* Compare two tokens.  */
1553 int
1554 _cpp_equiv_tokens (a, b)
1555      const cpp_token *a, *b;
1556 {
1557   if (a->type == b->type && a->flags == b->flags)
1558     switch (TOKEN_SPELL (a))
1559       {
1560       default:                  /* Keep compiler happy.  */
1561       case SPELL_OPERATOR:
1562         return 1;
1563       case SPELL_CHAR:
1564         return a->val.c == b->val.c; /* Character.  */
1565       case SPELL_NONE:
1566         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1567       case SPELL_IDENT:
1568         return a->val.node == b->val.node;
1569       case SPELL_NUMBER:
1570       case SPELL_STRING:
1571         return (a->val.str.len == b->val.str.len
1572                 && !memcmp (a->val.str.text, b->val.str.text,
1573                             a->val.str.len));
1574       }
1575
1576   return 0;
1577 }
1578
1579 /* Returns nonzero if a space should be inserted to avoid an
1580    accidental token paste for output.  For simplicity, it is
1581    conservative, and occasionally advises a space where one is not
1582    needed, e.g. "." and ".2".  */
1583 int
1584 cpp_avoid_paste (pfile, token1, token2)
1585      cpp_reader *pfile;
1586      const cpp_token *token1, *token2;
1587 {
1588   enum cpp_ttype a = token1->type, b = token2->type;
1589   cppchar_t c;
1590
1591   if (token1->flags & NAMED_OP)
1592     a = CPP_NAME;
1593   if (token2->flags & NAMED_OP)
1594     b = CPP_NAME;
1595
1596   c = EOF;
1597   if (token2->flags & DIGRAPH)
1598     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1599   else if (token_spellings[b].category == SPELL_OPERATOR)
1600     c = token_spellings[b].name[0];
1601
1602   /* Quickly get everything that can paste with an '='.  */
1603   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1604     return 1;
1605
1606   switch (a)
1607     {
1608     case CPP_GREATER:   return c == '>' || c == '?';
1609     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1610     case CPP_PLUS:      return c == '+';
1611     case CPP_MINUS:     return c == '-' || c == '>';
1612     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1613     case CPP_MOD:       return c == ':' || c == '>';
1614     case CPP_AND:       return c == '&';
1615     case CPP_OR:        return c == '|';
1616     case CPP_COLON:     return c == ':' || c == '>';
1617     case CPP_DEREF:     return c == '*';
1618     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1619     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1620     case CPP_NAME:      return ((b == CPP_NUMBER
1621                                  && name_p (pfile, &token2->val.str))
1622                                 || b == CPP_NAME
1623                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1624     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1625                                 || c == '.' || c == '+' || c == '-');
1626     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1627                                 && token1->val.c == '@'
1628                                 && (b == CPP_NAME || b == CPP_STRING));
1629     default:            break;
1630     }
1631
1632   return 0;
1633 }
1634
1635 /* Output all the remaining tokens on the current line, and a newline
1636    character, to FP.  Leading whitespace is removed.  If there are
1637    macros, special token padding is not performed.  */
1638 void
1639 cpp_output_line (pfile, fp)
1640      cpp_reader *pfile;
1641      FILE *fp;
1642 {
1643   const cpp_token *token;
1644
1645   token = cpp_get_token (pfile);
1646   while (token->type != CPP_EOF)
1647     {
1648       cpp_output_token (token, fp);
1649       token = cpp_get_token (pfile);
1650       if (token->flags & PREV_WHITE)
1651         putc (' ', fp);
1652     }
1653
1654   putc ('\n', fp);
1655 }
1656
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1667
1668 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1669    failure if cpplib is not parsing C++ or C99.  Such failure is
1670    silent, and no variables are updated.  Otherwise returns 0, and
1671    warns if -Wtraditional.
1672
1673    [lex.charset]: The character designated by the universal character
1674    name \UNNNNNNNN is that character whose character short name in
1675    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676    universal character name \uNNNN is that character whose character
1677    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1678    for a universal character name is less than 0x20 or in the range
1679    0x7F-0x9F (inclusive), or if the universal character name
1680    designates a character in the basic source character set, then the
1681    program is ill-formed.
1682
1683    We assume that wchar_t is Unicode, so we don't need to do any
1684    mapping.  Is this ever wrong?
1685
1686    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1687    LIMIT is the end of the string or charconst.  PSTR is updated to
1688    point after the UCS on return, and the UCS is written into PC.  */
1689
1690 static int
1691 maybe_read_ucs (pfile, pstr, limit, pc)
1692      cpp_reader *pfile;
1693      const unsigned char **pstr;
1694      const unsigned char *limit;
1695      cppchar_t *pc;
1696 {
1697   const unsigned char *p = *pstr;
1698   unsigned int code = 0;
1699   unsigned int c = *pc, length;
1700
1701   /* Only attempt to interpret a UCS for C++ and C99.  */
1702   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1703     return 1;
1704
1705   if (CPP_WTRADITIONAL (pfile))
1706     cpp_error (pfile, DL_WARNING,
1707                "the meaning of '\\%c' is different in traditional C", c);
1708
1709   length = (c == 'u' ? 4: 8);
1710
1711   if ((size_t) (limit - p) < length)
1712     {
1713       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1714       /* Skip to the end to avoid more diagnostics.  */
1715       p = limit;
1716     }
1717   else
1718     {
1719       for (; length; length--, p++)
1720         {
1721           c = *p;
1722           if (ISXDIGIT (c))
1723             code = (code << 4) + hex_digit_value (c);
1724           else
1725             {
1726               cpp_error (pfile, DL_ERROR,
1727                          "non-hex digit '%c' in universal-character-name", c);
1728               /* We shouldn't skip in case there are multibyte chars.  */
1729               break;
1730             }
1731         }
1732     }
1733
1734   if (CPP_OPTION (pfile, EBCDIC))
1735     {
1736       cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1737       code = 0x3f;  /* EBCDIC invalid character */
1738     }
1739   /* True extended characters are OK.  */
1740   else if (code >= 0xa0
1741            && !(code & 0x80000000)
1742            && !(code >= 0xD800 && code <= 0xDFFF))
1743     ;
1744   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1745      hex escapes so that this also works with EBCDIC hosts.  */
1746   else if (code == 0x24 || code == 0x40 || code == 0x60)
1747     ;
1748   /* Don't give another error if one occurred above.  */
1749   else if (length == 0)
1750     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1751
1752   *pstr = p;
1753   *pc = code;
1754   return 0;
1755 }
1756
1757 /* Returns the value of an escape sequence, truncated to the correct
1758    target precision.  PSTR points to the input pointer, which is just
1759    after the backslash.  LIMIT is how much text we have.  WIDE is true
1760    if the escape sequence is part of a wide character constant or
1761    string literal.  Handles all relevant diagnostics.  */
1762 cppchar_t
1763 cpp_parse_escape (pfile, pstr, limit, wide)
1764      cpp_reader *pfile;
1765      const unsigned char **pstr;
1766      const unsigned char *limit;
1767      int wide;
1768 {
1769   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1770   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1771   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1772
1773   int unknown = 0;
1774   const unsigned char *str = *pstr, *charconsts;
1775   cppchar_t c, mask;
1776   unsigned int width;
1777
1778   if (CPP_OPTION (pfile, EBCDIC))
1779     charconsts = ebcdic;
1780   else
1781     charconsts = ascii;
1782
1783   if (wide)
1784     width = CPP_OPTION (pfile, wchar_precision);
1785   else
1786     width = CPP_OPTION (pfile, char_precision);
1787   if (width < BITS_PER_CPPCHAR_T)
1788     mask = ((cppchar_t) 1 << width) - 1;
1789   else
1790     mask = ~0;
1791
1792   c = *str++;
1793   switch (c)
1794     {
1795     case '\\': case '\'': case '"': case '?': break;
1796     case 'b': c = charconsts[1];  break;
1797     case 'f': c = charconsts[3];  break;
1798     case 'n': c = charconsts[4];  break;
1799     case 'r': c = charconsts[5];  break;
1800     case 't': c = charconsts[6];  break;
1801     case 'v': c = charconsts[7];  break;
1802
1803     case '(': case '{': case '[': case '%':
1804       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1805          '\%' is used to prevent SCCS from getting confused.  */
1806       unknown = CPP_PEDANTIC (pfile);
1807       break;
1808
1809     case 'a':
1810       if (CPP_WTRADITIONAL (pfile))
1811         cpp_error (pfile, DL_WARNING,
1812                    "the meaning of '\\a' is different in traditional C");
1813       c = charconsts[0];
1814       break;
1815
1816     case 'e': case 'E':
1817       if (CPP_PEDANTIC (pfile))
1818         cpp_error (pfile, DL_PEDWARN,
1819                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1820       c = charconsts[2];
1821       break;
1822
1823     case 'u': case 'U':
1824       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1825       break;
1826
1827     case 'x':
1828       if (CPP_WTRADITIONAL (pfile))
1829         cpp_error (pfile, DL_WARNING,
1830                    "the meaning of '\\x' is different in traditional C");
1831
1832       {
1833         cppchar_t i = 0, overflow = 0;
1834         int digits_found = 0;
1835
1836         while (str < limit)
1837           {
1838             c = *str;
1839             if (! ISXDIGIT (c))
1840               break;
1841             str++;
1842             overflow |= i ^ (i << 4 >> 4);
1843             i = (i << 4) + hex_digit_value (c);
1844             digits_found = 1;
1845           }
1846
1847         if (!digits_found)
1848           cpp_error (pfile, DL_ERROR,
1849                        "\\x used with no following hex digits");
1850
1851         if (overflow | (i != (i & mask)))
1852           {
1853             cpp_error (pfile, DL_PEDWARN,
1854                        "hex escape sequence out of range");
1855             i &= mask;
1856           }
1857         c = i;
1858       }
1859       break;
1860
1861     case '0':  case '1':  case '2':  case '3':
1862     case '4':  case '5':  case '6':  case '7':
1863       {
1864         size_t count = 0;
1865         cppchar_t i = c - '0';
1866
1867         while (str < limit && ++count < 3)
1868           {
1869             c = *str;
1870             if (c < '0' || c > '7')
1871               break;
1872             str++;
1873             i = (i << 3) + c - '0';
1874           }
1875
1876         if (i != (i & mask))
1877           {
1878             cpp_error (pfile, DL_PEDWARN,
1879                        "octal escape sequence out of range");
1880             i &= mask;
1881           }
1882         c = i;
1883       }
1884       break;
1885
1886     default:
1887       unknown = 1;
1888       break;
1889     }
1890
1891   if (unknown)
1892     {
1893       if (ISGRAPH (c))
1894         cpp_error (pfile, DL_PEDWARN,
1895                    "unknown escape sequence '\\%c'", (int) c);
1896       else
1897         cpp_error (pfile, DL_PEDWARN,
1898                    "unknown escape sequence: '\\%03o'", (int) c);
1899     }
1900
1901   if (c > mask)
1902     {
1903       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1904       c &= mask;
1905     }
1906
1907   *pstr = str;
1908   return c;
1909 }
1910
1911 /* Interpret a (possibly wide) character constant in TOKEN.
1912    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1913    points to a variable that is filled in with the number of
1914    characters seen, and UNSIGNEDP to a variable that indicates whether
1915    the result has signed type.  */
1916 cppchar_t
1917 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1918      cpp_reader *pfile;
1919      const cpp_token *token;
1920      unsigned int *pchars_seen;
1921      int *unsignedp;
1922 {
1923   const unsigned char *str = token->val.str.text;
1924   const unsigned char *limit = str + token->val.str.len;
1925   unsigned int chars_seen = 0;
1926   size_t width, max_chars;
1927   cppchar_t c, mask, result = 0;
1928   bool unsigned_p;
1929
1930 #ifdef MULTIBYTE_CHARS
1931   (void) local_mbtowc (NULL, NULL, 0);
1932 #endif
1933
1934   /* Width in bits.  */
1935   if (token->type == CPP_CHAR)
1936     {
1937       width = CPP_OPTION (pfile, char_precision);
1938       max_chars = CPP_OPTION (pfile, int_precision) / width;
1939       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1940     }
1941   else
1942     {
1943       width = CPP_OPTION (pfile, wchar_precision);
1944       max_chars = 1;
1945       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1946     }
1947
1948   if (width < BITS_PER_CPPCHAR_T)
1949     mask = ((cppchar_t) 1 << width) - 1;
1950   else
1951     mask = ~0;
1952
1953   while (str < limit)
1954     {
1955 #ifdef MULTIBYTE_CHARS
1956       wchar_t wc;
1957       int char_len;
1958
1959       char_len = local_mbtowc (&wc, (const char *)str, limit - str);
1960       if (char_len == -1)
1961         {
1962           cpp_error (pfile, DL_WARNING,
1963                      "ignoring invalid multibyte character");
1964           c = *str++;
1965         }
1966       else
1967         {
1968           str += char_len;
1969           c = wc;
1970         }
1971 #else
1972       c = *str++;
1973 #endif
1974
1975       if (c == '\\')
1976         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1977
1978 #ifdef MAP_CHARACTER
1979       if (ISPRINT (c))
1980         c = MAP_CHARACTER (c);
1981 #endif
1982
1983       chars_seen++;
1984
1985       /* Truncate the character, scale the result and merge the two.  */
1986       c &= mask;
1987       if (width < BITS_PER_CPPCHAR_T)
1988         result = (result << width) | c;
1989       else
1990         result = c;
1991     }
1992
1993   if (chars_seen == 0)
1994     cpp_error (pfile, DL_ERROR, "empty character constant");
1995   else if (chars_seen > 1)
1996     {
1997       /* Multichar charconsts are of type int and therefore signed.  */
1998       unsigned_p = 0;
1999
2000       if (chars_seen > max_chars)
2001         {
2002           chars_seen = max_chars;
2003           cpp_error (pfile, DL_WARNING,
2004                      "character constant too long for its type");
2005         }
2006       else if (CPP_OPTION (pfile, warn_multichar))
2007         cpp_error (pfile, DL_WARNING, "multi-character character constant");
2008     }
2009
2010   /* Sign-extend or truncate the constant to cppchar_t.  The value is
2011      in WIDTH bits, but for multi-char charconsts it's value is the
2012      full target type's width.  */
2013   if (chars_seen > 1)
2014     width *= max_chars;
2015   if (width < BITS_PER_CPPCHAR_T)
2016     {
2017       mask = ((cppchar_t) 1 << width) - 1;
2018       if (unsigned_p || !(result & (1 << (width - 1))))
2019         result &= mask;
2020       else
2021         result |= ~mask;
2022     }
2023
2024   *pchars_seen = chars_seen;
2025   *unsignedp = unsigned_p;
2026   return result;
2027 }
2028
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area ever allocated for a buffer.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still considered an acceptable match for a request of MIN_SIZE.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

/* A request for zero bytes must still accept a minimum-sized buffer.  */
#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2043
2044 /* Create a new allocation buffer.  Place the control block at the end
2045    of the buffer, so that buffer overflows will cause immediate chaos.  */
2046 static _cpp_buff *
2047 new_buff (len)
2048      size_t len;
2049 {
2050   _cpp_buff *result;
2051   unsigned char *base;
2052
2053   if (len < MIN_BUFF_SIZE)
2054     len = MIN_BUFF_SIZE;
2055   len = CPP_ALIGN (len);
2056
2057   base = xmalloc (len + sizeof (_cpp_buff));
2058   result = (_cpp_buff *) (base + len);
2059   result->base = base;
2060   result->cur = base;
2061   result->limit = base + len;
2062   result->next = NULL;
2063   return result;
2064 }
2065
2066 /* Place a chain of unwanted allocation buffers on the free list.  */
2067 void
2068 _cpp_release_buff (pfile, buff)
2069      cpp_reader *pfile;
2070      _cpp_buff *buff;
2071 {
2072   _cpp_buff *end = buff;
2073
2074   while (end->next)
2075     end = end->next;
2076   end->next = pfile->free_buffs;
2077   pfile->free_buffs = buff;
2078 }
2079
2080 /* Return a free buffer of size at least MIN_SIZE.  */
2081 _cpp_buff *
2082 _cpp_get_buff (pfile, min_size)
2083      cpp_reader *pfile;
2084      size_t min_size;
2085 {
2086   _cpp_buff *result, **p;
2087
2088   for (p = &pfile->free_buffs;; p = &(*p)->next)
2089     {
2090       size_t size;
2091
2092       if (*p == NULL)
2093         return new_buff (min_size);
2094       result = *p;
2095       size = result->limit - result->base;
2096       /* Return a buffer that's big enough, but don't waste one that's
2097          way too big.  */
2098       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2099         break;
2100     }
2101
2102   *p = result->next;
2103   result->next = NULL;
2104   result->cur = result->base;
2105   return result;
2106 }
2107
2108 /* Creates a new buffer with enough space to hold the uncommitted
2109    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2110    the excess bytes to the new buffer.  Chains the new buffer after
2111    BUFF, and returns the new buffer.  */
2112 _cpp_buff *
2113 _cpp_append_extend_buff (pfile, buff, min_extra)
2114      cpp_reader *pfile;
2115      _cpp_buff *buff;
2116      size_t min_extra;
2117 {
2118   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2119   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2120
2121   buff->next = new_buff;
2122   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2123   return new_buff;
2124 }
2125
2126 /* Creates a new buffer with enough space to hold the uncommitted
2127    remaining bytes of the buffer pointed to by BUFF, and at least
2128    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2129    Chains the new buffer before the buffer pointed to by BUFF, and
2130    updates the pointer to point to the new buffer.  */
2131 void
2132 _cpp_extend_buff (pfile, pbuff, min_extra)
2133      cpp_reader *pfile;
2134      _cpp_buff **pbuff;
2135      size_t min_extra;
2136 {
2137   _cpp_buff *new_buff, *old_buff = *pbuff;
2138   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2139
2140   new_buff = _cpp_get_buff (pfile, size);
2141   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2142   new_buff->next = old_buff;
2143   *pbuff = new_buff;
2144 }
2145
2146 /* Free a chain of buffers starting at BUFF.  */
2147 void
2148 _cpp_free_buff (buff)
2149      _cpp_buff *buff;
2150 {
2151   _cpp_buff *next;
2152
2153   for (; buff; buff = next)
2154     {
2155       next = buff->next;
2156       free (buff->base);
2157     }
2158 }
2159
2160 /* Allocate permanent, unaligned storage of length LEN.  */
2161 unsigned char *
2162 _cpp_unaligned_alloc (pfile, len)
2163      cpp_reader *pfile;
2164      size_t len;
2165 {
2166   _cpp_buff *buff = pfile->u_buff;
2167   unsigned char *result = buff->cur;
2168
2169   if (len > (size_t) (buff->limit - result))
2170     {
2171       buff = _cpp_get_buff (pfile, len);
2172       buff->next = pfile->u_buff;
2173       pfile->u_buff = buff;
2174       result = buff->cur;
2175     }
2176
2177   buff->cur = result + len;
2178   return result;
2179 }
2180
2181 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2182    That buffer is used for growing allocations when saving macro
2183    replacement lists in a #define, and when parsing an answer to an
2184    assertion in #assert, #unassert or #if (and therefore possibly
2185    whilst expanding macros).  It therefore must not be used by any
2186    code that they might call: specifically the lexer and the guts of
2187    the macro expander.
2188
2189    All existing other uses clearly fit this restriction: storing
2190    registered pragmas during initialization.  */
2191 unsigned char *
2192 _cpp_aligned_alloc (pfile, len)
2193      cpp_reader *pfile;
2194      size_t len;
2195 {
2196   _cpp_buff *buff = pfile->a_buff;
2197   unsigned char *result = buff->cur;
2198
2199   if (len > (size_t) (buff->limit - result))
2200     {
2201       buff = _cpp_get_buff (pfile, len);
2202       buff->next = pfile->a_buff;
2203       pfile->a_buff = buff;
2204       result = buff->cur;
2205     }
2206
2207   buff->cur = result + len;
2208   return result;
2209 }