libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Per Bothner, 1994-95.
   5    Based on CCCP program by Paul Rubin, June 1986
   6    Adapted to ANSI C, Richard Stallman, Jan 1987
   7    Broken out to separate file, Zack Weinberg, Mar 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 3, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "internal.h"
  27
     /* Spelling category stored for every token type in token_spellings.
        OP entries of TTYPE_TABLE always get SPELL_OPERATOR; TK entries
        choose their category through their second argument, which is
        pasted onto the SPELL_ prefix.  */
  28 enum spell_type
  29 {
  30   SPELL_OPERATOR = 0,
  31   SPELL_IDENT,
  32   SPELL_LITERAL,
  33   SPELL_NONE
  34 };
  35
     /* One row of the spelling table: the category plus a fixed name
        string for the token type.  */
  36 struct token_spelling
  37 {
  38   enum spell_type category;
  39   const unsigned char *name;
  40 };
  41
     /* Spellings of the six digraphs.  The code indexing this array is
        not in this part of the file.  */
  42 static const unsigned char *const digraph_spellings[] =
  43 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
  44
     /* Expand TTYPE_TABLE (defined elsewhere) into one token_spelling per
        token type: OP rows record the given spelling string, TK rows
        record the stringified enumerator name.  */
  45 #define OP(e, s) { SPELL_OPERATOR, UC s  },
  46 #define TK(e, s) { SPELL_ ## s,    UC #e },
  47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  48 #undef OP
  49 #undef TK
  50
     /* Accessors that index token_spellings by TOKEN's type.  */
  51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  53
     /* Forward declarations of file-local helpers.  */
  54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  55 static int skip_line_comment (cpp_reader *);
  56 static void skip_whitespace (cpp_reader *, cppchar_t);
  57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  59 static void store_comment (cpp_reader *, cpp_token *);
  60 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  61                             unsigned int, enum cpp_ttype);
  62 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  63 static int name_p (cpp_reader *, const cpp_string *);
  64 static tokenrun *next_tokenrun (tokenrun *);
  65
  66 static _cpp_buff *new_buff (size_t);
  67
  68
  69 /* Utility routine:
  70
  71    Compares, the token TOKEN to the NUL-terminated string STRING.
  72    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  73 int
  74 cpp_ideq (const cpp_token *token, const char *string)
  75 {
  76   if (token->type != CPP_NAME)
  77     return 0;
  78
  79   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  80 }
  81
  82 /* Record a note TYPE at byte POS into the current cleaned logical
  83    line.  */
  84 static void
  85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  86 {
  87   if (buffer->notes_used == buffer->notes_cap)
  88     {
  89       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  90       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  91                                   buffer->notes_cap);
  92     }
  93
  94   buffer->notes[buffer->notes_used].pos = pos;
  95   buffer->notes[buffer->notes_used].type = type;
  96   buffer->notes_used++;
  97 }
  98
  99 /* Returns with a logical line that contains no escaped newlines or
 100    trigraphs.  This is a time-critical inner loop.

        The line starting at buffer->next_line is processed in place.  A
        read-only fast path scans until it meets the end of the line, an
        escaped newline, or a trigraph that has to be converted; only in
        the latter two cases does the copying slow path take over,
        compacting the text through D while S keeps reading ahead.
        Buffers marked from_stage3 skip both paths and merely locate the
        end of the line.  Outside from_stage3 buffers, every escaped
        newline and every trigraph met is recorded with add_line_note.
        On exit the cleaned line is terminated with '\n', a sentinel note
        is placed just after it, and buffer->next_line points past the
        raw line terminator.  */
 101 void
 102 _cpp_clean_line (cpp_reader *pfile)
 103 {
 104   cpp_buffer *buffer;
 105   const uchar *s;
 106   uchar c, *d, *p;
 107
 108   buffer = pfile->buffer;
 109   buffer->cur_note = buffer->notes_used = 0;
 110   buffer->cur = buffer->line_base = buffer->next_line;
 111   buffer->need_line = false;
       /* Start one byte early: every loop below advances S before
          reading through it.  */
 112   s = buffer->next_line - 1;
 113
 114   if (!buffer->from_stage3)
 115     {
 116       const uchar *pbackslash = NULL;
 117
 118       /* Short circuit for the common case of an un-escaped line with
 119          no trigraphs.  The primary win here is by not writing any
 120          data back to memory until we have to.  */
 121       for (;;)
 122         {
 123           c = *++s;
 124           if (__builtin_expect (c == '\n', false)
 125               || __builtin_expect (c == '\r', false))
 126             {
                   /* D marks where the terminating '\n' will be stored.  */
 127               d = (uchar *) s;
 128
 129               if (__builtin_expect (s == buffer->rlimit, false))
 130                 goto done;
 131
 132               /* DOS line ending? */
 133               if (__builtin_expect (c == '\r', false)
 134                   && s[1] == '\n')
 135                 {
 136                   s++;
 137                   if (s == buffer->rlimit)
 138                     goto done;
 139                 }
 140
 141               if (__builtin_expect (pbackslash == NULL, true))
 142                 goto done;
 143
 144               /* Check for escaped newline.  */
                   /* PBACKSLASH is non-NULL here, so a backslash was seen
                      earlier on this line; the backward scan stops there
                      at the latest.  */
 145               p = d;
 146               while (is_nvspace (p[-1]))
 147                 p--;
 148               if (p - 1 != pbackslash)
 149                 goto done;
 150
 151               /* Have an escaped newline; process it and proceed to
 152                  the slow path.  */
                   /* The note type is ' ' when whitespace separates the
                      backslash from the newline, '\\' otherwise.  */
 153               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
                   /* D now sits just before the backslash, so the first
                      store of the slow path overwrites it.  */
 154               d = p - 2;
                   /* next_line is borrowed as the lower bound for the slow
                      path's backward whitespace scan; it gets its final
                      value at the DONE label.  */
 155               buffer->next_line = p - 1;
 156               break;
 157             }
 158           if (__builtin_expect (c == '\\', false))
 159             pbackslash = s;
 160           else if (__builtin_expect (c == '?', false)
 161                    && __builtin_expect (s[1] == '?', false)
 162                    && _cpp_trigraph_map[s[2]])
 163             {
 164               /* Have a trigraph.  We may or may not have to convert
 165                  it.  Add a line note regardless, for -Wtrigraphs.  */
 166               add_line_note (buffer, s, s[2]);
 167               if (CPP_OPTION (pfile, trigraphs))
 168                 {
 169                   /* We do, and that means we have to switch to the
 170                      slow path.  */
                       /* Store the replacement over the first '?' and skip
                          the two remaining trigraph characters.  */
 171                   d = (uchar *) s;
 172                   *d = _cpp_trigraph_map[s[2]];
 173                   s += 2;
 174                   break;
 175                 }
 176             }
 177         }
 178
 179
           /* Slow path: copy each byte from S to D so that removed
              backslash-newlines and converted trigraphs leave no gaps.  */
 180       for (;;)
 181         {
 182           c = *++s;
 183           *++d = c;
 184
 185           if (c == '\n' || c == '\r')
 186             {
 187                   /* Handle DOS line endings.  */
 188               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 189                 s++;
 190               if (s == buffer->rlimit)
 191                 break;
 192
 193               /* Escaped?  */
 194               p = d;
 195               while (p != buffer->next_line && is_nvspace (p[-1]))
 196                 p--;
 197               if (p == buffer->next_line || p[-1] != '\\')
 198                 break;
 199
 200               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 201               d = p - 2;
 202               buffer->next_line = p - 1;
 203             }
 204           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 205             {
 206               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 207               add_line_note (buffer, d, s[2]);
 208               if (CPP_OPTION (pfile, trigraphs))
 209                 {
 210                   *d = _cpp_trigraph_map[s[2]];
 211                   s += 2;
 212                 }
 213             }
 214         }
 215     }
 216   else
 217     {
           /* from_stage3 buffer: only find the end of the line; no
              trigraph or escaped-newline handling is done.  */
 218       do
 219         s++;
 220       while (*s != '\n' && *s != '\r');
 221       d = (uchar *) s;
 222
 223       /* Handle DOS line endings.  */
 224       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 225         s++;
 226     }
 227
 228  done:
 229   *d = '\n';
 230   /* A sentinel note that should never be processed.  */
 231   add_line_note (buffer, d + 1, '\n');
 232   buffer->next_line = s + 1;
 233 }
 234
 235 /* Return true if the trigraph indicated by NOTE should be warned
 236    about in a comment.  */
 237 static bool
 238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 239 {
 240   const uchar *p;
 241
 242   /* Within comments we don't warn about trigraphs, unless the
 243      trigraph forms an escaped newline, as that may change
 244      behavior.  */
 245   if (note->type != '/')
 246     return false;
 247
 248   /* If -trigraphs, then this was an escaped newline iff the next note
 249      is coincident.  */
 250   if (CPP_OPTION (pfile, trigraphs))
 251     return note[1].pos == note->pos;
 252
 253   /* Otherwise, see if this forms an escaped newline.  */
 254   p = note->pos + 3;
 255   while (is_nvspace (*p))
 256     p++;
 257
 258   /* There might have been escaped newlines between the trigraph and the
 259      newline we found.  Hence the position test.  */
 260   return (*p == '\n' && p < note[1].pos);
 261 }
 262
 263 /* Process the notes created by add_line_note as far as the current
 264    location.  */
 265 void
 266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 267 {
 268   cpp_buffer *buffer = pfile->buffer;
 269
 270   for (;;)
 271     {
 272       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 273       unsigned int col;
 274
 275       if (note->pos > buffer->cur)
 276         break;
 277
 278       buffer->cur_note++;
 279       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 280
 281       if (note->type == '\\' || note->type == ' ')
 282         {
 283           if (note->type == ' ' && !in_comment)
 284             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 285                                  "backslash and newline separated by space");
 286
 287           if (buffer->next_line > buffer->rlimit)
 288             {
 289               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 290                                    "backslash-newline at end of file");
 291               /* Prevent "no newline at end of file" warning.  */
 292               buffer->next_line = buffer->rlimit;
 293             }
 294
 295           buffer->line_base = note->pos;
 296           CPP_INCREMENT_LINE (pfile, 0);
 297         }
 298       else if (_cpp_trigraph_map[note->type])
 299         {
 300           if (CPP_OPTION (pfile, warn_trigraphs)
 301               && (!in_comment || warn_in_comment (pfile, note)))
 302             {
 303               if (CPP_OPTION (pfile, trigraphs))
 304                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 305                                      "trigraph ??%c converted to %c",
 306                                      note->type,
 307                                      (int) _cpp_trigraph_map[note->type]);
 308               else
 309                 {
 310                   cpp_error_with_line
 311                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 312                      "trigraph ??%c ignored, use -trigraphs to enable",
 313                      note->type);
 314                 }
 315             }
 316         }
 317       else
 318         abort ();
 319     }
 320 }
 321
 322 /* Skip a C-style block comment.  We find the end of the comment by
 323    seeing if an asterisk is before every '/' we encounter.  Returns
 324    nonzero if comment terminated by EOF, zero otherwise.
 325
 326    Buffer->cur points to the initial asterisk of the comment.  */
 327 bool
 328 _cpp_skip_block_comment (cpp_reader *pfile)
 329 {
 330   cpp_buffer *buffer = pfile->buffer;
 331   const uchar *cur = buffer->cur;
 332   uchar c;
 333
 334   cur++;
 335   if (*cur == '/')
 336     cur++;
 337
 338   for (;;)
 339     {
 340       /* People like decorating comments with '*', so check for '/'
 341          instead for efficiency.  */
 342       c = *cur++;
 343
 344       if (c == '/')
 345         {
 346           if (cur[-2] == '*')
 347             break;
 348
 349           /* Warn about potential nested comments, but not if the '/'
 350              comes immediately before the true comment delimiter.
 351              Don't bother to get it right across escaped newlines.  */
 352           if (CPP_OPTION (pfile, warn_comments)
 353               && cur[0] == '*' && cur[1] != '/')
 354             {
 355               buffer->cur = cur;
 356               cpp_error_with_line (pfile, CPP_DL_WARNING,
 357                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 358                                    "\"/*\" within comment");
 359             }
 360         }
 361       else if (c == '\n')
 362         {
 363           unsigned int cols;
 364           buffer->cur = cur - 1;
 365           _cpp_process_line_notes (pfile, true);
 366           if (buffer->next_line >= buffer->rlimit)
 367             return true;
 368           _cpp_clean_line (pfile);
 369
 370           cols = buffer->next_line - buffer->line_base;
 371           CPP_INCREMENT_LINE (pfile, cols);
 372
 373           cur = buffer->cur;
 374         }
 375     }
 376
 377   buffer->cur = cur;
 378   _cpp_process_line_notes (pfile, true);
 379   return false;
 380 }
 381
 382 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 383    terminating newline.  Handles escaped newlines.  Returns nonzero
 384    if a multiline comment.  */
 385 static int
 386 skip_line_comment (cpp_reader *pfile)
 387 {
 388   cpp_buffer *buffer = pfile->buffer;
 389   source_location orig_line = pfile->line_table->highest_line;
 390
 391   while (*buffer->cur != '\n')
 392     buffer->cur++;
 393
 394   _cpp_process_line_notes (pfile, true);
 395   return orig_line != pfile->line_table->highest_line;
 396 }
 397
 398 /* Skips whitespace, saving the next non-whitespace character.  */
 399 static void
 400 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 401 {
 402   cpp_buffer *buffer = pfile->buffer;
 403   bool saw_NUL = false;
 404
 405   do
 406     {
 407       /* Horizontal space always OK.  */
 408       if (c == ' ' || c == '\t')
 409         ;
 410       /* Just \f \v or \0 left.  */
 411       else if (c == '\0')
 412         saw_NUL = true;
 413       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 414         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 415                              CPP_BUF_COL (buffer),
 416                              "%s in preprocessing directive",
 417                              c == '\f' ? "form feed" : "vertical tab");
 418
 419       c = *buffer->cur++;
 420     }
 421   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 422   while (is_nvspace (c));
 423
 424   if (saw_NUL)
 425     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 426
 427   buffer->cur--;
 428 }
 429
 430 /* See if the characters of a number token are valid in a name (no
 431    '.', '+' or '-').  */
 432 static int
 433 name_p (cpp_reader *pfile, const cpp_string *string)
 434 {
 435   unsigned int i;
 436
 437   for (i = 0; i < string->len; i++)
 438     if (!is_idchar (string->text[i]))
 439       return 0;
 440
 441   return 1;
 442 }
 443
 444 /* After parsing an identifier or other sequence, produce a warning about
 445    sequences not in NFC/NFKC.  */
 446 static void
 447 warn_about_normalization (cpp_reader *pfile,
 448                           const cpp_token *token,
 449                           const struct normalize_state *s)
 450 {
 451   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
 452       && !pfile->state.skipping)
 453     {
 454       /* Make sure that the token is printed using UCNs, even
 455          if we'd otherwise happily print UTF-8.  */
 456       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
 457       size_t sz;
 458
 459       sz = cpp_spell_token (pfile, token, buf, false) - buf;
 460       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 461         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 462                              "`%.*s' is not in NFKC", (int) sz, buf);
 463       else
 464         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 465                              "`%.*s' is not in NFC", (int) sz, buf);
 466     }
 467 }
 468
 469 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 470    an identifier.  FIRST is TRUE if this starts an identifier.  */
 471 static bool
 472 forms_identifier_p (cpp_reader *pfile, int first,
 473                     struct normalize_state *state)
 474 {
 475   cpp_buffer *buffer = pfile->buffer;
 476
 477   if (*buffer->cur == '$')
 478     {
 479       if (!CPP_OPTION (pfile, dollars_in_ident))
 480         return false;
 481
 482       buffer->cur++;
 483       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 484         {
 485           CPP_OPTION (pfile, warn_dollars) = 0;
 486           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 487         }
 488
 489       return true;
 490     }
 491
 492   /* Is this a syntactically valid UCN?  */
 493   if (CPP_OPTION (pfile, extended_identifiers)
 494       && *buffer->cur == '\\'
 495       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 496     {
 497       buffer->cur += 2;
 498       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 499                           state))
 500         return true;
 501       buffer->cur -= 2;
 502     }
 503
 504   return false;
 505 }
 506
 507 /* Helper function to get the cpp_hashnode of the identifier BASE.  */
 508 static cpp_hashnode *
 509 lex_identifier_intern (cpp_reader *pfile, const uchar *base)
 510 {
 511   cpp_hashnode *result;
 512   const uchar *cur;
 513   unsigned int len;
 514   unsigned int hash = HT_HASHSTEP (0, *base);
 515
 516   cur = base + 1;
 517   while (ISIDNUM (*cur))
 518     {
 519       hash = HT_HASHSTEP (hash, *cur);
 520       cur++;
 521     }
 522   len = cur - base;
 523   hash = HT_HASHFINISH (hash, len);
 524   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 525                                               base, len, hash, HT_ALLOC));
 526
 527   /* Rarely, identifiers require diagnostics when lexed.  */
 528   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 529                         && !pfile->state.skipping, 0))
 530     {
 531       /* It is allowed to poison the same identifier twice.  */
 532       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 533         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 534                    NODE_NAME (result));
 535
 536       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 537          replacement list of a variadic macro.  */
 538       if (result == pfile->spec_nodes.n__VA_ARGS__
 539           && !pfile->state.va_args_ok)
 540         cpp_error (pfile, CPP_DL_PEDWARN,
 541                    "__VA_ARGS__ can only appear in the expansion"
 542                    " of a C99 variadic macro");
 543
 544       /* For -Wc++-compat, warn about use of C++ named operators.  */
 545       if (result->flags & NODE_WARN_OPERATOR)
 546         cpp_error (pfile, CPP_DL_WARNING,
 547                    "identifier \"%s\" is a special operator name in C++",
 548                    NODE_NAME (result));
 549     }
 550
 551   return result;
 552 }
 553
 554 /* Get the cpp_hashnode of an identifier specified by NAME in
 555    the current cpp_reader object.  If none is found, NULL is returned.  */
 556 cpp_hashnode *
 557 _cpp_lex_identifier (cpp_reader *pfile, const char *name)
 558 {
 559   cpp_hashnode *result;
 560   result = lex_identifier_intern (pfile, (uchar *) name);
 561   return result;
 562 }
 563
 564 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 565 static cpp_hashnode *
 566 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
 567                 struct normalize_state *nst)
 568 {
 569   cpp_hashnode *result;
 570   const uchar *cur;
 571   unsigned int len;
 572   unsigned int hash = HT_HASHSTEP (0, *base);
 573
 574   cur = pfile->buffer->cur;
 575   if (! starts_ucn)
 576     while (ISIDNUM (*cur))
 577       {
 578         hash = HT_HASHSTEP (hash, *cur);
 579         cur++;
 580       }
 581   pfile->buffer->cur = cur;
 582   if (starts_ucn || forms_identifier_p (pfile, false, nst))
 583     {
 584       /* Slower version for identifiers containing UCNs (or $).  */
 585       do {
 586         while (ISIDNUM (*pfile->buffer->cur))
 587           {
 588             pfile->buffer->cur++;
 589             NORMALIZE_STATE_UPDATE_IDNUM (nst);
 590           }
 591       } while (forms_identifier_p (pfile, false, nst));
 592       result = _cpp_interpret_identifier (pfile, base,
 593                                           pfile->buffer->cur - base);
 594     }
 595   else
 596     {
 597       len = cur - base;
 598       hash = HT_HASHFINISH (hash, len);
 599
 600       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
 601                                                   base, len, hash, HT_ALLOC));
 602     }
 603
 604   /* Rarely, identifiers require diagnostics when lexed.  */
 605   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 606                         && !pfile->state.skipping, 0))
 607     {
 608       /* It is allowed to poison the same identifier twice.  */
 609       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 610         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 611                    NODE_NAME (result));
 612
 613       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 614          replacement list of a variadic macro.  */
 615       if (result == pfile->spec_nodes.n__VA_ARGS__
 616           && !pfile->state.va_args_ok)
 617         cpp_error (pfile, CPP_DL_PEDWARN,
 618                    "__VA_ARGS__ can only appear in the expansion"
 619                    " of a C99 variadic macro");
 620
 621       /* For -Wc++-compat, warn about use of C++ named operators.  */
 622       if (result->flags & NODE_WARN_OPERATOR)
 623         cpp_error (pfile, CPP_DL_WARNING,
 624                    "identifier \"%s\" is a special operator name in C++",
 625                    NODE_NAME (result));
 626     }
 627
 628   return result;
 629 }
 630
 631 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 632 static void
 633 lex_number (cpp_reader *pfile, cpp_string *number,
 634             struct normalize_state *nst)
 635 {
 636   const uchar *cur;
 637   const uchar *base;
 638   uchar *dest;
 639
 640   base = pfile->buffer->cur - 1;
 641   do
 642     {
 643       cur = pfile->buffer->cur;
 644
 645       /* N.B. ISIDNUM does not include $.  */
 646       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 647         {
 648           cur++;
 649           NORMALIZE_STATE_UPDATE_IDNUM (nst);
 650         }
 651
 652       pfile->buffer->cur = cur;
 653     }
 654   while (forms_identifier_p (pfile, false, nst));
 655
 656   number->len = cur - base;
 657   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 658   memcpy (dest, base, number->len);
 659   dest[number->len] = '\0';
 660   number->text = dest;
 661 }
 662
 663 /* Create a token of type TYPE with a literal spelling.  */
 664 static void
 665 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 666                 unsigned int len, enum cpp_ttype type)
 667 {
 668   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 669
 670   memcpy (dest, base, len);
 671   dest[len] = '\0';
 672   token->type = type;
 673   token->val.str.len = len;
 674   token->val.str.text = dest;
 675 }
 676
 677 /* Lexes a raw string.  The stored string contains the spelling, including
 678    double quotes, delimiter string, '[' and ']', any leading
 679    'L', 'u', 'U' or 'u8' and 'R' modifier.  The type of the literal is
 680    stored in TOKEN; it is CPP_OTHER if the string was not properly
        terminated on a line where it may not continue, or if its delimiter
        is malformed, and CPP_EOF if the input ran out first.
 681
 682    The spelling is NUL-terminated, but it is not guaranteed that this
 683    is the first NUL since embedded NULs are preserved.

        BASE points to the start of the spelling (the encoding prefix, if
        any) and CUR to the character just past the 'R'; see the call in
        lex_string.  The delimiter is the run of at most 16 permitted
        characters starting at CUR + 1, and must be followed by '['.  The
        string ends at the first ']' that is followed by the same delimiter
        and a double quote.  */
 684
 685 static void
 686 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
 687                 const uchar *cur)
 688 {
 689   source_location saw_NUL = 0;
 690   const uchar *raw_prefix;
 691   unsigned int raw_prefix_len = 0;
 692   enum cpp_ttype type;
 693   size_t total_len = 0;
 694   _cpp_buff *first_buff = NULL, *last_buff = NULL;
 695
       /* Pick the string type from the encoding prefix at BASE.  */
 696   type = (*base == 'L' ? CPP_WSTRING :
 697           *base == 'U' ? CPP_STRING32 :
 698           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 699           : CPP_STRING);
 700
       /* Measure the delimiter: stop at the first character that is not
          allowed in it, or once 16 characters have been accepted.  */
 701   raw_prefix = cur + 1;
 702   while (raw_prefix_len < 16)
 703     {
 704       switch (raw_prefix[raw_prefix_len])
 705         {
 706         case ' ': case '[': case ']': case '\t':
 707         case '\v': case '\f': case '\n': default:
 708           break;
 709         /* Basic source charset except the above chars.  */
 710         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 711         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 712         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 713         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 714         case 'y': case 'z':
 715         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 716         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 717         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 718         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 719         case 'Y': case 'Z':
 720         case '0': case '1': case '2': case '3': case '4': case '5':
 721         case '6': case '7': case '8': case '9':
 722         case '_': case '{': case '}': case '#': case '(': case ')':
 723         case '<': case '>': case '%': case ':': case ';': case '.':
 724         case '?': case '*': case '+': case '-': case '/': case '^':
 725         case '&': case '|': case '~': case '!': case '=': case ',':
 726         case '\\': case '"': case '\'':
 727           raw_prefix_len++;
 728           continue;
 729         }
           /* Reached only from the first case group: leave the loop.  */
 730       break;
 731     }
 732
       /* A malformed delimiter: diagnose it, rewind buffer->cur to CUR and
          hand back the text from BASE up to there as a CPP_OTHER token.  */
 733   if (raw_prefix[raw_prefix_len] != '[')
 734     {
 735       int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
 736                 + 1;
 737       if (raw_prefix_len == 16)
 738         cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
 739                              "raw string delimiter longer than 16 characters");
 740       else
 741         cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
 742                              "invalid character '%c' in raw string delimiter",
 743                              (int) raw_prefix[raw_prefix_len]);
 744       pfile->buffer->cur = raw_prefix - 1;
 745       create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
 746       return;
 747     }
 748
       /* Scan the body, starting just after the '['.  */
 749   cur = raw_prefix + raw_prefix_len + 1;
 750   for (;;)
 751     {
 752       cppchar_t c = *cur++;
 753
 754       if (c == ']'
 755           && strncmp ((const char *) cur, (const char *) raw_prefix,
 756                       raw_prefix_len) == 0
 757           && cur[raw_prefix_len] == '"')
 758         {
               /* Closing sequence found; step past delimiter and quote.  */
 759           cur += raw_prefix_len + 1;
 760           break;
 761         }
 762       else if (c == '\n')
 763         {
 764           if (pfile->state.in_directive
 765               || pfile->state.parsing_args
 766               || pfile->state.in_deferred_pragma)
 767             {
 768               cur--;
 769               type = CPP_OTHER;
 770               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
 771                                    "unterminated raw string");
 772               break;
 773             }
 774
 775           /* raw strings allow embedded non-escaped newlines, which
 776              complicates this routine a lot.  */
               /* The text lexed so far (BASE up to CUR) is copied into a
                  chain of _cpp_buffs before a fresh line is requested.  */
 777           if (first_buff == NULL)
 778             {
 779               total_len = cur - base;
 780               first_buff = last_buff = _cpp_get_buff (pfile, total_len);
 781               memcpy (BUFF_FRONT (last_buff), base, total_len);
                   /* Re-point the delimiter at the saved copy.  */
 782               raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
 783               BUFF_FRONT (last_buff) += total_len;
 784             }
 785           else
 786             {
                   /* Fill what is left of the last buff, then put any
                      remainder into an extension buff.  */
 787               size_t len = cur - base;
 788               size_t cur_len = len > BUFF_ROOM (last_buff)
 789                                ? BUFF_ROOM (last_buff) : len;
 790
 791               total_len += len;
 792               memcpy (BUFF_FRONT (last_buff), base, cur_len);
 793               BUFF_FRONT (last_buff) += cur_len;
 794               if (len > cur_len)
 795                 {
 796                   last_buff = _cpp_append_extend_buff (pfile, last_buff,
 797                                                        len - cur_len);
 798                   memcpy (BUFF_FRONT (last_buff), base + cur_len,
 799                           len - cur_len);
 800                   BUFF_FRONT (last_buff) += len - cur_len;
 801                 }
 802             }
 803
 804           if (pfile->buffer->cur < pfile->buffer->rlimit)
 805             CPP_INCREMENT_LINE (pfile, 0);
 806           pfile->buffer->need_line = true;
 807
 808           if (!_cpp_get_fresh_line (pfile))
 809             {
                   /* No further line: the token becomes CPP_EOF, the saved
                      pieces are released and the error is reported at the
                      string's original location.  */
 810               source_location src_loc = token->src_loc;
 811               token->type = CPP_EOF;
 812               /* Tell the compiler the line number of the EOF token.  */
 813               token->src_loc = pfile->line_table->highest_line;
 814               token->flags = BOL;
 815               if (first_buff != NULL)
 816                 _cpp_release_buff (pfile, first_buff);
 817               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
 818                                    "unterminated raw string");
 819               return;
 820             }
 821
               /* Continue at the start of the new line; BASE now marks the
                  beginning of the part not yet saved.  */
 822           cur = base = pfile->buffer->cur;
 823         }
           /* Remember where the first embedded NUL was seen.  */
 824       else if (c == '\0' && !saw_NUL)
 825         LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
 826                                      CPP_BUF_COLUMN (pfile->buffer, cur));
 827     }
 828
 829   if (saw_NUL && !pfile->state.skipping)
 830     cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
 831                "null character(s) preserved in literal");
 832
 833   pfile->buffer->cur = cur;
 834   if (first_buff == NULL)
 835     create_literal (pfile, token, base, cur - base, type);
 836   else
 837     {
           /* Multi-line string: join the saved pieces and the tail of the
              current line into one NUL-terminated spelling.  */
 838       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
 839
 840       token->type = type;
 841       token->val.str.len = total_len + (cur - base);
 842       token->val.str.text = dest;
 843       last_buff = first_buff;
 844       while (last_buff != NULL)
 845         {
 846           memcpy (dest, last_buff->base,
 847                   BUFF_FRONT (last_buff) - last_buff->base);
 848           dest += BUFF_FRONT (last_buff) - last_buff->base;
 849           last_buff = last_buff->next;
 850         }
 851       _cpp_release_buff (pfile, first_buff);
 852       memcpy (dest, base, cur - base);
 853       dest[cur - base] = '\0';
 854     }
 855 }
 856
 857 /* Lexes a string, character constant, or angle-bracketed header file
 858    name.  The stored string contains the spelling, including opening
 859    quote and any leading 'L', 'u', 'U' or 'u8' and optional
 860    'R' modifier.  It returns the type of the literal, or CPP_OTHER
 861    if it was not properly terminated, or CPP_LESS for an unterminated
 862    header name which must be relexed as normal tokens.
 863
 864    The spelling is NUL-terminated, but it is not guaranteed that this
 865    is the first NUL since embedded NULs are preserved.  */
 866 static void
 867 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 868 {
 869   bool saw_NUL = false;
 870   const uchar *cur;
 871   cppchar_t terminator;
 872   enum cpp_ttype type;
 873
 874   cur = base;
 875   terminator = *cur++;
 876   if (terminator == 'L' || terminator == 'U')
 877     terminator = *cur++;
 878   else if (terminator == 'u')
 879     {
 880       terminator = *cur++;
 881       if (terminator == '8')
 882         terminator = *cur++;
 883     }
 884   if (terminator == 'R')
 885     {
 886       lex_raw_string (pfile, token, base, cur);
 887       return;
 888     }
 889   if (terminator == '"')
 890     type = (*base == 'L' ? CPP_WSTRING :
 891             *base == 'U' ? CPP_STRING32 :
 892             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
 893                          : CPP_STRING);
 894   else if (terminator == '\'')
 895     type = (*base == 'L' ? CPP_WCHAR :
 896             *base == 'U' ? CPP_CHAR32 :
 897             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
 898   else
 899     terminator = '>', type = CPP_HEADER_NAME;
 900
 901   for (;;)
 902     {
 903       cppchar_t c = *cur++;
 904
 905       /* In #include-style directives, terminators are not escapable.  */
 906       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 907         cur++;
 908       else if (c == terminator)
 909         break;
 910       else if (c == '\n')
 911         {
 912           cur--;
 913           /* Unmatched quotes always yield undefined behavior, but
 914              greedy lexing means that what appears to be an unterminated
 915              header name may actually be a legitimate sequence of tokens.  */
 916           if (terminator == '>')
 917             {
 918               token->type = CPP_LESS;
 919               return;
 920             }
 921           type = CPP_OTHER;
 922           break;
 923         }
 924       else if (c == '\0')
 925         saw_NUL = true;
 926     }
 927
 928   if (saw_NUL && !pfile->state.skipping)
 929     cpp_error (pfile, CPP_DL_WARNING,
 930                "null character(s) preserved in literal");
 931
 932   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
 933     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
 934                (int) terminator);
 935
 936   pfile->buffer->cur = cur;
 937   create_literal (pfile, token, base, cur - base, type);
 938 }
 939
 940 /* Return the comment table. The client may not make any assumption
 941    about the ordering of the table.  */
 942 cpp_comment_table *
 943 cpp_get_comments (cpp_reader *pfile)
 944 {
 945   return &pfile->comments;
 946 }
 947
 948 /* Append a comment to the end of the comment table. */
 949 static void
 950 store_comment (cpp_reader *pfile, cpp_token *token)
 951 {
 952   int len;
 953
 954   if (pfile->comments.allocated == 0)
 955     {
 956       pfile->comments.allocated = 256;
 957       pfile->comments.entries = (cpp_comment *) xmalloc
 958         (pfile->comments.allocated * sizeof (cpp_comment));
 959     }
 960
 961   if (pfile->comments.count == pfile->comments.allocated)
 962     {
 963       pfile->comments.allocated *= 2;
 964       pfile->comments.entries = (cpp_comment *) xrealloc
 965         (pfile->comments.entries,
 966          pfile->comments.allocated * sizeof (cpp_comment));
 967     }
 968
 969   len = token->val.str.len;
 970
 971   /* Copy comment. Note, token may not be NULL terminated. */
 972   pfile->comments.entries[pfile->comments.count].comment =
 973     (char *) xmalloc (sizeof (char) * (len + 1));
 974   memcpy (pfile->comments.entries[pfile->comments.count].comment,
 975           token->val.str.text, len);
 976   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
 977
 978   /* Set source location. */
 979   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
 980
 981   /* Increment the count of entries in the comment table. */
 982   pfile->comments.count++;
 983 }
 984
 985 /* The stored comment includes the comment start and any terminator.  */
 986 static void
 987 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 988               cppchar_t type)
 989 {
 990   unsigned char *buffer;
 991   unsigned int len, clen;
 992
 993   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 994
 995   /* C++ comments probably (not definitely) have moved past a new
 996      line, which we don't want to save in the comment.  */
 997   if (is_vspace (pfile->buffer->cur[-1]))
 998     len--;
 999
1000   /* If we are currently in a directive, then we need to store all
1001      C++ comments as C comments internally, and so we need to
1002      allocate a little extra space in that case.
1003
1004      Note that the only time we encounter a directive here is
1005      when we are saving comments in a "#define".  */
1006   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
1007
1008   buffer = _cpp_unaligned_alloc (pfile, clen);
1009
1010   token->type = CPP_COMMENT;
1011   token->val.str.len = clen;
1012   token->val.str.text = buffer;
1013
1014   buffer[0] = '/';
1015   memcpy (buffer + 1, from, len - 1);
1016
1017   /* Finish conversion to a C comment, if necessary.  */
1018   if (pfile->state.in_directive && type == '/')
1019     {
1020       buffer[1] = '*';
1021       buffer[clen - 2] = '*';
1022       buffer[clen - 1] = '/';
1023     }
1024
1025   /* Finally store this comment for use by clients of libcpp. */
1026   store_comment (pfile, token);
1027 }
1028
1029 /* Allocate COUNT tokens for RUN.  */
1030 void
1031 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1032 {
1033   run->base = XNEWVEC (cpp_token, count);
1034   run->limit = run->base + count;
1035   run->next = NULL;
1036 }
1037
1038 /* Returns the next tokenrun, or creates one if there is none.  */
1039 static tokenrun *
1040 next_tokenrun (tokenrun *run)
1041 {
1042   if (run->next == NULL)
1043     {
1044       run->next = XNEW (tokenrun);
1045       run->next->prev = run;
1046       _cpp_init_tokenrun (run->next, 250);
1047     }
1048
1049   return run->next;
1050 }
1051
1052 /* Look ahead in the input stream.  */
1053 const cpp_token *
1054 cpp_peek_token (cpp_reader *pfile, int index)
1055 {
1056   cpp_context *context = pfile->context;
1057   const cpp_token *peektok;
1058   int count;
1059
1060   /* First, scan through any pending cpp_context objects.  */
1061   while (context->prev)
1062     {
1063       ptrdiff_t sz = (context->direct_p
1064                       ? LAST (context).token - FIRST (context).token
1065                       : LAST (context).ptoken - FIRST (context).ptoken);
1066
1067       if (index < (int) sz)
1068         return (context->direct_p
1069                 ? FIRST (context).token + index
1070                 : *(FIRST (context).ptoken + index));
1071
1072       index -= (int) sz;
1073       context = context->prev;
1074     }
1075
1076   /* We will have to read some new tokens after all (and do so
1077      without invalidating preceding tokens).  */
1078   count = index;
1079   pfile->keep_tokens++;
1080
1081   do
1082     {
1083       peektok = _cpp_lex_token (pfile);
1084       if (peektok->type == CPP_EOF)
1085         return peektok;
1086     }
1087   while (index--);
1088
1089   _cpp_backup_tokens_direct (pfile, count + 1);
1090   pfile->keep_tokens--;
1091
1092   return peektok;
1093 }
1094
/* Allocate a single token that is invalidated at the same time as the
   rest of the tokens on the line.  Has its line and col set to the
   same as the last lexed token, so that diagnostics appear in the
   right place.

   The token is taken from the slot at pfile->cur_token, which is then
   advanced.  If lookahead tokens are pending they are first moved up
   by one slot to make room, spilling into the next token run where
   necessary.  Returns the new token; only its src_loc is set.  */
cpp_token *
_cpp_temp_token (cpp_reader *pfile)
{
  cpp_token *old, *result;
  /* SZ is the number of slots from cur_token to the end of the current
     run; LA is the number of pending lookahead tokens.  */
  ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
  ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;

  /* The token just before cur_token supplies the source location.  */
  old = pfile->cur_token - 1;
  /* Any pre-existing lookaheads must not be clobbered.  */
  if (la)
    {
      /* The shifted lookaheads no longer all fit in the current run.  */
      if (sz <= la)
        {
          tokenrun *next = next_tokenrun (pfile->cur_run);

          /* Move the LA - SZ tokens at the start of the next run up by
             one slot.  */
          if (sz < la)
            memmove (next->base + 1, next->base,
                     (la - sz) * sizeof (cpp_token));

          /* The last token of the current run moves to the first slot
             of the next run.  */
          next->base[0] = pfile->cur_run->limit[-1];
        }

      /* Shift the tokens that stay within the current run up by one
         slot; memmove is used because the regions overlap.  */
      if (sz > 1)
        memmove (pfile->cur_token + 1, pfile->cur_token,
                 MIN (la, sz - 1) * sizeof (cpp_token));
    }

  /* No room left in the current run: continue in the next one.  */
  if (!sz && pfile->cur_token == pfile->cur_run->limit)
    {
      pfile->cur_run = next_tokenrun (pfile->cur_run);
      pfile->cur_token = pfile->cur_run->base;
    }

  result = pfile->cur_token++;
  result->src_loc = old->src_loc;
  return result;
}
1136
/* Lex a token and return a pointer to it (external interface).  Takes
   care of issues like directive handling, token lookahead, multiple
   include optimization and skipping.

   The returned token is either a backed-up lookahead token, a token
   freshly lexed by _cpp_lex_direct, or pfile->directive_result.  While
   pfile->state.skipping is set outside of a directive, tokens other
   than CPP_EOF are discarded and lexing continues.  */
const cpp_token *
_cpp_lex_token (cpp_reader *pfile)
{
  cpp_token *result;

  for (;;)
    {
      /* Move on to the next token run when the current one is used
         up.  */
      if (pfile->cur_token == pfile->cur_run->limit)
        {
          pfile->cur_run = next_tokenrun (pfile->cur_run);
          pfile->cur_token = pfile->cur_run->base;
        }
      /* We assume that the current token is somewhere in the current
         run.  */
      if (pfile->cur_token < pfile->cur_run->base
          || pfile->cur_token >= pfile->cur_run->limit)
        abort ();

      /* Reuse a backed-up token if there is one, otherwise lex a new
         one.  */
      if (pfile->lookaheads)
        {
          pfile->lookaheads--;
          result = pfile->cur_token++;
        }
      else
        result = _cpp_lex_direct (pfile);

      /* Tokens flagged BOL may start a directive.  */
      if (result->flags & BOL)
        {
          /* Is this a directive.  If _cpp_handle_directive returns
             false, it is an assembler #.  */
          if (result->type == CPP_HASH
              /* 6.10.3 p 11: Directives in a list of macro arguments
                 gives undefined behavior.  This implementation
                 handles the directive as normal.  */
              && pfile->state.parsing_args != 1)
            {
              if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
                {
                  /* A CPP_PADDING directive result means there is
                     nothing to hand back: lex the next token.  */
                  if (pfile->directive_result.type == CPP_PADDING)
                    continue;
                  result = &pfile->directive_result;
                }
            }
          else if (pfile->state.in_deferred_pragma)
            result = &pfile->directive_result;

          /* Notify the client of the line change, unless skipping.  */
          if (pfile->cb.line_change && !pfile->state.skipping)
            pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
        }

      /* We don't skip tokens in directives.  */
      if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
        break;

      /* Outside a directive, invalidate controlling macros.  At file
         EOF, _cpp_lex_direct takes care of popping the buffer, so we never
         get here and MI optimization works.  */
      pfile->mi_valid = false;

      /* When skipping, only CPP_EOF is handed back to the caller.  */
      if (!pfile->state.skipping || result->type == CPP_EOF)
        break;
    }

  return result;
}
1205
1206 /* Returns true if a fresh line has been loaded.  */
1207 bool
1208 _cpp_get_fresh_line (cpp_reader *pfile)
1209 {
1210   int return_at_eof;
1211
1212   /* We can't get a new line until we leave the current directive.  */
1213   if (pfile->state.in_directive)
1214     return false;
1215
1216   for (;;)
1217     {
1218       cpp_buffer *buffer = pfile->buffer;
1219
1220       if (!buffer->need_line)
1221         return true;
1222
1223       if (buffer->next_line < buffer->rlimit)
1224         {
1225           _cpp_clean_line (pfile);
1226           return true;
1227         }
1228
1229       /* First, get out of parsing arguments state.  */
1230       if (pfile->state.parsing_args)
1231         return false;
1232
1233       /* End of buffer.  Non-empty files should end in a newline.  */
1234       if (buffer->buf != buffer->rlimit
1235           && buffer->next_line > buffer->rlimit
1236           && !buffer->from_stage3)
1237         {
1238           /* Clip to buffer size.  */
1239           buffer->next_line = buffer->rlimit;
1240         }
1241
1242       return_at_eof = buffer->return_at_eof;
1243       _cpp_pop_buffer (pfile);
1244       if (pfile->buffer == NULL || return_at_eof)
1245         return false;
1246     }
1247 }
1248
/* Helper for the lexer's operator cases: set result->type to THEN_TYPE
   and consume one character if the next input character is CHAR,
   otherwise set it to ELSE_TYPE and consume nothing.  The expansion
   refers to variables named `result' and `buffer', which must be in
   scope where the macro is used.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = ELSE_TYPE;                         \
      if (*buffer->cur == CHAR)                         \
        buffer->cur++, result->type = THEN_TYPE;        \
    }                                                   \
  while (0)
1257
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns a pointer to the lexed token.  */
cpp_token *
_cpp_lex_direct (cpp_reader *pfile)
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  cpp_token *result = pfile->cur_token++;

  /* Entered at the start and again after every newline.  */
 fresh_line:
  result->flags = 0;
  buffer = pfile->buffer;
  if (buffer->need_line)
    {
      /* The end of the line terminates a deferred pragma.  */
      if (pfile->state.in_deferred_pragma)
        {
          result->type = CPP_PRAGMA_EOL;
          pfile->state.in_deferred_pragma = false;
          if (!pfile->state.pragma_allow_expansion)
            pfile->state.prevent_expansion--;
          return result;
        }
      /* No further line could be obtained: report CPP_EOF.  */
      if (!_cpp_get_fresh_line (pfile))
        {
          result->type = CPP_EOF;
          if (!pfile->state.in_directive)
            {
              /* Tell the compiler the line number of the EOF token.  */
              result->src_loc = pfile->line_table->highest_line;
              result->flags = BOL;
            }
          return result;
        }
      /* Unless tokens must be kept, restart at the beginning of the
         base token run.  */
      if (!pfile->keep_tokens)
        {
          pfile->cur_run = &pfile->base_run;
          result = pfile->base_run.base;
          pfile->cur_token = result + 1;
        }
      result->flags = BOL;
      if (pfile->state.parsing_args == 2)
        result->flags |= PREV_WHITE;
    }
  /* Re-read the buffer pointer: _cpp_get_fresh_line may have popped
     the previous buffer.  */
  buffer = pfile->buffer;
  /* Entered after a skipped comment.  */
 update_tokens_line:
  result->src_loc = pfile->line_table->highest_line;

  /* Entered after skipped horizontal whitespace.  */
 skipped_white:
  if (buffer->cur >= buffer->notes[buffer->cur_note].pos
      && !pfile->overlaid_buffer)
    {
      _cpp_process_line_notes (pfile, false);
      result->src_loc = pfile->line_table->highest_line;
    }
  c = *buffer->cur++;

  LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
                               CPP_BUF_COLUMN (buffer, buffer->cur));

  /* Dispatch on the first character of the token.  */
  switch (c)
    {
    case ' ': case '\t': case '\f': case '\v': case '\0':
      result->flags |= PREV_WHITE;
      skip_whitespace (pfile, c);
      goto skipped_white;

    case '\n':
      if (buffer->cur < buffer->rlimit)
        CPP_INCREMENT_LINE (pfile, 0);
      buffer->need_line = true;
      goto fresh_line;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
        result->type = CPP_NUMBER;
        lex_number (pfile, &result->val.str, &nst);
        warn_about_normalization (pfile, result, &nst);
        break;
      }

    case 'L':
    case 'u':
    case 'U':
    case 'R':
      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
         wide strings or raw strings.  */
      if (c == 'L' || CPP_OPTION (pfile, uliterals))
        {
          /* Accepted here: a prefix directly followed by a quote (a
             single quote not after 'R'), a prefix followed by R", and
             u8 followed by " or R".  */
          if ((*buffer->cur == '\'' && c != 'R')
              || *buffer->cur == '"'
              || (*buffer->cur == 'R'
                  && c != 'R'
                  && buffer->cur[1] == '"'
                  && CPP_OPTION (pfile, uliterals))
              || (*buffer->cur == '8'
                  && c == 'u'
                  && (buffer->cur[1] == '"'
                      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
            {
              lex_string (pfile, result, buffer->cur - 1);
              break;
            }
        }
      /* Fall through.  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't':           case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K':
    case 'M': case 'N': case 'O': case 'P': case 'Q':
    case 'S': case 'T':           case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      {
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;
        result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
                                                &nst);
        warn_about_normalization (pfile, result, &nst);
      }

      /* Convert named operators to their proper types.  */
      if (result->val.node.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = (enum cpp_ttype) result->val.node.node->directive_index;
        }
      break;

    case '\'':
    case '"':
      lex_string (pfile, result, buffer->cur - 1);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      c = *buffer->cur;

      if (c == '*')
        {
          if (_cpp_skip_block_comment (pfile))
            cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
        }
      else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
                            || cpp_in_system_header (pfile)))
        {
          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "C++ style comments are not allowed in ISO C90");
              cpp_error (pfile, CPP_DL_PEDWARN,
                         "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
        }
      else if (c == '=')
        {
          buffer->cur++;
          result->type = CPP_DIV_EQ;
          break;
        }
      else
        {
          result->type = CPP_DIV;
          break;
        }

      /* Only the two comment branches above reach this point.  A
         comment that is not being saved counts as whitespace.  */
      if (!pfile->state.save_comments)
        {
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start, c);
      break;

    case '<':
      /* Where header names are expected, try to lex one first.
         lex_string sets the type to CPP_LESS for an unterminated
         header name, in which case the '<' is lexed as an operator
         below.  */
      if (pfile->state.angled_headers)
        {
          lex_string (pfile, result, buffer->cur - 1);
          if (result->type != CPP_LESS)
            break;
        }

      result->type = CPP_LESS;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_LESS_EQ;
      else if (*buffer->cur == '<')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
        }
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_SQUARE;
            }
          else if (*buffer->cur == '%')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_OPEN_BRACE;
            }
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_GREATER_EQ;
      else if (*buffer->cur == '>')
        {
          buffer->cur++;
          IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
        }
      break;

    case '%':
      result->type = CPP_MOD;
      if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MOD_EQ;
      else if (CPP_OPTION (pfile, digraphs))
        {
          if (*buffer->cur == ':')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_HASH;
              if (*buffer->cur == '%' && buffer->cur[1] == ':')
                buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
            }
          else if (*buffer->cur == '>')
            {
              buffer->cur++;
              result->flags |= DIGRAPH;
              result->type = CPP_CLOSE_BRACE;
            }
        }
      break;

    case '.':
      result->type = CPP_DOT;
      if (ISDIGIT (*buffer->cur))
        {
          struct normalize_state nst = INITIAL_NORMALIZE_STATE;
          result->type = CPP_NUMBER;
          lex_number (pfile, &result->val.str, &nst);
          warn_about_normalization (pfile, result, &nst);
        }
      else if (*buffer->cur == '.' && buffer->cur[1] == '.')
        buffer->cur += 2, result->type = CPP_ELLIPSIS;
      else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_DOT_STAR;
      break;

    case '+':
      result->type = CPP_PLUS;
      if (*buffer->cur == '+')
        buffer->cur++, result->type = CPP_PLUS_PLUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_PLUS_EQ;
      break;

    case '-':
      result->type = CPP_MINUS;
      if (*buffer->cur == '>')
        {
          buffer->cur++;
          result->type = CPP_DEREF;
          if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
            buffer->cur++, result->type = CPP_DEREF_STAR;
        }
      else if (*buffer->cur == '-')
        buffer->cur++, result->type = CPP_MINUS_MINUS;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_MINUS_EQ;
      break;

    case '&':
      result->type = CPP_AND;
      if (*buffer->cur == '&')
        buffer->cur++, result->type = CPP_AND_AND;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_AND_EQ;
      break;

    case '|':
      result->type = CPP_OR;
      if (*buffer->cur == '|')
        buffer->cur++, result->type = CPP_OR_OR;
      else if (*buffer->cur == '=')
        buffer->cur++, result->type = CPP_OR_EQ;
      break;

    case ':':
      result->type = CPP_COLON;
      if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
        buffer->cur++, result->type = CPP_SCOPE;
      else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
        {
          buffer->cur++;
          result->flags |= DIGRAPH;
          result->type = CPP_CLOSE_SQUARE;
        }
      break;

    case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
    case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
    case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
    case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
    case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;

    case '?': result->type = CPP_QUERY; break;
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective-C.  */
    case '@': result->type = CPP_ATSIGN; break;

    case '$':
    case '\\':
      {
        /* Step back onto the character so that forms_identifier_p can
           examine it.  */
        const uchar *base = --buffer->cur;
        struct normalize_state nst = INITIAL_NORMALIZE_STATE;

        if (forms_identifier_p (pfile, true, &nst))
          {
            result->type = CPP_NAME;
            result->val.node.node = lex_identifier (pfile, base, true, &nst);
            warn_about_normalization (pfile, result, &nst);
            break;
          }
        /* Not an identifier: consume the character again.  */
        buffer->cur++;
      }
      /* Fall through.  */

    default:
      /* Any other character becomes a one-character CPP_OTHER token.  */
      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
      break;
    }

  return result;
}
1633
1634 /* An upper bound on the number of bytes needed to spell TOKEN.
1635    Does not include preceding whitespace.  */
1636 unsigned int
1637 cpp_token_len (const cpp_token *token)
1638 {
1639   unsigned int len;
1640
1641   switch (TOKEN_SPELL (token))
1642     {
1643     default:            len = 6;                                break;
1644     case SPELL_LITERAL: len = token->val.str.len;               break;
1645     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
1646     }
1647
1648   return len;
1649 }
1650
/* Decode the UTF-8 sequence that starts at NAME and write it to BUFFER
   as a \U escape with eight lower-case hexadecimal digits.  Exactly
   10 bytes are written to BUFFER and no terminating NUL is added.
   Returns the number of bytes of NAME that were read.  Aborts if one
   of the bytes after the first is not a UTF-8 continuation byte.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int pos;
  int shift;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  for (lead = *name; lead & 0x80; lead <<= 1)
    seq_len++;

  /* The bits of the first byte below the length marker are the most
     significant payload bits.  */
  code_point = *name & (0x7F >> seq_len);

  /* Every following byte must look like 10xxxxxx and supplies six
     more payload bits.  */
  for (pos = 1; pos < seq_len; pos++)
    {
      name++;

      /* Ill-formed UTF-8.  */
      if ((*name & 0xC0) != 0x80)
        abort ();

      code_point = (code_point << 6) | (*name & 0x3F);
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  buffer += 2;

  /* Emit the low 32 bits, most significant nibble first.  */
  for (shift = 28; shift >= 0; shift -= 4)
    *buffer++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1684
1685 /* Given a token TYPE corresponding to a digraph, return a pointer to
1686    the spelling of the digraph.  */
1687 static const unsigned char *
1688 cpp_digraph2name (enum cpp_ttype type)
1689 {
1690   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1691 }
1692
1693 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1694    already contain the enough space to hold the token's spelling.
1695    Returns a pointer to the character after the last character written.
1696    FORSTRING is true if this is to be the spelling after translation
1697    phase 1 (this is different for UCNs).
1698    FIXME: Would be nice if we didn't need the PFILE argument.  */
1699 unsigned char *
1700 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1701                  unsigned char *buffer, bool forstring)
1702 {
1703   switch (TOKEN_SPELL (token))
1704     {
1705     case SPELL_OPERATOR:
1706       {
1707         const unsigned char *spelling;
1708         unsigned char c;
1709
1710         if (token->flags & DIGRAPH)
1711           spelling = cpp_digraph2name (token->type);
1712         else if (token->flags & NAMED_OP)
1713           goto spell_ident;
1714         else
1715           spelling = TOKEN_NAME (token);
1716
1717         while ((c = *spelling++) != '\0')
1718           *buffer++ = c;
1719       }
1720       break;
1721
1722     spell_ident:
1723     case SPELL_IDENT:
1724       if (forstring)
1725         {
1726           memcpy (buffer, NODE_NAME (token->val.node.node),
1727                   NODE_LEN (token->val.node.node));
1728           buffer += NODE_LEN (token->val.node.node);
1729         }
1730       else
1731         {
1732           size_t i;
1733           const unsigned char * name = NODE_NAME (token->val.node.node);
1734
1735           for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1736             if (name[i] & ~0x7F)
1737               {
1738                 i += utf8_to_ucn (buffer, name + i) - 1;
1739                 buffer += 10;
1740               }
1741             else
1742               *buffer++ = NODE_NAME (token->val.node.node)[i];
1743         }
1744       break;
1745
1746     case SPELL_LITERAL:
1747       memcpy (buffer, token->val.str.text, token->val.str.len);
1748       buffer += token->val.str.len;
1749       break;
1750
1751     case SPELL_NONE:
1752       cpp_error (pfile, CPP_DL_ICE,
1753                  "unspellable token %s", TOKEN_NAME (token));
1754       break;
1755     }
1756
1757   return buffer;
1758 }
1759
1760 /* Returns TOKEN spelt as a null-terminated string.  The string is
1761    freed when the reader is destroyed.  Useful for diagnostics.  */
1762 unsigned char *
1763 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1764 {
1765   unsigned int len = cpp_token_len (token) + 1;
1766   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1767
1768   end = cpp_spell_token (pfile, token, start, false);
1769   end[0] = '\0';
1770
1771   return start;
1772 }
1773
1774 /* Returns a pointer to a string which spells the token defined by
1775    TYPE and FLAGS.  Used by C front ends, which really should move to
1776    using cpp_token_as_text.  */
1777 const char *
1778 cpp_type2name (enum cpp_ttype type, unsigned char flags)
1779 {
1780   if (flags & DIGRAPH)
1781     return (const char *) cpp_digraph2name (type);
1782   else if (flags & NAMED_OP)
1783     return cpp_named_operator2name (type);
1784
1785   return (const char *) token_spellings[type].name;
1786 }
1787
1788 /* Writes the spelling of token to FP, without any preceding space.
1789    Separated from cpp_spell_token for efficiency - to avoid stdio
1790    double-buffering.  */
1791 void
1792 cpp_output_token (const cpp_token *token, FILE *fp)
1793 {
1794   switch (TOKEN_SPELL (token))
1795     {
1796     case SPELL_OPERATOR:
1797       {
1798         const unsigned char *spelling;
1799         int c;
1800
1801         if (token->flags & DIGRAPH)
1802           spelling = cpp_digraph2name (token->type);
1803         else if (token->flags & NAMED_OP)
1804           goto spell_ident;
1805         else
1806           spelling = TOKEN_NAME (token);
1807
1808         c = *spelling;
1809         do
1810           putc (c, fp);
1811         while ((c = *++spelling) != '\0');
1812       }
1813       break;
1814
1815     spell_ident:
1816     case SPELL_IDENT:
1817       {
1818         size_t i;
1819         const unsigned char * name = NODE_NAME (token->val.node.node);
1820
1821         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1822           if (name[i] & ~0x7F)
1823             {
1824               unsigned char buffer[10];
1825               i += utf8_to_ucn (buffer, name + i) - 1;
1826               fwrite (buffer, 1, 10, fp);
1827             }
1828           else
1829             fputc (NODE_NAME (token->val.node.node)[i], fp);
1830       }
1831       break;
1832
1833     case SPELL_LITERAL:
1834       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1835       break;
1836
1837     case SPELL_NONE:
1838       /* An error, most probably.  */
1839       break;
1840     }
1841 }
1842
1843 /* Compare two tokens.  */
1844 int
1845 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1846 {
1847   if (a->type == b->type && a->flags == b->flags)
1848     switch (TOKEN_SPELL (a))
1849       {
1850       default:                  /* Keep compiler happy.  */
1851       case SPELL_OPERATOR:
1852         /* token_no is used to track where multiple consecutive ##
1853            tokens were originally located.  */
1854         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1855       case SPELL_NONE:
1856         return (a->type != CPP_MACRO_ARG
1857                 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1858       case SPELL_IDENT:
1859         return a->val.node.node == b->val.node.node;
1860       case SPELL_LITERAL:
1861         return (a->val.str.len == b->val.str.len
1862                 && !memcmp (a->val.str.text, b->val.str.text,
1863                             a->val.str.len));
1864       }
1865
1866   return 0;
1867 }
1868
1869 /* Returns nonzero if a space should be inserted to avoid an
1870    accidental token paste for output.  For simplicity, it is
1871    conservative, and occasionally advises a space where one is not
1872    needed, e.g. "." and ".2".  */
1873 int
1874 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1875                  const cpp_token *token2)
1876 {
1877   enum cpp_ttype a = token1->type, b = token2->type;
1878   cppchar_t c;
1879
1880   if (token1->flags & NAMED_OP)
1881     a = CPP_NAME;
1882   if (token2->flags & NAMED_OP)
1883     b = CPP_NAME;
1884
1885   c = EOF;
1886   if (token2->flags & DIGRAPH)
1887     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1888   else if (token_spellings[b].category == SPELL_OPERATOR)
1889     c = token_spellings[b].name[0];
1890
1891   /* Quickly get everything that can paste with an '='.  */
1892   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1893     return 1;
1894
1895   switch (a)
1896     {
1897     case CPP_GREATER:   return c == '>';
1898     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
1899     case CPP_PLUS:      return c == '+';
1900     case CPP_MINUS:     return c == '-' || c == '>';
1901     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1902     case CPP_MOD:       return c == ':' || c == '>';
1903     case CPP_AND:       return c == '&';
1904     case CPP_OR:        return c == '|';
1905     case CPP_COLON:     return c == ':' || c == '>';
1906     case CPP_DEREF:     return c == '*';
1907     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1908     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1909     case CPP_NAME:      return ((b == CPP_NUMBER
1910                                  && name_p (pfile, &token2->val.str))
1911                                 || b == CPP_NAME
1912                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1913     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1914                                 || c == '.' || c == '+' || c == '-');
1915                                       /* UCNs */
1916     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1917                                  && b == CPP_NAME)
1918                                 || (CPP_OPTION (pfile, objc)
1919                                     && token1->val.str.text[0] == '@'
1920                                     && (b == CPP_NAME || b == CPP_STRING)));
1921     default:            break;
1922     }
1923
1924   return 0;
1925 }
1926
1927 /* Output all the remaining tokens on the current line, and a newline
1928    character, to FP.  Leading whitespace is removed.  If there are
1929    macros, special token padding is not performed.  */
1930 void
1931 cpp_output_line (cpp_reader *pfile, FILE *fp)
1932 {
1933   const cpp_token *token;
1934
1935   token = cpp_get_token (pfile);
1936   while (token->type != CPP_EOF)
1937     {
1938       cpp_output_token (token, fp);
1939       token = cpp_get_token (pfile);
1940       if (token->flags & PREV_WHITE)
1941         putc (' ', fp);
1942     }
1943
1944   putc ('\n', fp);
1945 }
1946
1947 /* Return a string representation of all the remaining tokens on the
1948    current line.  The result is allocated using xmalloc and must be
1949    freed by the caller.  */
1950 unsigned char *
1951 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1952 {
1953   const cpp_token *token;
1954   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1955   unsigned int alloced = 120 + out;
1956   unsigned char *result = (unsigned char *) xmalloc (alloced);
1957
1958   /* If DIR_NAME is empty, there are no initial contents.  */
1959   if (dir_name)
1960     {
1961       sprintf ((char *) result, "#%s ", dir_name);
1962       out += 2;
1963     }
1964
1965   token = cpp_get_token (pfile);
1966   while (token->type != CPP_EOF)
1967     {
1968       unsigned char *last;
1969       /* Include room for a possible space and the terminating nul.  */
1970       unsigned int len = cpp_token_len (token) + 2;
1971
1972       if (out + len > alloced)
1973         {
1974           alloced *= 2;
1975           if (out + len > alloced)
1976             alloced = out + len;
1977           result = (unsigned char *) xrealloc (result, alloced);
1978         }
1979
1980       last = cpp_spell_token (pfile, token, &result[out], 0);
1981       out = last - result;
1982
1983       token = cpp_get_token (pfile);
1984       if (token->flags & PREV_WHITE)
1985         result[out++] = ' ';
1986     }
1987
1988   result[out] = '\0';
1989   return result;
1990 }
1991
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice BUFF's remaining room.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Every macro argument is parenthesized so that an argument containing
   an operator of lower precedence than '+' still expands as one
   operand.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
2006
2007 /* Create a new allocation buffer.  Place the control block at the end
2008    of the buffer, so that buffer overflows will cause immediate chaos.  */
2009 static _cpp_buff *
2010 new_buff (size_t len)
2011 {
2012   _cpp_buff *result;
2013   unsigned char *base;
2014
2015   if (len < MIN_BUFF_SIZE)
2016     len = MIN_BUFF_SIZE;
2017   len = CPP_ALIGN (len);
2018
2019   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
2020   result = (_cpp_buff *) (base + len);
2021   result->base = base;
2022   result->cur = base;
2023   result->limit = base + len;
2024   result->next = NULL;
2025   return result;
2026 }
2027
2028 /* Place a chain of unwanted allocation buffers on the free list.  */
2029 void
2030 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
2031 {
2032   _cpp_buff *end = buff;
2033
2034   while (end->next)
2035     end = end->next;
2036   end->next = pfile->free_buffs;
2037   pfile->free_buffs = buff;
2038 }
2039
2040 /* Return a free buffer of size at least MIN_SIZE.  */
2041 _cpp_buff *
2042 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
2043 {
2044   _cpp_buff *result, **p;
2045
2046   for (p = &pfile->free_buffs;; p = &(*p)->next)
2047     {
2048       size_t size;
2049
2050       if (*p == NULL)
2051         return new_buff (min_size);
2052       result = *p;
2053       size = result->limit - result->base;
2054       /* Return a buffer that's big enough, but don't waste one that's
2055          way too big.  */
2056       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2057         break;
2058     }
2059
2060   *p = result->next;
2061   result->next = NULL;
2062   result->cur = result->base;
2063   return result;
2064 }
2065
2066 /* Creates a new buffer with enough space to hold the uncommitted
2067    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2068    the excess bytes to the new buffer.  Chains the new buffer after
2069    BUFF, and returns the new buffer.  */
2070 _cpp_buff *
2071 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2072 {
2073   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2074   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2075
2076   buff->next = new_buff;
2077   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2078   return new_buff;
2079 }
2080
2081 /* Creates a new buffer with enough space to hold the uncommitted
2082    remaining bytes of the buffer pointed to by BUFF, and at least
2083    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2084    Chains the new buffer before the buffer pointed to by BUFF, and
2085    updates the pointer to point to the new buffer.  */
2086 void
2087 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2088 {
2089   _cpp_buff *new_buff, *old_buff = *pbuff;
2090   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2091
2092   new_buff = _cpp_get_buff (pfile, size);
2093   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2094   new_buff->next = old_buff;
2095   *pbuff = new_buff;
2096 }
2097
2098 /* Free a chain of buffers starting at BUFF.  */
2099 void
2100 _cpp_free_buff (_cpp_buff *buff)
2101 {
2102   _cpp_buff *next;
2103
2104   for (; buff; buff = next)
2105     {
2106       next = buff->next;
2107       free (buff->base);
2108     }
2109 }
2110
2111 /* Allocate permanent, unaligned storage of length LEN.  */
2112 unsigned char *
2113 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2114 {
2115   _cpp_buff *buff = pfile->u_buff;
2116   unsigned char *result = buff->cur;
2117
2118   if (len > (size_t) (buff->limit - result))
2119     {
2120       buff = _cpp_get_buff (pfile, len);
2121       buff->next = pfile->u_buff;
2122       pfile->u_buff = buff;
2123       result = buff->cur;
2124     }
2125
2126   buff->cur = result + len;
2127   return result;
2128 }
2129
2130 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2131    That buffer is used for growing allocations when saving macro
2132    replacement lists in a #define, and when parsing an answer to an
2133    assertion in #assert, #unassert or #if (and therefore possibly
2134    whilst expanding macros).  It therefore must not be used by any
2135    code that they might call: specifically the lexer and the guts of
2136    the macro expander.
2137
2138    All existing other uses clearly fit this restriction: storing
2139    registered pragmas during initialization.  */
2140 unsigned char *
2141 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2142 {
2143   _cpp_buff *buff = pfile->a_buff;
2144   unsigned char *result = buff->cur;
2145
2146   if (len > (size_t) (buff->limit - result))
2147     {
2148       buff = _cpp_get_buff (pfile, len);
2149       buff->next = pfile->a_buff;
2150       pfile->a_buff = buff;
2151       result = buff->cur;
2152     }
2153
2154   buff->cur = result + len;
2155   return result;
2156 }
2157
2158 /* Say which field of TOK is in use.  */
2159
2160 enum cpp_token_fld_kind
2161 cpp_token_val_index (cpp_token *tok)
2162 {
2163   switch (TOKEN_SPELL (tok))
2164     {
2165     case SPELL_IDENT:
2166       return CPP_TOKEN_FLD_NODE;
2167     case SPELL_LITERAL:
2168       return CPP_TOKEN_FLD_STR;
2169     case SPELL_OPERATOR:
2170       if (tok->type == CPP_PASTE)
2171         return CPP_TOKEN_FLD_TOKEN_NO;
2172       else
2173         return CPP_TOKEN_FLD_NONE;
2174     case SPELL_NONE:
2175       if (tok->type == CPP_MACRO_ARG)
2176         return CPP_TOKEN_FLD_ARG_NO;
2177       else if (tok->type == CPP_PADDING)
2178         return CPP_TOKEN_FLD_SOURCE;
2179       else if (tok->type == CPP_PRAGMA)
2180         return CPP_TOKEN_FLD_PRAGMA;
2181       /* else fall through */
2182     default:
2183       return CPP_TOKEN_FLD_NONE;
2184     }
2185 }