libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
   3    Free Software Foundation, Inc.
   4    Contributed by Per Bothner, 1994-95.
   5    Based on CCCP program by Paul Rubin, June 1986
   6    Adapted to ANSI C, Richard Stallman, Jan 1987
   7    Broken out to separate file, Zack Weinberg, Mar 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 3, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "internal.h"
   27
      /* Spelling categories for token types.  Each token_spellings entry
         carries one: the OP macro always uses SPELL_OPERATOR, and TK pastes
         its second argument onto SPELL_ to select a category.  */
   28 enum spell_type
   29 {
   30   SPELL_OPERATOR = 0,
   31   SPELL_IDENT,
   32   SPELL_LITERAL,
   33   SPELL_NONE
   34 };
   35
      /* One token_spellings entry: the spelling category of a token type
         and its name (the operator text for OP entries, the stringified
         enumerator name for TK entries).  */
   36 struct token_spelling
   37 {
   38   enum spell_type category;
   39   const unsigned char *name;
   40 };
   41
      /* Digraph spellings, the alternative forms of # ## [ ] { }.  How
         this table is indexed is not visible in this part of the file.  */
   42 static const unsigned char *const digraph_spellings[] =
   43 { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
   44
      /* Expand TTYPE_TABLE into the token_spelling initializers; OP and TK
         are defined only for the duration of that expansion.  */
   45 #define OP(e, s) { SPELL_OPERATOR, UC s  },
   46 #define TK(e, s) { SPELL_ ## s,    UC #e },
   47 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
   48 #undef OP
   49 #undef TK
   50
      /* Look up the spelling category and the name for TOKEN's type.  */
   51 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
   52 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
   53
      /* Forward declarations of file-local helpers.  */
   54 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
   55 static int skip_line_comment (cpp_reader *);
   56 static void skip_whitespace (cpp_reader *, cppchar_t);
   57 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
   58 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
   59 static void store_comment (cpp_reader *, cpp_token *);
   60 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
   61                             unsigned int, enum cpp_ttype);
   62 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
   63 static int name_p (cpp_reader *, const cpp_string *);
   64 static tokenrun *next_tokenrun (tokenrun *);
   65
   66 static _cpp_buff *new_buff (size_t);
  67
  68
  69 /* Utility routine:
  70
  71    Compares, the token TOKEN to the NUL-terminated string STRING.
  72    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  73 int
  74 cpp_ideq (const cpp_token *token, const char *string)
  75 {
  76   if (token->type != CPP_NAME)
  77     return 0;
  78
  79   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
  80 }
  81
  82 /* Record a note TYPE at byte POS into the current cleaned logical
  83    line.  */
  84 static void
  85 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  86 {
  87   if (buffer->notes_used == buffer->notes_cap)
  88     {
  89       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  90       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  91                                   buffer->notes_cap);
  92     }
  93
  94   buffer->notes[buffer->notes_used].pos = pos;
  95   buffer->notes[buffer->notes_used].type = type;
  96   buffer->notes_used++;
  97 }
  98
   99 /* Returns with a logical line that contains no escaped newlines or
  100    trigraphs.  This is a time-critical inner loop.
         The line starting at buffer->next_line is cleaned in place.  Every
         escaped newline and every trigraph found is recorded with
         add_line_note; trigraphs are replaced only when the trigraphs
         option is set.  On return buffer->cur and buffer->line_base point
         at the start of the line, the line ends in '\n', a sentinel note
         follows the recorded ones, and buffer->next_line points one past
         the last input character consumed.  */
  101 void
  102 _cpp_clean_line (cpp_reader *pfile)
  103 {
  104   cpp_buffer *buffer;
  105   const uchar *s;
  106   uchar c, *d, *p;
  107
  108   buffer = pfile->buffer;
  109   buffer->cur_note = buffer->notes_used = 0;
  110   buffer->cur = buffer->line_base = buffer->next_line;
  111   buffer->need_line = false;
        /* S is pre-incremented before each read, so start one short.  */
  112   s = buffer->next_line - 1;
  113
        /* Buffers flagged from_stage3 skip trigraph and escaped-newline
           processing; for them only the end of the line is located (see
           the else branch).  */
  114   if (!buffer->from_stage3)
  115     {
          /* Most recent backslash seen on this line, if any.  */
  116       const uchar *pbackslash = NULL;
  117
  118       /* Short circuit for the common case of an un-escaped line with
  119          no trigraphs.  The primary win here is by not writing any
  120          data back to memory until we have to.  */
  121       for (;;)
  122         {
  123           c = *++s;
  124           if (__builtin_expect (c == '\n', false)
  125               || __builtin_expect (c == '\r', false))
  126             {
                  /* D marks where the terminating '\n' will be stored.  */
  127               d = (uchar *) s;
  128
                  /* A line end found exactly at rlimit finishes the line
                     without any escaped-newline check.  */
  129               if (__builtin_expect (s == buffer->rlimit, false))
  130                 goto done;
  131
  132               /* DOS line ending? */
  133               if (__builtin_expect (c == '\r', false)
  134                   && s[1] == '\n')
  135                 {
  136                   s++;
  137                   if (s == buffer->rlimit)
  138                     goto done;
  139                 }
  140
  141               if (__builtin_expect (pbackslash == NULL, true))
  142                 goto done;
  143
  144               /* Check for escaped newline: walk back over non-vertical
                     space and see whether the character before it is the
                     last backslash seen.  */
  145               p = d;
  146               while (is_nvspace (p[-1]))
  147                 p--;
  148               if (p - 1 != pbackslash)
  149                 goto done;
  150
  151               /* Have an escaped newline; process it and proceed to
  152                  the slow path.  The note type is ' ' when space
                     separates the backslash from the newline and '\\'
                     otherwise.  D is left just before the backslash so the
                     slow loop's first store overwrites it, and next_line
                     temporarily marks the splice point, which bounds the
                     slow loop's backward scan (it is set for real at
                     "done").  */
  153               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
  154               d = p - 2;
  155               buffer->next_line = p - 1;
  156               break;
  157             }
  158           if (__builtin_expect (c == '\\', false))
  159             pbackslash = s;
  160           else if (__builtin_expect (c == '?', false)
  161                    && __builtin_expect (s[1] == '?', false)
  162                    && _cpp_trigraph_map[s[2]])
  163             {
  164               /* Have a trigraph.  We may or may not have to convert
  165                  it.  Add a line note regardless, for -Wtrigraphs.  */
  166               add_line_note (buffer, s, s[2]);
  167               if (CPP_OPTION (pfile, trigraphs))
  168                 {
  169                   /* We do, and that means we have to switch to the
  170                      slow path.  The replacement character is stored
                         over the first '?' and the other two characters
                         are skipped.  */
  171                   d = (uchar *) s;
  172                   *d = _cpp_trigraph_map[s[2]];
  173                   s += 2;
  174                   break;
  175                 }
  176             }
  177         }
  178
  179
          /* Slow path: S reads ahead of D, and each character is copied
             down to close the gaps left by removed characters.  */
  180       for (;;)
  181         {
  182           c = *++s;
  183           *++d = c;
  184
  185           if (c == '\n' || c == '\r')
  186             {
  187                   /* Handle DOS line endings.  */
  188               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
  189                 s++;
  190               if (s == buffer->rlimit)
  191                 break;
  192
  193               /* Escaped?  The backward scan stops at the previous
                     splice point, which next_line currently records.  */
  194               p = d;
  195               while (p != buffer->next_line && is_nvspace (p[-1]))
  196                 p--;
  197               if (p == buffer->next_line || p[-1] != '\\')
  198                 break;
  199
  200               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
  201               d = p - 2;
  202               buffer->next_line = p - 1;
  203             }
  204           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
  205             {
  206               /* Add a note regardless, for the benefit of -Wtrigraphs.
                     The note position is D, i.e. within the cleaned
                     line.  */
  207               add_line_note (buffer, d, s[2]);
  208               if (CPP_OPTION (pfile, trigraphs))
  209                 {
  210                   *d = _cpp_trigraph_map[s[2]];
  211                   s += 2;
  212                 }
  213             }
  214         }
  215     }
  216   else
  217     {
          /* Just find the end of the line; nothing is rewritten.  */
  218       do
  219         s++;
  220       while (*s != '\n' && *s != '\r');
  221       d = (uchar *) s;
  222
  223       /* Handle DOS line endings.  */
  224       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
  225         s++;
  226     }
  227
  228  done:
        /* Terminate the cleaned line; this also turns a '\r' into '\n'.  */
  229   *d = '\n';
  230   /* A sentinel note that should never be processed.  */
  231   add_line_note (buffer, d + 1, '\n');
  232   buffer->next_line = s + 1;
  233 }
 234
 235 /* Return true if the trigraph indicated by NOTE should be warned
 236    about in a comment.  */
 237 static bool
 238 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 239 {
 240   const uchar *p;
 241
 242   /* Within comments we don't warn about trigraphs, unless the
 243      trigraph forms an escaped newline, as that may change
 244      behavior.  */
 245   if (note->type != '/')
 246     return false;
 247
 248   /* If -trigraphs, then this was an escaped newline iff the next note
 249      is coincident.  */
 250   if (CPP_OPTION (pfile, trigraphs))
 251     return note[1].pos == note->pos;
 252
 253   /* Otherwise, see if this forms an escaped newline.  */
 254   p = note->pos + 3;
 255   while (is_nvspace (*p))
 256     p++;
 257
 258   /* There might have been escaped newlines between the trigraph and the
 259      newline we found.  Hence the position test.  */
 260   return (*p == '\n' && p < note[1].pos);
 261 }
 262
  263 /* Process the notes created by add_line_note as far as the current
  264    location.
         Notes positioned at or before buffer->cur are consumed in order.
         An escaped-newline note (type '\\', or ' ' when space separated
         the backslash from the newline) moves line_base to the note and
         increments the line; a trigraph note may produce a warning when
         the warn_trigraphs option is set.  IN_COMMENT nonzero suppresses
         the backslash-space warning and limits trigraph warnings to those
         warn_in_comment accepts.  Any other note type aborts.  */
  265 void
  266 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
  267 {
  268   cpp_buffer *buffer = pfile->buffer;
  269
  270   for (;;)
  271     {
  272       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
  273       unsigned int col;
  274
          /* Stop at the first note that lies beyond the current position.  */
  275       if (note->pos > buffer->cur)
  276         break;
  277
  278       buffer->cur_note++;
  279       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
  280
  281       if (note->type == '\\' || note->type == ' ')
  282         {
  283           if (note->type == ' ' && !in_comment)
  284             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
  285                                  "backslash and newline separated by space");
  286
              /* next_line beyond rlimit is reported as a backslash-newline
                 that ends the file.  */
  287           if (buffer->next_line > buffer->rlimit)
  288             {
  289               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
  290                                    "backslash-newline at end of file");
  291               /* Prevent "no newline at end of file" warning.  */
  292               buffer->next_line = buffer->rlimit;
  293             }
  294
              /* Columns are now measured from the splice point, and a new
                 line is counted.  */
  295           buffer->line_base = note->pos;
  296           CPP_INCREMENT_LINE (pfile, 0);
  297         }
  298       else if (_cpp_trigraph_map[note->type])
  299         {
  300           if (CPP_OPTION (pfile, warn_trigraphs)
  301               && (!in_comment || warn_in_comment (pfile, note)))
  302             {
  303               if (CPP_OPTION (pfile, trigraphs))
  304                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
  305                                      "trigraph ??%c converted to %c",
  306                                      note->type,
  307                                      (int) _cpp_trigraph_map[note->type]);
  308               else
  309                 {
  310                   cpp_error_with_line
  311                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
  312                      "trigraph ??%c ignored, use -trigraphs to enable",
  313                      note->type);
  314                 }
  315             }
  316         }
  317       else
            /* Neither an escaped newline nor a trigraph: an unexpected
               note type.  */
  318         abort ();
  319     }
  320 }
 321
  322 /* Skip a C-style block comment.  We find the end of the comment by
  323    seeing if an asterisk is before every '/' we encounter.  Returns
  324    nonzero if comment terminated by EOF, zero otherwise.
  325
  326    Buffer->cur points to the initial asterisk of the comment.  On a
         zero return it points just past the closing '/'.  Line notes are
         processed with in_comment set as each line of the comment ends.  */
  327 bool
  328 _cpp_skip_block_comment (cpp_reader *pfile)
  329 {
  330   cpp_buffer *buffer = pfile->buffer;
  331   const uchar *cur = buffer->cur;
  332   uchar c;
  333
        /* Step over the opening asterisk.  If a '/' follows immediately,
           skip it as well so that the opening asterisk cannot be taken as
           the first half of the terminator.  */
  334   cur++;
  335   if (*cur == '/')
  336     cur++;
  337
  338   for (;;)
  339     {
  340       /* People like decorating comments with '*', so check for '/'
  341          instead for efficiency.  */
  342       c = *cur++;
  343
  344       if (c == '/')
  345         {
              /* cur[-1] is the '/' just read, so cur[-2] precedes it.  */
  346           if (cur[-2] == '*')
  347             break;
  348
  349           /* Warn about potential nested comments, but not if the '/'
  350              comes immediately before the true comment delimiter.
  351              Don't bother to get it right across escaped newlines.  */
  352           if (CPP_OPTION (pfile, warn_comments)
  353               && cur[0] == '*' && cur[1] != '/')
  354             {
  355               buffer->cur = cur;
  356               cpp_error_with_line (pfile, CPP_DL_WARNING,
  357                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
  358                                    "\"/*\" within comment");
  359             }
  360         }
  361       else if (c == '\n')
  362         {
              /* End of this line: process its notes, then clean the next
                 line unless the buffer is exhausted, in which case the
                 comment was never closed.  */
  363           unsigned int cols;
  364           buffer->cur = cur - 1;
  365           _cpp_process_line_notes (pfile, true);
  366           if (buffer->next_line >= buffer->rlimit)
  367             return true;
  368           _cpp_clean_line (pfile);
  369
  370           cols = buffer->next_line - buffer->line_base;
  371           CPP_INCREMENT_LINE (pfile, cols);
  372
  373           cur = buffer->cur;
  374         }
  375     }
  376
  377   buffer->cur = cur;
  378   _cpp_process_line_notes (pfile, true);
  379   return false;
  380 }
 381
 382 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 383    terminating newline.  Handles escaped newlines.  Returns nonzero
 384    if a multiline comment.  */
 385 static int
 386 skip_line_comment (cpp_reader *pfile)
 387 {
 388   cpp_buffer *buffer = pfile->buffer;
 389   source_location orig_line = pfile->line_table->highest_line;
 390
 391   while (*buffer->cur != '\n')
 392     buffer->cur++;
 393
 394   _cpp_process_line_notes (pfile, true);
 395   return orig_line != pfile->line_table->highest_line;
 396 }
 397
 398 /* Skips whitespace, saving the next non-whitespace character.  */
 399 static void
 400 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 401 {
 402   cpp_buffer *buffer = pfile->buffer;
 403   bool saw_NUL = false;
 404
 405   do
 406     {
 407       /* Horizontal space always OK.  */
 408       if (c == ' ' || c == '\t')
 409         ;
 410       /* Just \f \v or \0 left.  */
 411       else if (c == '\0')
 412         saw_NUL = true;
 413       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 414         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 415                              CPP_BUF_COL (buffer),
 416                              "%s in preprocessing directive",
 417                              c == '\f' ? "form feed" : "vertical tab");
 418
 419       c = *buffer->cur++;
 420     }
 421   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 422   while (is_nvspace (c));
 423
 424   if (saw_NUL)
 425     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 426
 427   buffer->cur--;
 428 }
 429
 430 /* See if the characters of a number token are valid in a name (no
 431    '.', '+' or '-').  */
 432 static int
 433 name_p (cpp_reader *pfile, const cpp_string *string)
 434 {
 435   unsigned int i;
 436
 437   for (i = 0; i < string->len; i++)
 438     if (!is_idchar (string->text[i]))
 439       return 0;
 440
 441   return 1;
 442 }
 443
 444 /* After parsing an identifier or other sequence, produce a warning about
 445    sequences not in NFC/NFKC.  */
 446 static void
 447 warn_about_normalization (cpp_reader *pfile,
 448                           const cpp_token *token,
 449                           const struct normalize_state *s)
 450 {
 451   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
 452       && !pfile->state.skipping)
 453     {
 454       /* Make sure that the token is printed using UCNs, even
 455          if we'd otherwise happily print UTF-8.  */
 456       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
 457       size_t sz;
 458
 459       sz = cpp_spell_token (pfile, token, buf, false) - buf;
 460       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 461         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 462                              "`%.*s' is not in NFKC", (int) sz, buf);
 463       else
 464         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 465                              "`%.*s' is not in NFC", (int) sz, buf);
 466     }
 467 }
 468
 469 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 470    an identifier.  FIRST is TRUE if this starts an identifier.  */
 471 static bool
 472 forms_identifier_p (cpp_reader *pfile, int first,
 473                     struct normalize_state *state)
 474 {
 475   cpp_buffer *buffer = pfile->buffer;
 476
 477   if (*buffer->cur == '$')
 478     {
 479       if (!CPP_OPTION (pfile, dollars_in_ident))
 480         return false;
 481
 482       buffer->cur++;
 483       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 484         {
 485           CPP_OPTION (pfile, warn_dollars) = 0;
 486           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 487         }
 488
 489       return true;
 490     }
 491
 492   /* Is this a syntactically valid UCN?  */
 493   if (CPP_OPTION (pfile, extended_identifiers)
 494       && *buffer->cur == '\\'
 495       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 496     {
 497       buffer->cur += 2;
 498       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 499                           state))
 500         return true;
 501       buffer->cur -= 2;
 502     }
 503
 504   return false;
 505 }
 506
  507 /* Lex an identifier starting at BUFFER->CUR - 1.
         BASE points at the first character of the identifier.  STARTS_UCN
         set skips the fast scan and goes straight to the slower path.
         NST is updated on the slower path and passed to
         forms_identifier_p.  Returns the identifier's hash node, after
         issuing any diagnostics flagged on that node (unless skipping).  */
  508 static cpp_hashnode *
  509 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
  510                 struct normalize_state *nst)
  511 {
  512   cpp_hashnode *result;
  513   const uchar *cur;
  514   unsigned int len;
  515   unsigned int hash = HT_HASHSTEP (0, *base);
  516
        /* Fast scan: extend the hash one plain identifier character at a
           time.  */
  517   cur = pfile->buffer->cur;
  518   if (! starts_ucn)
  519     while (ISIDNUM (*cur))
  520       {
  521         hash = HT_HASHSTEP (hash, *cur);
  522         cur++;
  523       }
        /* forms_identifier_p examines buffer->cur, so publish the scan
           position before asking it whether the identifier continues.  */
  524   pfile->buffer->cur = cur;
  525   if (starts_ucn || forms_identifier_p (pfile, false, nst))
  526     {
  527       /* Slower version for identifiers containing UCNs (or $).  */
  528       do {
  529         while (ISIDNUM (*pfile->buffer->cur))
  530           {
  531             pfile->buffer->cur++;
  532             NORMALIZE_STATE_UPDATE_IDNUM (nst);
  533           }
  534       } while (forms_identifier_p (pfile, false, nst));
  535       result = _cpp_interpret_identifier (pfile, base,
  536                                           pfile->buffer->cur - base);
  537     }
  538   else
  539     {
          /* Plain identifier: look it up with the hash computed above.  */
  540       len = cur - base;
  541       hash = HT_HASHFINISH (hash, len);
  542
  543       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
  544                                                   base, len, hash, HT_ALLOC));
  545     }
  546
  547   /* Rarely, identifiers require diagnostics when lexed.  */
  548   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
  549                         && !pfile->state.skipping, 0))
  550     {
  551       /* It is allowed to poison the same identifier twice.  */
  552       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
  553         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
  554                    NODE_NAME (result));
  555
  556       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
  557          replacement list of a variadic macro.  */
  558       if (result == pfile->spec_nodes.n__VA_ARGS__
  559           && !pfile->state.va_args_ok)
  560         cpp_error (pfile, CPP_DL_PEDWARN,
  561                    "__VA_ARGS__ can only appear in the expansion"
  562                    " of a C99 variadic macro");
  563
  564       /* For -Wc++-compat, warn about use of C++ named operators.  */
  565       if (result->flags & NODE_WARN_OPERATOR)
  566         cpp_error (pfile, CPP_DL_WARNING,
  567                    "identifier \"%s\" is a special operator name in C++",
  568                    NODE_NAME (result));
  569     }
  570
  571   return result;
  572 }
 573
 574 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 575 static void
 576 lex_number (cpp_reader *pfile, cpp_string *number,
 577             struct normalize_state *nst)
 578 {
 579   const uchar *cur;
 580   const uchar *base;
 581   uchar *dest;
 582
 583   base = pfile->buffer->cur - 1;
 584   do
 585     {
 586       cur = pfile->buffer->cur;
 587
 588       /* N.B. ISIDNUM does not include $.  */
 589       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 590         {
 591           cur++;
 592           NORMALIZE_STATE_UPDATE_IDNUM (nst);
 593         }
 594
 595       pfile->buffer->cur = cur;
 596     }
 597   while (forms_identifier_p (pfile, false, nst));
 598
 599   number->len = cur - base;
 600   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 601   memcpy (dest, base, number->len);
 602   dest[number->len] = '\0';
 603   number->text = dest;
 604 }
 605
 606 /* Create a token of type TYPE with a literal spelling.  */
 607 static void
 608 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 609                 unsigned int len, enum cpp_ttype type)
 610 {
 611   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 612
 613   memcpy (dest, base, len);
 614   dest[len] = '\0';
 615   token->type = type;
 616   token->val.str.len = len;
 617   token->val.str.text = dest;
 618 }
 619
  620 /* Lexes a raw string.  The stored string contains the spelling, including
  621    double quotes, delimiter string, '[' and ']', any leading
  622    'L', 'u', 'U' or 'u8' and 'R' modifier.  The type of the literal is
  623    stored in TOKEN->type; it is CPP_OTHER if the literal was not properly
  624    terminated or its delimiter was invalid, and CPP_EOF if the input
         ran out before the terminator.  BASE points at the start of the
         spelling and CUR just past the 'R'.
  625    The spelling is NUL-terminated, but it is not guaranteed that this
  626    is the first NUL since embedded NULs are preserved.  */
  627
  628 static void
  629 lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
  630                 const uchar *cur)
  631 {
        /* Location of the first NUL seen in the body, or 0 if none.  */
  632   source_location saw_NUL = 0;
  633   const uchar *raw_prefix;
  634   unsigned int raw_prefix_len = 0;
  635   enum cpp_ttype type;
        /* Bytes saved so far in the FIRST_BUFF chain, which is used only
           when the literal spans more than one line.  */
  636   size_t total_len = 0;
  637   _cpp_buff *first_buff = NULL, *last_buff = NULL;
  638
        /* The encoding prefix at BASE selects the string type.  */
  639   type = (*base == 'L' ? CPP_WSTRING :
  640           *base == 'U' ? CPP_STRING32 :
  641           *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
  642           : CPP_STRING);
  643
        /* The delimiter starts one character after CUR (presumably CUR is
           the opening double quote; the check is not in this function).
           Measure it: at most 16 characters from the allowed set.  Any
           other character leaves the switch through the first case group
           and then ends the loop.  */
  644   raw_prefix = cur + 1;
  645   while (raw_prefix_len < 16)
  646     {
  647       switch (raw_prefix[raw_prefix_len])
  648         {
  649         case ' ': case '[': case ']': case '\t':
  650         case '\v': case '\f': case '\n': default:
  651           break;
  652         /* Basic source charset except the above chars.  */
  653         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  654         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
  655         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
  656         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
  657         case 'y': case 'z':
  658         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  659         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
  660         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
  661         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
  662         case 'Y': case 'Z':
  663         case '0': case '1': case '2': case '3': case '4': case '5':
  664         case '6': case '7': case '8': case '9':
  665         case '_': case '{': case '}': case '#': case '(': case ')':
  666         case '<': case '>': case '%': case ':': case ';': case '.':
  667         case '?': case '*': case '+': case '-': case '/': case '^':
  668         case '&': case '|': case '~': case '!': case '=': case ',':
  669         case '\\': case '"': case '\'':
  670           raw_prefix_len++;
  671           continue;
  672         }
  673       break;
  674     }
  675
        /* The delimiter must be followed by '['.  Otherwise report the
           problem, resume lexing at the character before the delimiter,
           and return only the text before that character as CPP_OTHER.  */
  676   if (raw_prefix[raw_prefix_len] != '[')
  677     {
  678       int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
  679                 + 1;
  680       if (raw_prefix_len == 16)
  681         cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
  682                              "raw string delimiter longer than 16 characters");
  683       else
  684         cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
  685                              "invalid character '%c' in raw string delimiter",
  686                              (int) raw_prefix[raw_prefix_len]);
  687       pfile->buffer->cur = raw_prefix - 1;
  688       create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
  689       return;
  690     }
  691
        /* Scan the body, starting just after the '['.  */
  692   cur = raw_prefix + raw_prefix_len + 1;
  693   for (;;)
  694     {
  695       cppchar_t c = *cur++;
  696
          /* The literal ends at ']' followed by the delimiter and a
             double quote.  */
  697       if (c == ']'
  698           && strncmp ((const char *) cur, (const char *) raw_prefix,
  699                       raw_prefix_len) == 0
  700           && cur[raw_prefix_len] == '"')
  701         {
  702           cur += raw_prefix_len + 1;
  703           break;
  704         }
  705       else if (c == '\n')
  706         {
              /* In these states the literal may not continue onto another
                 line: stop at the newline and yield CPP_OTHER.  */
  707           if (pfile->state.in_directive
  708               || pfile->state.parsing_args
  709               || pfile->state.in_deferred_pragma)
  710             {
  711               cur--;
  712               type = CPP_OTHER;
  713               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
  714                                    "unterminated raw string");
  715               break;
  716             }
  717
  718           /* raw strings allow embedded non-escaped newlines, which
  719              complicates this routine a lot.  The text lexed so far is
                 saved in a buffer chain before a fresh line is fetched.  */
  720           if (first_buff == NULL)
  721             {
                  /* First saved piece.  RAW_PREFIX is re-pointed into the
                     saved copy, presumably because the text at BASE is not
                     preserved once a fresh line has been read.  */
  722               total_len = cur - base;
  723               first_buff = last_buff = _cpp_get_buff (pfile, total_len);
  724               memcpy (BUFF_FRONT (last_buff), base, total_len);
  725               raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
  726               BUFF_FRONT (last_buff) += total_len;
  727             }
  728           else
  729             {
                  /* Later pieces: fill whatever room the last buffer has,
                     then extend the chain for the remainder.  */
  730               size_t len = cur - base;
  731               size_t cur_len = len > BUFF_ROOM (last_buff)
  732                                ? BUFF_ROOM (last_buff) : len;
  733
  734               total_len += len;
  735               memcpy (BUFF_FRONT (last_buff), base, cur_len);
  736               BUFF_FRONT (last_buff) += cur_len;
  737               if (len > cur_len)
  738                 {
  739                   last_buff = _cpp_append_extend_buff (pfile, last_buff,
  740                                                        len - cur_len);
  741                   memcpy (BUFF_FRONT (last_buff), base + cur_len,
  742                           len - cur_len);
  743                   BUFF_FRONT (last_buff) += len - cur_len;
  744                 }
  745             }
  746
  747           if (pfile->buffer->cur < pfile->buffer->rlimit)
  748             CPP_INCREMENT_LINE (pfile, 0);
  749           pfile->buffer->need_line = true;
  750
              /* No further line: the token becomes CPP_EOF and the saved
                 text is released.  */
  751           if (!_cpp_get_fresh_line (pfile))
  752             {
  753               source_location src_loc = token->src_loc;
  754               token->type = CPP_EOF;
  755               /* Tell the compiler the line number of the EOF token.  */
  756               token->src_loc = pfile->line_table->highest_line;
  757               token->flags = BOL;
  758               if (first_buff != NULL)
  759                 _cpp_release_buff (pfile, first_buff);
  760               cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
  761                                    "unterminated raw string");
  762               return;
  763             }
  764
              /* Continue scanning at the start of the new line.  */
  765           cur = base = pfile->buffer->cur;
  766         }
          /* Remember where the first NUL was seen, for the warning below.  */
  767       else if (c == '\0' && !saw_NUL)
  768         LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
  769                                      CPP_BUF_COLUMN (pfile->buffer, cur));
  770     }
  771
  772   if (saw_NUL && !pfile->state.skipping)
  773     cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
  774                "null character(s) preserved in literal");
  775
  776   pfile->buffer->cur = cur;
  777   if (first_buff == NULL)
  778     create_literal (pfile, token, base, cur - base, type);
  779   else
  780     {
          /* Multi-line literal: concatenate the saved pieces, then the
             part lexed from the current line, and NUL-terminate.  */
  781       uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
  782
  783       token->type = type;
  784       token->val.str.len = total_len + (cur - base);
  785       token->val.str.text = dest;
  786       last_buff = first_buff;
  787       while (last_buff != NULL)
  788         {
  789           memcpy (dest, last_buff->base,
  790                   BUFF_FRONT (last_buff) - last_buff->base);
  791           dest += BUFF_FRONT (last_buff) - last_buff->base;
  792           last_buff = last_buff->next;
  793         }
  794       _cpp_release_buff (pfile, first_buff);
  795       memcpy (dest, base, cur - base);
  796       dest[cur - base] = '\0';
  797     }
  798 }
 799
  800 /* Lexes a string, character constant, or angle-bracketed header file
  801    name.  The stored string contains the spelling, including opening
  802    quote and any leading 'L', 'u', 'U' or 'u8' and optional
  803    'R' modifier.  The type of the literal is stored in TOKEN->type: it is
  804    CPP_OTHER if the literal was not properly terminated, or CPP_LESS for an
  805    unterminated header name which must be relexed as normal tokens (in
         that case nothing else is stored and buffer->cur is not moved).
         BASE points at the first character of the spelling.  Raw strings
         are handed over to lex_raw_string.
  806
  807    The spelling is NUL-terminated, but it is not guaranteed that this
  808    is the first NUL since embedded NULs are preserved.  */
  809 static void
  810 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
  811 {
  812   bool saw_NUL = false;
  813   const uchar *cur;
  814   cppchar_t terminator;
  815   enum cpp_ttype type;
  816
        /* Step over any encoding prefix (L, U, u or u8) to reach the
           character that opens the literal.  */
  817   cur = base;
  818   terminator = *cur++;
  819   if (terminator == 'L' || terminator == 'U')
  820     terminator = *cur++;
  821   else if (terminator == 'u')
  822     {
  823       terminator = *cur++;
  824       if (terminator == '8')
  825         terminator = *cur++;
  826     }
  827   if (terminator == 'R')
  828     {
  829       lex_raw_string (pfile, token, base, cur);
  830       return;
  831     }
        /* The opening character and the prefix together select the token
           type; anything other than a quote is taken to open a header
           name, which closes with '>'.  */
  832   if (terminator == '"')
  833     type = (*base == 'L' ? CPP_WSTRING :
  834             *base == 'U' ? CPP_STRING32 :
  835             *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
  836                          : CPP_STRING);
  837   else if (terminator == '\'')
  838     type = (*base == 'L' ? CPP_WCHAR :
  839             *base == 'U' ? CPP_CHAR32 :
  840             *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
  841   else
  842     terminator = '>', type = CPP_HEADER_NAME;
  843
  844   for (;;)
  845     {
  846       cppchar_t c = *cur++;
  847
  848       /* In #include-style directives, terminators are not escapable.
             Elsewhere a backslash also consumes the following character,
             unless that character is the newline.  */
  849       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
  850         cur++;
  851       else if (c == terminator)
  852         break;
  853       else if (c == '\n')
  854         {
  855           cur--;
  856           /* Unmatched quotes always yield undefined behavior, but
  857              greedy lexing means that what appears to be an unterminated
  858              header name may actually be a legitimate sequence of tokens.  */
  859           if (terminator == '>')
  860             {
  861               token->type = CPP_LESS;
  862               return;
  863             }
  864           type = CPP_OTHER;
  865           break;
  866         }
  867       else if (c == '\0')
  868         saw_NUL = true;
  869     }
  870
  871   if (saw_NUL && !pfile->state.skipping)
  872     cpp_error (pfile, CPP_DL_WARNING,
  873                "null character(s) preserved in literal");
  874
        /* The missing-terminator pedwarn is not issued when the language
           is CLK_ASM.  */
  875   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
  876     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
  877                (int) terminator);
  878
  879   pfile->buffer->cur = cur;
  880   create_literal (pfile, token, base, cur - base, type);
  881 }
 882
 883 /* Return the comment table. The client may not make any assumption
 884    about the ordering of the table.  */
 885 cpp_comment_table *
 886 cpp_get_comments (cpp_reader *pfile)
 887 {
 888   return &pfile->comments;
 889 }
 890
 891 /* Append a comment to the end of the comment table. */
 892 static void
 893 store_comment (cpp_reader *pfile, cpp_token *token)
 894 {
 895   int len;
 896
 897   if (pfile->comments.allocated == 0)
 898     {
 899       pfile->comments.allocated = 256;
 900       pfile->comments.entries = (cpp_comment *) xmalloc
 901         (pfile->comments.allocated * sizeof (cpp_comment));
 902     }
 903
 904   if (pfile->comments.count == pfile->comments.allocated)
 905     {
 906       pfile->comments.allocated *= 2;
 907       pfile->comments.entries = (cpp_comment *) xrealloc
 908         (pfile->comments.entries,
 909          pfile->comments.allocated * sizeof (cpp_comment));
 910     }
 911
 912   len = token->val.str.len;
 913
 914   /* Copy comment. Note, token may not be NULL terminated. */
 915   pfile->comments.entries[pfile->comments.count].comment =
 916     (char *) xmalloc (sizeof (char) * (len + 1));
 917   memcpy (pfile->comments.entries[pfile->comments.count].comment,
 918           token->val.str.text, len);
 919   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
 920
 921   /* Set source location. */
 922   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
 923
 924   /* Increment the count of entries in the comment table. */
 925   pfile->comments.count++;
 926 }
 927
 928 /* The stored comment includes the comment start and any terminator.  */
 929 static void
 930 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 931               cppchar_t type)
 932 {
 933   unsigned char *buffer;
 934   unsigned int len, clen;
 935
 936   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 937
 938   /* C++ comments probably (not definitely) have moved past a new
 939      line, which we don't want to save in the comment.  */
 940   if (is_vspace (pfile->buffer->cur[-1]))
 941     len--;
 942
 943   /* If we are currently in a directive, then we need to store all
 944      C++ comments as C comments internally, and so we need to
 945      allocate a little extra space in that case.
 946
 947      Note that the only time we encounter a directive here is
 948      when we are saving comments in a "#define".  */
 949   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 950
 951   buffer = _cpp_unaligned_alloc (pfile, clen);
 952
 953   token->type = CPP_COMMENT;
 954   token->val.str.len = clen;
 955   token->val.str.text = buffer;
 956
 957   buffer[0] = '/';
 958   memcpy (buffer + 1, from, len - 1);
 959
 960   /* Finish conversion to a C comment, if necessary.  */
 961   if (pfile->state.in_directive && type == '/')
 962     {
 963       buffer[1] = '*';
 964       buffer[clen - 2] = '*';
 965       buffer[clen - 1] = '/';
 966     }
 967
 968   /* Finally store this comment for use by clients of libcpp. */
 969   store_comment (pfile, token);
 970 }
 971
 972 /* Allocate COUNT tokens for RUN.  */
 973 void
 974 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 975 {
 976   run->base = XNEWVEC (cpp_token, count);
 977   run->limit = run->base + count;
 978   run->next = NULL;
 979 }
 980
 981 /* Returns the next tokenrun, or creates one if there is none.  */
 982 static tokenrun *
 983 next_tokenrun (tokenrun *run)
 984 {
 985   if (run->next == NULL)
 986     {
 987       run->next = XNEW (tokenrun);
 988       run->next->prev = run;
 989       _cpp_init_tokenrun (run->next, 250);
 990     }
 991
 992   return run->next;
 993 }
 994
 995 /* Look ahead in the input stream.  */
 996 const cpp_token *
 997 cpp_peek_token (cpp_reader *pfile, int index)
 998 {
 999   cpp_context *context = pfile->context;
1000   const cpp_token *peektok;
1001   int count;
1002
1003   /* First, scan through any pending cpp_context objects.  */
1004   while (context->prev)
1005     {
1006       ptrdiff_t sz = (context->direct_p
1007                       ? LAST (context).token - FIRST (context).token
1008                       : LAST (context).ptoken - FIRST (context).ptoken);
1009
1010       if (index < (int) sz)
1011         return (context->direct_p
1012                 ? FIRST (context).token + index
1013                 : *(FIRST (context).ptoken + index));
1014
1015       index -= (int) sz;
1016       context = context->prev;
1017     }
1018
1019   /* We will have to read some new tokens after all (and do so
1020      without invalidating preceding tokens).  */
1021   count = index;
1022   pfile->keep_tokens++;
1023
1024   do
1025     {
1026       peektok = _cpp_lex_token (pfile);
1027       if (peektok->type == CPP_EOF)
1028         return peektok;
1029     }
1030   while (index--);
1031
1032   _cpp_backup_tokens_direct (pfile, count + 1);
1033   pfile->keep_tokens--;
1034
1035   return peektok;
1036 }
1037
1038 /* Allocate a single token that is invalidated at the same time as the
1039    rest of the tokens on the line.  Has its line and col set to the
1040    same as the last lexed token, so that diagnostics appear in the
1041    right place.  */
1042 cpp_token *
1043 _cpp_temp_token (cpp_reader *pfile)
1044 {
1045   cpp_token *old, *result;
1046   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1047   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1048
1049   old = pfile->cur_token - 1;
1050   /* Any pre-existing lookaheads must not be clobbered.  */
1051   if (la)
1052     {
1053       if (sz <= la)
1054         {
1055           tokenrun *next = next_tokenrun (pfile->cur_run);
1056
1057           if (sz < la)
1058             memmove (next->base + 1, next->base,
1059                      (la - sz) * sizeof (cpp_token));
1060
1061           next->base[0] = pfile->cur_run->limit[-1];
1062         }
1063
1064       if (sz > 1)
1065         memmove (pfile->cur_token + 1, pfile->cur_token,
1066                  MIN (la, sz - 1) * sizeof (cpp_token));
1067     }
1068
1069   if (!sz && pfile->cur_token == pfile->cur_run->limit)
1070     {
1071       pfile->cur_run = next_tokenrun (pfile->cur_run);
1072       pfile->cur_token = pfile->cur_run->base;
1073     }
1074
1075   result = pfile->cur_token++;
1076   result->src_loc = old->src_loc;
1077   return result;
1078 }
1079
1080 /* Lex a token into RESULT (external interface).  Takes care of issues
1081    like directive handling, token lookahead, multiple include
1082    optimization and skipping.  */
1083 const cpp_token *
1084 _cpp_lex_token (cpp_reader *pfile)
1085 {
1086   cpp_token *result;
1087
1088   for (;;)
1089     {
1090       if (pfile->cur_token == pfile->cur_run->limit)
1091         {
1092           pfile->cur_run = next_tokenrun (pfile->cur_run);
1093           pfile->cur_token = pfile->cur_run->base;
1094         }
1095       /* We assume that the current token is somewhere in the current
1096          run.  */
1097       if (pfile->cur_token < pfile->cur_run->base
1098           || pfile->cur_token >= pfile->cur_run->limit)
1099         abort ();
1100
1101       if (pfile->lookaheads)
1102         {
1103           pfile->lookaheads--;
1104           result = pfile->cur_token++;
1105         }
1106       else
1107         result = _cpp_lex_direct (pfile);
1108
1109       if (result->flags & BOL)
1110         {
1111           /* Is this a directive.  If _cpp_handle_directive returns
1112              false, it is an assembler #.  */
1113           if (result->type == CPP_HASH
1114               /* 6.10.3 p 11: Directives in a list of macro arguments
1115                  gives undefined behavior.  This implementation
1116                  handles the directive as normal.  */
1117               && pfile->state.parsing_args != 1)
1118             {
1119               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1120                 {
1121                   if (pfile->directive_result.type == CPP_PADDING)
1122                     continue;
1123                   result = &pfile->directive_result;
1124                 }
1125             }
1126           else if (pfile->state.in_deferred_pragma)
1127             result = &pfile->directive_result;
1128
1129           if (pfile->cb.line_change && !pfile->state.skipping)
1130             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1131         }
1132
1133       /* We don't skip tokens in directives.  */
1134       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1135         break;
1136
1137       /* Outside a directive, invalidate controlling macros.  At file
1138          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1139          get here and MI optimization works.  */
1140       pfile->mi_valid = false;
1141
1142       if (!pfile->state.skipping || result->type == CPP_EOF)
1143         break;
1144     }
1145
1146   return result;
1147 }
1148
1149 /* Returns true if a fresh line has been loaded.  */
1150 bool
1151 _cpp_get_fresh_line (cpp_reader *pfile)
1152 {
1153   int return_at_eof;
1154
1155   /* We can't get a new line until we leave the current directive.  */
1156   if (pfile->state.in_directive)
1157     return false;
1158
1159   for (;;)
1160     {
1161       cpp_buffer *buffer = pfile->buffer;
1162
1163       if (!buffer->need_line)
1164         return true;
1165
1166       if (buffer->next_line < buffer->rlimit)
1167         {
1168           _cpp_clean_line (pfile);
1169           return true;
1170         }
1171
1172       /* First, get out of parsing arguments state.  */
1173       if (pfile->state.parsing_args)
1174         return false;
1175
1176       /* End of buffer.  Non-empty files should end in a newline.  */
1177       if (buffer->buf != buffer->rlimit
1178           && buffer->next_line > buffer->rlimit
1179           && !buffer->from_stage3)
1180         {
1181           /* Clip to buffer size.  */
1182           buffer->next_line = buffer->rlimit;
1183         }
1184
1185       return_at_eof = buffer->return_at_eof;
1186       _cpp_pop_buffer (pfile);
1187       if (pfile->buffer == NULL || return_at_eof)
1188         return false;
1189     }
1190 }
1191
/* Set result->type to THEN_TYPE and consume one character if the next
   input character equals CHAR; otherwise set it to ELSE_TYPE and
   consume nothing.  Expects variables named `result' and `buffer' to
   be in scope where it is used.  Each argument is parenthesized in
   the expansion so that compound expressions are safe to pass.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        {                                               \
          buffer->cur++;                                \
          result->type = (THEN_TYPE);                   \
        }                                               \
    }                                                   \
  while (0)
1200
1201 /* Lex a token into pfile->cur_token, which is also incremented, to
1202    get diagnostics pointing to the correct location.
1203
1204    Does not handle issues such as token lookahead, multiple-include
1205    optimization, directives, skipping etc.  This function is only
1206    suitable for use by _cpp_lex_token, and in special cases like
1207    lex_expansion_token which doesn't care for any of these issues.
1208
1209    When meeting a newline, returns CPP_EOF if parsing a directive,
1210    otherwise returns to the start of the token buffer if permissible.
1211    Returns the location of the lexed token.  */
1212 cpp_token *
1213 _cpp_lex_direct (cpp_reader *pfile)
1214 {
1215   cppchar_t c;
1216   cpp_buffer *buffer;
1217   const unsigned char *comment_start;
       /* Claim the next token slot now; RESULT may be re-pointed at the
          start of the base run below when a fresh line is begun.  */
1218   cpp_token *result = pfile->cur_token++;
1219
       /* Jumped back to after every newline: reset the flags and, if the
          buffer asks for one, obtain a new line.  */
1220  fresh_line:
1221   result->flags = 0;
1222   buffer = pfile->buffer;
1223   if (buffer->need_line)
1224     {
           /* Line end inside a deferred pragma: return CPP_PRAGMA_EOL and
              leave the deferred-pragma state.  */
1225       if (pfile->state.in_deferred_pragma)
1226         {
1227           result->type = CPP_PRAGMA_EOL;
1228           pfile->state.in_deferred_pragma = false;
1229           if (!pfile->state.pragma_allow_expansion)
1230             pfile->state.prevent_expansion--;
1231           return result;
1232         }
           /* No line could be supplied: the token is CPP_EOF.  */
1233       if (!_cpp_get_fresh_line (pfile))
1234         {
1235           result->type = CPP_EOF;
1236           if (!pfile->state.in_directive)
1237             {
1238               /* Tell the compiler the line number of the EOF token.  */
1239               result->src_loc = pfile->line_table->highest_line;
1240               result->flags = BOL;
1241             }
1242           return result;
1243         }
           /* Unless keep_tokens is set, restart token storage at the
              beginning of the base run for the new line.  */
1244       if (!pfile->keep_tokens)
1245         {
1246           pfile->cur_run = &pfile->base_run;
1247           result = pfile->base_run.base;
1248           pfile->cur_token = result + 1;
1249         }
1250       result->flags = BOL;
           /* While parsing_args is 2 the line break is also recorded as
              preceding whitespace.  */
1251       if (pfile->state.parsing_args == 2)
1252         result->flags |= PREV_WHITE;
1253     }
       /* Re-read the buffer pointer: _cpp_get_fresh_line may have popped
          buffers and so changed pfile->buffer.  */
1254   buffer = pfile->buffer;
1255  update_tokens_line:
1256   result->src_loc = pfile->line_table->highest_line;
1257
1258  skipped_white:
       /* Once the cursor has reached the position of the next line note,
          process the notes (not done while an overlaid buffer is in use)
          and refresh the token's location afterwards.  */
1259   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1260       && !pfile->overlaid_buffer)
1261     {
1262       _cpp_process_line_notes (pfile, false);
1263       result->src_loc = pfile->line_table->highest_line;
1264     }
       /* C is the first character of the token; buffer->cur now points
          one past it, which is why the cases below use buffer->cur - 1
          as the token start.  */
1265   c = *buffer->cur++;
1266
1267   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
1268                                CPP_BUF_COLUMN (buffer, buffer->cur));
1269
1270   switch (c)
1271     {
1272     case ' ': case '\t': case '\f': case '\v': case '\0':
1273       result->flags |= PREV_WHITE;
1274       skip_whitespace (pfile, c);
1275       goto skipped_white;
1276
1277     case '\n':
           /* The line counter is only advanced when the newline is not
              the last character of the buffer.  */
1278       if (buffer->cur < buffer->rlimit)
1279         CPP_INCREMENT_LINE (pfile, 0);
1280       buffer->need_line = true;
1281       goto fresh_line;
1282
1283     case '0': case '1': case '2': case '3': case '4':
1284     case '5': case '6': case '7': case '8': case '9':
1285       {
1286         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1287         result->type = CPP_NUMBER;
1288         lex_number (pfile, &result->val.str, &nst);
1289         warn_about_normalization (pfile, result, &nst);
1290         break;
1291       }
1292
1293     case 'L':
1294     case 'u':
1295     case 'U':
1296     case 'R':
1297       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1298          wide strings or raw strings.  */
1299       if (c == 'L' || CPP_OPTION (pfile, uliterals))
1300         {
               /* Accepted continuations: a quote character (a single quote
                  not after 'R'), R" after a prefix other than 'R', or,
                  after 'u', 8" and 8R".  */
1301           if ((*buffer->cur == '\'' && c != 'R')
1302               || *buffer->cur == '"'
1303               || (*buffer->cur == 'R'
1304                   && c != 'R'
1305                   && buffer->cur[1] == '"'
1306                   && CPP_OPTION (pfile, uliterals))
1307               || (*buffer->cur == '8'
1308                   && c == 'u'
1309                   && (buffer->cur[1] == '"'
1310                       || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
1311             {
1312               lex_string (pfile, result, buffer->cur - 1);
1313               break;
1314             }
1315         }
1316       /* Fall through.  */
1317
1318     case '_':
1319     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1320     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1321     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1322     case 's': case 't':           case 'v': case 'w': case 'x':
1323     case 'y': case 'z':
1324     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1325     case 'G': case 'H': case 'I': case 'J': case 'K':
1326     case 'M': case 'N': case 'O': case 'P': case 'Q':
1327     case 'S': case 'T':           case 'V': case 'W': case 'X':
1328     case 'Y': case 'Z':
1329       result->type = CPP_NAME;
1330       {
1331         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1332         result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1333                                                 &nst);
1334         warn_about_normalization (pfile, result, &nst);
1335       }
1336
1337       /* Convert named operators to their proper types.  */
1338       if (result->val.node.node->flags & NODE_OPERATOR)
1339         {
1340           result->flags |= NAMED_OP;
1341           result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1342         }
1343       break;
1344
1345     case '\'':
1346     case '"':
1347       lex_string (pfile, result, buffer->cur - 1);
1348       break;
1349
1350     case '/':
1351       /* A potential block or line comment.  */
           /* From here on C holds the character after the '/'; it is
              later passed to save_comment as the comment type.  */
1352       comment_start = buffer->cur;
1353       c = *buffer->cur;
1354
1355       if (c == '*')
1356         {
1357           if (_cpp_skip_block_comment (pfile))
1358             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1359         }
1360       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1361                             || cpp_in_system_header (pfile)))
1362         {
1363           /* Warn about comments only if pedantically GNUC89, and not
1364              in system headers.  */
1365           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1366               && ! buffer->warned_cplusplus_comments)
1367             {
1368               cpp_error (pfile, CPP_DL_PEDWARN,
1369                          "C++ style comments are not allowed in ISO C90");
1370               cpp_error (pfile, CPP_DL_PEDWARN,
1371                          "(this will be reported only once per input file)");
1372               buffer->warned_cplusplus_comments = 1;
1373             }
1374
1375           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1376             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1377         }
1378       else if (c == '=')
1379         {
1380           buffer->cur++;
1381           result->type = CPP_DIV_EQ;
1382           break;
1383         }
1384       else
1385         {
1386           result->type = CPP_DIV;
1387           break;
1388         }
1389
           /* Only the two comment branches above reach this point.  When
              comments are not being saved, the comment counts as
              whitespace and lexing continues with the same token.  */
1390       if (!pfile->state.save_comments)
1391         {
1392           result->flags |= PREV_WHITE;
1393           goto update_tokens_line;
1394         }
1395
1396       /* Save the comment as a token in its own right.  */
1397       save_comment (pfile, result, comment_start, c);
1398       break;
1399
1400     case '<':
           /* When angled headers are expected, try to lex a header name
              first.  lex_string leaves the type as CPP_LESS if the name
              is unterminated, in which case '<' is lexed normally.  */
1401       if (pfile->state.angled_headers)
1402         {
1403           lex_string (pfile, result, buffer->cur - 1);
1404           if (result->type != CPP_LESS)
1405             break;
1406         }
1407
1408       result->type = CPP_LESS;
1409       if (*buffer->cur == '=')
1410         buffer->cur++, result->type = CPP_LESS_EQ;
1411       else if (*buffer->cur == '<')
1412         {
1413           buffer->cur++;
1414           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1415         }
1416       else if (CPP_OPTION (pfile, digraphs))
1417         {
               /* Digraphs "<:" and "<%".  */
1418           if (*buffer->cur == ':')
1419             {
1420               buffer->cur++;
1421               result->flags |= DIGRAPH;
1422               result->type = CPP_OPEN_SQUARE;
1423             }
1424           else if (*buffer->cur == '%')
1425             {
1426               buffer->cur++;
1427               result->flags |= DIGRAPH;
1428               result->type = CPP_OPEN_BRACE;
1429             }
1430         }
1431       break;
1432
1433     case '>':
1434       result->type = CPP_GREATER;
1435       if (*buffer->cur == '=')
1436         buffer->cur++, result->type = CPP_GREATER_EQ;
1437       else if (*buffer->cur == '>')
1438         {
1439           buffer->cur++;
1440           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1441         }
1442       break;
1443
1444     case '%':
1445       result->type = CPP_MOD;
1446       if (*buffer->cur == '=')
1447         buffer->cur++, result->type = CPP_MOD_EQ;
1448       else if (CPP_OPTION (pfile, digraphs))
1449         {
               /* Digraphs "%:" (with "%:%:" becoming CPP_PASTE) and "%>".  */
1450           if (*buffer->cur == ':')
1451             {
1452               buffer->cur++;
1453               result->flags |= DIGRAPH;
1454               result->type = CPP_HASH;
1455               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1456                 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1457             }
1458           else if (*buffer->cur == '>')
1459             {
1460               buffer->cur++;
1461               result->flags |= DIGRAPH;
1462               result->type = CPP_CLOSE_BRACE;
1463             }
1464         }
1465       break;
1466
1467     case '.':
1468       result->type = CPP_DOT;
           /* A '.' followed by a digit starts a number.  */
1469       if (ISDIGIT (*buffer->cur))
1470         {
1471           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1472           result->type = CPP_NUMBER;
1473           lex_number (pfile, &result->val.str, &nst);
1474           warn_about_normalization (pfile, result, &nst);
1475         }
1476       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1477         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1478       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1479         buffer->cur++, result->type = CPP_DOT_STAR;
1480       break;
1481
1482     case '+':
1483       result->type = CPP_PLUS;
1484       if (*buffer->cur == '+')
1485         buffer->cur++, result->type = CPP_PLUS_PLUS;
1486       else if (*buffer->cur == '=')
1487         buffer->cur++, result->type = CPP_PLUS_EQ;
1488       break;
1489
1490     case '-':
1491       result->type = CPP_MINUS;
1492       if (*buffer->cur == '>')
1493         {
1494           buffer->cur++;
1495           result->type = CPP_DEREF;
1496           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1497             buffer->cur++, result->type = CPP_DEREF_STAR;
1498         }
1499       else if (*buffer->cur == '-')
1500         buffer->cur++, result->type = CPP_MINUS_MINUS;
1501       else if (*buffer->cur == '=')
1502         buffer->cur++, result->type = CPP_MINUS_EQ;
1503       break;
1504
1505     case '&':
1506       result->type = CPP_AND;
1507       if (*buffer->cur == '&')
1508         buffer->cur++, result->type = CPP_AND_AND;
1509       else if (*buffer->cur == '=')
1510         buffer->cur++, result->type = CPP_AND_EQ;
1511       break;
1512
1513     case '|':
1514       result->type = CPP_OR;
1515       if (*buffer->cur == '|')
1516         buffer->cur++, result->type = CPP_OR_OR;
1517       else if (*buffer->cur == '=')
1518         buffer->cur++, result->type = CPP_OR_EQ;
1519       break;
1520
1521     case ':':
1522       result->type = CPP_COLON;
1523       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1524         buffer->cur++, result->type = CPP_SCOPE;
1525       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1526         {
1527           buffer->cur++;
1528           result->flags |= DIGRAPH;
1529           result->type = CPP_CLOSE_SQUARE;
1530         }
1531       break;
1532
1533     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1534     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1535     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1536     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1537     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1538
1539     case '?': result->type = CPP_QUERY; break;
1540     case '~': result->type = CPP_COMPL; break;
1541     case ',': result->type = CPP_COMMA; break;
1542     case '(': result->type = CPP_OPEN_PAREN; break;
1543     case ')': result->type = CPP_CLOSE_PAREN; break;
1544     case '[': result->type = CPP_OPEN_SQUARE; break;
1545     case ']': result->type = CPP_CLOSE_SQUARE; break;
1546     case '{': result->type = CPP_OPEN_BRACE; break;
1547     case '}': result->type = CPP_CLOSE_BRACE; break;
1548     case ';': result->type = CPP_SEMICOLON; break;
1549
1550       /* @ is a punctuator in Objective-C.  */
1551     case '@': result->type = CPP_ATSIGN; break;
1552
1553     case '$':
1554     case '\\':
1555       {
             /* Step back onto the character so forms_identifier_p can
                examine it; if it does not start an identifier, the
                cursor is advanced again and the character is emitted
                by the default case as CPP_OTHER.  */
1556         const uchar *base = --buffer->cur;
1557         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1558
1559         if (forms_identifier_p (pfile, true, &nst))
1560           {
1561             result->type = CPP_NAME;
1562             result->val.node.node = lex_identifier (pfile, base, true, &nst);
1563             warn_about_normalization (pfile, result, &nst);
1564             break;
1565           }
1566         buffer->cur++;
1567       }
           /* Fall through.  */
1568
1569     default:
1570       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1571       break;
1572     }
1573
1574   return result;
1575 }
1576
1577 /* An upper bound on the number of bytes needed to spell TOKEN.
1578    Does not include preceding whitespace.  */
1579 unsigned int
1580 cpp_token_len (const cpp_token *token)
1581 {
1582   unsigned int len;
1583
1584   switch (TOKEN_SPELL (token))
1585     {
1586     default:            len = 6;                                break;
1587     case SPELL_LITERAL: len = token->val.str.len;               break;
1588     case SPELL_IDENT:   len = NODE_LEN (token->val.node.node) * 10;     break;
1589     }
1590
1591   return len;
1592 }
1593
/* Decode the UTF-8 sequence starting at NAME and write the matching
   \UXXXXXXXX escape to BUFFER.  Exactly 10 bytes are written and no
   terminating NUL is added.  Returns the number of bytes of NAME that
   made up the sequence.

   NAME is expected to point at the lead byte of a multi-byte
   sequence; the callers in this file only pass bytes with the high
   bit set.  (For a plain ASCII byte the escape is still written and 0
   is returned.)  Ill-formed input aborts: a continuation byte in lead
   position, a lead byte announcing more than six bytes (0xFE, 0xFF),
   or a following byte that is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* The number of leading one bits in the first byte is the length
     of the UTF-8 sequence.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* Ill-formed UTF-8: exactly one leading one bit means a stray
     continuation byte, and no lead byte announces more than six.  */
  if (ucn_len == 1 || ucn_len > 6)
    abort ();

  /* Payload bits of the lead byte.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      name++;

      /* Ill-formed UTF-8: not a continuation byte.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();

      utf32 = (utf32 << 6) | (*name & 0x3F);
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Eight lower-case hex digits, most significant first.  */
  for (j = 7; j >= 0; j--)
    *buffer++ = hex_digits[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
1627
1628 /* Given a token TYPE corresponding to a digraph, return a pointer to
1629    the spelling of the digraph.  */
1630 static const unsigned char *
1631 cpp_digraph2name (enum cpp_ttype type)
1632 {
1633   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
1634 }
1635
1636 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1637    already contain the enough space to hold the token's spelling.
1638    Returns a pointer to the character after the last character written.
1639    FORSTRING is true if this is to be the spelling after translation
1640    phase 1 (this is different for UCNs).
1641    FIXME: Would be nice if we didn't need the PFILE argument.  */
1642 unsigned char *
1643 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1644                  unsigned char *buffer, bool forstring)
1645 {
1646   switch (TOKEN_SPELL (token))
1647     {
1648     case SPELL_OPERATOR:
1649       {
1650         const unsigned char *spelling;
1651         unsigned char c;
1652
1653         if (token->flags & DIGRAPH)
1654           spelling = cpp_digraph2name (token->type);
1655         else if (token->flags & NAMED_OP)
1656           goto spell_ident;
1657         else
1658           spelling = TOKEN_NAME (token);
1659
1660         while ((c = *spelling++) != '\0')
1661           *buffer++ = c;
1662       }
1663       break;
1664
1665     spell_ident:
1666     case SPELL_IDENT:
1667       if (forstring)
1668         {
1669           memcpy (buffer, NODE_NAME (token->val.node.node),
1670                   NODE_LEN (token->val.node.node));
1671           buffer += NODE_LEN (token->val.node.node);
1672         }
1673       else
1674         {
1675           size_t i;
1676           const unsigned char * name = NODE_NAME (token->val.node.node);
1677
1678           for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1679             if (name[i] & ~0x7F)
1680               {
1681                 i += utf8_to_ucn (buffer, name + i) - 1;
1682                 buffer += 10;
1683               }
1684             else
1685               *buffer++ = NODE_NAME (token->val.node.node)[i];
1686         }
1687       break;
1688
1689     case SPELL_LITERAL:
1690       memcpy (buffer, token->val.str.text, token->val.str.len);
1691       buffer += token->val.str.len;
1692       break;
1693
1694     case SPELL_NONE:
1695       cpp_error (pfile, CPP_DL_ICE,
1696                  "unspellable token %s", TOKEN_NAME (token));
1697       break;
1698     }
1699
1700   return buffer;
1701 }
1702
1703 /* Returns TOKEN spelt as a null-terminated string.  The string is
1704    freed when the reader is destroyed.  Useful for diagnostics.  */
1705 unsigned char *
1706 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1707 {
1708   unsigned int len = cpp_token_len (token) + 1;
1709   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1710
1711   end = cpp_spell_token (pfile, token, start, false);
1712   end[0] = '\0';
1713
1714   return start;
1715 }
1716
1717 /* Returns a pointer to a string which spells the token defined by
1718    TYPE and FLAGS.  Used by C front ends, which really should move to
1719    using cpp_token_as_text.  */
1720 const char *
1721 cpp_type2name (enum cpp_ttype type, unsigned char flags)
1722 {
1723   if (flags & DIGRAPH)
1724     return (const char *) cpp_digraph2name (type);
1725   else if (flags & NAMED_OP)
1726     return cpp_named_operator2name (type);
1727
1728   return (const char *) token_spellings[type].name;
1729 }
1730
1731 /* Writes the spelling of token to FP, without any preceding space.
1732    Separated from cpp_spell_token for efficiency - to avoid stdio
1733    double-buffering.  */
1734 void
1735 cpp_output_token (const cpp_token *token, FILE *fp)
1736 {
1737   switch (TOKEN_SPELL (token))
1738     {
1739     case SPELL_OPERATOR:
1740       {
1741         const unsigned char *spelling;
1742         int c;
1743
1744         if (token->flags & DIGRAPH)
1745           spelling = cpp_digraph2name (token->type);
1746         else if (token->flags & NAMED_OP)
1747           goto spell_ident;
1748         else
1749           spelling = TOKEN_NAME (token);
1750
1751         c = *spelling;
1752         do
1753           putc (c, fp);
1754         while ((c = *++spelling) != '\0');
1755       }
1756       break;
1757
1758     spell_ident:
1759     case SPELL_IDENT:
1760       {
1761         size_t i;
1762         const unsigned char * name = NODE_NAME (token->val.node.node);
1763
1764         for (i = 0; i < NODE_LEN (token->val.node.node); i++)
1765           if (name[i] & ~0x7F)
1766             {
1767               unsigned char buffer[10];
1768               i += utf8_to_ucn (buffer, name + i) - 1;
1769               fwrite (buffer, 1, 10, fp);
1770             }
1771           else
1772             fputc (NODE_NAME (token->val.node.node)[i], fp);
1773       }
1774       break;
1775
1776     case SPELL_LITERAL:
1777       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1778       break;
1779
1780     case SPELL_NONE:
1781       /* An error, most probably.  */
1782       break;
1783     }
1784 }
1785
1786 /* Compare two tokens.  Returns nonzero if A and B have the same type
        and the same flags and, where the token carries a value that is
        compared below, the same value; returns 0 otherwise.  */
1787 int
1788 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1789 {
       /* The types are known to be equal inside the switch, so testing
          A's type or spelling category also covers B.  */
1790   if (a->type == b->type && a->flags == b->flags)
1791     switch (TOKEN_SPELL (a))
1792       {
1793       default:                  /* Keep compiler happy.  */
1794       case SPELL_OPERATOR:
1795         /* token_no is used to track where multiple consecutive ##
1796            tokens were originally located.  */
1797         return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
1798       case SPELL_NONE:
             /* Only CPP_MACRO_ARG has a value compared here (its
                argument number); other SPELL_NONE tokens are equal.  */
1799         return (a->type != CPP_MACRO_ARG
1800                 || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
1801       case SPELL_IDENT:
             /* Identifiers are compared by node pointer, not by text.  */
1802         return a->val.node.node == b->val.node.node;
1803       case SPELL_LITERAL:
             /* Literals must match in length and byte for byte.  */
1804         return (a->val.str.len == b->val.str.len
1805                 && !memcmp (a->val.str.text, b->val.str.text,
1806                             a->val.str.len));
1807       }
1808
       /* Type or flags differ.  */
1809   return 0;
1810 }
1811
1812 /* Returns nonzero if a space should be inserted to avoid an
1813    accidental token paste for output.  For simplicity, it is
1814    conservative, and occasionally advises a space where one is not
1815    needed, e.g. "." and ".2".
        TOKEN1 is the token already output and TOKEN2 the one about to
        follow it.  PFILE is consulted only for the name_p test and the
        objc option below.  */
1816 int
1817 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1818                  const cpp_token *token2)
1819 {
1820   enum cpp_ttype a = token1->type, b = token2->type;
1821   cppchar_t c;
1822
       /* A token flagged NAMED_OP is classified as CPP_NAME for the
          tests below, whatever its operator type.  */
1823   if (token1->flags & NAMED_OP)
1824     a = CPP_NAME;
1825   if (token2->flags & NAMED_OP)
1826     b = CPP_NAME;
1827
       /* C is the first character of TOKEN2's spelling when TOKEN2 is
          operator-spelled (using the digraph spelling if DIGRAPH is
          set).  Otherwise it stays EOF, which equals none of the
          characters tested below.  */
1828   c = EOF;
1829   if (token2->flags & DIGRAPH)
1830     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1831   else if (token_spellings[b].category == SPELL_OPERATOR)
1832     c = token_spellings[b].name[0];
1833
1834   /* Quickly get everything that can paste with an '='.  */
       /* NOTE(review): relies on the ordering of enum cpp_ttype, with
          the affected types presumably all at or before CPP_LAST_EQ.  */
1835   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1836     return 1;
1837
       /* Each case lists the leading characters (C) or token types (B)
          of TOKEN2 for which a space is advised after a TOKEN1 of that
          type.  Types without a case fall out to return 0.  */
1838   switch (a)
1839     {
1840     case CPP_GREATER:   return c == '>';
1841     case CPP_LESS:      return c == '<' || c == '%' || c == ':';
1842     case CPP_PLUS:      return c == '+';
1843     case CPP_MINUS:     return c == '-' || c == '>';
1844     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1845     case CPP_MOD:       return c == ':' || c == '>';
1846     case CPP_AND:       return c == '&';
1847     case CPP_OR:        return c == '|';
1848     case CPP_COLON:     return c == ':' || c == '>';
1849     case CPP_DEREF:     return c == '*';
1850     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1851     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1852     case CPP_NAME:      return ((b == CPP_NUMBER
1853                                  && name_p (pfile, &token2->val.str))
1854                                 || b == CPP_NAME
1855                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1856     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1857                                 || c == '.' || c == '+' || c == '-');
1858                                       /* UCNs */
1859     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1860                                  && b == CPP_NAME)
1861                                 || (CPP_OPTION (pfile, objc)
1862                                     && token1->val.str.text[0] == '@'
1863                                     && (b == CPP_NAME || b == CPP_STRING)));
1864     default:            break;
1865     }
1866
1867   return 0;
1868 }
1869
1870 /* Output all the remaining tokens on the current line, and a newline
1871    character, to FP.  Leading whitespace is removed.  If there are
1872    macros, special token padding is not performed.
        Tokens are read with cpp_get_token until a CPP_EOF token is
        returned.  */
1873 void
1874 cpp_output_line (cpp_reader *pfile, FILE *fp)
1875 {
1876   const cpp_token *token;
1877
1878   token = cpp_get_token (pfile);
1879   while (token->type != CPP_EOF)
1880     {
1881       cpp_output_token (token, fp);
           /* PREV_WHITE is tested on the token that follows the one just
              written, so the first token's flag is never looked at.
              The CPP_EOF token's flag is tested too, so a space can
              precede the newline.  */
1882       token = cpp_get_token (pfile);
1883       if (token->flags & PREV_WHITE)
1884         putc (' ', fp);
1885     }
1886
1887   putc ('\n', fp);
1888 }
1889
1890 /* Return a string representation of all the remaining tokens on the
1891    current line.  The result is allocated using xmalloc and must be
1892    freed by the caller.
        If DIR_NAME is non-null, the string starts with '#', DIR_NAME and
        a space.  The result is always nul-terminated.  */
1893 unsigned char *
1894 cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1895 {
1896   const cpp_token *token;
       /* OUT counts the bytes of RESULT in use and ALLOCED is its size.
          The initial size leaves 120 bytes beyond the length of
          DIR_NAME.  */
1897   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
1898   unsigned int alloced = 120 + out;
1899   unsigned char *result = (unsigned char *) xmalloc (alloced);
1900
1901   /* If DIR_NAME is null, there are no initial contents.  */
1902   if (dir_name)
1903     {
1904       sprintf ((char *) result, "#%s ", dir_name);
           /* OUT already holds the name's length; add the '#' and the
              trailing space.  */
1905       out += 2;
1906     }
1907
1908   token = cpp_get_token (pfile);
1909   while (token->type != CPP_EOF)
1910     {
1911       unsigned char *last;
1912       /* Include room for a possible space and the terminating nul.  */
1913       unsigned int len = cpp_token_len (token) + 2;
1914
           /* Grow by doubling; if that is still too small, grow to
              exactly what this token needs.  */
1915       if (out + len > alloced)
1916         {
1917           alloced *= 2;
1918           if (out + len > alloced)
1919             alloced = out + len;
1920           result = (unsigned char *) xrealloc (result, alloced);
1921         }
1922
           /* The pointer returned by cpp_spell_token is taken as the new
              end of the text.  */
1923       last = cpp_spell_token (pfile, token, &result[out], 0);
1924       out = last - result;
1925
           /* The separating space is decided by the following token's
              PREV_WHITE flag, including that of the CPP_EOF token.  */
1926       token = cpp_get_token (pfile);
1927       if (token->flags & PREV_WHITE)
1928         result[out++] = ' ';
1929     }
1930
1931   result[out] = '\0';
1932   return result;
1933 }
1934
1935 /* Memory buffers.  Changing these three constants can have a dramatic
1936    effect on performance.  The values here are reasonable defaults,
1937    but might be tuned.  If you adjust them, be sure to test across a
1938    range of uses of cpplib, including heavy nested function-like macro
1939    expansion.  Also check the change in peak memory usage (NJAMD is a
1940    good tool for this).
        MIN_BUFF_SIZE is the smallest length new_buff will allocate.
        BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
        _cpp_get_buff will reuse for a request of MIN_SIZE bytes.
        EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
        extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
        and limit.  */
1941 #define MIN_BUFF_SIZE 8000
1942 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
     /* NOTE(review): MIN_EXTRA is not parenthesized in the expansion
        below; the callers in this file pass a plain variable.  */
1943 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1944         (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
1945
     /* Compile-time check that a request of size 0 can still be served
        by a buffer of the minimum size.  */
1946 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1947   #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1948 #endif
1949
1950 /* Create a new allocation buffer.  Place the control block at the end
1951    of the buffer, so that buffer overflows will cause immediate chaos.
        LEN is the requested usable size; it is raised to MIN_BUFF_SIZE if
        smaller and then passed through CPP_ALIGN.  The returned buffer is
        empty (cur == base) and is not linked to any other buffer.  */
1952 static _cpp_buff *
1953 new_buff (size_t len)
1954 {
1955   _cpp_buff *result;
1956   unsigned char *base;
1957
1958   if (len < MIN_BUFF_SIZE)
1959     len = MIN_BUFF_SIZE;
1960   len = CPP_ALIGN (len);
1961
       /* One allocation holds LEN data bytes followed by the control
          block, which therefore lives at BASE + LEN.  */
1962   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1963   result = (_cpp_buff *) (base + len);
1964   result->base = base;
1965   result->cur = base;
1966   result->limit = base + len;
1967   result->next = NULL;
1968   return result;
1969 }
1970
1971 /* Place a chain of unwanted allocation buffers on the free list.
        BUFF is the head of the chain and must not be null, since its
        next field is read unconditionally.  The whole chain is put in
        front of PFILE's existing free list.  */
1972 void
1973 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1974 {
1975   _cpp_buff *end = buff;
1976
       /* Find the last buffer of the chain being released.  */
1977   while (end->next)
1978     end = end->next;
       /* Hook the old free list after it and make BUFF the new head.  */
1979   end->next = pfile->free_buffs;
1980   pfile->free_buffs = buff;
1981 }
1982
1983 /* Return a free buffer of size at least MIN_SIZE.
        The first suitable buffer on PFILE's free list is unlinked and
        returned with cur reset to base.  If none is suitable, a fresh
        buffer is created with new_buff.  */
1984 _cpp_buff *
1985 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1986 {
1987   _cpp_buff *result, **p;
1988
       /* P points at the link that refers to the current candidate, so
          the chosen buffer can be unlinked with a single store.  */
1989   for (p = &pfile->free_buffs;; p = &(*p)->next)
1990     {
1991       size_t size;
1992
           /* End of the free list: nothing suitable, allocate anew.  */
1993       if (*p == NULL)
1994         return new_buff (min_size);
1995       result = *p;
1996       size = result->limit - result->base;
1997       /* Return a buffer that's big enough, but don't waste one that's
1998          way too big.  */
1999       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2000         break;
2001     }
2002
       /* Unlink RESULT from the free list and hand it back empty.  */
2003   *p = result->next;
2004   result->next = NULL;
2005   result->cur = result->base;
2006   return result;
2007 }
2008
2009 /* Creates a new buffer with enough space to hold the uncommitted
2010    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2011    the excess bytes to the new buffer.  Chains the new buffer after
2012    BUFF, and returns the new buffer.
        The bytes are copied from BUFF's cur to the new buffer's base.
        Neither buffer's cur is changed here.  */
2013 _cpp_buff *
2014 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
2015 {
2016   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2017   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2018
       /* BUFF's next field is overwritten with the new buffer.  */
2019   buff->next = new_buff;
       /* BUFF_ROOM is defined outside this listing; presumably the bytes
          between BUFF's cur and limit.  */
2020   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2021   return new_buff;
2022 }
2023
2024 /* Creates a new buffer with enough space to hold the uncommitted
2025    remaining bytes of the buffer pointed to by BUFF, and at least
2026    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2027    Chains the new buffer before the buffer pointed to by BUFF, and
2028    updates the pointer to point to the new buffer.
        BUFF here refers to the parameter PBUFF.  The old buffer stays
        reachable through the new buffer's next field.  */
2029 void
2030 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
2031 {
2032   _cpp_buff *new_buff, *old_buff = *pbuff;
2033   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2034
2035   new_buff = _cpp_get_buff (pfile, size);
       /* Copy from the old buffer's cur to the start of the new buffer.
          BUFF_ROOM is defined outside this listing; presumably the bytes
          between cur and limit.  */
2036   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2037   new_buff->next = old_buff;
2038   *pbuff = new_buff;
2039 }
2040
2041 /* Free a chain of buffers starting at BUFF.  A null BUFF is accepted
        and does nothing.  */
2042 void
2043 _cpp_free_buff (_cpp_buff *buff)
2044 {
2045   _cpp_buff *next;
2046
2047   for (; buff; buff = next)
2048     {
           /* Read the link before freeing.  For a buffer made by new_buff
              the control block sits in the same allocation as BASE, so
              freeing BASE releases BUFF itself as well.  */
2049       next = buff->next;
2050       free (buff->base);
2051     }
2052 }
2053
2054 /* Allocate permanent, unaligned storage of length LEN.
        The storage is taken from PFILE's u_buff.  Returns a pointer to
        the start of the LEN bytes.  */
2055 unsigned char *
2056 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
2057 {
2058   _cpp_buff *buff = pfile->u_buff;
2059   unsigned char *result = buff->cur;
2060
       /* Not enough room left: get a buffer of at least LEN bytes and
          make it the new head of the u_buff chain.  The room left in
          the old head is not used by this call.  */
2061   if (len > (size_t) (buff->limit - result))
2062     {
2063       buff = _cpp_get_buff (pfile, len);
2064       buff->next = pfile->u_buff;
2065       pfile->u_buff = buff;
2066       result = buff->cur;
2067     }
2068
       /* Commit the bytes by advancing the buffer's cur.  */
2069   buff->cur = result + len;
2070   return result;
2071 }
2072
2073 /* Allocate permanent, aligned storage of length LEN from a_buff.
2074    That buffer is used for growing allocations when saving macro
2075    replacement lists in a #define, and when parsing an answer to an
2076    assertion in #assert, #unassert or #if (and therefore possibly
2077    whilst expanding macros).  It therefore must not be used by any
2078    code that they might call: specifically the lexer and the guts of
2079    the macro expander.
2080
2081    All existing other uses clearly fit this restriction: storing
2082    registered pragmas during initialization.  */
2083 unsigned char *
2084 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
2085 {
2086   _cpp_buff *buff = pfile->a_buff;
2087   unsigned char *result = buff->cur;
2088
       /* Not enough room left: get a buffer of at least LEN bytes and
          make it the new head of the a_buff chain.  The room left in
          the old head is not used by this call.  */
2089   if (len > (size_t) (buff->limit - result))
2090     {
2091       buff = _cpp_get_buff (pfile, len);
2092       buff->next = pfile->a_buff;
2093       pfile->a_buff = buff;
2094       result = buff->cur;
2095     }
2096
       /* LEN is used exactly as given; no rounding is applied here.  */
2097   buff->cur = result + len;
2098   return result;
2099 }
2100
2101 /* Say which field of TOK is in use.  The answer is one of the
        CPP_TOKEN_FLD_* values, chosen from TOK's spelling category and,
        for operators and unspelled tokens, from its type.  */
2102
2103 enum cpp_token_fld_kind
2104 cpp_token_val_index (cpp_token *tok)
2105 {
2106   switch (TOKEN_SPELL (tok))
2107     {
2108     case SPELL_IDENT:
2109       return CPP_TOKEN_FLD_NODE;
2110     case SPELL_LITERAL:
2111       return CPP_TOKEN_FLD_STR;
2112     case SPELL_OPERATOR:
           /* Among operators only CPP_PASTE is given a field here.  */
2113       if (tok->type == CPP_PASTE)
2114         return CPP_TOKEN_FLD_TOKEN_NO;
2115       else
2116         return CPP_TOKEN_FLD_NONE;
2117     case SPELL_NONE:
2118       if (tok->type == CPP_MACRO_ARG)
2119         return CPP_TOKEN_FLD_ARG_NO;
2120       else if (tok->type == CPP_PADDING)
2121         return CPP_TOKEN_FLD_SOURCE;
2122       else if (tok->type == CPP_PRAGMA)
2123         return CPP_TOKEN_FLD_PRAGMA;
2124       /* else fall through */
2125     default:
2126       return CPP_TOKEN_FLD_NONE;
2127     }
2128 }