libcpp/lex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "internal.h"
  26
  27 enum spell_type
  28 {
  29   SPELL_OPERATOR = 0,   /* Given to every OP entry of TTYPE_TABLE.  */
  30   SPELL_IDENT,          /* The remaining categories are selected per  */
  31   SPELL_LITERAL,        /* entry by the second argument of TK, which  */
  32   SPELL_NONE            /* is pasted onto the SPELL_ prefix.          */
  33 };
  34
  35 struct token_spelling
  36 {
  37   enum spell_type category;     /* Spelling category of the token type.  */
  38   const unsigned char *name;    /* Spelling text: OP's second argument for
                                        OP entries, the stringified first
                                        argument for TK entries.  */
  39 };
  40
  41 static const unsigned char *const digraph_spellings[] =
  42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
     /* The six strings above are the ISO C digraph spellings of
        #, ##, [, ], { and } respectively.  */
  43
     /* token_spellings is produced by expanding TTYPE_TABLE with OP and
        TK temporarily defined to emit one initializer per entry; both
        helper macros are removed again straight afterwards.  The table
        is indexed by token type (see TOKEN_SPELL and TOKEN_NAME).
        NOTE(review): U is defined elsewhere; presumably it converts a
        string literal to `const unsigned char *' -- confirm.  */
  44 #define OP(e, s) { SPELL_OPERATOR, U s  },
  45 #define TK(e, s) { SPELL_ ## s,    U #e },
  46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  47 #undef OP
  48 #undef TK
  49
     /* Accessors for the table entry that belongs to TOKEN's type.  */
  50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
  /* Forward declarations of file-local (static) helper functions.  */
  53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
  54 static int skip_line_comment (cpp_reader *);
  55 static void skip_whitespace (cpp_reader *, cppchar_t);
  56 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
  57 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
  58 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
  59                             unsigned int, enum cpp_ttype);
  60 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
  61 static int name_p (cpp_reader *, const cpp_string *);
  62 static tokenrun *next_tokenrun (tokenrun *);
  63
  64 static _cpp_buff *new_buff (size_t);
  65
  66
  67 /* Utility routine:
  68
  69    Compares, the token TOKEN to the NUL-terminated string STRING.
  70    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  71 int
  72 cpp_ideq (const cpp_token *token, const char *string)
  73 {
  74   if (token->type != CPP_NAME)
  75     return 0;
  76
  77   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  78 }
  79
  80 /* Record a note TYPE at byte POS into the current cleaned logical
  81    line.  */
  82 static void
  83 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  84 {
  85   if (buffer->notes_used == buffer->notes_cap)
  86     {
  87       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  88       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
  89                                   buffer->notes_cap);
  90     }
  91
  92   buffer->notes[buffer->notes_used].pos = pos;
  93   buffer->notes[buffer->notes_used].type = type;
  94   buffer->notes_used++;
  95 }
  96
  97 /* Returns with a logical line that contains no escaped newlines or
  98    trigraphs.  This is a time-critical inner loop.  */
     /* On entry the line starts at buffer->next_line.  The function
        resets the buffer's note bookkeeping, points buffer->cur and
        buffer->line_base at the start of the line, and clears
        buffer->need_line.  On exit the cleaned line is terminated in
        place with '\n', a sentinel note follows it, and
        buffer->next_line is one past the character where scanning
        stopped.  */
  99 void
 100 _cpp_clean_line (cpp_reader *pfile)
 101 {
 102   cpp_buffer *buffer;
 103   const uchar *s;
 104   uchar c, *d, *p;
 105
 106   buffer = pfile->buffer;
 107   buffer->cur_note = buffer->notes_used = 0;
 108   buffer->cur = buffer->line_base = buffer->next_line;
 109   buffer->need_line = false;
       /* Start one short of the line: every loop below pre-increments S.  */
 110   s = buffer->next_line - 1;
 111
 112   if (!buffer->from_stage3)
 113     {
 114       /* Short circuit for the common case of an un-escaped line with
 115          no trigraphs.  The primary win here is by not writing any
 116          data back to memory until we have to.  */
 117       for (;;)
 118         {
 119           c = *++s;
 120           if (c == '\n' || c == '\r')
 121             {
                   /* D is where the terminating '\n' gets stored at `done'.  */
 122               d = (uchar *) s;
 123
                   /* A line terminator sitting at rlimit ends the line
                      unconditionally.  */
 124               if (s == buffer->rlimit)
 125                 goto done;
 126
 127               /* DOS line ending? */
 128               if (c == '\r' && s[1] == '\n')
 129                 s++;
 130
 131               if (s == buffer->rlimit)
 132                 goto done;
 133
 134               /* check for escaped newline */
                   /* Walk P back over non-vertical whitespace that precedes
                      the newline; the line is escaped only if a backslash
                      sits immediately before P.  */
 135               p = d;
 136               while (p != buffer->next_line && is_nvspace (p[-1]))
 137                 p--;
 138               if (p == buffer->next_line || p[-1] != '\\')
 139                 goto done;
 140
 141               /* Have an escaped newline; process it and proceed to
 142                  the slow path.  */
                   /* The note sits on the backslash.  Its type is ' ' when
                      whitespace separated the backslash from the newline
                      and '\\' otherwise.  */
 143               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
                   /* Back D up so that the slow loop's pre-incremented store
                      overwrites the backslash.  buffer->next_line is reused
                      as the lower bound for later escaped-newline scans.  */
 144               d = p - 2;
 145               buffer->next_line = p - 1;
 146               break;
 147             }
 148           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 149             {
 150               /* Have a trigraph.  We may or may not have to convert
 151                  it.  Add a line note regardless, for -Wtrigraphs.  */
 152               add_line_note (buffer, s, s[2]);
 153               if (CPP_OPTION (pfile, trigraphs))
 154                 {
 155                   /* We do, and that means we have to switch to the
 156                      slow path.  */
 157                   d = (uchar *) s;
 158                   *d = _cpp_trigraph_map[s[2]];
 159                   s += 2;
 160                   break;
 161                 }
 162             }
 163         }
 164
 165
           /* Slow path: from here on each character read through S is
              copied down to D, so escaped newlines and converted
              trigraphs are squeezed out of the line in place.  */
 166       for (;;)
 167         {
 168           c = *++s;
 169           *++d = c;
 170
 171           if (c == '\n' || c == '\r')
 172             {
 173                   /* Handle DOS line endings.  */
 174               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 175                 s++;
 176               if (s == buffer->rlimit)
 177                 break;
 178
 179               /* Escaped?  */
 180               p = d;
 181               while (p != buffer->next_line && is_nvspace (p[-1]))
 182                 p--;
 183               if (p == buffer->next_line || p[-1] != '\\')
 184                 break;
 185
 186               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 187               d = p - 2;
 188               buffer->next_line = p - 1;
 189             }
 190           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 191             {
 192               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
                   /* The note is recorded at D, i.e. at the position within
                      the cleaned line rather than in the raw input.  */
 193               add_line_note (buffer, d, s[2]);
 194               if (CPP_OPTION (pfile, trigraphs))
 195                 {
 196                   *d = _cpp_trigraph_map[s[2]];
 197                   s += 2;
 198                 }
 199             }
 200         }
 201     }
 202   else
 203     {
           /* from_stage3 input: only the end of the line is located; no
              escaped-newline or trigraph processing is done.  */
 204       do
 205         s++;
 206       while (*s != '\n' && *s != '\r');
 207       d = (uchar *) s;
 208
 209       /* Handle DOS line endings.  */
 210       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 211         s++;
 212     }
 213
 214  done:
 215   *d = '\n';
 216   /* A sentinel note that should never be processed.  */
 217   add_line_note (buffer, d + 1, '\n');
 218   buffer->next_line = s + 1;
 219 }
 220
 221 /* Return true if the trigraph indicated by NOTE should be warned
 222    about in a comment.  */
 223 static bool
 224 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 225 {
 226   const uchar *p;
 227
 228   /* Within comments we don't warn about trigraphs, unless the
 229      trigraph forms an escaped newline, as that may change
 230      behavior.  */
 231   if (note->type != '/')
 232     return false;
 233
 234   /* If -trigraphs, then this was an escaped newline iff the next note
 235      is coincident.  */
 236   if (CPP_OPTION (pfile, trigraphs))
 237     return note[1].pos == note->pos;
 238
 239   /* Otherwise, see if this forms an escaped newline.  */
 240   p = note->pos + 3;
 241   while (is_nvspace (*p))
 242     p++;
 243
 244   /* There might have been escaped newlines between the trigraph and the
 245      newline we found.  Hence the position test.  */
 246   return (*p == '\n' && p < note[1].pos);
 247 }
 248
 249 /* Process the notes created by add_line_note as far as the current
 250    location.  */
 251 void
 252 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 253 {
 254   cpp_buffer *buffer = pfile->buffer;
 255
 256   for (;;)
 257     {
 258       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 259       unsigned int col;
 260
 261       if (note->pos > buffer->cur)
 262         break;
 263
 264       buffer->cur_note++;
 265       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 266
 267       if (note->type == '\\' || note->type == ' ')
 268         {
 269           if (note->type == ' ' && !in_comment)
 270             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 271                                  "backslash and newline separated by space");
 272
 273           if (buffer->next_line > buffer->rlimit)
 274             {
 275               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
 276                                    "backslash-newline at end of file");
 277               /* Prevent "no newline at end of file" warning.  */
 278               buffer->next_line = buffer->rlimit;
 279             }
 280
 281           buffer->line_base = note->pos;
 282           CPP_INCREMENT_LINE (pfile, 0);
 283         }
 284       else if (_cpp_trigraph_map[note->type])
 285         {
 286           if (CPP_OPTION (pfile, warn_trigraphs)
 287               && (!in_comment || warn_in_comment (pfile, note)))
 288             {
 289               if (CPP_OPTION (pfile, trigraphs))
 290                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 291                                      "trigraph ??%c converted to %c",
 292                                      note->type,
 293                                      (int) _cpp_trigraph_map[note->type]);
 294               else
 295                 {
 296                   cpp_error_with_line
 297                     (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
 298                      "trigraph ??%c ignored, use -trigraphs to enable",
 299                      note->type);
 300                 }
 301             }
 302         }
 303       else
 304         abort ();
 305     }
 306 }
 307
 308 /* Skip a C-style block comment.  We find the end of the comment by
 309    seeing if an asterisk is before every '/' we encounter.  Returns
 310    nonzero if comment terminated by EOF, zero otherwise.
 311
 312    Buffer->cur points to the initial asterisk of the comment.  */
 313 bool
 314 _cpp_skip_block_comment (cpp_reader *pfile)
 315 {
 316   cpp_buffer *buffer = pfile->buffer;
 317   const uchar *cur = buffer->cur;
 318   uchar c;
 319
 320   cur++;
 321   if (*cur == '/')
 322     cur++;
 323
 324   for (;;)
 325     {
 326       /* People like decorating comments with '*', so check for '/'
 327          instead for efficiency.  */
 328       c = *cur++;
 329
 330       if (c == '/')
 331         {
 332           if (cur[-2] == '*')
 333             break;
 334
 335           /* Warn about potential nested comments, but not if the '/'
 336              comes immediately before the true comment delimiter.
 337              Don't bother to get it right across escaped newlines.  */
 338           if (CPP_OPTION (pfile, warn_comments)
 339               && cur[0] == '*' && cur[1] != '/')
 340             {
 341               buffer->cur = cur;
 342               cpp_error_with_line (pfile, CPP_DL_WARNING,
 343                                    pfile->line_table->highest_line, CPP_BUF_COL (buffer),
 344                                    "\"/*\" within comment");
 345             }
 346         }
 347       else if (c == '\n')
 348         {
 349           unsigned int cols;
 350           buffer->cur = cur - 1;
 351           _cpp_process_line_notes (pfile, true);
 352           if (buffer->next_line >= buffer->rlimit)
 353             return true;
 354           _cpp_clean_line (pfile);
 355
 356           cols = buffer->next_line - buffer->line_base;
 357           CPP_INCREMENT_LINE (pfile, cols);
 358
 359           cur = buffer->cur;
 360         }
 361     }
 362
 363   buffer->cur = cur;
 364   _cpp_process_line_notes (pfile, true);
 365   return false;
 366 }
 367
 368 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 369    terminating newline.  Handles escaped newlines.  Returns nonzero
 370    if a multiline comment.  */
 371 static int
 372 skip_line_comment (cpp_reader *pfile)
 373 {
 374   cpp_buffer *buffer = pfile->buffer;
 375   unsigned int orig_line = pfile->line_table->highest_line;
 376
 377   while (*buffer->cur != '\n')
 378     buffer->cur++;
 379
 380   _cpp_process_line_notes (pfile, true);
 381   return orig_line != pfile->line_table->highest_line;
 382 }
 383
 384 /* Skips whitespace, saving the next non-whitespace character.  */
 385 static void
 386 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 387 {
 388   cpp_buffer *buffer = pfile->buffer;
 389   bool saw_NUL = false;
 390
 391   do
 392     {
 393       /* Horizontal space always OK.  */
 394       if (c == ' ' || c == '\t')
 395         ;
 396       /* Just \f \v or \0 left.  */
 397       else if (c == '\0')
 398         saw_NUL = true;
 399       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 400         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 401                              CPP_BUF_COL (buffer),
 402                              "%s in preprocessing directive",
 403                              c == '\f' ? "form feed" : "vertical tab");
 404
 405       c = *buffer->cur++;
 406     }
 407   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 408   while (is_nvspace (c));
 409
 410   if (saw_NUL)
 411     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 412
 413   buffer->cur--;
 414 }
 415
 416 /* See if the characters of a number token are valid in a name (no
 417    '.', '+' or '-').  */
 418 static int
 419 name_p (cpp_reader *pfile, const cpp_string *string)
 420 {
 421   unsigned int i;
 422
 423   for (i = 0; i < string->len; i++)
 424     if (!is_idchar (string->text[i]))
 425       return 0;
 426
 427   return 1;
 428 }
 429
 430 /* After parsing an identifier or other sequence, produce a warning about
 431    sequences not in NFC/NFKC.  */
 432 static void
 433 warn_about_normalization (cpp_reader *pfile,
 434                           const cpp_token *token,
 435                           const struct normalize_state *s)
 436 {
 437   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
 438       && !pfile->state.skipping)
 439     {
 440       /* Make sure that the token is printed using UCNs, even
 441          if we'd otherwise happily print UTF-8.  */
 442       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
 443       size_t sz;
 444
 445       sz = cpp_spell_token (pfile, token, buf, false) - buf;
 446       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
 447         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 448                              "`%.*s' is not in NFKC", (int) sz, buf);
 449       else
 450         cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
 451                              "`%.*s' is not in NFC", (int) sz, buf);
 452     }
 453 }
 454
 455 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 456    an identifier.  FIRST is TRUE if this starts an identifier.  */
 457 static bool
 458 forms_identifier_p (cpp_reader *pfile, int first,
 459                     struct normalize_state *state)
 460 {
 461   cpp_buffer *buffer = pfile->buffer;
 462
 463   if (*buffer->cur == '$')
 464     {
 465       if (!CPP_OPTION (pfile, dollars_in_ident))
 466         return false;
 467
 468       buffer->cur++;
 469       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 470         {
 471           CPP_OPTION (pfile, warn_dollars) = 0;
 472           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 473         }
 474
 475       return true;
 476     }
 477
 478   /* Is this a syntactically valid UCN?  */
 479   if (CPP_OPTION (pfile, extended_identifiers)
 480       && *buffer->cur == '\\'
 481       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 482     {
 483       buffer->cur += 2;
 484       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
 485                           state))
 486         return true;
 487       buffer->cur -= 2;
 488     }
 489
 490   return false;
 491 }
 492
 493 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 494 static cpp_hashnode *
 495 lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
 496                 struct normalize_state *nst)
 497 {
 498   cpp_hashnode *result;
 499   const uchar *cur;
 500   unsigned int len;
 501   unsigned int hash = HT_HASHSTEP (0, *base);
 502
 503   cur = pfile->buffer->cur;
 504   if (! starts_ucn)
 505     while (ISIDNUM (*cur))
 506       {
 507         hash = HT_HASHSTEP (hash, *cur);
 508         cur++;
 509       }
 510   pfile->buffer->cur = cur;
 511   if (starts_ucn || forms_identifier_p (pfile, false, nst))
 512     {
 513       /* Slower version for identifiers containing UCNs (or $).  */
 514       do {
 515         while (ISIDNUM (*pfile->buffer->cur))
 516           {
 517             pfile->buffer->cur++;
 518             NORMALIZE_STATE_UPDATE_IDNUM (nst);
 519           }
 520       } while (forms_identifier_p (pfile, false, nst));
 521       result = _cpp_interpret_identifier (pfile, base,
 522                                           pfile->buffer->cur - base);
 523     }
 524   else
 525     {
 526       len = cur - base;
 527       hash = HT_HASHFINISH (hash, len);
 528
 529       result = (cpp_hashnode *)
 530         ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
 531     }
 532
 533   /* Rarely, identifiers require diagnostics when lexed.  */
 534   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 535                         && !pfile->state.skipping, 0))
 536     {
 537       /* It is allowed to poison the same identifier twice.  */
 538       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 539         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 540                    NODE_NAME (result));
 541
 542       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 543          replacement list of a variadic macro.  */
 544       if (result == pfile->spec_nodes.n__VA_ARGS__
 545           && !pfile->state.va_args_ok)
 546         cpp_error (pfile, CPP_DL_PEDWARN,
 547                    "__VA_ARGS__ can only appear in the expansion"
 548                    " of a C99 variadic macro");
 549     }
 550
 551   return result;
 552 }
 553
 554 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 555 static void
 556 lex_number (cpp_reader *pfile, cpp_string *number,
 557             struct normalize_state *nst)
 558 {
 559   const uchar *cur;
 560   const uchar *base;
 561   uchar *dest;
 562
 563   base = pfile->buffer->cur - 1;
 564   do
 565     {
 566       cur = pfile->buffer->cur;
 567
 568       /* N.B. ISIDNUM does not include $.  */
 569       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 570         {
 571           cur++;
 572           NORMALIZE_STATE_UPDATE_IDNUM (nst);
 573         }
 574
 575       pfile->buffer->cur = cur;
 576     }
 577   while (forms_identifier_p (pfile, false, nst));
 578
 579   number->len = cur - base;
 580   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 581   memcpy (dest, base, number->len);
 582   dest[number->len] = '\0';
 583   number->text = dest;
 584 }
 585
 586 /* Create a token of type TYPE with a literal spelling.  */
 587 static void
 588 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 589                 unsigned int len, enum cpp_ttype type)
 590 {
 591   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 592
 593   memcpy (dest, base, len);
 594   dest[len] = '\0';
 595   token->type = type;
 596   token->val.str.len = len;
 597   token->val.str.text = dest;
 598 }
 599
 600 /* Lexes a string, character constant, or angle-bracketed header file
 601    name.  The stored string contains the spelling, including opening
 602    quote and leading any leading 'L'.  It returns the type of the
 603    literal, or CPP_OTHER if it was not properly terminated.
 604
 605    The spelling is NUL-terminated, but it is not guaranteed that this
 606    is the first NUL since embedded NULs are preserved.  */
 607 static void
 608 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 609 {
 610   bool saw_NUL = false;
 611   const uchar *cur;
 612   cppchar_t terminator;
 613   enum cpp_ttype type;
 614
 615   cur = base;
 616   terminator = *cur++;
 617   if (terminator == 'L')
 618     terminator = *cur++;
 619   if (terminator == '\"')
 620     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 621   else if (terminator == '\'')
 622     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 623   else
 624     terminator = '>', type = CPP_HEADER_NAME;
 625
 626   for (;;)
 627     {
 628       cppchar_t c = *cur++;
 629
 630       /* In #include-style directives, terminators are not escapable.  */
 631       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 632         cur++;
 633       else if (c == terminator)
 634         break;
 635       else if (c == '\n')
 636         {
 637           cur--;
 638           type = CPP_OTHER;
 639           break;
 640         }
 641       else if (c == '\0')
 642         saw_NUL = true;
 643     }
 644
 645   if (saw_NUL && !pfile->state.skipping)
 646     cpp_error (pfile, CPP_DL_WARNING,
 647                "null character(s) preserved in literal");
 648
 649   pfile->buffer->cur = cur;
 650   create_literal (pfile, token, base, cur - base, type);
 651 }
 652
 653 /* The stored comment includes the comment start and any terminator.  */
 654 static void
 655 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 656               cppchar_t type)
 657 {
 658   unsigned char *buffer;
 659   unsigned int len, clen;
 660
 661   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 662
 663   /* C++ comments probably (not definitely) have moved past a new
 664      line, which we don't want to save in the comment.  */
 665   if (is_vspace (pfile->buffer->cur[-1]))
 666     len--;
 667
 668   /* If we are currently in a directive, then we need to store all
 669      C++ comments as C comments internally, and so we need to
 670      allocate a little extra space in that case.
 671
 672      Note that the only time we encounter a directive here is
 673      when we are saving comments in a "#define".  */
 674   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 675
 676   buffer = _cpp_unaligned_alloc (pfile, clen);
 677
 678   token->type = CPP_COMMENT;
 679   token->val.str.len = clen;
 680   token->val.str.text = buffer;
 681
 682   buffer[0] = '/';
 683   memcpy (buffer + 1, from, len - 1);
 684
 685   /* Finish conversion to a C comment, if necessary.  */
 686   if (pfile->state.in_directive && type == '/')
 687     {
 688       buffer[1] = '*';
 689       buffer[clen - 2] = '*';
 690       buffer[clen - 1] = '/';
 691     }
 692 }
 693
 694 /* Allocate COUNT tokens for RUN.  */
 695 void
 696 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 697 {
 698   run->base = XNEWVEC (cpp_token, count);
 699   run->limit = run->base + count;
 700   run->next = NULL;
 701 }
 702
 703 /* Returns the next tokenrun, or creates one if there is none.  */
 704 static tokenrun *
 705 next_tokenrun (tokenrun *run)
 706 {
 707   if (run->next == NULL)
 708     {
 709       run->next = XNEW (tokenrun);
 710       run->next->prev = run;
 711       _cpp_init_tokenrun (run->next, 250);
 712     }
 713
 714   return run->next;
 715 }
 716
 717 /* Allocate a single token that is invalidated at the same time as the
 718    rest of the tokens on the line.  Has its line and col set to the
 719    same as the last lexed token, so that diagnostics appear in the
 720    right place.  */
 721 cpp_token *
 722 _cpp_temp_token (cpp_reader *pfile)
 723 {
 724   cpp_token *old, *result;
 725
 726   old = pfile->cur_token - 1;
 727   if (pfile->cur_token == pfile->cur_run->limit)
 728     {
 729       pfile->cur_run = next_tokenrun (pfile->cur_run);
 730       pfile->cur_token = pfile->cur_run->base;
 731     }
 732
 733   result = pfile->cur_token++;
 734   result->src_loc = old->src_loc;
 735   return result;
 736 }
 737
 738 /* Lex a token into RESULT (external interface).  Takes care of issues
 739    like directive handling, token lookahead, multiple include
 740    optimization and skipping.  */
 741 const cpp_token *
 742 _cpp_lex_token (cpp_reader *pfile)
 743 {
 744   cpp_token *result;
 745
 746   for (;;)
 747     {
 748       if (pfile->cur_token == pfile->cur_run->limit)
 749         {
 750           pfile->cur_run = next_tokenrun (pfile->cur_run);
 751           pfile->cur_token = pfile->cur_run->base;
 752         }
 753
 754       if (pfile->lookaheads)
 755         {
 756           pfile->lookaheads--;
 757           result = pfile->cur_token++;
 758         }
 759       else
 760         result = _cpp_lex_direct (pfile);
 761
 762       if (result->flags & BOL)
 763         {
 764           /* Is this a directive.  If _cpp_handle_directive returns
 765              false, it is an assembler #.  */
 766           if (result->type == CPP_HASH
 767               /* 6.10.3 p 11: Directives in a list of macro arguments
 768                  gives undefined behavior.  This implementation
 769                  handles the directive as normal.  */
 770               && pfile->state.parsing_args != 1)
 771             {
 772               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 773                 {
 774                   if (pfile->directive_result.type == CPP_PADDING)
 775                     continue;
 776                   result = &pfile->directive_result;
 777                 }
 778             }
 779           else if (pfile->state.in_deferred_pragma)
 780             result = &pfile->directive_result;
 781
 782           if (pfile->cb.line_change && !pfile->state.skipping)
 783             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 784         }
 785
 786       /* We don't skip tokens in directives.  */
 787       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
 788         break;
 789
 790       /* Outside a directive, invalidate controlling macros.  At file
 791          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 792          get here and MI optimization works.  */
 793       pfile->mi_valid = false;
 794
 795       if (!pfile->state.skipping || result->type == CPP_EOF)
 796         break;
 797     }
 798
 799   return result;
 800 }
 801
 802 /* Returns true if a fresh line has been loaded.  */
 803 bool
 804 _cpp_get_fresh_line (cpp_reader *pfile)
 805 {
 806   int return_at_eof;
 807
 808   /* We can't get a new line until we leave the current directive.  */
 809   if (pfile->state.in_directive)
 810     return false;
 811
 812   for (;;)
 813     {
 814       cpp_buffer *buffer = pfile->buffer;
 815
 816       if (!buffer->need_line)
 817         return true;
 818
 819       if (buffer->next_line < buffer->rlimit)
 820         {
 821           _cpp_clean_line (pfile);
 822           return true;
 823         }
 824
 825       /* First, get out of parsing arguments state.  */
 826       if (pfile->state.parsing_args)
 827         return false;
 828
 829       /* End of buffer.  Non-empty files should end in a newline.  */
 830       if (buffer->buf != buffer->rlimit
 831           && buffer->next_line > buffer->rlimit
 832           && !buffer->from_stage3)
 833         {
 834           /* Only warn once.  */
 835           buffer->next_line = buffer->rlimit;
 836           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
 837                                CPP_BUF_COLUMN (buffer, buffer->cur),
 838                                "no newline at end of file");
 839         }
 840
 841       return_at_eof = buffer->return_at_eof;
 842       _cpp_pop_buffer (pfile);
 843       if (pfile->buffer == NULL || return_at_eof)
 844         return false;
 845     }
 846 }
 847
 /* Choose result->type from the next input character: if it is CHAR,
    consume it and use THEN_TYPE, otherwise use ELSE_TYPE.  Relies on
    `result' and `buffer' being in scope where the macro is expanded.  */
 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
   do                                                    \
     {                                                   \
       result->type = ELSE_TYPE;                         \
       if (*buffer->cur == CHAR)                         \
         {                                               \
           buffer->cur++;                                \
           result->type = THEN_TYPE;                     \
         }                                               \
     }                                                   \
   while (0)
 856
 857 /* Lex a token into pfile->cur_token, which is also incremented, to
 858    get diagnostics pointing to the correct location.
 859
 860    Does not handle issues such as token lookahead, multiple-include
 861    optimization, directives, skipping etc.  This function is only
 862    suitable for use by _cpp_lex_token, and in special cases like
 863    lex_expansion_token which doesn't care for any of these issues.
 864
 865    When meeting a newline, returns CPP_EOF if parsing a directive,
 866    otherwise returns to the start of the token buffer if permissible.
 867    Returns the location of the lexed token, i.e. a pointer to it.
        This need not be the slot pfile->cur_token designated on entry:
        when a fresh line is started and pfile->keep_tokens is clear,
        lexing restarts in the first slot of pfile->base_run.

        A comment is skipped and recorded as preceding whitespace
        (PREV_WHITE) on the following token unless
        pfile->state.save_comments is set, in which case save_comment
        is called and the comment's token is returned.  */
 868 cpp_token *
 869 _cpp_lex_direct (cpp_reader *pfile)
 870 {
 871   cppchar_t c;
 872   cpp_buffer *buffer;
 873   const unsigned char *comment_start;
       /* Claim the next token slot; pfile->cur_token now points past it.  */
 874   cpp_token *result = pfile->cur_token++;
 875
       /* Entered on the first pass and again after every newline.  */
 876  fresh_line:
 877   result->flags = 0;
 878   buffer = pfile->buffer;
 879   if (buffer->need_line)
 880     {
           /* The line that just ended belonged to a deferred pragma:
              report its end with CPP_PRAGMA_EOL and leave
              deferred-pragma mode.  */
 881       if (pfile->state.in_deferred_pragma)
 882         {
 883           result->type = CPP_PRAGMA_EOL;
 884           pfile->state.in_deferred_pragma = false;
 885           if (!pfile->state.pragma_allow_expansion)
 886             pfile->state.prevent_expansion--;
 887           return result;
 888         }
           /* _cpp_get_fresh_line returned zero: hand back CPP_EOF.  */
 889       if (!_cpp_get_fresh_line (pfile))
 890         {
 891           result->type = CPP_EOF;
 892           if (!pfile->state.in_directive)
 893             {
 894               /* Tell the compiler the line number of the EOF token.  */
 895               result->src_loc = pfile->line_table->highest_line;
 896               result->flags = BOL;
 897             }
 898           return result;
 899         }
           /* Earlier tokens need not be kept: rewind to the first slot
              of the base token run and lex into that slot instead.  */
 900       if (!pfile->keep_tokens)
 901         {
 902           pfile->cur_run = &pfile->base_run;
 903           result = pfile->base_run.base;
 904           pfile->cur_token = result + 1;
 905         }
 906       result->flags = BOL;
           /* NOTE(review): parsing_args == 2 presumably means macro
              arguments are being collected, where the line break
              counts as whitespace -- confirm against macro.c.  */
 907       if (pfile->state.parsing_args == 2)
 908         result->flags |= PREV_WHITE;
 909     }
       /* Re-read pfile->buffer.  NOTE(review): presumably because
          _cpp_get_fresh_line may have switched buffers -- confirm.  */
 910   buffer = pfile->buffer;
 911  update_tokens_line:
 912   result->src_loc = pfile->line_table->highest_line;
 913
       /* Re-entered after horizontal whitespace has been skipped.  */
 914  skipped_white:
       /* Process pending line notes once the cursor has reached the
          position of the next note, except while an overlaid buffer is
          in use; src_loc is refreshed afterwards.  */
 915   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 916       && !pfile->overlaid_buffer)
 917     {
 918       _cpp_process_line_notes (pfile, false);
 919       result->src_loc = pfile->line_table->highest_line;
 920     }
       /* Fetch the token's first character.  buffer->cur now points one
          past it, so "buffer->cur - 1" below is the token's first byte.  */
 921   c = *buffer->cur++;
 922
 923   LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
 924                                CPP_BUF_COLUMN (buffer, buffer->cur));
 925
 926   switch (c)
 927     {
         /* Horizontal whitespace; a NUL byte is handled the same way.  */
 928     case ' ': case '\t': case '\f': case '\v': case '\0':
 929       result->flags |= PREV_WHITE;
 930       skip_whitespace (pfile, c);
 931       goto skipped_white;
 932
         /* End of line: bump the line counter unless the cursor has
            reached buffer->rlimit, then request a new line and start
            over.  */
 933     case '\n':
 934       if (buffer->cur < buffer->rlimit)
 935         CPP_INCREMENT_LINE (pfile, 0);
 936       buffer->need_line = true;
 937       goto fresh_line;
 938
 939     case '0': case '1': case '2': case '3': case '4':
 940     case '5': case '6': case '7': case '8': case '9':
 941       {
 942         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 943         result->type = CPP_NUMBER;
 944         lex_number (pfile, &result->val.str, &nst);
 945         warn_about_normalization (pfile, result, &nst);
 946         break;
 947       }
 948
 949     case 'L':
 950       /* 'L' may introduce wide characters or strings.  */
 951       if (*buffer->cur == '\'' || *buffer->cur == '"')
 952         {
 953           lex_string (pfile, result, buffer->cur - 1);
 954           break;
 955         }
 956       /* Fall through.  */
 957
         /* Identifier start characters.  'L' is absent from the
            upper-case list because it has its own case above.  */
 958     case '_':
 959     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 960     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 961     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 962     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 963     case 'y': case 'z':
 964     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 965     case 'G': case 'H': case 'I': case 'J': case 'K':
 966     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 967     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 968     case 'Y': case 'Z':
 969       result->type = CPP_NAME;
 970       {
 971         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 972         result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
 973                                            &nst);
 974         warn_about_normalization (pfile, result, &nst);
 975       }
 976
 977       /* Convert named operators to their proper types.  */
           /* val.node is left in place, so the token can still be
              spelled from its identifier.  */
 978       if (result->val.node->flags & NODE_OPERATOR)
 979         {
 980           result->flags |= NAMED_OP;
 981           result->type = (enum cpp_ttype) result->val.node->directive_index;
 982         }
 983       break;
 984
 985     case '\'':
 986     case '"':
 987       lex_string (pfile, result, buffer->cur - 1);
 988       break;
 989
 990     case '/':
 991       /* A potential block or line comment.  */
 992       comment_start = buffer->cur;
 993       c = *buffer->cur;
 994
 995       if (c == '*')
 996         {
 997           if (_cpp_skip_block_comment (pfile))
 998             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 999         }
1000       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1001                             || cpp_in_system_header (pfile)))
1002         {
1003           /* Warn about comments only if pedantically GNUC89, and not
1004              in system headers.  */
1005           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1006               && ! buffer->warned_cplusplus_comments)
1007             {
1008               cpp_error (pfile, CPP_DL_PEDWARN,
1009                          "C++ style comments are not allowed in ISO C90");
1010               cpp_error (pfile, CPP_DL_PEDWARN,
1011                          "(this will be reported only once per input file)");
1012               buffer->warned_cplusplus_comments = 1;
1013             }
1014
1015           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1016             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
1017         }
1018       else if (c == '=')
1019         {
1020           buffer->cur++;
1021           result->type = CPP_DIV_EQ;
1022           break;
1023         }
1024       else
1025         {
1026           result->type = CPP_DIV;
1027           break;
1028         }
1029
           /* Only the two comment branches reach this point.  A
              discarded comment counts as whitespace: refresh src_loc
              and lex the next token into the same slot.  */
1030       if (!pfile->state.save_comments)
1031         {
1032           result->flags |= PREV_WHITE;
1033           goto update_tokens_line;
1034         }
1035
1036       /* Save the comment as a token in its own right.  */
1037       save_comment (pfile, result, comment_start, c);
1038       break;
1039
1040     case '<':
           /* While state.angled_headers is set, the '<' is handed to
              lex_string instead of being lexed as an operator.  */
1041       if (pfile->state.angled_headers)
1042         {
1043           lex_string (pfile, result, buffer->cur - 1);
1044           break;
1045         }
1046
1047       result->type = CPP_LESS;
1048       if (*buffer->cur == '=')
1049         buffer->cur++, result->type = CPP_LESS_EQ;
1050       else if (*buffer->cur == '<')
1051         {
1052           buffer->cur++;
               /* IF_NEXT_IS is a macro defined outside this function.
                  NOTE(review): it appears to select the first type and
                  consume the character when the next character matches,
                  and the second type otherwise -- confirm.  */
1053           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1054         }
           /* "<?" and "<?=" are only recognized with the cplusplus
              option.  */
1055       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1056         {
1057           buffer->cur++;
1058           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1059         }
           /* Digraphs "<:" and "<%" stand for '[' and '{'.  */
1060       else if (CPP_OPTION (pfile, digraphs))
1061         {
1062           if (*buffer->cur == ':')
1063             {
1064               buffer->cur++;
1065               result->flags |= DIGRAPH;
1066               result->type = CPP_OPEN_SQUARE;
1067             }
1068           else if (*buffer->cur == '%')
1069             {
1070               buffer->cur++;
1071               result->flags |= DIGRAPH;
1072               result->type = CPP_OPEN_BRACE;
1073             }
1074         }
1075       break;
1076
1077     case '>':
1078       result->type = CPP_GREATER;
1079       if (*buffer->cur == '=')
1080         buffer->cur++, result->type = CPP_GREATER_EQ;
1081       else if (*buffer->cur == '>')
1082         {
1083           buffer->cur++;
1084           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1085         }
           /* ">?" and ">?=" are only recognized with the cplusplus
              option.  */
1086       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1087         {
1088           buffer->cur++;
1089           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1090         }
1091       break;
1092
1093     case '%':
1094       result->type = CPP_MOD;
1095       if (*buffer->cur == '=')
1096         buffer->cur++, result->type = CPP_MOD_EQ;
           /* Digraphs: "%:" is '#', "%:%:" is '##' and "%>" is '}'.  */
1097       else if (CPP_OPTION (pfile, digraphs))
1098         {
1099           if (*buffer->cur == ':')
1100             {
1101               buffer->cur++;
1102               result->flags |= DIGRAPH;
1103               result->type = CPP_HASH;
1104               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1105                 buffer->cur += 2, result->type = CPP_PASTE;
1106             }
1107           else if (*buffer->cur == '>')
1108             {
1109               buffer->cur++;
1110               result->flags |= DIGRAPH;
1111               result->type = CPP_CLOSE_BRACE;
1112             }
1113         }
1114       break;
1115
1116     case '.':
1117       result->type = CPP_DOT;
           /* A '.' followed by a digit starts a number.  */
1118       if (ISDIGIT (*buffer->cur))
1119         {
1120           struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1121           result->type = CPP_NUMBER;
1122           lex_number (pfile, &result->val.str, &nst);
1123           warn_about_normalization (pfile, result, &nst);
1124         }
1125       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1126         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1127       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1128         buffer->cur++, result->type = CPP_DOT_STAR;
1129       break;
1130
1131     case '+':
1132       result->type = CPP_PLUS;
1133       if (*buffer->cur == '+')
1134         buffer->cur++, result->type = CPP_PLUS_PLUS;
1135       else if (*buffer->cur == '=')
1136         buffer->cur++, result->type = CPP_PLUS_EQ;
1137       break;
1138
1139     case '-':
1140       result->type = CPP_MINUS;
1141       if (*buffer->cur == '>')
1142         {
1143           buffer->cur++;
1144           result->type = CPP_DEREF;
1145           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1146             buffer->cur++, result->type = CPP_DEREF_STAR;
1147         }
1148       else if (*buffer->cur == '-')
1149         buffer->cur++, result->type = CPP_MINUS_MINUS;
1150       else if (*buffer->cur == '=')
1151         buffer->cur++, result->type = CPP_MINUS_EQ;
1152       break;
1153
1154     case '&':
1155       result->type = CPP_AND;
1156       if (*buffer->cur == '&')
1157         buffer->cur++, result->type = CPP_AND_AND;
1158       else if (*buffer->cur == '=')
1159         buffer->cur++, result->type = CPP_AND_EQ;
1160       break;
1161
1162     case '|':
1163       result->type = CPP_OR;
1164       if (*buffer->cur == '|')
1165         buffer->cur++, result->type = CPP_OR_OR;
1166       else if (*buffer->cur == '=')
1167         buffer->cur++, result->type = CPP_OR_EQ;
1168       break;
1169
1170     case ':':
1171       result->type = CPP_COLON;
1172       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1173         buffer->cur++, result->type = CPP_SCOPE;
           /* Digraph ":>" stands for ']'.  */
1174       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1175         {
1176           buffer->cur++;
1177           result->flags |= DIGRAPH;
1178           result->type = CPP_CLOSE_SQUARE;
1179         }
1180       break;
1181
1182     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1183     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1184     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1185     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1186     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1187
1188     case '?': result->type = CPP_QUERY; break;
1189     case '~': result->type = CPP_COMPL; break;
1190     case ',': result->type = CPP_COMMA; break;
1191     case '(': result->type = CPP_OPEN_PAREN; break;
1192     case ')': result->type = CPP_CLOSE_PAREN; break;
1193     case '[': result->type = CPP_OPEN_SQUARE; break;
1194     case ']': result->type = CPP_CLOSE_SQUARE; break;
1195     case '{': result->type = CPP_OPEN_BRACE; break;
1196     case '}': result->type = CPP_CLOSE_BRACE; break;
1197     case ';': result->type = CPP_SEMICOLON; break;
1198
1199       /* @ is a punctuator in Objective-C.  */
1200     case '@': result->type = CPP_ATSIGN; break;
1201
         /* '$' or a backslash may begin an identifier.  Back the cursor
            up so that forms_identifier_p examines the character itself;
            if no identifier starts here, step forward again and fall
            through so the character becomes a one-byte CPP_OTHER.  */
1202     case '$':
1203     case '\\':
1204       {
1205         const uchar *base = --buffer->cur;
1206         struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1207
1208         if (forms_identifier_p (pfile, true, &nst))
1209           {
1210             result->type = CPP_NAME;
1211             result->val.node = lex_identifier (pfile, base, true, &nst);
1212             warn_about_normalization (pfile, result, &nst);
1213             break;
1214           }
1215         buffer->cur++;
1216       }
           /* Fall through.  */
1217
1218     default:
1219       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1220       break;
1221     }
1222
1223   return result;
1224 }
1225
1226 /* An upper bound on the number of bytes needed to spell TOKEN.
1227    Does not include preceding whitespace.  */
1228 unsigned int
1229 cpp_token_len (const cpp_token *token)
1230 {
1231   unsigned int len;
1232
1233   switch (TOKEN_SPELL (token))
1234     {
1235     default:            len = 4;                                break;
1236     case SPELL_LITERAL: len = token->val.str.len;               break;
1237     case SPELL_IDENT:   len = NODE_LEN (token->val.node) * 10;  break;
1238     }
1239
1240   return len;
1241 }
1242
/* Decode the UTF-8 sequence that starts at NAME and store the
   corresponding universal character name in BUFFER, as a backslash,
   'U' and eight lower-case hexadecimal digits.  Exactly 10 bytes are
   written and no terminator is added.

   Returns the number of bytes of NAME that made up the sequence, which
   is the number of leading one bits in the first byte.  Calls abort
   if a byte after the first does not have the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned lead = *name;
  unsigned long code_point;
  int seq_len = 0;
  int seen;
  int digit;

  /* Each leading one bit of the first byte accounts for one byte of
     the sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The payload of the first byte is whatever follows the length
     prefix; every further byte contributes six more bits.  */
  code_point = *name & (0x7F >> seq_len);
  for (seen = 1; seen < seq_len; seen++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  /* Most significant nibble first.  */
  for (digit = 0; digit < 8; digit++)
    buffer[2 + digit] = hex_digits[(code_point >> (4 * (7 - digit))) & 0xF];

  return seq_len;
}
1276
1277
1278 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1279    already contain the enough space to hold the token's spelling.
1280    Returns a pointer to the character after the last character written.
1281    FORSTRING is true if this is to be the spelling after translation
1282    phase 1 (this is different for UCNs).
1283    FIXME: Would be nice if we didn't need the PFILE argument.  */
1284 unsigned char *
1285 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1286                  unsigned char *buffer, bool forstring)
1287 {
1288   switch (TOKEN_SPELL (token))
1289     {
1290     case SPELL_OPERATOR:
1291       {
1292         const unsigned char *spelling;
1293         unsigned char c;
1294
1295         if (token->flags & DIGRAPH)
1296           spelling
1297             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1298         else if (token->flags & NAMED_OP)
1299           goto spell_ident;
1300         else
1301           spelling = TOKEN_NAME (token);
1302
1303         while ((c = *spelling++) != '\0')
1304           *buffer++ = c;
1305       }
1306       break;
1307
1308     spell_ident:
1309     case SPELL_IDENT:
1310       if (forstring)
1311         {
1312           memcpy (buffer, NODE_NAME (token->val.node),
1313                   NODE_LEN (token->val.node));
1314           buffer += NODE_LEN (token->val.node);
1315         }
1316       else
1317         {
1318           size_t i;
1319           const unsigned char * name = NODE_NAME (token->val.node);
1320
1321           for (i = 0; i < NODE_LEN (token->val.node); i++)
1322             if (name[i] & ~0x7F)
1323               {
1324                 i += utf8_to_ucn (buffer, name + i) - 1;
1325                 buffer += 10;
1326               }
1327             else
1328               *buffer++ = NODE_NAME (token->val.node)[i];
1329         }
1330       break;
1331
1332     case SPELL_LITERAL:
1333       memcpy (buffer, token->val.str.text, token->val.str.len);
1334       buffer += token->val.str.len;
1335       break;
1336
1337     case SPELL_NONE:
1338       cpp_error (pfile, CPP_DL_ICE,
1339                  "unspellable token %s", TOKEN_NAME (token));
1340       break;
1341     }
1342
1343   return buffer;
1344 }
1345
1346 /* Returns TOKEN spelt as a null-terminated string.  The string is
1347    freed when the reader is destroyed.  Useful for diagnostics.  */
1348 unsigned char *
1349 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1350 {
1351   unsigned int len = cpp_token_len (token) + 1;
1352   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1353
1354   end = cpp_spell_token (pfile, token, start, false);
1355   end[0] = '\0';
1356
1357   return start;
1358 }
1359
1360 /* Used by C front ends, which really should move to using
1361    cpp_token_as_text.  */
1362 const char *
1363 cpp_type2name (enum cpp_ttype type)
1364 {
1365   return (const char *) token_spellings[type].name;
1366 }
1367
1368 /* Writes the spelling of token to FP, without any preceding space.
1369    Separated from cpp_spell_token for efficiency - to avoid stdio
1370    double-buffering.  */
1371 void
1372 cpp_output_token (const cpp_token *token, FILE *fp)
1373 {
1374   switch (TOKEN_SPELL (token))
1375     {
1376     case SPELL_OPERATOR:
1377       {
1378         const unsigned char *spelling;
1379         int c;
1380
1381         if (token->flags & DIGRAPH)
1382           spelling
1383             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1384         else if (token->flags & NAMED_OP)
1385           goto spell_ident;
1386         else
1387           spelling = TOKEN_NAME (token);
1388
1389         c = *spelling;
1390         do
1391           putc (c, fp);
1392         while ((c = *++spelling) != '\0');
1393       }
1394       break;
1395
1396     spell_ident:
1397     case SPELL_IDENT:
1398       {
1399         size_t i;
1400         const unsigned char * name = NODE_NAME (token->val.node);
1401
1402         for (i = 0; i < NODE_LEN (token->val.node); i++)
1403           if (name[i] & ~0x7F)
1404             {
1405               unsigned char buffer[10];
1406               i += utf8_to_ucn (buffer, name + i) - 1;
1407               fwrite (buffer, 1, 10, fp);
1408             }
1409           else
1410             fputc (NODE_NAME (token->val.node)[i], fp);
1411       }
1412       break;
1413
1414     case SPELL_LITERAL:
1415       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1416       break;
1417
1418     case SPELL_NONE:
1419       /* An error, most probably.  */
1420       break;
1421     }
1422 }
1423
1424 /* Compare two tokens.  */
1425 int
1426 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1427 {
1428   if (a->type == b->type && a->flags == b->flags)
1429     switch (TOKEN_SPELL (a))
1430       {
1431       default:                  /* Keep compiler happy.  */
1432       case SPELL_OPERATOR:
1433         return 1;
1434       case SPELL_NONE:
1435         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1436       case SPELL_IDENT:
1437         return a->val.node == b->val.node;
1438       case SPELL_LITERAL:
1439         return (a->val.str.len == b->val.str.len
1440                 && !memcmp (a->val.str.text, b->val.str.text,
1441                             a->val.str.len));
1442       }
1443
1444   return 0;
1445 }
1446
1447 /* Returns nonzero if a space should be inserted to avoid an
1448    accidental token paste for output.  For simplicity, it is
1449    conservative, and occasionally advises a space where one is not
1450    needed, e.g. "." and ".2".  */
1451 int
1452 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1453                  const cpp_token *token2)
1454 {
1455   enum cpp_ttype a = token1->type, b = token2->type;
1456   cppchar_t c;
1457
1458   if (token1->flags & NAMED_OP)
1459     a = CPP_NAME;
1460   if (token2->flags & NAMED_OP)
1461     b = CPP_NAME;
1462
1463   c = EOF;
1464   if (token2->flags & DIGRAPH)
1465     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1466   else if (token_spellings[b].category == SPELL_OPERATOR)
1467     c = token_spellings[b].name[0];
1468
1469   /* Quickly get everything that can paste with an '='.  */
1470   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1471     return 1;
1472
1473   switch (a)
1474     {
1475     case CPP_GREATER:   return c == '>' || c == '?';
1476     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1477     case CPP_PLUS:      return c == '+';
1478     case CPP_MINUS:     return c == '-' || c == '>';
1479     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1480     case CPP_MOD:       return c == ':' || c == '>';
1481     case CPP_AND:       return c == '&';
1482     case CPP_OR:        return c == '|';
1483     case CPP_COLON:     return c == ':' || c == '>';
1484     case CPP_DEREF:     return c == '*';
1485     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1486     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1487     case CPP_NAME:      return ((b == CPP_NUMBER
1488                                  && name_p (pfile, &token2->val.str))
1489                                 || b == CPP_NAME
1490                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1491     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1492                                 || c == '.' || c == '+' || c == '-');
1493                                       /* UCNs */
1494     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1495                                  && b == CPP_NAME)
1496                                 || (CPP_OPTION (pfile, objc)
1497                                     && token1->val.str.text[0] == '@'
1498                                     && (b == CPP_NAME || b == CPP_STRING)));
1499     default:            break;
1500     }
1501
1502   return 0;
1503 }
1504
1505 /* Output all the remaining tokens on the current line, and a newline
1506    character, to FP.  Leading whitespace is removed.  If there are
1507    macros, special token padding is not performed.  */
1508 void
1509 cpp_output_line (cpp_reader *pfile, FILE *fp)
1510 {
1511   const cpp_token *token;
1512
1513   token = cpp_get_token (pfile);
1514   while (token->type != CPP_EOF)
1515     {
1516       cpp_output_token (token, fp);
1517       token = cpp_get_token (pfile);
1518       if (token->flags & PREV_WHITE)
1519         putc (' ', fp);
1520     }
1521
1522   putc ('\n', fp);
1523 }
1524
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will reuse for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the room left in BUFF.
   Every macro argument is parenthesized so that an expression passed
   as an argument keeps its own grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1539
1540 /* Create a new allocation buffer.  Place the control block at the end
1541    of the buffer, so that buffer overflows will cause immediate chaos.  */
1542 static _cpp_buff *
1543 new_buff (size_t len)
1544 {
1545   _cpp_buff *result;
1546   unsigned char *base;
1547
1548   if (len < MIN_BUFF_SIZE)
1549     len = MIN_BUFF_SIZE;
1550   len = CPP_ALIGN (len);
1551
1552   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1553   result = (_cpp_buff *) (base + len);
1554   result->base = base;
1555   result->cur = base;
1556   result->limit = base + len;
1557   result->next = NULL;
1558   return result;
1559 }
1560
1561 /* Place a chain of unwanted allocation buffers on the free list.  */
1562 void
1563 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1564 {
1565   _cpp_buff *end = buff;
1566
1567   while (end->next)
1568     end = end->next;
1569   end->next = pfile->free_buffs;
1570   pfile->free_buffs = buff;
1571 }
1572
1573 /* Return a free buffer of size at least MIN_SIZE.  */
1574 _cpp_buff *
1575 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1576 {
1577   _cpp_buff *result, **p;
1578
1579   for (p = &pfile->free_buffs;; p = &(*p)->next)
1580     {
1581       size_t size;
1582
1583       if (*p == NULL)
1584         return new_buff (min_size);
1585       result = *p;
1586       size = result->limit - result->base;
1587       /* Return a buffer that's big enough, but don't waste one that's
1588          way too big.  */
1589       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1590         break;
1591     }
1592
1593   *p = result->next;
1594   result->next = NULL;
1595   result->cur = result->base;
1596   return result;
1597 }
1598
1599 /* Creates a new buffer with enough space to hold the uncommitted
1600    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1601    the excess bytes to the new buffer.  Chains the new buffer after
1602    BUFF, and returns the new buffer.  */
1603 _cpp_buff *
1604 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1605 {
1606   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1607   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1608
1609   buff->next = new_buff;
1610   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1611   return new_buff;
1612 }
1613
1614 /* Creates a new buffer with enough space to hold the uncommitted
1615    remaining bytes of the buffer pointed to by BUFF, and at least
1616    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1617    Chains the new buffer before the buffer pointed to by BUFF, and
1618    updates the pointer to point to the new buffer.  */
1619 void
1620 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1621 {
1622   _cpp_buff *new_buff, *old_buff = *pbuff;
1623   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1624
1625   new_buff = _cpp_get_buff (pfile, size);
1626   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1627   new_buff->next = old_buff;
1628   *pbuff = new_buff;
1629 }
1630
1631 /* Free a chain of buffers starting at BUFF.  */
1632 void
1633 _cpp_free_buff (_cpp_buff *buff)
1634 {
1635   _cpp_buff *next;
1636
1637   for (; buff; buff = next)
1638     {
1639       next = buff->next;
1640       free (buff->base);
1641     }
1642 }
1643
1644 /* Allocate permanent, unaligned storage of length LEN.  */
1645 unsigned char *
1646 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1647 {
1648   _cpp_buff *buff = pfile->u_buff;
1649   unsigned char *result = buff->cur;
1650
1651   if (len > (size_t) (buff->limit - result))
1652     {
1653       buff = _cpp_get_buff (pfile, len);
1654       buff->next = pfile->u_buff;
1655       pfile->u_buff = buff;
1656       result = buff->cur;
1657     }
1658
1659   buff->cur = result + len;
1660   return result;
1661 }
1662
1663 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1664    That buffer is used for growing allocations when saving macro
1665    replacement lists in a #define, and when parsing an answer to an
1666    assertion in #assert, #unassert or #if (and therefore possibly
1667    whilst expanding macros).  It therefore must not be used by any
1668    code that they might call: specifically the lexer and the guts of
1669    the macro expander.
1670
1671    All existing other uses clearly fit this restriction: storing
1672    registered pragmas during initialization.  */
1673 unsigned char *
1674 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1675 {
1676   _cpp_buff *buff = pfile->a_buff;
1677   unsigned char *result = buff->cur;
1678
1679   if (len > (size_t) (buff->limit - result))
1680     {
1681       buff = _cpp_get_buff (pfile, len);
1682       buff->next = pfile->a_buff;
1683       pfile->a_buff = buff;
1684       result = buff->cur;
1685     }
1686
1687   buff->cur = result + len;
1688   return result;
1689 }
1690
1691 /* Say which field of TOK is in use.  */
1692
1693 enum cpp_token_fld_kind
1694 cpp_token_val_index (cpp_token *tok)
1695 {
1696   switch (TOKEN_SPELL (tok))
1697     {
1698     case SPELL_IDENT:
1699       return CPP_TOKEN_FLD_NODE;
1700     case SPELL_LITERAL:
1701       return CPP_TOKEN_FLD_STR;
1702     case SPELL_NONE:
1703       if (tok->type == CPP_MACRO_ARG)
1704         return CPP_TOKEN_FLD_ARG_NO;
1705       else if (tok->type == CPP_PADDING)
1706         return CPP_TOKEN_FLD_SOURCE;
1707       else if (tok->type == CPP_PRAGMA)
1708         return CPP_TOKEN_FLD_PRAGMA;
1709       /* else fall through */
1710     default:
1711       return CPP_TOKEN_FLD_NONE;
1712     }
1713 }