gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "cpplib.h"
  25 #include "cpphash.h"
  26
      /* Category of a token type.  One of these is stored in the CATEGORY
         field of each struct token_spelling entry and is read back through
         the TOKEN_SPELL macro.  */
   27 enum spell_type
   28 {
   29   SPELL_OPERATOR = 0,
          /* Used by every OP() entry of the spelling table; the entry's
             NAME is then the operator's text.  */
   30   SPELL_IDENT,
          /* NOTE(review): presumably "spelling comes from the identifier
             node" -- confirm against the users of TOKEN_SPELL.  */
   31   SPELL_LITERAL,
          /* NOTE(review): presumably "spelling is the stored literal
             text" -- confirm against the users of TOKEN_SPELL.  */
   32   SPELL_NONE
          /* NOTE(review): presumably "no spelling of its own" -- confirm.  */
   33 };
  34
      /* One entry of the token_spellings table, which is indexed by token
         type (see TOKEN_SPELL and TOKEN_NAME).  */
   35 struct token_spelling
   36 {
   37   enum spell_type category;
          /* For OP() entries NAME is the operator's text; for TK() entries
             it is the stringified name of the token type enumerator.  */
   38   const unsigned char *name;
   39 };
  40
      /* Spellings of the six C digraphs.  In the language these are the
         alternative spellings of  #  ##  [  ]  {  }  respectively.
         NOTE(review): the code that indexes this array is not in view, so
         the required element order must be confirmed at the use site.  */
   41 static const unsigned char *const digraph_spellings[] =
   42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  43
      /* Build the table of token spellings by expanding TTYPE_TABLE with
         temporary definitions of its two entry macros:

           OP(e, s)  yields { SPELL_OPERATOR, <text s> }
           TK(e, s)  yields { <category s>, <stringified name of e> }

         Each expansion ends in a comma, so TTYPE_TABLE becomes the whole
         initializer list.  Both macros are undefined again immediately.
         NOTE(review): U is presumably a cast to an unsigned character
         pointer -- confirm where it is defined.  */
   44 #define OP(e, s) { SPELL_OPERATOR, U s           },
   45 #define TK(e, s) { s,              U #e },
   46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
   47 #undef OP
   48 #undef TK
  49
      /* Accessors for the token_spellings entry selected by TOKEN->type:
         TOKEN_SPELL gives the spelling category, TOKEN_NAME the name
         string.  TOKEN must be a pointer to a token.  */
   50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
   51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  52
      /* Forward declarations of the helpers that are private to this
         file.  */
   53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
   54 static int skip_line_comment (cpp_reader *);
   55 static void skip_whitespace (cpp_reader *, cppchar_t);
   56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
   57 static void lex_number (cpp_reader *, cpp_string *);
   58 static bool forms_identifier_p (cpp_reader *, int);
   59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
   60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
   61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
   62                             unsigned int, enum cpp_ttype);
   63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
   64 static int name_p (cpp_reader *, const cpp_string *);
   65 static tokenrun *next_tokenrun (tokenrun *);
   66
      /* Defined further down the file, outside the part that declares
         the lexing helpers above.  */
   67 static _cpp_buff *new_buff (size_t);
  68
  69
  70 /* Utility routine:
  71
  72    Compares, the token TOKEN to the NUL-terminated string STRING.
  73    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  74 int
  75 cpp_ideq (const cpp_token *token, const char *string)
  76 {
  77   if (token->type != CPP_NAME)
  78     return 0;
  79
  80   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  81 }
  82
  83 /* Record a note TYPE at byte POS into the current cleaned logical
  84    line.  */
  85 static void
  86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  87 {
  88   if (buffer->notes_used == buffer->notes_cap)
  89     {
  90       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  91       buffer->notes = xrealloc (buffer->notes,
  92                                 buffer->notes_cap * sizeof (_cpp_line_note));
  93     }
  94
  95   buffer->notes[buffer->notes_used].pos = pos;
  96   buffer->notes[buffer->notes_used].type = type;
  97   buffer->notes_used++;
  98 }
  99
  100 /* Prepare the next logical line of the current buffer for lexing.
  101    On return buffer->cur and buffer->line_base point at its start, the
         line ends in a single '\n', and buffer->next_line points just past
         the input that was consumed.  Unless the buffer is marked
         from_stage3, backslash-newline pairs are spliced out in place and
         recorded as line notes, and every trigraph is recorded as a line
         note too (and replaced in place when the trigraphs option is on).
         The notes are acted on later by _cpp_process_line_notes.  This is
         a time-critical inner loop.  */
  102 void
  103 _cpp_clean_line (cpp_reader *pfile)
  104 {
  105   cpp_buffer *buffer;
  106   const uchar *s;           /* Read position.  */
  107   uchar c, *d, *p;          /* D: write position.  P: backward scan.  */
  108
  109   buffer = pfile->buffer;
        /* Discard the notes of the previous line and make the new line
           current.  */
  110   buffer->cur_note = buffer->notes_used = 0;
  111   buffer->cur = buffer->line_base = buffer->next_line;
  112   buffer->need_line = false;
        /* Both scanning loops pre-increment S, so start one byte early.  */
  113   s = buffer->next_line - 1;
  114
  115   if (!buffer->from_stage3)
  116     {
  117       /* Short circuit for the common case of an un-escaped line with
  118          no trigraphs.  The primary win here is by not writing any
  119          data back to memory until we have to.  */
  120       for (;;)
  121         {
  122           c = *++s;
  123           if (c == '\n' || c == '\r')
  124             {
  125               d = (uchar *) s;
  126
                    /* A line end found exactly at rlimit ends the buffer;
                       next_line then becomes rlimit + 1, which is how the
                       callers recognise this case.  */
  127               if (s == buffer->rlimit)
  128                 goto done;
  129
  130               /* DOS line ending? */
  131               if (c == '\r' && s[1] == '\n')
  132                 s++;
  133
  134               if (s == buffer->rlimit)
  135                 goto done;
  136
  137               /* check for escaped newline */
                    /* Walk back over horizontal space; the line is escaped
                       only if a backslash precedes that space.  The scan
                       never goes below buffer->next_line.  */
  138               p = d;
  139               while (p != buffer->next_line && is_nvspace (p[-1]))
  140                 p--;
  141               if (p == buffer->next_line || p[-1] != '\\')
  142                 goto done;
  143
  144               /* Have an escaped newline; process it and proceed to
  145                  the slow path.  */
                    /* The note sits on the backslash.  Its type is ' ' when
                       space separated the backslash from the newline, and
                       '\\' otherwise.  */
  146               add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
                    /* The slow path writes with *++d, so the next byte
                       copied overwrites the backslash.  next_line is
                       moved to that same position so that later backward
                       scans stop at the start of the continuation.  */
  147               d = p - 2;
  148               buffer->next_line = p - 1;
  149               break;
  150             }
  151           if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
  152             {
  153               /* Have a trigraph.  We may or may not have to convert
  154                  it.  Add a line note regardless, for -Wtrigraphs.  */
  155               add_line_note (buffer, s, s[2]);
  156               if (CPP_OPTION (pfile, trigraphs))
  157                 {
  158                   /* We do, and that means we have to switch to the
  159                      slow path.  */
                        /* The replacement goes where the first '?' was and
                           the other two characters are skipped.  */
  160                   d = (uchar *) s;
  161                   *d = _cpp_trigraph_map[s[2]];
  162                   s += 2;
  163                   break;
  164                 }
  165             }
  166         }
  167
  168
            /* Slow path: from here on every character is copied from S to
               D, because at least one splice or replacement has made the
               cleaned line shorter than the raw input.  */
  169       for (;;)
  170         {
  171           c = *++s;
  172           *++d = c;
  173
  174           if (c == '\n' || c == '\r')
  175             {
  176               /* Handle DOS line endings.  */
  177               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
  178                 s++;
  179               if (s == buffer->rlimit)
  180                 break;
  181
  182               /* Escaped?  */
                    /* Same backward scan as in the fast path, but over the
                       already-copied text ending at D.  */
  183               p = d;
  184               while (p != buffer->next_line && is_nvspace (p[-1]))
  185                 p--;
  186               if (p == buffer->next_line || p[-1] != '\\')
  187                 break;
  188
  189               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
  190               d = p - 2;
  191               buffer->next_line = p - 1;
  192             }
  193           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
  194             {
  195               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
                    /* The note position is taken from D, i.e. it refers to
                       the cleaned line rather than to the raw input.  */
  196               add_line_note (buffer, d, s[2]);
  197               if (CPP_OPTION (pfile, trigraphs))
  198                 {
  199                   *d = _cpp_trigraph_map[s[2]];
  200                   s += 2;
  201                 }
  202             }
  203         }
  204     }
  205   else
  206     {
            /* A from_stage3 buffer gets no splicing and no trigraph
               handling: just find the end of the line.  */
  207       do
  208         s++;
  209       while (*s != '\n' && *s != '\r');
  210       d = (uchar *) s;
  211
  212       /* Handle DOS line endings.  */
  213       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
  214         s++;
  215     }
  216
  217  done:
        /* Whatever the line ended with, the cleaned line ends in '\n'.  */
  218   *d = '\n';
  219   /* A sentinel note that should never be processed.  */
  220   add_line_note (buffer, d + 1, '\n');
  221   buffer->next_line = s + 1;
  222 }
 223
 224 /* Return true if the trigraph indicated by NOTE should be warned
 225    about in a comment.  */
 226 static bool
 227 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 228 {
 229   const uchar *p;
 230
 231   /* Within comments we don't warn about trigraphs, unless the
 232      trigraph forms an escaped newline, as that may change
 233      behavior.  */
 234   if (note->type != '/')
 235     return false;
 236
 237   /* If -trigraphs, then this was an escaped newline iff the next note
 238      is coincident.  */
 239   if (CPP_OPTION (pfile, trigraphs))
 240     return note[1].pos == note->pos;
 241
 242   /* Otherwise, see if this forms an escaped newline.  */
 243   p = note->pos + 3;
 244   while (is_nvspace (*p))
 245     p++;
 246
 247   /* There might have been escaped newlines between the trigraph and the
 248      newline we found.  Hence the position test.  */
 249   return (*p == '\n' && p < note[1].pos);
 250 }
 251
 252 /* Process the notes created by add_line_note as far as the current
 253    location.  */
 254 void
 255 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 256 {
 257   cpp_buffer *buffer = pfile->buffer;
 258
 259   for (;;)
 260     {
 261       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 262       unsigned int col;
 263
 264       if (note->pos > buffer->cur)
 265         break;
 266
 267       buffer->cur_note++;
 268       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 269
 270       if (note->type == '\\' || note->type == ' ')
 271         {
 272           if (note->type == ' ' && !in_comment)
 273             cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
 274                                  "backslash and newline separated by space");
 275
 276           if (buffer->next_line > buffer->rlimit)
 277             {
 278               cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line, col,
 279                                    "backslash-newline at end of file");
 280               /* Prevent "no newline at end of file" warning.  */
 281               buffer->next_line = buffer->rlimit;
 282             }
 283
 284           buffer->line_base = note->pos;
 285           CPP_INCREMENT_LINE (pfile, 0);
 286         }
 287       else if (_cpp_trigraph_map[note->type])
 288         {
 289           if (CPP_OPTION (pfile, warn_trigraphs)
 290               && (!in_comment || warn_in_comment (pfile, note)))
 291             {
 292               if (CPP_OPTION (pfile, trigraphs))
 293                 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
 294                                      "trigraph ??%c converted to %c",
 295                                      note->type,
 296                                      (int) _cpp_trigraph_map[note->type]);
 297               else
 298                 {
 299                   cpp_error_with_line
 300                     (pfile, CPP_DL_WARNING, pfile->line, col,
 301                      "trigraph ??%c ignored, use -trigraphs to enable",
 302                      note->type);
 303                 }
 304             }
 305         }
 306       else
 307         abort ();
 308     }
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    nonzero if comment terminated by EOF, zero otherwise.
 314
 315    Buffer->cur points to the initial asterisk of the comment.  */
 316 bool
 317 _cpp_skip_block_comment (cpp_reader *pfile)
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   const uchar *cur = buffer->cur;
 321   uchar c;
 322
 323   cur++;
 324   if (*cur == '/')
 325     cur++;
 326
 327   for (;;)
 328     {
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       c = *cur++;
 332
 333       if (c == '/')
 334         {
 335           if (cur[-2] == '*')
 336             break;
 337
 338           /* Warn about potential nested comments, but not if the '/'
 339              comes immediately before the true comment delimiter.
 340              Don't bother to get it right across escaped newlines.  */
 341           if (CPP_OPTION (pfile, warn_comments)
 342               && cur[0] == '*' && cur[1] != '/')
 343             {
 344               buffer->cur = cur;
 345               cpp_error_with_line (pfile, CPP_DL_WARNING,
 346                                    pfile->line, CPP_BUF_COL (buffer),
 347                                    "\"/*\" within comment");
 348             }
 349         }
 350       else if (c == '\n')
 351         {
 352           unsigned int cols;
 353           buffer->cur = cur - 1;
 354           _cpp_process_line_notes (pfile, true);
 355           if (buffer->next_line >= buffer->rlimit)
 356             return true;
 357           _cpp_clean_line (pfile);
 358
 359           cols = buffer->next_line - buffer->line_base;
 360           CPP_INCREMENT_LINE (pfile, cols);
 361
 362           cur = buffer->cur;
 363         }
 364     }
 365
 366   buffer->cur = cur;
 367   _cpp_process_line_notes (pfile, true);
 368   return false;
 369 }
 370
 371 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 372    terminating newline.  Handles escaped newlines.  Returns nonzero
 373    if a multiline comment.  */
 374 static int
 375 skip_line_comment (cpp_reader *pfile)
 376 {
 377   cpp_buffer *buffer = pfile->buffer;
 378   unsigned int orig_line = pfile->line;
 379
 380   while (*buffer->cur != '\n')
 381     buffer->cur++;
 382
 383   _cpp_process_line_notes (pfile, true);
 384   return orig_line != pfile->line;
 385 }
 386
 387 /* Skips whitespace, saving the next non-whitespace character.  */
 388 static void
 389 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 390 {
 391   cpp_buffer *buffer = pfile->buffer;
 392   bool saw_NUL = false;
 393
 394   do
 395     {
 396       /* Horizontal space always OK.  */
 397       if (c == ' ' || c == '\t')
 398         ;
 399       /* Just \f \v or \0 left.  */
 400       else if (c == '\0')
 401         saw_NUL = true;
 402       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 403         cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line,
 404                              CPP_BUF_COL (buffer),
 405                              "%s in preprocessing directive",
 406                              c == '\f' ? "form feed" : "vertical tab");
 407
 408       c = *buffer->cur++;
 409     }
 410   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 411   while (is_nvspace (c));
 412
 413   if (saw_NUL)
 414     cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
 415
 416   buffer->cur--;
 417 }
 418
 419 /* See if the characters of a number token are valid in a name (no
 420    '.', '+' or '-').  */
 421 static int
 422 name_p (cpp_reader *pfile, const cpp_string *string)
 423 {
 424   unsigned int i;
 425
 426   for (i = 0; i < string->len; i++)
 427     if (!is_idchar (string->text[i]))
 428       return 0;
 429
 430   return 1;
 431 }
 432
 433 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 434    an identifier.  FIRST is TRUE if this starts an identifier.  */
 435 static bool
 436 forms_identifier_p (cpp_reader *pfile, int first)
 437 {
 438   cpp_buffer *buffer = pfile->buffer;
 439
 440   if (*buffer->cur == '$')
 441     {
 442       if (!CPP_OPTION (pfile, dollars_in_ident))
 443         return false;
 444
 445       buffer->cur++;
 446       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 447         {
 448           CPP_OPTION (pfile, warn_dollars) = 0;
 449           cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 450         }
 451
 452       return true;
 453     }
 454
 455   /* Is this a syntactically valid UCN?  */
 456   if (0 && *buffer->cur == '\\'
 457       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 458     {
 459       buffer->cur += 2;
 460       if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 461         return true;
 462       buffer->cur -= 2;
 463     }
 464
 465   return false;
 466 }
 467
 468 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 469 static cpp_hashnode *
 470 lex_identifier (cpp_reader *pfile, const uchar *base)
 471 {
 472   cpp_hashnode *result;
 473   const uchar *cur;
 474
 475   do
 476     {
 477       cur = pfile->buffer->cur;
 478
 479       /* N.B. ISIDNUM does not include $.  */
 480       while (ISIDNUM (*cur))
 481         cur++;
 482
 483       pfile->buffer->cur = cur;
 484     }
 485   while (forms_identifier_p (pfile, false));
 486
 487   result = (cpp_hashnode *)
 488     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 489
 490   /* Rarely, identifiers require diagnostics when lexed.  */
 491   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 492                         && !pfile->state.skipping, 0))
 493     {
 494       /* It is allowed to poison the same identifier twice.  */
 495       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 496         cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
 497                    NODE_NAME (result));
 498
 499       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 500          replacement list of a variadic macro.  */
 501       if (result == pfile->spec_nodes.n__VA_ARGS__
 502           && !pfile->state.va_args_ok)
 503         cpp_error (pfile, CPP_DL_PEDWARN,
 504                    "__VA_ARGS__ can only appear in the expansion"
 505                    " of a C99 variadic macro");
 506     }
 507
 508   return result;
 509 }
 510
 511 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 512 static void
 513 lex_number (cpp_reader *pfile, cpp_string *number)
 514 {
 515   const uchar *cur;
 516   const uchar *base;
 517   uchar *dest;
 518
 519   base = pfile->buffer->cur - 1;
 520   do
 521     {
 522       cur = pfile->buffer->cur;
 523
 524       /* N.B. ISIDNUM does not include $.  */
 525       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 526         cur++;
 527
 528       pfile->buffer->cur = cur;
 529     }
 530   while (forms_identifier_p (pfile, false));
 531
 532   number->len = cur - base;
 533   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 534   memcpy (dest, base, number->len);
 535   dest[number->len] = '\0';
 536   number->text = dest;
 537 }
 538
 539 /* Create a token of type TYPE with a literal spelling.  */
 540 static void
 541 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 542                 unsigned int len, enum cpp_ttype type)
 543 {
 544   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 545
 546   memcpy (dest, base, len);
 547   dest[len] = '\0';
 548   token->type = type;
 549   token->val.str.len = len;
 550   token->val.str.text = dest;
 551 }
 552
 553 /* Lexes a string, character constant, or angle-bracketed header file
 554    name.  The stored string contains the spelling, including opening
 555    quote and leading any leading 'L'.  It returns the type of the
 556    literal, or CPP_OTHER if it was not properly terminated.
 557
 558    The spelling is NUL-terminated, but it is not guaranteed that this
 559    is the first NUL since embedded NULs are preserved.  */
 560 static void
 561 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 562 {
 563   bool saw_NUL = false;
 564   const uchar *cur;
 565   cppchar_t terminator;
 566   enum cpp_ttype type;
 567
 568   cur = base;
 569   terminator = *cur++;
 570   if (terminator == 'L')
 571     terminator = *cur++;
 572   if (terminator == '\"')
 573     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 574   else if (terminator == '\'')
 575     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 576   else
 577     terminator = '>', type = CPP_HEADER_NAME;
 578
 579   for (;;)
 580     {
 581       cppchar_t c = *cur++;
 582
 583       /* In #include-style directives, terminators are not escapable.  */
 584       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 585         cur++;
 586       else if (c == terminator)
 587         break;
 588       else if (c == '\n')
 589         {
 590           cur--;
 591           type = CPP_OTHER;
 592           break;
 593         }
 594       else if (c == '\0')
 595         saw_NUL = true;
 596     }
 597
 598   if (saw_NUL && !pfile->state.skipping)
 599     cpp_error (pfile, CPP_DL_WARNING,
 600                "null character(s) preserved in literal");
 601
 602   pfile->buffer->cur = cur;
 603   create_literal (pfile, token, base, cur - base, type);
 604 }
 605
 606 /* The stored comment includes the comment start and any terminator.  */
 607 static void
 608 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 609               cppchar_t type)
 610 {
 611   unsigned char *buffer;
 612   unsigned int len, clen;
 613
 614   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 615
 616   /* C++ comments probably (not definitely) have moved past a new
 617      line, which we don't want to save in the comment.  */
 618   if (is_vspace (pfile->buffer->cur[-1]))
 619     len--;
 620
 621   /* If we are currently in a directive, then we need to store all
 622      C++ comments as C comments internally, and so we need to
 623      allocate a little extra space in that case.
 624
 625      Note that the only time we encounter a directive here is
 626      when we are saving comments in a "#define".  */
 627   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 628
 629   buffer = _cpp_unaligned_alloc (pfile, clen);
 630
 631   token->type = CPP_COMMENT;
 632   token->val.str.len = clen;
 633   token->val.str.text = buffer;
 634
 635   buffer[0] = '/';
 636   memcpy (buffer + 1, from, len - 1);
 637
 638   /* Finish conversion to a C comment, if necessary.  */
 639   if (pfile->state.in_directive && type == '/')
 640     {
 641       buffer[1] = '*';
 642       buffer[clen - 2] = '*';
 643       buffer[clen - 1] = '/';
 644     }
 645 }
 646
 647 /* Allocate COUNT tokens for RUN.  */
 648 void
 649 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 650 {
 651   run->base = xnewvec (cpp_token, count);
 652   run->limit = run->base + count;
 653   run->next = NULL;
 654 }
 655
 656 /* Returns the next tokenrun, or creates one if there is none.  */
 657 static tokenrun *
 658 next_tokenrun (tokenrun *run)
 659 {
 660   if (run->next == NULL)
 661     {
 662       run->next = xnew (tokenrun);
 663       run->next->prev = run;
 664       _cpp_init_tokenrun (run->next, 250);
 665     }
 666
 667   return run->next;
 668 }
 669
 670 /* Allocate a single token that is invalidated at the same time as the
 671    rest of the tokens on the line.  Has its line and col set to the
 672    same as the last lexed token, so that diagnostics appear in the
 673    right place.  */
 674 cpp_token *
 675 _cpp_temp_token (cpp_reader *pfile)
 676 {
 677   cpp_token *old, *result;
 678
 679   old = pfile->cur_token - 1;
 680   if (pfile->cur_token == pfile->cur_run->limit)
 681     {
 682       pfile->cur_run = next_tokenrun (pfile->cur_run);
 683       pfile->cur_token = pfile->cur_run->base;
 684     }
 685
 686   result = pfile->cur_token++;
 687   result->src_loc = old->src_loc;
 688   return result;
 689 }
 690
 691 /* Lex a token into RESULT (external interface).  Takes care of issues
 692    like directive handling, token lookahead, multiple include
 693    optimization and skipping.  */
 694 const cpp_token *
 695 _cpp_lex_token (cpp_reader *pfile)
 696 {
 697   cpp_token *result;
 698
 699   for (;;)
 700     {
 701       if (pfile->cur_token == pfile->cur_run->limit)
 702         {
 703           pfile->cur_run = next_tokenrun (pfile->cur_run);
 704           pfile->cur_token = pfile->cur_run->base;
 705         }
 706
 707       if (pfile->lookaheads)
 708         {
 709           pfile->lookaheads--;
 710           result = pfile->cur_token++;
 711         }
 712       else
 713         result = _cpp_lex_direct (pfile);
 714
 715       if (result->flags & BOL)
 716         {
 717           /* Is this a directive.  If _cpp_handle_directive returns
 718              false, it is an assembler #.  */
 719           if (result->type == CPP_HASH
 720               /* 6.10.3 p 11: Directives in a list of macro arguments
 721                  gives undefined behavior.  This implementation
 722                  handles the directive as normal.  */
 723               && pfile->state.parsing_args != 1
 724               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 725             continue;
 726           if (pfile->cb.line_change && !pfile->state.skipping)
 727             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 728         }
 729
 730       /* We don't skip tokens in directives.  */
 731       if (pfile->state.in_directive)
 732         break;
 733
 734       /* Outside a directive, invalidate controlling macros.  At file
 735          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 736          get here and MI optimization works.  */
 737       pfile->mi_valid = false;
 738
 739       if (!pfile->state.skipping || result->type == CPP_EOF)
 740         break;
 741     }
 742
 743   return result;
 744 }
 745
 746 /* Returns true if a fresh line has been loaded.  */
 747 bool
 748 _cpp_get_fresh_line (cpp_reader *pfile)
 749 {
 750   /* We can't get a new line until we leave the current directive.  */
 751   if (pfile->state.in_directive)
 752     return false;
 753
 754   for (;;)
 755     {
 756       cpp_buffer *buffer = pfile->buffer;
 757
 758       if (!buffer->need_line)
 759         return true;
 760
 761       if (buffer->next_line < buffer->rlimit)
 762         {
 763           _cpp_clean_line (pfile);
 764           return true;
 765         }
 766
 767       /* First, get out of parsing arguments state.  */
 768       if (pfile->state.parsing_args)
 769         return false;
 770
 771       /* End of buffer.  Non-empty files should end in a newline.  */
 772       if (buffer->buf != buffer->rlimit
 773           && buffer->next_line > buffer->rlimit
 774           && !buffer->from_stage3)
 775         {
 776           /* Only warn once.  */
 777           buffer->next_line = buffer->rlimit;
 778           cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line,
 779                                CPP_BUF_COLUMN (buffer, buffer->cur),
 780                                "no newline at end of file");
 781         }
 782
 783       _cpp_pop_buffer (pfile);
 784       if (pfile->buffer == NULL)
 785         return false;
 786     }
 787 }
 788
 789 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 790   do                                                    \
 791     {                                                   \
 792       result->type = ELSE_TYPE;                         \
 793       if (*buffer->cur == CHAR)                         \
 794         buffer->cur++, result->type = THEN_TYPE;        \
 795     }                                                   \
 796   while (0)
 797
 798 /* Lex a token into pfile->cur_token, which is also incremented, to
 799    get diagnostics pointing to the correct location.
 800
 801    Does not handle issues such as token lookahead, multiple-include
 802    optimization, directives, skipping etc.  This function is only
 803    suitable for use by _cpp_lex_token, and in special cases like
 804    lex_expansion_token which doesn't care for any of these issues.
 805
 806    When meeting a newline, returns CPP_EOF if parsing a directive,
 807    otherwise returns to the start of the token buffer if permissible.
 808    Returns the location of the lexed token.  */
 809 cpp_token *
 810 _cpp_lex_direct (cpp_reader *pfile)
 811 {
 812   cppchar_t c;
 813   cpp_buffer *buffer;
 814   const unsigned char *comment_start;
 815   cpp_token *result = pfile->cur_token++;
 816
 817  fresh_line:
 818   result->flags = 0;
 819   buffer = pfile->buffer;
 820   if (buffer->need_line)
 821     {
 822       if (!_cpp_get_fresh_line (pfile))
 823         {
 824           result->type = CPP_EOF;
 825           if (!pfile->state.in_directive)
 826             {
 827               /* Tell the compiler the line number of the EOF token.  */
 828               result->src_loc = pfile->line;
 829               result->flags = BOL;
 830             }
 831           return result;
 832         }
 833       if (!pfile->keep_tokens)
 834         {
 835           pfile->cur_run = &pfile->base_run;
 836           result = pfile->base_run.base;
 837           pfile->cur_token = result + 1;
 838         }
 839       result->flags = BOL;
 840       if (pfile->state.parsing_args == 2)
 841         result->flags |= PREV_WHITE;
 842     }
 843   buffer = pfile->buffer;
 844  update_tokens_line:
 845   result->src_loc = pfile->line;
 846
 847  skipped_white:
 848   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 849       && !pfile->overlaid_buffer)
 850     {
 851       _cpp_process_line_notes (pfile, false);
 852       result->src_loc = pfile->line;
 853     }
 854   c = *buffer->cur++;
 855
 856   result->src_loc = linemap_position_for_column (pfile->line_table,
 857                                                  CPP_BUF_COLUMN (buffer, buffer->cur));
 858
 859   switch (c)
 860     {
 861     case ' ': case '\t': case '\f': case '\v': case '\0':
 862       result->flags |= PREV_WHITE;
 863       skip_whitespace (pfile, c);
 864       goto skipped_white;
 865
 866     case '\n':
 867       if (buffer->cur < buffer->rlimit)
 868         CPP_INCREMENT_LINE (pfile, 0);
 869       buffer->need_line = true;
 870       goto fresh_line;
 871
 872     case '0': case '1': case '2': case '3': case '4':
 873     case '5': case '6': case '7': case '8': case '9':
 874       result->type = CPP_NUMBER;
 875       lex_number (pfile, &result->val.str);
 876       break;
 877
 878     case 'L':
 879       /* 'L' may introduce wide characters or strings.  */
 880       if (*buffer->cur == '\'' || *buffer->cur == '"')
 881         {
 882           lex_string (pfile, result, buffer->cur - 1);
 883           break;
 884         }
 885       /* Fall through.  */
 886
 887     case '_':
 888     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 889     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 890     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 891     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 892     case 'y': case 'z':
 893     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 894     case 'G': case 'H': case 'I': case 'J': case 'K':
 895     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 896     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 897     case 'Y': case 'Z':
 898       result->type = CPP_NAME;
 899       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 900
 901       /* Convert named operators to their proper types.  */
 902       if (result->val.node->flags & NODE_OPERATOR)
 903         {
 904           result->flags |= NAMED_OP;
 905           result->type = result->val.node->directive_index;
 906         }
 907       break;
 908
 909     case '\'':
 910     case '"':
 911       lex_string (pfile, result, buffer->cur - 1);
 912       break;
 913
 914     case '/':
 915       /* A potential block or line comment.  */
 916       comment_start = buffer->cur;
 917       c = *buffer->cur;
 918
 919       if (c == '*')
 920         {
 921           if (_cpp_skip_block_comment (pfile))
 922             cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
 923         }
 924       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 925                             || cpp_in_system_header (pfile)))
 926         {
 927           /* Warn about comments only if pedantically GNUC89, and not
 928              in system headers.  */
 929           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 930               && ! buffer->warned_cplusplus_comments)
 931             {
 932               cpp_error (pfile, CPP_DL_PEDWARN,
 933                          "C++ style comments are not allowed in ISO C90");
 934               cpp_error (pfile, CPP_DL_PEDWARN,
 935                          "(this will be reported only once per input file)");
 936               buffer->warned_cplusplus_comments = 1;
 937             }
 938
 939           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 940             cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
 941         }
 942       else if (c == '=')
 943         {
 944           buffer->cur++;
 945           result->type = CPP_DIV_EQ;
 946           break;
 947         }
 948       else
 949         {
 950           result->type = CPP_DIV;
 951           break;
 952         }
 953
 954       if (!pfile->state.save_comments)
 955         {
 956           result->flags |= PREV_WHITE;
 957           goto update_tokens_line;
 958         }
 959
 960       /* Save the comment as a token in its own right.  */
 961       save_comment (pfile, result, comment_start, c);
 962       break;
 963
 964     case '<':
 965       if (pfile->state.angled_headers)
 966         {
 967           lex_string (pfile, result, buffer->cur - 1);
 968           break;
 969         }
 970
 971       result->type = CPP_LESS;
 972       if (*buffer->cur == '=')
 973         buffer->cur++, result->type = CPP_LESS_EQ;
 974       else if (*buffer->cur == '<')
 975         {
 976           buffer->cur++;
 977           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 978         }
 979       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 980         {
 981           buffer->cur++;
 982           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 983         }
 984       else if (CPP_OPTION (pfile, digraphs))
 985         {
 986           if (*buffer->cur == ':')
 987             {
 988               buffer->cur++;
 989               result->flags |= DIGRAPH;
 990               result->type = CPP_OPEN_SQUARE;
 991             }
 992           else if (*buffer->cur == '%')
 993             {
 994               buffer->cur++;
 995               result->flags |= DIGRAPH;
 996               result->type = CPP_OPEN_BRACE;
 997             }
 998         }
 999       break;
1000
1001     case '>':
1002       result->type = CPP_GREATER;
1003       if (*buffer->cur == '=')
1004         buffer->cur++, result->type = CPP_GREATER_EQ;
1005       else if (*buffer->cur == '>')
1006         {
1007           buffer->cur++;
1008           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1009         }
1010       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1011         {
1012           buffer->cur++;
1013           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1014         }
1015       break;
1016
1017     case '%':
1018       result->type = CPP_MOD;
1019       if (*buffer->cur == '=')
1020         buffer->cur++, result->type = CPP_MOD_EQ;
1021       else if (CPP_OPTION (pfile, digraphs))
1022         {
1023           if (*buffer->cur == ':')
1024             {
1025               buffer->cur++;
1026               result->flags |= DIGRAPH;
1027               result->type = CPP_HASH;
1028               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1029                 buffer->cur += 2, result->type = CPP_PASTE;
1030             }
1031           else if (*buffer->cur == '>')
1032             {
1033               buffer->cur++;
1034               result->flags |= DIGRAPH;
1035               result->type = CPP_CLOSE_BRACE;
1036             }
1037         }
1038       break;
1039
1040     case '.':
1041       result->type = CPP_DOT;
1042       if (ISDIGIT (*buffer->cur))
1043         {
1044           result->type = CPP_NUMBER;
1045           lex_number (pfile, &result->val.str);
1046         }
1047       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1048         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1049       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1050         buffer->cur++, result->type = CPP_DOT_STAR;
1051       break;
1052
1053     case '+':
1054       result->type = CPP_PLUS;
1055       if (*buffer->cur == '+')
1056         buffer->cur++, result->type = CPP_PLUS_PLUS;
1057       else if (*buffer->cur == '=')
1058         buffer->cur++, result->type = CPP_PLUS_EQ;
1059       break;
1060
1061     case '-':
1062       result->type = CPP_MINUS;
1063       if (*buffer->cur == '>')
1064         {
1065           buffer->cur++;
1066           result->type = CPP_DEREF;
1067           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1068             buffer->cur++, result->type = CPP_DEREF_STAR;
1069         }
1070       else if (*buffer->cur == '-')
1071         buffer->cur++, result->type = CPP_MINUS_MINUS;
1072       else if (*buffer->cur == '=')
1073         buffer->cur++, result->type = CPP_MINUS_EQ;
1074       break;
1075
1076     case '&':
1077       result->type = CPP_AND;
1078       if (*buffer->cur == '&')
1079         buffer->cur++, result->type = CPP_AND_AND;
1080       else if (*buffer->cur == '=')
1081         buffer->cur++, result->type = CPP_AND_EQ;
1082       break;
1083
1084     case '|':
1085       result->type = CPP_OR;
1086       if (*buffer->cur == '|')
1087         buffer->cur++, result->type = CPP_OR_OR;
1088       else if (*buffer->cur == '=')
1089         buffer->cur++, result->type = CPP_OR_EQ;
1090       break;
1091
1092     case ':':
1093       result->type = CPP_COLON;
1094       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1095         buffer->cur++, result->type = CPP_SCOPE;
1096       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1097         {
1098           buffer->cur++;
1099           result->flags |= DIGRAPH;
1100           result->type = CPP_CLOSE_SQUARE;
1101         }
1102       break;
1103
1104     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1105     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1106     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1107     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1108     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1109
1110     case '?': result->type = CPP_QUERY; break;
1111     case '~': result->type = CPP_COMPL; break;
1112     case ',': result->type = CPP_COMMA; break;
1113     case '(': result->type = CPP_OPEN_PAREN; break;
1114     case ')': result->type = CPP_CLOSE_PAREN; break;
1115     case '[': result->type = CPP_OPEN_SQUARE; break;
1116     case ']': result->type = CPP_CLOSE_SQUARE; break;
1117     case '{': result->type = CPP_OPEN_BRACE; break;
1118     case '}': result->type = CPP_CLOSE_BRACE; break;
1119     case ';': result->type = CPP_SEMICOLON; break;
1120
1121       /* @ is a punctuator in Objective-C.  */
1122     case '@': result->type = CPP_ATSIGN; break;
1123
1124     case '$':
1125     case '\\':
1126       {
1127         const uchar *base = --buffer->cur;
1128
1129         if (forms_identifier_p (pfile, true))
1130           {
1131             result->type = CPP_NAME;
1132             result->val.node = lex_identifier (pfile, base);
1133             break;
1134           }
1135         buffer->cur++;
1136       }
1137
1138     default:
1139       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1140       break;
1141     }
1142
1143   return result;
1144 }
1145
1146 /* An upper bound on the number of bytes needed to spell TOKEN.
1147    Does not include preceding whitespace.  */
1148 unsigned int
1149 cpp_token_len (const cpp_token *token)
1150 {
1151   unsigned int len;
1152
1153   switch (TOKEN_SPELL (token))
1154     {
1155     default:            len = 4;                                break;
1156     case SPELL_LITERAL: len = token->val.str.len;               break;
1157     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1158     }
1159
1160   return len;
1161 }
1162
1163 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1164    already contain the enough space to hold the token's spelling.
1165    Returns a pointer to the character after the last character written.
1166    FIXME: Would be nice if we didn't need the PFILE argument.  */
1167 unsigned char *
1168 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1169                  unsigned char *buffer)
1170 {
1171   switch (TOKEN_SPELL (token))
1172     {
1173     case SPELL_OPERATOR:
1174       {
1175         const unsigned char *spelling;
1176         unsigned char c;
1177
1178         if (token->flags & DIGRAPH)
1179           spelling
1180             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1181         else if (token->flags & NAMED_OP)
1182           goto spell_ident;
1183         else
1184           spelling = TOKEN_NAME (token);
1185
1186         while ((c = *spelling++) != '\0')
1187           *buffer++ = c;
1188       }
1189       break;
1190
1191     spell_ident:
1192     case SPELL_IDENT:
1193       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1194       buffer += NODE_LEN (token->val.node);
1195       break;
1196
1197     case SPELL_LITERAL:
1198       memcpy (buffer, token->val.str.text, token->val.str.len);
1199       buffer += token->val.str.len;
1200       break;
1201
1202     case SPELL_NONE:
1203       cpp_error (pfile, CPP_DL_ICE,
1204                  "unspellable token %s", TOKEN_NAME (token));
1205       break;
1206     }
1207
1208   return buffer;
1209 }
1210
1211 /* Returns TOKEN spelt as a null-terminated string.  The string is
1212    freed when the reader is destroyed.  Useful for diagnostics.  */
1213 unsigned char *
1214 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1215 {
1216   unsigned int len = cpp_token_len (token) + 1;
1217   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1218
1219   end = cpp_spell_token (pfile, token, start);
1220   end[0] = '\0';
1221
1222   return start;
1223 }
1224
1225 /* Used by C front ends, which really should move to using
1226    cpp_token_as_text.  */
1227 const char *
1228 cpp_type2name (enum cpp_ttype type)
1229 {
1230   return (const char *) token_spellings[type].name;
1231 }
1232
1233 /* Writes the spelling of token to FP, without any preceding space.
1234    Separated from cpp_spell_token for efficiency - to avoid stdio
1235    double-buffering.  */
1236 void
1237 cpp_output_token (const cpp_token *token, FILE *fp)
1238 {
1239   switch (TOKEN_SPELL (token))
1240     {
1241     case SPELL_OPERATOR:
1242       {
1243         const unsigned char *spelling;
1244         int c;
1245
1246         if (token->flags & DIGRAPH)
1247           spelling
1248             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1249         else if (token->flags & NAMED_OP)
1250           goto spell_ident;
1251         else
1252           spelling = TOKEN_NAME (token);
1253
1254         c = *spelling;
1255         do
1256           putc (c, fp);
1257         while ((c = *++spelling) != '\0');
1258       }
1259       break;
1260
1261     spell_ident:
1262     case SPELL_IDENT:
1263       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1264     break;
1265
1266     case SPELL_LITERAL:
1267       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1268       break;
1269
1270     case SPELL_NONE:
1271       /* An error, most probably.  */
1272       break;
1273     }
1274 }
1275
1276 /* Compare two tokens.  */
1277 int
1278 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1279 {
1280   if (a->type == b->type && a->flags == b->flags)
1281     switch (TOKEN_SPELL (a))
1282       {
1283       default:                  /* Keep compiler happy.  */
1284       case SPELL_OPERATOR:
1285         return 1;
1286       case SPELL_NONE:
1287         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1288       case SPELL_IDENT:
1289         return a->val.node == b->val.node;
1290       case SPELL_LITERAL:
1291         return (a->val.str.len == b->val.str.len
1292                 && !memcmp (a->val.str.text, b->val.str.text,
1293                             a->val.str.len));
1294       }
1295
1296   return 0;
1297 }
1298
1299 /* Returns nonzero if a space should be inserted to avoid an
1300    accidental token paste for output.  For simplicity, it is
1301    conservative, and occasionally advises a space where one is not
1302    needed, e.g. "." and ".2".  */
1303 int
1304 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1305                  const cpp_token *token2)
1306 {
1307   enum cpp_ttype a = token1->type, b = token2->type;
1308   cppchar_t c;
1309
1310   if (token1->flags & NAMED_OP)
1311     a = CPP_NAME;
1312   if (token2->flags & NAMED_OP)
1313     b = CPP_NAME;
1314
1315   c = EOF;
1316   if (token2->flags & DIGRAPH)
1317     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1318   else if (token_spellings[b].category == SPELL_OPERATOR)
1319     c = token_spellings[b].name[0];
1320
1321   /* Quickly get everything that can paste with an '='.  */
1322   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1323     return 1;
1324
1325   switch (a)
1326     {
1327     case CPP_GREATER:   return c == '>' || c == '?';
1328     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1329     case CPP_PLUS:      return c == '+';
1330     case CPP_MINUS:     return c == '-' || c == '>';
1331     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1332     case CPP_MOD:       return c == ':' || c == '>';
1333     case CPP_AND:       return c == '&';
1334     case CPP_OR:        return c == '|';
1335     case CPP_COLON:     return c == ':' || c == '>';
1336     case CPP_DEREF:     return c == '*';
1337     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1338     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1339     case CPP_NAME:      return ((b == CPP_NUMBER
1340                                  && name_p (pfile, &token2->val.str))
1341                                 || b == CPP_NAME
1342                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1343     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1344                                 || c == '.' || c == '+' || c == '-');
1345                                       /* UCNs */
1346     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1347                                  && b == CPP_NAME)
1348                                 || (CPP_OPTION (pfile, objc)
1349                                     && token1->val.str.text[0] == '@'
1350                                     && (b == CPP_NAME || b == CPP_STRING)));
1351     default:            break;
1352     }
1353
1354   return 0;
1355 }
1356
1357 /* Output all the remaining tokens on the current line, and a newline
1358    character, to FP.  Leading whitespace is removed.  If there are
1359    macros, special token padding is not performed.  */
1360 void
1361 cpp_output_line (cpp_reader *pfile, FILE *fp)
1362 {
1363   const cpp_token *token;
1364
1365   token = cpp_get_token (pfile);
1366   while (token->type != CPP_EOF)
1367     {
1368       cpp_output_token (token, fp);
1369       token = cpp_get_token (pfile);
1370       if (token->flags & PREV_WHITE)
1371         putc (' ', fp);
1372     }
1373
1374   putc ('\n', fp);
1375 }
1376
/* Memory buffers.  Changing these three macros can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest usable size new_buff will allocate.

   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will reuse for a request of MIN_SIZE bytes.

   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice BUFF's unused room.  Both
   arguments are parenthesized in the expansion so that an expression
   argument cannot re-associate with the surrounding operators; BUFF
   is evaluated twice.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1391
1392 /* Create a new allocation buffer.  Place the control block at the end
1393    of the buffer, so that buffer overflows will cause immediate chaos.  */
1394 static _cpp_buff *
1395 new_buff (size_t len)
1396 {
1397   _cpp_buff *result;
1398   unsigned char *base;
1399
1400   if (len < MIN_BUFF_SIZE)
1401     len = MIN_BUFF_SIZE;
1402   len = CPP_ALIGN (len);
1403
1404   base = xmalloc (len + sizeof (_cpp_buff));
1405   result = (_cpp_buff *) (base + len);
1406   result->base = base;
1407   result->cur = base;
1408   result->limit = base + len;
1409   result->next = NULL;
1410   return result;
1411 }
1412
1413 /* Place a chain of unwanted allocation buffers on the free list.  */
1414 void
1415 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1416 {
1417   _cpp_buff *end = buff;
1418
1419   while (end->next)
1420     end = end->next;
1421   end->next = pfile->free_buffs;
1422   pfile->free_buffs = buff;
1423 }
1424
1425 /* Return a free buffer of size at least MIN_SIZE.  */
1426 _cpp_buff *
1427 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1428 {
1429   _cpp_buff *result, **p;
1430
1431   for (p = &pfile->free_buffs;; p = &(*p)->next)
1432     {
1433       size_t size;
1434
1435       if (*p == NULL)
1436         return new_buff (min_size);
1437       result = *p;
1438       size = result->limit - result->base;
1439       /* Return a buffer that's big enough, but don't waste one that's
1440          way too big.  */
1441       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1442         break;
1443     }
1444
1445   *p = result->next;
1446   result->next = NULL;
1447   result->cur = result->base;
1448   return result;
1449 }
1450
1451 /* Creates a new buffer with enough space to hold the uncommitted
1452    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1453    the excess bytes to the new buffer.  Chains the new buffer after
1454    BUFF, and returns the new buffer.  */
1455 _cpp_buff *
1456 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1457 {
1458   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1459   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1460
1461   buff->next = new_buff;
1462   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1463   return new_buff;
1464 }
1465
1466 /* Creates a new buffer with enough space to hold the uncommitted
1467    remaining bytes of the buffer pointed to by BUFF, and at least
1468    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1469    Chains the new buffer before the buffer pointed to by BUFF, and
1470    updates the pointer to point to the new buffer.  */
1471 void
1472 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1473 {
1474   _cpp_buff *new_buff, *old_buff = *pbuff;
1475   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1476
1477   new_buff = _cpp_get_buff (pfile, size);
1478   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1479   new_buff->next = old_buff;
1480   *pbuff = new_buff;
1481 }
1482
1483 /* Free a chain of buffers starting at BUFF.  */
1484 void
1485 _cpp_free_buff (_cpp_buff *buff)
1486 {
1487   _cpp_buff *next;
1488
1489   for (; buff; buff = next)
1490     {
1491       next = buff->next;
1492       free (buff->base);
1493     }
1494 }
1495
1496 /* Allocate permanent, unaligned storage of length LEN.  */
1497 unsigned char *
1498 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1499 {
1500   _cpp_buff *buff = pfile->u_buff;
1501   unsigned char *result = buff->cur;
1502
1503   if (len > (size_t) (buff->limit - result))
1504     {
1505       buff = _cpp_get_buff (pfile, len);
1506       buff->next = pfile->u_buff;
1507       pfile->u_buff = buff;
1508       result = buff->cur;
1509     }
1510
1511   buff->cur = result + len;
1512   return result;
1513 }
1514
1515 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1516    That buffer is used for growing allocations when saving macro
1517    replacement lists in a #define, and when parsing an answer to an
1518    assertion in #assert, #unassert or #if (and therefore possibly
1519    whilst expanding macros).  It therefore must not be used by any
1520    code that they might call: specifically the lexer and the guts of
1521    the macro expander.
1522
1523    All existing other uses clearly fit this restriction: storing
1524    registered pragmas during initialization.  */
1525 unsigned char *
1526 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1527 {
1528   _cpp_buff *buff = pfile->a_buff;
1529   unsigned char *result = buff->cur;
1530
1531   if (len > (size_t) (buff->limit - result))
1532     {
1533       buff = _cpp_get_buff (pfile, len);
1534       buff->next = pfile->a_buff;
1535       pfile->a_buff = buff;
1536       result = buff->cur;
1537     }
1538
1539   buff->cur = result + len;
1540   return result;
1541 }