gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41 #include "symcat.h"
  42
  43 /* How a token type is spelled (the `category' of a token_spellings
  44    entry).  Tokens with SPELL_STRING store their spelling in the token
        list, and its length in the token->val.name.len.  */
  45 enum spell_type
  46 {
  47   SPELL_OPERATOR = 0,	/* Category given to every OP() table entry.  */
  48   SPELL_CHAR,
  49   SPELL_IDENT,
  50   SPELL_STRING,
  51   SPELL_NONE
  52 };
  53
     /* One entry of the token_spellings table: the spelling category of a
        token type and the string recorded for it (the operator text for
        OP() entries, the stringified enumerator name for TK() entries).  */
  54 struct token_spelling
  55 {
  56   enum spell_type category;
  57   const unsigned char *name;
  58 };
  59
     /* Spellings of the digraphs.  lex_percent in this file lexes "%:" as
        CPP_HASH, "%:%:" as CPP_PASTE and "%>" as CPP_CLOSE_BRACE.
        NOTE(review): how this array is indexed is not visible here.  */
  60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  61                                              U":>", U"<%", U"%>"};
  62
     /* Build token_spellings by expanding TTYPE_TABLE.  An OP(e, s) entry
        records the operator text S under SPELL_OPERATOR; a TK(e, s) entry
        records category S and the stringified enumerator name E.  Both
        helper macros are undefined again once the table is built.  */
  63 #define OP(e, s) { SPELL_OPERATOR, U s           },
  64 #define TK(e, s) { s,              U STRINGX (e) },
  65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  66 #undef OP
  67 #undef TK
  68
     /* Spelling category and recorded name for the type of TOKEN.  */
  69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  71
     /* Forward declarations of the file-local helpers.  */

     /* Character-level input: newlines, trigraphs and escaped newlines.  */
  72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  75
     /* Comment, whitespace and token-level lexing.  */
  76 static int skip_block_comment PARAMS ((cpp_reader *));
  77 static int skip_line_comment PARAMS ((cpp_reader *));
  78 static void adjust_column PARAMS ((cpp_reader *));
  79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  84 static void unterminated PARAMS ((cpp_reader *, int));
  85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
  89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  90
     /* NOTE(review): presumably memory pool helpers, judging by the
        cpp_pool and cpp_chunk types; their definitions are not in the
        part of the file reviewed here -- confirm.  */
  91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
  92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  93
  94 /* Utility routine: report whether TOKEN is the identifier spelled by
  95    the NUL-terminated string STRING.
  96
  97    Returns 1 when TOKEN is a CPP_NAME whose node name compares equal to
  98    STRING, and 0 otherwise (including when TOKEN is not a CPP_NAME).  */
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   const U_CHAR *wanted = (const U_CHAR *) string;
 105
 106   return (token->type == CPP_NAME
 107           && ustrcmp (token->val.node->name, wanted) == 0);
 108 }
 109
 110 /* Start a new line once the newline character NEWLINE_CHAR has been
 111    read: the line number, column adjustment and line base are updated,
 112    and the second half of a CR-LF or LF-CR pair is swallowed.  Returns
 113    the character after the newline, or EOF at the end of the buffer;
 114    the same value is left in buffer->read_ahead.  */
 115 static cppchar_t
 116 handle_newline (buffer, newline_char)
 117      cpp_buffer *buffer;
 118      cppchar_t newline_char;
 119 {
 120   cppchar_t following = EOF;
 121
 122   buffer->lineno++;
 123   buffer->col_adjust = 0;
 124   buffer->line_base = buffer->cur;
 125
 126   if (buffer->cur < buffer->rlimit)
 127     {
 128       following = *buffer->cur++;
 129       /* CR-LF or LF-CR: the pair is one newline, so skip its second half.  */
 130       if (newline_char + following == '\n' + '\r')
 131         {
 132           buffer->line_base = buffer->cur;
 133           following = buffer->cur < buffer->rlimit ? *buffer->cur++ : EOF;
 134         }
 135     }
 136
 137   buffer->read_ahead = following;
 138   return following;
 139 }
 140
 141 /* Subroutine of skip_escaped_newlines, called when a trigraph is
 142    encountered.  FROM_CHAR is the trigraph's third character, presumed
 143    to be the previous character for position reporting.  Warns if
 144    necessary and returns true if the trigraph should be honoured.  */
 145 static int
 146 trigraph_ok (pfile, from_char)
 147      cpp_reader *pfile;
 148      cppchar_t from_char;
 149 {
 150   cpp_buffer *buffer = pfile->buffer;
 151   const int honoured = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Stay silent if warnings are off; don't warn inside comments.  */
 154   if (!CPP_OPTION (pfile, warn_trigraphs) || pfile->state.lexing_comment)
 155     return honoured;
 156
 157   if (honoured)
 158     cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 159                            "trigraph ??%c converted to %c",
 160                            (int) from_char,
 161                            (int) _cpp_trigraph_map[from_char]);
 162   else if (buffer->cur != buffer->last_Wtrigraphs)
 163     {
 164       /* Note the position so this spot is not reported again.  */
 165       buffer->last_Wtrigraphs = buffer->cur;
 166       cpp_warning_with_line (pfile, buffer->lineno,
 167                              CPP_BUF_COL (buffer) - 2,
 168                              "trigraph ??%c ignored", (int) from_char);
 169     }
 170
 171   return honoured;
 172 }
 173
 174 /* Assumes local variables buffer and result.  Sets the token type to
        T and clears buffer->read_ahead, so the character that was looked
        at becomes part of the current token.  */
 175 #define ACCEPT_CHAR(t) \
 176   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 177
 178 /* When we move to multibyte character sets, add to these something
 179    that saves and restores the state of the multibyte conversion
 180    library.  This probably involves saving and restoring a "cookie".
 181    In the case of glibc it is an 8-byte structure, so is not a high
 182    overhead operation.  In any case, it's out of the fast path.

        Both macros assume local variables buffer and saved_cur: SAVE_STATE
        remembers the current read position and RESTORE_STATE rewinds to
        it.  */
 183 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 184 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 185
 186 /* Skips any escaped newlines introduced by NEXT, which is either a
 187    '?' or a '\\'.  Returns the next character, which will also have
 188    been placed in buffer->read_ahead.  This routine performs
 189    preprocessing stages 1 and 2 of the ISO C standard.

        NEXT has already been read, so buffer->cur points just past it.
        A "??x" trigraph accepted by trigraph_ok is replaced by the
        character it maps to.  A backslash (literal, or produced by a
        trigraph) followed by optional non-vertical whitespace and a
        newline is dropped together with that newline, and the scan
        repeats while the character after it is again '?' or '\\'.
        Whenever a candidate turns out not to match, buffer->cur is
        rewound so that nothing beyond the returned character has been
        consumed.  For buffers with from_stage3 set, NEXT is returned
        unchanged.  */
 190 static cppchar_t
 191 skip_escaped_newlines (buffer, next)
 192      cpp_buffer *buffer;
 193      cppchar_t next;
 194 {
 195   /* Only do this if we apply stages 1 and 2.  */
 196   if (!buffer->from_stage3)
 197     {
 198       cppchar_t next1;
 199       const unsigned char *saved_cur;
 200       int space;
 201
 202       do
 203         {
               /* With nothing after NEXT it cannot introduce anything.  */
 204           if (buffer->cur == buffer->rlimit)
 205             break;
 206
 207           SAVE_STATE ();
 208           if (next == '?')
 209             {
                   /* A trigraph needs a second '?' and then a third
                      character.  */
 210               next1 = *buffer->cur++;
 211               if (next1 != '?' || buffer->cur == buffer->rlimit)
 212                 {
 213                   RESTORE_STATE ();
 214                   break;
 215                 }
 216
                   /* The third character must be one the trigraph map
                      knows, and trigraph_ok must agree to honour it.  */
 217               next1 = *buffer->cur++;
 218               if (!_cpp_trigraph_map[next1]
 219                   || !trigraph_ok (buffer->pfile, next1))
 220                 {
 221                   RESTORE_STATE ();
 222                   break;
 223                 }
 224
 225               /* We have a full trigraph here.  */
 226               next = _cpp_trigraph_map[next1];
                   /* Only a trigraph backslash with more input after it
                      can go on to escape a newline.  */
 227               if (next != '\\' || buffer->cur == buffer->rlimit)
 228                 break;
                   /* From now on a failed match rewinds only to just
                      after the trigraph, keeping the conversion.  */
 229               SAVE_STATE ();
 230             }
 231
 232           /* We have a backslash, and room for at least one more character.  */
 233           space = 0;
               /* Step over non-vertical whitespace after the backslash,
                  remembering whether there was any.  */
 234           do
 235             {
 236               next1 = *buffer->cur++;
 237               if (!is_nvspace (next1))
 238                 break;
 239               space = 1;
 240             }
 241           while (buffer->cur < buffer->rlimit);
 242
               /* Not followed by a newline: this is an ordinary
                  backslash, so un-read everything scanned after it.  */
 243           if (!is_vspace (next1))
 244             {
 245               RESTORE_STATE ();
 246               break;
 247             }
 248
 249           if (space && !buffer->pfile->state.lexing_comment)
 250             cpp_warning (buffer->pfile,
 251                          "backslash and newline separated by space");
 252
               /* Consume the newline; NEXT becomes the character after it.  */
 253           next = handle_newline (buffer, next1);
 254           if (next == EOF)
 255             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 256         }
           /* The new character may itself start another trigraph or
              escaped newline.  */
 257       while (next == '\\' || next == '?');
 258     }
 259
 260   buffer->read_ahead = next;
 261   return next;
 262 }
 263
 264 /* Fetch the next character of BUFFER after trigraph conversion and
 265    with any run of escaped newlines skipped, or EOF if the buffer is
 266    exhausted.  The result is also stored in buffer->read_ahead.  */
 267 static cppchar_t
 268 get_effective_char (buffer)
 269      cpp_buffer *buffer;
 270 {
 271   cppchar_t ch;
 272
 273   if (!(buffer->cur < buffer->rlimit))
 274     ch = EOF;
 275   else
 276     {
 277       /* Only '?' (may start a trigraph, hence a backslash) and '\\'
 278          (may start an escaped newline, or a UCN that we will handle
 279          in the future) need the slow helper.  */
 280       ch = *buffer->cur++;
 281       if (ch == '\\' || ch == '?')
 282         ch = skip_escaped_newlines (buffer, ch);
 283     }
 284
 285   buffer->read_ahead = ch;
 286   return ch;
 287 }
 288
 289 /* Skip a C-style block comment.  We find the end of the comment by
 290    seeing if an asterisk is before every '/' we encounter.  Returns
 291    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set for the duration of the scan,
        and buffer->read_ahead is EOF on return.  Trigraphs and escaped
        newlines are processed, newlines are counted through
        handle_newline, and tabs update the column adjustment.  */
 292 static int
 293 skip_block_comment (pfile)
 294      cpp_reader *pfile;
 295 {
 296   cpp_buffer *buffer = pfile->buffer;
 297   cppchar_t c = EOF, prevc = EOF;
 298
 299   pfile->state.lexing_comment = 1;
 300   while (buffer->cur != buffer->rlimit)
 301     {
 302       prevc = c, c = *buffer->cur++;
 303
         /* Also entered by goto with C already fetched (after a newline,
            or after the nested-comment lookahead below), so that such a
            character gets the same treatment as one read by the loop.  */
 304     next_char:
 305       /* FIXME: For speed, create a new character class of characters
 306          of interest inside block comments.  */
 307       if (c == '?' || c == '\\')
 308         c = skip_escaped_newlines (buffer, c);
 309
 310       /* People like decorating comments with '*', so check for '/'
 311          instead for efficiency.  */
 312       if (c == '/')
 313         {
               /* "* /" without the space: the comment ends here.  */
 314           if (prevc == '*')
 315             break;
 316
 317           /* Warn about potential nested comments, but not if the '/'
 318              comes immediately before the true comment delimiter.
 319              Don't bother to get it right across escaped newlines.  */
 320           if (CPP_OPTION (pfile, warn_comments)
 321               && buffer->cur != buffer->rlimit)
 322             {
 323               prevc = c, c = *buffer->cur++;
 324               if (c == '*' && buffer->cur != buffer->rlimit)
 325                 {
 326                   prevc = c, c = *buffer->cur++;
 327                   if (c != '/')
 328                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 329                                            CPP_BUF_COL (buffer),
 330                                            "\"/*\" within comment");
 331                 }
                   /* C was read ahead here; process it without reading
                      another character.  */
 332               goto next_char;
 333             }
 334         }
 335       else if (is_vspace (c))
 336         {
               /* handle_newline has already fetched the next character.  */
 337           prevc = c, c = handle_newline (buffer, c);
 338           goto next_char;
 339         }
 340       else if (c == '\t')
 341         adjust_column (pfile);
 342     }
 343
 344   pfile->state.lexing_comment = 0;
 345   buffer->read_ahead = EOF;
       /* Zero only if the last two characters seen were '*' then '/'.  */
 346   return c != '/' || prevc != '*';
 347 }
 348
 349 /* Skip the rest of a C++ style line comment, stepping over escaped
 350    newlines.  The character ending the scan (the newline, if any) is
 351    left in buffer->read_ahead.  Returns non-zero if lines were crossed.  */
 352 static int
 353 skip_line_comment (pfile)
 354      cpp_reader *pfile;
 355 {
 356   cpp_buffer *buffer = pfile->buffer;
 357   const unsigned int first_line = buffer->lineno;
 358   cppchar_t ch = EOF;
 359
 360   pfile->state.lexing_comment = 1;
 361   while (buffer->cur != buffer->rlimit)
 362     {
 363       ch = *buffer->cur++;
 364       if (ch == '\\' || ch == '?')
 365         ch = skip_escaped_newlines (buffer, ch);
 366       if (is_vspace (ch))
 367         break;
 368       /* Not a newline: forget it, so running out of input yields EOF.  */
 369       ch = EOF;
 370     }
 371   pfile->state.lexing_comment = 0;
 372
 373   /* Hand the terminating character back to the caller.  */
 374   buffer->read_ahead = ch;
 375   return first_line != buffer->lineno;
 376 }
 377
 378 /* Account for a tab: pfile->buffer->cur is one beyond the \t, and
 379    col_adjust is bumped so later columns land on the next tab stop.  */
 380 static void
 381 adjust_column (pfile)
 382      cpp_reader *pfile;
 383 {
 384   cpp_buffer *buffer = pfile->buffer;
 385   unsigned int tab_col = CPP_BUF_COL (buffer) - 1; /* Zero-based.  */
 386   unsigned int into_stop = tab_col % CPP_OPTION (pfile, tabstop);
 387
 388   /* Distance to the next multiple of the tabstop, less the one
 389      character position the tab itself already occupies.  */
 390   buffer->col_adjust += CPP_OPTION (pfile, tabstop) - into_stop - 1;
 391 }
 392
 393 /* Skip a run of non-vertical whitespace that begins with C, leaving
 394    the first character past it (or EOF) in buffer->read_ahead.  Tabs
 395    adjust buffer->col_adjust; without this, tokens might be assigned
 396    an incorrect column.  */
 397 static void
 398 skip_whitespace (pfile, c)
 399      cpp_reader *pfile;
 400      cppchar_t c;
 401 {
 402   cpp_buffer *buffer = pfile->buffer;
 403   int nul_reported = 0;
 404
 405   for (;;)
 406     {
 407       switch (c)
 408         {
 409         case ' ':		/* Horizontal space always OK.  */
 410           break;
 411         case '\t':
 412           adjust_column (pfile);
 413           break;
 414         case '\0':
 415           if (!nul_reported)
 416             cpp_warning (pfile, "null character(s) ignored");
 417           nul_reported = 1;
 418           break;
 419         default:		/* Just \f or \v left.  */
 420           if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 421             cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 422                                    CPP_BUF_COL (buffer),
 423                                    "%s in preprocessing directive",
 424                                    c == '\f' ? "form feed" : "vertical tab");
 425           break;
 426         }
 427
 428       /* Keep going only while we see ' ' \t \f \v \0.  */
 429       c = buffer->cur == buffer->rlimit ? EOF : *buffer->cur++;
 430       if (c == EOF || !is_nvspace (c))
 431         break;
 432     }
 433
 434   /* Remember the next character.  */
 435   buffer->read_ahead = c;
 436 }
 437
 438 /* Return 1 if every character of STRING, the spelling of a number
 439    token, is valid in a name (no '.', '+' or '-'), and 0 otherwise.  */
 440 static int
 441 name_p (pfile, string)
 442      cpp_reader *pfile;
 443      const cpp_string *string;
 444 {
 445   unsigned int pos = 0;
 446
 447   /* Stop at the first character that cannot appear in an identifier.  */
 448   while (pos < string->len && is_idchar (string->text[pos]))
 449     pos++;
 450
 451   return pos == string->len;
 452 }
 453
 454 /* Parse an identifier, skipping embedded backslash-newlines.
 455    Calculate the hash value of the token while parsing, for improved
 456    performance.  The hashing algorithm *must* match cpp_lookup().

        C is the identifier's first character, already read by the caller.
        The spelling is accumulated at the front of pfile->ident_pool and
        looked up with _cpp_lookup_with_hash; the resulting hash node is
        returned.  The first character that is not part of the identifier
        (or EOF) is left in buffer->read_ahead.  */
 457
 458 static cpp_hashnode *
 459 parse_identifier (pfile, c)
 460      cpp_reader *pfile;
 461      cppchar_t c;
 462 {
 463   cpp_hashnode *result;
 464   cpp_buffer *buffer = pfile->buffer;
 465   unsigned char *dest, *limit;
 466   unsigned int r = 0, saw_dollar = 0;
 467
 468   dest = POOL_FRONT (&pfile->ident_pool);
 469   limit = POOL_LIMIT (&pfile->ident_pool);
 470
       /* The outer loop resumes the identifier after trigraphs or escaped
          newlines; the inner loop is the fast path over plain identifier
          characters.  */
 471   do
 472     {
 473       do
 474         {
 475           /* Need room for terminating null.  */
               /* NOTE(review): _cpp_next_chunk presumably moves the
                  partial spelling to a larger chunk and updates DEST;
                  its definition is not visible here -- confirm.  */
 476           if (dest + 1 >= limit)
 477             limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
 478
 479           *dest++ = c;
 480           r = HASHSTEP (r, c);
 481
 482           if (c == '$')
 483             saw_dollar++;
 484
 485           c = EOF;
 486           if (buffer->cur == buffer->rlimit)
 487             break;
 488
 489           c = *buffer->cur++;
 490         }
 491       while (is_idchar (c));
 492
 493       /* Potential escaped newline?  */
 494       if (c != '?' && c != '\\')
 495         break;
 496       c = skip_escaped_newlines (buffer, c);
 497     }
 498   while (is_idchar (c));
 499
 500   /* Remember the next character.  */
 501   buffer->read_ahead = c;
 502
 503   /* $ is not an identifier character in the standard, but is commonly
 504      accepted as an extension.  Don't warn about it in skipped
 505      conditional blocks.  */
 506   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 507     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 508
 509   /* Identifiers are null-terminated.  */
 510   *dest = '\0';
 511
 512   /* This routine commits the memory if necessary.  */
 513   result = _cpp_lookup_with_hash (pfile,
 514                                   dest - POOL_FRONT (&pfile->ident_pool), r);
 515
 516   /* Some identifiers require diagnostics when lexed.  */
 517   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 518     {
 519       /* It is allowed to poison the same identifier twice.  */
 520       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 521         cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
 522
 523       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 524          replacement list of a variadic macro.  */
 525       if (result == pfile->spec_nodes.n__VA_ARGS__
 526           && !pfile->state.va_args_ok)
 527         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Parse a number, skipping embedded backslash-newlines.

        C is the first character of the number; when LEADING_PERIOD is
        non-zero a '.' is stored in front of it.  Characters are accepted
        while they satisfy is_numchar, are '.', or are a sign that
        VALID_SIGN accepts after the previously stored character.  The
        NUL-terminated spelling is committed to pfile->ident_pool and
        described by NUMBER (text and length, the length excluding the
        NUL).  The first character past the number (or EOF) is left in
        buffer->read_ahead.  */
 534 static void
 535 parse_number (pfile, number, c, leading_period)
 536      cpp_reader *pfile;
 537      cpp_string *number;
 538      cppchar_t c;
 539      int leading_period;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   cpp_pool *pool = &pfile->ident_pool;
 543   unsigned char *dest, *limit;
 544
 545   dest = POOL_FRONT (pool);
 546   limit = POOL_LIMIT (pool);
 547
 548   /* Place a leading period.  */
 549   if (leading_period)
 550     {
 551       if (dest >= limit)
 552         limit = _cpp_next_chunk (pool, 0, &dest);
 553       *dest++ = '.';
 554     }
 555
       /* The outer loop resumes the number after trigraphs or escaped
          newlines; the inner loop is the fast path.  */
 556   do
 557     {
 558       do
 559         {
 560           /* Need room for terminating null.  */
 561           if (dest + 1 >= limit)
 562             limit = _cpp_next_chunk (pool, 0, &dest);
 563           *dest++ = c;
 564
 565           c = EOF;
 566           if (buffer->cur == buffer->rlimit)
 567             break;
 568
 569           c = *buffer->cur++;
 570         }
           /* dest[-1] is the character just stored, which decides
              whether a following sign belongs to the number.  */
 571       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 572
 573       /* Potential escaped newline?  */
 574       if (c != '?' && c != '\\')
 575         break;
 576       c = skip_escaped_newlines (buffer, c);
 577     }
 578   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 579
 580   /* Remember the next character.  */
 581   buffer->read_ahead = c;
 582
 583   /* Null-terminate the number.  */
 584   *dest = '\0';
 585
 586   number->text = POOL_FRONT (pool);
 587   number->len = dest - number->text;
       /* Commit the spelling together with its terminating NUL.  */
 588   POOL_COMMIT (pool, number->len + 1);
 589 }
 590
 591 /* Subroutine of parse_string.  Reports a literal missing its TERM.  */
 592 static void
 593 unterminated (pfile, term)
 594      cpp_reader *pfile;
 595      int term;
 596 {
 597   cpp_error (pfile, "missing terminating %c character", term);
 598
 599   /* Only a recorded multi-line string begun on another line is noted.  */
 600   if (term != '"' || pfile->mlstring_pos.line == 0
 601       || pfile->mlstring_pos.line == pfile->lexer_pos.line)
 602     return;
 603   cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 604                        pfile->mlstring_pos.col,
 605                        "possible start of unterminated string literal");
 606   pfile->mlstring_pos.line = 0;
 607 }
 608
 609 /* Subroutine of parse_string.  DEST is one past the last character
 610    stored so far in the identifier pool.  Returns non-zero if a
 611    terminator read now is not escaped by the backslashes before DEST.  */
 612 static int
 613 unescaped_terminator_p (pfile, dest)
 614      cpp_reader *pfile;
 615      const unsigned char *dest;
 616 {
 617   const unsigned char *first, *scan = dest;
 618   unsigned int backslashes = 0;
 619
 620   /* In #include-style directives, terminators are not escapeable.  */
 621   if (pfile->state.angled_headers)
 622     return 1;
 623   /* An odd number of consecutive backslashes escapes the terminator.  */
 624   first = POOL_FRONT (&pfile->ident_pool);
 625   while (scan > first && *--scan == '\\')
 626     backslashes++;
 627
 628   return (backslashes & 1) == 0;
 629 }
 630
 631 /* Parses a string, character constant, or angle-bracketed header file
 632    name.  Handles embedded trigraphs and escaped newlines.  The stored
 633    string is guaranteed NUL-terminated, but it is not guaranteed that
 634    this is the first NUL since embedded NULs are preserved.
 635
 636    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the character that ends the literal.  The text
        between the delimiters is committed to pfile->ident_pool and
        described by token->val.str (the length excludes the final NUL).
        On return buffer->read_ahead is EOF if the terminator was consumed
        or the input ran out, and otherwise the newline character at
        which the literal was cut short.  */
 637 static void
 638 parse_string (pfile, token, terminator)
 639      cpp_reader *pfile;
 640      cpp_token *token;
 641      cppchar_t terminator;
 642 {
 643   cpp_buffer *buffer = pfile->buffer;
 644   cpp_pool *pool = &pfile->ident_pool;
 645   unsigned char *dest, *limit;
 646   cppchar_t c;
 647   unsigned int nulls = 0;
 648
 649   dest = POOL_FRONT (pool);
 650   limit = POOL_LIMIT (pool);
 651
 652   for (;;)
 653     {
 654       if (buffer->cur == buffer->rlimit)
 655         c = EOF;
 656       else
 657         c = *buffer->cur++;
 658
         /* Also entered by goto after a newline inside a multi-line
            string, with C already fetched by handle_newline.  */
 659     have_char:
 660       /* We need space for the terminating NUL.  */
 661       if (dest >= limit)
 662         limit = _cpp_next_chunk (pool, 0, &dest);
 663
 664       if (c == EOF)
 665         {
 666           unterminated (pfile, terminator);
 667           break;
 668         }
 669
 670       /* Handle trigraphs, escaped newlines etc.  */
 671       if (c == '?' || c == '\\')
 672         c = skip_escaped_newlines (buffer, c);
 673
 674       if (c == terminator && unescaped_terminator_p (pfile, dest))
 675         {
               /* The terminator is consumed and not stored; leave no
                  read-ahead character behind.  */
 676           c = EOF;
 677           break;
 678         }
 679       else if (is_vspace (c))
 680         {
 681           /* In assembly language, silently terminate string and
 682              character literals at end of line.  This is a kludge
 683              around not knowing where comments are.  */
 684           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 685             break;
 686
 687           /* Character constants and header names may not extend over
 688              multiple lines.  In Standard C, neither may strings.
 689              Unfortunately, we accept multiline strings as an
 690              extension, except in #include family directives.  */
 691           if (terminator != '"' || pfile->state.angled_headers)
 692             {
 693               unterminated (pfile, terminator);
 694               break;
 695             }
 696
 697           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
               /* Record where the first multi-line string began, for
                  the diagnostic in unterminated.  */
 698           if (pfile->mlstring_pos.line == 0)
 699             pfile->mlstring_pos = pfile->lexer_pos;
 700
               /* Whatever the newline convention, store a single '\n'.  */
 701           c = handle_newline (buffer, c);
 702           *dest++ = '\n';
 703           goto have_char;
 704         }
 705       else if (c == '\0')
 706         {
 707           if (nulls++ == 0)
 708             cpp_warning (pfile, "null character(s) preserved in literal");
 709         }
 710
 711       *dest++ = c;
 712     }
 713
 714   /* Remember the next character.  */
 715   buffer->read_ahead = c;
 716   *dest = '\0';
 717
 718   token->val.str.text = POOL_FRONT (pool);
 719   token->val.str.len = dest - token->val.str.text;
 720   POOL_COMMIT (pool, token->val.str.len + 1);
 721 }
 722
 723 /* Store the comment just lexed as a CPP_COMMENT token.  The saved text
 724    includes the comment start and any terminator.  */
 725 static void
 726 save_comment (pfile, token, from)
 727      cpp_reader *pfile;
 728      cpp_token *token;
 729      const unsigned char *from;
 730 {
 731   unsigned char *text;
 732   unsigned int body_len = pfile->buffer->cur - from; /* Sans initial '/'.  */
 733
 734   /* C++ comments probably (not definitely) have moved past a new
 735      line, which we don't want to save in the comment.  */
 736   if (pfile->buffer->read_ahead != EOF)
 737     body_len--;
 738
 739   text = _cpp_pool_alloc (&pfile->ident_pool, body_len + 1);
 740   text[0] = '/';
 741   memcpy (text + 1, from, body_len);
 742
 743   token->type = CPP_COMMENT;
 744   token->val.str.len = body_len + 1;
 745   token->val.str.text = text;
 746 }
 747
 748 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 749    want to avoid stepping back when lexing %:%X.

        RESULT's type is set to CPP_MOD, CPP_MOD_EQ for "%=", or, when the
        digraphs option is on, CPP_HASH for "%:", CPP_PASTE for "%:%:" and
        CPP_CLOSE_BRACE for "%>"; digraph tokens also get the DIGRAPH
        flag.  A character that was looked at but is not part of the token
        stays in buffer->read_ahead.  */
 750 static void
 751 lex_percent (buffer, result)
 752      cpp_buffer *buffer;
 753      cpp_token *result;
 754 {
 755   cppchar_t c;
 756
 757   result->type = CPP_MOD;
 758   /* Parsing %:%X could leave an extra character.  */
 759   if (buffer->extra_char == EOF)
 760     c = get_effective_char (buffer);
 761   else
 762     {
           /* Use the character stashed by the previous call instead of
              reading a new one.  */
 763       c = buffer->read_ahead = buffer->extra_char;
 764       buffer->extra_char = EOF;
 765     }
 766
 767   if (c == '=')
 768     ACCEPT_CHAR (CPP_MOD_EQ);
 769   else if (CPP_OPTION (buffer->pfile, digraphs))
 770     {
 771       if (c == ':')
 772         {
 773           result->flags |= DIGRAPH;
 774           ACCEPT_CHAR (CPP_HASH);
               /* "%:" seen; check whether a second "%:" follows.  */
 775           if (get_effective_char (buffer) == '%')
 776             {
 777               buffer->extra_char = get_effective_char (buffer);
 778               if (buffer->extra_char == ':')
 779                 {
 780                   buffer->extra_char = EOF;
 781                   ACCEPT_CHAR (CPP_PASTE);
 782                 }
 783               else
 784                 /* We'll catch the extra_char when we're called back.  */
 785                 buffer->read_ahead = '%';
 786             }
 787         }
 788       else if (c == '>')
 789         {
 790           result->flags |= DIGRAPH;
 791           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 792         }
 793     }
 794 }
 795
 796 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 797    want to avoid stepping back when lexing '...' or '.123'.  In the
 798    latter case we should also set a flag for parse_number.

        RESULT becomes a CPP_NUMBER when a digit follows the dot (the
        number is parsed with its leading period), CPP_ELLIPSIS for "...",
        CPP_DOT_STAR for ".*" when the cplusplus option is on, and CPP_DOT
        otherwise.  A character that was looked at but is not part of the
        token stays in buffer->read_ahead.  */
 799 static void
 800 lex_dot (pfile, result)
 801      cpp_reader *pfile;
 802      cpp_token *result;
 803 {
 804   cpp_buffer *buffer = pfile->buffer;
 805   cppchar_t c;
 806
 807   /* Parsing ..X could leave an extra character.  */
 808   if (buffer->extra_char == EOF)
 809     c = get_effective_char (buffer);
 810   else
 811     {
           /* Use the character stashed by the previous call instead of
              reading a new one.  */
 812       c = buffer->read_ahead = buffer->extra_char;
 813       buffer->extra_char = EOF;
 814     }
 815
 816   /* All known character sets have 0...9 contiguous.  */
 817   if (c >= '0' && c <= '9')
 818     {
 819       result->type = CPP_NUMBER;
           /* The final argument asks parse_number to store the '.'.  */
 820       parse_number (pfile, &result->val.str, c, 1);
 821     }
 822   else
 823     {
 824       result->type = CPP_DOT;
 825       if (c == '.')
 826         {
               /* ".." seen; a third dot makes an ellipsis.  */
 827           buffer->extra_char = get_effective_char (buffer);
 828           if (buffer->extra_char == '.')
 829             {
 830               buffer->extra_char = EOF;
 831               ACCEPT_CHAR (CPP_ELLIPSIS);
 832             }
 833           else
 834             /* We'll catch the extra_char when we're called back.  */
 835             buffer->read_ahead = '.';
 836         }
 837       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 838         ACCEPT_CHAR (CPP_DOT_STAR);
 839     }
 840 }
 841
 842 void
 843 _cpp_lex_token (pfile, result)
 844      cpp_reader *pfile;
 845      cpp_token *result;
 846 {
 847   cppchar_t c;
 848   cpp_buffer *buffer;
 849   const unsigned char *comment_start;
 850   unsigned char bol;
 851
 852  skip:
 853   bol = pfile->state.next_bol;
 854  done_directive:
 855   buffer = pfile->buffer;
 856   pfile->state.next_bol = 0;
 857   result->flags = buffer->saved_flags;
 858   buffer->saved_flags = 0;
 859  next_char:
 860   pfile->lexer_pos.line = buffer->lineno;
 861  next_char2:
 862   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 863
 864   c = buffer->read_ahead;
 865   if (c == EOF && buffer->cur < buffer->rlimit)
 866     {
 867       c = *buffer->cur++;
 868       pfile->lexer_pos.col++;
 869     }
 870
 871  do_switch:
 872   buffer->read_ahead = EOF;
 873   switch (c)
 874     {
 875     case EOF:
 876       /* Non-empty files should end in a newline.  Ignore for command
 877          line and _Pragma buffers.  */
 878       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 879         cpp_pedwarn (pfile, "no newline at end of file");
 880       pfile->state.next_bol = 1;
 881       pfile->skipping = 0;      /* In case missing #endif.  */
 882       result->type = CPP_EOF;
 883       /* Don't do MI optimisation.  */
 884       return;
 885
 886     case ' ': case '\t': case '\f': case '\v': case '\0':
 887       skip_whitespace (pfile, c);
 888       result->flags |= PREV_WHITE;
 889       goto next_char2;
 890
 891     case '\n': case '\r':
 892       if (!pfile->state.in_directive)
 893         {
 894           handle_newline (buffer, c);
 895           bol = 1;
 896           pfile->lexer_pos.output_line = buffer->lineno;
 897           /* This is a new line, so clear any white space flag.
 898              Newlines in arguments are white space (6.10.3.10);
 899              parse_arg takes care of that.  */
 900           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 901           goto next_char;
 902         }
 903
 904       /* Don't let directives spill over to the next line.  */
 905       buffer->read_ahead = c;
 906       pfile->state.next_bol = 1;
 907       result->type = CPP_EOF;
 908       /* Don't break; pfile->skipping might be true.  */
 909       return;
 910
 911     case '?':
 912     case '\\':
 913       /* These could start an escaped newline, or '?' a trigraph.  Let
 914          skip_escaped_newlines do all the work.  */
 915       {
 916         unsigned int lineno = buffer->lineno;
 917
 918         c = skip_escaped_newlines (buffer, c);
 919         if (lineno != buffer->lineno)
 920           /* We had at least one escaped newline of some sort, and the
 921              next character is in buffer->read_ahead.  Update the
 922              token's line and column.  */
 923             goto next_char;
 924
 925         /* We are either the original '?' or '\\', or a trigraph.  */
 926         result->type = CPP_QUERY;
 927         buffer->read_ahead = EOF;
 928         if (c == '\\')
 929           goto random_char;
 930         else if (c != '?')
 931           goto do_switch;
 932       }
 933       break;
 934
 935     case '0': case '1': case '2': case '3': case '4':
 936     case '5': case '6': case '7': case '8': case '9':
 937       result->type = CPP_NUMBER;
 938       parse_number (pfile, &result->val.str, c, 0);
 939       break;
 940
 941     case '$':
 942       if (!CPP_OPTION (pfile, dollars_in_ident))
 943         goto random_char;
 944       /* Fall through... */
 945
 946     case '_':
 947     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 948     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 949     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 950     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 951     case 'y': case 'z':
 952     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 953     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 954     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 955     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 956     case 'Y': case 'Z':
 957       result->type = CPP_NAME;
 958       result->val.node = parse_identifier (pfile, c);
 959
 960       /* 'L' may introduce wide characters or strings.  */
 961       if (result->val.node == pfile->spec_nodes.n_L)
 962         {
 963           c = buffer->read_ahead; /* For make_string.  */
 964           if (c == '\'' || c == '"')
 965             {
 966               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 967               goto make_string;
 968             }
 969         }
 970       /* Convert named operators to their proper types.  */
 971       else if (result->val.node->flags & NODE_OPERATOR)
 972         {
 973           result->flags |= NAMED_OP;
 974           result->type = result->val.node->value.operator;
 975         }
 976       break;
 977
 978     case '\'':
 979     case '"':
 980       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 981     make_string:
 982       parse_string (pfile, result, c);
 983       break;
 984
 985     case '/':
 986       /* A potential block or line comment.  */
 987       comment_start = buffer->cur;
 988       result->type = CPP_DIV;
 989       c = get_effective_char (buffer);
 990       if (c == '=')
 991         ACCEPT_CHAR (CPP_DIV_EQ);
 992       if (c != '/' && c != '*')
 993         break;
 994
 995       if (c == '*')
 996         {
 997           if (skip_block_comment (pfile))
 998             cpp_error_with_line (pfile, pfile->lexer_pos.line,
 999                                  pfile->lexer_pos.col,
1000                                  "unterminated comment");
1001         }
1002       else
1003         {
1004           if (!CPP_OPTION (pfile, cplusplus_comments)
1005               && !CPP_IN_SYSTEM_HEADER (pfile))
1006             break;
1007
1008           /* Warn about comments only if pedantically GNUC89, and not
1009              in system headers.  */
1010           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1011               && ! buffer->warned_cplusplus_comments)
1012             {
1013               cpp_pedwarn (pfile,
1014                            "C++ style comments are not allowed in ISO C89");
1015               cpp_pedwarn (pfile,
1016                            "(this will be reported only once per input file)");
1017               buffer->warned_cplusplus_comments = 1;
1018             }
1019
1020           /* Skip_line_comment updates buffer->read_ahead.  */
1021           if (skip_line_comment (pfile))
1022             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1023                                    pfile->lexer_pos.col,
1024                                    "multi-line comment");
1025         }
1026
1027       /* Skipping the comment has updated buffer->read_ahead.  */
1028       if (!pfile->state.save_comments)
1029         {
1030           result->flags |= PREV_WHITE;
1031           goto next_char;
1032         }
1033
1034       /* Save the comment as a token in its own right.  */
1035       save_comment (pfile, result, comment_start);
1036       /* Don't do MI optimisation.  */
1037       return;
1038
1039     case '<':
1040       if (pfile->state.angled_headers)
1041         {
1042           result->type = CPP_HEADER_NAME;
1043           c = '>';              /* terminator.  */
1044           goto make_string;
1045         }
1046
1047       result->type = CPP_LESS;
1048       c = get_effective_char (buffer);
1049       if (c == '=')
1050         ACCEPT_CHAR (CPP_LESS_EQ);
1051       else if (c == '<')
1052         {
1053           ACCEPT_CHAR (CPP_LSHIFT);
1054           if (get_effective_char (buffer) == '=')
1055             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1056         }
1057       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1058         {
1059           ACCEPT_CHAR (CPP_MIN);
1060           if (get_effective_char (buffer) == '=')
1061             ACCEPT_CHAR (CPP_MIN_EQ);
1062         }
1063       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1064         {
1065           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1066           result->flags |= DIGRAPH;
1067         }
1068       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1069         {
1070           ACCEPT_CHAR (CPP_OPEN_BRACE);
1071           result->flags |= DIGRAPH;
1072         }
1073       break;
1074
1075     case '>':
1076       result->type = CPP_GREATER;
1077       c = get_effective_char (buffer);
1078       if (c == '=')
1079         ACCEPT_CHAR (CPP_GREATER_EQ);
1080       else if (c == '>')
1081         {
1082           ACCEPT_CHAR (CPP_RSHIFT);
1083           if (get_effective_char (buffer) == '=')
1084             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1085         }
1086       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1087         {
1088           ACCEPT_CHAR (CPP_MAX);
1089           if (get_effective_char (buffer) == '=')
1090             ACCEPT_CHAR (CPP_MAX_EQ);
1091         }
1092       break;
1093
1094     case '%':
1095       lex_percent (buffer, result);
1096       if (result->type == CPP_HASH)
1097         goto do_hash;
1098       break;
1099
1100     case '.':
1101       lex_dot (pfile, result);
1102       break;
1103
1104     case '+':
1105       result->type = CPP_PLUS;
1106       c = get_effective_char (buffer);
1107       if (c == '=')
1108         ACCEPT_CHAR (CPP_PLUS_EQ);
1109       else if (c == '+')
1110         ACCEPT_CHAR (CPP_PLUS_PLUS);
1111       break;
1112
1113     case '-':
1114       result->type = CPP_MINUS;
1115       c = get_effective_char (buffer);
1116       if (c == '>')
1117         {
1118           ACCEPT_CHAR (CPP_DEREF);
1119           if (CPP_OPTION (pfile, cplusplus)
1120               && get_effective_char (buffer) == '*')
1121             ACCEPT_CHAR (CPP_DEREF_STAR);
1122         }
1123       else if (c == '=')
1124         ACCEPT_CHAR (CPP_MINUS_EQ);
1125       else if (c == '-')
1126         ACCEPT_CHAR (CPP_MINUS_MINUS);
1127       break;
1128
1129     case '*':
1130       result->type = CPP_MULT;
1131       if (get_effective_char (buffer) == '=')
1132         ACCEPT_CHAR (CPP_MULT_EQ);
1133       break;
1134
1135     case '=':
1136       result->type = CPP_EQ;
1137       if (get_effective_char (buffer) == '=')
1138         ACCEPT_CHAR (CPP_EQ_EQ);
1139       break;
1140
1141     case '!':
1142       result->type = CPP_NOT;
1143       if (get_effective_char (buffer) == '=')
1144         ACCEPT_CHAR (CPP_NOT_EQ);
1145       break;
1146
1147     case '&':
1148       result->type = CPP_AND;
1149       c = get_effective_char (buffer);
1150       if (c == '=')
1151         ACCEPT_CHAR (CPP_AND_EQ);
1152       else if (c == '&')
1153         ACCEPT_CHAR (CPP_AND_AND);
1154       break;
1155
1156     case '#':
1157       c = buffer->extra_char;   /* Can be set by error condition below.  */
1158       if (c != EOF)
1159         {
1160           buffer->read_ahead = c;
1161           buffer->extra_char = EOF;
1162         }
1163       else
1164         c = get_effective_char (buffer);
1165
1166       if (c == '#')
1167         {
1168           ACCEPT_CHAR (CPP_PASTE);
1169           break;
1170         }
1171
1172       result->type = CPP_HASH;
1173     do_hash:
1174       if (!bol)
1175         break;
1176       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1177          tokens within the list of arguments that would otherwise act
1178          as preprocessing directives, the behavior is undefined.
1179
1180          This implementation will report a hard error, terminate the
1181          macro invocation, and proceed to process the directive.  */
1182       if (pfile->state.parsing_args)
1183         {
1184           if (pfile->state.parsing_args == 2)
1185             cpp_error (pfile,
1186                        "directives may not be used inside a macro argument");
1187
1188           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1189           buffer->extra_char = buffer->read_ahead;
1190           buffer->read_ahead = '#';
1191           pfile->state.next_bol = 1;
1192           result->type = CPP_EOF;
1193
1194           /* Get whitespace right - newline_in_args sets it.  */
1195           if (pfile->lexer_pos.col == 1)
1196             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1197         }
1198       else
1199         {
1200           /* This is the hash introducing a directive.  */
1201           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1202             goto done_directive; /* bol still 1.  */
1203           /* This is in fact an assembler #.  */
1204         }
1205       break;
1206
1207     case '|':
1208       result->type = CPP_OR;
1209       c = get_effective_char (buffer);
1210       if (c == '=')
1211         ACCEPT_CHAR (CPP_OR_EQ);
1212       else if (c == '|')
1213         ACCEPT_CHAR (CPP_OR_OR);
1214       break;
1215
1216     case '^':
1217       result->type = CPP_XOR;
1218       if (get_effective_char (buffer) == '=')
1219         ACCEPT_CHAR (CPP_XOR_EQ);
1220       break;
1221
1222     case ':':
1223       result->type = CPP_COLON;
1224       c = get_effective_char (buffer);
1225       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1226         ACCEPT_CHAR (CPP_SCOPE);
1227       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1228         {
1229           result->flags |= DIGRAPH;
1230           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1231         }
1232       break;
1233
1234     case '~': result->type = CPP_COMPL; break;
1235     case ',': result->type = CPP_COMMA; break;
1236     case '(': result->type = CPP_OPEN_PAREN; break;
1237     case ')': result->type = CPP_CLOSE_PAREN; break;
1238     case '[': result->type = CPP_OPEN_SQUARE; break;
1239     case ']': result->type = CPP_CLOSE_SQUARE; break;
1240     case '{': result->type = CPP_OPEN_BRACE; break;
1241     case '}': result->type = CPP_CLOSE_BRACE; break;
1242     case ';': result->type = CPP_SEMICOLON; break;
1243
1244       /* @ is a punctuator in Objective C.  */
1245     case '@': result->type = CPP_ATSIGN; break;
1246
1247     random_char:
1248     default:
1249       result->type = CPP_OTHER;
1250       result->val.c = c;
1251       break;
1252     }
1253
1254   if (pfile->skipping)
1255     goto skip;
1256
1257   /* If not in a directive, this token invalidates controlling macros.  */
1258   if (!pfile->state.in_directive)
1259     pfile->mi_state = MI_FAILED;
1260 }
1261
1262 /* An upper bound on the number of bytes needed to spell a token,
1263    including preceding whitespace.  */
1264 unsigned int
1265 cpp_token_len (token)
1266      const cpp_token *token;
1267 {
1268   unsigned int len;
1269
1270   switch (TOKEN_SPELL (token))
1271     {
1272     default:            len = 0;                        break;
1273     case SPELL_STRING:  len = token->val.str.len;       break;
1274     case SPELL_IDENT:   len = token->val.node->length;  break;
1275     }
1276   /* 1 for whitespace, 4 for comment delimeters.  */
1277   return len + 5;
1278 }
1279
1280 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1281    already contain the enough space to hold the token's spelling.
1282    Returns a pointer to the character after the last character
1283    written.  */
1284 unsigned char *
1285 cpp_spell_token (pfile, token, buffer)
1286      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1287      const cpp_token *token;
1288      unsigned char *buffer;
1289 {
1290   switch (TOKEN_SPELL (token))
1291     {
1292     case SPELL_OPERATOR:
1293       {
1294         const unsigned char *spelling;
1295         unsigned char c;
1296
1297         if (token->flags & DIGRAPH)
1298           spelling
1299             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1300         else if (token->flags & NAMED_OP)
1301           goto spell_ident;
1302         else
1303           spelling = TOKEN_NAME (token);
1304
1305         while ((c = *spelling++) != '\0')
1306           *buffer++ = c;
1307       }
1308       break;
1309
1310     case SPELL_IDENT:
1311       spell_ident:
1312       memcpy (buffer, token->val.node->name, token->val.node->length);
1313       buffer += token->val.node->length;
1314       break;
1315
1316     case SPELL_STRING:
1317       {
1318         int left, right, tag;
1319         switch (token->type)
1320           {
1321           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1322           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1323           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1324           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1325           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1326           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1327           }
1328         if (tag) *buffer++ = tag;
1329         if (left) *buffer++ = left;
1330         memcpy (buffer, token->val.str.text, token->val.str.len);
1331         buffer += token->val.str.len;
1332         if (right) *buffer++ = right;
1333       }
1334       break;
1335
1336     case SPELL_CHAR:
1337       *buffer++ = token->val.c;
1338       break;
1339
1340     case SPELL_NONE:
1341       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1342       break;
1343     }
1344
1345   return buffer;
1346 }
1347
1348 /* Returns a token as a null-terminated string.  The string is
1349    temporary, and automatically freed later.  Useful for diagnostics.  */
1350 unsigned char *
1351 cpp_token_as_text (pfile, token)
1352      cpp_reader *pfile;
1353      const cpp_token *token;
1354 {
1355   unsigned int len = cpp_token_len (token);
1356   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1357
1358   end = cpp_spell_token (pfile, token, start);
1359   end[0] = '\0';
1360
1361   return start;
1362 }
1363
1364 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1365 const char *
1366 cpp_type2name (type)
1367      enum cpp_ttype type;
1368 {
1369   return (const char *) token_spellings[type].name;
1370 }
1371
1372 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1373    for efficiency - to avoid double-buffering.  Also, outputs a space
1374    if PREV_WHITE is flagged.  */
1375 void
1376 cpp_output_token (token, fp)
1377      const cpp_token *token;
1378      FILE *fp;
1379 {
1380   if (token->flags & PREV_WHITE)
1381     putc (' ', fp);
1382
1383   switch (TOKEN_SPELL (token))
1384     {
1385     case SPELL_OPERATOR:
1386       {
1387         const unsigned char *spelling;
1388
1389         if (token->flags & DIGRAPH)
1390           spelling
1391             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1392         else if (token->flags & NAMED_OP)
1393           goto spell_ident;
1394         else
1395           spelling = TOKEN_NAME (token);
1396
1397         ufputs (spelling, fp);
1398       }
1399       break;
1400
1401     spell_ident:
1402     case SPELL_IDENT:
1403       ufputs (token->val.node->name, fp);
1404     break;
1405
1406     case SPELL_STRING:
1407       {
1408         int left, right, tag;
1409         switch (token->type)
1410           {
1411           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1412           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1413           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1414           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1415           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1416           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1417           }
1418         if (tag) putc (tag, fp);
1419         if (left) putc (left, fp);
1420         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1421         if (right) putc (right, fp);
1422       }
1423       break;
1424
1425     case SPELL_CHAR:
1426       putc (token->val.c, fp);
1427       break;
1428
1429     case SPELL_NONE:
1430       /* An error, most probably.  */
1431       break;
1432     }
1433 }
1434
1435 /* Compare two tokens.  */
1436 int
1437 _cpp_equiv_tokens (a, b)
1438      const cpp_token *a, *b;
1439 {
1440   if (a->type == b->type && a->flags == b->flags)
1441     switch (TOKEN_SPELL (a))
1442       {
1443       default:                  /* Keep compiler happy.  */
1444       case SPELL_OPERATOR:
1445         return 1;
1446       case SPELL_CHAR:
1447         return a->val.c == b->val.c; /* Character.  */
1448       case SPELL_NONE:
1449         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1450       case SPELL_IDENT:
1451         return a->val.node == b->val.node;
1452       case SPELL_STRING:
1453         return (a->val.str.len == b->val.str.len
1454                 && !memcmp (a->val.str.text, b->val.str.text,
1455                             a->val.str.len));
1456       }
1457
1458   return 0;
1459 }
1460
1461 /* Determine whether two tokens can be pasted together, and if so,
1462    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1463    be pasted, or the appropriate type for the merged token if they
1464    can.  */
1465 enum cpp_ttype
1466 cpp_can_paste (pfile, token1, token2, digraph)
1467      cpp_reader * pfile;
1468      const cpp_token *token1, *token2;
1469      int* digraph;
1470 {
1471   enum cpp_ttype a = token1->type, b = token2->type;
1472   int cxx = CPP_OPTION (pfile, cplusplus);
1473
1474   /* Treat named operators as if they were ordinary NAMEs.  */
1475   if (token1->flags & NAMED_OP)
1476     a = CPP_NAME;
1477   if (token2->flags & NAMED_OP)
1478     b = CPP_NAME;
1479
1480   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1481     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1482
1483   switch (a)
1484     {
1485     case CPP_GREATER:
1486       if (b == a) return CPP_RSHIFT;
1487       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1488       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1489       break;
1490     case CPP_LESS:
1491       if (b == a) return CPP_LSHIFT;
1492       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1493       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1494       if (CPP_OPTION (pfile, digraphs))
1495         {
1496           if (b == CPP_COLON)
1497             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1498           if (b == CPP_MOD)
1499             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1500         }
1501       break;
1502
1503     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1504     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1505     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1506
1507     case CPP_MINUS:
1508       if (b == a)               return CPP_MINUS_MINUS;
1509       if (b == CPP_GREATER)     return CPP_DEREF;
1510       break;
1511     case CPP_COLON:
1512       if (b == a && cxx)        return CPP_SCOPE;
1513       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1514         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1515       break;
1516
1517     case CPP_MOD:
1518       if (CPP_OPTION (pfile, digraphs))
1519         {
1520           if (b == CPP_GREATER)
1521             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1522           if (b == CPP_COLON)
1523             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1524         }
1525       break;
1526     case CPP_DEREF:
1527       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1528       break;
1529     case CPP_DOT:
1530       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1531       if (b == CPP_NUMBER)      return CPP_NUMBER;
1532       break;
1533
1534     case CPP_HASH:
1535       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1536         /* %:%: digraph */
1537         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1538       break;
1539
1540     case CPP_NAME:
1541       if (b == CPP_NAME)        return CPP_NAME;
1542       if (b == CPP_NUMBER
1543           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1544       if (b == CPP_CHAR
1545           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1546       if (b == CPP_STRING
1547           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1548       break;
1549
1550     case CPP_NUMBER:
1551       if (b == CPP_NUMBER)      return CPP_NUMBER;
1552       if (b == CPP_NAME)        return CPP_NUMBER;
1553       if (b == CPP_DOT)         return CPP_NUMBER;
1554       /* Numbers cannot have length zero, so this is safe.  */
1555       if ((b == CPP_PLUS || b == CPP_MINUS)
1556           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1557         return CPP_NUMBER;
1558       break;
1559
1560     default:
1561       break;
1562     }
1563
1564   return CPP_EOF;
1565 }
1566
1567 /* Returns nonzero if a space should be inserted to avoid an
1568    accidental token paste for output.  For simplicity, it is
1569    conservative, and occasionally advises a space where one is not
1570    needed, e.g. "." and ".2".  */
1571
1572 int
1573 cpp_avoid_paste (pfile, token1, token2)
1574      cpp_reader *pfile;
1575      const cpp_token *token1, *token2;
1576 {
1577   enum cpp_ttype a = token1->type, b = token2->type;
1578   cppchar_t c;
1579
1580   if (token1->flags & NAMED_OP)
1581     a = CPP_NAME;
1582   if (token2->flags & NAMED_OP)
1583     b = CPP_NAME;
1584
1585   c = EOF;
1586   if (token2->flags & DIGRAPH)
1587     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1588   else if (token_spellings[b].category == SPELL_OPERATOR)
1589     c = token_spellings[b].name[0];
1590
1591   /* Quickly get everything that can paste with an '='.  */
1592   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1593     return 1;
1594
1595   switch (a)
1596     {
1597     case CPP_GREATER:   return c == '>' || c == '?';
1598     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1599     case CPP_PLUS:      return c == '+';
1600     case CPP_MINUS:     return c == '-' || c == '>';
1601     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1602     case CPP_MOD:       return c == ':' || c == '>';
1603     case CPP_AND:       return c == '&';
1604     case CPP_OR:        return c == '|';
1605     case CPP_COLON:     return c == ':' || c == '>';
1606     case CPP_DEREF:     return c == '*';
1607     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1608     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1609     case CPP_NAME:      return ((b == CPP_NUMBER
1610                                  && name_p (pfile, &token2->val.str))
1611                                 || b == CPP_NAME
1612                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1613     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1614                                 || c == '.' || c == '+' || c == '-');
1615     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1616                                 && token1->val.c == '@'
1617                                 && (b == CPP_NAME || b == CPP_STRING));
1618     default:            break;
1619     }
1620
1621   return 0;
1622 }
1623
1624 /* Output all the remaining tokens on the current line, and a newline
1625    character, to FP.  Leading whitespace is removed.  */
1626 void
1627 cpp_output_line (pfile, fp)
1628      cpp_reader *pfile;
1629      FILE *fp;
1630 {
1631   cpp_token token;
1632
1633   cpp_get_token (pfile, &token);
1634   token.flags &= ~PREV_WHITE;
1635   while (token.type != CPP_EOF)
1636     {
1637       cpp_output_token (&token, fp);
1638       cpp_get_token (pfile, &token);
1639     }
1640
1641   putc ('\n', fp);
1642 }
1643
/* Memory pools.  */

/* This structure is never instantiated.  It exists only so that the
   compiler's padding reveals an alignment: U sits directly after a
   single char, so its offset is the alignment the compiler requires
   for a union of a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for chunk sizes, and for pools that do not ask for
   a specific one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1657
1658 static int
1659 chunk_suitable (pool, chunk, size)
1660      cpp_pool *pool;
1661      cpp_chunk *chunk;
1662      unsigned int size;
1663 {
1664   /* Being at least twice SIZE means we can use memcpy in
1665      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1666      anyway.  */
1667   return (chunk && pool->locked != chunk
1668           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1669 }
1670
1671 /* Returns the end of the new pool.  PTR points to a char in the old
1672    pool, and is updated to point to the same char in the new pool.  */
1673 unsigned char *
1674 _cpp_next_chunk (pool, len, ptr)
1675      cpp_pool *pool;
1676      unsigned int len;
1677      unsigned char **ptr;
1678 {
1679   cpp_chunk *chunk = pool->cur->next;
1680
1681   /* LEN is the minimum size we want in the new pool.  */
1682   len += POOL_ROOM (pool);
1683   if (! chunk_suitable (pool, chunk, len))
1684     {
1685       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1686
1687       chunk->next = pool->cur->next;
1688       pool->cur->next = chunk;
1689     }
1690
1691   /* Update the pointer before changing chunk's front.  */
1692   if (ptr)
1693     *ptr += chunk->base - POOL_FRONT (pool);
1694
1695   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1696   chunk->front = chunk->base;
1697
1698   pool->cur = chunk;
1699   return POOL_LIMIT (pool);
1700 }
1701
1702 static cpp_chunk *
1703 new_chunk (size)
1704      unsigned int size;
1705 {
1706   unsigned char *base;
1707   cpp_chunk *result;
1708
1709   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
1710   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1711   /* Put the chunk descriptor at the end.  Then chunk overruns will
1712      cause obvious chaos.  */
1713   result = (cpp_chunk *) (base + size);
1714   result->base = base;
1715   result->front = base;
1716   result->limit = base + size;
1717   result->next = 0;
1718
1719   return result;
1720 }
1721
1722 void
1723 _cpp_init_pool (pool, size, align, temp)
1724      cpp_pool *pool;
1725      unsigned int size, align, temp;
1726 {
1727   if (align == 0)
1728     align = DEFAULT_ALIGNMENT;
1729   if (align & (align - 1))
1730     abort ();
1731   pool->align = align;
1732   pool->cur = new_chunk (size);
1733   pool->locked = 0;
1734   pool->locks = 0;
1735   if (temp)
1736     pool->cur->next = pool->cur;
1737 }
1738
1739 void
1740 _cpp_lock_pool (pool)
1741      cpp_pool *pool;
1742 {
1743   if (pool->locks++ == 0)
1744     pool->locked = pool->cur;
1745 }
1746
1747 void
1748 _cpp_unlock_pool (pool)
1749      cpp_pool *pool;
1750 {
1751   if (--pool->locks == 0)
1752     pool->locked = 0;
1753 }
1754
1755 void
1756 _cpp_free_pool (pool)
1757      cpp_pool *pool;
1758 {
1759   cpp_chunk *chunk = pool->cur, *next;
1760
1761   do
1762     {
1763       next = chunk->next;
1764       free (chunk->base);
1765       chunk = next;
1766     }
1767   while (chunk && chunk != pool->cur);
1768 }
1769
1770 /* Reserve LEN bytes from a memory pool.  */
1771 unsigned char *
1772 _cpp_pool_reserve (pool, len)
1773      cpp_pool *pool;
1774      unsigned int len;
1775 {
1776   len = POOL_ALIGN (len, pool->align);
1777   if (len > (unsigned int) POOL_ROOM (pool))
1778     _cpp_next_chunk (pool, len, 0);
1779
1780   return POOL_FRONT (pool);
1781 }
1782
1783 /* Allocate LEN bytes from a memory pool.  */
1784 unsigned char *
1785 _cpp_pool_alloc (pool, len)
1786      cpp_pool *pool;
1787      unsigned int len;
1788 {
1789   unsigned char *result = _cpp_pool_reserve (pool, len);
1790
1791   POOL_COMMIT (pool, len);
1792   return result;
1793 }