gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41 #include "symcat.h"
  42
  43 /* Spelling category of a token type, as recorded in token_spellings.
  44    Tokens with SPELL_STRING store their spelling in the token list,
        and its length in the token->val.name.len.  */
  45 enum spell_type
  46 {
  47   SPELL_OPERATOR = 0,        /* Category of every OP () table entry.  */
  48   SPELL_CHAR,
  49   SPELL_IDENT,
  50   SPELL_STRING,
  51   SPELL_NONE
  52 };
  53
  54 struct token_spelling
  55 {
  56   enum spell_type category;    /* How tokens of this type are spelled.  */
  57   const unsigned char *name;   /* Operator text for OP () entries, the
                                       stringified enumerator for TK () ones.  */
  58 };
  59
  60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  61                                              U":>", U"<%", U"%>"};
     /* Digraph spellings.  NOTE(review): the element order is presumably
        relied upon by code that indexes this table elsewhere -- confirm
        before reordering.  */
  62
  63 #define OP(e, s) { SPELL_OPERATOR, U s           },
  64 #define TK(e, s) { s,              U STRINGX (e) },
     /* One token_spelling per entry of TTYPE_TABLE: OP entries get category
        SPELL_OPERATOR and the literal spelling S; TK entries get category S
        and the stringified enumerator name E.  */
  65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  66 #undef OP
  67 #undef TK
  68
     /* Spelling category and name recorded for the type of TOKEN.  */
  69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  71
  72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  75 /* Comment and whitespace skipping, token body parsing, diagnostics.  */
  76 static int skip_block_comment PARAMS ((cpp_reader *));
  77 static int skip_line_comment PARAMS ((cpp_reader *));
  78 static void adjust_column PARAMS ((cpp_reader *));
  79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  84 static void unterminated PARAMS ((cpp_reader *, int));
  85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
  89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  90 /* Chunk helpers; NOTE(review): presumably defined further down this file.  */
  91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
  92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  93
  94 /* Utility routine: report whether TOKEN is the name spelled by STRING.
  95
  96    STRING is NUL-terminated.  Returns 1 only when TOKEN is a CPP_NAME
  97    whose node name compares equal to STRING, and 0 in every other case.  */
  98
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   /* Tokens that are not names never match, whatever their spelling.  */
 105   return (token->type == CPP_NAME
 106           && ustrcmp (token->val.node->name,
 107                       (const U_CHAR *) string) == 0);
 108 }
 109
 110 /* Call when meeting a newline.  Returns the character after the newline
 111    (or carriage-return newline combination), or EOF.

        NEWLINE_CHAR is the newline character that has just been consumed
        from BUFFER.  The line number is incremented, col_adjust is reset,
        line_base is moved to the first character of the new line, and the
        returned character is also left in BUFFER->read_ahead.  */
 112 static cppchar_t
 113 handle_newline (buffer, newline_char)
 114      cpp_buffer *buffer;
 115      cppchar_t newline_char;
 116 {
 117   cppchar_t next = EOF;
 118
 119   buffer->col_adjust = 0;
 120   buffer->lineno++;
 121   buffer->line_base = buffer->cur;
 122
 123   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 124   if (buffer->cur < buffer->rlimit)
 125     {
 126       next = *buffer->cur++;
         /* The sums match only when NEXT is the other member of the CR/LF
            pair (given that NEWLINE_CHAR is itself a CR or an LF).  */
 127       if (next + newline_char == '\r' + '\n')
 128         {
             /* The second character belongs to the same line ending, so
                the new line starts after it.  */
 129           buffer->line_base = buffer->cur;
 130           if (buffer->cur < buffer->rlimit)
 131             next = *buffer->cur++;
 132           else
 133             next = EOF;
 134         }
 135     }
 136
 137   buffer->read_ahead = next;
 138   return next;
 139 }
 140
 141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 142    encountered.  It warns if necessary, and returns true if the
 143    trigraph should be honoured.  FROM_CHAR is the third character of a
 144    trigraph, and presumed to be the previous character for position
 145    reporting.

        The return value is simply the trigraphs option.  Warnings are
        issued only when the warn_trigraphs option is set and we are not
        lexing a comment.  */
 146 static int
 147 trigraph_ok (pfile, from_char)
 148      cpp_reader *pfile;
 149      cppchar_t from_char;
 150 {
 151   int accept = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Don't warn about trigraphs in comments.  */
 154   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 155     {
 156       cpp_buffer *buffer = pfile->buffer;
         /* NOTE(review): the "- 2" presumably moves the reported column
            back to the first '?' of the trigraph -- confirm.  */
 157       if (accept)
 158         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
         /* An ignored trigraph is reported only once per buffer position;
            last_Wtrigraphs remembers where the last report was made.  */
 162       else if (buffer->cur != buffer->last_Wtrigraphs)
 163         {
 164           buffer->last_Wtrigraphs = buffer->cur;
 165           cpp_warning_with_line (pfile, buffer->lineno,
 166                                  CPP_BUF_COL (buffer) - 2,
 167                                  "trigraph ??%c ignored", (int) from_char);
 168         }
 169     }
 170
 171   return accept;
 172 }
 173
 174 /* Assumes local variables buffer and result.  Gives the token type T
        and clears buffer->read_ahead, i.e. consumes the look-ahead
        character as part of the token.  */
 175 #define ACCEPT_CHAR(t) \
 176   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 177
 178 /* When we move to multibyte character sets, add to these something
 179    that saves and restores the state of the multibyte conversion
 180    library.  This probably involves saving and restoring a "cookie".
 181    In the case of glibc it is an 8-byte structure, so is not a high
 182    overhead operation.  In any case, it's out of the fast path.

        Both macros assume local variables buffer and saved_cur; for now
        the only state saved is the read position buffer->cur.  */
 183 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 184 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 185
 186 /* Skips any escaped newlines introduced by NEXT, which is either a
 187    '?' or a '\\'.  Returns the next character, which will also have
 188    been placed in buffer->read_ahead.  This routine performs
 189    preprocessing stages 1 and 2 of the ISO C standard.

        If BUFFER->from_stage3 is set, nothing is skipped and NEXT is
        returned as is.  Otherwise trigraphs accepted by trigraph_ok are
        replaced by the character they stand for, and each backslash
        followed by optional horizontal space and a newline is dropped.
        The loop repeats while the character that follows is again a '?'
        or a '\\'.  Whenever a candidate sequence turns out not to be
        one, BUFFER->cur is rewound to the last saved position.  */
 190 static cppchar_t
 191 skip_escaped_newlines (buffer, next)
 192      cpp_buffer *buffer;
 193      cppchar_t next;
 194 {
 195   /* Only do this if we apply stages 1 and 2.  */
 196   if (!buffer->from_stage3)
 197     {
 198       cppchar_t next1;
 199       const unsigned char *saved_cur;
 200       int space;
 201
 202       do
 203         {
             /* Nothing can follow NEXT, so it stands for itself.  */
 204           if (buffer->cur == buffer->rlimit)
 205             break;
 206
 207           SAVE_STATE ();
 208           if (next == '?')
 209             {
                 /* A trigraph needs a second '?' and then a third
                    character; otherwise the '?' is an ordinary one.  */
 210               next1 = *buffer->cur++;
 211               if (next1 != '?' || buffer->cur == buffer->rlimit)
 212                 {
 213                   RESTORE_STATE ();
 214                   break;
 215                 }
 216
 217               next1 = *buffer->cur++;
                 /* Not a trigraph character, or trigraphs are disabled.  */
 218               if (!_cpp_trigraph_map[next1]
 219                   || !trigraph_ok (buffer->pfile, next1))
 220                 {
 221                   RESTORE_STATE ();
 222                   break;
 223                 }
 224
 225               /* We have a full trigraph here.  */
 226               next = _cpp_trigraph_map[next1];
 227               if (next != '\\' || buffer->cur == buffer->rlimit)
 228                 break;
                 /* The trigraph stood for a backslash: any rewind from
                    here on must keep the trigraph consumed.  */
 229               SAVE_STATE ();
 230             }
 231
 232           /* We have a backslash, and room for at least one more character.  */
 233           space = 0;
 234           do
 235             {
 236               next1 = *buffer->cur++;
 237               if (!is_nvspace (next1))
 238                 break;
 239               space = 1;
 240             }
 241           while (buffer->cur < buffer->rlimit);
 242
             /* No newline after the backslash: it is a plain backslash.  */
 243           if (!is_vspace (next1))
 244             {
 245               RESTORE_STATE ();
 246               break;
 247             }
 248
 249           if (space && !buffer->pfile->state.lexing_comment)
 250             cpp_warning (buffer->pfile,
 251                          "backslash and newline separated by space");
 252
 253           next = handle_newline (buffer, next1);
 254           if (next == EOF)
 255             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 256         }
 257       while (next == '\\' || next == '?');
 258     }
 259
 260   buffer->read_ahead = next;
 261   return next;
 262 }
 263
 264 /* Fetch the next character of BUFFER, letting skip_escaped_newlines
 265    deal with any trigraph or escaped newlines that it introduces.  The
 266    result, EOF at the end of the buffer, is also left in read_ahead.  */
 267 static cppchar_t
 268 get_effective_char (buffer)
 269      cpp_buffer *buffer;
 270 {
 271   cppchar_t ch;
 272
 273   if (buffer->cur >= buffer->rlimit)
 274     ch = EOF;
 275   else
 276     {
 277       ch = *buffer->cur++;
 278
 279       /* A '?' may begin a trigraph (and so a backslash), a '\\' may
 280          begin an escaped newline; nothing else needs the slow path.  */
 281       if (ch == '\\' || ch == '?')
 282         ch = skip_escaped_newlines (buffer, ch);
 283     }
 284
 285   buffer->read_ahead = ch;
 286   return ch;
 287 }
 288
 289 /* Skip a C-style block comment.  We find the end of the comment by
 290    seeing if an asterisk is before every '/' we encounter.  Returns
 291    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set while scanning and cleared on
        exit; buffer->read_ahead is left as EOF.  */
 292 static int
 293 skip_block_comment (pfile)
 294      cpp_reader *pfile;
 295 {
 296   cpp_buffer *buffer = pfile->buffer;
 297   cppchar_t c = EOF, prevc = EOF;
 298
 299   pfile->state.lexing_comment = 1;
 300   while (buffer->cur != buffer->rlimit)
 301     {
 302       prevc = c, c = *buffer->cur++;
 303
         /* Entered by goto when C has already been fetched.  */
 304     next_char:
 305       /* FIXME: For speed, create a new character class of characters
 306          of interest inside block comments.  */
 307       if (c == '?' || c == '\\')
 308         c = skip_escaped_newlines (buffer, c);
 309
 310       /* People like decorating comments with '*', so check for '/'
 311          instead for efficiency.  */
 312       if (c == '/')
 313         {
 314           if (prevc == '*')
 315             break;
 316
 317           /* Warn about potential nested comments, but not if the '/'
 318              comes immediately before the true comment delimiter.
 319              Don't bother to get it right across escaped newlines.  */
 320           if (CPP_OPTION (pfile, warn_comments)
 321               && buffer->cur != buffer->rlimit)
 322             {
 323               prevc = c, c = *buffer->cur++;
 324               if (c == '*' && buffer->cur != buffer->rlimit)
 325                 {
 326                   prevc = c, c = *buffer->cur++;
 327                   if (c != '/')
 328                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 329                                            CPP_BUF_COL (buffer),
 330                                            "\"/*\" within comment");
 331                 }
               /* C holds a character that has not been examined yet.  */
 332               goto next_char;
 333             }
 334         }
 335       else if (is_vspace (c))
 336         {
             /* handle_newline returns the character after the newline.  */
 337           prevc = c, c = handle_newline (buffer, c);
 338           goto next_char;
 339         }
 340       else if (c == '\t')
 341         adjust_column (pfile);
 342     }
 343
 344   pfile->state.lexing_comment = 0;
 345   buffer->read_ahead = EOF;
       /* Zero only if the last two characters seen were the closing
          asterisk and slash.  */
 346   return c != '/' || prevc != '*';
 347 }
 348
 349 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 350    non-zero if a multiline comment.  The following new line, if any,
 351    is left in buffer->read_ahead.

        "Multiline" means the buffer's line number changed while skipping,
        which here can only come from escaped newlines.  EOF is left in
        read_ahead if the buffer ends first.  */
 352 static int
 353 skip_line_comment (pfile)
 354      cpp_reader *pfile;
 355 {
 356   cpp_buffer *buffer = pfile->buffer;
 357   unsigned int orig_lineno = buffer->lineno;
 358   cppchar_t c;
 359
 360   pfile->state.lexing_comment = 1;
 361   do
 362     {
         /* Preset to EOF so that running out of input ends the loop
            with C == EOF.  */
 363       c = EOF;
 364       if (buffer->cur == buffer->rlimit)
 365         break;
 366
 367       c = *buffer->cur++;
 368       if (c == '?' || c == '\\')
 369         c = skip_escaped_newlines (buffer, c);
 370     }
 371   while (!is_vspace (c));
 372
 373   pfile->state.lexing_comment = 0;
 374   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 375   return orig_lineno != buffer->lineno;
 376 }
 377
 378 /* Called once a tab has been consumed, so pfile->buffer->cur is one
 379    past it.  Grows col_adjust so the tab reaches the next tabstop.  */
 380 static void
 381 adjust_column (pfile)
 382      cpp_reader *pfile;
 383 {
 384   cpp_buffer *buf = pfile->buffer;
 385   unsigned int tab_col = CPP_BUF_COL (buf) - 1; /* Zero-based.  */
 386
 387   /* Add the distance to the next multiple of the tabstop, less one
 388      because the tab character itself already counts as a column.  */
 389   buf->col_adjust = buf->col_adjust + ((CPP_OPTION (pfile, tabstop)
 390                       - tab_col % CPP_OPTION (pfile, tabstop)) - 1);
 391 }
 392
 393 /* Skips whitespace, saving the next non-whitespace character.
 394    Adjusts pfile->buffer->col_adjust to account for tabs.  Without this,
 395    tokens might be assigned an incorrect column.

        C is the first whitespace character, already consumed.  On return
        buffer->read_ahead holds the first character that is not
        non-vertical space, or EOF if the buffer ran out.  */
 396 static void
 397 skip_whitespace (pfile, c)
 398      cpp_reader *pfile;
 399      cppchar_t c;
 400 {
 401   cpp_buffer *buffer = pfile->buffer;
 402   unsigned int warned = 0;      /* Warn about nulls once per call.  */
 403
 404   do
 405     {
 406       /* Horizontal space always OK.  */
 407       if (c == ' ')
 408         ;
 409       else if (c == '\t')
 410         adjust_column (pfile);
 411       /* Just \f \v or \0 left.  */
 412       else if (c == '\0')
 413         {
 414           if (!warned)
 415             {
 416               cpp_warning (pfile, "null character(s) ignored");
 417               warned = 1;
 418             }
 419         }
 420       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 421         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 422                                CPP_BUF_COL (buffer),
 423                                "%s in preprocessing directive",
 424                                c == '\f' ? "form feed" : "vertical tab");
 425
         /* Fetch the next character; EOF ends the loop.  */
 426       c = EOF;
 427       if (buffer->cur == buffer->rlimit)
 428         break;
 429       c = *buffer->cur++;
 430     }
 431   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 432   while (is_nvspace (c));
 433
 434   /* Remember the next character.  */
 435   buffer->read_ahead = c;
 436 }
 437
 438 /* Return 1 if every character of STRING passes is_idchar, so that a
 439    number's spelling is also valid as a name (no '.', '+' or '-').  */
 440 static int
 441 name_p (pfile, string)
 442      cpp_reader *pfile;
 443      const cpp_string *string;
 444 {
 445   unsigned int pos;
 446   int valid = 1;
 447
 448   for (pos = 0; valid && pos < string->len; pos++)
 449     if (!is_idchar (string->text[pos]))
 450       valid = 0;
 451   return valid;
 452 }
 453
 454 /* Parse an identifier, skipping embedded backslash-newlines.
 455    Calculate the hash value of the token while parsing, for improved
 456    performance.  The hashing algorithm *must* match cpp_lookup().

        C is the identifier's first character, already consumed.  The
        spelling is built at the front of pfile->ident_pool and looked up
        with _cpp_lookup_with_hash; the resulting node is returned.  The
        character that ended the identifier is left in
        buffer->read_ahead.  */
 457
 458 static cpp_hashnode *
 459 parse_identifier (pfile, c)
 460      cpp_reader *pfile;
 461      cppchar_t c;
 462 {
 463   cpp_hashnode *result;
 464   cpp_buffer *buffer = pfile->buffer;
 465   unsigned char *dest, *limit;
 466   unsigned int r = 0, saw_dollar = 0;
 467
 468   dest = POOL_FRONT (&pfile->ident_pool);
 469   limit = POOL_LIMIT (&pfile->ident_pool);
 470
       /* The inner loop copies a run of identifier characters; the outer
          loop resumes it when an escaped newline or trigraph is followed
          by more identifier characters.  */
 471   do
 472     {
 473       do
 474         {
 475           /* Need room for terminating null.  */
             /* NOTE(review): _cpp_next_chunk presumably moves the partial
                spelling to a larger chunk and updates DEST -- confirm.  */
 476           if (dest + 1 >= limit)
 477             limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
 478
 479           *dest++ = c;
 480           r = HASHSTEP (r, c);
 481
 482           if (c == '$')
 483             saw_dollar++;
 484
 485           c = EOF;
 486           if (buffer->cur == buffer->rlimit)
 487             break;
 488
 489           c = *buffer->cur++;
 490         }
 491       while (is_idchar (c));
 492
 493       /* Potential escaped newline?  */
 494       if (c != '?' && c != '\\')
 495         break;
 496       c = skip_escaped_newlines (buffer, c);
 497     }
 498   while (is_idchar (c));
 499
 500   /* Remember the next character.  */
 501   buffer->read_ahead = c;
 502
 503   /* $ is not an identifier character in the standard, but is commonly
 504      accepted as an extension.  Don't warn about it in skipped
 505      conditional blocks.  */
 506   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 507     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 508
 509   /* Identifiers are null-terminated.  */
 510   *dest = '\0';
 511
 512   /* This routine commits the memory if necessary.  */
 513   result = _cpp_lookup_with_hash (pfile,
 514                                   dest - POOL_FRONT (&pfile->ident_pool), r);
 515
 516   /* Some identifiers require diagnostics when lexed.  */
 517   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 518     {
 519       /* It is allowed to poison the same identifier twice.  */
 520       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 521         cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
 522
 523       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 524          replacement list of a variadic macro.  */
 525       if (result == pfile->spec_nodes.n__VA_ARGS__
 526           && !pfile->state.va_args_ok)
 527         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Parse a number, skipping embedded backslash-newlines.

        C is the first character of the number, already consumed.  If
        LEADING_PERIOD is non-zero a '.' is stored before it.  The
        NUL-terminated spelling is committed to pfile->ident_pool and
        described by NUMBER (text and length, the length excluding the
        NUL).  The character that ended the number is left in
        buffer->read_ahead.  */
 534 static void
 535 parse_number (pfile, number, c, leading_period)
 536      cpp_reader *pfile;
 537      cpp_string *number;
 538      cppchar_t c;
 539      int leading_period;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   cpp_pool *pool = &pfile->ident_pool;
 543   unsigned char *dest, *limit;
 544
 545   dest = POOL_FRONT (pool);
 546   limit = POOL_LIMIT (pool);
 547
 548   /* Place a leading period.  */
 549   if (leading_period)
 550     {
 551       if (dest >= limit)
 552         limit = _cpp_next_chunk (pool, 0, &dest);
 553       *dest++ = '.';
 554     }
 555
       /* The inner loop copies a run of number characters; the outer loop
          resumes it when an escaped newline or trigraph is followed by
          more of them.  VALID_SIGN is given the character stored last.  */
 556   do
 557     {
 558       do
 559         {
 560           /* Need room for terminating null.  */
 561           if (dest + 1 >= limit)
 562             limit = _cpp_next_chunk (pool, 0, &dest);
 563           *dest++ = c;
 564
 565           c = EOF;
 566           if (buffer->cur == buffer->rlimit)
 567             break;
 568
 569           c = *buffer->cur++;
 570         }
 571       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 572
 573       /* Potential escaped newline?  */
 574       if (c != '?' && c != '\\')
 575         break;
 576       c = skip_escaped_newlines (buffer, c);
 577     }
 578   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 579
 580   /* Remember the next character.  */
 581   buffer->read_ahead = c;
 582
 583   /* Null-terminate the number.  */
 584   *dest = '\0';
 585
 586   number->text = POOL_FRONT (pool);
 587   number->len = dest - number->text;
       /* The extra byte keeps the terminating null.  */
 588   POOL_COMMIT (pool, number->len + 1);
 589 }
 590
 591 /* Helper for parse_string: report a literal lacking its closing TERM.  */
 592 static void
 593 unterminated (pfile, term)
 594      cpp_reader *pfile;
 595      int term;
 596 {
 597   cpp_error (pfile, "missing terminating %c character", term);
 598
 599   /* Only a string with a recorded start on another line gets a hint.  */
 600   if (term != '\"' || pfile->mlstring_pos.line == 0
 601       || pfile->mlstring_pos.line == pfile->lexer_pos.line)
 602     return;
 603   cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 604                        pfile->mlstring_pos.col,
 605                        "possible start of unterminated string literal");
 606   pfile->mlstring_pos.line = 0;
 607 }
 608
 609 /* Subroutine of parse_string.  Returns 1 if a terminator met with DEST
 610    as the write position is not escaped by backslashes, 0 if it is.  */
 611 static int
 612 unescaped_terminator_p (pfile, dest)
 613      cpp_reader *pfile;
 614      const unsigned char *dest;
 615 {
 616   const unsigned char *pool_start, *scan;
 617   unsigned int backslashes = 0;
 618
 619   /* In #include-style directives, terminators cannot be escaped.  */
 620   if (pfile->state.angled_headers)
 621     return 1;
 622
 623   /* Count the run of backslashes stored immediately before DEST.  */
 624   pool_start = POOL_FRONT (&pfile->ident_pool);
 625   for (scan = dest; scan > pool_start && scan[-1] == '\\'; scan--)
 626     backslashes++;
 627
 628   return (backslashes & 1) == 0;        /* An odd run escapes it.  */
 629 }
 630
 631 /* Parses a string, character constant, or angle-bracketed header file
 632    name.  Handles embedded trigraphs and escaped newlines.
 633
 634    Multi-line strings are allowed, but they are deprecated within
 635    directives.

        TERMINATOR is the closing character to look for; the opening
        character has already been consumed.  The spelling, without the
        terminator and without a trailing null, is committed to
        pfile->ident_pool and described by TOKEN->val.str.  */
 636 static void
 637 parse_string (pfile, token, terminator)
 638      cpp_reader *pfile;
 639      cpp_token *token;
 640      cppchar_t terminator;
 641 {
 642   cpp_buffer *buffer = pfile->buffer;
 643   cpp_pool *pool = &pfile->ident_pool;
 644   unsigned char *dest, *limit;
 645   cppchar_t c;
 646   unsigned int nulls = 0;
 647
 648   dest = POOL_FRONT (pool);
 649   limit = POOL_LIMIT (pool);
 650
 651   for (;;)
 652     {
 653       if (buffer->cur == buffer->rlimit)
 654         {
 655           c = EOF;
 656           unterminated (pfile, terminator);
 657           break;
 658         }
 659       c = *buffer->cur++;
 660
         /* Entered by goto with C taken from read_ahead after a newline.  */
 661     have_char:
 662       /* Handle trigraphs, escaped newlines etc.  */
 663       if (c == '?' || c == '\\')
 664         c = skip_escaped_newlines (buffer, c);
 665
 666       if (c == terminator && unescaped_terminator_p (pfile, dest))
 667         {
             /* The terminator is consumed, so no look-ahead remains.  */
 668           c = EOF;
 669           break;
 670         }
 671       else if (is_vspace (c))
 672         {
 673           /* In assembly language, silently terminate string and
 674              character literals at end of line.  This is a kludge
 675              around not knowing where comments are.  */
 676           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 677             break;
 678
 679           /* Character constants and header names may not extend over
 680              multiple lines.  In Standard C, neither may strings.
 681              Unfortunately, we accept multiline strings as an
 682              extension, except in #include family directives.  */
 683           if (terminator != '"' || pfile->state.angled_headers)
 684             {
 685               unterminated (pfile, terminator);
 686               break;
 687             }
 688
 689           if (! cpp_sys_macro_p (pfile))
 690             cpp_pedwarn (pfile, "multi-line string constants are deprecated");
             /* Remember where the first multi-line string began, for the
                hint that unterminated gives.  */
 691           if (pfile->mlstring_pos.line == 0)
 692             pfile->mlstring_pos = pfile->lexer_pos;
 693
 694           handle_newline (buffer, c);  /* Stores to read_ahead.  */
             /* Whatever the line ending was, store a single '\n'.  */
 695           c = '\n';
 696         }
 697       else if (c == '\0')
 698         {
 699           if (nulls++ == 0)
 700             cpp_warning (pfile, "null character(s) preserved in literal");
 701         }
 702
 703       /* No terminating null for strings - they could contain nulls.  */
 704       if (dest >= limit)
 705         limit = _cpp_next_chunk (pool, 0, &dest);
 706       *dest++ = c;
 707
 708       /* If we had a new line, the next character is in read_ahead.  */
 709       if (c != '\n')
 710         continue;
 711       c = buffer->read_ahead;
 712       if (c != EOF)
 713         goto have_char;
         /* Otherwise loop round; the end-of-buffer test at the top then
            reports the unterminated literal.  */
 714     }
 715
 716   /* Remember the next character.  */
 717   buffer->read_ahead = c;
 718
 719   token->val.str.text = POOL_FRONT (pool);
 720   token->val.str.len = dest - token->val.str.text;
 721   POOL_COMMIT (pool, token->val.str.len);
 722 }
 723
 724 /* The stored comment includes the comment start and any terminator.

        FROM points just past the initial '/' of the comment, and the
        buffer's read position marks where the comment ended.  The text
        is copied into memory from pfile->ident_pool, and TOKEN becomes a
        CPP_COMMENT whose val.str describes the copy.  */
 725 static void
 726 save_comment (pfile, token, from)
 727      cpp_reader *pfile;
 728      cpp_token *token;
 729      const unsigned char *from;
 730 {
 731   unsigned char *buffer;
 732   unsigned int len;
 733
 734   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 735   /* C++ comments probably (not definitely) have moved past a new
 736      line, which we don't want to save in the comment.  */
 737   if (pfile->buffer->read_ahead != EOF)
 738     len--;
 739   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 740
 741   token->type = CPP_COMMENT;
 742   token->val.str.len = len;
 743   token->val.str.text = buffer;
 744
       /* FROM starts after the '/', so put that back first.  */
 745   buffer[0] = '/';
 746   memcpy (buffer + 1, from, len - 1);
 747 }
 748
 749 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 750    want to avoid stepping back when lexing %:%X.

        RESULT starts as CPP_MOD and becomes CPP_MOD_EQ for "%=".  With
        the digraphs option it becomes CPP_HASH for "%:", CPP_PASTE for
        "%:%:" and CPP_CLOSE_BRACE for "%>", with the DIGRAPH flag set in
        each of these cases.  A character read but not used is left in
        buffer->read_ahead.  */
 751 static void
 752 lex_percent (buffer, result)
 753      cpp_buffer *buffer;
 754      cpp_token *result;
 755 {
 756   cppchar_t c;
 757
 758   result->type = CPP_MOD;
 759   /* Parsing %:%X could leave an extra character.  */
 760   if (buffer->extra_char == EOF)
 761     c = get_effective_char (buffer);
 762   else
 763     {
 764       c = buffer->read_ahead = buffer->extra_char;
 765       buffer->extra_char = EOF;
 766     }
 767
 768   if (c == '=')
 769     ACCEPT_CHAR (CPP_MOD_EQ);
 770   else if (CPP_OPTION (buffer->pfile, digraphs))
 771     {
 772       if (c == ':')
 773         {
 774           result->flags |= DIGRAPH;
 775           ACCEPT_CHAR (CPP_HASH);
             /* Look for a second "%:", which would make this "%:%:".  */
 776           if (get_effective_char (buffer) == '%')
 777             {
 778               buffer->extra_char = get_effective_char (buffer);
 779               if (buffer->extra_char == ':')
 780                 {
 781                   buffer->extra_char = EOF;
 782                   ACCEPT_CHAR (CPP_PASTE);
 783                 }
 784               else
 785                 /* We'll catch the extra_char when we're called back.  */
 786                 buffer->read_ahead = '%';
 787             }
 788         }
 789       else if (c == '>')
 790         {
 791           result->flags |= DIGRAPH;
 792           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 793         }
 794     }
 795 }
 796
 797 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 798    want to avoid stepping back when lexing '...' or '.123'.  In the
 799    latter case we should also set a flag for parse_number.

        RESULT becomes a CPP_NUMBER when a digit follows the dot,
        CPP_ELLIPSIS for "...", CPP_DOT_STAR for ".*" under the cplusplus
        option, and CPP_DOT otherwise.  */
 800 static void
 801 lex_dot (pfile, result)
 802      cpp_reader *pfile;
 803      cpp_token *result;
 804 {
 805   cpp_buffer *buffer = pfile->buffer;
 806   cppchar_t c;
 807
 808   /* Parsing ..X could leave an extra character.  */
 809   if (buffer->extra_char == EOF)
 810     c = get_effective_char (buffer);
 811   else
 812     {
 813       c = buffer->read_ahead = buffer->extra_char;
 814       buffer->extra_char = EOF;
 815     }
 816
 817   /* All known character sets have 0...9 contiguous.  */
 818   if (c >= '0' && c <= '9')
 819     {
 820       result->type = CPP_NUMBER;
         /* The final argument asks parse_number to store the '.' first.  */
 821       parse_number (pfile, &result->val.str, c, 1);
 822     }
 823   else
 824     {
 825       result->type = CPP_DOT;
 826       if (c == '.')
 827         {
             /* Two dots so far; a third one makes an ellipsis.  */
 828           buffer->extra_char = get_effective_char (buffer);
 829           if (buffer->extra_char == '.')
 830             {
 831               buffer->extra_char = EOF;
 832               ACCEPT_CHAR (CPP_ELLIPSIS);
 833             }
 834           else
 835             /* We'll catch the extra_char when we're called back.  */
 836             buffer->read_ahead = '.';
 837         }
 838       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 839         ACCEPT_CHAR (CPP_DOT_STAR);
 840     }
 841 }
 842
 843 void
 844 _cpp_lex_token (pfile, result)
 845      cpp_reader *pfile;
 846      cpp_token *result;
 847 {
 848   cppchar_t c;
 849   cpp_buffer *buffer;
 850   const unsigned char *comment_start;
 851   unsigned char bol;
 852
 853  skip:
 854   bol = pfile->state.next_bol;
 855  done_directive:
 856   buffer = pfile->buffer;
 857   pfile->state.next_bol = 0;
 858   result->flags = buffer->saved_flags;
 859   buffer->saved_flags = 0;
 860  next_char:
 861   pfile->lexer_pos.line = buffer->lineno;
 862  next_char2:
 863   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 864
 865   c = buffer->read_ahead;
 866   if (c == EOF && buffer->cur < buffer->rlimit)
 867     {
 868       c = *buffer->cur++;
 869       pfile->lexer_pos.col++;
 870     }
 871
 872  do_switch:
 873   buffer->read_ahead = EOF;
 874   switch (c)
 875     {
 876     case EOF:
 877       /* Non-empty files should end in a newline.  Ignore for command
 878          line and _Pragma buffers.  */
 879       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 880         cpp_pedwarn (pfile, "no newline at end of file");
 881       pfile->state.next_bol = 1;
 882       pfile->skipping = 0;      /* In case missing #endif.  */
 883       result->type = CPP_EOF;
 884       /* Don't do MI optimisation.  */
 885       return;
 886
 887     case ' ': case '\t': case '\f': case '\v': case '\0':
 888       skip_whitespace (pfile, c);
 889       result->flags |= PREV_WHITE;
 890       goto next_char2;
 891
 892     case '\n': case '\r':
 893       if (!pfile->state.in_directive)
 894         {
 895           handle_newline (buffer, c);
 896           bol = 1;
 897           pfile->lexer_pos.output_line = buffer->lineno;
 898           /* This is a new line, so clear any white space flag.
 899              Newlines in arguments are white space (6.10.3.10);
 900              parse_arg takes care of that.  */
 901           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 902           goto next_char;
 903         }
 904
 905       /* Don't let directives spill over to the next line.  */
 906       buffer->read_ahead = c;
 907       pfile->state.next_bol = 1;
 908       result->type = CPP_EOF;
 909       /* Don't break; pfile->skipping might be true.  */
 910       return;
 911
 912     case '?':
 913     case '\\':
 914       /* These could start an escaped newline, or '?' a trigraph.  Let
 915          skip_escaped_newlines do all the work.  */
 916       {
 917         unsigned int lineno = buffer->lineno;
 918
 919         c = skip_escaped_newlines (buffer, c);
 920         if (lineno != buffer->lineno)
 921           /* We had at least one escaped newline of some sort, and the
 922              next character is in buffer->read_ahead.  Update the
 923              token's line and column.  */
 924             goto next_char;
 925
 926         /* We are either the original '?' or '\\', or a trigraph.  */
 927         result->type = CPP_QUERY;
 928         buffer->read_ahead = EOF;
 929         if (c == '\\')
 930           goto random_char;
 931         else if (c != '?')
 932           goto do_switch;
 933       }
 934       break;
 935
 936     case '0': case '1': case '2': case '3': case '4':
 937     case '5': case '6': case '7': case '8': case '9':
 938       result->type = CPP_NUMBER;
 939       parse_number (pfile, &result->val.str, c, 0);
 940       break;
 941
 942     case '$':
 943       if (!CPP_OPTION (pfile, dollars_in_ident))
 944         goto random_char;
 945       /* Fall through... */
 946
 947     case '_':
 948     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 949     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 950     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 951     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 952     case 'y': case 'z':
 953     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 954     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 955     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 956     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 957     case 'Y': case 'Z':
 958       result->type = CPP_NAME;
 959       result->val.node = parse_identifier (pfile, c);
 960
 961       /* 'L' may introduce wide characters or strings.  */
 962       if (result->val.node == pfile->spec_nodes.n_L)
 963         {
 964           c = buffer->read_ahead; /* For make_string.  */
 965           if (c == '\'' || c == '"')
 966             {
 967               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 968               goto make_string;
 969             }
 970         }
 971       /* Convert named operators to their proper types.  */
 972       else if (result->val.node->flags & NODE_OPERATOR)
 973         {
 974           result->flags |= NAMED_OP;
 975           result->type = result->val.node->value.operator;
 976         }
 977       break;
 978
 979     case '\'':
 980     case '"':
 981       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 982     make_string:
 983       parse_string (pfile, result, c);
 984       break;
 985
 986     case '/':
 987       /* A potential block or line comment.  */
 988       comment_start = buffer->cur;
 989       result->type = CPP_DIV;
 990       c = get_effective_char (buffer);
 991       if (c == '=')
 992         ACCEPT_CHAR (CPP_DIV_EQ);
 993       if (c != '/' && c != '*')
 994         break;
 995
 996       if (c == '*')
 997         {
 998           if (skip_block_comment (pfile))
 999             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1000                                  pfile->lexer_pos.col,
1001                                  "unterminated comment");
1002         }
1003       else
1004         {
1005           if (!CPP_OPTION (pfile, cplusplus_comments)
1006               && !CPP_IN_SYSTEM_HEADER (pfile))
1007             break;
1008
1009           /* Warn about comments only if pedantically GNUC89, and not
1010              in system headers.  */
1011           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1012               && ! buffer->warned_cplusplus_comments)
1013             {
1014               cpp_pedwarn (pfile,
1015                            "C++ style comments are not allowed in ISO C89");
1016               cpp_pedwarn (pfile,
1017                            "(this will be reported only once per input file)");
1018               buffer->warned_cplusplus_comments = 1;
1019             }
1020
1021           /* Skip_line_comment updates buffer->read_ahead.  */
1022           if (skip_line_comment (pfile))
1023             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1024                                    pfile->lexer_pos.col,
1025                                    "multi-line comment");
1026         }
1027
1028       /* Skipping the comment has updated buffer->read_ahead.  */
1029       if (!pfile->state.save_comments)
1030         {
1031           result->flags |= PREV_WHITE;
1032           goto next_char;
1033         }
1034
1035       /* Save the comment as a token in its own right.  */
1036       save_comment (pfile, result, comment_start);
1037       /* Don't do MI optimisation.  */
1038       return;
1039
1040     case '<':
1041       if (pfile->state.angled_headers)
1042         {
1043           result->type = CPP_HEADER_NAME;
1044           c = '>';              /* terminator.  */
1045           goto make_string;
1046         }
1047
1048       result->type = CPP_LESS;
1049       c = get_effective_char (buffer);
1050       if (c == '=')
1051         ACCEPT_CHAR (CPP_LESS_EQ);
1052       else if (c == '<')
1053         {
1054           ACCEPT_CHAR (CPP_LSHIFT);
1055           if (get_effective_char (buffer) == '=')
1056             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1057         }
1058       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1059         {
1060           ACCEPT_CHAR (CPP_MIN);
1061           if (get_effective_char (buffer) == '=')
1062             ACCEPT_CHAR (CPP_MIN_EQ);
1063         }
1064       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1065         {
1066           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1067           result->flags |= DIGRAPH;
1068         }
1069       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1070         {
1071           ACCEPT_CHAR (CPP_OPEN_BRACE);
1072           result->flags |= DIGRAPH;
1073         }
1074       break;
1075
1076     case '>':
1077       result->type = CPP_GREATER;
1078       c = get_effective_char (buffer);
1079       if (c == '=')
1080         ACCEPT_CHAR (CPP_GREATER_EQ);
1081       else if (c == '>')
1082         {
1083           ACCEPT_CHAR (CPP_RSHIFT);
1084           if (get_effective_char (buffer) == '=')
1085             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1086         }
1087       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1088         {
1089           ACCEPT_CHAR (CPP_MAX);
1090           if (get_effective_char (buffer) == '=')
1091             ACCEPT_CHAR (CPP_MAX_EQ);
1092         }
1093       break;
1094
1095     case '%':
1096       lex_percent (buffer, result);
1097       if (result->type == CPP_HASH)
1098         goto do_hash;
1099       break;
1100
1101     case '.':
1102       lex_dot (pfile, result);
1103       break;
1104
1105     case '+':
1106       result->type = CPP_PLUS;
1107       c = get_effective_char (buffer);
1108       if (c == '=')
1109         ACCEPT_CHAR (CPP_PLUS_EQ);
1110       else if (c == '+')
1111         ACCEPT_CHAR (CPP_PLUS_PLUS);
1112       break;
1113
1114     case '-':
1115       result->type = CPP_MINUS;
1116       c = get_effective_char (buffer);
1117       if (c == '>')
1118         {
1119           ACCEPT_CHAR (CPP_DEREF);
1120           if (CPP_OPTION (pfile, cplusplus)
1121               && get_effective_char (buffer) == '*')
1122             ACCEPT_CHAR (CPP_DEREF_STAR);
1123         }
1124       else if (c == '=')
1125         ACCEPT_CHAR (CPP_MINUS_EQ);
1126       else if (c == '-')
1127         ACCEPT_CHAR (CPP_MINUS_MINUS);
1128       break;
1129
1130     case '*':
1131       result->type = CPP_MULT;
1132       if (get_effective_char (buffer) == '=')
1133         ACCEPT_CHAR (CPP_MULT_EQ);
1134       break;
1135
1136     case '=':
1137       result->type = CPP_EQ;
1138       if (get_effective_char (buffer) == '=')
1139         ACCEPT_CHAR (CPP_EQ_EQ);
1140       break;
1141
1142     case '!':
1143       result->type = CPP_NOT;
1144       if (get_effective_char (buffer) == '=')
1145         ACCEPT_CHAR (CPP_NOT_EQ);
1146       break;
1147
1148     case '&':
1149       result->type = CPP_AND;
1150       c = get_effective_char (buffer);
1151       if (c == '=')
1152         ACCEPT_CHAR (CPP_AND_EQ);
1153       else if (c == '&')
1154         ACCEPT_CHAR (CPP_AND_AND);
1155       break;
1156
1157     case '#':
1158       c = buffer->extra_char;   /* Can be set by error condition below.  */
1159       if (c != EOF)
1160         {
1161           buffer->read_ahead = c;
1162           buffer->extra_char = EOF;
1163         }
1164       else
1165         c = get_effective_char (buffer);
1166
1167       if (c == '#')
1168         {
1169           ACCEPT_CHAR (CPP_PASTE);
1170           break;
1171         }
1172
1173       result->type = CPP_HASH;
1174     do_hash:
1175       if (bol)
1176         {
1177           if (pfile->state.parsing_args)
1178             {
1179               /* 6.10.3 paragraph 11: If there are sequences of
1180                  preprocessing tokens within the list of arguments that
1181                  would otherwise act as preprocessing directives, the
1182                  behavior is undefined.
1183
1184                  This implementation will report a hard error, terminate
1185                  the macro invocation, and proceed to process the
1186                  directive.  */
1187               cpp_error (pfile,
1188                          "directives may not be used inside a macro argument");
1189
1190               /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1191               buffer->extra_char = buffer->read_ahead;
1192               buffer->read_ahead = '#';
1193               pfile->state.next_bol = 1;
1194               result->type = CPP_EOF;
1195
1196               /* Get whitespace right - newline_in_args sets it.  */
1197               if (pfile->lexer_pos.col == 1)
1198                 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1199             }
1200           else
1201             {
1202               /* This is the hash introducing a directive.  */
1203               if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1204                 goto done_directive; /* bol still 1.  */
1205               /* This is in fact an assembler #.  */
1206             }
1207         }
1208       break;
1209
1210     case '|':
1211       result->type = CPP_OR;
1212       c = get_effective_char (buffer);
1213       if (c == '=')
1214         ACCEPT_CHAR (CPP_OR_EQ);
1215       else if (c == '|')
1216         ACCEPT_CHAR (CPP_OR_OR);
1217       break;
1218
1219     case '^':
1220       result->type = CPP_XOR;
1221       if (get_effective_char (buffer) == '=')
1222         ACCEPT_CHAR (CPP_XOR_EQ);
1223       break;
1224
1225     case ':':
1226       result->type = CPP_COLON;
1227       c = get_effective_char (buffer);
1228       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1229         ACCEPT_CHAR (CPP_SCOPE);
1230       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1231         {
1232           result->flags |= DIGRAPH;
1233           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1234         }
1235       break;
1236
1237     case '~': result->type = CPP_COMPL; break;
1238     case ',': result->type = CPP_COMMA; break;
1239     case '(': result->type = CPP_OPEN_PAREN; break;
1240     case ')': result->type = CPP_CLOSE_PAREN; break;
1241     case '[': result->type = CPP_OPEN_SQUARE; break;
1242     case ']': result->type = CPP_CLOSE_SQUARE; break;
1243     case '{': result->type = CPP_OPEN_BRACE; break;
1244     case '}': result->type = CPP_CLOSE_BRACE; break;
1245     case ';': result->type = CPP_SEMICOLON; break;
1246
1247     case '@':
1248       if (CPP_OPTION (pfile, objc))
1249         {
1250           /* In Objective C, '@' may begin keywords or strings, like
1251              @keyword or @"string".  It would be nice to call
1252              get_effective_char here and test the result.  However, we
1253              would then need to pass 2 characters to parse_identifier,
1254              making it ugly and slowing down its main loop.  Instead,
1255              we assume we have an identifier, and recover if not.  */
1256           result->type = CPP_NAME;
1257           result->val.node = parse_identifier (pfile, c);
1258           if (result->val.node->length != 1)
1259             break;
1260
1261           /* OK, so it wasn't an identifier.  Maybe a string?  */
1262           if (buffer->read_ahead == '"')
1263             {
1264               c = '"';
1265               ACCEPT_CHAR (CPP_OSTRING);
1266               goto make_string;
1267             }
1268         }
1269       goto random_char;
1270
1271     random_char:
1272     default:
1273       result->type = CPP_OTHER;
1274       result->val.c = c;
1275       break;
1276     }
1277
1278   if (pfile->skipping)
1279     goto skip;
1280
1281   /* If not in a directive, this token invalidates controlling macros.  */
1282   if (!pfile->state.in_directive)
1283     pfile->mi_state = MI_FAILED;
1284 }
1285
1286 /* An upper bound on the number of bytes needed to spell a token,
1287    including preceding whitespace.  */
1288 unsigned int
1289 cpp_token_len (token)
1290      const cpp_token *token;
1291 {
1292   unsigned int len;
1293
1294   switch (TOKEN_SPELL (token))
1295     {
1296     default:            len = 0;                        break;
1297     case SPELL_STRING:  len = token->val.str.len;       break;
1298     case SPELL_IDENT:   len = token->val.node->length;  break;
1299     }
1300   /* 1 for whitespace, 4 for comment delimeters.  */
1301   return len + 5;
1302 }
1303
1304 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1305    already contain the enough space to hold the token's spelling.
1306    Returns a pointer to the character after the last character
1307    written.  */
1308 unsigned char *
1309 cpp_spell_token (pfile, token, buffer)
1310      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1311      const cpp_token *token;
1312      unsigned char *buffer;
1313 {
1314   switch (TOKEN_SPELL (token))
1315     {
1316     case SPELL_OPERATOR:
1317       {
1318         const unsigned char *spelling;
1319         unsigned char c;
1320
1321         if (token->flags & DIGRAPH)
1322           spelling
1323             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1324         else if (token->flags & NAMED_OP)
1325           goto spell_ident;
1326         else
1327           spelling = TOKEN_NAME (token);
1328
1329         while ((c = *spelling++) != '\0')
1330           *buffer++ = c;
1331       }
1332       break;
1333
1334     case SPELL_IDENT:
1335       spell_ident:
1336       memcpy (buffer, token->val.node->name, token->val.node->length);
1337       buffer += token->val.node->length;
1338       break;
1339
1340     case SPELL_STRING:
1341       {
1342         int left, right, tag;
1343         switch (token->type)
1344           {
1345           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1346           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1347           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1348           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1349           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1350           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1351           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1352           }
1353         if (tag) *buffer++ = tag;
1354         if (left) *buffer++ = left;
1355         memcpy (buffer, token->val.str.text, token->val.str.len);
1356         buffer += token->val.str.len;
1357         if (right) *buffer++ = right;
1358       }
1359       break;
1360
1361     case SPELL_CHAR:
1362       *buffer++ = token->val.c;
1363       break;
1364
1365     case SPELL_NONE:
1366       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1367       break;
1368     }
1369
1370   return buffer;
1371 }
1372
1373 /* Returns a token as a null-terminated string.  The string is
1374    temporary, and automatically freed later.  Useful for diagnostics.  */
1375 unsigned char *
1376 cpp_token_as_text (pfile, token)
1377      cpp_reader *pfile;
1378      const cpp_token *token;
1379 {
1380   unsigned int len = cpp_token_len (token);
1381   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1382
1383   end = cpp_spell_token (pfile, token, start);
1384   end[0] = '\0';
1385
1386   return start;
1387 }
1388
1389 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1390 const char *
1391 cpp_type2name (type)
1392      enum cpp_ttype type;
1393 {
1394   return (const char *) token_spellings[type].name;
1395 }
1396
1397 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1398    for efficiency - to avoid double-buffering.  Also, outputs a space
1399    if PREV_WHITE is flagged.  */
1400 void
1401 cpp_output_token (token, fp)
1402      const cpp_token *token;
1403      FILE *fp;
1404 {
1405   if (token->flags & PREV_WHITE)
1406     putc (' ', fp);
1407
1408   switch (TOKEN_SPELL (token))
1409     {
1410     case SPELL_OPERATOR:
1411       {
1412         const unsigned char *spelling;
1413
1414         if (token->flags & DIGRAPH)
1415           spelling
1416             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1417         else if (token->flags & NAMED_OP)
1418           goto spell_ident;
1419         else
1420           spelling = TOKEN_NAME (token);
1421
1422         ufputs (spelling, fp);
1423       }
1424       break;
1425
1426     spell_ident:
1427     case SPELL_IDENT:
1428       ufputs (token->val.node->name, fp);
1429     break;
1430
1431     case SPELL_STRING:
1432       {
1433         int left, right, tag;
1434         switch (token->type)
1435           {
1436           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1437           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1438           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1439           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1440           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1441           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1442           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1443           }
1444         if (tag) putc (tag, fp);
1445         if (left) putc (left, fp);
1446         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1447         if (right) putc (right, fp);
1448       }
1449       break;
1450
1451     case SPELL_CHAR:
1452       putc (token->val.c, fp);
1453       break;
1454
1455     case SPELL_NONE:
1456       /* An error, most probably.  */
1457       break;
1458     }
1459 }
1460
1461 /* Compare two tokens.  */
1462 int
1463 _cpp_equiv_tokens (a, b)
1464      const cpp_token *a, *b;
1465 {
1466   if (a->type == b->type && a->flags == b->flags)
1467     switch (TOKEN_SPELL (a))
1468       {
1469       default:                  /* Keep compiler happy.  */
1470       case SPELL_OPERATOR:
1471         return 1;
1472       case SPELL_CHAR:
1473         return a->val.c == b->val.c; /* Character.  */
1474       case SPELL_NONE:
1475         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1476       case SPELL_IDENT:
1477         return a->val.node == b->val.node;
1478       case SPELL_STRING:
1479         return (a->val.str.len == b->val.str.len
1480                 && !memcmp (a->val.str.text, b->val.str.text,
1481                             a->val.str.len));
1482       }
1483
1484   return 0;
1485 }
1486
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same number
   of tokens and each corresponding pair satisfies _cpp_equiv_tokens,
   0 otherwise.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int total = a->limit - a->first;
  unsigned int idx = 0;

  if (total != (b->limit - b->first))
    return 0;

  while (idx < total)
    {
      if (! _cpp_equiv_tokens (&a->first[idx], &b->first[idx]))
        return 0;
      idx++;
    }

  return 1;
}
#endif
1506
1507 /* Determine whether two tokens can be pasted together, and if so,
1508    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1509    be pasted, or the appropriate type for the merged token if they
1510    can.  */
1511 enum cpp_ttype
1512 cpp_can_paste (pfile, token1, token2, digraph)
1513      cpp_reader * pfile;
1514      const cpp_token *token1, *token2;
1515      int* digraph;
1516 {
1517   enum cpp_ttype a = token1->type, b = token2->type;
1518   int cxx = CPP_OPTION (pfile, cplusplus);
1519
1520   /* Treat named operators as if they were ordinary NAMEs.  */
1521   if (token1->flags & NAMED_OP)
1522     a = CPP_NAME;
1523   if (token2->flags & NAMED_OP)
1524     b = CPP_NAME;
1525
1526   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1527     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1528
1529   switch (a)
1530     {
1531     case CPP_GREATER:
1532       if (b == a) return CPP_RSHIFT;
1533       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1534       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1535       break;
1536     case CPP_LESS:
1537       if (b == a) return CPP_LSHIFT;
1538       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1539       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1540       if (CPP_OPTION (pfile, digraphs))
1541         {
1542           if (b == CPP_COLON)
1543             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1544           if (b == CPP_MOD)
1545             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1546         }
1547       break;
1548
1549     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1550     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1551     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1552
1553     case CPP_MINUS:
1554       if (b == a)               return CPP_MINUS_MINUS;
1555       if (b == CPP_GREATER)     return CPP_DEREF;
1556       break;
1557     case CPP_COLON:
1558       if (b == a && cxx)        return CPP_SCOPE;
1559       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1560         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1561       break;
1562
1563     case CPP_MOD:
1564       if (CPP_OPTION (pfile, digraphs))
1565         {
1566           if (b == CPP_GREATER)
1567             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1568           if (b == CPP_COLON)
1569             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1570         }
1571       break;
1572     case CPP_DEREF:
1573       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1574       break;
1575     case CPP_DOT:
1576       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1577       if (b == CPP_NUMBER)      return CPP_NUMBER;
1578       break;
1579
1580     case CPP_HASH:
1581       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1582         /* %:%: digraph */
1583         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1584       break;
1585
1586     case CPP_NAME:
1587       if (b == CPP_NAME)        return CPP_NAME;
1588       if (b == CPP_NUMBER
1589           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1590       if (b == CPP_CHAR
1591           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1592       if (b == CPP_STRING
1593           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1594       break;
1595
1596     case CPP_NUMBER:
1597       if (b == CPP_NUMBER)      return CPP_NUMBER;
1598       if (b == CPP_NAME)        return CPP_NUMBER;
1599       if (b == CPP_DOT)         return CPP_NUMBER;
1600       /* Numbers cannot have length zero, so this is safe.  */
1601       if ((b == CPP_PLUS || b == CPP_MINUS)
1602           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1603         return CPP_NUMBER;
1604       break;
1605
1606     case CPP_OTHER:
1607       if (CPP_OPTION (pfile, objc) && token1->val.c == '@')
1608         {
1609           if (b == CPP_NAME)    return CPP_NAME;
1610           if (b == CPP_STRING)  return CPP_OSTRING;
1611         }
1612
1613     default:
1614       break;
1615     }
1616
1617   return CPP_EOF;
1618 }
1619
1620 /* Returns nonzero if a space should be inserted to avoid an
1621    accidental token paste for output.  For simplicity, it is
1622    conservative, and occasionally advises a space where one is not
1623    needed, e.g. "." and ".2".  */
1624
1625 int
1626 cpp_avoid_paste (pfile, token1, token2)
1627      cpp_reader *pfile;
1628      const cpp_token *token1, *token2;
1629 {
1630   enum cpp_ttype a = token1->type, b = token2->type;
1631   cppchar_t c;
1632
1633   if (token1->flags & NAMED_OP)
1634     a = CPP_NAME;
1635   if (token2->flags & NAMED_OP)
1636     b = CPP_NAME;
1637
1638   c = EOF;
1639   if (token2->flags & DIGRAPH)
1640     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1641   else if (token_spellings[b].category == SPELL_OPERATOR)
1642     c = token_spellings[b].name[0];
1643
1644   /* Quickly get everything that can paste with an '='.  */
1645   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1646     return 1;
1647
1648   switch (a)
1649     {
1650     case CPP_GREATER:   return c == '>' || c == '?';
1651     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1652     case CPP_PLUS:      return c == '+';
1653     case CPP_MINUS:     return c == '-' || c == '>';
1654     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1655     case CPP_MOD:       return c == ':' || c == '>';
1656     case CPP_AND:       return c == '&';
1657     case CPP_OR:        return c == '|';
1658     case CPP_COLON:     return c == ':' || c == '>';
1659     case CPP_DEREF:     return c == '*';
1660     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1661     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1662     case CPP_NAME:      return ((b == CPP_NUMBER
1663                                  && name_p (pfile, &token2->val.str))
1664                                 || b == CPP_NAME
1665                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1666     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1667                                 || c == '.' || c == '+' || c == '-');
1668     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1669                                 && token1->val.c == '@'
1670                                 && (b == CPP_NAME || b == CPP_STRING));
1671     default:            break;
1672     }
1673
1674   return 0;
1675 }
1676
1677 /* Output all the remaining tokens on the current line, and a newline
1678    character, to FP.  Leading whitespace is removed.  */
1679 void
1680 cpp_output_line (pfile, fp)
1681      cpp_reader *pfile;
1682      FILE *fp;
1683 {
1684   cpp_token token;
1685
1686   cpp_get_token (pfile, &token);
1687   token.flags &= ~PREV_WHITE;
1688   while (token.type != CPP_EOF)
1689     {
1690       cpp_output_token (&token, fp);
1691       cpp_get_token (pfile, &token);
1692     }
1693
1694   putc ('\n', fp);
1695 }
1696
1697 /* Memory pools.  */
1698
/* Probe type used only to compute DEFAULT_ALIGNMENT.  Placing a
   single char in front of the union makes the union's offset equal
   to the padding needed to align its most demanding member.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Offset of the union inside struct dummy, used as the default
   alignment for pool chunks.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1710
1711 static int
1712 chunk_suitable (pool, chunk, size)
1713      cpp_pool *pool;
1714      cpp_chunk *chunk;
1715      unsigned int size;
1716 {
1717   /* Being at least twice SIZE means we can use memcpy in
1718      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1719      anyway.  */
1720   return (chunk && pool->locked != chunk
1721           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1722 }
1723
1724 /* Returns the end of the new pool.  PTR points to a char in the old
1725    pool, and is updated to point to the same char in the new pool.  */
1726 unsigned char *
1727 _cpp_next_chunk (pool, len, ptr)
1728      cpp_pool *pool;
1729      unsigned int len;
1730      unsigned char **ptr;
1731 {
1732   cpp_chunk *chunk = pool->cur->next;
1733
1734   /* LEN is the minimum size we want in the new pool.  */
1735   len += POOL_ROOM (pool);
1736   if (! chunk_suitable (pool, chunk, len))
1737     {
1738       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1739
1740       chunk->next = pool->cur->next;
1741       pool->cur->next = chunk;
1742     }
1743
1744   /* Update the pointer before changing chunk's front.  */
1745   if (ptr)
1746     *ptr += chunk->base - POOL_FRONT (pool);
1747
1748   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1749   chunk->front = chunk->base;
1750
1751   pool->cur = chunk;
1752   return POOL_LIMIT (pool);
1753 }
1754
1755 static cpp_chunk *
1756 new_chunk (size)
1757      unsigned int size;
1758 {
1759   unsigned char *base;
1760   cpp_chunk *result;
1761
1762   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
1763   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1764   /* Put the chunk descriptor at the end.  Then chunk overruns will
1765      cause obvious chaos.  */
1766   result = (cpp_chunk *) (base + size);
1767   result->base = base;
1768   result->front = base;
1769   result->limit = base + size;
1770   result->next = 0;
1771
1772   return result;
1773 }
1774
1775 void
1776 _cpp_init_pool (pool, size, align, temp)
1777      cpp_pool *pool;
1778      unsigned int size, align, temp;
1779 {
1780   if (align == 0)
1781     align = DEFAULT_ALIGNMENT;
1782   if (align & (align - 1))
1783     abort ();
1784   pool->align = align;
1785   pool->cur = new_chunk (size);
1786   pool->locked = 0;
1787   pool->locks = 0;
1788   if (temp)
1789     pool->cur->next = pool->cur;
1790 }
1791
1792 void
1793 _cpp_lock_pool (pool)
1794      cpp_pool *pool;
1795 {
1796   if (pool->locks++ == 0)
1797     pool->locked = pool->cur;
1798 }
1799
1800 void
1801 _cpp_unlock_pool (pool)
1802      cpp_pool *pool;
1803 {
1804   if (--pool->locks == 0)
1805     pool->locked = 0;
1806 }
1807
1808 void
1809 _cpp_free_pool (pool)
1810      cpp_pool *pool;
1811 {
1812   cpp_chunk *chunk = pool->cur, *next;
1813
1814   do
1815     {
1816       next = chunk->next;
1817       free (chunk->base);
1818       chunk = next;
1819     }
1820   while (chunk && chunk != pool->cur);
1821 }
1822
1823 /* Reserve LEN bytes from a memory pool.  */
1824 unsigned char *
1825 _cpp_pool_reserve (pool, len)
1826      cpp_pool *pool;
1827      unsigned int len;
1828 {
1829   len = POOL_ALIGN (len, pool->align);
1830   if (len > (unsigned int) POOL_ROOM (pool))
1831     _cpp_next_chunk (pool, len, 0);
1832
1833   return POOL_FRONT (pool);
1834 }
1835
1836 /* Allocate LEN bytes from a memory pool.  */
1837 unsigned char *
1838 _cpp_pool_alloc (pool, len)
1839      cpp_pool *pool;
1840      unsigned int len;
1841 {
1842   unsigned char *result = _cpp_pool_reserve (pool, len);
1843
1844   POOL_COMMIT (pool, len);
1845   return result;
1846 }