gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41 #include "symcat.h"
  42
  43 /* Tokens with SPELL_STRING store their spelling in the token list,
  44    and its length in token->val.name.len.  SPELL_OPERATOR is the
        category given to the OP entries of the token_spellings table; the
        category of a TK entry is whatever that entry names.
        NOTE(review): parse_string in this file records the length in
        token->val.str.len -- confirm which field name is current.  */
  45 enum spell_type
  46 {
  47   SPELL_OPERATOR = 0,
  48   SPELL_CHAR,
  49   SPELL_IDENT,
  50   SPELL_STRING,
  51   SPELL_NONE
  52 };
  53
     /* One entry of the token_spellings table, which the TOKEN_SPELL and
        TOKEN_NAME macros index by token type.  */
  54 struct token_spelling
  55 {
  56   enum spell_type category;     /* How the token is spelled.  */
  57   const unsigned char *name;    /* OP entries: the operator text.  TK
                                        entries: the stringified name of
                                        the token type enumerator.  */
  58 };
  59
     /* Spellings of the digraphs.  How this array is indexed is not
        visible in this part of the file.  */
  60 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  61                                              U":>", U"<%", U"%>"};
  62
     /* Build token_spellings from TTYPE_TABLE, which is presumably a list
        of OP and TK entries.  An OP (e, s) entry gets category
        SPELL_OPERATOR and spelling S; a TK (e, s) entry gets category S and
        the stringified enumerator E as its name.  Both helper macros are
        undefined again once the table has been built.  */
  63 #define OP(e, s) { SPELL_OPERATOR, U s           },
  64 #define TK(e, s) { s,              U STRINGX (e) },
  65 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  66 #undef OP
  67 #undef TK
  68
     /* Spelling category and table name of TOKEN, looked up by its type.  */
  69 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  70 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  71
     /* Forward declarations of this file's static helpers.  The first
        group reads characters from a cpp_buffer.  */
  72 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  73 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  74 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  75
     /* Comment, whitespace and token-level helpers.  */
  76 static int skip_block_comment PARAMS ((cpp_reader *));
  77 static int skip_line_comment PARAMS ((cpp_reader *));
  78 static void adjust_column PARAMS ((cpp_reader *));
  79 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  80 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  81 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  82 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  83 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  84 static void unterminated PARAMS ((cpp_reader *, int));
  85 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  87 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  88 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
  89 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  90
     /* NOTE(review): presumably memory pool helpers; their definitions are
        not within the part of the file seen here.  */
  91 static cpp_chunk *new_chunk PARAMS ((unsigned int));
  92 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  93
  94 /* Utility routine:
  95
  96    Compares, the token TOKEN to the NUL-terminated string STRING.
  97    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  98
  99 int
 100 cpp_ideq (token, string)
 101      const cpp_token *token;
 102      const char *string;
 103 {
 104   if (token->type != CPP_NAME)
 105     return 0;
 106
 107   return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
 108 }
 109
 110 /* Call when meeting a newline.  Returns the character after the newline
 111    (or carriage-return newline combination), or EOF.  */
 112 static cppchar_t
 113 handle_newline (buffer, newline_char)
 114      cpp_buffer *buffer;
 115      cppchar_t newline_char;
 116 {
 117   cppchar_t next = EOF;
 118
 119   buffer->col_adjust = 0;
 120   buffer->lineno++;
 121   buffer->line_base = buffer->cur;
 122
 123   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 124   if (buffer->cur < buffer->rlimit)
 125     {
 126       next = *buffer->cur++;
 127       if (next + newline_char == '\r' + '\n')
 128         {
 129           buffer->line_base = buffer->cur;
 130           if (buffer->cur < buffer->rlimit)
 131             next = *buffer->cur++;
 132           else
 133             next = EOF;
 134         }
 135     }
 136
 137   buffer->read_ahead = next;
 138   return next;
 139 }
 140
 141 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 142    encountered.  It warns if necessary, and returns true if the
 143    trigraph should be honoured.  FROM_CHAR is the third character of a
 144    trigraph, and presumed to be the previous character for position
 145    reporting.  */
 146 static int
 147 trigraph_ok (pfile, from_char)
 148      cpp_reader *pfile;
 149      cppchar_t from_char;
 150 {
 151   int accept = CPP_OPTION (pfile, trigraphs);
 152
 153   /* Don't warn about trigraphs in comments.  */
 154   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 155     {
 156       cpp_buffer *buffer = pfile->buffer;
 157       if (accept)
 158         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
 162       else
 163         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 164                                "trigraph ??%c ignored", (int) from_char);
 165     }
 166
 167   return accept;
 168 }
 169
 170 /* Assumes local variables buffer and result.  Sets the type of the
        token being built to T and clears buffer->read_ahead, so that the
        look-ahead character counts as consumed.  */
 171 #define ACCEPT_CHAR(t) \
 172   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 173
 174 /* SAVE_STATE records buffer->cur in the local variable saved_cur and
        RESTORE_STATE puts it back; both assume local variables buffer and
        saved_cur.

        When we move to multibyte character sets, add to these something
 175    that saves and restores the state of the multibyte conversion
 176    library.  This probably involves saving and restoring a "cookie".
 177    In the case of glibc it is an 8-byte structure, so is not a high
 178    overhead operation.  In any case, it's out of the fast path.  */
 179 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 180 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 181
 182 /* Skips any escaped newlines introduced by NEXT, which is either a
 183    '?' or a '\\'.  Returns the next character, which will also have
 184    been placed in buffer->read_ahead.  This routine performs
 185    preprocessing stages 1 and 2 of the ISO C standard.

        If NEXT does not begin an honoured trigraph or an escaped newline,
        buffer->cur is put back to where it was and NEXT itself is
        returned.  An honoured trigraph is replaced by the character it
        stands for; if that is a backslash it may in turn begin an escaped
        newline.  When buffer->from_stage3 is set, NEXT is returned
        unchanged.  */
 186 static cppchar_t
 187 skip_escaped_newlines (buffer, next)
 188      cpp_buffer *buffer;
 189      cppchar_t next;
 190 {
 191   /* Only do this if we apply stages 1 and 2.  */
 192   if (!buffer->from_stage3)
 193     {
 194       cppchar_t next1;
 195       const unsigned char *saved_cur;   /* For SAVE_STATE/RESTORE_STATE.  */
 196       int space;                        /* Space seen after a backslash.  */
 197
 198       do
 199         {
 200           if (buffer->cur == buffer->rlimit)
 201             break;                      /* Nothing follows NEXT.  */
 202
 203           SAVE_STATE ();                /* Position just after NEXT.  */
 204           if (next == '?')
 205             {
 206               next1 = *buffer->cur++;
 207               if (next1 != '?' || buffer->cur == buffer->rlimit)
 208                 {
 209                   RESTORE_STATE ();     /* No "??" plus a third character.  */
 210                   break;
 211                 }
 212
 213               next1 = *buffer->cur++;
 214               if (!_cpp_trigraph_map[next1]
 215                   || !trigraph_ok (buffer->pfile, next1))
 216                 {
 217                   RESTORE_STATE ();     /* Not a trigraph, or not honoured.  */
 218                   break;
 219                 }
 220
 221               /* We have a full trigraph here.  */
 222               next = _cpp_trigraph_map[next1];
 223               if (next != '\\' || buffer->cur == buffer->rlimit)
 224                 break;                  /* Return the replacement character.  */
 225               SAVE_STATE ();            /* Now just after the trigraph.  */
 226             }
 227
 228           /* We have a backslash, and room for at least one more character.  */
 229           space = 0;
 230           do                            /* Pass over non-vertical space.  */
 231             {
 232               next1 = *buffer->cur++;
 233               if (!is_nvspace (next1))
 234                 break;
 235               space = 1;
 236             }
 237           while (buffer->cur < buffer->rlimit);
 238
 239           if (!is_vspace (next1))
 240             {
 241               RESTORE_STATE ();         /* No newline: nothing to skip.  */
 242               break;
 243             }
 244
 245           if (space)
 246             cpp_warning (buffer->pfile,
 247                          "backslash and newline separated by space");
 248
 249           next = handle_newline (buffer, next1);  /* First char of next line.  */
 250           if (next == EOF)
 251             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 252         }
 253       while (next == '\\' || next == '?');  /* May start another one.  */
 254     }
 255
 256   buffer->read_ahead = next;
 257   return next;
 258 }
 259
 260 /* Obtain the next character, after trigraph conversion and skipping
 261    an arbitrary string of escaped newlines.  The common case of no
 262    trigraphs or escaped newlines falls through quickly.  */
 263 static cppchar_t
 264 get_effective_char (buffer)
 265      cpp_buffer *buffer;
 266 {
 267   cppchar_t next = EOF;
 268
 269   if (buffer->cur < buffer->rlimit)
 270     {
 271       next = *buffer->cur++;
 272
 273       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 274          can introduce escaped newlines, which we want to skip, or
 275          UCNs, which, depending upon lexer state, we will handle in
 276          the future.  */
 277       if (next == '?' || next == '\\')
 278         next = skip_escaped_newlines (buffer, next);
 279     }
 280
 281   buffer->read_ahead = next;
 282   return next;
 283 }
 284
 285 /* Skip a C-style block comment.  We find the end of the comment by
 286    seeing if an asterisk is before every '/' we encounter.  Returns
 287    non-zero if comment terminated by EOF, zero otherwise.

        Scanning starts at pfile->buffer->cur.  state.lexing_comment is set
        while scanning, which is what trigraph_ok checks in order to keep
        quiet inside comments.  buffer->read_ahead is cleared on return.  */
 288 static int
 289 skip_block_comment (pfile)
 290      cpp_reader *pfile;
 291 {
 292   cpp_buffer *buffer = pfile->buffer;
 293   cppchar_t c = EOF, prevc = EOF;   /* Current and previous character.  */
 294
 295   pfile->state.lexing_comment = 1;
 296   while (buffer->cur != buffer->rlimit)
 297     {
 298       prevc = c, c = *buffer->cur++;
 299
 300     next_char:                      /* Entered with c already fetched.  */
 301       /* FIXME: For speed, create a new character class of characters
 302          of interest inside block comments.  */
 303       if (c == '?' || c == '\\')
 304         c = skip_escaped_newlines (buffer, c);
 305
 306       /* People like decorating comments with '*', so check for '/'
 307          instead for efficiency.  */
 308       if (c == '/')
 309         {
 310           if (prevc == '*')
 311             break;                  /* Found the closing delimiter.  */
 312
 313           /* Warn about potential nested comments, but not if the '/'
 314              comes immediately before the true comment delimiter.
 315              Don't bother to get it right across escaped newlines.  */
 316           if (CPP_OPTION (pfile, warn_comments)
 317               && buffer->cur != buffer->rlimit)
 318             {
 319               prevc = c, c = *buffer->cur++;
 320               if (c == '*' && buffer->cur != buffer->rlimit)
 321                 {
 322                   prevc = c, c = *buffer->cur++;
 323                   if (c != '/')
 324                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 325                                            CPP_BUF_COL (buffer),
 326                                            "\"/*\" within comment");
 327                 }
 328               goto next_char;       /* Examine the character read ahead.  */
 329             }
 330         }
 331       else if (is_vspace (c))
 332         {
 333           prevc = c, c = handle_newline (buffer, c);  /* First char of next line.  */
 334           goto next_char;
 335         }
 336       else if (c == '\t')
 337         adjust_column (pfile);
 338     }
 339
 340   pfile->state.lexing_comment = 0;
 341   buffer->read_ahead = EOF;
 342   return c != '/' || prevc != '*';  /* Non-zero unless we ended on the delimiter.  */
 343 }
 344
 345 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 346    non-zero if a multiline comment.  The following new line, if any,
 347    is left in buffer->read_ahead.  */
 348 static int
 349 skip_line_comment (pfile)
 350      cpp_reader *pfile;
 351 {
 352   cpp_buffer *buffer = pfile->buffer;
 353   unsigned int orig_lineno = buffer->lineno;
 354   cppchar_t c;
 355
 356   pfile->state.lexing_comment = 1;
 357   do
 358     {
 359       c = EOF;
 360       if (buffer->cur == buffer->rlimit)
 361         break;
 362
 363       c = *buffer->cur++;
 364       if (c == '?' || c == '\\')
 365         c = skip_escaped_newlines (buffer, c);
 366     }
 367   while (!is_vspace (c));
 368
 369   pfile->state.lexing_comment = 0;
 370   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 371   return orig_lineno != buffer->lineno;
 372 }
 373
 374 /* pfile->buffer->cur is one beyond the \t character.  Update
 375    col_adjust so we track the column correctly.  */
 376 static void
 377 adjust_column (pfile)
 378      cpp_reader *pfile;
 379 {
 380   cpp_buffer *buffer = pfile->buffer;
 381   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 382
 383   /* Round it up to multiple of the tabstop, but subtract 1 since the
 384      tab itself occupies a character position.  */
 385   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 386                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 387 }
 388
 389 /* Skips whitespace, saving the next non-whitespace character.
 390    Adjusts pfile->col_adjust to account for tabs.  Without this,
 391    tokens might be assigned an incorrect column.  */
 392 static void
 393 skip_whitespace (pfile, c)
 394      cpp_reader *pfile;
 395      cppchar_t c;
 396 {
 397   cpp_buffer *buffer = pfile->buffer;
 398   unsigned int warned = 0;
 399
 400   do
 401     {
 402       /* Horizontal space always OK.  */
 403       if (c == ' ')
 404         ;
 405       else if (c == '\t')
 406         adjust_column (pfile);
 407       /* Just \f \v or \0 left.  */
 408       else if (c == '\0')
 409         {
 410           if (!warned)
 411             {
 412               cpp_warning (pfile, "null character(s) ignored");
 413               warned = 1;
 414             }
 415         }
 416       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 417         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 418                                CPP_BUF_COL (buffer),
 419                                "%s in preprocessing directive",
 420                                c == '\f' ? "form feed" : "vertical tab");
 421
 422       c = EOF;
 423       if (buffer->cur == buffer->rlimit)
 424         break;
 425       c = *buffer->cur++;
 426     }
 427   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 428   while (is_nvspace (c));
 429
 430   /* Remember the next character.  */
 431   buffer->read_ahead = c;
 432 }
 433
 434 /* See if the characters of a number token are valid in a name (no
 435    '.', '+' or '-').  */
 436 static int
 437 name_p (pfile, string)
 438      cpp_reader *pfile;
 439      const cpp_string *string;
 440 {
 441   unsigned int i;
 442
 443   for (i = 0; i < string->len; i++)
 444     if (!is_idchar (string->text[i]))
 445       return 0;
 446
 447   return 1;
 448 }
 449
 450 /* Parse an identifier, skipping embedded backslash-newlines.
 451    Calculate the hash value of the token while parsing, for improved
 452    performance.  The hashing algorithm *must* match cpp_lookup().

        C is the identifier's first character.  The spelling is built at
        the front of pfile->ident_pool and looked up with
        _cpp_lookup_with_hash, whose result is returned.  The character
        that ended the identifier (or EOF) is left in buffer->read_ahead.  */
 453
 454 static cpp_hashnode *
 455 parse_identifier (pfile, c)
 456      cpp_reader *pfile;
 457      cppchar_t c;
 458 {
 459   cpp_hashnode *result;
 460   cpp_buffer *buffer = pfile->buffer;
 461   unsigned char *dest, *limit;      /* Write position and end of room.  */
 462   unsigned int r = 0, saw_dollar = 0;  /* Running hash; count of '$'.  */
 463
 464   dest = POOL_FRONT (&pfile->ident_pool);
 465   limit = POOL_LIMIT (&pfile->ident_pool);
 466
 467   do
 468     {
 469       do
 470         {
 471           /* Need room for terminating null.  */
 472           if (dest + 1 >= limit)
 473             limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
 474
 475           *dest++ = c;
 476           r = HASHSTEP (r, c);
 477
 478           if (c == '$')
 479             saw_dollar++;
 480
 481           c = EOF;
 482           if (buffer->cur == buffer->rlimit)
 483             break;                  /* Out of input; c stays EOF.  */
 484
 485           c = *buffer->cur++;
 486         }
 487       while (is_idchar (c));
 488
 489       /* Potential escaped newline?  */
 490       if (c != '?' && c != '\\')
 491         break;
 492       c = skip_escaped_newlines (buffer, c);
 493     }
 494   while (is_idchar (c));            /* The identifier carries on.  */
 495
 496   /* Remember the next character.  */
 497   buffer->read_ahead = c;
 498
 499   /* $ is not an identifier character in the standard, but is commonly
 500      accepted as an extension.  Don't warn about it in skipped
 501      conditional blocks.  */
 502   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 503     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 504
 505   /* Identifiers are null-terminated.  */
 506   *dest = '\0';
 507
 508   /* This routine commits the memory if necessary.  */
 509   result = _cpp_lookup_with_hash (pfile,
 510                                   dest - POOL_FRONT (&pfile->ident_pool), r);
 511
 512   /* Some identifiers require diagnostics when lexed.  */
 513   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 514     {
 515       /* It is allowed to poison the same identifier twice.  */
 516       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 517         cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
 518
 519       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 520          replacement list of a variable-arguments macro.  */
 521       if (result == pfile->spec_nodes.n__VA_ARGS__
 522           && !pfile->state.va_args_ok)
 523         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
 524     }
 525
 526   return result;
 527 }
 528
 529 /* Parse a number, skipping embedded backslash-newlines.

        C is the first character to store.  If LEADING_PERIOD is non-zero
        a '.' is stored in front of it.  The NUL-terminated spelling is
        built in pfile->string_pool and described by NUMBER: text points
        at it and len excludes the NUL.  The character that ended the
        number (or EOF) is left in buffer->read_ahead.  */
 530 static void
 531 parse_number (pfile, number, c, leading_period)
 532      cpp_reader *pfile;
 533      cpp_string *number;
 534      cppchar_t c;
 535      int leading_period;
 536 {
 537   cpp_buffer *buffer = pfile->buffer;
 538   cpp_pool *pool = pfile->string_pool;
 539   unsigned char *dest, *limit;      /* Write position and end of room.  */
 540
 541   dest = POOL_FRONT (pool);
 542   limit = POOL_LIMIT (pool);
 543
 544   /* Place a leading period.  */
 545   if (leading_period)
 546     {
 547       if (dest >= limit)
 548         limit = _cpp_next_chunk (pool, 0, &dest);
 549       *dest++ = '.';
 550     }
 551
     /* NOTE(review): VALID_SIGN is defined elsewhere.  It is handed the
        candidate character and the character stored just before it, so
        presumably it accepts a sign directly after an exponent
        character -- confirm.  */
 552   do
 553     {
 554       do
 555         {
 556           /* Need room for terminating null.  */
 557           if (dest + 1 >= limit)
 558             limit = _cpp_next_chunk (pool, 0, &dest);
 559           *dest++ = c;
 560
 561           c = EOF;
 562           if (buffer->cur == buffer->rlimit)
 563             break;                  /* Out of input; c stays EOF.  */
 564
 565           c = *buffer->cur++;
 566         }
 567       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (buffer, c);
 573     }
 574   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* Null-terminate the number.  */
 580   *dest = '\0';
 581
 582   number->text = POOL_FRONT (pool);
 583   number->len = dest - number->text;
 584   POOL_COMMIT (pool, number->len + 1);  /* The + 1 covers the NUL.  */
 585 }
 586
 587 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 588 static void
 589 unterminated (pfile, term)
 590      cpp_reader *pfile;
 591      int term;
 592 {
 593   cpp_error (pfile, "missing terminating %c character", term);
 594
 595   if (term == '\"' && pfile->mlstring_pos.line
 596       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 597     {
 598       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 599                            pfile->mlstring_pos.col,
 600                            "possible start of unterminated string literal");
 601       pfile->mlstring_pos.line = 0;
 602     }
 603 }
 604
 605 /* Subroutine of parse_string.  */
 606 static int
 607 unescaped_terminator_p (pfile, dest)
 608      cpp_reader *pfile;
 609      const unsigned char *dest;
 610 {
 611   const unsigned char *start, *temp;
 612
 613   /* In #include-style directives, terminators are not escapeable.  */
 614   if (pfile->state.angled_headers)
 615     return 1;
 616
 617   start = POOL_FRONT (pfile->string_pool);
 618
 619   /* An odd number of consecutive backslashes represents an escaped
 620      terminator.  */
 621   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 622     ;
 623
 624   return ((dest - temp) & 1) == 0;
 625 }
 626
 627 /* Parses a string, character constant, or angle-bracketed header file
 628    name.  Handles embedded trigraphs and escaped newlines.
 629
 630    Multi-line strings are allowed, but they are deprecated within
 631    directives.

        TERMINATOR is the closing character.  The text up to, but not
        including, the terminator is stored in pfile->string_pool and
        recorded in TOKEN->val.str; it is not NUL-terminated.  On return
        buffer->read_ahead is EOF if the terminator was consumed or the
        input ran out, and otherwise holds the newline that cut the
        literal short.  */
 632 static void
 633 parse_string (pfile, token, terminator)
 634      cpp_reader *pfile;
 635      cpp_token *token;
 636      cppchar_t terminator;
 637 {
 638   cpp_buffer *buffer = pfile->buffer;
 639   cpp_pool *pool = pfile->string_pool;
 640   unsigned char *dest, *limit;      /* Write position and end of room.  */
 641   cppchar_t c;
 642   unsigned int nulls = 0;           /* Null characters seen so far.  */
 643
 644   dest = POOL_FRONT (pool);
 645   limit = POOL_LIMIT (pool);
 646
 647   for (;;)
 648     {
 649       if (buffer->cur == buffer->rlimit)
 650         {
 651           c = EOF;
 652           unterminated (pfile, terminator);
 653           break;
 654         }
 655       c = *buffer->cur++;
 656
 657     have_char:                      /* Entered with c already fetched.  */
 658       /* Handle trigraphs, escaped newlines etc.  */
 659       if (c == '?' || c == '\\')
 660         c = skip_escaped_newlines (buffer, c);
 661
 662       if (c == terminator && unescaped_terminator_p (pfile, dest))
 663         {
 664           c = EOF;                  /* Consumed; nothing to push back.  */
 665           break;
 666         }
 667       else if (is_vspace (c))
 668         {
 669           /* In assembly language, silently terminate string and
 670              character literals at end of line.  This is a kludge
 671              around not knowing where comments are.  */
 672           if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
 673             break;
 674
 675           /* Character constants and header names may not extend over
 676              multiple lines.  In Standard C, neither may strings.
 677              Unfortunately, we accept multiline strings as an
 678              extension, except in #include family directives.  */
 679           if (terminator != '"' || pfile->state.angled_headers)
 680             {
 681               unterminated (pfile, terminator);
 682               break;
 683             }
 684
 685           if (pfile->mlstring_pos.line == 0)
 686             {
 687               pfile->mlstring_pos = pfile->lexer_pos;  /* For unterminated.  */
 688               if (CPP_PEDANTIC (pfile))
 689                 cpp_pedwarn (pfile, "multi-line string constant");
 690             }
 691
 692           handle_newline (buffer, c);  /* Stores to read_ahead.  */
 693           c = '\n';                 /* Any line ending is stored as '\n'.  */
 694         }
 695       else if (c == '\0')
 696         {
 697           if (nulls++ == 0)
 698             cpp_warning (pfile, "null character(s) preserved in literal");
 699         }
 700
 701       /* No terminating null for strings - they could contain nulls.  */
 702       if (dest >= limit)
 703         limit = _cpp_next_chunk (pool, 0, &dest);
 704       *dest++ = c;
 705
 706       /* If we had a new line, the next character is in read_ahead.  */
 707       if (c != '\n')
 708         continue;
 709       c = buffer->read_ahead;
 710       if (c != EOF)
 711         goto have_char;
 712     }
 713
 714   /* Remember the next character.  */
 715   buffer->read_ahead = c;
 716
 717   token->val.str.text = POOL_FRONT (pool);
 718   token->val.str.len = dest - token->val.str.text;
 719   POOL_COMMIT (pool, token->val.str.len);
 720 }
 721
 722 /* The stored comment includes the comment start and any terminator.  */
 723 static void
 724 save_comment (pfile, token, from)
 725      cpp_reader *pfile;
 726      cpp_token *token;
 727      const unsigned char *from;
 728 {
 729   unsigned char *buffer;
 730   unsigned int len;
 731
 732   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 733   /* C++ comments probably (not definitely) have moved past a new
 734      line, which we don't want to save in the comment.  */
 735   if (pfile->buffer->read_ahead != EOF)
 736     len--;
 737   buffer = _cpp_pool_alloc (pfile->string_pool, len);
 738
 739   token->type = CPP_COMMENT;
 740   token->val.str.len = len;
 741   token->val.str.text = buffer;
 742
 743   buffer[0] = '/';
 744   memcpy (buffer + 1, from, len - 1);
 745 }
 746
 747 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 748    want to avoid stepping back when lexing %:%X.

        RESULT's type becomes CPP_MOD, or CPP_MOD_EQ for "%=".  With
        digraphs enabled it may instead become CPP_HASH for "%:",
        CPP_PASTE for "%:%:" or CPP_CLOSE_BRACE for "%>", and the DIGRAPH
        flag is set.  For "%:%X" the result is CPP_HASH: X is parked in
        buffer->extra_char and '%' is placed in buffer->read_ahead, so the
        next call for that '%' takes X from extra_char rather than reading
        the input again.  */
 749 static void
 750 lex_percent (buffer, result)
 751      cpp_buffer *buffer;
 752      cpp_token *result;
 753 {
 754   cppchar_t c;
 755
 756   result->type = CPP_MOD;           /* Unless something longer matches.  */
 757   /* Parsing %:%X could leave an extra character.  */
 758   if (buffer->extra_char == EOF)
 759     c = get_effective_char (buffer);
 760   else
 761     {
 762       c = buffer->read_ahead = buffer->extra_char;
 763       buffer->extra_char = EOF;
 764     }
 765
 766   if (c == '=')
 767     ACCEPT_CHAR (CPP_MOD_EQ);
 768   else if (CPP_OPTION (buffer->pfile, digraphs))
 769     {
 770       if (c == ':')
 771         {
 772           result->flags |= DIGRAPH;
 773           ACCEPT_CHAR (CPP_HASH);
 774           if (get_effective_char (buffer) == '%')
 775             {
 776               buffer->extra_char = get_effective_char (buffer);
 777               if (buffer->extra_char == ':')
 778                 {
 779                   buffer->extra_char = EOF;
 780                   ACCEPT_CHAR (CPP_PASTE);  /* The full "%:%:".  */
 781                 }
 782               else
 783                 /* We'll catch the extra_char when we're called back.  */
 784                 buffer->read_ahead = '%';
 785             }
 786         }
 787       else if (c == '>')
 788         {
 789           result->flags |= DIGRAPH;
 790           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 791         }
 792     }
 793 }
 794
 795 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 796    want to avoid stepping back when lexing '...' or '.123'.  In the
 797    latter case we should also set a flag for parse_number.

        RESULT becomes a CPP_NUMBER if a digit follows (parse_number is
        told about the leading period), CPP_ELLIPSIS for "...",
        CPP_DOT_STAR for ".*" when the cplusplus option is set, and
        CPP_DOT otherwise.  For "..X" the result is CPP_DOT: X is parked
        in buffer->extra_char and '.' is placed in buffer->read_ahead, so
        the next call for that '.' takes X from extra_char rather than
        reading the input again.  */
 798 static void
 799 lex_dot (pfile, result)
 800      cpp_reader *pfile;
 801      cpp_token *result;
 802 {
 803   cpp_buffer *buffer = pfile->buffer;
 804   cppchar_t c;
 805
 806   /* Parsing ..X could leave an extra character.  */
 807   if (buffer->extra_char == EOF)
 808     c = get_effective_char (buffer);
 809   else
 810     {
 811       c = buffer->read_ahead = buffer->extra_char;
 812       buffer->extra_char = EOF;
 813     }
 814
 815   /* All known character sets have 0...9 contiguous.  */
 816   if (c >= '0' && c <= '9')
 817     {
 818       result->type = CPP_NUMBER;
 819       parse_number (pfile, &result->val.str, c, 1);  /* 1: leading period.  */
 820     }
 821   else
 822     {
 823       result->type = CPP_DOT;       /* Unless something longer matches.  */
 824       if (c == '.')
 825         {
 826           buffer->extra_char = get_effective_char (buffer);
 827           if (buffer->extra_char == '.')
 828             {
 829               buffer->extra_char = EOF;
 830               ACCEPT_CHAR (CPP_ELLIPSIS);  /* The full "...".  */
 831             }
 832           else
 833             /* We'll catch the extra_char when we're called back.  */
 834             buffer->read_ahead = '.';
 835         }
 836       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 837         ACCEPT_CHAR (CPP_DOT_STAR);
 838     }
 839 }
 840
 841 void
 842 _cpp_lex_token (pfile, result)
 843      cpp_reader *pfile;
 844      cpp_token *result;
 845 {
 846   cppchar_t c;
 847   cpp_buffer *buffer = pfile->buffer;
 848   const unsigned char *comment_start;
 849   unsigned char was_skip_newlines = pfile->state.skip_newlines;
 850   unsigned char newline_in_args = 0;
 851
 852   pfile->state.skip_newlines = 0;
 853   result->flags = 0;
 854  next_char:
 855   pfile->lexer_pos.line = buffer->lineno;
 856  next_char2:
 857   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 858
 859   c = buffer->read_ahead;
 860   if (c == EOF && buffer->cur < buffer->rlimit)
 861     {
 862       c = *buffer->cur++;
 863       pfile->lexer_pos.col++;
 864     }
 865
 866  do_switch:
 867   buffer->read_ahead = EOF;
 868   switch (c)
 869     {
 870     case EOF:
 871       /* Non-empty files should end in a newline.  Ignore for command
 872          line and _Pragma buffers.  */
 873       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 874         cpp_pedwarn (pfile, "no newline at end of file");
 875       pfile->state.skip_newlines = 1;
 876       result->type = CPP_EOF;
 877       break;
 878
 879     case ' ': case '\t': case '\f': case '\v': case '\0':
 880       skip_whitespace (pfile, c);
 881       result->flags |= PREV_WHITE;
 882       goto next_char2;
 883
 884     case '\n': case '\r':
 885       /* Don't let directives spill over to the next line.  */
 886       if (pfile->state.in_directive)
 887         buffer->read_ahead = c;
 888       else
 889         {
 890           handle_newline (buffer, c);
 891
 892           pfile->lexer_pos.output_line = buffer->lineno;
 893
 894           /* Skip newlines in macro arguments (except in directives).  */
 895           if (pfile->state.parsing_args)
 896             {
 897               /* Set the whitespace flag.   */
 898               newline_in_args = 1;
 899               result->flags |= PREV_WHITE;
 900               goto next_char;
 901             }
 902
 903           if (was_skip_newlines)
 904             {
 905               /* Clear any whitespace flag.   */
 906               result->flags &= ~PREV_WHITE;
 907               goto next_char;
 908             }
 909         }
 910
 911       /* Next we're at BOL, so skip new lines.  */
 912       pfile->state.skip_newlines = 1;
 913       result->type = CPP_EOF;
 914       break;
 915
 916     case '?':
 917     case '\\':
 918       /* These could start an escaped newline, or '?' a trigraph.  Let
 919          skip_escaped_newlines do all the work.  */
 920       {
 921         unsigned int lineno = buffer->lineno;
 922
 923         c = skip_escaped_newlines (buffer, c);
 924         if (lineno != buffer->lineno)
 925           /* We had at least one escaped newline of some sort, and the
 926              next character is in buffer->read_ahead.  Update the
 927              token's line and column.  */
 928             goto next_char;
 929
 930         /* We are either the original '?' or '\\', or a trigraph.  */
 931         result->type = CPP_QUERY;
 932         buffer->read_ahead = EOF;
 933         if (c == '\\')
 934           goto random_char;
 935         else if (c != '?')
 936           goto do_switch;
 937       }
 938       break;
 939
 940     case '0': case '1': case '2': case '3': case '4':
 941     case '5': case '6': case '7': case '8': case '9':
 942       result->type = CPP_NUMBER;
 943       parse_number (pfile, &result->val.str, c, 0);
 944       break;
 945
 946     case '$':
 947       if (!CPP_OPTION (pfile, dollars_in_ident))
 948         goto random_char;
 949       /* Fall through... */
 950
 951     case '_':
 952     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 953     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 954     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 955     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 956     case 'y': case 'z':
 957     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 958     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 959     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 960     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 961     case 'Y': case 'Z':
 962       result->type = CPP_NAME;
 963       result->val.node = parse_identifier (pfile, c);
 964
 965       /* 'L' may introduce wide characters or strings.  */
 966       if (result->val.node == pfile->spec_nodes.n_L)
 967         {
 968           c = buffer->read_ahead; /* For make_string.  */
 969           if (c == '\'' || c == '"')
 970             {
 971               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 972               goto make_string;
 973             }
 974         }
 975       /* Convert named operators to their proper types.  */
 976       else if (result->val.node->flags & NODE_OPERATOR)
 977         {
 978           result->flags |= NAMED_OP;
 979           result->type = result->val.node->value.operator;
 980         }
 981       break;
 982
 983     case '\'':
 984     case '"':
 985       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 986     make_string:
 987       parse_string (pfile, result, c);
 988       break;
 989
 990     case '/':
 991       /* A potential block or line comment.  */
 992       comment_start = buffer->cur;
 993       result->type = CPP_DIV;
 994       c = get_effective_char (buffer);
 995       if (c == '=')
 996         ACCEPT_CHAR (CPP_DIV_EQ);
 997       if (c != '/' && c != '*')
 998         break;
 999
1000       if (c == '*')
1001         {
1002           if (skip_block_comment (pfile))
1003             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1004                                  pfile->lexer_pos.col,
1005                                  "unterminated comment");
1006         }
1007       else
1008         {
1009           if (!CPP_OPTION (pfile, cplusplus_comments)
1010               && !CPP_IN_SYSTEM_HEADER (pfile))
1011             break;
1012
1013           /* We silently allow C++ comments in system headers,
1014              irrespective of conformance mode, because lots of
1015              broken systems do that and trying to clean it up in
1016              fixincludes is a nightmare.  */
1017           if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1018               && ! buffer->warned_cplusplus_comments)
1019             {
1020               cpp_pedwarn (pfile,
1021                            "C++ style comments are not allowed in ISO C89");
1022               cpp_pedwarn (pfile,
1023                            "(this will be reported only once per input file)");
1024               buffer->warned_cplusplus_comments = 1;
1025             }
1026
1027           /* Skip_line_comment updates buffer->read_ahead.  */
1028           if (skip_line_comment (pfile))
1029             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1030                                    pfile->lexer_pos.col,
1031                                    "multi-line comment");
1032         }
1033
1034       /* Skipping the comment has updated buffer->read_ahead.  */
1035       if (!pfile->state.save_comments)
1036         {
1037           result->flags |= PREV_WHITE;
1038           goto next_char;
1039         }
1040
1041       /* Save the comment as a token in its own right.  */
1042       save_comment (pfile, result, comment_start);
1043       break;
1044
1045     case '<':
1046       if (pfile->state.angled_headers)
1047         {
1048           result->type = CPP_HEADER_NAME;
1049           c = '>';              /* terminator.  */
1050           goto make_string;
1051         }
1052
1053       result->type = CPP_LESS;
1054       c = get_effective_char (buffer);
1055       if (c == '=')
1056         ACCEPT_CHAR (CPP_LESS_EQ);
1057       else if (c == '<')
1058         {
1059           ACCEPT_CHAR (CPP_LSHIFT);
1060           if (get_effective_char (buffer) == '=')
1061             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1062         }
1063       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1064         {
1065           ACCEPT_CHAR (CPP_MIN);
1066           if (get_effective_char (buffer) == '=')
1067             ACCEPT_CHAR (CPP_MIN_EQ);
1068         }
1069       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1070         {
1071           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1072           result->flags |= DIGRAPH;
1073         }
1074       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1075         {
1076           ACCEPT_CHAR (CPP_OPEN_BRACE);
1077           result->flags |= DIGRAPH;
1078         }
1079       break;
1080
1081     case '>':
1082       result->type = CPP_GREATER;
1083       c = get_effective_char (buffer);
1084       if (c == '=')
1085         ACCEPT_CHAR (CPP_GREATER_EQ);
1086       else if (c == '>')
1087         {
1088           ACCEPT_CHAR (CPP_RSHIFT);
1089           if (get_effective_char (buffer) == '=')
1090             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1091         }
1092       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1093         {
1094           ACCEPT_CHAR (CPP_MAX);
1095           if (get_effective_char (buffer) == '=')
1096             ACCEPT_CHAR (CPP_MAX_EQ);
1097         }
1098       break;
1099
1100     case '%':
1101       lex_percent (buffer, result);
1102       if (result->type == CPP_HASH)
1103         goto do_hash;
1104       break;
1105
1106     case '.':
1107       lex_dot (pfile, result);
1108       break;
1109
1110     case '+':
1111       result->type = CPP_PLUS;
1112       c = get_effective_char (buffer);
1113       if (c == '=')
1114         ACCEPT_CHAR (CPP_PLUS_EQ);
1115       else if (c == '+')
1116         ACCEPT_CHAR (CPP_PLUS_PLUS);
1117       break;
1118
1119     case '-':
1120       result->type = CPP_MINUS;
1121       c = get_effective_char (buffer);
1122       if (c == '>')
1123         {
1124           ACCEPT_CHAR (CPP_DEREF);
1125           if (CPP_OPTION (pfile, cplusplus)
1126               && get_effective_char (buffer) == '*')
1127             ACCEPT_CHAR (CPP_DEREF_STAR);
1128         }
1129       else if (c == '=')
1130         ACCEPT_CHAR (CPP_MINUS_EQ);
1131       else if (c == '-')
1132         ACCEPT_CHAR (CPP_MINUS_MINUS);
1133       break;
1134
1135     case '*':
1136       result->type = CPP_MULT;
1137       if (get_effective_char (buffer) == '=')
1138         ACCEPT_CHAR (CPP_MULT_EQ);
1139       break;
1140
1141     case '=':
1142       result->type = CPP_EQ;
1143       if (get_effective_char (buffer) == '=')
1144         ACCEPT_CHAR (CPP_EQ_EQ);
1145       break;
1146
1147     case '!':
1148       result->type = CPP_NOT;
1149       if (get_effective_char (buffer) == '=')
1150         ACCEPT_CHAR (CPP_NOT_EQ);
1151       break;
1152
1153     case '&':
1154       result->type = CPP_AND;
1155       c = get_effective_char (buffer);
1156       if (c == '=')
1157         ACCEPT_CHAR (CPP_AND_EQ);
1158       else if (c == '&')
1159         ACCEPT_CHAR (CPP_AND_AND);
1160       break;
1161
1162     case '#':
1163       if (get_effective_char (buffer) == '#')
1164         ACCEPT_CHAR (CPP_PASTE);
1165       else
1166         {
1167           result->type = CPP_HASH;
1168         do_hash:
1169           /* CPP_DHASH is the hash introducing a directive.  */
1170           if (was_skip_newlines || newline_in_args)
1171             {
1172               result->type = CPP_DHASH;
1173               /* Get whitespace right - newline_in_args sets it.  */
1174               if (pfile->lexer_pos.col == 1)
1175                 result->flags &= ~PREV_WHITE;
1176             }
1177         }
1178       break;
1179
1180     case '|':
1181       result->type = CPP_OR;
1182       c = get_effective_char (buffer);
1183       if (c == '=')
1184         ACCEPT_CHAR (CPP_OR_EQ);
1185       else if (c == '|')
1186         ACCEPT_CHAR (CPP_OR_OR);
1187       break;
1188
1189     case '^':
1190       result->type = CPP_XOR;
1191       if (get_effective_char (buffer) == '=')
1192         ACCEPT_CHAR (CPP_XOR_EQ);
1193       break;
1194
1195     case ':':
1196       result->type = CPP_COLON;
1197       c = get_effective_char (buffer);
1198       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1199         ACCEPT_CHAR (CPP_SCOPE);
1200       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1201         {
1202           result->flags |= DIGRAPH;
1203           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1204         }
1205       break;
1206
1207     case '~': result->type = CPP_COMPL; break;
1208     case ',': result->type = CPP_COMMA; break;
1209     case '(': result->type = CPP_OPEN_PAREN; break;
1210     case ')': result->type = CPP_CLOSE_PAREN; break;
1211     case '[': result->type = CPP_OPEN_SQUARE; break;
1212     case ']': result->type = CPP_CLOSE_SQUARE; break;
1213     case '{': result->type = CPP_OPEN_BRACE; break;
1214     case '}': result->type = CPP_CLOSE_BRACE; break;
1215     case ';': result->type = CPP_SEMICOLON; break;
1216
1217     case '@':
1218       if (CPP_OPTION (pfile, objc))
1219         {
1220           /* In Objective C, '@' may begin keywords or strings, like
1221              @keyword or @"string".  It would be nice to call
1222              get_effective_char here and test the result.  However, we
1223              would then need to pass 2 characters to parse_identifier,
1224              making it ugly and slowing down its main loop.  Instead,
1225              we assume we have an identifier, and recover if not.  */
1226           result->type = CPP_NAME;
1227           result->val.node = parse_identifier (pfile, c);
1228           if (result->val.node->length != 1)
1229             break;
1230
1231           /* OK, so it wasn't an identifier.  Maybe a string?  */
1232           if (buffer->read_ahead == '"')
1233             {
1234               c = '"';
1235               ACCEPT_CHAR (CPP_OSTRING);
1236               goto make_string;
1237             }
1238         }
1239       goto random_char;
1240
1241     random_char:
1242     default:
1243       result->type = CPP_OTHER;
1244       result->val.aux = c;
1245       break;
1246     }
1247 }
1248
1249 /* An upper bound on the number of bytes needed to spell a token,
1250    including preceding whitespace.  */
1251 unsigned int
1252 cpp_token_len (token)
1253      const cpp_token *token;
1254 {
1255   unsigned int len;
1256
1257   switch (TOKEN_SPELL (token))
1258     {
1259     default:            len = 0;                        break;
1260     case SPELL_STRING:  len = token->val.str.len;       break;
1261     case SPELL_IDENT:   len = token->val.node->length;  break;
1262     }
1263   /* 1 for whitespace, 4 for comment delimeters.  */
1264   return len + 5;
1265 }
1266
1267 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1268    already contain the enough space to hold the token's spelling.
1269    Returns a pointer to the character after the last character
1270    written.  */
1271 unsigned char *
1272 cpp_spell_token (pfile, token, buffer)
1273      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1274      const cpp_token *token;
1275      unsigned char *buffer;
1276 {
1277   switch (TOKEN_SPELL (token))
1278     {
1279     case SPELL_OPERATOR:
1280       {
1281         const unsigned char *spelling;
1282         unsigned char c;
1283
1284         if (token->flags & DIGRAPH)
1285           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1286         else if (token->flags & NAMED_OP)
1287           goto spell_ident;
1288         else
1289           spelling = TOKEN_NAME (token);
1290
1291         while ((c = *spelling++) != '\0')
1292           *buffer++ = c;
1293       }
1294       break;
1295
1296     case SPELL_IDENT:
1297       spell_ident:
1298       memcpy (buffer, token->val.node->name, token->val.node->length);
1299       buffer += token->val.node->length;
1300       break;
1301
1302     case SPELL_STRING:
1303       {
1304         int left, right, tag;
1305         switch (token->type)
1306           {
1307           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1308           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1309           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1310           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1311           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1312           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1313           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1314           }
1315         if (tag) *buffer++ = tag;
1316         if (left) *buffer++ = left;
1317         memcpy (buffer, token->val.str.text, token->val.str.len);
1318         buffer += token->val.str.len;
1319         if (right) *buffer++ = right;
1320       }
1321       break;
1322
1323     case SPELL_CHAR:
1324       *buffer++ = token->val.aux;
1325       break;
1326
1327     case SPELL_NONE:
1328       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1329       break;
1330     }
1331
1332   return buffer;
1333 }
1334
1335 /* Returns a token as a null-terminated string.  The string is
1336    temporary, and automatically freed later.  Useful for diagnostics.  */
1337 unsigned char *
1338 cpp_token_as_text (pfile, token)
1339      cpp_reader *pfile;
1340      const cpp_token *token;
1341 {
1342   unsigned int len = cpp_token_len (token);
1343   unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
1344
1345   end = cpp_spell_token (pfile, token, start);
1346   end[0] = '\0';
1347
1348   return start;
1349 }
1350
1351 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1352 const char *
1353 cpp_type2name (type)
1354      enum cpp_ttype type;
1355 {
1356   return (const char *) token_spellings[type].name;
1357 }
1358
1359 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1360    for efficiency - to avoid double-buffering.  Also, outputs a space
1361    if PREV_WHITE is flagged.  */
1362 void
1363 cpp_output_token (token, fp)
1364      const cpp_token *token;
1365      FILE *fp;
1366 {
1367   if (token->flags & PREV_WHITE)
1368     putc (' ', fp);
1369
1370   switch (TOKEN_SPELL (token))
1371     {
1372     case SPELL_OPERATOR:
1373       {
1374         const unsigned char *spelling;
1375
1376         if (token->flags & DIGRAPH)
1377           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1378         else if (token->flags & NAMED_OP)
1379           goto spell_ident;
1380         else
1381           spelling = TOKEN_NAME (token);
1382
1383         ufputs (spelling, fp);
1384       }
1385       break;
1386
1387     spell_ident:
1388     case SPELL_IDENT:
1389       ufputs (token->val.node->name, fp);
1390     break;
1391
1392     case SPELL_STRING:
1393       {
1394         int left, right, tag;
1395         switch (token->type)
1396           {
1397           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1398           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1399           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1400           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1401           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1402           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1403           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1404           }
1405         if (tag) putc (tag, fp);
1406         if (left) putc (left, fp);
1407         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1408         if (right) putc (right, fp);
1409       }
1410       break;
1411
1412     case SPELL_CHAR:
1413       putc (token->val.aux, fp);
1414       break;
1415
1416     case SPELL_NONE:
1417       /* An error, most probably.  */
1418       break;
1419     }
1420 }
1421
1422 /* Compare two tokens.  */
1423 int
1424 _cpp_equiv_tokens (a, b)
1425      const cpp_token *a, *b;
1426 {
1427   if (a->type == b->type && a->flags == b->flags)
1428     switch (TOKEN_SPELL (a))
1429       {
1430       default:                  /* Keep compiler happy.  */
1431       case SPELL_OPERATOR:
1432         return 1;
1433       case SPELL_CHAR:
1434         return a->val.aux == b->val.aux; /* Character.  */
1435       case SPELL_NONE:
1436         return (a->type != CPP_MACRO_ARG || a->val.aux == b->val.aux);
1437       case SPELL_IDENT:
1438         return a->val.node == b->val.node;
1439       case SPELL_STRING:
1440         return (a->val.str.len == b->val.str.len
1441                 && !memcmp (a->val.str.text, b->val.str.text,
1442                             a->val.str.len));
1443       }
1444
1445   return 0;
1446 }
1447
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same
   number of tokens and each pair of corresponding tokens satisfies
   _cpp_equiv_tokens; returns 0 otherwise.  Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int n, count;

  count = a->limit - a->first;
  if (count != (b->limit - b->first))
    return 0;

  n = 0;
  while (n < count)
    {
      if (! _cpp_equiv_tokens (&a->first[n], &b->first[n]))
        return 0;
      n++;
    }

  return 1;
}
#endif
1467
/* Determine whether two tokens can be pasted together, and if so,
   what the resulting token is.  Returns CPP_EOF if the tokens cannot
   be pasted, or the appropriate type for the merged token if they
   can.

   PFILE supplies the language options consulted here (cplusplus,
   digraphs, objc).  TOKEN1 is the left-hand token and TOKEN2 the
   right-hand one.  *DIGRAPH is written only on the paths that can
   produce a digraph spelling; on every other path it is left
   untouched, so the caller presumably initializes it - confirm at
   the call site.  */
enum cpp_ttype
cpp_can_paste (pfile, token1, token2, digraph)
     cpp_reader * pfile;
     const cpp_token *token1, *token2;
     int* digraph;
{
  enum cpp_ttype a = token1->type, b = token2->type;
  int cxx = CPP_OPTION (pfile, cplusplus);

  /* Treat named operators as if they were ordinary NAMEs.  */
  if (token1->flags & NAMED_OP)
    a = CPP_NAME;
  if (token2->flags & NAMED_OP)
    b = CPP_NAME;

  /* Anything up to CPP_LAST_EQ followed by '=' becomes its "op="
     form.  NOTE(review): this relies on the enum placing each "op="
     type at the same distance from its base operator as CPP_EQ_EQ is
     from CPP_EQ - confirm against the cpp_ttype definition.  */
  if (a <= CPP_LAST_EQ && b == CPP_EQ)
    return a + (CPP_EQ_EQ - CPP_EQ);

  switch (a)
    {
    case CPP_GREATER:
      if (b == a) return CPP_RSHIFT;
      if (b == CPP_QUERY && cxx)        return CPP_MAX;
      if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
      break;
    case CPP_LESS:
      if (b == a) return CPP_LSHIFT;
      if (b == CPP_QUERY && cxx)        return CPP_MIN;
      if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
      if (CPP_OPTION (pfile, digraphs))
        {
          if (b == CPP_COLON)
            {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
          if (b == CPP_MOD)
            {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
        }
      break;

    case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
    case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
    case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;

    case CPP_MINUS:
      if (b == a)               return CPP_MINUS_MINUS;
      if (b == CPP_GREATER)     return CPP_DEREF;
      break;
    case CPP_COLON:
      if (b == a && cxx)        return CPP_SCOPE;
      if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
        {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
      break;

    case CPP_MOD:
      if (CPP_OPTION (pfile, digraphs))
        {
          if (b == CPP_GREATER)
            {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
          if (b == CPP_COLON)
            {*digraph = 1; return CPP_HASH;}         /* %: digraph */
        }
      break;
    case CPP_DEREF:
      if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
      break;
    case CPP_DOT:
      if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
      if (b == CPP_NUMBER)      return CPP_NUMBER;
      break;

    case CPP_HASH:
      /* Two hashes paste only when both are spelt the same way (both
         plain or both digraph).  Unlike the other cases, *DIGRAPH
         receives the raw DIGRAPH flag bit (or zero) rather than 1.  */
      if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
        /* %:%: digraph */
        {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
      break;

    case CPP_NAME:
      if (b == CPP_NAME)        return CPP_NAME;
      /* A number pastes onto a name only if name_p accepts its
         spelling.  */
      if (b == CPP_NUMBER
          && name_p (pfile, &token2->val.str)) return CPP_NAME;
      /* The identifier L turns a character or string into its wide
         form.  */
      if (b == CPP_CHAR
          && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
      if (b == CPP_STRING
          && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
      break;

    case CPP_NUMBER:
      if (b == CPP_NUMBER)      return CPP_NUMBER;
      if (b == CPP_NAME)        return CPP_NUMBER;
      if (b == CPP_DOT)         return CPP_NUMBER;
      /* Numbers cannot have length zero, so this is safe.  */
      if ((b == CPP_PLUS || b == CPP_MINUS)
          && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
        return CPP_NUMBER;
      break;

    case CPP_OTHER:
      /* In Objective C, '@' can start a keyword or a string.  */
      if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
        {
          if (b == CPP_NAME)    return CPP_NAME;
          if (b == CPP_STRING)  return CPP_OSTRING;
        }
      /* Otherwise falls through to the default case, which only
         breaks out of the switch.  */

    default:
      break;
    }

  return CPP_EOF;
}
1580
1581 /* Returns nonzero if a space should be inserted to avoid an
1582    accidental token paste for output.  For simplicity, it is
1583    conservative, and occasionally advises a space where one is not
1584    needed, e.g. "." and ".2".  */
1585
1586 int
1587 cpp_avoid_paste (pfile, token1, token2)
1588      cpp_reader *pfile;
1589      const cpp_token *token1, *token2;
1590 {
1591   enum cpp_ttype a = token1->type, b = token2->type;
1592   cppchar_t c;
1593
1594   if (token1->flags & NAMED_OP)
1595     a = CPP_NAME;
1596   if (token2->flags & NAMED_OP)
1597     b = CPP_NAME;
1598
1599   c = EOF;
1600   if (token2->flags & DIGRAPH)
1601     c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1602   else if (token_spellings[b].category == SPELL_OPERATOR)
1603     c = token_spellings[b].name[0];
1604
1605   /* Quickly get everything that can paste with an '='.  */
1606   if (a <= CPP_LAST_EQ && c == '=')
1607     return 1;
1608
1609   switch (a)
1610     {
1611     case CPP_GREATER:   return c == '>' || c == '?';
1612     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1613     case CPP_PLUS:      return c == '+';
1614     case CPP_MINUS:     return c == '-' || c == '>';
1615     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1616     case CPP_MOD:       return c == ':' || c == '>';
1617     case CPP_AND:       return c == '&';
1618     case CPP_OR:        return c == '|';
1619     case CPP_COLON:     return c == ':' || c == '>';
1620     case CPP_DEREF:     return c == '*';
1621     case CPP_DOT:       return c == '.' || c == '%';
1622     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1623     case CPP_NAME:      return ((b == CPP_NUMBER
1624                                  && name_p (pfile, &token2->val.str))
1625                                 || b == CPP_NAME
1626                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1627     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1628                                 || c == '.' || c == '+' || c == '-');
1629     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1630                                 && token1->val.aux == '@'
1631                                 && (b == CPP_NAME || b == CPP_STRING));
1632     default:            break;
1633     }
1634
1635   return 0;
1636 }
1637
1638 /* Output all the remaining tokens on the current line, and a newline
1639    character, to FP.  Leading whitespace is removed.  */
1640 void
1641 cpp_output_line (pfile, fp)
1642      cpp_reader *pfile;
1643      FILE *fp;
1644 {
1645   cpp_token token;
1646
1647   _cpp_get_token (pfile, &token);
1648   token.flags &= ~PREV_WHITE;
1649   while (token.type != CPP_EOF)
1650     {
1651       cpp_output_token (&token, fp);
1652       _cpp_get_token (pfile, &token);
1653     }
1654
1655   putc ('\n', fp);
1656 }
1657
1658 /* Memory pools.  */
1659
/* Never instantiated; it exists only so that DEFAULT_ALIGNMENT can
   measure where the compiler places the union U after a single
   char.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* The offset of U within struct dummy, i.e. the padding the compiler
   inserts so that a double or a pointer is suitably aligned.  Used
   as the alignment of a pool when none is requested, and to round up
   chunk sizes.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1671
1672 static int
1673 chunk_suitable (pool, chunk, size)
1674      cpp_pool *pool;
1675      cpp_chunk *chunk;
1676      unsigned int size;
1677 {
1678   /* Being at least twice SIZE means we can use memcpy in
1679      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1680      anyway.  */
1681   return (chunk && pool->locked != chunk
1682           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1683 }
1684
/* Move POOL on to its next chunk, carrying over the POOL_ROOM (pool)
   bytes that start at POOL_FRONT (pool) in the current chunk.

   LEN is the minimum extra size wanted in the new chunk.  If PTR is
   non-null, *PTR points to a char in the old chunk and is updated to
   point to the same char in the new one.  The chunk following the
   current one is reused when chunk_suitable accepts it; otherwise a
   fresh chunk is allocated and linked in directly after the current
   one.  Returns POOL_LIMIT (pool) for the new current chunk.  */
unsigned char *
_cpp_next_chunk (pool, len, ptr)
     cpp_pool *pool;
     unsigned int len;
     unsigned char **ptr;
{
  cpp_chunk *chunk = pool->cur->next;

  /* LEN is the minimum size we want in the new pool.  */
  len += POOL_ROOM (pool);
  if (! chunk_suitable (pool, chunk, len))
    {
      /* Grow generously: twice POOL_SIZE plus what is needed now.  */
      chunk = new_chunk (POOL_SIZE (pool) * 2 + len);

      /* Splice the new chunk in between the current chunk and its
         successor.  */
      chunk->next = pool->cur->next;
      pool->cur->next = chunk;
    }

  /* Update the pointer before changing chunk's front.  */
  if (ptr)
    *ptr += chunk->base - POOL_FRONT (pool);

  /* Carry the old chunk's bytes from its front over to the start of
     the new chunk.  memcpy is used on the strength of the size check
     in chunk_suitable; NOTE(review): this presumably also covers the
     case where CHUNK is the current chunk itself in a one-chunk
     circular pool - confirm.  */
  memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
  chunk->front = chunk->base;

  pool->cur = chunk;
  return POOL_LIMIT (pool);
}
1715
1716 static cpp_chunk *
1717 new_chunk (size)
1718      unsigned int size;
1719 {
1720   unsigned char *base;
1721   cpp_chunk *result;
1722
1723   size = ALIGN (size, DEFAULT_ALIGNMENT);
1724   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1725   /* Put the chunk descriptor at the end.  Then chunk overruns will
1726      cause obvious chaos.  */
1727   result = (cpp_chunk *) (base + size);
1728   result->base = base;
1729   result->front = base;
1730   result->limit = base + size;
1731   result->next = 0;
1732
1733   return result;
1734 }
1735
1736 void
1737 _cpp_init_pool (pool, size, align, temp)
1738      cpp_pool *pool;
1739      unsigned int size, align, temp;
1740 {
1741   if (align == 0)
1742     align = DEFAULT_ALIGNMENT;
1743   if (align & (align - 1))
1744     abort ();
1745   pool->align = align;
1746   pool->cur = new_chunk (size);
1747   pool->locked = 0;
1748   pool->locks = 0;
1749   if (temp)
1750     pool->cur->next = pool->cur;
1751 }
1752
1753 void
1754 _cpp_lock_pool (pool)
1755      cpp_pool *pool;
1756 {
1757   if (pool->locks++ == 0)
1758     pool->locked = pool->cur;
1759 }
1760
1761 void
1762 _cpp_unlock_pool (pool)
1763      cpp_pool *pool;
1764 {
1765   if (--pool->locks == 0)
1766     pool->locked = 0;
1767 }
1768
1769 void
1770 _cpp_free_pool (pool)
1771      cpp_pool *pool;
1772 {
1773   cpp_chunk *chunk = pool->cur, *next;
1774
1775   do
1776     {
1777       next = chunk->next;
1778       free (chunk->base);
1779       chunk = next;
1780     }
1781   while (chunk && chunk != pool->cur);
1782 }
1783
1784 /* Reserve LEN bytes from a memory pool.  */
1785 unsigned char *
1786 _cpp_pool_reserve (pool, len)
1787      cpp_pool *pool;
1788      unsigned int len;
1789 {
1790   len = ALIGN (len, pool->align);
1791   if (len > (unsigned int) POOL_ROOM (pool))
1792     _cpp_next_chunk (pool, len, 0);
1793
1794   return POOL_FRONT (pool);
1795 }
1796
1797 /* Allocate LEN bytes from a memory pool.  */
1798 unsigned char *
1799 _cpp_pool_alloc (pool, len)
1800      cpp_pool *pool;
1801      unsigned int len;
1802 {
1803   unsigned char *result = _cpp_pool_reserve (pool, len);
1804
1805   POOL_COMMIT (pool, len);
1806   return result;
1807 }