gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* Spelling categories for token types (see token_spellings).  Tokens
  43    with SPELL_STRING store their spelling in the token list, and its
     length in the token->val.name.len.  Entries of token_spellings
     built by the OP macro use SPELL_OPERATOR and carry the operator's
     spelling in their name field.  */
  44 enum spell_type
  45 {
  46   SPELL_OPERATOR = 0,
  47   SPELL_CHAR,
  48   SPELL_IDENT,
  49   SPELL_STRING,
  50   SPELL_NONE
  51 };
  52
     /* One entry of the token_spellings table.  */
  53 struct token_spelling
  54 {
       /* Spelling category of the token type.  */
  55   enum spell_type category;
       /* For entries built by OP, the operator's spelling; for entries
          built by TK, STRINGX (e) -- presumably the stringized
          enumerator name, confirm against the definition of STRINGX.  */
  56   const unsigned char *name;
  57 };
  58
     /* Spellings of the six digraphs.  NOTE(review): whatever indexes this
        array lies outside the section under review; the order here
        presumably has to match that user -- confirm before reordering.  */
  59 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  60                                              U":>", U"<%", U"%>"};
  61
     /* Table of spellings indexed by token type (see TOKEN_SPELL and
        TOKEN_NAME), initialized from TTYPE_TABLE -- presumably a list of
        OP and TK invocations.  OP entries get SPELL_OPERATOR and the
        spelling S; TK entries take their category from S and use
        STRINGX (E) as their name.  OP and TK exist only while the table
        is being expanded.  */
  62 #define OP(e, s) { SPELL_OPERATOR, U s           },
  63 #define TK(e, s) { s,              U STRINGX (e) },
  64 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  65 #undef OP
  66 #undef TK
  67
     /* Spelling category and name string of TOKEN, looked up in
        token_spellings by TOKEN's type.  */
  68 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  69 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  70
     /* Forward declarations of the file-local helpers.  The first group
        works directly on a cpp_buffer and deals with newlines, trigraphs
        and escaped newlines.  */
  71 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  73 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  74
     /* Comment and whitespace skipping, and the routines that lex
        individual kinds of token.  */
  75 static int skip_block_comment PARAMS ((cpp_reader *));
  76 static int skip_line_comment PARAMS ((cpp_reader *));
  77 static void adjust_column PARAMS ((cpp_reader *));
  78 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  80 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  81 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  82 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  83 static void unterminated PARAMS ((cpp_reader *, int));
  84 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  85 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  86 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  87 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
  88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  89
     /* NOTE(review): the definitions of these two are outside the section
        under review.  */
  90 static cpp_chunk *new_chunk PARAMS ((unsigned int));
  91 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
  92
  93 /* Utility routine:
  94
  95    Compares, the token TOKEN to the NUL-terminated string STRING.
  96    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  97
  98 int
  99 cpp_ideq (token, string)
 100      const cpp_token *token;
 101      const char *string;
 102 {
 103   if (token->type != CPP_NAME)
 104     return 0;
 105
 106   return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
 107 }
 108
 109 /* Call when meeting a newline.  Returns the character after the newline
 110    (or carriage-return newline combination), or EOF.  */
 111 static cppchar_t
 112 handle_newline (buffer, newline_char)
 113      cpp_buffer *buffer;
 114      cppchar_t newline_char;
 115 {
 116   cppchar_t next = EOF;
 117
 118   buffer->col_adjust = 0;
 119   buffer->lineno++;
 120   buffer->line_base = buffer->cur;
 121
 122   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 123   if (buffer->cur < buffer->rlimit)
 124     {
 125       next = *buffer->cur++;
 126       if (next + newline_char == '\r' + '\n')
 127         {
 128           buffer->line_base = buffer->cur;
 129           if (buffer->cur < buffer->rlimit)
 130             next = *buffer->cur++;
 131           else
 132             next = EOF;
 133         }
 134     }
 135
 136   buffer->read_ahead = next;
 137   return next;
 138 }
 139
 140 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 141    encountered.  It warns if necessary, and returns true if the
 142    trigraph should be honoured.  FROM_CHAR is the third character of a
 143    trigraph, and presumed to be the previous character for position
 144    reporting.  */
 145 static int
 146 trigraph_ok (pfile, from_char)
 147      cpp_reader *pfile;
 148      cppchar_t from_char;
 149 {
 150   int accept = CPP_OPTION (pfile, trigraphs);
 151
 152   /* Don't warn about trigraphs in comments.  */
 153   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 154     {
 155       cpp_buffer *buffer = pfile->buffer;
 156       if (accept)
 157         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 158                                "trigraph ??%c converted to %c",
 159                                (int) from_char,
 160                                (int) _cpp_trigraph_map[from_char]);
 161       else if (buffer->cur != buffer->last_Wtrigraphs)
 162         {
 163           buffer->last_Wtrigraphs = buffer->cur;
 164           cpp_warning_with_line (pfile, buffer->lineno,
 165                                  CPP_BUF_COL (buffer) - 2,
 166                                  "trigraph ??%c ignored", (int) from_char);
 167         }
 168     }
 169
 170   return accept;
 171 }
 172
 173 /* Assumes local variables buffer and result.  Records T as the type
     of the token in RESULT and empties buffer->read_ahead by setting
     it to EOF.  */
 174 #define ACCEPT_CHAR(t) \
 175   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 176
 177 /* Checkpoint and rewind the read position of the buffer.  Both macros
     assume local variables buffer and saved_cur.

     When we move to multibyte character sets, add to these something
 178    that saves and restores the state of the multibyte conversion
 179    library.  This probably involves saving and restoring a "cookie".
 180    In the case of glibc it is an 8-byte structure, so is not a high
 181    overhead operation.  In any case, it's out of the fast path.  */
 182 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 183 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 184
 185 /* Skips any escaped newlines introduced by NEXT, which is either a
 186    '?' or a '\\'.  Returns the next character, which will also have
 187    been placed in buffer->read_ahead.  This routine performs
 188    preprocessing stages 1 and 2 of the ISO C standard.

        A '?' is treated specially only when it begins a trigraph that
        trigraph_ok accepts; NEXT then becomes the character the trigraph
        maps to.  A backslash (literal, or produced by a trigraph)
        followed by optional non-vertical space and a newline is dropped,
        and the loop repeats if the character after the newline is again
        '?' or '\\'.  Whenever a candidate sequence turns out not to be
        one, buffer->cur is rewound to the last saved position and the
        current value of NEXT is returned.  If buffer->from_stage3 is
        set, NEXT is returned untouched.  The result is EOF when an
        escaped newline is the last thing in the buffer.  */
 189 static cppchar_t
 190 skip_escaped_newlines (buffer, next)
 191      cpp_buffer *buffer;
 192      cppchar_t next;
 193 {
 194   /* Only do this if we apply stages 1 and 2.  */
 195   if (!buffer->from_stage3)
 196     {
 197       cppchar_t next1;
 198       const unsigned char *saved_cur;
 199       int space;
 200
 201       do
 202         {
               /* Nothing follows NEXT, so it stands for itself.  */
 203           if (buffer->cur == buffer->rlimit)
 204             break;
 205
 206           SAVE_STATE ();
 207           if (next == '?')
 208             {
                   /* Need a second '?' and room for a third character.  */
 209               next1 = *buffer->cur++;
 210               if (next1 != '?' || buffer->cur == buffer->rlimit)
 211                 {
 212                   RESTORE_STATE ();
 213                   break;
 214                 }
 215
                   /* The third character must have an entry in the
                      trigraph map, and trigraph_ok must agree to the
                      conversion.  */
 216               next1 = *buffer->cur++;
 217               if (!_cpp_trigraph_map[next1]
 218                   || !trigraph_ok (buffer->pfile, next1))
 219                 {
 220                   RESTORE_STATE ();
 221                   break;
 222                 }
 223
 224               /* We have a full trigraph here.  */
 225               next = _cpp_trigraph_map[next1];
 226               if (next != '\\' || buffer->cur == buffer->rlimit)
 227                 break;
                   /* The trigraph is consumed for good: a later
                      RESTORE_STATE returns to just after it.  */
 228               SAVE_STATE ();
 229             }
 230
 231           /* We have a backslash, and room for at least one more character.  */
 232           space = 0;
               /* Step over non-vertical space between the backslash and
                  a possible newline, remembering whether there was
                  any.  */
 233           do
 234             {
 235               next1 = *buffer->cur++;
 236               if (!is_nvspace (next1))
 237                 break;
 238               space = 1;
 239             }
 240           while (buffer->cur < buffer->rlimit);
 241
               /* No newline follows, so the backslash is an ordinary
                  character; undo the look-ahead.  */
 242           if (!is_vspace (next1))
 243             {
 244               RESTORE_STATE ();
 245               break;
 246             }
 247
 248           if (space && !buffer->pfile->state.lexing_comment)
 249             cpp_warning (buffer->pfile,
 250                          "backslash and newline separated by space");
 251
               /* handle_newline yields the character after the newline,
                  which may itself start another escaped newline.  */
 252           next = handle_newline (buffer, next1);
 253           if (next == EOF)
 254             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 255         }
 256       while (next == '\\' || next == '?');
 257     }
 258
 259   buffer->read_ahead = next;
 260   return next;
 261 }
 262
 263 /* Obtain the next character, after trigraph conversion and skipping
 264    an arbitrary string of escaped newlines.  The common case of no
 265    trigraphs or escaped newlines falls through quickly.  */
 266 static cppchar_t
 267 get_effective_char (buffer)
 268      cpp_buffer *buffer;
 269 {
 270   cppchar_t next = EOF;
 271
 272   if (buffer->cur < buffer->rlimit)
 273     {
 274       next = *buffer->cur++;
 275
 276       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 277          can introduce escaped newlines, which we want to skip, or
 278          UCNs, which, depending upon lexer state, we will handle in
 279          the future.  */
 280       if (next == '?' || next == '\\')
 281         next = skip_escaped_newlines (buffer, next);
 282     }
 283
 284   buffer->read_ahead = next;
 285   return next;
 286 }
 287
 288 /* Skip a C-style block comment.  We find the end of the comment by
 289    seeing if an asterisk is before every '/' we encounter.  Returns
 290    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set for the duration of the scan
        and cleared afterwards, and buffer->read_ahead is left as EOF.
        Presumably entered with buffer->cur just past the comment
        opener -- confirm against the caller.  */
 291 static int
 292 skip_block_comment (pfile)
 293      cpp_reader *pfile;
 294 {
 295   cpp_buffer *buffer = pfile->buffer;
       /* C is the character being examined, PREVC the one before it.  */
 296   cppchar_t c = EOF, prevc = EOF;
 297
 298   pfile->state.lexing_comment = 1;
 299   while (buffer->cur != buffer->rlimit)
 300     {
 301       prevc = c, c = *buffer->cur++;
 302
           /* Jumped to when C has already been fetched and must be
              examined without reading another character.  */
 303     next_char:
 304       /* FIXME: For speed, create a new character class of characters
 305          of interest inside block comments.  */
 306       if (c == '?' || c == '\\')
 307         c = skip_escaped_newlines (buffer, c);
 308
 309       /* People like decorating comments with '*', so check for '/'
 310          instead for efficiency.  */
 311       if (c == '/')
 312         {
 313           if (prevc == '*')
 314             break;
 315
 316           /* Warn about potential nested comments, but not if the '/'
 317              comes immediately before the true comment delimiter.
 318              Don't bother to get it right across escaped newlines.  */
 319           if (CPP_OPTION (pfile, warn_comments)
 320               && buffer->cur != buffer->rlimit)
 321             {
 322               prevc = c, c = *buffer->cur++;
 323               if (c == '*' && buffer->cur != buffer->rlimit)
 324                 {
 325                   prevc = c, c = *buffer->cur++;
 326                   if (c != '/')
 327                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 328                                            CPP_BUF_COL (buffer),
 329                                            "\"/*\" within comment");
 330                 }
               /* Whatever was read ahead still has to be examined.  */
 331               goto next_char;
 332             }
 333         }
 334       else if (is_vspace (c))
 335         {
               /* handle_newline returns the character after the newline;
                  examine it directly.  */
 336           prevc = c, c = handle_newline (buffer, c);
 337           goto next_char;
 338         }
 339       else if (c == '\t')
 340         adjust_column (pfile);
 341     }
 342
 343   pfile->state.lexing_comment = 0;
 344   buffer->read_ahead = EOF;
       /* Zero only if the last two characters seen close the comment.  */
 345   return c != '/' || prevc != '*';
 346 }
 347
 348 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 349    non-zero if a multiline comment.  The following new line, if any,
 350    is left in buffer->read_ahead.  */
 351 static int
 352 skip_line_comment (pfile)
 353      cpp_reader *pfile;
 354 {
 355   cpp_buffer *buffer = pfile->buffer;
 356   unsigned int orig_lineno = buffer->lineno;
 357   cppchar_t c;
 358
 359   pfile->state.lexing_comment = 1;
 360   do
 361     {
 362       c = EOF;
 363       if (buffer->cur == buffer->rlimit)
 364         break;
 365
 366       c = *buffer->cur++;
 367       if (c == '?' || c == '\\')
 368         c = skip_escaped_newlines (buffer, c);
 369     }
 370   while (!is_vspace (c));
 371
 372   pfile->state.lexing_comment = 0;
 373   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 374   return orig_lineno != buffer->lineno;
 375 }
 376
 377 /* pfile->buffer->cur is one beyond the \t character.  Update
 378    col_adjust so we track the column correctly.  */
 379 static void
 380 adjust_column (pfile)
 381      cpp_reader *pfile;
 382 {
 383   cpp_buffer *buffer = pfile->buffer;
 384   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 385
 386   /* Round it up to multiple of the tabstop, but subtract 1 since the
 387      tab itself occupies a character position.  */
 388   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 389                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 390 }
 391
 392 /* Skips whitespace, saving the next non-whitespace character.
 393    Adjusts pfile->col_adjust to account for tabs.  Without this,
 394    tokens might be assigned an incorrect column.  */
 395 static void
 396 skip_whitespace (pfile, c)
 397      cpp_reader *pfile;
 398      cppchar_t c;
 399 {
 400   cpp_buffer *buffer = pfile->buffer;
 401   unsigned int warned = 0;
 402
 403   do
 404     {
 405       /* Horizontal space always OK.  */
 406       if (c == ' ')
 407         ;
 408       else if (c == '\t')
 409         adjust_column (pfile);
 410       /* Just \f \v or \0 left.  */
 411       else if (c == '\0')
 412         {
 413           if (!warned)
 414             {
 415               cpp_warning (pfile, "null character(s) ignored");
 416               warned = 1;
 417             }
 418         }
 419       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 420         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 421                                CPP_BUF_COL (buffer),
 422                                "%s in preprocessing directive",
 423                                c == '\f' ? "form feed" : "vertical tab");
 424
 425       c = EOF;
 426       if (buffer->cur == buffer->rlimit)
 427         break;
 428       c = *buffer->cur++;
 429     }
 430   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 431   while (is_nvspace (c));
 432
 433   /* Remember the next character.  */
 434   buffer->read_ahead = c;
 435 }
 436
 437 /* See if the characters of a number token are valid in a name (no
 438    '.', '+' or '-').  */
 439 static int
 440 name_p (pfile, string)
 441      cpp_reader *pfile;
 442      const cpp_string *string;
 443 {
 444   unsigned int i;
 445
 446   for (i = 0; i < string->len; i++)
 447     if (!is_idchar (string->text[i]))
 448       return 0;
 449
 450   return 1;
 451 }
 452
 453 /* Parse an identifier, skipping embedded backslash-newlines.
 454    Calculate the hash value of the token while parsing, for improved
 455    performance.  The hashing algorithm *must* match cpp_lookup().  */
 456
 457 static cpp_hashnode *
 458 parse_identifier (pfile, c)
 459      cpp_reader *pfile;
 460      cppchar_t c;
 461 {
 462   cpp_hashnode *result;
 463   cpp_buffer *buffer = pfile->buffer;
 464   unsigned char *dest, *limit;
 465   unsigned int r = 0, saw_dollar = 0;
 466
 467   dest = POOL_FRONT (&pfile->ident_pool);
 468   limit = POOL_LIMIT (&pfile->ident_pool);
 469
 470   do
 471     {
 472       do
 473         {
 474           /* Need room for terminating null.  */
 475           if (dest + 1 >= limit)
 476             limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
 477
 478           *dest++ = c;
 479           r = HASHSTEP (r, c);
 480
 481           if (c == '$')
 482             saw_dollar++;
 483
 484           c = EOF;
 485           if (buffer->cur == buffer->rlimit)
 486             break;
 487
 488           c = *buffer->cur++;
 489         }
 490       while (is_idchar (c));
 491
 492       /* Potential escaped newline?  */
 493       if (c != '?' && c != '\\')
 494         break;
 495       c = skip_escaped_newlines (buffer, c);
 496     }
 497   while (is_idchar (c));
 498
 499   /* Remember the next character.  */
 500   buffer->read_ahead = c;
 501
 502   /* $ is not a identifier character in the standard, but is commonly
 503      accepted as an extension.  Don't warn about it in skipped
 504      conditional blocks.  */
 505   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 506     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 507
 508   /* Identifiers are null-terminated.  */
 509   *dest = '\0';
 510
 511   /* This routine commits the memory if necessary.  */
 512   result = _cpp_lookup_with_hash (pfile,
 513                                   dest - POOL_FRONT (&pfile->ident_pool), r);
 514
 515   /* Some identifiers require diagnostics when lexed.  */
 516   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 517     {
 518       /* It is allowed to poison the same identifier twice.  */
 519       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 520         cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 527     }
 528
 529   return result;
 530 }
 531
 532 /* Parse a number, skipping embedded backslash-newlines.  */
 533 static void
 534 parse_number (pfile, number, c, leading_period)
 535      cpp_reader *pfile;
 536      cpp_string *number;
 537      cppchar_t c;
 538      int leading_period;
 539 {
 540   cpp_buffer *buffer = pfile->buffer;
 541   cpp_pool *pool = &pfile->ident_pool;
 542   unsigned char *dest, *limit;
 543
 544   dest = POOL_FRONT (pool);
 545   limit = POOL_LIMIT (pool);
 546
 547   /* Place a leading period.  */
 548   if (leading_period)
 549     {
 550       if (dest >= limit)
 551         limit = _cpp_next_chunk (pool, 0, &dest);
 552       *dest++ = '.';
 553     }
 554
 555   do
 556     {
 557       do
 558         {
 559           /* Need room for terminating null.  */
 560           if (dest + 1 >= limit)
 561             limit = _cpp_next_chunk (pool, 0, &dest);
 562           *dest++ = c;
 563
 564           c = EOF;
 565           if (buffer->cur == buffer->rlimit)
 566             break;
 567
 568           c = *buffer->cur++;
 569         }
 570       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 571
 572       /* Potential escaped newline?  */
 573       if (c != '?' && c != '\\')
 574         break;
 575       c = skip_escaped_newlines (buffer, c);
 576     }
 577   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 578
 579   /* Remember the next character.  */
 580   buffer->read_ahead = c;
 581
 582   /* Null-terminate the number.  */
 583   *dest = '\0';
 584
 585   number->text = POOL_FRONT (pool);
 586   number->len = dest - number->text;
 587   POOL_COMMIT (pool, number->len + 1);
 588 }
 589
 590 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 591 static void
 592 unterminated (pfile, term)
 593      cpp_reader *pfile;
 594      int term;
 595 {
 596   cpp_error (pfile, "missing terminating %c character", term);
 597
 598   if (term == '\"' && pfile->mlstring_pos.line
 599       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 600     {
 601       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 602                            pfile->mlstring_pos.col,
 603                            "possible start of unterminated string literal");
 604       pfile->mlstring_pos.line = 0;
 605     }
 606 }
 607
 608 /* Subroutine of parse_string.  */
 609 static int
 610 unescaped_terminator_p (pfile, dest)
 611      cpp_reader *pfile;
 612      const unsigned char *dest;
 613 {
 614   const unsigned char *start, *temp;
 615
 616   /* In #include-style directives, terminators are not escapeable.  */
 617   if (pfile->state.angled_headers)
 618     return 1;
 619
 620   start = POOL_FRONT (&pfile->ident_pool);
 621
 622   /* An odd number of consecutive backslashes represents an escaped
 623      terminator.  */
 624   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 625     ;
 626
 627   return ((dest - temp) & 1) == 0;
 628 }
 629
 630 /* Parses a string, character constant, or angle-bracketed header file
 631    name.  Handles embedded trigraphs and escaped newlines.  The stored
 632    string is guaranteed NUL-terminated, but it is not guaranteed that
 633    this is the first NUL since embedded NULs are preserved.
 634
 635    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the closing delimiter.  The characters up to but
        not including it are accumulated in pfile->ident_pool and
        described by TOKEN->val.str.  On a normal finish
        buffer->read_ahead is EOF; when the literal is cut short at an
        end of line, that newline character is left there instead.  */
 636 static void
 637 parse_string (pfile, token, terminator)
 638      cpp_reader *pfile;
 639      cpp_token *token;
 640      cppchar_t terminator;
 641 {
 642   cpp_buffer *buffer = pfile->buffer;
 643   cpp_pool *pool = &pfile->ident_pool;
 644   unsigned char *dest, *limit;
 645   cppchar_t c;
       /* Counts embedded nulls so they are warned about only once.  */
 646   unsigned int nulls = 0;
 647
 648   dest = POOL_FRONT (pool);
 649   limit = POOL_LIMIT (pool);
 650
 651   for (;;)
 652     {
 653       if (buffer->cur == buffer->rlimit)
 654         c = EOF;
 655       else
 656         c = *buffer->cur++;
 657
           /* Re-entered with C already fetched after a newline inside a
              multi-line string.  */
 658     have_char:
 659       /* We need space for the terminating NUL.  */
 660       if (dest >= limit)
 661         limit = _cpp_next_chunk (pool, 0, &dest);
 662
 663       if (c == EOF)
 664         {
 665           unterminated (pfile, terminator);
 666           break;
 667         }
 668
 669       /* Handle trigraphs, escaped newlines etc.  */
 670       if (c == '?' || c == '\\')
 671         c = skip_escaped_newlines (buffer, c);
           /* NOTE(review): skip_escaped_newlines can return EOF; that value
              is not re-checked here and appears to fall through to the
              store at the bottom of the loop -- confirm this is
              intended.  */
 672
 673       if (c == terminator && unescaped_terminator_p (pfile, dest))
 674         {
               /* Normal end of literal: nothing is left in read_ahead.  */
 675           c = EOF;
 676           break;
 677         }
 678       else if (is_vspace (c))
 679         {
 680           /* In assembly language, silently terminate string and
 681              character literals at end of line.  This is a kludge
 682              around not knowing where comments are.  */
 683           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 684             break;
 685
 686           /* Character constants and header names may not extend over
 687              multiple lines.  In Standard C, neither may strings.
 688              Unfortunately, we accept multiline strings as an
 689              extension, except in #include family directives.  */
 690           if (terminator != '"' || pfile->state.angled_headers)
 691             {
 692               unterminated (pfile, terminator);
 693               break;
 694             }
 695
 696           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
               /* Remember where the first multi-line string began, for
                  the diagnostic issued by unterminated.  */
 697           if (pfile->mlstring_pos.line == 0)
 698             pfile->mlstring_pos = pfile->lexer_pos;
 699
               /* Whatever form the newline took, it is stored as a
                  single '\n'.  */
 700           c = handle_newline (buffer, c);
 701           *dest++ = '\n';
 702           goto have_char;
 703         }
 704       else if (c == '\0')
 705         {
 706           if (nulls++ == 0)
 707             cpp_warning (pfile, "null character(s) preserved in literal");
 708         }
 709
 710       *dest++ = c;
 711     }
 712
 713   /* Remember the next character.  */
 714   buffer->read_ahead = c;
 715   *dest = '\0';
 716
 717   token->val.str.text = POOL_FRONT (pool);
 718   token->val.str.len = dest - token->val.str.text;
 719   POOL_COMMIT (pool, token->val.str.len + 1);
 720 }
 721
 722 /* The stored comment includes the comment start and any terminator.  */
 723 static void
 724 save_comment (pfile, token, from)
 725      cpp_reader *pfile;
 726      cpp_token *token;
 727      const unsigned char *from;
 728 {
 729   unsigned char *buffer;
 730   unsigned int len;
 731
 732   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 733   /* C++ comments probably (not definitely) have moved past a new
 734      line, which we don't want to save in the comment.  */
 735   if (pfile->buffer->read_ahead != EOF)
 736     len--;
 737   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 738
 739   token->type = CPP_COMMENT;
 740   token->val.str.len = len;
 741   token->val.str.text = buffer;
 742
 743   buffer[0] = '/';
 744   memcpy (buffer + 1, from, len - 1);
 745 }
 746
 747 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 748    want to avoid stepping back when lexing %:%X.  */
 749 static void
 750 lex_percent (buffer, result)
 751      cpp_buffer *buffer;
 752      cpp_token *result;
 753 {
 754   cppchar_t c;
 755
 756   result->type = CPP_MOD;
 757   /* Parsing %:%X could leave an extra character.  */
 758   if (buffer->extra_char == EOF)
 759     c = get_effective_char (buffer);
 760   else
 761     {
 762       c = buffer->read_ahead = buffer->extra_char;
 763       buffer->extra_char = EOF;
 764     }
 765
 766   if (c == '=')
 767     ACCEPT_CHAR (CPP_MOD_EQ);
 768   else if (CPP_OPTION (buffer->pfile, digraphs))
 769     {
 770       if (c == ':')
 771         {
 772           result->flags |= DIGRAPH;
 773           ACCEPT_CHAR (CPP_HASH);
 774           if (get_effective_char (buffer) == '%')
 775             {
 776               buffer->extra_char = get_effective_char (buffer);
 777               if (buffer->extra_char == ':')
 778                 {
 779                   buffer->extra_char = EOF;
 780                   ACCEPT_CHAR (CPP_PASTE);
 781                 }
 782               else
 783                 /* We'll catch the extra_char when we're called back.  */
 784                 buffer->read_ahead = '%';
 785             }
 786         }
 787       else if (c == '>')
 788         {
 789           result->flags |= DIGRAPH;
 790           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 791         }
 792     }
 793 }
 794
 795 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 796    want to avoid stepping back when lexing '...' or '.123'.  In the
 797    latter case we should also set a flag for parse_number.  */
 798 static void
 799 lex_dot (pfile, result)
 800      cpp_reader *pfile;
 801      cpp_token *result;
 802 {
 803   cpp_buffer *buffer = pfile->buffer;
 804   cppchar_t c;
 805
 806   /* Parsing ..X could leave an extra character.  */
 807   if (buffer->extra_char == EOF)
 808     c = get_effective_char (buffer);
 809   else
 810     {
 811       c = buffer->read_ahead = buffer->extra_char;
 812       buffer->extra_char = EOF;
 813     }
 814
 815   /* All known character sets have 0...9 contiguous.  */
 816   if (c >= '0' && c <= '9')
 817     {
 818       result->type = CPP_NUMBER;
 819       parse_number (pfile, &result->val.str, c, 1);
 820     }
 821   else
 822     {
 823       result->type = CPP_DOT;
 824       if (c == '.')
 825         {
 826           buffer->extra_char = get_effective_char (buffer);
 827           if (buffer->extra_char == '.')
 828             {
 829               buffer->extra_char = EOF;
 830               ACCEPT_CHAR (CPP_ELLIPSIS);
 831             }
 832           else
 833             /* We'll catch the extra_char when we're called back.  */
 834             buffer->read_ahead = '.';
 835         }
 836       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 837         ACCEPT_CHAR (CPP_DOT_STAR);
 838     }
 839 }
 840
 841 void
 842 _cpp_lex_token (pfile, result)
 843      cpp_reader *pfile;
 844      cpp_token *result;
 845 {
 846   cppchar_t c;
 847   cpp_buffer *buffer;
 848   const unsigned char *comment_start;
 849   unsigned char bol;
 850
 851  skip:
 852   bol = pfile->state.next_bol;
 853  done_directive:
 854   buffer = pfile->buffer;
 855   pfile->state.next_bol = 0;
 856   result->flags = buffer->saved_flags;
 857   buffer->saved_flags = 0;
 858  next_char:
 859   pfile->lexer_pos.line = buffer->lineno;
 860  next_char2:
 861   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 862
 863   c = buffer->read_ahead;
 864   if (c == EOF && buffer->cur < buffer->rlimit)
 865     {
 866       c = *buffer->cur++;
 867       pfile->lexer_pos.col++;
 868     }
 869
 870  do_switch:
 871   buffer->read_ahead = EOF;
 872   switch (c)
 873     {
 874     case EOF:
 875       /* Non-empty files should end in a newline.  Ignore for command
 876          line and _Pragma buffers.  */
 877       if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 878         cpp_pedwarn (pfile, "no newline at end of file");
 879       pfile->state.next_bol = 1;
 880       pfile->skipping = 0;      /* In case missing #endif.  */
 881       result->type = CPP_EOF;
 882       /* Don't do MI optimisation.  */
 883       return;
 884
 885     case ' ': case '\t': case '\f': case '\v': case '\0':
 886       skip_whitespace (pfile, c);
 887       result->flags |= PREV_WHITE;
 888       goto next_char2;
 889
 890     case '\n': case '\r':
 891       if (!pfile->state.in_directive)
 892         {
 893           handle_newline (buffer, c);
 894           bol = 1;
 895           pfile->lexer_pos.output_line = buffer->lineno;
 896           /* This is a new line, so clear any white space flag.
 897              Newlines in arguments are white space (6.10.3.10);
 898              parse_arg takes care of that.  */
 899           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 900           goto next_char;
 901         }
 902
 903       /* Don't let directives spill over to the next line.  */
 904       buffer->read_ahead = c;
 905       pfile->state.next_bol = 1;
 906       result->type = CPP_EOF;
 907       /* Don't break; pfile->skipping might be true.  */
 908       return;
 909
 910     case '?':
 911     case '\\':
 912       /* These could start an escaped newline, or '?' a trigraph.  Let
 913          skip_escaped_newlines do all the work.  */
 914       {
 915         unsigned int lineno = buffer->lineno;
 916
 917         c = skip_escaped_newlines (buffer, c);
 918         if (lineno != buffer->lineno)
 919           /* We had at least one escaped newline of some sort, and the
 920              next character is in buffer->read_ahead.  Update the
 921              token's line and column.  */
 922             goto next_char;
 923
 924         /* We are either the original '?' or '\\', or a trigraph.  */
 925         result->type = CPP_QUERY;
 926         buffer->read_ahead = EOF;
 927         if (c == '\\')
 928           goto random_char;
 929         else if (c != '?')
 930           goto do_switch;
 931       }
 932       break;
 933
 934     case '0': case '1': case '2': case '3': case '4':
 935     case '5': case '6': case '7': case '8': case '9':
 936       result->type = CPP_NUMBER;
 937       parse_number (pfile, &result->val.str, c, 0);
 938       break;
 939
 940     case '$':
 941       if (!CPP_OPTION (pfile, dollars_in_ident))
 942         goto random_char;
 943       /* Fall through... */
 944
 945     case '_':
 946     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 947     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 948     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 949     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 950     case 'y': case 'z':
 951     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 952     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 953     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 954     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 955     case 'Y': case 'Z':
 956       result->type = CPP_NAME;
 957       result->val.node = parse_identifier (pfile, c);
 958
 959       /* 'L' may introduce wide characters or strings.  */
 960       if (result->val.node == pfile->spec_nodes.n_L)
 961         {
 962           c = buffer->read_ahead; /* For make_string.  */
 963           if (c == '\'' || c == '"')
 964             {
 965               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 966               goto make_string;
 967             }
 968         }
 969       /* Convert named operators to their proper types.  */
 970       else if (result->val.node->flags & NODE_OPERATOR)
 971         {
 972           result->flags |= NAMED_OP;
 973           result->type = result->val.node->value.operator;
 974         }
 975       break;
 976
 977     case '\'':
 978     case '"':
 979       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 980     make_string:
 981       parse_string (pfile, result, c);
 982       break;
 983
 984     case '/':
 985       /* A potential block or line comment.  */
 986       comment_start = buffer->cur;
 987       result->type = CPP_DIV;
 988       c = get_effective_char (buffer);
 989       if (c == '=')
 990         ACCEPT_CHAR (CPP_DIV_EQ);
 991       if (c != '/' && c != '*')
 992         break;
 993
 994       if (c == '*')
 995         {
 996           if (skip_block_comment (pfile))
 997             cpp_error_with_line (pfile, pfile->lexer_pos.line,
 998                                  pfile->lexer_pos.col,
 999                                  "unterminated comment");
1000         }
1001       else
1002         {
1003           if (!CPP_OPTION (pfile, cplusplus_comments)
1004               && !CPP_IN_SYSTEM_HEADER (pfile))
1005             break;
1006
1007           /* Warn about comments only if pedantically GNUC89, and not
1008              in system headers.  */
1009           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1010               && ! buffer->warned_cplusplus_comments)
1011             {
1012               cpp_pedwarn (pfile,
1013                            "C++ style comments are not allowed in ISO C89");
1014               cpp_pedwarn (pfile,
1015                            "(this will be reported only once per input file)");
1016               buffer->warned_cplusplus_comments = 1;
1017             }
1018
1019           /* Skip_line_comment updates buffer->read_ahead.  */
1020           if (skip_line_comment (pfile))
1021             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1022                                    pfile->lexer_pos.col,
1023                                    "multi-line comment");
1024         }
1025
1026       /* Skipping the comment has updated buffer->read_ahead.  */
1027       if (!pfile->state.save_comments)
1028         {
1029           result->flags |= PREV_WHITE;
1030           goto next_char;
1031         }
1032
1033       /* Save the comment as a token in its own right.  */
1034       save_comment (pfile, result, comment_start);
1035       /* Don't do MI optimisation.  */
1036       return;
1037
1038     case '<':
1039       if (pfile->state.angled_headers)
1040         {
1041           result->type = CPP_HEADER_NAME;
1042           c = '>';              /* terminator.  */
1043           goto make_string;
1044         }
1045
1046       result->type = CPP_LESS;
1047       c = get_effective_char (buffer);
1048       if (c == '=')
1049         ACCEPT_CHAR (CPP_LESS_EQ);
1050       else if (c == '<')
1051         {
1052           ACCEPT_CHAR (CPP_LSHIFT);
1053           if (get_effective_char (buffer) == '=')
1054             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1055         }
1056       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1057         {
1058           ACCEPT_CHAR (CPP_MIN);
1059           if (get_effective_char (buffer) == '=')
1060             ACCEPT_CHAR (CPP_MIN_EQ);
1061         }
1062       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1063         {
1064           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1065           result->flags |= DIGRAPH;
1066         }
1067       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1068         {
1069           ACCEPT_CHAR (CPP_OPEN_BRACE);
1070           result->flags |= DIGRAPH;
1071         }
1072       break;
1073
1074     case '>':
1075       result->type = CPP_GREATER;
1076       c = get_effective_char (buffer);
1077       if (c == '=')
1078         ACCEPT_CHAR (CPP_GREATER_EQ);
1079       else if (c == '>')
1080         {
1081           ACCEPT_CHAR (CPP_RSHIFT);
1082           if (get_effective_char (buffer) == '=')
1083             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1084         }
1085       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1086         {
1087           ACCEPT_CHAR (CPP_MAX);
1088           if (get_effective_char (buffer) == '=')
1089             ACCEPT_CHAR (CPP_MAX_EQ);
1090         }
1091       break;
1092
1093     case '%':
1094       lex_percent (buffer, result);
1095       if (result->type == CPP_HASH)
1096         goto do_hash;
1097       break;
1098
1099     case '.':
1100       lex_dot (pfile, result);
1101       break;
1102
1103     case '+':
1104       result->type = CPP_PLUS;
1105       c = get_effective_char (buffer);
1106       if (c == '=')
1107         ACCEPT_CHAR (CPP_PLUS_EQ);
1108       else if (c == '+')
1109         ACCEPT_CHAR (CPP_PLUS_PLUS);
1110       break;
1111
1112     case '-':
1113       result->type = CPP_MINUS;
1114       c = get_effective_char (buffer);
1115       if (c == '>')
1116         {
1117           ACCEPT_CHAR (CPP_DEREF);
1118           if (CPP_OPTION (pfile, cplusplus)
1119               && get_effective_char (buffer) == '*')
1120             ACCEPT_CHAR (CPP_DEREF_STAR);
1121         }
1122       else if (c == '=')
1123         ACCEPT_CHAR (CPP_MINUS_EQ);
1124       else if (c == '-')
1125         ACCEPT_CHAR (CPP_MINUS_MINUS);
1126       break;
1127
1128     case '*':
1129       result->type = CPP_MULT;
1130       if (get_effective_char (buffer) == '=')
1131         ACCEPT_CHAR (CPP_MULT_EQ);
1132       break;
1133
1134     case '=':
1135       result->type = CPP_EQ;
1136       if (get_effective_char (buffer) == '=')
1137         ACCEPT_CHAR (CPP_EQ_EQ);
1138       break;
1139
1140     case '!':
1141       result->type = CPP_NOT;
1142       if (get_effective_char (buffer) == '=')
1143         ACCEPT_CHAR (CPP_NOT_EQ);
1144       break;
1145
1146     case '&':
1147       result->type = CPP_AND;
1148       c = get_effective_char (buffer);
1149       if (c == '=')
1150         ACCEPT_CHAR (CPP_AND_EQ);
1151       else if (c == '&')
1152         ACCEPT_CHAR (CPP_AND_AND);
1153       break;
1154
1155     case '#':
1156       c = buffer->extra_char;   /* Can be set by error condition below.  */
1157       if (c != EOF)
1158         {
1159           buffer->read_ahead = c;
1160           buffer->extra_char = EOF;
1161         }
1162       else
1163         c = get_effective_char (buffer);
1164
1165       if (c == '#')
1166         {
1167           ACCEPT_CHAR (CPP_PASTE);
1168           break;
1169         }
1170
1171       result->type = CPP_HASH;
1172     do_hash:
1173       if (!bol)
1174         break;
1175       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1176          tokens within the list of arguments that would otherwise act
1177          as preprocessing directives, the behavior is undefined.
1178
1179          This implementation will report a hard error, terminate the
1180          macro invocation, and proceed to process the directive.  */
1181       if (pfile->state.parsing_args)
1182         {
1183           if (pfile->state.parsing_args == 2)
1184             cpp_error (pfile,
1185                        "directives may not be used inside a macro argument");
1186
1187           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1188           buffer->extra_char = buffer->read_ahead;
1189           buffer->read_ahead = '#';
1190           pfile->state.next_bol = 1;
1191           result->type = CPP_EOF;
1192
1193           /* Get whitespace right - newline_in_args sets it.  */
1194           if (pfile->lexer_pos.col == 1)
1195             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1196         }
1197       else
1198         {
1199           /* This is the hash introducing a directive.  */
1200           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1201             goto done_directive; /* bol still 1.  */
1202           /* This is in fact an assembler #.  */
1203         }
1204       break;
1205
1206     case '|':
1207       result->type = CPP_OR;
1208       c = get_effective_char (buffer);
1209       if (c == '=')
1210         ACCEPT_CHAR (CPP_OR_EQ);
1211       else if (c == '|')
1212         ACCEPT_CHAR (CPP_OR_OR);
1213       break;
1214
1215     case '^':
1216       result->type = CPP_XOR;
1217       if (get_effective_char (buffer) == '=')
1218         ACCEPT_CHAR (CPP_XOR_EQ);
1219       break;
1220
1221     case ':':
1222       result->type = CPP_COLON;
1223       c = get_effective_char (buffer);
1224       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1225         ACCEPT_CHAR (CPP_SCOPE);
1226       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1227         {
1228           result->flags |= DIGRAPH;
1229           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1230         }
1231       break;
1232
1233     case '~': result->type = CPP_COMPL; break;
1234     case ',': result->type = CPP_COMMA; break;
1235     case '(': result->type = CPP_OPEN_PAREN; break;
1236     case ')': result->type = CPP_CLOSE_PAREN; break;
1237     case '[': result->type = CPP_OPEN_SQUARE; break;
1238     case ']': result->type = CPP_CLOSE_SQUARE; break;
1239     case '{': result->type = CPP_OPEN_BRACE; break;
1240     case '}': result->type = CPP_CLOSE_BRACE; break;
1241     case ';': result->type = CPP_SEMICOLON; break;
1242
1243       /* @ is a punctuator in Objective C.  */
1244     case '@': result->type = CPP_ATSIGN; break;
1245
1246     random_char:
1247     default:
1248       result->type = CPP_OTHER;
1249       result->val.c = c;
1250       break;
1251     }
1252
1253   if (pfile->skipping)
1254     goto skip;
1255
1256   /* If not in a directive, this token invalidates controlling macros.  */
1257   if (!pfile->state.in_directive)
1258     pfile->mi_state = MI_FAILED;
1259 }
1260
1261 /* An upper bound on the number of bytes needed to spell a token,
1262    including preceding whitespace.  */
1263 unsigned int
1264 cpp_token_len (token)
1265      const cpp_token *token;
1266 {
1267   unsigned int len;
1268
1269   switch (TOKEN_SPELL (token))
1270     {
1271     default:            len = 0;                        break;
1272     case SPELL_STRING:  len = token->val.str.len;       break;
1273     case SPELL_IDENT:   len = token->val.node->length;  break;
1274     }
1275   /* 1 for whitespace, 4 for comment delimeters.  */
1276   return len + 5;
1277 }
1278
1279 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1280    already contain the enough space to hold the token's spelling.
1281    Returns a pointer to the character after the last character
1282    written.  */
1283 unsigned char *
1284 cpp_spell_token (pfile, token, buffer)
1285      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1286      const cpp_token *token;
1287      unsigned char *buffer;
1288 {
1289   switch (TOKEN_SPELL (token))
1290     {
1291     case SPELL_OPERATOR:
1292       {
1293         const unsigned char *spelling;
1294         unsigned char c;
1295
1296         if (token->flags & DIGRAPH)
1297           spelling
1298             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1299         else if (token->flags & NAMED_OP)
1300           goto spell_ident;
1301         else
1302           spelling = TOKEN_NAME (token);
1303
1304         while ((c = *spelling++) != '\0')
1305           *buffer++ = c;
1306       }
1307       break;
1308
1309     case SPELL_IDENT:
1310       spell_ident:
1311       memcpy (buffer, token->val.node->name, token->val.node->length);
1312       buffer += token->val.node->length;
1313       break;
1314
1315     case SPELL_STRING:
1316       {
1317         int left, right, tag;
1318         switch (token->type)
1319           {
1320           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1321           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1322           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1323           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1324           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1325           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1326           }
1327         if (tag) *buffer++ = tag;
1328         if (left) *buffer++ = left;
1329         memcpy (buffer, token->val.str.text, token->val.str.len);
1330         buffer += token->val.str.len;
1331         if (right) *buffer++ = right;
1332       }
1333       break;
1334
1335     case SPELL_CHAR:
1336       *buffer++ = token->val.c;
1337       break;
1338
1339     case SPELL_NONE:
1340       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1341       break;
1342     }
1343
1344   return buffer;
1345 }
1346
1347 /* Returns a token as a null-terminated string.  The string is
1348    temporary, and automatically freed later.  Useful for diagnostics.  */
1349 unsigned char *
1350 cpp_token_as_text (pfile, token)
1351      cpp_reader *pfile;
1352      const cpp_token *token;
1353 {
1354   unsigned int len = cpp_token_len (token);
1355   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1356
1357   end = cpp_spell_token (pfile, token, start);
1358   end[0] = '\0';
1359
1360   return start;
1361 }
1362
1363 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1364 const char *
1365 cpp_type2name (type)
1366      enum cpp_ttype type;
1367 {
1368   return (const char *) token_spellings[type].name;
1369 }
1370
1371 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1372    for efficiency - to avoid double-buffering.  Also, outputs a space
1373    if PREV_WHITE is flagged.  */
1374 void
1375 cpp_output_token (token, fp)
1376      const cpp_token *token;
1377      FILE *fp;
1378 {
1379   if (token->flags & PREV_WHITE)
1380     putc (' ', fp);
1381
1382   switch (TOKEN_SPELL (token))
1383     {
1384     case SPELL_OPERATOR:
1385       {
1386         const unsigned char *spelling;
1387
1388         if (token->flags & DIGRAPH)
1389           spelling
1390             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1391         else if (token->flags & NAMED_OP)
1392           goto spell_ident;
1393         else
1394           spelling = TOKEN_NAME (token);
1395
1396         ufputs (spelling, fp);
1397       }
1398       break;
1399
1400     spell_ident:
1401     case SPELL_IDENT:
1402       ufputs (token->val.node->name, fp);
1403     break;
1404
1405     case SPELL_STRING:
1406       {
1407         int left, right, tag;
1408         switch (token->type)
1409           {
1410           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1411           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1412           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1413           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1414           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1415           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1416           }
1417         if (tag) putc (tag, fp);
1418         if (left) putc (left, fp);
1419         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1420         if (right) putc (right, fp);
1421       }
1422       break;
1423
1424     case SPELL_CHAR:
1425       putc (token->val.c, fp);
1426       break;
1427
1428     case SPELL_NONE:
1429       /* An error, most probably.  */
1430       break;
1431     }
1432 }
1433
1434 /* Compare two tokens.  */
1435 int
1436 _cpp_equiv_tokens (a, b)
1437      const cpp_token *a, *b;
1438 {
1439   if (a->type == b->type && a->flags == b->flags)
1440     switch (TOKEN_SPELL (a))
1441       {
1442       default:                  /* Keep compiler happy.  */
1443       case SPELL_OPERATOR:
1444         return 1;
1445       case SPELL_CHAR:
1446         return a->val.c == b->val.c; /* Character.  */
1447       case SPELL_NONE:
1448         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1449       case SPELL_IDENT:
1450         return a->val.node == b->val.node;
1451       case SPELL_STRING:
1452         return (a->val.str.len == b->val.str.len
1453                 && !memcmp (a->val.str.text, b->val.str.text,
1454                             a->val.str.len));
1455       }
1456
1457   return 0;
1458 }
1459
1460 /* Determine whether two tokens can be pasted together, and if so,
1461    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1462    be pasted, or the appropriate type for the merged token if they
1463    can.  */
1464 enum cpp_ttype
1465 cpp_can_paste (pfile, token1, token2, digraph)
1466      cpp_reader * pfile;
1467      const cpp_token *token1, *token2;
1468      int* digraph;
1469 {
1470   enum cpp_ttype a = token1->type, b = token2->type;
1471   int cxx = CPP_OPTION (pfile, cplusplus);
1472
1473   /* Treat named operators as if they were ordinary NAMEs.  */
1474   if (token1->flags & NAMED_OP)
1475     a = CPP_NAME;
1476   if (token2->flags & NAMED_OP)
1477     b = CPP_NAME;
1478
1479   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1480     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1481
1482   switch (a)
1483     {
1484     case CPP_GREATER:
1485       if (b == a) return CPP_RSHIFT;
1486       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1487       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1488       break;
1489     case CPP_LESS:
1490       if (b == a) return CPP_LSHIFT;
1491       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1492       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1493       if (CPP_OPTION (pfile, digraphs))
1494         {
1495           if (b == CPP_COLON)
1496             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1497           if (b == CPP_MOD)
1498             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1499         }
1500       break;
1501
1502     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1503     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1504     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1505
1506     case CPP_MINUS:
1507       if (b == a)               return CPP_MINUS_MINUS;
1508       if (b == CPP_GREATER)     return CPP_DEREF;
1509       break;
1510     case CPP_COLON:
1511       if (b == a && cxx)        return CPP_SCOPE;
1512       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1513         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1514       break;
1515
1516     case CPP_MOD:
1517       if (CPP_OPTION (pfile, digraphs))
1518         {
1519           if (b == CPP_GREATER)
1520             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1521           if (b == CPP_COLON)
1522             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1523         }
1524       break;
1525     case CPP_DEREF:
1526       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1527       break;
1528     case CPP_DOT:
1529       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1530       if (b == CPP_NUMBER)      return CPP_NUMBER;
1531       break;
1532
1533     case CPP_HASH:
1534       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1535         /* %:%: digraph */
1536         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1537       break;
1538
1539     case CPP_NAME:
1540       if (b == CPP_NAME)        return CPP_NAME;
1541       if (b == CPP_NUMBER
1542           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1543       if (b == CPP_CHAR
1544           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1545       if (b == CPP_STRING
1546           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1547       break;
1548
1549     case CPP_NUMBER:
1550       if (b == CPP_NUMBER)      return CPP_NUMBER;
1551       if (b == CPP_NAME)        return CPP_NUMBER;
1552       if (b == CPP_DOT)         return CPP_NUMBER;
1553       /* Numbers cannot have length zero, so this is safe.  */
1554       if ((b == CPP_PLUS || b == CPP_MINUS)
1555           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1556         return CPP_NUMBER;
1557       break;
1558
1559     default:
1560       break;
1561     }
1562
1563   return CPP_EOF;
1564 }
1565
1566 /* Returns nonzero if a space should be inserted to avoid an
1567    accidental token paste for output.  For simplicity, it is
1568    conservative, and occasionally advises a space where one is not
1569    needed, e.g. "." and ".2".  */
1570
1571 int
1572 cpp_avoid_paste (pfile, token1, token2)
1573      cpp_reader *pfile;
1574      const cpp_token *token1, *token2;
1575 {
1576   enum cpp_ttype a = token1->type, b = token2->type;
1577   cppchar_t c;
1578
1579   if (token1->flags & NAMED_OP)
1580     a = CPP_NAME;
1581   if (token2->flags & NAMED_OP)
1582     b = CPP_NAME;
1583
1584   c = EOF;
1585   if (token2->flags & DIGRAPH)
1586     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1587   else if (token_spellings[b].category == SPELL_OPERATOR)
1588     c = token_spellings[b].name[0];
1589
1590   /* Quickly get everything that can paste with an '='.  */
1591   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1592     return 1;
1593
1594   switch (a)
1595     {
1596     case CPP_GREATER:   return c == '>' || c == '?';
1597     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1598     case CPP_PLUS:      return c == '+';
1599     case CPP_MINUS:     return c == '-' || c == '>';
1600     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1601     case CPP_MOD:       return c == ':' || c == '>';
1602     case CPP_AND:       return c == '&';
1603     case CPP_OR:        return c == '|';
1604     case CPP_COLON:     return c == ':' || c == '>';
1605     case CPP_DEREF:     return c == '*';
1606     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1607     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1608     case CPP_NAME:      return ((b == CPP_NUMBER
1609                                  && name_p (pfile, &token2->val.str))
1610                                 || b == CPP_NAME
1611                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1612     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1613                                 || c == '.' || c == '+' || c == '-');
1614     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1615                                 && token1->val.c == '@'
1616                                 && (b == CPP_NAME || b == CPP_STRING));
1617     default:            break;
1618     }
1619
1620   return 0;
1621 }
1622
1623 /* Output all the remaining tokens on the current line, and a newline
1624    character, to FP.  Leading whitespace is removed.  */
1625 void
1626 cpp_output_line (pfile, fp)
1627      cpp_reader *pfile;
1628      FILE *fp;
1629 {
1630   cpp_token token;
1631
1632   cpp_get_token (pfile, &token);
1633   token.flags &= ~PREV_WHITE;
1634   while (token.type != CPP_EOF)
1635     {
1636       cpp_output_token (&token, fp);
1637       cpp_get_token (pfile, &token);
1638     }
1639
1640   putc ('\n', fp);
1641 }
1642
/* Memory pools.  */

/* Alignment probe.  The single char in front forces the compiler to
   pad up to the alignment of the union, so the offset of U is the
   alignment required by a double or a pointer, whichever is
   stricter.  */
struct dummy
{
  char c;
  union { double d; int *p; } u;
};

/* Alignment used when the creator of a pool does not ask for a
   specific one, and for the size of every chunk.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1656
1657 static int
1658 chunk_suitable (pool, chunk, size)
1659      cpp_pool *pool;
1660      cpp_chunk *chunk;
1661      unsigned int size;
1662 {
1663   /* Being at least twice SIZE means we can use memcpy in
1664      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
1665      anyway.  */
1666   return (chunk && pool->locked != chunk
1667           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1668 }
1669
1670 /* Returns the end of the new pool.  PTR points to a char in the old
1671    pool, and is updated to point to the same char in the new pool.  */
1672 unsigned char *
1673 _cpp_next_chunk (pool, len, ptr)
1674      cpp_pool *pool;
1675      unsigned int len;
1676      unsigned char **ptr;
1677 {
1678   cpp_chunk *chunk = pool->cur->next;
1679
1680   /* LEN is the minimum size we want in the new pool.  */
1681   len += POOL_ROOM (pool);
1682   if (! chunk_suitable (pool, chunk, len))
1683     {
1684       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
1685
1686       chunk->next = pool->cur->next;
1687       pool->cur->next = chunk;
1688     }
1689
1690   /* Update the pointer before changing chunk's front.  */
1691   if (ptr)
1692     *ptr += chunk->base - POOL_FRONT (pool);
1693
1694   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1695   chunk->front = chunk->base;
1696
1697   pool->cur = chunk;
1698   return POOL_LIMIT (pool);
1699 }
1700
1701 static cpp_chunk *
1702 new_chunk (size)
1703      unsigned int size;
1704 {
1705   unsigned char *base;
1706   cpp_chunk *result;
1707
1708   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
1709   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1710   /* Put the chunk descriptor at the end.  Then chunk overruns will
1711      cause obvious chaos.  */
1712   result = (cpp_chunk *) (base + size);
1713   result->base = base;
1714   result->front = base;
1715   result->limit = base + size;
1716   result->next = 0;
1717
1718   return result;
1719 }
1720
1721 void
1722 _cpp_init_pool (pool, size, align, temp)
1723      cpp_pool *pool;
1724      unsigned int size, align, temp;
1725 {
1726   if (align == 0)
1727     align = DEFAULT_ALIGNMENT;
1728   if (align & (align - 1))
1729     abort ();
1730   pool->align = align;
1731   pool->cur = new_chunk (size);
1732   pool->locked = 0;
1733   pool->locks = 0;
1734   if (temp)
1735     pool->cur->next = pool->cur;
1736 }
1737
1738 void
1739 _cpp_lock_pool (pool)
1740      cpp_pool *pool;
1741 {
1742   if (pool->locks++ == 0)
1743     pool->locked = pool->cur;
1744 }
1745
1746 void
1747 _cpp_unlock_pool (pool)
1748      cpp_pool *pool;
1749 {
1750   if (--pool->locks == 0)
1751     pool->locked = 0;
1752 }
1753
1754 void
1755 _cpp_free_pool (pool)
1756      cpp_pool *pool;
1757 {
1758   cpp_chunk *chunk = pool->cur, *next;
1759
1760   do
1761     {
1762       next = chunk->next;
1763       free (chunk->base);
1764       chunk = next;
1765     }
1766   while (chunk && chunk != pool->cur);
1767 }
1768
1769 /* Reserve LEN bytes from a memory pool.  */
1770 unsigned char *
1771 _cpp_pool_reserve (pool, len)
1772      cpp_pool *pool;
1773      unsigned int len;
1774 {
1775   len = POOL_ALIGN (len, pool->align);
1776   if (len > (unsigned int) POOL_ROOM (pool))
1777     _cpp_next_chunk (pool, len, 0);
1778
1779   return POOL_FRONT (pool);
1780 }
1781
1782 /* Allocate LEN bytes from a memory pool.  */
1783 unsigned char *
1784 _cpp_pool_alloc (pool, len)
1785      cpp_pool *pool;
1786      unsigned int len;
1787 {
1788   unsigned char *result = _cpp_pool_reserve (pool, len);
1789
1790   POOL_COMMIT (pool, len);
1791   return result;
1792 }