gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.
        NOTE(review): the lexer routines in this file fill in
        token->val.str.len, so "val.name.len" may be a stale field
        name -- confirm against the cpp_token definition.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0, /* Category given to every OP() row of token_spellings.  */
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling /* One row of the token_spellings table.  */
  66 {
  67   enum spell_type category; /* Spelling category of this token type.  */
  68   const unsigned char *name; /* OP() rows: the operator text; TK() rows: the stringified enumerator.  */
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"}; /* The six digraph spellings.  */
  73
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE }; /* Rows are generated by expanding TTYPE_TABLE with the OP and TK macros above.  */
  77 #undef OP
  78 #undef TK
  79 /* Look up the token_spellings row for TOKEN by its type and yield its category or name.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86 /* Comment, whitespace and token scanning routines.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  93                                                     const U_CHAR *));
  94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  97 static void unterminated PARAMS ((cpp_reader *, int));
  98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 104                                    const unsigned char *, unsigned int *));
 105 static cpp_token *lex_token PARAMS ((cpp_reader *, cpp_token *));
 106 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 107 /* Static helpers declared here; presumably defined later in this file (not in view).  */
 108 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 109 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 110 static unsigned int hex_digit_value PARAMS ((unsigned int));
 111
 112 /* Utility routine:
 113
 114    Compares, the token TOKEN to the NUL-terminated string STRING.
 115    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 116
 117 int
 118 cpp_ideq (token, string)
 119      const cpp_token *token;
 120      const char *string;
 121 {
 122   if (token->type != CPP_NAME)
 123     return 0;
 124
 125   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 126 }
 127
 128 /* Call when meeting a newline.  Returns the character after the newline
 129    (or carriage-return newline combination), or EOF.  */
 130 static cppchar_t
 131 handle_newline (pfile, newline_char)
 132      cpp_reader *pfile;
 133      cppchar_t newline_char;
 134 {
 135   cpp_buffer *buffer;
 136   cppchar_t next = EOF;
 137
 138   pfile->line++;
 139   buffer = pfile->buffer;
 140   buffer->col_adjust = 0;
 141   buffer->line_base = buffer->cur;
 142
 143   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 144   if (buffer->cur < buffer->rlimit)
 145     {
 146       next = *buffer->cur++;
 147       if (next + newline_char == '\r' + '\n')
 148         {
 149           buffer->line_base = buffer->cur;
 150           if (buffer->cur < buffer->rlimit)
 151             next = *buffer->cur++;
 152           else
 153             next = EOF;
 154         }
 155     }
 156
 157   buffer->read_ahead = next;
 158   return next;
 159 }
 160
 161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 162    encountered.  It warns if necessary, and returns true if the
 163    trigraph should be honoured.  FROM_CHAR is the third character of a
 164    trigraph, and presumed to be the previous character for position
 165    reporting.  */
 166 static int
 167 trigraph_ok (pfile, from_char)
 168      cpp_reader *pfile;
 169      cppchar_t from_char;
 170 {
 171   int accept = CPP_OPTION (pfile, trigraphs);
 172
 173   /* Don't warn about trigraphs in comments.  */
 174   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 175     {
 176       cpp_buffer *buffer = pfile->buffer;
 177
 178       if (accept)
 179         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 180                                "trigraph ??%c converted to %c",
 181                                (int) from_char,
 182                                (int) _cpp_trigraph_map[from_char]);
 183       else if (buffer->cur != buffer->last_Wtrigraphs)
 184         {
 185           buffer->last_Wtrigraphs = buffer->cur;
 186           cpp_warning_with_line (pfile, pfile->line,
 187                                  CPP_BUF_COL (buffer) - 2,
 188                                  "trigraph ??%c ignored", (int) from_char);
 189         }
 190     }
 191
 192   return accept;
 193 }
 194
  195 /* Assumes local variables buffer and result.  Sets result->type to T and resets buffer->read_ahead to EOF.  */
  196 #define ACCEPT_CHAR(t) \
  197   do { result->type = t; buffer->read_ahead = EOF; } while (0)
  198
  199 /* When we move to multibyte character sets, add to these something
  200    that saves and restores the state of the multibyte conversion
  201    library.  This probably involves saving and restoring a "cookie".
  202    In the case of glibc it is an 8-byte structure, so is not a high
  203    overhead operation.  In any case, it's out of the fast path.
         Both macros assume local variables buffer and saved_cur:
         SAVE_STATE records buffer->cur, RESTORE_STATE rewinds to it.  */
  204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
  205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 206
  207 /* Skips any escaped newlines introduced by NEXT, which is either a
  208    '?' or a '\\'.  Returns the next character, which will also have
  209    been placed in buffer->read_ahead.  This routine performs
  210    preprocessing stages 1 and 2 of the ISO C standard.
         If NEXT starts an accepted trigraph that is not a backslash, the
         trigraph's replacement is returned.  If nothing is recognised,
         buffer->cur is rewound and NEXT itself is returned.  When
         buffer->from_stage3 is set, NEXT is returned untouched.  */
  211 static cppchar_t
  212 skip_escaped_newlines (pfile, next)
  213      cpp_reader *pfile;
  214      cppchar_t next;
  215 {
  216   cpp_buffer *buffer = pfile->buffer;
  217
  218   /* Only do this if we apply stages 1 and 2.  */
  219   if (!buffer->from_stage3)
  220     {
  221       cppchar_t next1;
  222       const unsigned char *saved_cur;
  223       int space;
  224
  225       do
  226         {
  227           if (buffer->cur == buffer->rlimit) /* Nothing follows NEXT.  */
  228             break;
  229
  230           SAVE_STATE (); /* Remember buffer->cur so a failed match can back up.  */
  231           if (next == '?')
  232             {
  233               next1 = *buffer->cur++;
  234               if (next1 != '?' || buffer->cur == buffer->rlimit) /* Not "??", or no third character.  */
  235                 {
  236                   RESTORE_STATE ();
  237                   break;
  238                 }
  239
  240               next1 = *buffer->cur++;
  241               if (!_cpp_trigraph_map[next1]
  242                   || !trigraph_ok (pfile, next1)) /* No mapping for this character, or trigraph not honoured.  */
  243                 {
  244                   RESTORE_STATE ();
  245                   break;
  246                 }
  247
  248               /* We have a full trigraph here.  */
  249               next = _cpp_trigraph_map[next1];
  250               if (next != '\\' || buffer->cur == buffer->rlimit) /* Replacement is not a backslash, or nothing follows it.  */
  251                 break;
  252               SAVE_STATE (); /* The rewind point is now just past the trigraph.  */
  253             }
  254
  255           /* We have a backslash, and room for at least one more character.  */
  256           space = 0;
  257           do
  258             {
  259               next1 = *buffer->cur++;
  260               if (!is_nvspace (next1))
  261                 break;
  262               space = 1; /* Saw blank(s) after the backslash.  */
  263             }
  264           while (buffer->cur < buffer->rlimit);
  265
  266           if (!is_vspace (next1)) /* Backslash is not followed by a newline: back up.  */
  267             {
  268               RESTORE_STATE ();
  269               break;
  270             }
  271
  272           if (space && !pfile->state.lexing_comment)
  273             cpp_warning (pfile, "backslash and newline separated by space");
  274
  275           next = handle_newline (pfile, next1);
  276           if (next == EOF)
  277             cpp_pedwarn (pfile, "backslash-newline at end of file");
  278         }
  279       while (next == '\\' || next == '?'); /* The character after the newline may start another one.  */
  280     }
  281
  282   buffer->read_ahead = next;
  283   return next;
  284 }
 285
 286 /* Obtain the next character, after trigraph conversion and skipping
 287    an arbitrary string of escaped newlines.  The common case of no
 288    trigraphs or escaped newlines falls through quickly.  */
 289 static cppchar_t
 290 get_effective_char (pfile)
 291      cpp_reader *pfile;
 292 {
 293   cpp_buffer *buffer = pfile->buffer;
 294   cppchar_t next = EOF;
 295
 296   if (buffer->cur < buffer->rlimit)
 297     {
 298       next = *buffer->cur++;
 299
 300       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 301          can introduce escaped newlines, which we want to skip, or
 302          UCNs, which, depending upon lexer state, we will handle in
 303          the future.  */
 304       if (next == '?' || next == '\\')
 305         next = skip_escaped_newlines (pfile, next);
 306     }
 307
 308   buffer->read_ahead = next;
 309   return next;
 310 }
 311
  312 /* Skip a C-style block comment.  We find the end of the comment by
  313    seeing if an asterisk is before every '/' we encounter.  Returns
  314    non-zero if comment terminated by EOF, zero otherwise.
         pfile->state.lexing_comment is set for the duration of the scan,
         and buffer->read_ahead is reset to EOF before returning.  */
  315 static int
  316 skip_block_comment (pfile)
  317      cpp_reader *pfile;
  318 {
  319   cpp_buffer *buffer = pfile->buffer;
  320   cppchar_t c = EOF, prevc = EOF;
  321
  322   pfile->state.lexing_comment = 1; /* Silences trigraph and backslash-space warnings while set.  */
  323   while (buffer->cur != buffer->rlimit)
  324     {
  325       prevc = c, c = *buffer->cur++;
  326
  327     next_char:
  328       /* FIXME: For speed, create a new character class of characters
  329          of interest inside block comments.  */
  330       if (c == '?' || c == '\\')
  331         c = skip_escaped_newlines (pfile, c);
  332
  333       /* People like decorating comments with '*', so check for '/'
  334          instead for efficiency.  */
  335       if (c == '/')
  336         {
  337           if (prevc == '*') /* Found the closing delimiter.  */
  338             break;
  339
  340           /* Warn about potential nested comments, but not if the '/'
  341              comes immediately before the true comment delimiter.
  342              Don't bother to get it right across escaped newlines.  */
  343           if (CPP_OPTION (pfile, warn_comments)
  344               && buffer->cur != buffer->rlimit)
  345             {
  346               prevc = c, c = *buffer->cur++;
  347               if (c == '*' && buffer->cur != buffer->rlimit)
  348                 {
  349                   prevc = c, c = *buffer->cur++;
  350                   if (c != '/')
  351                     cpp_warning_with_line (pfile, pfile->line,
  352                                            CPP_BUF_COL (buffer) - 2,
  353                                            "\"/*\" within comment");
  354                 }
  355               goto next_char; /* C was read ahead above and still needs classifying.  */
  356             }
  357         }
  358       else if (is_vspace (c))
  359         {
  360           prevc = c, c = handle_newline (pfile, c);
  361           goto next_char; /* handle_newline already fetched the following character.  */
  362         }
  363       else if (c == '\t')
  364         adjust_column (pfile);
  365     }
  366
  367   pfile->state.lexing_comment = 0;
  368   buffer->read_ahead = EOF; /* No look-ahead character is left pending.  */
  369   return c != '/' || prevc != '*'; /* Zero only when the last two characters seen were '*' then '/'.  */
  370 }
 371
 372 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 373    non-zero if a multiline comment.  The following new line, if any,
 374    is left in buffer->read_ahead.  */
 375 static int
 376 skip_line_comment (pfile)
 377      cpp_reader *pfile;
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   unsigned int orig_line = pfile->line;
 381   cppchar_t c;
 382
 383   pfile->state.lexing_comment = 1;
 384   do
 385     {
 386       c = EOF;
 387       if (buffer->cur == buffer->rlimit)
 388         break;
 389
 390       c = *buffer->cur++;
 391       if (c == '?' || c == '\\')
 392         c = skip_escaped_newlines (pfile, c);
 393     }
 394   while (!is_vspace (c));
 395
 396   pfile->state.lexing_comment = 0;
 397   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 398   return orig_line != pfile->line;
 399 }
 400
 401 /* pfile->buffer->cur is one beyond the \t character.  Update
 402    col_adjust so we track the column correctly.  */
 403 static void
 404 adjust_column (pfile)
 405      cpp_reader *pfile;
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 409
 410   /* Round it up to multiple of the tabstop, but subtract 1 since the
 411      tab itself occupies a character position.  */
 412   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 413                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 414 }
 415
 416 /* Skips whitespace, saving the next non-whitespace character.
 417    Adjusts pfile->col_adjust to account for tabs.  Without this,
 418    tokens might be assigned an incorrect column.  */
 419 static void
 420 skip_whitespace (pfile, c)
 421      cpp_reader *pfile;
 422      cppchar_t c;
 423 {
 424   cpp_buffer *buffer = pfile->buffer;
 425   unsigned int warned = 0;
 426
 427   do
 428     {
 429       /* Horizontal space always OK.  */
 430       if (c == ' ')
 431         ;
 432       else if (c == '\t')
 433         adjust_column (pfile);
 434       /* Just \f \v or \0 left.  */
 435       else if (c == '\0')
 436         {
 437           if (!warned)
 438             {
 439               cpp_warning (pfile, "null character(s) ignored");
 440               warned = 1;
 441             }
 442         }
 443       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 444         cpp_pedwarn_with_line (pfile, pfile->line,
 445                                CPP_BUF_COL (buffer),
 446                                "%s in preprocessing directive",
 447                                c == '\f' ? "form feed" : "vertical tab");
 448
 449       c = EOF;
 450       if (buffer->cur == buffer->rlimit)
 451         break;
 452       c = *buffer->cur++;
 453     }
 454   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 455   while (is_nvspace (c));
 456
 457   /* Remember the next character.  */
 458   buffer->read_ahead = c;
 459 }
 460
 461 /* See if the characters of a number token are valid in a name (no
 462    '.', '+' or '-').  */
 463 static int
 464 name_p (pfile, string)
 465      cpp_reader *pfile;
 466      const cpp_string *string;
 467 {
 468   unsigned int i;
 469
 470   for (i = 0; i < string->len; i++)
 471     if (!is_idchar (string->text[i]))
 472       return 0;
 473
 474   return 1;
 475 }
 476
 477 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 478    a critical inner loop.  The common case is an identifier which has
 479    not been split by backslash-newline, does not contain a dollar
 480    sign, and has already been scanned (roughly 10:1 ratio of
 481    seen:unseen identifiers in normal code; the distribution is
 482    Poisson-like).  Second most common case is a new identifier, not
 483    split and no dollar sign.  The other possibilities are rare and
 484    have been relegated to parse_identifier_slow.  */
 485
 486 static cpp_hashnode *
 487 parse_identifier (pfile)
 488      cpp_reader *pfile;
 489 {
 490   cpp_hashnode *result;
 491   const U_CHAR *cur, *rlimit;
 492
 493   /* Fast-path loop.  Skim over a normal identifier.
 494      N.B. ISIDNUM does not include $.  */
 495   cur    = pfile->buffer->cur - 1;
 496   rlimit = pfile->buffer->rlimit;
 497   do
 498     cur++;
 499   while (cur < rlimit && ISIDNUM (*cur));
 500
 501   /* Check for slow-path cases.  */
 502   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 503     result = parse_identifier_slow (pfile, cur);
 504   else
 505     {
 506       const U_CHAR *base = pfile->buffer->cur - 1;
 507       result = (cpp_hashnode *)
 508         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 509       pfile->buffer->cur = cur;
 510     }
 511
 512   /* Rarely, identifiers require diagnostics when lexed.
 513      XXX Has to be forced out of the fast path.  */
 514   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 515                         && !pfile->state.skipping, 0))
 516     {
 517       /* It is allowed to poison the same identifier twice.  */
 518       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 519         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 520                    NODE_NAME (result));
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile,
 527         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Slow path.  This handles identifiers which have been split, and
 534    identifiers which contain dollar signs.  The part of the identifier
 535    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 536 static cpp_hashnode *
 537 parse_identifier_slow (pfile, cur)
 538      cpp_reader *pfile;
 539      const U_CHAR *cur;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   const U_CHAR *base = buffer->cur - 1;
 543   struct obstack *stack = &pfile->hash_table->stack;
 544   unsigned int c, saw_dollar = 0, len;
 545
 546   /* Copy the part of the token which is known to be okay.  */
 547   obstack_grow (stack, base, cur - base);
 548
 549   /* Now process the part which isn't.  We are looking at one of
 550      '$', '\\', or '?' on entry to this loop.  */
 551   c = *cur++;
 552   buffer->cur = cur;
 553   do
 554     {
 555       while (is_idchar (c))
 556         {
 557           obstack_1grow (stack, c);
 558
 559           if (c == '$')
 560             saw_dollar++;
 561
 562           c = EOF;
 563           if (buffer->cur == buffer->rlimit)
 564             break;
 565
 566           c = *buffer->cur++;
 567         }
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (pfile, c);
 573     }
 574   while (is_idchar (c));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* $ is not a identifier character in the standard, but is commonly
 580      accepted as an extension.  Don't warn about it in skipped
 581      conditional blocks.  */
 582   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 583     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 584
 585   /* Identifiers are null-terminated.  */
 586   len = obstack_object_size (stack);
 587   obstack_1grow (stack, '\0');
 588
 589   return (cpp_hashnode *)
 590     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 591 }
 592
 593 /* Parse a number, skipping embedded backslash-newlines.  */
 594 static void
 595 parse_number (pfile, number, c, leading_period)
 596      cpp_reader *pfile;
 597      cpp_string *number;
 598      cppchar_t c;
 599      int leading_period;
 600 {
 601   cpp_buffer *buffer = pfile->buffer;
 602   cpp_pool *pool = &pfile->ident_pool;
 603   unsigned char *dest, *limit;
 604
 605   dest = POOL_FRONT (pool);
 606   limit = POOL_LIMIT (pool);
 607
 608   /* Place a leading period.  */
 609   if (leading_period)
 610     {
 611       if (dest >= limit)
 612         limit = _cpp_next_chunk (pool, 0, &dest);
 613       *dest++ = '.';
 614     }
 615
 616   do
 617     {
 618       do
 619         {
 620           /* Need room for terminating null.  */
 621           if (dest + 1 >= limit)
 622             limit = _cpp_next_chunk (pool, 0, &dest);
 623           *dest++ = c;
 624
 625           c = EOF;
 626           if (buffer->cur == buffer->rlimit)
 627             break;
 628
 629           c = *buffer->cur++;
 630         }
 631       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 632
 633       /* Potential escaped newline?  */
 634       if (c != '?' && c != '\\')
 635         break;
 636       c = skip_escaped_newlines (pfile, c);
 637     }
 638   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 639
 640   /* Remember the next character.  */
 641   buffer->read_ahead = c;
 642
 643   /* Null-terminate the number.  */
 644   *dest = '\0';
 645
 646   number->text = POOL_FRONT (pool);
 647   number->len = dest - number->text;
 648   POOL_COMMIT (pool, number->len + 1);
 649 }
 650
 651 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 652 static void
 653 unterminated (pfile, term)
 654      cpp_reader *pfile;
 655      int term;
 656 {
 657   cpp_error (pfile, "missing terminating %c character", term);
 658
 659   if (term == '\"' && pfile->mlstring_pos.line
 660       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 661     {
 662       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 663                            pfile->mlstring_pos.col,
 664                            "possible start of unterminated string literal");
 665       pfile->mlstring_pos.line = 0;
 666     }
 667 }
 668
 669 /* Subroutine of parse_string.  */
 670 static int
 671 unescaped_terminator_p (pfile, dest)
 672      cpp_reader *pfile;
 673      const unsigned char *dest;
 674 {
 675   const unsigned char *start, *temp;
 676
 677   /* In #include-style directives, terminators are not escapeable.  */
 678   if (pfile->state.angled_headers)
 679     return 1;
 680
 681   start = POOL_FRONT (&pfile->ident_pool);
 682
 683   /* An odd number of consecutive backslashes represents an escaped
 684      terminator.  */
 685   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 686     ;
 687
 688   return ((dest - temp) & 1) == 0;
 689 }
 690
 691 /* Parses a string, character constant, or angle-bracketed header file
 692    name.  Handles embedded trigraphs and escaped newlines.  The stored
 693    string is guaranteed NUL-terminated, but it is not guaranteed that
 694    this is the first NUL since embedded NULs are preserved.
 695
 696    Multi-line strings are allowed, but they are deprecated.  */
 697 static void
 698 parse_string (pfile, token, terminator)
 699      cpp_reader *pfile;
 700      cpp_token *token;
 701      cppchar_t terminator;
 702 {
 703   cpp_buffer *buffer = pfile->buffer;
 704   cpp_pool *pool = &pfile->ident_pool;
 705   unsigned char *dest, *limit;
 706   cppchar_t c;
 707   bool warned_nulls = false, warned_multi = false;
 708
 709   dest = POOL_FRONT (pool);
 710   limit = POOL_LIMIT (pool);
 711
 712   for (;;)
 713     {
 714       if (buffer->cur == buffer->rlimit)
 715         c = EOF;
 716       else
 717         c = *buffer->cur++;
 718
 719     have_char:
 720       /* We need space for the terminating NUL.  */
 721       if (dest >= limit)
 722         limit = _cpp_next_chunk (pool, 0, &dest);
 723
 724       if (c == EOF)
 725         {
 726           unterminated (pfile, terminator);
 727           break;
 728         }
 729
 730       /* Handle trigraphs, escaped newlines etc.  */
 731       if (c == '?' || c == '\\')
 732         c = skip_escaped_newlines (pfile, c);
 733
 734       if (c == terminator && unescaped_terminator_p (pfile, dest))
 735         {
 736           c = EOF;
 737           break;
 738         }
 739       else if (is_vspace (c))
 740         {
 741           /* In assembly language, silently terminate string and
 742              character literals at end of line.  This is a kludge
 743              around not knowing where comments are.  */
 744           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 745             break;
 746
 747           /* Character constants and header names may not extend over
 748              multiple lines.  In Standard C, neither may strings.
 749              Unfortunately, we accept multiline strings as an
 750              extension, except in #include family directives.  */
 751           if (terminator != '"' || pfile->state.angled_headers)
 752             {
 753               unterminated (pfile, terminator);
 754               break;
 755             }
 756
 757           if (!warned_multi)
 758             {
 759               warned_multi = true;
 760               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 761             }
 762
 763           if (pfile->mlstring_pos.line == 0)
 764             pfile->mlstring_pos = pfile->lexer_pos;
 765
 766           c = handle_newline (pfile, c);
 767           *dest++ = '\n';
 768           goto have_char;
 769         }
 770       else if (c == '\0' && !warned_nulls)
 771         {
 772           warned_nulls = true;
 773           cpp_warning (pfile, "null character(s) preserved in literal");
 774         }
 775
 776       *dest++ = c;
 777     }
 778
 779   /* Remember the next character.  */
 780   buffer->read_ahead = c;
 781   *dest = '\0';
 782
 783   token->val.str.text = POOL_FRONT (pool);
 784   token->val.str.len = dest - token->val.str.text;
 785   POOL_COMMIT (pool, token->val.str.len + 1);
 786 }
 787
 788 /* The stored comment includes the comment start and any terminator.  */
 789 static void
 790 save_comment (pfile, token, from)
 791      cpp_reader *pfile;
 792      cpp_token *token;
 793      const unsigned char *from;
 794 {
 795   unsigned char *buffer;
 796   unsigned int len;
 797
 798   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 799   /* C++ comments probably (not definitely) have moved past a new
 800      line, which we don't want to save in the comment.  */
 801   if (pfile->buffer->read_ahead != EOF)
 802     len--;
 803   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 804
 805   token->type = CPP_COMMENT;
 806   token->val.str.len = len;
 807   token->val.str.text = buffer;
 808
 809   buffer[0] = '/';
 810   memcpy (buffer + 1, from, len - 1);
 811 }
 812
 813 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 814    want to avoid stepping back when lexing %:%X.  */
 815 static void
 816 lex_percent (pfile, result)
 817      cpp_reader *pfile;
 818      cpp_token *result;
 819 {
 820   cpp_buffer *buffer= pfile->buffer;
 821   cppchar_t c;
 822
 823   result->type = CPP_MOD;
 824   /* Parsing %:%X could leave an extra character.  */
 825   if (buffer->extra_char == EOF)
 826     c = get_effective_char (pfile);
 827   else
 828     {
 829       c = buffer->read_ahead = buffer->extra_char;
 830       buffer->extra_char = EOF;
 831     }
 832
 833   if (c == '=')
 834     ACCEPT_CHAR (CPP_MOD_EQ);
 835   else if (CPP_OPTION (pfile, digraphs))
 836     {
 837       if (c == ':')
 838         {
 839           result->flags |= DIGRAPH;
 840           ACCEPT_CHAR (CPP_HASH);
 841           if (get_effective_char (pfile) == '%')
 842             {
 843               buffer->extra_char = get_effective_char (pfile);
 844               if (buffer->extra_char == ':')
 845                 {
 846                   buffer->extra_char = EOF;
 847                   ACCEPT_CHAR (CPP_PASTE);
 848                 }
 849               else
 850                 /* We'll catch the extra_char when we're called back.  */
 851                 buffer->read_ahead = '%';
 852             }
 853         }
 854       else if (c == '>')
 855         {
 856           result->flags |= DIGRAPH;
 857           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 858         }
 859     }
 860 }
 861
 862 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 863    want to avoid stepping back when lexing '...' or '.123'.  In the
 864    latter case we should also set a flag for parse_number.  */
 865 static void
 866 lex_dot (pfile, result)
 867      cpp_reader *pfile;
 868      cpp_token *result;
 869 {
 870   cpp_buffer *buffer = pfile->buffer;
 871   cppchar_t c;
 872
 873   /* Parsing ..X could leave an extra character.  */
 874   if (buffer->extra_char == EOF)
 875     c = get_effective_char (pfile);
 876   else
 877     {
 878       c = buffer->read_ahead = buffer->extra_char;
 879       buffer->extra_char = EOF;
 880     }
 881
 882   /* All known character sets have 0...9 contiguous.  */
 883   if (c >= '0' && c <= '9')
 884     {
 885       result->type = CPP_NUMBER;
 886       parse_number (pfile, &result->val.str, c, 1);
 887     }
 888   else
 889     {
 890       result->type = CPP_DOT;
 891       if (c == '.')
 892         {
 893           buffer->extra_char = get_effective_char (pfile);
 894           if (buffer->extra_char == '.')
 895             {
 896               buffer->extra_char = EOF;
 897               ACCEPT_CHAR (CPP_ELLIPSIS);
 898             }
 899           else
 900             /* We'll catch the extra_char when we're called back.  */
 901             buffer->read_ahead = '.';
 902         }
 903       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 904         ACCEPT_CHAR (CPP_DOT_STAR);
 905     }
 906 }
 907
 908 /* Allocate COUNT tokens for RUN.  */
 909 void
 910 _cpp_init_tokenrun (run, count)
 911      tokenrun *run;
 912      unsigned int count;
 913 {
 914   run->base = xnewvec (cpp_token, count);
 915   run->limit = run->base + count;
 916   run->next = NULL;
 917 }
 918
 919 /* Returns the next tokenrun, or creates one if there is none.  */
 920 static tokenrun *
 921 next_tokenrun (run)
 922      tokenrun *run;
 923 {
 924   if (run->next == NULL)
 925     {
 926       run->next = xnew (tokenrun);
 927       run->next->prev = run;
 928       _cpp_init_tokenrun (run->next, 250);
 929     }
 930
 931   return run->next;
 932 }
 933
 934 /* Lex a token into RESULT (external interface).  */
 935 void
 936 _cpp_lex_token (pfile, dest)
 937      cpp_reader *pfile;
 938      cpp_token *dest;
 939 {
 940   cpp_token *result;
 941
 942   for (;;)
 943     {
 944       if (pfile->cur_token == pfile->cur_run->limit)
 945         {
 946           pfile->cur_run = next_tokenrun (pfile->cur_run);
 947           pfile->cur_token = pfile->cur_run->base;
 948         }
 949       result = pfile->cur_token++;
 950
 951       if (pfile->lookaheads)
 952         pfile->lookaheads--;
 953       else
 954         result = lex_token (pfile, result);
 955
 956       if (result->flags & BOL)
 957         {
 958           /* Is this a directive.  If _cpp_handle_directive returns
 959              false, it is an assembler #.  */
 960           if (result->type == CPP_HASH
 961               && !pfile->state.parsing_args
 962               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 963             continue;
 964           if (pfile->cb.line_change && !pfile->state.skipping)
 965             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 966         }
 967
 968       /* We don't skip tokens in directives.  */
 969       if (pfile->state.in_directive)
 970         break;
 971
 972       /* Outside a directive, invalidate controlling macros.  At file
 973          EOF, lex_token takes care of popping the buffer, so we never
 974          get here and MI optimisation works.  */
 975       pfile->mi_valid = false;
 976
 977       if (!pfile->state.skipping || result->type == CPP_EOF)
 978         break;
 979     }
 980
 981   *dest = *result;
 982 }
 983
 984 /* Lex one token from PFILE's current buffer into RESULT.  A newline gives CPP_EOF inside a
 985    directive; otherwise lexing restarts on the next line, at the base of the token run unless
 986    keep_tokens is set.  Returns where the token was stored, which may therefore not be RESULT.  */
 987 static cpp_token *
 988 lex_token (pfile, result)
 989      cpp_reader *pfile;
 990      cpp_token *result;
 991 {
 992   cppchar_t c;
 993   cpp_buffer *buffer;
 994   const unsigned char *comment_start;
 995
 996  fresh_line:  /* Entered at the start, after a newline, and after popping a buffer.  */
 997   buffer = pfile->buffer;
 998   result->flags = buffer->saved_flags;  /* E.g. BOL left behind by the previous newline.  */
 999   buffer->saved_flags = 0;
1000  update_tokens_line:  /* Re-stamp the line; pfile->line may have moved since fresh_line.  */
1001   pfile->lexer_pos.line = pfile->line;
1002   result->line = pfile->line;
1003
1004  skipped_white:  /* Fetch the next character, preferring a pending read-ahead.  */
1005   c = buffer->read_ahead;
1006   if (c == EOF && buffer->cur < buffer->rlimit)
1007     c = *buffer->cur++;
1008   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1009   pfile->lexer_pos.col = result->col;
1010   buffer->read_ahead = EOF;
1011
1012  trigraph:  /* Dispatch on C; re-entered from the '?' case when C was replaced.  */
1013   switch (c)
1014     {
1015     case EOF:  /* No read-ahead and nothing left before rlimit.  */
1016       buffer->saved_flags = BOL;
1017       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1018         {
1019           if (buffer->cur != buffer->line_base)
1020             {
1021               /* Non-empty files should end in a newline.  Don't warn
1022                  for command line and _Pragma buffers.  */
1023               if (!buffer->from_stage3)
1024                 cpp_pedwarn (pfile, "no newline at end of file");
1025               handle_newline (pfile, '\n');
1026             }
1027
1028           /* Don't pop the last buffer.  */
1029           if (buffer->prev)
1030             {
1031               unsigned char stop = buffer->return_at_eof;  /* Sampled before the pop.  */
1032
1033               _cpp_pop_buffer (pfile);
1034               if (!stop)
1035                 goto fresh_line;  /* fresh_line reloads BUFFER from PFILE.  */
1036             }
1037         }
1038       result->type = CPP_EOF;
1039       break;
1040
1041     case ' ': case '\t': case '\f': case '\v': case '\0':
1042       skip_whitespace (pfile, c);
1043       result->flags |= PREV_WHITE;
1044       goto skipped_white;
1045
1046     case '\n': case '\r':
1047       handle_newline (pfile, c);
1048       buffer->saved_flags = BOL;  /* The next token begins a line.  */
1049       if (! pfile->state.in_directive)
1050         {
1051           if (!pfile->keep_tokens)
1052             {
1053               pfile->cur_run = &pfile->base_run;  /* Rewind token storage to the base run.  */
1054               result = pfile->base_run.base;
1055               pfile->cur_token = result + 1;
1056             }
1057           goto fresh_line;
1058         }
1059       result->type = CPP_EOF;  /* Inside a directive the newline ends it.  */
1060       break;
1061
1062     case '?':
1063     case '\\':
1064       /* These could start an escaped newline, or '?' a trigraph.  Let
1065          skip_escaped_newlines do all the work.  */
1066       {
1067         unsigned int line = pfile->line;
1068
1069         c = skip_escaped_newlines (pfile, c);
1070         if (line != pfile->line)
1071           /* We had at least one escaped newline of some sort, and the
1072              next character is in buffer->read_ahead.  Update the
1073              token's line and column.  */
1074             goto update_tokens_line;
1075
1076         /* We are either the original '?' or '\\', or a trigraph.  */
1077         result->type = CPP_QUERY;
1078         buffer->read_ahead = EOF;
1079         if (c == '\\')
1080           goto random_char;  /* A lone backslash becomes CPP_OTHER.  */
1081         else if (c != '?')
1082           goto trigraph;  /* C was replaced: dispatch on the new character.  */
1083       }
1084       break;
1085
1086     case '0': case '1': case '2': case '3': case '4':
1087     case '5': case '6': case '7': case '8': case '9':
1088       result->type = CPP_NUMBER;
1089       parse_number (pfile, &result->val.str, c, 0);
1090       break;
1091
1092     case '$':
1093       if (!CPP_OPTION (pfile, dollars_in_ident))
1094         goto random_char;
1095       /* Fall through...  */
1096
1097     case '_':
1098     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1099     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1100     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1101     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1102     case 'y': case 'z':
1103     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1104     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1105     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1106     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1107     case 'Y': case 'Z':
1108       result->type = CPP_NAME;
1109       result->val.node = parse_identifier (pfile);
1110
1111       /* 'L' may introduce wide characters or strings.  */
1112       if (result->val.node == pfile->spec_nodes.n_L)
1113         {
1114           c = buffer->read_ahead;  /* Peek only: cur is not advanced yet.  */
1115           if (c == EOF && buffer->cur < buffer->rlimit)
1116             c = *buffer->cur;
1117           if (c == '\'' || c == '"')
1118             {
1119               buffer->cur++;  /* NOTE(review): also advances if C came from read_ahead -- confirm that cannot hold a quote here.  */
1120               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1121               goto make_string;
1122             }
1123         }
1124       /* Convert named operators to their proper types.  */
1125       else if (result->val.node->flags & NODE_OPERATOR)
1126         {
1127           result->flags |= NAMED_OP;
1128           result->type = result->val.node->value.operator;
1129         }
1130       break;
1131
1132     case '\'':
1133     case '"':
1134       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1135     make_string:  /* C is the terminating character handed to parse_string.  */
1136       parse_string (pfile, result, c);
1137       break;
1138
1139     case '/':
1140       /* A potential block or line comment.  */
1141       comment_start = buffer->cur;
1142       result->type = CPP_DIV;
1143       c = get_effective_char (pfile);
1144       if (c == '=')
1145         ACCEPT_CHAR (CPP_DIV_EQ);
1146       if (c != '/' && c != '*')  /* Plain '/' or "/=": not a comment.  */
1147         break;
1148
1149       if (c == '*')
1150         {
1151           if (skip_block_comment (pfile))
1152             cpp_error (pfile, "unterminated comment");
1153         }
1154       else
1155         {
1156           if (!CPP_OPTION (pfile, cplusplus_comments)
1157               && !CPP_IN_SYSTEM_HEADER (pfile))
1158             break;  /* "//" is not a comment here: the token stays CPP_DIV.  */
1159
1160           /* Warn about comments only if pedantically GNUC89, and not
1161              in system headers.  */
1162           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1163               && ! buffer->warned_cplusplus_comments)
1164             {
1165               cpp_pedwarn (pfile,
1166                            "C++ style comments are not allowed in ISO C89");
1167               cpp_pedwarn (pfile,
1168                            "(this will be reported only once per input file)");
1169               buffer->warned_cplusplus_comments = 1;
1170             }
1171
1172           /* Skip_line_comment updates buffer->read_ahead.  */
1173           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1174             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1175                                    pfile->lexer_pos.col,
1176                                    "multi-line comment");
1177         }
1178
1179       /* Skipping the comment has updated buffer->read_ahead.  */
1180       if (!pfile->state.save_comments)
1181         {
1182           result->flags |= PREV_WHITE;  /* A discarded comment counts as whitespace.  */
1183           goto update_tokens_line;
1184         }
1185
1186       /* Save the comment as a token in its own right.  */
1187       save_comment (pfile, result, comment_start);
1188       /* Don't do MI optimisation.  */
1189       break;
1190
1191     case '<':
1192       if (pfile->state.angled_headers)
1193         {
1194           result->type = CPP_HEADER_NAME;
1195           c = '>';              /* terminator.  */
1196           goto make_string;
1197         }
1198
1199       result->type = CPP_LESS;
1200       c = get_effective_char (pfile);
1201       if (c == '=')
1202         ACCEPT_CHAR (CPP_LESS_EQ);
1203       else if (c == '<')
1204         {
1205           ACCEPT_CHAR (CPP_LSHIFT);
1206           if (get_effective_char (pfile) == '=')
1207             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1208         }
1209       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1210         {
1211           ACCEPT_CHAR (CPP_MIN);
1212           if (get_effective_char (pfile) == '=')
1213             ACCEPT_CHAR (CPP_MIN_EQ);
1214         }
1215       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1216         {
1217           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1218           result->flags |= DIGRAPH;
1219         }
1220       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1221         {
1222           ACCEPT_CHAR (CPP_OPEN_BRACE);
1223           result->flags |= DIGRAPH;
1224         }
1225       break;
1226
1227     case '>':
1228       result->type = CPP_GREATER;
1229       c = get_effective_char (pfile);
1230       if (c == '=')
1231         ACCEPT_CHAR (CPP_GREATER_EQ);
1232       else if (c == '>')
1233         {
1234           ACCEPT_CHAR (CPP_RSHIFT);
1235           if (get_effective_char (pfile) == '=')
1236             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1237         }
1238       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1239         {
1240           ACCEPT_CHAR (CPP_MAX);
1241           if (get_effective_char (pfile) == '=')
1242             ACCEPT_CHAR (CPP_MAX_EQ);
1243         }
1244       break;
1245
1246     case '%':
1247       lex_percent (pfile, result);
1248       break;
1249
1250     case '.':
1251       lex_dot (pfile, result);
1252       break;
1253
1254     case '+':
1255       result->type = CPP_PLUS;
1256       c = get_effective_char (pfile);
1257       if (c == '=')
1258         ACCEPT_CHAR (CPP_PLUS_EQ);
1259       else if (c == '+')
1260         ACCEPT_CHAR (CPP_PLUS_PLUS);
1261       break;
1262
1263     case '-':
1264       result->type = CPP_MINUS;
1265       c = get_effective_char (pfile);
1266       if (c == '>')
1267         {
1268           ACCEPT_CHAR (CPP_DEREF);
1269           if (CPP_OPTION (pfile, cplusplus)
1270               && get_effective_char (pfile) == '*')
1271             ACCEPT_CHAR (CPP_DEREF_STAR);
1272         }
1273       else if (c == '=')
1274         ACCEPT_CHAR (CPP_MINUS_EQ);
1275       else if (c == '-')
1276         ACCEPT_CHAR (CPP_MINUS_MINUS);
1277       break;
1278
1279     case '*':
1280       result->type = CPP_MULT;
1281       if (get_effective_char (pfile) == '=')
1282         ACCEPT_CHAR (CPP_MULT_EQ);
1283       break;
1284
1285     case '=':
1286       result->type = CPP_EQ;
1287       if (get_effective_char (pfile) == '=')
1288         ACCEPT_CHAR (CPP_EQ_EQ);
1289       break;
1290
1291     case '!':
1292       result->type = CPP_NOT;
1293       if (get_effective_char (pfile) == '=')
1294         ACCEPT_CHAR (CPP_NOT_EQ);
1295       break;
1296
1297     case '&':
1298       result->type = CPP_AND;
1299       c = get_effective_char (pfile);
1300       if (c == '=')
1301         ACCEPT_CHAR (CPP_AND_EQ);
1302       else if (c == '&')
1303         ACCEPT_CHAR (CPP_AND_AND);
1304       break;
1305
1306     case '#':
1307       result->type = CPP_HASH;
1308       if (get_effective_char (pfile) == '#')
1309           ACCEPT_CHAR (CPP_PASTE);
1310       break;
1311
1312     case '|':
1313       result->type = CPP_OR;
1314       c = get_effective_char (pfile);
1315       if (c == '=')
1316         ACCEPT_CHAR (CPP_OR_EQ);
1317       else if (c == '|')
1318         ACCEPT_CHAR (CPP_OR_OR);
1319       break;
1320
1321     case '^':
1322       result->type = CPP_XOR;
1323       if (get_effective_char (pfile) == '=')
1324         ACCEPT_CHAR (CPP_XOR_EQ);
1325       break;
1326
1327     case ':':
1328       result->type = CPP_COLON;
1329       c = get_effective_char (pfile);
1330       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1331         ACCEPT_CHAR (CPP_SCOPE);
1332       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1333         {
1334           result->flags |= DIGRAPH;
1335           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1336         }
1337       break;
1338
1339     case '~': result->type = CPP_COMPL; break;
1340     case ',': result->type = CPP_COMMA; break;
1341     case '(': result->type = CPP_OPEN_PAREN; break;
1342     case ')': result->type = CPP_CLOSE_PAREN; break;
1343     case '[': result->type = CPP_OPEN_SQUARE; break;
1344     case ']': result->type = CPP_CLOSE_SQUARE; break;
1345     case '{': result->type = CPP_OPEN_BRACE; break;
1346     case '}': result->type = CPP_CLOSE_BRACE; break;
1347     case ';': result->type = CPP_SEMICOLON; break;
1348
1349       /* @ is a punctuator in Objective C.  */
1350     case '@': result->type = CPP_ATSIGN; break;
1351
1352     random_char:  /* Also reached by goto for a lone '\\' and for a disallowed '$'.  */
1353     default:
1354       result->type = CPP_OTHER;
1355       result->val.c = c;  /* The token carries the character itself.  */
1356       break;
1357     }
1358
1359   return result;
1360 }
1361
1362 /* An upper bound on the number of bytes needed to spell a token,
1363    including preceding whitespace.  */
1364 unsigned int
1365 cpp_token_len (token)
1366      const cpp_token *token;
1367 {
1368   unsigned int len;
1369
1370   switch (TOKEN_SPELL (token))
1371     {
1372     default:            len = 0;                                break;
1373     case SPELL_STRING:  len = token->val.str.len;               break;
1374     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1375     }
1376   /* 1 for whitespace, 4 for comment delimeters.  */
1377   return len + 5;
1378 }
1379
1380 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1381    already contain the enough space to hold the token's spelling.
1382    Returns a pointer to the character after the last character
1383    written.  */
1384 unsigned char *
1385 cpp_spell_token (pfile, token, buffer)
1386      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1387      const cpp_token *token;
1388      unsigned char *buffer;
1389 {
1390   switch (TOKEN_SPELL (token))
1391     {
1392     case SPELL_OPERATOR:
1393       {
1394         const unsigned char *spelling;
1395         unsigned char c;
1396
1397         if (token->flags & DIGRAPH)
1398           spelling
1399             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1400         else if (token->flags & NAMED_OP)
1401           goto spell_ident;
1402         else
1403           spelling = TOKEN_NAME (token);
1404
1405         while ((c = *spelling++) != '\0')
1406           *buffer++ = c;
1407       }
1408       break;
1409
1410     case SPELL_IDENT:
1411       spell_ident:
1412       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1413       buffer += NODE_LEN (token->val.node);
1414       break;
1415
1416     case SPELL_STRING:
1417       {
1418         int left, right, tag;
1419         switch (token->type)
1420           {
1421           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1422           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1423           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1424           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1425           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1426           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1427           }
1428         if (tag) *buffer++ = tag;
1429         if (left) *buffer++ = left;
1430         memcpy (buffer, token->val.str.text, token->val.str.len);
1431         buffer += token->val.str.len;
1432         if (right) *buffer++ = right;
1433       }
1434       break;
1435
1436     case SPELL_CHAR:
1437       *buffer++ = token->val.c;
1438       break;
1439
1440     case SPELL_NONE:
1441       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1442       break;
1443     }
1444
1445   return buffer;
1446 }
1447
1448 /* Returns a token as a null-terminated string.  The string is
1449    temporary, and automatically freed later.  Useful for diagnostics.  */
1450 unsigned char *
1451 cpp_token_as_text (pfile, token)
1452      cpp_reader *pfile;
1453      const cpp_token *token;
1454 {
1455   unsigned int len = cpp_token_len (token);
1456   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1457
1458   end = cpp_spell_token (pfile, token, start);
1459   end[0] = '\0';
1460
1461   return start;
1462 }
1463
1464 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1465 const char *
1466 cpp_type2name (type)
1467      enum cpp_ttype type;
1468 {
1469   return (const char *) token_spellings[type].name;
1470 }
1471
1472 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1473    for efficiency - to avoid double-buffering.  Also, outputs a space
1474    if PREV_WHITE is flagged.  */
1475 void
1476 cpp_output_token (token, fp)
1477      const cpp_token *token;
1478      FILE *fp;
1479 {
1480   if (token->flags & PREV_WHITE)
1481     putc (' ', fp);
1482
1483   switch (TOKEN_SPELL (token))
1484     {
1485     case SPELL_OPERATOR:
1486       {
1487         const unsigned char *spelling;
1488
1489         if (token->flags & DIGRAPH)
1490           spelling
1491             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1492         else if (token->flags & NAMED_OP)
1493           goto spell_ident;
1494         else
1495           spelling = TOKEN_NAME (token);
1496
1497         ufputs (spelling, fp);
1498       }
1499       break;
1500
1501     spell_ident:
1502     case SPELL_IDENT:
1503       ufputs (NODE_NAME (token->val.node), fp);
1504     break;
1505
1506     case SPELL_STRING:
1507       {
1508         int left, right, tag;
1509         switch (token->type)
1510           {
1511           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1512           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1513           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1514           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1515           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1516           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1517           }
1518         if (tag) putc (tag, fp);
1519         if (left) putc (left, fp);
1520         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1521         if (right) putc (right, fp);
1522       }
1523       break;
1524
1525     case SPELL_CHAR:
1526       putc (token->val.c, fp);
1527       break;
1528
1529     case SPELL_NONE:
1530       /* An error, most probably.  */
1531       break;
1532     }
1533 }
1534
1535 /* Compare two tokens.  */
1536 int
1537 _cpp_equiv_tokens (a, b)
1538      const cpp_token *a, *b;
1539 {
1540   if (a->type == b->type && a->flags == b->flags)
1541     switch (TOKEN_SPELL (a))
1542       {
1543       default:                  /* Keep compiler happy.  */
1544       case SPELL_OPERATOR:
1545         return 1;
1546       case SPELL_CHAR:
1547         return a->val.c == b->val.c; /* Character.  */
1548       case SPELL_NONE:
1549         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1550       case SPELL_IDENT:
1551         return a->val.node == b->val.node;
1552       case SPELL_STRING:
1553         return (a->val.str.len == b->val.str.len
1554                 && !memcmp (a->val.str.text, b->val.str.text,
1555                             a->val.str.len));
1556       }
1557
1558   return 0;
1559 }
1560
1561 /* Determine whether two tokens can be pasted together, and if so,
1562    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1563    be pasted, or the appropriate type for the merged token if they
1564    can.  */
1565 enum cpp_ttype
1566 cpp_can_paste (pfile, token1, token2, digraph)
1567      cpp_reader * pfile;
1568      const cpp_token *token1, *token2;
1569      int* digraph;
1570 {
1571   enum cpp_ttype a = token1->type, b = token2->type;
1572   int cxx = CPP_OPTION (pfile, cplusplus);
1573
1574   /* Treat named operators as if they were ordinary NAMEs.  */
1575   if (token1->flags & NAMED_OP)
1576     a = CPP_NAME;
1577   if (token2->flags & NAMED_OP)
1578     b = CPP_NAME;
1579
1580   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1581     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1582
1583   switch (a)
1584     {
1585     case CPP_GREATER:
1586       if (b == a) return CPP_RSHIFT;
1587       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1588       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1589       break;
1590     case CPP_LESS:
1591       if (b == a) return CPP_LSHIFT;
1592       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1593       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1594       if (CPP_OPTION (pfile, digraphs))
1595         {
1596           if (b == CPP_COLON)
1597             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1598           if (b == CPP_MOD)
1599             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1600         }
1601       break;
1602
1603     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1604     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1605     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1606
1607     case CPP_MINUS:
1608       if (b == a)               return CPP_MINUS_MINUS;
1609       if (b == CPP_GREATER)     return CPP_DEREF;
1610       break;
1611     case CPP_COLON:
1612       if (b == a && cxx)        return CPP_SCOPE;
1613       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1614         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1615       break;
1616
1617     case CPP_MOD:
1618       if (CPP_OPTION (pfile, digraphs))
1619         {
1620           if (b == CPP_GREATER)
1621             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1622           if (b == CPP_COLON)
1623             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1624         }
1625       break;
1626     case CPP_DEREF:
1627       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1628       break;
1629     case CPP_DOT:
1630       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1631       if (b == CPP_NUMBER)      return CPP_NUMBER;
1632       break;
1633
1634     case CPP_HASH:
1635       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1636         /* %:%: digraph */
1637         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1638       break;
1639
1640     case CPP_NAME:
1641       if (b == CPP_NAME)        return CPP_NAME;
1642       if (b == CPP_NUMBER
1643           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1644       if (b == CPP_CHAR
1645           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1646       if (b == CPP_STRING
1647           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1648       break;
1649
1650     case CPP_NUMBER:
1651       if (b == CPP_NUMBER)      return CPP_NUMBER;
1652       if (b == CPP_NAME)        return CPP_NUMBER;
1653       if (b == CPP_DOT)         return CPP_NUMBER;
1654       /* Numbers cannot have length zero, so this is safe.  */
1655       if ((b == CPP_PLUS || b == CPP_MINUS)
1656           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1657         return CPP_NUMBER;
1658       break;
1659
1660     default:
1661       break;
1662     }
1663
1664   return CPP_EOF;
1665 }
1666
1667 /* Returns nonzero if a space should be inserted to avoid an
1668    accidental token paste for output.  For simplicity, it is
1669    conservative, and occasionally advises a space where one is not
1670    needed, e.g. "." and ".2".  */
1671
1672 int
1673 cpp_avoid_paste (pfile, token1, token2)
1674      cpp_reader *pfile;
1675      const cpp_token *token1, *token2;
1676 {
1677   enum cpp_ttype a = token1->type, b = token2->type;
1678   cppchar_t c;
1679
1680   if (token1->flags & NAMED_OP)
1681     a = CPP_NAME;
1682   if (token2->flags & NAMED_OP)
1683     b = CPP_NAME;
1684
1685   c = EOF;
1686   if (token2->flags & DIGRAPH)
1687     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1688   else if (token_spellings[b].category == SPELL_OPERATOR)
1689     c = token_spellings[b].name[0];
1690
1691   /* Quickly get everything that can paste with an '='.  */
1692   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1693     return 1;
1694
1695   switch (a)
1696     {
1697     case CPP_GREATER:   return c == '>' || c == '?';
1698     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1699     case CPP_PLUS:      return c == '+';
1700     case CPP_MINUS:     return c == '-' || c == '>';
1701     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1702     case CPP_MOD:       return c == ':' || c == '>';
1703     case CPP_AND:       return c == '&';
1704     case CPP_OR:        return c == '|';
1705     case CPP_COLON:     return c == ':' || c == '>';
1706     case CPP_DEREF:     return c == '*';
1707     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1708     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1709     case CPP_NAME:      return ((b == CPP_NUMBER
1710                                  && name_p (pfile, &token2->val.str))
1711                                 || b == CPP_NAME
1712                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1713     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1714                                 || c == '.' || c == '+' || c == '-');
1715     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1716                                 && token1->val.c == '@'
1717                                 && (b == CPP_NAME || b == CPP_STRING));
1718     default:            break;
1719     }
1720
1721   return 0;
1722 }
1723
1724 /* Output all the remaining tokens on the current line, and a newline
1725    character, to FP.  Leading whitespace is removed.  */
1726 void
1727 cpp_output_line (pfile, fp)
1728      cpp_reader *pfile;
1729      FILE *fp;
1730 {
1731   cpp_token token;
1732
1733   cpp_get_token (pfile, &token);
1734   token.flags &= ~PREV_WHITE;
1735   while (token.type != CPP_EOF)
1736     {
1737       cpp_output_token (&token, fp);
1738       cpp_get_token (pfile, &token);
1739     }
1740
1741   putc ('\n', fp);
1742 }
1743
/* Returns the value, 0 to 15, of the hexadecimal digit C, which may
   be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return c - '0';

    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      return c - 'a' + 10;

    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
      return c - 'A' + 10;

    default:
      abort ();
    }
}
1757
1758 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1759    failure if cpplib is not parsing C++ or C99.  Such failure is
1760    silent, and no variables are updated.  Otherwise returns 0, and
1761    warns if -Wtraditional.
1762
1763    [lex.charset]: The character designated by the universal character
1764    name \UNNNNNNNN is that character whose character short name in
1765    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1766    universal character name \uNNNN is that character whose character
1767    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1768    for a universal character name is less than 0x20 or in the range
1769    0x7F-0x9F (inclusive), or if the universal character name
1770    designates a character in the basic source character set, then the
1771    program is ill-formed.
1772
1773    We assume that wchar_t is Unicode, so we don't need to do any
1774    mapping.  Is this ever wrong?
1775
1776    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1777    LIMIT is the end of the string or charconst.  PSTR is updated to
1778    point after the UCS on return, and the UCS is written into PC.  */
1779
1780 static int
1781 maybe_read_ucs (pfile, pstr, limit, pc)
1782      cpp_reader *pfile;
1783      const unsigned char **pstr;
1784      const unsigned char *limit;
1785      unsigned int *pc;
1786 {
1787   const unsigned char *p = *pstr;
1788   unsigned int code = 0;
1789   unsigned int c = *pc, length;
1790
1791   /* Only attempt to interpret a UCS for C++ and C99.  */
1792   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1793     return 1;
1794
1795   if (CPP_WTRADITIONAL (pfile))
1796     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1797
1798   length = (c == 'u' ? 4: 8);
1799
1800   if ((size_t) (limit - p) < length)
1801     {
1802       cpp_error (pfile, "incomplete universal-character-name");
1803       /* Skip to the end to avoid more diagnostics.  */
1804       p = limit;
1805     }
1806   else
1807     {
1808       for (; length; length--, p++)
1809         {
1810           c = *p;
1811           if (ISXDIGIT (c))
1812             code = (code << 4) + hex_digit_value (c);
1813           else
1814             {
1815               cpp_error (pfile,
1816                          "non-hex digit '%c' in universal-character-name", c);
1817               /* We shouldn't skip in case there are multibyte chars.  */
1818               break;
1819             }
1820         }
1821     }
1822
1823 #ifdef TARGET_EBCDIC
1824   cpp_error (pfile, "universal-character-name on EBCDIC target");
1825   code = 0x3f;  /* EBCDIC invalid character */
1826 #else
1827  /* True extended characters are OK.  */
1828   if (code >= 0xa0
1829       && !(code & 0x80000000)
1830       && !(code >= 0xD800 && code <= 0xDFFF))
1831     ;
1832   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1833      hex escapes so that this also works with EBCDIC hosts.  */
1834   else if (code == 0x24 || code == 0x40 || code == 0x60)
1835     ;
1836   /* Don't give another error if one occurred above.  */
1837   else if (length == 0)
1838     cpp_error (pfile, "universal-character-name out of range");
1839 #endif
1840
1841   *pstr = p;
1842   *pc = code;
1843   return 0;
1844 }
1845
1846 /* Interpret an escape sequence, and return its value.  PSTR points to
1847    the input pointer, which is just after the backslash.  LIMIT is how
1848    much text we have.  MASK is a bitmask for the precision for the
1849    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1850    interpret escapes that did not exist in traditional C.
1851
1852    Handles all relevant diagnostics.  */
1853
1854 unsigned int
1855 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1856      cpp_reader *pfile;
1857      const unsigned char **pstr;
1858      const unsigned char *limit;
1859      unsigned HOST_WIDE_INT mask;
1860      int traditional;
1861 {
1862   int unknown = 0;
1863   const unsigned char *str = *pstr;
1864   unsigned int c = *str++;
1865
1866   switch (c)
1867     {
1868     case '\\': case '\'': case '"': case '?': break;
1869     case 'b': c = TARGET_BS;      break;
1870     case 'f': c = TARGET_FF;      break;
1871     case 'n': c = TARGET_NEWLINE; break;
1872     case 'r': c = TARGET_CR;      break;
1873     case 't': c = TARGET_TAB;     break;
1874     case 'v': c = TARGET_VT;      break;
1875
1876     case '(': case '{': case '[': case '%':
1877       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1878          '\%' is used to prevent SCCS from getting confused.  */
1879       unknown = CPP_PEDANTIC (pfile);
1880       break;
1881
1882     case 'a':
1883       if (CPP_WTRADITIONAL (pfile))
1884         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1885       if (!traditional)
1886         c = TARGET_BELL;
1887       break;
1888
1889     case 'e': case 'E':
1890       if (CPP_PEDANTIC (pfile))
1891         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1892       c = TARGET_ESC;
1893       break;
1894
1895     case 'u': case 'U':
1896       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1897       break;
1898
1899     case 'x':
1900       if (CPP_WTRADITIONAL (pfile))
1901         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1902
1903       if (!traditional)
1904         {
1905           unsigned int i = 0, overflow = 0;
1906           int digits_found = 0;
1907
1908           while (str < limit)
1909             {
1910               c = *str;
1911               if (! ISXDIGIT (c))
1912                 break;
1913               str++;
1914               overflow |= i ^ (i << 4 >> 4);
1915               i = (i << 4) + hex_digit_value (c);
1916               digits_found = 1;
1917             }
1918
1919           if (!digits_found)
1920             cpp_error (pfile, "\\x used with no following hex digits");
1921
1922           if (overflow | (i != (i & mask)))
1923             {
1924               cpp_pedwarn (pfile, "hex escape sequence out of range");
1925               i &= mask;
1926             }
1927           c = i;
1928         }
1929       break;
1930
1931     case '0':  case '1':  case '2':  case '3':
1932     case '4':  case '5':  case '6':  case '7':
1933       {
1934         unsigned int i = c - '0';
1935         int count = 0;
1936
1937         while (str < limit && ++count < 3)
1938           {
1939             c = *str;
1940             if (c < '0' || c > '7')
1941               break;
1942             str++;
1943             i = (i << 3) + c - '0';
1944           }
1945
1946         if (i != (i & mask))
1947           {
1948             cpp_pedwarn (pfile, "octal escape sequence out of range");
1949             i &= mask;
1950           }
1951         c = i;
1952       }
1953       break;
1954
1955     default:
1956       unknown = 1;
1957       break;
1958     }
1959
1960   if (unknown)
1961     {
1962       if (ISGRAPH (c))
1963         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1964       else
1965         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1966     }
1967
1968   if (c > mask)
1969     cpp_pedwarn (pfile, "escape sequence out of range for character");
1970
1971   *pstr = str;
1972   return c;
1973 }
1974
/* Unless already defined, the maximum character widths are the
   ordinary CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1982
1983 /* Interpret a (possibly wide) character constant in TOKEN.
1984    WARN_MULTI warns about multi-character charconsts, if not
1985    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1986    that did not exist in traditional C.  PCHARS_SEEN points to a
1987    variable that is filled in with the number of characters seen.  */
1988 HOST_WIDE_INT
1989 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1990      cpp_reader *pfile;
1991      const cpp_token *token;
1992      int warn_multi;
1993      int traditional;
1994      unsigned int *pchars_seen;
1995 {
1996   const unsigned char *str = token->val.str.text;
1997   const unsigned char *limit = str + token->val.str.len;
1998   unsigned int chars_seen = 0;
1999   unsigned int width, max_chars, c;
2000   unsigned HOST_WIDE_INT mask;
2001   HOST_WIDE_INT result = 0;
2002
2003 #ifdef MULTIBYTE_CHARS
2004   (void) local_mbtowc (NULL, NULL, 0);
2005 #endif
2006
2007   /* Width in bits.  */
2008   if (token->type == CPP_CHAR)
2009     width = MAX_CHAR_TYPE_SIZE;
2010   else
2011     width = MAX_WCHAR_TYPE_SIZE;
2012
2013   if (width < HOST_BITS_PER_WIDE_INT)
2014     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
2015   else
2016     mask = ~0;
2017   max_chars = HOST_BITS_PER_WIDE_INT / width;
2018
2019   while (str < limit)
2020     {
2021 #ifdef MULTIBYTE_CHARS
2022       wchar_t wc;
2023       int char_len;
2024
2025       char_len = local_mbtowc (&wc, str, limit - str);
2026       if (char_len == -1)
2027         {
2028           cpp_warning (pfile, "ignoring invalid multibyte character");
2029           c = *str++;
2030         }
2031       else
2032         {
2033           str += char_len;
2034           c = wc;
2035         }
2036 #else
2037       c = *str++;
2038 #endif
2039
2040       if (c == '\\')
2041         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2042
2043 #ifdef MAP_CHARACTER
2044       if (ISPRINT (c))
2045         c = MAP_CHARACTER (c);
2046 #endif
2047
2048       /* Merge character into result; ignore excess chars.  */
2049       if (++chars_seen <= max_chars)
2050         {
2051           if (width < HOST_BITS_PER_WIDE_INT)
2052             result = (result << width) | (c & mask);
2053           else
2054             result = c;
2055         }
2056     }
2057
2058   if (chars_seen == 0)
2059     cpp_error (pfile, "empty character constant");
2060   else if (chars_seen > max_chars)
2061     {
2062       chars_seen = max_chars;
2063       cpp_warning (pfile, "character constant too long");
2064     }
2065   else if (chars_seen > 1 && !traditional && warn_multi)
2066     cpp_warning (pfile, "multi-character character constant");
2067
2068   /* If char type is signed, sign-extend the constant.  The
2069      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2070   if (token->type == CPP_CHAR && chars_seen)
2071     {
2072       unsigned int nbits = chars_seen * width;
2073       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2074
2075       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2076           || ((result >> (nbits - 1)) & 1) == 0)
2077         result &= mask;
2078       else
2079         result |= ~mask;
2080     }
2081
2082   *pchars_seen = chars_seen;
2083   return result;
2084 }
2085
2086 /* Memory pools.  */
2087
/* Alignment probe.  The offset of U, which follows a single char, is
   the alignment the compiler gives to a union of a double and a
   pointer; that offset is the pools' default alignment.  */
struct dummy
{
  char c;
  union { double d; int *p; } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2099
2100 static int
2101 chunk_suitable (pool, chunk, size)
2102      cpp_pool *pool;
2103      cpp_chunk *chunk;
2104      unsigned int size;
2105 {
2106   /* Being at least twice SIZE means we can use memcpy in
2107      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2108      anyway.  */
2109   return (chunk && pool->locked != chunk
2110           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2111 }
2112
2113 /* Returns the end of the new pool.  PTR points to a char in the old
2114    pool, and is updated to point to the same char in the new pool.  */
2115 unsigned char *
2116 _cpp_next_chunk (pool, len, ptr)
2117      cpp_pool *pool;
2118      unsigned int len;
2119      unsigned char **ptr;
2120 {
2121   cpp_chunk *chunk = pool->cur->next;
2122
2123   /* LEN is the minimum size we want in the new pool.  */
2124   len += POOL_ROOM (pool);
2125   if (! chunk_suitable (pool, chunk, len))
2126     {
2127       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2128
2129       chunk->next = pool->cur->next;
2130       pool->cur->next = chunk;
2131     }
2132
2133   /* Update the pointer before changing chunk's front.  */
2134   if (ptr)
2135     *ptr += chunk->base - POOL_FRONT (pool);
2136
2137   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2138   chunk->front = chunk->base;
2139
2140   pool->cur = chunk;
2141   return POOL_LIMIT (pool);
2142 }
2143
2144 static cpp_chunk *
2145 new_chunk (size)
2146      unsigned int size;
2147 {
2148   unsigned char *base;
2149   cpp_chunk *result;
2150
2151   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2152   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2153   /* Put the chunk descriptor at the end.  Then chunk overruns will
2154      cause obvious chaos.  */
2155   result = (cpp_chunk *) (base + size);
2156   result->base = base;
2157   result->front = base;
2158   result->limit = base + size;
2159   result->next = 0;
2160
2161   return result;
2162 }
2163
2164 void
2165 _cpp_init_pool (pool, size, align, temp)
2166      cpp_pool *pool;
2167      unsigned int size, align, temp;
2168 {
2169   if (align == 0)
2170     align = DEFAULT_ALIGNMENT;
2171   if (align & (align - 1))
2172     abort ();
2173   pool->align = align;
2174   pool->first = new_chunk (size);
2175   pool->cur = pool->first;
2176   pool->locked = 0;
2177   pool->locks = 0;
2178   if (temp)
2179     pool->cur->next = pool->cur;
2180 }
2181
2182 void
2183 _cpp_lock_pool (pool)
2184      cpp_pool *pool;
2185 {
2186   if (pool->locks++ == 0)
2187     pool->locked = pool->cur;
2188 }
2189
2190 void
2191 _cpp_unlock_pool (pool)
2192      cpp_pool *pool;
2193 {
2194   if (--pool->locks == 0)
2195     pool->locked = 0;
2196 }
2197
2198 void
2199 _cpp_free_pool (pool)
2200      cpp_pool *pool;
2201 {
2202   cpp_chunk *chunk = pool->first, *next;
2203
2204   do
2205     {
2206       next = chunk->next;
2207       free (chunk->base);
2208       chunk = next;
2209     }
2210   while (chunk && chunk != pool->first);
2211 }
2212
2213 /* Reserve LEN bytes from a memory pool.  */
2214 unsigned char *
2215 _cpp_pool_reserve (pool, len)
2216      cpp_pool *pool;
2217      unsigned int len;
2218 {
2219   len = POOL_ALIGN (len, pool->align);
2220   if (len > (unsigned int) POOL_ROOM (pool))
2221     _cpp_next_chunk (pool, len, 0);
2222
2223   return POOL_FRONT (pool);
2224 }
2225
2226 /* Allocate LEN bytes from a memory pool.  */
2227 unsigned char *
2228 _cpp_pool_alloc (pool, len)
2229      cpp_pool *pool;
2230      unsigned int len;
2231 {
2232   unsigned char *result = _cpp_pool_reserve (pool, len);
2233
2234   POOL_COMMIT (pool, len);
2235   return result;
2236 }