gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_period and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.  */
  56 enum spell_type   /* Spelling category recorded for each token type.  */
  57 {
  58   SPELL_OPERATOR = 0,   /* Category given to every OP () table entry.  */
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling   /* One row of the token_spellings table.  */
  66 {
  67   enum spell_type category;   /* How tokens of this type are spelled.  */
  68   const unsigned char *name;  /* S for OP entries, STRINGX (e) for TK.  */
  69 };
  70 /* Spellings of the six digraphs.  NOTE(review): the order presumably
        has to agree with the digraph numbering used wherever this array
        is indexed -- confirm against its users.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73 /* Expand TTYPE_TABLE into one token_spelling initializer per entry:
        OP (e, s) yields { SPELL_OPERATOR, s } and TK (e, s) yields
        { s, STRINGX (e) }, i.e. for TK the second argument is the
        category.  Both helper macros are undefined again afterwards.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79 /* Look up the table row for TOKEN, indexed by TOKEN->type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t)); /* Prototypes for this file's static helpers start here.  */
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86 /* Comment and white space skipping, then the token parsers.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 102                                    const unsigned char *, unsigned int *));
 103 /* Helpers that take no cpp_reader argument.  */
 104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 106 static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine:
 109
 110    Compares, the token TOKEN to the NUL-terminated string STRING.
 111    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   if (token->type != CPP_NAME)
 119     return 0;
 120
 121   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 122 }
 123
 124 /* Call when meeting a newline.  Returns the character after the newline
 125    (or carriage-return newline combination), or EOF.  */
 126 static cppchar_t
 127 handle_newline (pfile, newline_char)
 128      cpp_reader *pfile;
 129      cppchar_t newline_char;
 130 {
 131   cpp_buffer *buffer;
 132   cppchar_t next = EOF;
 133
 134   pfile->line++;
 135   buffer = pfile->buffer;
 136   buffer->col_adjust = 0;
 137   buffer->line_base = buffer->cur;
 138
 139   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 140   if (buffer->cur < buffer->rlimit)
 141     {
 142       next = *buffer->cur++;
 143       if (next + newline_char == '\r' + '\n')
 144         {
 145           buffer->line_base = buffer->cur;
 146           if (buffer->cur < buffer->rlimit)
 147             next = *buffer->cur++;
 148           else
 149             next = EOF;
 150         }
 151     }
 152
 153   buffer->read_ahead = next;
 154   return next;
 155 }
 156
 157 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 158    encountered.  It warns if necessary, and returns true if the
 159    trigraph should be honoured.  FROM_CHAR is the third character of a
 160    trigraph, and presumed to be the previous character for position
 161    reporting.  */
 162 static int
 163 trigraph_ok (pfile, from_char)
 164      cpp_reader *pfile;
 165      cppchar_t from_char;
 166 {
 167   int accept = CPP_OPTION (pfile, trigraphs);
 168
 169   /* Don't warn about trigraphs in comments.  */
 170   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 171     {
 172       cpp_buffer *buffer = pfile->buffer;
 173
 174       if (accept)
 175         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 176                                "trigraph ??%c converted to %c",
 177                                (int) from_char,
 178                                (int) _cpp_trigraph_map[from_char]);
 179       else if (buffer->cur != buffer->last_Wtrigraphs)
 180         {
 181           buffer->last_Wtrigraphs = buffer->cur;
 182           cpp_warning_with_line (pfile, pfile->line,
 183                                  CPP_BUF_COL (buffer) - 2,
 184                                  "trigraph ??%c ignored", (int) from_char);
 185         }
 186     }
 187
 188   return accept;
 189 }
 190
 191 /* Give the token being built the type T and mark the look-ahead
        character as consumed by resetting buffer->read_ahead to EOF.
        Assumes local variables buffer and result.  */
 192 #define ACCEPT_CHAR(t) \
 193   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 194
 195 /* When we move to multibyte character sets, add to these something
 196    that saves and restores the state of the multibyte conversion
 197    library.  This probably involves saving and restoring a "cookie".
 198    In the case of glibc it is an 8-byte structure, so is not a high
 199    overhead operation.  In any case, it's out of the fast path.

        SAVE_STATE records the read position buffer->cur in saved_cur and
        RESTORE_STATE rewinds buffer->cur to that position.  Both assume
        local variables buffer and saved_cur.  */
 200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 202
 203 /* Skips any escaped newlines introduced by NEXT, which is either a
 204    '?' or a '\\'.  Returns the next character, which will also have
 205    been placed in buffer->read_ahead.  This routine performs
 206    preprocessing stages 1 and 2 of the ISO C standard.

        The code reads forward from buffer->cur, so the callers in this
        file invoke it with buffer->cur just past NEXT.  Outcomes:
        - NEXT starts neither an honoured trigraph nor an escaped newline:
          buffer->cur is rewound to where it was and NEXT is returned.
        - NEXT starts an honoured trigraph for something other than a
          backslash: the replacement character is returned with
          buffer->cur just past the trigraph.
        - NEXT is, or is a trigraph for, a backslash followed by optional
          horizontal space and a newline: the newline is consumed and the
          process repeats with the first character of the next line.
        Nothing is done for buffers flagged from_stage3.  */
 207 static cppchar_t
 208 skip_escaped_newlines (pfile, next)
 209      cpp_reader *pfile;
 210      cppchar_t next;
 211 {
 212   cpp_buffer *buffer = pfile->buffer;
 213
 214   /* Only do this if we apply stages 1 and 2.  */
 215   if (!buffer->from_stage3)
 216     {
 217       cppchar_t next1;
 218       const unsigned char *saved_cur;
 219       int space;
 220
 221       do
 222         {
 223           if (buffer->cur == buffer->rlimit)   /* Nothing follows NEXT.  */
 224             break;
 225
 226           SAVE_STATE ();   /* Rewind point: just past NEXT.  */
 227           if (next == '?')
 228             {
 229               next1 = *buffer->cur++;
 230               if (next1 != '?' || buffer->cur == buffer->rlimit)
 231                 {
                       /* Not a second '?', or no third character left.  */
 232                   RESTORE_STATE ();
 233                   break;
 234                 }
 235
 236               next1 = *buffer->cur++;
 237               if (!_cpp_trigraph_map[next1]
 238                   || !trigraph_ok (pfile, next1))
 239                 {
                       /* No mapping for this character, or trigraphs are
                          not being honoured: NEXT stays a plain '?'.  */
 240                   RESTORE_STATE ();
 241                   break;
 242                 }
 243
 244               /* We have a full trigraph here.  */
 245               next = _cpp_trigraph_map[next1];
 246               if (next != '\\' || buffer->cur == buffer->rlimit)
 247                 break;
 248               SAVE_STATE ();   /* Rewind point: just past the trigraph.  */
 249             }
 250
 251           /* We have a backslash, and room for at least one more character.  */
 252           space = 0;
 253           do
 254             {
                   /* Pass over horizontal space after the backslash,
                      remembering whether there was any.  */
 255               next1 = *buffer->cur++;
 256               if (!is_nvspace (next1))
 257                 break;
 258               space = 1;
 259             }
 260           while (buffer->cur < buffer->rlimit);
 261
 262           if (!is_vspace (next1))
 263             {
                   /* No newline follows, so this is an ordinary backslash.  */
 264               RESTORE_STATE ();
 265               break;
 266             }
 267
 268           if (space && !pfile->state.lexing_comment)
 269             cpp_warning (pfile, "backslash and newline separated by space");
 270
 271           next = handle_newline (pfile, next1);   /* First character of the next line, or EOF.  */
 272           if (next == EOF)
 273             cpp_pedwarn (pfile, "backslash-newline at end of file");
 274         }
 275       while (next == '\\' || next == '?');   /* It may begin another escaped newline.  */
 276     }
 277
 278   buffer->read_ahead = next;
 279   return next;
 280 }
 281
 282 /* Obtain the next character, after trigraph conversion and skipping
 283    an arbitrary string of escaped newlines.  The common case of no
 284    trigraphs or escaped newlines falls through quickly.  */
 285 static cppchar_t
 286 get_effective_char (pfile)
 287      cpp_reader *pfile;
 288 {
 289   cpp_buffer *buffer = pfile->buffer;
 290   cppchar_t next = EOF;
 291
 292   if (buffer->cur < buffer->rlimit)
 293     {
 294       next = *buffer->cur++;
 295
 296       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 297          can introduce escaped newlines, which we want to skip, or
 298          UCNs, which, depending upon lexer state, we will handle in
 299          the future.  */
 300       if (next == '?' || next == '\\')
 301         next = skip_escaped_newlines (pfile, next);
 302     }
 303
 304   buffer->read_ahead = next;
 305   return next;
 306 }
 307
 308 /* Skip a C-style block comment.  We find the end of the comment by
 309    seeing if an asterisk is before every '/' we encounter.  Returns
 310    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set for the duration of the scan
        and cleared again on exit, and buffer->read_ahead is EOF on
        return.  Newlines inside the comment go through handle_newline,
        and tabs through adjust_column.  */
 311 static int
 312 skip_block_comment (pfile)
 313      cpp_reader *pfile;
 314 {
 315   cpp_buffer *buffer = pfile->buffer;
 316   cppchar_t c = EOF, prevc = EOF;
 317
 318   pfile->state.lexing_comment = 1;
 319   while (buffer->cur != buffer->rlimit)
 320     {
 321       prevc = c, c = *buffer->cur++;   /* PREVC trails C by one character.  */
 322
 323     next_char:
 324       /* FIXME: For speed, create a new character class of characters
 325          of interest inside block comments.  */
 326       if (c == '?' || c == '\\')
 327         c = skip_escaped_newlines (pfile, c);
 328
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       if (c == '/')
 332         {
 333           if (prevc == '*')   /* Closing delimiter found.  */
 334             break;
 335
 336           /* Warn about potential nested comments, but not if the '/'
 337              comes immediately before the true comment delimiter.
 338              Don't bother to get it right across escaped newlines.  */
 339           if (CPP_OPTION (pfile, warn_comments)
 340               && buffer->cur != buffer->rlimit)
 341             {
 342               prevc = c, c = *buffer->cur++;
 343               if (c == '*' && buffer->cur != buffer->rlimit)
 344                 {
 345                   prevc = c, c = *buffer->cur++;
 346                   if (c != '/')
 347                     cpp_warning_with_line (pfile, pfile->line,
 348                                            CPP_BUF_COL (buffer) - 2,
 349                                            "\"/*\" within comment");
 350                 }
                   /* C was read ahead here; examine it without fetching
                      another character.  */
 351               goto next_char;
 352             }
 353         }
 354       else if (is_vspace (c))
 355         {
               /* handle_newline returns the first character of the next
                  line, which is examined directly.  */
 356           prevc = c, c = handle_newline (pfile, c);
 357           goto next_char;
 358         }
 359       else if (c == '\t')
 360         adjust_column (pfile);
 361     }
 362
 363   pfile->state.lexing_comment = 0;
 364   buffer->read_ahead = EOF;
 365   return c != '/' || prevc != '*';   /* Zero only if the last two characters seen close the comment.  */
 366 }
 367
 368 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 369    non-zero if a multiline comment.  The following new line, if any,
 370    is left in buffer->read_ahead.  */
 371 static int
 372 skip_line_comment (pfile)
 373      cpp_reader *pfile;
 374 {
 375   cpp_buffer *buffer = pfile->buffer;
 376   unsigned int orig_line = pfile->line;
 377   cppchar_t c;
 378
 379   pfile->state.lexing_comment = 1;
 380   do
 381     {
 382       c = EOF;
 383       if (buffer->cur == buffer->rlimit)
 384         break;
 385
 386       c = *buffer->cur++;
 387       if (c == '?' || c == '\\')
 388         c = skip_escaped_newlines (pfile, c);
 389     }
 390   while (!is_vspace (c));
 391
 392   pfile->state.lexing_comment = 0;
 393   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 394   return orig_line != pfile->line;
 395 }
 396
 397 /* pfile->buffer->cur is one beyond the \t character.  Update
 398    col_adjust so we track the column correctly.  */
 399 static void
 400 adjust_column (pfile)
 401      cpp_reader *pfile;
 402 {
 403   cpp_buffer *buffer = pfile->buffer;
 404   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 405
 406   /* Round it up to multiple of the tabstop, but subtract 1 since the
 407      tab itself occupies a character position.  */
 408   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 409                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 410 }
 411
 412 /* Skips whitespace, saving the next non-whitespace character.
 413    Adjusts pfile->col_adjust to account for tabs.  Without this,
 414    tokens might be assigned an incorrect column.  */
 415 static void
 416 skip_whitespace (pfile, c)
 417      cpp_reader *pfile;
 418      cppchar_t c;
 419 {
 420   cpp_buffer *buffer = pfile->buffer;
 421   unsigned int warned = 0;
 422
 423   do
 424     {
 425       /* Horizontal space always OK.  */
 426       if (c == ' ')
 427         ;
 428       else if (c == '\t')
 429         adjust_column (pfile);
 430       /* Just \f \v or \0 left.  */
 431       else if (c == '\0')
 432         {
 433           if (!warned)
 434             {
 435               cpp_warning (pfile, "null character(s) ignored");
 436               warned = 1;
 437             }
 438         }
 439       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 440         cpp_pedwarn_with_line (pfile, pfile->line,
 441                                CPP_BUF_COL (buffer),
 442                                "%s in preprocessing directive",
 443                                c == '\f' ? "form feed" : "vertical tab");
 444
 445       c = EOF;
 446       if (buffer->cur == buffer->rlimit)
 447         break;
 448       c = *buffer->cur++;
 449     }
 450   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 451   while (is_nvspace (c));
 452
 453   /* Remember the next character.  */
 454   buffer->read_ahead = c;
 455 }
 456
 457 /* See if the characters of a number token are valid in a name (no
 458    '.', '+' or '-').  */
 459 static int
 460 name_p (pfile, string)
 461      cpp_reader *pfile;
 462      const cpp_string *string;
 463 {
 464   unsigned int i;
 465
 466   for (i = 0; i < string->len; i++)
 467     if (!is_idchar (string->text[i]))
 468       return 0;
 469
 470   return 1;
 471 }
 472
 473 /* Parse an identifier, skipping embedded backslash-newlines.
 474    Calculate the hash value of the token while parsing, for improved
 475    performance.  The hashing algorithm *must* match cpp_lookup().  */
 476
 477 static cpp_hashnode *
 478 parse_identifier (pfile, c)
 479      cpp_reader *pfile;
 480      cppchar_t c;
 481 {
 482   cpp_hashnode *result;
 483   cpp_buffer *buffer = pfile->buffer;
 484   unsigned int saw_dollar = 0, len;
 485   struct obstack *stack = &pfile->hash_table->stack;
 486
 487   do
 488     {
 489       do
 490         {
 491           obstack_1grow (stack, c);
 492
 493           if (c == '$')
 494             saw_dollar++;
 495
 496           c = EOF;
 497           if (buffer->cur == buffer->rlimit)
 498             break;
 499
 500           c = *buffer->cur++;
 501         }
 502       while (is_idchar (c));
 503
 504       /* Potential escaped newline?  */
 505       if (c != '?' && c != '\\')
 506         break;
 507       c = skip_escaped_newlines (pfile, c);
 508     }
 509   while (is_idchar (c));
 510
 511   /* Remember the next character.  */
 512   buffer->read_ahead = c;
 513
 514   /* $ is not a identifier character in the standard, but is commonly
 515      accepted as an extension.  Don't warn about it in skipped
 516      conditional blocks.  */
 517   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 518     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 519
 520   /* Identifiers are null-terminated.  */
 521   len = obstack_object_size (stack);
 522   obstack_1grow (stack, '\0');
 523
 524   /* This routine commits the memory if necessary.  */
 525   result = (cpp_hashnode *)
 526     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 527
 528   /* Some identifiers require diagnostics when lexed.  */
 529   if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
 530     {
 531       /* It is allowed to poison the same identifier twice.  */
 532       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 533         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 534                    NODE_NAME (result));
 535
 536       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 537          replacement list of a variadic macro.  */
 538       if (result == pfile->spec_nodes.n__VA_ARGS__
 539           && !pfile->state.va_args_ok)
 540         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 541     }
 542
 543   return result;
 544 }
 545
 546 /* Parse a number, skipping embedded backslash-newlines.  */
 547 static void
 548 parse_number (pfile, number, c, leading_period)
 549      cpp_reader *pfile;
 550      cpp_string *number;
 551      cppchar_t c;
 552      int leading_period;
 553 {
 554   cpp_buffer *buffer = pfile->buffer;
 555   cpp_pool *pool = &pfile->ident_pool;
 556   unsigned char *dest, *limit;
 557
 558   dest = POOL_FRONT (pool);
 559   limit = POOL_LIMIT (pool);
 560
 561   /* Place a leading period.  */
 562   if (leading_period)
 563     {
 564       if (dest >= limit)
 565         limit = _cpp_next_chunk (pool, 0, &dest);
 566       *dest++ = '.';
 567     }
 568
 569   do
 570     {
 571       do
 572         {
 573           /* Need room for terminating null.  */
 574           if (dest + 1 >= limit)
 575             limit = _cpp_next_chunk (pool, 0, &dest);
 576           *dest++ = c;
 577
 578           c = EOF;
 579           if (buffer->cur == buffer->rlimit)
 580             break;
 581
 582           c = *buffer->cur++;
 583         }
 584       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 585
 586       /* Potential escaped newline?  */
 587       if (c != '?' && c != '\\')
 588         break;
 589       c = skip_escaped_newlines (pfile, c);
 590     }
 591   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 592
 593   /* Remember the next character.  */
 594   buffer->read_ahead = c;
 595
 596   /* Null-terminate the number.  */
 597   *dest = '\0';
 598
 599   number->text = POOL_FRONT (pool);
 600   number->len = dest - number->text;
 601   POOL_COMMIT (pool, number->len + 1);
 602 }
 603
 604 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 605 static void
 606 unterminated (pfile, term)
 607      cpp_reader *pfile;
 608      int term;
 609 {
 610   cpp_error (pfile, "missing terminating %c character", term);
 611
 612   if (term == '\"' && pfile->mlstring_pos.line
 613       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 614     {
 615       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 616                            pfile->mlstring_pos.col,
 617                            "possible start of unterminated string literal");
 618       pfile->mlstring_pos.line = 0;
 619     }
 620 }
 621
 622 /* Subroutine of parse_string.  */
 623 static int
 624 unescaped_terminator_p (pfile, dest)
 625      cpp_reader *pfile;
 626      const unsigned char *dest;
 627 {
 628   const unsigned char *start, *temp;
 629
 630   /* In #include-style directives, terminators are not escapeable.  */
 631   if (pfile->state.angled_headers)
 632     return 1;
 633
 634   start = POOL_FRONT (&pfile->ident_pool);
 635
 636   /* An odd number of consecutive backslashes represents an escaped
 637      terminator.  */
 638   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 639     ;
 640
 641   return ((dest - temp) & 1) == 0;
 642 }
 643
 644 /* Parses a string, character constant, or angle-bracketed header file
 645    name.  Handles embedded trigraphs and escaped newlines.  The stored
 646    string is guaranteed NUL-terminated, but it is not guaranteed that
 647    this is the first NUL since embedded NULs are preserved.
 648
 649    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the closing delimiter to look for.  The characters
        before it are stored at the front of the identifier pool and
        described by TOKEN->val.str; the terminator itself is not stored.
        On return buffer->read_ahead is EOF if the terminator was found or
        the buffer ran out, and is the newline character if the literal
        was cut short at the end of a line.  */
 650 static void
 651 parse_string (pfile, token, terminator)
 652      cpp_reader *pfile;
 653      cpp_token *token;
 654      cppchar_t terminator;
 655 {
 656   cpp_buffer *buffer = pfile->buffer;
 657   cpp_pool *pool = &pfile->ident_pool;
 658   unsigned char *dest, *limit;
 659   cppchar_t c;
 660   bool warned_nulls = false, warned_multi = false;
 661
 662   dest = POOL_FRONT (pool);
 663   limit = POOL_LIMIT (pool);
 664
 665   for (;;)
 666     {
 667       if (buffer->cur == buffer->rlimit)
 668         c = EOF;
 669       else
 670         c = *buffer->cur++;
 671
 672     have_char:
 673       /* We need space for the terminating NUL.  */
 674       if (dest >= limit)
 675         limit = _cpp_next_chunk (pool, 0, &dest);
 676
 677       if (c == EOF)
 678         {
 679           unterminated (pfile, terminator);
 680           break;
 681         }
 682
 683       /* Handle trigraphs, escaped newlines etc.  */
 684       if (c == '?' || c == '\\')
 685         c = skip_escaped_newlines (pfile, c);
 686
 687       if (c == terminator && unescaped_terminator_p (pfile, dest))
 688         {
 689           c = EOF;   /* The terminator is consumed; no look-ahead remains.  */
 690           break;
 691         }
 692       else if (is_vspace (c))
 693         {
 694           /* In assembly language, silently terminate string and
 695              character literals at end of line.  This is a kludge
 696              around not knowing where comments are.  */
 697           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 698             break;
 699
 700           /* Character constants and header names may not extend over
 701              multiple lines.  In Standard C, neither may strings.
 702              Unfortunately, we accept multiline strings as an
 703              extension, except in #include family directives.  */
 704           if (terminator != '"' || pfile->state.angled_headers)
 705             {
 706               unterminated (pfile, terminator);
 707               break;
 708             }
 709
 710           if (!warned_multi)
 711             {
 712               warned_multi = true;
 713               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 714             }
 715
               /* Record where the first multi-line string began, for the
                  hint given by unterminated.  */
 716           if (pfile->mlstring_pos.line == 0)
 717             pfile->mlstring_pos = pfile->lexer_pos;
 718
               /* Whatever the line terminator was, a single '\n' is stored.
                  handle_newline has already fetched the next character, so
                  jump past the read at the top of the loop.  */
 719           c = handle_newline (pfile, c);
 720           *dest++ = '\n';
 721           goto have_char;
 722         }
 723       else if (c == '\0' && !warned_nulls)
 724         {
 725           warned_nulls = true;
 726           cpp_warning (pfile, "null character(s) preserved in literal");
 727         }
 728
 729       *dest++ = c;
 730     }
 731
 732   /* Remember the next character.  */
 733   buffer->read_ahead = c;
 734   *dest = '\0';
 735
 736   token->val.str.text = POOL_FRONT (pool);
 737   token->val.str.len = dest - token->val.str.text;   /* Excludes the final NUL.  */
 738   POOL_COMMIT (pool, token->val.str.len + 1);
 739 }
 740
 741 /* The stored comment includes the comment start and any terminator.  */
 742 static void
 743 save_comment (pfile, token, from)
 744      cpp_reader *pfile;
 745      cpp_token *token;
 746      const unsigned char *from;
 747 {
 748   unsigned char *buffer;
 749   unsigned int len;
 750
 751   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 752   /* C++ comments probably (not definitely) have moved past a new
 753      line, which we don't want to save in the comment.  */
 754   if (pfile->buffer->read_ahead != EOF)
 755     len--;
 756   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 757
 758   token->type = CPP_COMMENT;
 759   token->val.str.len = len;
 760   token->val.str.text = buffer;
 761
 762   buffer[0] = '/';
 763   memcpy (buffer + 1, from, len - 1);
 764 }
 765
 766 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 767    want to avoid stepping back when lexing %:%X.

        RESULT->type becomes CPP_MOD for a lone '%' and CPP_MOD_EQ for
        "%=".  When the digraphs option is on, "%:" gives CPP_HASH,
        "%:%:" gives CPP_PASTE and "%>" gives CPP_CLOSE_BRACE, each with
        the DIGRAPH flag set.  A look-ahead character that is not part of
        the token stays in buffer->read_ahead.  */
 768 static void
 769 lex_percent (pfile, result)
 770      cpp_reader *pfile;
 771      cpp_token *result;
 772 {
 773   cpp_buffer *buffer= pfile->buffer;
 774   cppchar_t c;
 775
 776   result->type = CPP_MOD;
 777   /* Parsing %:%X could leave an extra character.  */
 778   if (buffer->extra_char == EOF)
 779     c = get_effective_char (pfile);
 780   else
 781     {
           /* An earlier call read one character too far; use it instead
              of reading from the buffer.  */
 782       c = buffer->read_ahead = buffer->extra_char;
 783       buffer->extra_char = EOF;
 784     }
 785
 786   if (c == '=')
 787     ACCEPT_CHAR (CPP_MOD_EQ);
 788   else if (CPP_OPTION (pfile, digraphs))
 789     {
 790       if (c == ':')
 791         {
 792           result->flags |= DIGRAPH;
 793           ACCEPT_CHAR (CPP_HASH);
 794           if (get_effective_char (pfile) == '%')
 795             {
                   /* Seen "%:%"; one more character decides between the
                      paste digraph and a hash followed by a new '%'.  */
 796               buffer->extra_char = get_effective_char (pfile);
 797               if (buffer->extra_char == ':')
 798                 {
 799                   buffer->extra_char = EOF;
 800                   ACCEPT_CHAR (CPP_PASTE);
 801                 }
 802               else
 803                 /* We'll catch the extra_char when we're called back.  */
 804                 buffer->read_ahead = '%';
 805             }
 806         }
 807       else if (c == '>')
 808         {
 809           result->flags |= DIGRAPH;
 810           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 811         }
 812     }
 813 }
 814
 815 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 816    want to avoid stepping back when lexing '...' or '.123'.  In the
 817    latter case we should also set a flag for parse_number.

        RESULT->type becomes CPP_NUMBER when a digit follows (the number
        is lexed with a leading period), CPP_ELLIPSIS for "...",
        CPP_DOT_STAR for ".*" when the cplusplus option is on, and
        CPP_DOT otherwise.  A look-ahead character that is not part of
        the token stays in buffer->read_ahead.  */
 818 static void
 819 lex_dot (pfile, result)
 820      cpp_reader *pfile;
 821      cpp_token *result;
 822 {
 823   cpp_buffer *buffer = pfile->buffer;
 824   cppchar_t c;
 825
 826   /* Parsing ..X could leave an extra character.  */
 827   if (buffer->extra_char == EOF)
 828     c = get_effective_char (pfile);
 829   else
 830     {
           /* An earlier call read one character too far; use it instead
              of reading from the buffer.  */
 831       c = buffer->read_ahead = buffer->extra_char;
 832       buffer->extra_char = EOF;
 833     }
 834
 835   /* All known character sets have 0...9 contiguous.  */
 836   if (c >= '0' && c <= '9')
 837     {
 838       result->type = CPP_NUMBER;
 839       parse_number (pfile, &result->val.str, c, 1);   /* 1: store a leading '.'.  */
 840     }
 841   else
 842     {
 843       result->type = CPP_DOT;
 844       if (c == '.')
 845         {
               /* Seen ".."; one more character decides between an
                  ellipsis and a dot followed by a new '.'.  */
 846           buffer->extra_char = get_effective_char (pfile);
 847           if (buffer->extra_char == '.')
 848             {
 849               buffer->extra_char = EOF;
 850               ACCEPT_CHAR (CPP_ELLIPSIS);
 851             }
 852           else
 853             /* We'll catch the extra_char when we're called back.  */
 854             buffer->read_ahead = '.';
 855         }
 856       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 857         ACCEPT_CHAR (CPP_DOT_STAR);
 858     }
 859 }
 860
/* Lex the next token from PFILE's current buffer into RESULT.

   RESULT's flags start out as the buffer's saved_flags, and its line
   and column are set to where the token starts.  Horizontal
   whitespace and NUL characters are skipped and recorded as
   PREV_WHITE.  Comments are treated the same way unless
   state.save_comments is set, in which case the comment is returned
   as a token in its own right.

   CPP_EOF is returned at the end of a buffer, except that a buffer
   with a predecessor is popped and, if its return_at_eof is clear,
   lexing restarts with whatever buffer is then current.  Within a
   directive a newline also produces CPP_EOF.

   A '#' (or its digraph) seen while BOL is set, outside a directive
   and outside macro arguments, is passed to _cpp_handle_directive;
   when that returns nonzero lexing restarts for the next token.

   Outside directives, tokens are discarded while state.skipping is
   set, and any token that reaches the end of this function clears
   pfile->mi_valid.  */
 861 void
 862 _cpp_lex_token (pfile, result)
 863      cpp_reader *pfile;
 864      cpp_token *result;
 865 {
 866   cppchar_t c;
 867   cpp_buffer *buffer;
 868   const unsigned char *comment_start;
 869   int bol;
 870
 871  next_token:
 872   buffer = pfile->buffer;
 873   result->flags = buffer->saved_flags;
 874   buffer->saved_flags = 0;
       /* BOL records whether a '#' found now may introduce a directive;
          it is tested at do_hash below and set again after each
          newline.  */
 875   bol = (buffer->cur <= buffer->line_base + 1
 876          && pfile->lexer_pos.output_line == pfile->line);
 877  next_char:
 878   pfile->lexer_pos.line = pfile->line;
 879   result->line = pfile->line;
 880  next_char2:
 881   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 882
       /* Prefer a character left in read_ahead; otherwise fetch one from
          the buffer, unless it is exhausted, in which case C stays
          EOF.  */
 883   c = buffer->read_ahead;
 884   if (c == EOF && buffer->cur < buffer->rlimit)
 885     {
 886       c = *buffer->cur++;
 887       pfile->lexer_pos.col++;
 888     }
 889   result->col = pfile->lexer_pos.col;
 890
 891  do_switch:
       /* C is being processed now, so empty the read_ahead slot.  */
 892   buffer->read_ahead = EOF;
 893   switch (c)
 894     {
 895     case EOF:
 896       /* Non-empty files should end in a newline.  Don't warn for
 897          command line and _Pragma buffers.  */
 898       if (pfile->lexer_pos.col != 0)
 899         {
 900           /* Account for the missing \n, prevent multiple warnings.  */
 901           pfile->line++;
 902           pfile->lexer_pos.col = 0;
 903           if (!buffer->from_stage3)
 904             cpp_pedwarn (pfile, "no newline at end of file");
 905         }
 906
 907       /* To prevent bogus diagnostics, only pop the buffer when
 908          in-progress directives and arguments have been taken care of.
 909          Decrement the line to terminate an in-progress directive.  */
 910       if (pfile->state.in_directive)
 911         pfile->lexer_pos.output_line = pfile->line--;
 912       else if (! pfile->state.parsing_args)
 913         {
 914           /* Don't pop the last buffer.  */
 915           if (buffer->prev)
 916             {
 917               unsigned char stop = buffer->return_at_eof;
 918
 919               _cpp_pop_buffer (pfile);
 920               if (!stop)
 921                 goto next_token;
 922             }
 923         }
 924       result->type = CPP_EOF;
 925       return;
 926
 927     case ' ': case '\t': case '\f': case '\v': case '\0':
 928       skip_whitespace (pfile, c);
 929       result->flags |= PREV_WHITE;
 930       goto next_char2;
 931
 932     case '\n': case '\r':
 933       if (pfile->state.in_directive)
 934         {
 935           result->type = CPP_EOF;
 936           if (pfile->state.parsing_args)
 937             buffer->read_ahead = c;
 938           else
 939             {
 940               handle_newline (pfile, c);
 941               /* Decrementing pfile->line allows directives to
 942                  recognise that the newline has been seen, and also
 943                  means that diagnostics don't point to the next line.  */
 944               pfile->lexer_pos.output_line = pfile->line--;
 945             }
 946           return;
 947         }
 948
 949       handle_newline (pfile, c);
 950       /* This is a new line, so clear any white space flag.  Newlines
 951          in arguments are white space (6.10.3.10); parse_arg takes
 952          care of that.  */
 953       result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 954       bol = 1;
 955       if (pfile->state.parsing_args != 2)
 956         pfile->lexer_pos.output_line = pfile->line;
 957       goto next_char;
 958
 959     case '?':
 960     case '\\':
 961       /* These could start an escaped newline, or '?' a trigraph.  Let
 962          skip_escaped_newlines do all the work.  */
 963       {
 964         unsigned int line = pfile->line;
 965
 966         c = skip_escaped_newlines (pfile, c);
 967         if (line != pfile->line)
 968           /* We had at least one escaped newline of some sort, and the
 969              next character is in buffer->read_ahead.  Update the
 970              token's line and column.  */
 971             goto next_char;
 972
 973         /* We are either the original '?' or '\\', or a trigraph.  */
 974         result->type = CPP_QUERY;
 975         buffer->read_ahead = EOF;
 976         if (c == '\\')
 977           goto random_char;
 978         else if (c != '?')
 979           goto do_switch;
 980       }
 981       break;
 982
 983     case '0': case '1': case '2': case '3': case '4':
 984     case '5': case '6': case '7': case '8': case '9':
 985       result->type = CPP_NUMBER;
 986       parse_number (pfile, &result->val.str, c, 0);
 987       break;
 988
 989     case '$':
 990       if (!CPP_OPTION (pfile, dollars_in_ident))
 991         goto random_char;
 992       /* Fall through...  */
 993
 994     case '_':
 995     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 996     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 997     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 998     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 999     case 'y': case 'z':
1000     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1001     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1002     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1003     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1004     case 'Y': case 'Z':
1005       result->type = CPP_NAME;
1006       result->val.node = parse_identifier (pfile, c);
1007
1008       /* 'L' may introduce wide characters or strings.  */
1009       if (result->val.node == pfile->spec_nodes.n_L)
1010         {
1011           c = buffer->read_ahead; /* For make_string.  */
1012           if (c == '\'' || c == '"')
1013             {
1014               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1015               goto make_string;
1016             }
1017         }
1018       /* Convert named operators to their proper types.  */
1019       else if (result->val.node->flags & NODE_OPERATOR)
1020         {
1021           result->flags |= NAMED_OP;
1022           result->type = result->val.node->value.operator;
1023         }
1024       break;
1025
1026     case '\'':
1027     case '"':
1028       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1029     make_string:
1030       parse_string (pfile, result, c);
1031       break;
1032
1033     case '/':
1034       /* A potential block or line comment.  */
1035       comment_start = buffer->cur;
1036       result->type = CPP_DIV;
1037       c = get_effective_char (pfile);
1038       if (c == '=')
1039         ACCEPT_CHAR (CPP_DIV_EQ);
1040       if (c != '/' && c != '*')
1041         break;
1042
1043       if (c == '*')
1044         {
1045           if (skip_block_comment (pfile))
1046             cpp_error (pfile, "unterminated comment");
1047         }
1048       else
1049         {
1050           if (!CPP_OPTION (pfile, cplusplus_comments)
1051               && !CPP_IN_SYSTEM_HEADER (pfile))
1052             break;
1053
1054           /* Warn about comments only if pedantically GNUC89, and not
1055              in system headers.  */
1056           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1057               && ! buffer->warned_cplusplus_comments)
1058             {
1059               cpp_pedwarn (pfile,
1060                            "C++ style comments are not allowed in ISO C89");
1061               cpp_pedwarn (pfile,
1062                            "(this will be reported only once per input file)");
1063               buffer->warned_cplusplus_comments = 1;
1064             }
1065
1066           /* Skip_line_comment updates buffer->read_ahead.  */
1067           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1068             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1069                                    pfile->lexer_pos.col,
1070                                    "multi-line comment");
1071         }
1072
1073       /* Skipping the comment has updated buffer->read_ahead.  */
1074       if (!pfile->state.save_comments)
1075         {
1076           result->flags |= PREV_WHITE;
1077           goto next_char;
1078         }
1079
1080       /* Save the comment as a token in its own right.  */
1081       save_comment (pfile, result, comment_start);
1082       /* Don't do MI optimisation.  */
1083       return;
1084
1085     case '<':
1086       if (pfile->state.angled_headers)
1087         {
1088           result->type = CPP_HEADER_NAME;
1089           c = '>';              /* terminator.  */
1090           goto make_string;
1091         }
1092
1093       result->type = CPP_LESS;
1094       c = get_effective_char (pfile);
1095       if (c == '=')
1096         ACCEPT_CHAR (CPP_LESS_EQ);
1097       else if (c == '<')
1098         {
1099           ACCEPT_CHAR (CPP_LSHIFT);
1100           if (get_effective_char (pfile) == '=')
1101             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1102         }
1103       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1104         {
1105           ACCEPT_CHAR (CPP_MIN);
1106           if (get_effective_char (pfile) == '=')
1107             ACCEPT_CHAR (CPP_MIN_EQ);
1108         }
1109       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1110         {
1111           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1112           result->flags |= DIGRAPH;
1113         }
1114       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1115         {
1116           ACCEPT_CHAR (CPP_OPEN_BRACE);
1117           result->flags |= DIGRAPH;
1118         }
1119       break;
1120
1121     case '>':
1122       result->type = CPP_GREATER;
1123       c = get_effective_char (pfile);
1124       if (c == '=')
1125         ACCEPT_CHAR (CPP_GREATER_EQ);
1126       else if (c == '>')
1127         {
1128           ACCEPT_CHAR (CPP_RSHIFT);
1129           if (get_effective_char (pfile) == '=')
1130             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1131         }
1132       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133         {
1134           ACCEPT_CHAR (CPP_MAX);
1135           if (get_effective_char (pfile) == '=')
1136             ACCEPT_CHAR (CPP_MAX_EQ);
1137         }
1138       break;
1139
1140     case '%':
1141       lex_percent (pfile, result);
1142       if (result->type == CPP_HASH)
1143         goto do_hash;
1144       break;
1145
1146     case '.':
1147       lex_dot (pfile, result);
1148       break;
1149
1150     case '+':
1151       result->type = CPP_PLUS;
1152       c = get_effective_char (pfile);
1153       if (c == '=')
1154         ACCEPT_CHAR (CPP_PLUS_EQ);
1155       else if (c == '+')
1156         ACCEPT_CHAR (CPP_PLUS_PLUS);
1157       break;
1158
1159     case '-':
1160       result->type = CPP_MINUS;
1161       c = get_effective_char (pfile);
1162       if (c == '>')
1163         {
1164           ACCEPT_CHAR (CPP_DEREF);
1165           if (CPP_OPTION (pfile, cplusplus)
1166               && get_effective_char (pfile) == '*')
1167             ACCEPT_CHAR (CPP_DEREF_STAR);
1168         }
1169       else if (c == '=')
1170         ACCEPT_CHAR (CPP_MINUS_EQ);
1171       else if (c == '-')
1172         ACCEPT_CHAR (CPP_MINUS_MINUS);
1173       break;
1174
1175     case '*':
1176       result->type = CPP_MULT;
1177       if (get_effective_char (pfile) == '=')
1178         ACCEPT_CHAR (CPP_MULT_EQ);
1179       break;
1180
1181     case '=':
1182       result->type = CPP_EQ;
1183       if (get_effective_char (pfile) == '=')
1184         ACCEPT_CHAR (CPP_EQ_EQ);
1185       break;
1186
1187     case '!':
1188       result->type = CPP_NOT;
1189       if (get_effective_char (pfile) == '=')
1190         ACCEPT_CHAR (CPP_NOT_EQ);
1191       break;
1192
1193     case '&':
1194       result->type = CPP_AND;
1195       c = get_effective_char (pfile);
1196       if (c == '=')
1197         ACCEPT_CHAR (CPP_AND_EQ);
1198       else if (c == '&')
1199         ACCEPT_CHAR (CPP_AND_AND);
1200       break;
1201
1202     case '#':
1203       c = buffer->extra_char;   /* Can be set by error condition below.  */
1204       if (c != EOF)
1205         {
1206           buffer->read_ahead = c;
1207           buffer->extra_char = EOF;
1208         }
1209       else
1210         c = get_effective_char (pfile);
1211
1212       if (c == '#')
1213         {
1214           ACCEPT_CHAR (CPP_PASTE);
1215           break;
1216         }
1217
1218       result->type = CPP_HASH;
1219     do_hash:
1220       if (!bol)
1221         break;
1222       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1223          tokens within the list of arguments that would otherwise act
1224          as preprocessing directives, the behavior is undefined.
1225
1226          This implementation will report a hard error, terminate the
1227          macro invocation, and proceed to process the directive.  */
1228       if (pfile->state.parsing_args)
1229         {
1230           pfile->lexer_pos.output_line = pfile->line;
1231           if (pfile->state.parsing_args == 2)
1232             {
1233               cpp_error (pfile,
1234                          "directives may not be used inside a macro argument");
1235               result->type = CPP_EOF;
1236             }
1237         }
1238       /* in_directive can be true inside a _Pragma.  */
1239       else if (!pfile->state.in_directive)
1240         {
1241           /* This is the hash introducing a directive.  If the return
1242              value is false, it is an assembler #.  */
1243           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1244             goto next_token;
1245         }
1246       break;
1247
1248     case '|':
1249       result->type = CPP_OR;
1250       c = get_effective_char (pfile);
1251       if (c == '=')
1252         ACCEPT_CHAR (CPP_OR_EQ);
1253       else if (c == '|')
1254         ACCEPT_CHAR (CPP_OR_OR);
1255       break;
1256
1257     case '^':
1258       result->type = CPP_XOR;
1259       if (get_effective_char (pfile) == '=')
1260         ACCEPT_CHAR (CPP_XOR_EQ);
1261       break;
1262
1263     case ':':
1264       result->type = CPP_COLON;
1265       c = get_effective_char (pfile);
1266       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1267         ACCEPT_CHAR (CPP_SCOPE);
1268       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1269         {
1270           result->flags |= DIGRAPH;
1271           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1272         }
1273       break;
1274
1275     case '~': result->type = CPP_COMPL; break;
1276     case ',': result->type = CPP_COMMA; break;
1277     case '(': result->type = CPP_OPEN_PAREN; break;
1278     case ')': result->type = CPP_CLOSE_PAREN; break;
1279     case '[': result->type = CPP_OPEN_SQUARE; break;
1280     case ']': result->type = CPP_CLOSE_SQUARE; break;
1281     case '{': result->type = CPP_OPEN_BRACE; break;
1282     case '}': result->type = CPP_CLOSE_BRACE; break;
1283     case ';': result->type = CPP_SEMICOLON; break;
1284
1285       /* @ is a punctuator in Objective C.  */
1286     case '@': result->type = CPP_ATSIGN; break;
1287
1288     random_char:
1289     default:
1290       result->type = CPP_OTHER;
1291       result->val.c = c;
1292       break;
1293     }
1294
       /* Outside a directive, a token lexed while state.skipping is set
          is thrown away and the next one is lexed instead.  */
1295   if (!pfile->state.in_directive && pfile->state.skipping)
1296     goto next_char;
1297
1298   /* If not in a directive, this token invalidates controlling macros.  */
1299   if (!pfile->state.in_directive)
1300     pfile->mi_valid = false;
1301 }
1302
1303 /* An upper bound on the number of bytes needed to spell a token,
1304    including preceding whitespace.  */
1305 unsigned int
1306 cpp_token_len (token)
1307      const cpp_token *token;
1308 {
1309   unsigned int len;
1310
1311   switch (TOKEN_SPELL (token))
1312     {
1313     default:            len = 0;                                break;
1314     case SPELL_STRING:  len = token->val.str.len;               break;
1315     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1316     }
1317   /* 1 for whitespace, 4 for comment delimeters.  */
1318   return len + 5;
1319 }
1320
1321 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1322    already contain the enough space to hold the token's spelling.
1323    Returns a pointer to the character after the last character
1324    written.  */
1325 unsigned char *
1326 cpp_spell_token (pfile, token, buffer)
1327      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1328      const cpp_token *token;
1329      unsigned char *buffer;
1330 {
1331   switch (TOKEN_SPELL (token))
1332     {
1333     case SPELL_OPERATOR:
1334       {
1335         const unsigned char *spelling;
1336         unsigned char c;
1337
1338         if (token->flags & DIGRAPH)
1339           spelling
1340             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1341         else if (token->flags & NAMED_OP)
1342           goto spell_ident;
1343         else
1344           spelling = TOKEN_NAME (token);
1345
1346         while ((c = *spelling++) != '\0')
1347           *buffer++ = c;
1348       }
1349       break;
1350
1351     case SPELL_IDENT:
1352       spell_ident:
1353       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1354       buffer += NODE_LEN (token->val.node);
1355       break;
1356
1357     case SPELL_STRING:
1358       {
1359         int left, right, tag;
1360         switch (token->type)
1361           {
1362           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1363           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1364           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1365           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1366           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1367           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1368           }
1369         if (tag) *buffer++ = tag;
1370         if (left) *buffer++ = left;
1371         memcpy (buffer, token->val.str.text, token->val.str.len);
1372         buffer += token->val.str.len;
1373         if (right) *buffer++ = right;
1374       }
1375       break;
1376
1377     case SPELL_CHAR:
1378       *buffer++ = token->val.c;
1379       break;
1380
1381     case SPELL_NONE:
1382       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1383       break;
1384     }
1385
1386   return buffer;
1387 }
1388
1389 /* Returns a token as a null-terminated string.  The string is
1390    temporary, and automatically freed later.  Useful for diagnostics.  */
1391 unsigned char *
1392 cpp_token_as_text (pfile, token)
1393      cpp_reader *pfile;
1394      const cpp_token *token;
1395 {
1396   unsigned int len = cpp_token_len (token);
1397   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1398
1399   end = cpp_spell_token (pfile, token, start);
1400   end[0] = '\0';
1401
1402   return start;
1403 }
1404
1405 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1406 const char *
1407 cpp_type2name (type)
1408      enum cpp_ttype type;
1409 {
1410   return (const char *) token_spellings[type].name;
1411 }
1412
1413 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1414    for efficiency - to avoid double-buffering.  Also, outputs a space
1415    if PREV_WHITE is flagged.  */
1416 void
1417 cpp_output_token (token, fp)
1418      const cpp_token *token;
1419      FILE *fp;
1420 {
1421   if (token->flags & PREV_WHITE)
1422     putc (' ', fp);
1423
1424   switch (TOKEN_SPELL (token))
1425     {
1426     case SPELL_OPERATOR:
1427       {
1428         const unsigned char *spelling;
1429
1430         if (token->flags & DIGRAPH)
1431           spelling
1432             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1433         else if (token->flags & NAMED_OP)
1434           goto spell_ident;
1435         else
1436           spelling = TOKEN_NAME (token);
1437
1438         ufputs (spelling, fp);
1439       }
1440       break;
1441
1442     spell_ident:
1443     case SPELL_IDENT:
1444       ufputs (NODE_NAME (token->val.node), fp);
1445     break;
1446
1447     case SPELL_STRING:
1448       {
1449         int left, right, tag;
1450         switch (token->type)
1451           {
1452           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1453           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1454           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1455           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1456           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1457           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1458           }
1459         if (tag) putc (tag, fp);
1460         if (left) putc (left, fp);
1461         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1462         if (right) putc (right, fp);
1463       }
1464       break;
1465
1466     case SPELL_CHAR:
1467       putc (token->val.c, fp);
1468       break;
1469
1470     case SPELL_NONE:
1471       /* An error, most probably.  */
1472       break;
1473     }
1474 }
1475
1476 /* Compare two tokens.  */
1477 int
1478 _cpp_equiv_tokens (a, b)
1479      const cpp_token *a, *b;
1480 {
1481   if (a->type == b->type && a->flags == b->flags)
1482     switch (TOKEN_SPELL (a))
1483       {
1484       default:                  /* Keep compiler happy.  */
1485       case SPELL_OPERATOR:
1486         return 1;
1487       case SPELL_CHAR:
1488         return a->val.c == b->val.c; /* Character.  */
1489       case SPELL_NONE:
1490         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1491       case SPELL_IDENT:
1492         return a->val.node == b->val.node;
1493       case SPELL_STRING:
1494         return (a->val.str.len == b->val.str.len
1495                 && !memcmp (a->val.str.text, b->val.str.text,
1496                             a->val.str.len));
1497       }
1498
1499   return 0;
1500 }
1501
1502 /* Determine whether two tokens can be pasted together, and if so,
1503    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1504    be pasted, or the appropriate type for the merged token if they
1505    can.  */
1506 enum cpp_ttype
1507 cpp_can_paste (pfile, token1, token2, digraph)
1508      cpp_reader * pfile;
1509      const cpp_token *token1, *token2;
1510      int* digraph;
1511 {
1512   enum cpp_ttype a = token1->type, b = token2->type;
1513   int cxx = CPP_OPTION (pfile, cplusplus);
1514
1515   /* Treat named operators as if they were ordinary NAMEs.  */
1516   if (token1->flags & NAMED_OP)
1517     a = CPP_NAME;
1518   if (token2->flags & NAMED_OP)
1519     b = CPP_NAME;
1520
1521   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1522     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1523
1524   switch (a)
1525     {
1526     case CPP_GREATER:
1527       if (b == a) return CPP_RSHIFT;
1528       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1529       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1530       break;
1531     case CPP_LESS:
1532       if (b == a) return CPP_LSHIFT;
1533       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1534       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1535       if (CPP_OPTION (pfile, digraphs))
1536         {
1537           if (b == CPP_COLON)
1538             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1539           if (b == CPP_MOD)
1540             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1541         }
1542       break;
1543
1544     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1545     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1546     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1547
1548     case CPP_MINUS:
1549       if (b == a)               return CPP_MINUS_MINUS;
1550       if (b == CPP_GREATER)     return CPP_DEREF;
1551       break;
1552     case CPP_COLON:
1553       if (b == a && cxx)        return CPP_SCOPE;
1554       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1555         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1556       break;
1557
1558     case CPP_MOD:
1559       if (CPP_OPTION (pfile, digraphs))
1560         {
1561           if (b == CPP_GREATER)
1562             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1563           if (b == CPP_COLON)
1564             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1565         }
1566       break;
1567     case CPP_DEREF:
1568       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1569       break;
1570     case CPP_DOT:
1571       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1572       if (b == CPP_NUMBER)      return CPP_NUMBER;
1573       break;
1574
1575     case CPP_HASH:
1576       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1577         /* %:%: digraph */
1578         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1579       break;
1580
1581     case CPP_NAME:
1582       if (b == CPP_NAME)        return CPP_NAME;
1583       if (b == CPP_NUMBER
1584           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1585       if (b == CPP_CHAR
1586           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1587       if (b == CPP_STRING
1588           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1589       break;
1590
1591     case CPP_NUMBER:
1592       if (b == CPP_NUMBER)      return CPP_NUMBER;
1593       if (b == CPP_NAME)        return CPP_NUMBER;
1594       if (b == CPP_DOT)         return CPP_NUMBER;
1595       /* Numbers cannot have length zero, so this is safe.  */
1596       if ((b == CPP_PLUS || b == CPP_MINUS)
1597           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1598         return CPP_NUMBER;
1599       break;
1600
1601     default:
1602       break;
1603     }
1604
1605   return CPP_EOF;
1606 }
1607
1608 /* Returns nonzero if a space should be inserted to avoid an
1609    accidental token paste for output.  For simplicity, it is
1610    conservative, and occasionally advises a space where one is not
1611    needed, e.g. "." and ".2".  */
1612
1613 int
1614 cpp_avoid_paste (pfile, token1, token2)
1615      cpp_reader *pfile;
1616      const cpp_token *token1, *token2;
1617 {
1618   enum cpp_ttype a = token1->type, b = token2->type;
1619   cppchar_t c;
1620
1621   if (token1->flags & NAMED_OP)
1622     a = CPP_NAME;
1623   if (token2->flags & NAMED_OP)
1624     b = CPP_NAME;
1625
1626   c = EOF;
1627   if (token2->flags & DIGRAPH)
1628     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1629   else if (token_spellings[b].category == SPELL_OPERATOR)
1630     c = token_spellings[b].name[0];
1631
1632   /* Quickly get everything that can paste with an '='.  */
1633   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1634     return 1;
1635
1636   switch (a)
1637     {
1638     case CPP_GREATER:   return c == '>' || c == '?';
1639     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1640     case CPP_PLUS:      return c == '+';
1641     case CPP_MINUS:     return c == '-' || c == '>';
1642     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1643     case CPP_MOD:       return c == ':' || c == '>';
1644     case CPP_AND:       return c == '&';
1645     case CPP_OR:        return c == '|';
1646     case CPP_COLON:     return c == ':' || c == '>';
1647     case CPP_DEREF:     return c == '*';
1648     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1649     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1650     case CPP_NAME:      return ((b == CPP_NUMBER
1651                                  && name_p (pfile, &token2->val.str))
1652                                 || b == CPP_NAME
1653                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1654     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1655                                 || c == '.' || c == '+' || c == '-');
1656     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1657                                 && token1->val.c == '@'
1658                                 && (b == CPP_NAME || b == CPP_STRING));
1659     default:            break;
1660     }
1661
1662   return 0;
1663 }
1664
1665 /* Output all the remaining tokens on the current line, and a newline
1666    character, to FP.  Leading whitespace is removed.  */
1667 void
1668 cpp_output_line (pfile, fp)
1669      cpp_reader *pfile;
1670      FILE *fp;
1671 {
1672   cpp_token token;
1673
1674   cpp_get_token (pfile, &token);
1675   token.flags &= ~PREV_WHITE;
1676   while (token.type != CPP_EOF)
1677     {
1678       cpp_output_token (&token, fp);
1679       cpp_get_token (pfile, &token);
1680     }
1681
1682   putc ('\n', fp);
1683 }
1684
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be in either case.  Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';

  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');

  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  abort ();
}
1698
1699 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1700    failure if cpplib is not parsing C++ or C99.  Such failure is
1701    silent, and no variables are updated.  Otherwise returns 0, and
1702    warns if -Wtraditional.
1703
1704    [lex.charset]: The character designated by the universal character
1705    name \UNNNNNNNN is that character whose character short name in
1706    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707    universal character name \uNNNN is that character whose character
1708    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1709    for a universal character name is less than 0x20 or in the range
1710    0x7F-0x9F (inclusive), or if the universal character name
1711    designates a character in the basic source character set, then the
1712    program is ill-formed.
1713
1714    We assume that wchar_t is Unicode, so we don't need to do any
1715    mapping.  Is this ever wrong?
1716
1717    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1718    LIMIT is the end of the string or charconst.  PSTR is updated to
1719    point after the UCS on return, and the UCS is written into PC.  */
1720
1721 static int
1722 maybe_read_ucs (pfile, pstr, limit, pc)
1723      cpp_reader *pfile;
1724      const unsigned char **pstr;
1725      const unsigned char *limit;
1726      unsigned int *pc;
1727 {
1728   const unsigned char *p = *pstr;
1729   unsigned int code = 0;
1730   unsigned int c = *pc, length;
1731
1732   /* Only attempt to interpret a UCS for C++ and C99.  */
1733   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1734     return 1;
1735
1736   if (CPP_WTRADITIONAL (pfile))
1737     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1738
1739   length = (c == 'u' ? 4: 8);
1740
1741   if ((size_t) (limit - p) < length)
1742     {
1743       cpp_error (pfile, "incomplete universal-character-name");
1744       /* Skip to the end to avoid more diagnostics.  */
1745       p = limit;
1746     }
1747   else
1748     {
1749       for (; length; length--, p++)
1750         {
1751           c = *p;
1752           if (ISXDIGIT (c))
1753             code = (code << 4) + hex_digit_value (c);
1754           else
1755             {
1756               cpp_error (pfile,
1757                          "non-hex digit '%c' in universal-character-name", c);
1758               /* We shouldn't skip in case there are multibyte chars.  */
1759               break;
1760             }
1761         }
1762     }
1763
1764 #ifdef TARGET_EBCDIC
1765   cpp_error (pfile, "universal-character-name on EBCDIC target");
1766   code = 0x3f;  /* EBCDIC invalid character */
1767 #else
1768  /* True extended characters are OK.  */
1769   if (code >= 0xa0
1770       && !(code & 0x80000000)
1771       && !(code >= 0xD800 && code <= 0xDFFF))
1772     ;
1773   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1774      hex escapes so that this also works with EBCDIC hosts.  */
1775   else if (code == 0x24 || code == 0x40 || code == 0x60)
1776     ;
1777   /* Don't give another error if one occurred above.  */
1778   else if (length == 0)
1779     cpp_error (pfile, "universal-character-name out of range");
1780 #endif
1781
1782   *pstr = p;
1783   *pc = code;
1784   return 0;
1785 }
1786
1787 /* Interpret an escape sequence, and return its value.  PSTR points to
1788    the input pointer, which is just after the backslash.  LIMIT is how
1789    much text we have.  MASK is a bitmask for the precision for the
1790    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1791    interpret escapes that did not exist in traditional C.
1792
1793    Handles all relevant diagnostics.  */
1794
1795 unsigned int
1796 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1797      cpp_reader *pfile;
1798      const unsigned char **pstr;
1799      const unsigned char *limit;
1800      unsigned HOST_WIDE_INT mask;
1801      int traditional;
1802 {
1803   int unknown = 0;
1804   const unsigned char *str = *pstr;
1805   unsigned int c = *str++;
1806
1807   switch (c)
1808     {
1809     case '\\': case '\'': case '"': case '?': break;
1810     case 'b': c = TARGET_BS;      break;
1811     case 'f': c = TARGET_FF;      break;
1812     case 'n': c = TARGET_NEWLINE; break;
1813     case 'r': c = TARGET_CR;      break;
1814     case 't': c = TARGET_TAB;     break;
1815     case 'v': c = TARGET_VT;      break;
1816
1817     case '(': case '{': case '[': case '%':
1818       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819          '\%' is used to prevent SCCS from getting confused.  */
1820       unknown = CPP_PEDANTIC (pfile);
1821       break;
1822
1823     case 'a':
1824       if (CPP_WTRADITIONAL (pfile))
1825         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1826       if (!traditional)
1827         c = TARGET_BELL;
1828       break;
1829
1830     case 'e': case 'E':
1831       if (CPP_PEDANTIC (pfile))
1832         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1833       c = TARGET_ESC;
1834       break;
1835
1836     case 'u': case 'U':
1837       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1838       break;
1839
1840     case 'x':
1841       if (CPP_WTRADITIONAL (pfile))
1842         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1843
1844       if (!traditional)
1845         {
1846           unsigned int i = 0, overflow = 0;
1847           int digits_found = 0;
1848
1849           while (str < limit)
1850             {
1851               c = *str;
1852               if (! ISXDIGIT (c))
1853                 break;
1854               str++;
1855               overflow |= i ^ (i << 4 >> 4);
1856               i = (i << 4) + hex_digit_value (c);
1857               digits_found = 1;
1858             }
1859
1860           if (!digits_found)
1861             cpp_error (pfile, "\\x used with no following hex digits");
1862
1863           if (overflow | (i != (i & mask)))
1864             {
1865               cpp_pedwarn (pfile, "hex escape sequence out of range");
1866               i &= mask;
1867             }
1868           c = i;
1869         }
1870       break;
1871
1872     case '0':  case '1':  case '2':  case '3':
1873     case '4':  case '5':  case '6':  case '7':
1874       {
1875         unsigned int i = c - '0';
1876         int count = 0;
1877
1878         while (str < limit && ++count < 3)
1879           {
1880             c = *str;
1881             if (c < '0' || c > '7')
1882               break;
1883             str++;
1884             i = (i << 3) + c - '0';
1885           }
1886
1887         if (i != (i & mask))
1888           {
1889             cpp_pedwarn (pfile, "octal escape sequence out of range");
1890             i &= mask;
1891           }
1892         c = i;
1893       }
1894       break;
1895
1896     default:
1897       unknown = 1;
1898       break;
1899     }
1900
1901   if (unknown)
1902     {
1903       if (ISGRAPH (c))
1904         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1905       else
1906         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1907     }
1908
1909   if (c > mask)
1910     cpp_pedwarn (pfile, "escape sequence out of range for character");
1911
1912   *pstr = str;
1913   return c;
1914 }
1915
/* Widths, in bits, used when evaluating narrow and wide character
   constants.  Each falls back to the ordinary type size unless a
   maximum has already been defined.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1923
1924 /* Interpret a (possibly wide) character constant in TOKEN.
1925    WARN_MULTI warns about multi-character charconsts, if not
1926    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1927    that did not exist in traditional C.  PCHARS_SEEN points to a
1928    variable that is filled in with the number of characters seen.  */
1929 HOST_WIDE_INT
1930 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1931      cpp_reader *pfile;
1932      const cpp_token *token;
1933      int warn_multi;
1934      int traditional;
1935      unsigned int *pchars_seen;
1936 {
1937   const unsigned char *str = token->val.str.text;
1938   const unsigned char *limit = str + token->val.str.len;
1939   unsigned int chars_seen = 0;
1940   unsigned int width, max_chars, c;
1941   unsigned HOST_WIDE_INT mask;
1942   HOST_WIDE_INT result = 0;
1943
1944 #ifdef MULTIBYTE_CHARS
1945   (void) local_mbtowc (NULL, NULL, 0);
1946 #endif
1947
1948   /* Width in bits.  */
1949   if (token->type == CPP_CHAR)
1950     width = MAX_CHAR_TYPE_SIZE;
1951   else
1952     width = MAX_WCHAR_TYPE_SIZE;
1953
1954   if (width < HOST_BITS_PER_WIDE_INT)
1955     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1956   else
1957     mask = ~0;
1958   max_chars = HOST_BITS_PER_WIDE_INT / width;
1959
1960   while (str < limit)
1961     {
1962 #ifdef MULTIBYTE_CHARS
1963       wchar_t wc;
1964       int char_len;
1965
1966       char_len = local_mbtowc (&wc, str, limit - str);
1967       if (char_len == -1)
1968         {
1969           cpp_warning (pfile, "ignoring invalid multibyte character");
1970           c = *str++;
1971         }
1972       else
1973         {
1974           str += char_len;
1975           c = wc;
1976         }
1977 #else
1978       c = *str++;
1979 #endif
1980
1981       if (c == '\\')
1982         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1983
1984 #ifdef MAP_CHARACTER
1985       if (ISPRINT (c))
1986         c = MAP_CHARACTER (c);
1987 #endif
1988
1989       /* Merge character into result; ignore excess chars.  */
1990       if (++chars_seen <= max_chars)
1991         {
1992           if (width < HOST_BITS_PER_WIDE_INT)
1993             result = (result << width) | (c & mask);
1994           else
1995             result = c;
1996         }
1997     }
1998
1999   if (chars_seen == 0)
2000     cpp_error (pfile, "empty character constant");
2001   else if (chars_seen > max_chars)
2002     {
2003       chars_seen = max_chars;
2004       cpp_warning (pfile, "character constant too long");
2005     }
2006   else if (chars_seen > 1 && !traditional && warn_multi)
2007     cpp_warning (pfile, "multi-character character constant");
2008
2009   /* If char type is signed, sign-extend the constant.  The
2010      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2011   if (token->type == CPP_CHAR && chars_seen)
2012     {
2013       unsigned int nbits = chars_seen * width;
2014       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2015
2016       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017           || ((result >> (nbits - 1)) & 1) == 0)
2018         result &= mask;
2019       else
2020         result |= ~mask;
2021     }
2022
2023   *pchars_seen = chars_seen;
2024   return result;
2025 }
2026
2027 /* Memory pools.  */
2028
/* Probe type for the default pool alignment.  The union sits directly
   after a single char, so its offset is the padding the compiler
   inserts to align either a double or a pointer.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Offset of the union within the probe type above.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2040
2041 static int
2042 chunk_suitable (pool, chunk, size)
2043      cpp_pool *pool;
2044      cpp_chunk *chunk;
2045      unsigned int size;
2046 {
2047   /* Being at least twice SIZE means we can use memcpy in
2048      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2049      anyway.  */
2050   return (chunk && pool->locked != chunk
2051           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2052 }
2053
2054 /* Returns the end of the new pool.  PTR points to a char in the old
2055    pool, and is updated to point to the same char in the new pool.  */
2056 unsigned char *
2057 _cpp_next_chunk (pool, len, ptr)
2058      cpp_pool *pool;
2059      unsigned int len;
2060      unsigned char **ptr;
2061 {
2062   cpp_chunk *chunk = pool->cur->next;
2063
2064   /* LEN is the minimum size we want in the new pool.  */
2065   len += POOL_ROOM (pool);
2066   if (! chunk_suitable (pool, chunk, len))
2067     {
2068       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2069
2070       chunk->next = pool->cur->next;
2071       pool->cur->next = chunk;
2072     }
2073
2074   /* Update the pointer before changing chunk's front.  */
2075   if (ptr)
2076     *ptr += chunk->base - POOL_FRONT (pool);
2077
2078   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2079   chunk->front = chunk->base;
2080
2081   pool->cur = chunk;
2082   return POOL_LIMIT (pool);
2083 }
2084
2085 static cpp_chunk *
2086 new_chunk (size)
2087      unsigned int size;
2088 {
2089   unsigned char *base;
2090   cpp_chunk *result;
2091
2092   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2093   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2094   /* Put the chunk descriptor at the end.  Then chunk overruns will
2095      cause obvious chaos.  */
2096   result = (cpp_chunk *) (base + size);
2097   result->base = base;
2098   result->front = base;
2099   result->limit = base + size;
2100   result->next = 0;
2101
2102   return result;
2103 }
2104
2105 void
2106 _cpp_init_pool (pool, size, align, temp)
2107      cpp_pool *pool;
2108      unsigned int size, align, temp;
2109 {
2110   if (align == 0)
2111     align = DEFAULT_ALIGNMENT;
2112   if (align & (align - 1))
2113     abort ();
2114   pool->align = align;
2115   pool->first = new_chunk (size);
2116   pool->cur = pool->first;
2117   pool->locked = 0;
2118   pool->locks = 0;
2119   if (temp)
2120     pool->cur->next = pool->cur;
2121 }
2122
2123 void
2124 _cpp_lock_pool (pool)
2125      cpp_pool *pool;
2126 {
2127   if (pool->locks++ == 0)
2128     pool->locked = pool->cur;
2129 }
2130
2131 void
2132 _cpp_unlock_pool (pool)
2133      cpp_pool *pool;
2134 {
2135   if (--pool->locks == 0)
2136     pool->locked = 0;
2137 }
2138
2139 void
2140 _cpp_free_pool (pool)
2141      cpp_pool *pool;
2142 {
2143   cpp_chunk *chunk = pool->first, *next;
2144
2145   do
2146     {
2147       next = chunk->next;
2148       free (chunk->base);
2149       chunk = next;
2150     }
2151   while (chunk && chunk != pool->first);
2152 }
2153
2154 /* Reserve LEN bytes from a memory pool.  */
2155 unsigned char *
2156 _cpp_pool_reserve (pool, len)
2157      cpp_pool *pool;
2158      unsigned int len;
2159 {
2160   len = POOL_ALIGN (len, pool->align);
2161   if (len > (unsigned int) POOL_ROOM (pool))
2162     _cpp_next_chunk (pool, len, 0);
2163
2164   return POOL_FRONT (pool);
2165 }
2166
2167 /* Allocate LEN bytes from a memory pool.  */
2168 unsigned char *
2169 _cpp_pool_alloc (pool, len)
2170      cpp_pool *pool;
2171      unsigned int len;
2172 {
2173   unsigned char *result = _cpp_pool_reserve (pool, len);
2174
2175   POOL_COMMIT (pool, len);
2176   return result;
2177 }