gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
     /* One entry of the token_spellings table: how a token type is
        spelled.  The field order matters, because the OP and TK macros
        initialise entries positionally.  */
  65 struct token_spelling
  66 {
  67   enum spell_type category;      /* Spelling class of the token type.  */
  68   const unsigned char *name;     /* Operator text (OP entries) or the stringified enumerator (TK entries).  */
  69 };
  70
     /* Spellings of the digraph forms of punctuators.
        NOTE(review): the element order is presumably relied upon by code
        that indexes this array elsewhere -- confirm before reordering.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
     /* Build the spelling table by expanding TTYPE_TABLE with local
        definitions of OP and TK.  An OP entry is classed SPELL_OPERATOR
        and named by its operator text S; a TK entry takes its class from
        S and is named by the stringified enumerator E.  Both macros are
        undefined again straight after the table.
        NOTE(review): TTYPE_TABLE, U and STRINGX are not defined in this
        file; presumably they come from the headers included above.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
     /* Look up the spelling class and the spelling name of TOKEN, indexed
        by its type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
     /* Forward declarations of the file-local helpers.

        Character-level input: newline accounting, trigraphs and escaped
        newlines.  */
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
     /* Comment, whitespace and token-body scanners.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 102                                    const unsigned char *, unsigned int *));
 103
     /* NOTE(review): judging only by their names and signatures, these
        are pool-chunk helpers and a hex digit converter; their
        definitions are further down the file -- confirm there.  */
 104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 106 static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine:
 109
 110    Compares, the token TOKEN to the NUL-terminated string STRING.
 111    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   if (token->type != CPP_NAME)
 119     return 0;
 120
 121   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 122 }
 123
 124 /* Call when meeting a newline.  Returns the character after the newline
 125    (or carriage-return newline combination), or EOF.  */
 126 static cppchar_t
 127 handle_newline (pfile, newline_char)
 128      cpp_reader *pfile;
 129      cppchar_t newline_char;
 130 {
 131   cpp_buffer *buffer;
 132   cppchar_t next = EOF;
 133
 134   pfile->line++;
 135   pfile->pseudo_newlines++;
 136
 137   buffer = pfile->buffer;
 138   buffer->col_adjust = 0;
 139   buffer->lineno++;
 140   buffer->line_base = buffer->cur;
 141
 142   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 143   if (buffer->cur < buffer->rlimit)
 144     {
 145       next = *buffer->cur++;
 146       if (next + newline_char == '\r' + '\n')
 147         {
 148           buffer->line_base = buffer->cur;
 149           if (buffer->cur < buffer->rlimit)
 150             next = *buffer->cur++;
 151           else
 152             next = EOF;
 153         }
 154     }
 155
 156   buffer->read_ahead = next;
 157   return next;
 158 }
 159
 160 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 161    encountered.  It warns if necessary, and returns true if the
 162    trigraph should be honoured.  FROM_CHAR is the third character of a
 163    trigraph, and presumed to be the previous character for position
 164    reporting.  */
 165 static int
 166 trigraph_ok (pfile, from_char)
 167      cpp_reader *pfile;
 168      cppchar_t from_char;
 169 {
 170   int accept = CPP_OPTION (pfile, trigraphs);
 171
 172   /* Don't warn about trigraphs in comments.  */
 173   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 174     {
 175       cpp_buffer *buffer = pfile->buffer;
 176       if (accept)
 177         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 178                                "trigraph ??%c converted to %c",
 179                                (int) from_char,
 180                                (int) _cpp_trigraph_map[from_char]);
 181       else if (buffer->cur != buffer->last_Wtrigraphs)
 182         {
 183           buffer->last_Wtrigraphs = buffer->cur;
 184           cpp_warning_with_line (pfile, buffer->lineno,
 185                                  CPP_BUF_COL (buffer) - 2,
 186                                  "trigraph ??%c ignored", (int) from_char);
 187         }
 188     }
 189
 190   return accept;
 191 }
 192
 193 /* Assumes local variables buffer and result.  Makes T the type of
        the token being built in RESULT and resets buffer->read_ahead to
        EOF, i.e. marks the looked-ahead character as consumed.  */
 194 #define ACCEPT_CHAR(t) \
 195   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 196
 197 /* When we move to multibyte character sets, add to these something
 198    that saves and restores the state of the multibyte conversion
 199    library.  This probably involves saving and restoring a "cookie".
 200    In the case of glibc it is an 8-byte structure, so is not a high
 201    overhead operation.  In any case, it's out of the fast path.

        For now they only record and rewind the read position.  Both
        assume local variables buffer and saved_cur.  */
 202 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 203 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 204
 205 /* Skips any escaped newlines introduced by NEXT, which is either a
 206    '?' or a '\\'.  Returns the next character, which will also have
 207    been placed in buffer->read_ahead.  This routine performs
 208    preprocessing stages 1 and 2 of the ISO C standard.

        On entry buffer->cur points just past NEXT.  If NEXT does not
        begin an escaped newline, the read position is rewound to where
        it was and NEXT itself is returned.  An honoured trigraph that
        is not a backslash (or that ends the buffer) is returned as its
        replacement character with the read position left after it.
        Buffers marked from_stage3 are returned NEXT unchanged.  */
 209 static cppchar_t
 210 skip_escaped_newlines (buffer, next)
 211      cpp_buffer *buffer;
 212      cppchar_t next;
 213 {
 214   /* Only do this if we apply stages 1 and 2.  */
 215   if (!buffer->from_stage3)
 216     {
 217       cppchar_t next1;
 218       const unsigned char *saved_cur;
 219       int space;
 220
 221       do
 222         {
               /* Nothing follows NEXT, so it cannot introduce anything.  */
 223           if (buffer->cur == buffer->rlimit)
 224             break;
 225
 226           SAVE_STATE ();
 227           if (next == '?')
 228             {
                   /* A trigraph needs a second '?' and then a third
                      character; otherwise rewind and give up.  */
 229               next1 = *buffer->cur++;
 230               if (next1 != '?' || buffer->cur == buffer->rlimit)
 231                 {
 232                   RESTORE_STATE ();
 233                   break;
 234                 }
 235
                   /* The third character must be in the trigraph map, and
                      trigraph_ok must agree to honour it.  */
 236               next1 = *buffer->cur++;
 237               if (!_cpp_trigraph_map[next1]
 238                   || !trigraph_ok (buffer->pfile, next1))
 239                 {
 240                   RESTORE_STATE ();
 241                   break;
 242                 }
 243
 244               /* We have a full trigraph here.  */
 245               next = _cpp_trigraph_map[next1];
 246               if (next != '\\' || buffer->cur == buffer->rlimit)
 247                 break;
                   /* From here on, rewinding goes back to just after the
                      trigraph rather than to just after the first '?'.  */
 248               SAVE_STATE ();
 249             }
 250
 251           /* We have a backslash, and room for at least one more character.  */
 252           space = 0;
               /* Skip horizontal space after the backslash, noting whether
                  there was any.  */
 253           do
 254             {
 255               next1 = *buffer->cur++;
 256               if (!is_nvspace (next1))
 257                 break;
 258               space = 1;
 259             }
 260           while (buffer->cur < buffer->rlimit);
 261
               /* Not followed by a newline: this is an ordinary backslash.  */
 262           if (!is_vspace (next1))
 263             {
 264               RESTORE_STATE ();
 265               break;
 266             }
 267
 268           if (space && !buffer->pfile->state.lexing_comment)
 269             cpp_warning (buffer->pfile,
 270                          "backslash and newline separated by space");
 271
               /* Consume the newline.  The character after it may start
                  another escaped newline, hence the enclosing loop.  */
 272           next = handle_newline (buffer->pfile, next1);
 273           if (next == EOF)
 274             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 275         }
 276       while (next == '\\' || next == '?');
 277     }
 278
 279   buffer->read_ahead = next;
 280   return next;
 281 }
 282
 283 /* Obtain the next character, after trigraph conversion and skipping
 284    an arbitrary string of escaped newlines.  The common case of no
 285    trigraphs or escaped newlines falls through quickly.  */
 286 static cppchar_t
 287 get_effective_char (buffer)
 288      cpp_buffer *buffer;
 289 {
 290   cppchar_t next = EOF;
 291
 292   if (buffer->cur < buffer->rlimit)
 293     {
 294       next = *buffer->cur++;
 295
 296       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 297          can introduce escaped newlines, which we want to skip, or
 298          UCNs, which, depending upon lexer state, we will handle in
 299          the future.  */
 300       if (next == '?' || next == '\\')
 301         next = skip_escaped_newlines (buffer, next);
 302     }
 303
 304   buffer->read_ahead = next;
 305   return next;
 306 }
 307
 308 /* Skip a C-style block comment.  We find the end of the comment by
 309    seeing if an asterisk is before every '/' we encounter.  Returns
 310    non-zero if comment terminated by EOF, zero otherwise.

        state.lexing_comment is set for the duration, which silences the
        trigraph and backslash-space warnings of the helpers called from
        here.  buffer->read_ahead is reset to EOF on return.
        NOTE(review): presumably entered with buffer->cur just past the
        opening "/" and "*" -- confirm against the caller.  */
 311 static int
 312 skip_block_comment (pfile)
 313      cpp_reader *pfile;
 314 {
 315   cpp_buffer *buffer = pfile->buffer;
       /* C is the character under examination and PREVC the one before
          it; both start as EOF so that a leading '/' cannot match.  */
 316   cppchar_t c = EOF, prevc = EOF;
 317
 318   pfile->state.lexing_comment = 1;
 319   while (buffer->cur != buffer->rlimit)
 320     {
 321       prevc = c, c = *buffer->cur++;
 322
         /* Re-entered by goto when C has already been fetched.  */
 323     next_char:
 324       /* FIXME: For speed, create a new character class of characters
 325          of interest inside block comments.  */
 326       if (c == '?' || c == '\\')
 327         c = skip_escaped_newlines (buffer, c);
 328
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       if (c == '/')
 332         {
 333           if (prevc == '*')
 334             break;
 335
 336           /* Warn about potential nested comments, but not if the '/'
 337              comes immediately before the true comment delimiter.
 338              Don't bother to get it right across escaped newlines.  */
 339           if (CPP_OPTION (pfile, warn_comments)
 340               && buffer->cur != buffer->rlimit)
 341             {
 342               prevc = c, c = *buffer->cur++;
 343               if (c == '*' && buffer->cur != buffer->rlimit)
 344                 {
 345                   prevc = c, c = *buffer->cur++;
 346                   if (c != '/')
 347                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 348                                            CPP_BUF_COL (buffer),
 349                                            "\"/*\" within comment");
 350                 }
                   /* The character read ahead still has to be examined.  */
 351               goto next_char;
 352             }
 353         }
 354       else if (is_vspace (c))
 355         {
               /* handle_newline returns the character after the newline,
                  so examine that one without reading again.  */
 356           prevc = c, c = handle_newline (pfile, c);
 357           goto next_char;
 358         }
 359       else if (c == '\t')
 360         adjust_column (pfile);
 361     }
 362
 363   pfile->state.lexing_comment = 0;
 364   buffer->read_ahead = EOF;
       /* Zero only if the loop stopped on a '/' preceded by '*'.  */
 365   return c != '/' || prevc != '*';
 366 }
 367
 368 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 369    non-zero if a multiline comment.  The following new line, if any,
 370    is left in buffer->read_ahead.  */
 371 static int
 372 skip_line_comment (pfile)
 373      cpp_reader *pfile;
 374 {
 375   cpp_buffer *buffer = pfile->buffer;
 376   unsigned int orig_lineno = buffer->lineno;
 377   cppchar_t c;
 378
 379   pfile->state.lexing_comment = 1;
 380   do
 381     {
 382       c = EOF;
 383       if (buffer->cur == buffer->rlimit)
 384         break;
 385
 386       c = *buffer->cur++;
 387       if (c == '?' || c == '\\')
 388         c = skip_escaped_newlines (buffer, c);
 389     }
 390   while (!is_vspace (c));
 391
 392   pfile->state.lexing_comment = 0;
 393   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 394   return orig_lineno != buffer->lineno;
 395 }
 396
 397 /* pfile->buffer->cur is one beyond the \t character.  Update
 398    col_adjust so we track the column correctly.  */
 399 static void
 400 adjust_column (pfile)
 401      cpp_reader *pfile;
 402 {
 403   cpp_buffer *buffer = pfile->buffer;
 404   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 405
 406   /* Round it up to multiple of the tabstop, but subtract 1 since the
 407      tab itself occupies a character position.  */
 408   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 409                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 410 }
 411
 412 /* Skips whitespace, saving the next non-whitespace character.
 413    Adjusts pfile->col_adjust to account for tabs.  Without this,
 414    tokens might be assigned an incorrect column.  */
 415 static void
 416 skip_whitespace (pfile, c)
 417      cpp_reader *pfile;
 418      cppchar_t c;
 419 {
 420   cpp_buffer *buffer = pfile->buffer;
 421   unsigned int warned = 0;
 422
 423   do
 424     {
 425       /* Horizontal space always OK.  */
 426       if (c == ' ')
 427         ;
 428       else if (c == '\t')
 429         adjust_column (pfile);
 430       /* Just \f \v or \0 left.  */
 431       else if (c == '\0')
 432         {
 433           if (!warned)
 434             {
 435               cpp_warning (pfile, "null character(s) ignored");
 436               warned = 1;
 437             }
 438         }
 439       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 440         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 441                                CPP_BUF_COL (buffer),
 442                                "%s in preprocessing directive",
 443                                c == '\f' ? "form feed" : "vertical tab");
 444
 445       c = EOF;
 446       if (buffer->cur == buffer->rlimit)
 447         break;
 448       c = *buffer->cur++;
 449     }
 450   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 451   while (is_nvspace (c));
 452
 453   /* Remember the next character.  */
 454   buffer->read_ahead = c;
 455 }
 456
 457 /* See if the characters of a number token are valid in a name (no
 458    '.', '+' or '-').  */
 459 static int
 460 name_p (pfile, string)
 461      cpp_reader *pfile;
 462      const cpp_string *string;
 463 {
 464   unsigned int i;
 465
 466   for (i = 0; i < string->len; i++)
 467     if (!is_idchar (string->text[i]))
 468       return 0;
 469
 470   return 1;
 471 }
 472
 473 /* Parse an identifier, skipping embedded backslash-newlines.
 474    Calculate the hash value of the token while parsing, for improved
 475    performance.  The hashing algorithm *must* match cpp_lookup().  */
 476
 477 static cpp_hashnode *
 478 parse_identifier (pfile, c)
 479      cpp_reader *pfile;
 480      cppchar_t c;
 481 {
 482   cpp_hashnode *result;
 483   cpp_buffer *buffer = pfile->buffer;
 484   unsigned int saw_dollar = 0, len;
 485   struct obstack *stack = &pfile->hash_table->stack;
 486
 487   do
 488     {
 489       do
 490         {
 491           obstack_1grow (stack, c);
 492
 493           if (c == '$')
 494             saw_dollar++;
 495
 496           c = EOF;
 497           if (buffer->cur == buffer->rlimit)
 498             break;
 499
 500           c = *buffer->cur++;
 501         }
 502       while (is_idchar (c));
 503
 504       /* Potential escaped newline?  */
 505       if (c != '?' && c != '\\')
 506         break;
 507       c = skip_escaped_newlines (buffer, c);
 508     }
 509   while (is_idchar (c));
 510
 511   /* Remember the next character.  */
 512   buffer->read_ahead = c;
 513
 514   /* $ is not a identifier character in the standard, but is commonly
 515      accepted as an extension.  Don't warn about it in skipped
 516      conditional blocks.  */
 517   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 518     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 519
 520   /* Identifiers are null-terminated.  */
 521   len = obstack_object_size (stack);
 522   obstack_1grow (stack, '\0');
 523
 524   /* This routine commits the memory if necessary.  */
 525   result = (cpp_hashnode *)
 526     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 527
 528   /* Some identifiers require diagnostics when lexed.  */
 529   if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
 530     {
 531       /* It is allowed to poison the same identifier twice.  */
 532       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 533         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 534                    NODE_NAME (result));
 535
 536       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 537          replacement list of a variadic macro.  */
 538       if (result == pfile->spec_nodes.n__VA_ARGS__
 539           && !pfile->state.va_args_ok)
 540         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 541     }
 542
 543   return result;
 544 }
 545
 546 /* Parse a number, skipping embedded backslash-newlines.  */
 547 static void
 548 parse_number (pfile, number, c, leading_period)
 549      cpp_reader *pfile;
 550      cpp_string *number;
 551      cppchar_t c;
 552      int leading_period;
 553 {
 554   cpp_buffer *buffer = pfile->buffer;
 555   cpp_pool *pool = &pfile->ident_pool;
 556   unsigned char *dest, *limit;
 557
 558   dest = POOL_FRONT (pool);
 559   limit = POOL_LIMIT (pool);
 560
 561   /* Place a leading period.  */
 562   if (leading_period)
 563     {
 564       if (dest >= limit)
 565         limit = _cpp_next_chunk (pool, 0, &dest);
 566       *dest++ = '.';
 567     }
 568
 569   do
 570     {
 571       do
 572         {
 573           /* Need room for terminating null.  */
 574           if (dest + 1 >= limit)
 575             limit = _cpp_next_chunk (pool, 0, &dest);
 576           *dest++ = c;
 577
 578           c = EOF;
 579           if (buffer->cur == buffer->rlimit)
 580             break;
 581
 582           c = *buffer->cur++;
 583         }
 584       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 585
 586       /* Potential escaped newline?  */
 587       if (c != '?' && c != '\\')
 588         break;
 589       c = skip_escaped_newlines (buffer, c);
 590     }
 591   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 592
 593   /* Remember the next character.  */
 594   buffer->read_ahead = c;
 595
 596   /* Null-terminate the number.  */
 597   *dest = '\0';
 598
 599   number->text = POOL_FRONT (pool);
 600   number->len = dest - number->text;
 601   POOL_COMMIT (pool, number->len + 1);
 602 }
 603
 604 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 605 static void
 606 unterminated (pfile, term)
 607      cpp_reader *pfile;
 608      int term;
 609 {
 610   cpp_error (pfile, "missing terminating %c character", term);
 611
 612   if (term == '\"' && pfile->mlstring_pos.line
 613       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 614     {
 615       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 616                            pfile->mlstring_pos.col,
 617                            "possible start of unterminated string literal");
 618       pfile->mlstring_pos.line = 0;
 619     }
 620 }
 621
 622 /* Subroutine of parse_string.  */
 623 static int
 624 unescaped_terminator_p (pfile, dest)
 625      cpp_reader *pfile;
 626      const unsigned char *dest;
 627 {
 628   const unsigned char *start, *temp;
 629
 630   /* In #include-style directives, terminators are not escapeable.  */
 631   if (pfile->state.angled_headers)
 632     return 1;
 633
 634   start = POOL_FRONT (&pfile->ident_pool);
 635
 636   /* An odd number of consecutive backslashes represents an escaped
 637      terminator.  */
 638   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 639     ;
 640
 641   return ((dest - temp) & 1) == 0;
 642 }
 643
 644 /* Parses a string, character constant, or angle-bracketed header file
 645    name.  Handles embedded trigraphs and escaped newlines.  The stored
 646    string is guaranteed NUL-terminated, but it is not guaranteed that
 647    this is the first NUL since embedded NULs are preserved.
 648
 649    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the closing delimiter to look for.  The text between
        the delimiters is built in the identifier pool and committed
        there; TOKEN->val.str receives its address and its length
        (excluding the final NUL).  On return buffer->read_ahead is EOF
        if the terminator was consumed or the buffer ran out, and
        otherwise holds the newline that cut the literal short.  */
 650 static void
 651 parse_string (pfile, token, terminator)
 652      cpp_reader *pfile;
 653      cpp_token *token;
 654      cppchar_t terminator;
 655 {
 656   cpp_buffer *buffer = pfile->buffer;
 657   cpp_pool *pool = &pfile->ident_pool;
 658   unsigned char *dest, *limit;
 659   cppchar_t c;
       /* Each of these warnings is given at most once per literal.  */
 660   bool warned_nulls = false, warned_multi = false;
 661
 662   dest = POOL_FRONT (pool);
 663   limit = POOL_LIMIT (pool);
 664
 665   for (;;)
 666     {
 667       if (buffer->cur == buffer->rlimit)
 668         c = EOF;
 669       else
 670         c = *buffer->cur++;
 671
         /* Re-entered by goto when C has already been fetched.  */
 672     have_char:
 673       /* We need space for the terminating NUL.  */
 674       if (dest >= limit)
 675         limit = _cpp_next_chunk (pool, 0, &dest);
 676
 677       if (c == EOF)
 678         {
 679           unterminated (pfile, terminator);
 680           break;
 681         }
 682
 683       /* Handle trigraphs, escaped newlines etc.  */
 684       if (c == '?' || c == '\\')
 685         c = skip_escaped_newlines (buffer, c);
 686
 687       if (c == terminator && unescaped_terminator_p (pfile, dest))
 688         {
               /* The terminator is consumed, so nothing is left to be
                  read ahead.  */
 689           c = EOF;
 690           break;
 691         }
 692       else if (is_vspace (c))
 693         {
 694           /* In assembly language, silently terminate string and
 695              character literals at end of line.  This is a kludge
 696              around not knowing where comments are.  */
 697           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 698             break;
 699
 700           /* Character constants and header names may not extend over
 701              multiple lines.  In Standard C, neither may strings.
 702              Unfortunately, we accept multiline strings as an
 703              extension, except in #include family directives.  */
 704           if (terminator != '"' || pfile->state.angled_headers)
 705             {
 706               unterminated (pfile, terminator);
 707               break;
 708             }
 709
 710           if (!warned_multi)
 711             {
 712               warned_multi = true;
 713               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 714             }
 715
               /* Record where the first multi-line string began, for the
                  hint given by unterminated.  */
 716           if (pfile->mlstring_pos.line == 0)
 717             pfile->mlstring_pos = pfile->lexer_pos;
 718
               /* Whatever the newline sequence was, a single '\n' is
                  stored.  handle_newline has already fetched the next
                  character, so process it without reading again.  */
 719           c = handle_newline (pfile, c);
 720           *dest++ = '\n';
 721           goto have_char;
 722         }
 723       else if (c == '\0' && !warned_nulls)
 724         {
 725           warned_nulls = true;
 726           cpp_warning (pfile, "null character(s) preserved in literal");
 727         }
 728
 729       *dest++ = c;
 730     }
 731
 732   /* Remember the next character.  */
 733   buffer->read_ahead = c;
 734   *dest = '\0';
 735
 736   token->val.str.text = POOL_FRONT (pool);
 737   token->val.str.len = dest - token->val.str.text;
 738   POOL_COMMIT (pool, token->val.str.len + 1);
 739 }
 740
 741 /* The stored comment includes the comment start and any terminator.  */
 742 static void
 743 save_comment (pfile, token, from)
 744      cpp_reader *pfile;
 745      cpp_token *token;
 746      const unsigned char *from;
 747 {
 748   unsigned char *buffer;
 749   unsigned int len;
 750
 751   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 752   /* C++ comments probably (not definitely) have moved past a new
 753      line, which we don't want to save in the comment.  */
 754   if (pfile->buffer->read_ahead != EOF)
 755     len--;
 756   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 757
 758   token->type = CPP_COMMENT;
 759   token->val.str.len = len;
 760   token->val.str.text = buffer;
 761
 762   buffer[0] = '/';
 763   memcpy (buffer + 1, from, len - 1);
 764 }
 765
 766 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 767    want to avoid stepping back when lexing %:%X.  */
 768 static void
 769 lex_percent (buffer, result)
 770      cpp_buffer *buffer;
 771      cpp_token *result;
 772 {
 773   cppchar_t c;
 774
 775   result->type = CPP_MOD;
 776   /* Parsing %:%X could leave an extra character.  */
 777   if (buffer->extra_char == EOF)
 778     c = get_effective_char (buffer);
 779   else
 780     {
 781       c = buffer->read_ahead = buffer->extra_char;
 782       buffer->extra_char = EOF;
 783     }
 784
 785   if (c == '=')
 786     ACCEPT_CHAR (CPP_MOD_EQ);
 787   else if (CPP_OPTION (buffer->pfile, digraphs))
 788     {
 789       if (c == ':')
 790         {
 791           result->flags |= DIGRAPH;
 792           ACCEPT_CHAR (CPP_HASH);
 793           if (get_effective_char (buffer) == '%')
 794             {
 795               buffer->extra_char = get_effective_char (buffer);
 796               if (buffer->extra_char == ':')
 797                 {
 798                   buffer->extra_char = EOF;
 799                   ACCEPT_CHAR (CPP_PASTE);
 800                 }
 801               else
 802                 /* We'll catch the extra_char when we're called back.  */
 803                 buffer->read_ahead = '%';
 804             }
 805         }
 806       else if (c == '>')
 807         {
 808           result->flags |= DIGRAPH;
 809           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 810         }
 811     }
 812 }
 813
 814 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 815    want to avoid stepping back when lexing '...' or '.123'.  In the
 816    latter case we should also set a flag for parse_number.  */
 817 static void
 818 lex_dot (pfile, result)
 819      cpp_reader *pfile;
 820      cpp_token *result;
 821 {
 822   cpp_buffer *buffer = pfile->buffer;
 823   cppchar_t c;
 824
 825   /* Parsing ..X could leave an extra character.  */
 826   if (buffer->extra_char == EOF)
 827     c = get_effective_char (buffer);
 828   else
 829     {
 830       c = buffer->read_ahead = buffer->extra_char;
 831       buffer->extra_char = EOF;
 832     }
 833
 834   /* All known character sets have 0...9 contiguous.  */
 835   if (c >= '0' && c <= '9')
 836     {
 837       result->type = CPP_NUMBER;
 838       parse_number (pfile, &result->val.str, c, 1);
 839     }
 840   else
 841     {
 842       result->type = CPP_DOT;
 843       if (c == '.')
 844         {
 845           buffer->extra_char = get_effective_char (buffer);
 846           if (buffer->extra_char == '.')
 847             {
 848               buffer->extra_char = EOF;
 849               ACCEPT_CHAR (CPP_ELLIPSIS);
 850             }
 851           else
 852             /* We'll catch the extra_char when we're called back.  */
 853             buffer->read_ahead = '.';
 854         }
 855       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 856         ACCEPT_CHAR (CPP_DOT_STAR);
 857     }
 858 }
 859
 860 void
 861 _cpp_lex_token (pfile, result)
 862      cpp_reader *pfile;
 863      cpp_token *result;
 864 {
       /* Lex the next preprocessing token from PFILE's current buffer and
          store it in RESULT.

          White space and, unless state.save_comments is set, comments are
          skipped and recorded as PREV_WHITE on the token that follows.
          Outside a directive, newlines are skipped too, and a '#' that is
          first on its line is handed to _cpp_handle_directive.  Inside a
          directive, a newline ends the line and is reported as CPP_EOF.
          Tokens lexed while state.skipping is set (and we are not in a
          directive) are thrown away and lexing restarts.  */
 865   cppchar_t c;
 866   cpp_buffer *buffer;
 867   const unsigned char *comment_start;
 868   unsigned char bol;
 869
       /* Restart point when a token is discarded because we are skipping.
          BOL is nonzero when the token about to be lexed is the first on
          its line; it is what allows a '#' to be treated as a directive
          (see do_hash below).  */
 870  skip:
 871   bol = pfile->state.next_bol;
       /* Re-entry point after a directive has been handled or a buffer
          has been popped; BOL keeps the value set before the jump.  */
 872  done_directive:
 873   buffer = pfile->buffer;
 874   pfile->state.next_bol = 0;
 875   result->flags = buffer->saved_flags;
 876   buffer->saved_flags = 0;
       /* The token may start on a new line: refresh the line information.  */
 877  next_char:
 878   pfile->lexer_pos.line = buffer->lineno;
 879   result->line = pfile->line;
       /* Still on the same line: only the column needs refreshing.  */
 880  next_char2:
 881   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 882
       /* Use a pending look-ahead character if there is one.  EOF in
          read_ahead means none is pending, so take the next character
          from the buffer if any is left; otherwise C stays EOF.  */
 883   c = buffer->read_ahead;
 884   if (c == EOF && buffer->cur < buffer->rlimit)
 885     {
 886       c = *buffer->cur++;
 887       pfile->lexer_pos.col++;
 888     }
 889   result->col = pfile->lexer_pos.col;
 890
       /* Clear the look-ahead slot before dispatching on C.  */
 891  do_switch:
 892   buffer->read_ahead = EOF;
 893   switch (c)
 894     {
         /* End of the current buffer.  */
 895     case EOF:
 896       if (!pfile->state.in_directive)
 897         {
 898           unsigned char ret = pfile->buffer->return_at_eof;
 899
 900           /* Non-empty files should end in a newline.  Don't warn for
 901              command line and _Pragma buffers.  */
 902           if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
 903             cpp_pedwarn (pfile, "no newline at end of file");
 904           _cpp_pop_buffer (pfile);
               /* If a buffer remains and the popped one did not ask for
                  a return at EOF, carry on lexing from the buffer that
                  is now current, at the start of a line.  */
 905           if (pfile->buffer && !ret)
 906             {
 907               bol = 1;
 908               goto done_directive;
 909             }
 910         }
 911       pfile->state.next_bol = 1;
 912       result->type = CPP_EOF;
 913       return;
 914
 915     case ' ': case '\t': case '\f': case '\v': case '\0':
 916       skip_whitespace (pfile, c);
 917       result->flags |= PREV_WHITE;
 918       goto next_char2;
 919
 920     case '\n': case '\r':
 921       if (!pfile->state.in_directive)
 922         {
 923           handle_newline (pfile, c);
 924           if (!pfile->state.parsing_args)
 925             pfile->pseudo_newlines = 0;
 926           bol = 1;
 927           pfile->lexer_pos.output_line = buffer->lineno;
 928           /* This is a new line, so clear any white space flag.
 929              Newlines in arguments are white space (6.10.3.10);
 930              parse_arg takes care of that.  */
 931           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 932           goto next_char;
 933         }
 934
 935       /* Don't let directives spill over to the next line.  */
           /* The newline is put back in read_ahead, so the next call
              sees it again.  */
 936       buffer->read_ahead = c;
 937       pfile->state.next_bol = 1;
 938       result->type = CPP_EOF;
 939       /* Don't break; pfile->state.skipping might be true.  */
 940       return;
 941
 942     case '?':
 943     case '\\':
 944       /* These could start an escaped newline, or '?' a trigraph.  Let
 945          skip_escaped_newlines do all the work.  */
 946       {
 947         unsigned int lineno = buffer->lineno;
 948
 949         c = skip_escaped_newlines (buffer, c);
 950         if (lineno != buffer->lineno)
 951           /* We had at least one escaped newline of some sort, and the
 952              next character is in buffer->read_ahead.  Update the
 953              token's line and column.  */
 954             goto next_char;
 955
 956         /* We are either the original '?' or '\\', or a trigraph.  */
             /* A lone backslash becomes a CPP_OTHER token via random_char;
                a trigraph replacement character is dispatched afresh;
                only a genuine '?' keeps the CPP_QUERY type set here.  */
 957         result->type = CPP_QUERY;
 958         buffer->read_ahead = EOF;
 959         if (c == '\\')
 960           goto random_char;
 961         else if (c != '?')
 962           goto do_switch;
 963       }
 964       break;
 965
 966     case '0': case '1': case '2': case '3': case '4':
 967     case '5': case '6': case '7': case '8': case '9':
 968       result->type = CPP_NUMBER;
 969       parse_number (pfile, &result->val.str, c, 0);
 970       break;
 971
 972     case '$':
 973       if (!CPP_OPTION (pfile, dollars_in_ident))
 974         goto random_char;
 975       /* Fall through...  */
 976
 977     case '_':
 978     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 979     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 980     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 981     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 982     case 'y': case 'z':
 983     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 984     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 985     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 986     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 987     case 'Y': case 'Z':
 988       result->type = CPP_NAME;
 989       result->val.node = parse_identifier (pfile, c);
 990
 991       /* 'L' may introduce wide characters or strings.  */
 992       if (result->val.node == pfile->spec_nodes.n_L)
 993         {
 994           c = buffer->read_ahead; /* For make_string.  */
 995           if (c == '\'' || c == '"')
 996             {
 997               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 998               goto make_string;
 999             }
1000         }
1001       /* Convert named operators to their proper types.  */
           /* The identifier node stays in val.node; NAMED_OP marks the
              token so it can still be spelled as written.  */
1002       else if (result->val.node->flags & NODE_OPERATOR)
1003         {
1004           result->flags |= NAMED_OP;
1005           result->type = result->val.node->value.operator;
1006         }
1007       break;
1008
1009     case '\'':
1010     case '"':
1011       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Entered with result->type already set and C holding the
              terminating character.  */
1012     make_string:
1013       parse_string (pfile, result, c);
1014       break;
1015
1016     case '/':
1017       /* A potential block or line comment.  */
1018       comment_start = buffer->cur;
1019       result->type = CPP_DIV;
1020       c = get_effective_char (buffer);
1021       if (c == '=')
1022         ACCEPT_CHAR (CPP_DIV_EQ);
1023       if (c != '/' && c != '*')
1024         break;
1025
1026       if (c == '*')
1027         {
1028           if (skip_block_comment (pfile))
1029             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1030                                  pfile->lexer_pos.col,
1031                                  "unterminated comment");
1032         }
1033       else
1034         {
               /* Line comments are not accepted here: leave the first
                  '/' as a plain CPP_DIV token.  */
1035           if (!CPP_OPTION (pfile, cplusplus_comments)
1036               && !CPP_IN_SYSTEM_HEADER (pfile))
1037             break;
1038
1039           /* Warn about comments only if pedantically GNUC89, and not
1040              in system headers.  */
1041           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1042               && ! buffer->warned_cplusplus_comments)
1043             {
1044               cpp_pedwarn (pfile,
1045                            "C++ style comments are not allowed in ISO C89");
1046               cpp_pedwarn (pfile,
1047                            "(this will be reported only once per input file)");
1048               buffer->warned_cplusplus_comments = 1;
1049             }
1050
1051           /* Skip_line_comment updates buffer->read_ahead.  */
1052           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1053             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1054                                    pfile->lexer_pos.col,
1055                                    "multi-line comment");
1056         }
1057
1058       /* Skipping the comment has updated buffer->read_ahead.  */
           /* A discarded comment counts as white space before the next
              token.  */
1059       if (!pfile->state.save_comments)
1060         {
1061           result->flags |= PREV_WHITE;
1062           goto next_char;
1063         }
1064
1065       /* Save the comment as a token in its own right.  */
1066       save_comment (pfile, result, comment_start);
1067       /* Don't do MI optimisation.  */
1068       return;
1069
1070     case '<':
           /* When state.angled_headers is set, '<' opens a header name
              that runs up to the matching '>'.  */
1071       if (pfile->state.angled_headers)
1072         {
1073           result->type = CPP_HEADER_NAME;
1074           c = '>';              /* terminator.  */
1075           goto make_string;
1076         }
1077
1078       result->type = CPP_LESS;
1079       c = get_effective_char (buffer);
1080       if (c == '=')
1081         ACCEPT_CHAR (CPP_LESS_EQ);
1082       else if (c == '<')
1083         {
1084           ACCEPT_CHAR (CPP_LSHIFT);
1085           if (get_effective_char (buffer) == '=')
1086             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1087         }
1088       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1089         {
1090           ACCEPT_CHAR (CPP_MIN);
1091           if (get_effective_char (buffer) == '=')
1092             ACCEPT_CHAR (CPP_MIN_EQ);
1093         }
1094       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1095         {
1096           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1097           result->flags |= DIGRAPH;
1098         }
1099       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1100         {
1101           ACCEPT_CHAR (CPP_OPEN_BRACE);
1102           result->flags |= DIGRAPH;
1103         }
1104       break;
1105
1106     case '>':
1107       result->type = CPP_GREATER;
1108       c = get_effective_char (buffer);
1109       if (c == '=')
1110         ACCEPT_CHAR (CPP_GREATER_EQ);
1111       else if (c == '>')
1112         {
1113           ACCEPT_CHAR (CPP_RSHIFT);
1114           if (get_effective_char (buffer) == '=')
1115             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1116         }
1117       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1118         {
1119           ACCEPT_CHAR (CPP_MAX);
1120           if (get_effective_char (buffer) == '=')
1121             ACCEPT_CHAR (CPP_MAX_EQ);
1122         }
1123       break;
1124
1125     case '%':
           /* If lex_percent produced a CPP_HASH (presumably the %:
              digraph), it may introduce a directive just like '#'.  */
1126       lex_percent (buffer, result);
1127       if (result->type == CPP_HASH)
1128         goto do_hash;
1129       break;
1130
1131     case '.':
1132       lex_dot (pfile, result);
1133       break;
1134
1135     case '+':
1136       result->type = CPP_PLUS;
1137       c = get_effective_char (buffer);
1138       if (c == '=')
1139         ACCEPT_CHAR (CPP_PLUS_EQ);
1140       else if (c == '+')
1141         ACCEPT_CHAR (CPP_PLUS_PLUS);
1142       break;
1143
1144     case '-':
1145       result->type = CPP_MINUS;
1146       c = get_effective_char (buffer);
1147       if (c == '>')
1148         {
1149           ACCEPT_CHAR (CPP_DEREF);
1150           if (CPP_OPTION (pfile, cplusplus)
1151               && get_effective_char (buffer) == '*')
1152             ACCEPT_CHAR (CPP_DEREF_STAR);
1153         }
1154       else if (c == '=')
1155         ACCEPT_CHAR (CPP_MINUS_EQ);
1156       else if (c == '-')
1157         ACCEPT_CHAR (CPP_MINUS_MINUS);
1158       break;
1159
1160     case '*':
1161       result->type = CPP_MULT;
1162       if (get_effective_char (buffer) == '=')
1163         ACCEPT_CHAR (CPP_MULT_EQ);
1164       break;
1165
1166     case '=':
1167       result->type = CPP_EQ;
1168       if (get_effective_char (buffer) == '=')
1169         ACCEPT_CHAR (CPP_EQ_EQ);
1170       break;
1171
1172     case '!':
1173       result->type = CPP_NOT;
1174       if (get_effective_char (buffer) == '=')
1175         ACCEPT_CHAR (CPP_NOT_EQ);
1176       break;
1177
1178     case '&':
1179       result->type = CPP_AND;
1180       c = get_effective_char (buffer);
1181       if (c == '=')
1182         ACCEPT_CHAR (CPP_AND_EQ);
1183       else if (c == '&')
1184         ACCEPT_CHAR (CPP_AND_AND);
1185       break;
1186
1187     case '#':
1188       c = buffer->extra_char;   /* Can be set by error condition below.  */
1189       if (c != EOF)
1190         {
1191           buffer->read_ahead = c;
1192           buffer->extra_char = EOF;
1193         }
1194       else
1195         c = get_effective_char (buffer);
1196
1197       if (c == '#')
1198         {
1199           ACCEPT_CHAR (CPP_PASTE);
1200           break;
1201         }
1202
1203       result->type = CPP_HASH;
1204     do_hash:
           /* A '#' that is not first on its line is an ordinary token.  */
1205       if (!bol)
1206         break;
1207       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1208          tokens within the list of arguments that would otherwise act
1209          as preprocessing directives, the behavior is undefined.
1210
1211          This implementation will report a hard error, terminate the
1212          macro invocation, and proceed to process the directive.  */
1213       if (pfile->state.parsing_args)
1214         {
1215           if (pfile->state.parsing_args == 2)
1216             cpp_error (pfile,
1217                        "directives may not be used inside a macro argument");
1218
1219           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
               /* The character that was in read_ahead is parked in
                  extra_char; case '#' above restores it on the next
                  call.  */
1220           buffer->extra_char = buffer->read_ahead;
1221           buffer->read_ahead = '#';
1222           pfile->state.next_bol = 1;
1223           result->type = CPP_EOF;
1224
1225           /* Get whitespace right - newline_in_args sets it.  */
1226           if (pfile->lexer_pos.col == 1)
1227             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1228         }
1229       else
1230         {
1231           /* This is the hash introducing a directive.  */
1232           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1233             goto done_directive; /* bol still 1.  */
1234           /* This is in fact an assembler #.  */
1235         }
1236       break;
1237
1238     case '|':
1239       result->type = CPP_OR;
1240       c = get_effective_char (buffer);
1241       if (c == '=')
1242         ACCEPT_CHAR (CPP_OR_EQ);
1243       else if (c == '|')
1244         ACCEPT_CHAR (CPP_OR_OR);
1245       break;
1246
1247     case '^':
1248       result->type = CPP_XOR;
1249       if (get_effective_char (buffer) == '=')
1250         ACCEPT_CHAR (CPP_XOR_EQ);
1251       break;
1252
1253     case ':':
1254       result->type = CPP_COLON;
1255       c = get_effective_char (buffer);
1256       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1257         ACCEPT_CHAR (CPP_SCOPE);
1258       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1259         {
1260           result->flags |= DIGRAPH;
1261           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1262         }
1263       break;
1264
1265     case '~': result->type = CPP_COMPL; break;
1266     case ',': result->type = CPP_COMMA; break;
1267     case '(': result->type = CPP_OPEN_PAREN; break;
1268     case ')': result->type = CPP_CLOSE_PAREN; break;
1269     case '[': result->type = CPP_OPEN_SQUARE; break;
1270     case ']': result->type = CPP_CLOSE_SQUARE; break;
1271     case '{': result->type = CPP_OPEN_BRACE; break;
1272     case '}': result->type = CPP_CLOSE_BRACE; break;
1273     case ';': result->type = CPP_SEMICOLON; break;
1274
1275       /* @ is a punctuator in Objective C.  */
1276     case '@': result->type = CPP_ATSIGN; break;
1277
         /* Any other character becomes a CPP_OTHER token carrying the
            character itself.  */
1278     random_char:
1279     default:
1280       result->type = CPP_OTHER;
1281       result->val.c = c;
1282       break;
1283     }
1284
       /* While skipping outside a directive, discard the token just
          lexed and start over.  */
1285   if (!pfile->state.in_directive && pfile->state.skipping)
1286     goto skip;
1287
1288   /* If not in a directive, this token invalidates controlling macros.  */
1289   if (!pfile->state.in_directive)
1290     pfile->mi_valid = false;
1291 }
1292
1293 /* An upper bound on the number of bytes needed to spell a token,
1294    including preceding whitespace.  */
1295 unsigned int
1296 cpp_token_len (token)
1297      const cpp_token *token;
1298 {
1299   unsigned int len;
1300
1301   switch (TOKEN_SPELL (token))
1302     {
1303     default:            len = 0;                                break;
1304     case SPELL_STRING:  len = token->val.str.len;               break;
1305     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1306     }
1307   /* 1 for whitespace, 4 for comment delimeters.  */
1308   return len + 5;
1309 }
1310
1311 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1312    already contain the enough space to hold the token's spelling.
1313    Returns a pointer to the character after the last character
1314    written.  */
1315 unsigned char *
1316 cpp_spell_token (pfile, token, buffer)
1317      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1318      const cpp_token *token;
1319      unsigned char *buffer;
1320 {
1321   switch (TOKEN_SPELL (token))
1322     {
1323     case SPELL_OPERATOR:
1324       {
1325         const unsigned char *spelling;
1326         unsigned char c;
1327
1328         if (token->flags & DIGRAPH)
1329           spelling
1330             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1331         else if (token->flags & NAMED_OP)
1332           goto spell_ident;
1333         else
1334           spelling = TOKEN_NAME (token);
1335
1336         while ((c = *spelling++) != '\0')
1337           *buffer++ = c;
1338       }
1339       break;
1340
1341     case SPELL_IDENT:
1342       spell_ident:
1343       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1344       buffer += NODE_LEN (token->val.node);
1345       break;
1346
1347     case SPELL_STRING:
1348       {
1349         int left, right, tag;
1350         switch (token->type)
1351           {
1352           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1353           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1354           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1355           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1356           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1357           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1358           }
1359         if (tag) *buffer++ = tag;
1360         if (left) *buffer++ = left;
1361         memcpy (buffer, token->val.str.text, token->val.str.len);
1362         buffer += token->val.str.len;
1363         if (right) *buffer++ = right;
1364       }
1365       break;
1366
1367     case SPELL_CHAR:
1368       *buffer++ = token->val.c;
1369       break;
1370
1371     case SPELL_NONE:
1372       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1373       break;
1374     }
1375
1376   return buffer;
1377 }
1378
1379 /* Returns a token as a null-terminated string.  The string is
1380    temporary, and automatically freed later.  Useful for diagnostics.  */
1381 unsigned char *
1382 cpp_token_as_text (pfile, token)
1383      cpp_reader *pfile;
1384      const cpp_token *token;
1385 {
1386   unsigned int len = cpp_token_len (token);
1387   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1388
1389   end = cpp_spell_token (pfile, token, start);
1390   end[0] = '\0';
1391
1392   return start;
1393 }
1394
1395 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1396 const char *
1397 cpp_type2name (type)
1398      enum cpp_ttype type;
1399 {
1400   return (const char *) token_spellings[type].name;
1401 }
1402
1403 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1404    for efficiency - to avoid double-buffering.  Also, outputs a space
1405    if PREV_WHITE is flagged.  */
1406 void
1407 cpp_output_token (token, fp)
1408      const cpp_token *token;
1409      FILE *fp;
1410 {
1411   if (token->flags & PREV_WHITE)
1412     putc (' ', fp);
1413
1414   switch (TOKEN_SPELL (token))
1415     {
1416     case SPELL_OPERATOR:
1417       {
1418         const unsigned char *spelling;
1419
1420         if (token->flags & DIGRAPH)
1421           spelling
1422             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1423         else if (token->flags & NAMED_OP)
1424           goto spell_ident;
1425         else
1426           spelling = TOKEN_NAME (token);
1427
1428         ufputs (spelling, fp);
1429       }
1430       break;
1431
1432     spell_ident:
1433     case SPELL_IDENT:
1434       ufputs (NODE_NAME (token->val.node), fp);
1435     break;
1436
1437     case SPELL_STRING:
1438       {
1439         int left, right, tag;
1440         switch (token->type)
1441           {
1442           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1443           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1444           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1445           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1446           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1447           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1448           }
1449         if (tag) putc (tag, fp);
1450         if (left) putc (left, fp);
1451         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1452         if (right) putc (right, fp);
1453       }
1454       break;
1455
1456     case SPELL_CHAR:
1457       putc (token->val.c, fp);
1458       break;
1459
1460     case SPELL_NONE:
1461       /* An error, most probably.  */
1462       break;
1463     }
1464 }
1465
1466 /* Compare two tokens.  */
1467 int
1468 _cpp_equiv_tokens (a, b)
1469      const cpp_token *a, *b;
1470 {
1471   if (a->type == b->type && a->flags == b->flags)
1472     switch (TOKEN_SPELL (a))
1473       {
1474       default:                  /* Keep compiler happy.  */
1475       case SPELL_OPERATOR:
1476         return 1;
1477       case SPELL_CHAR:
1478         return a->val.c == b->val.c; /* Character.  */
1479       case SPELL_NONE:
1480         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1481       case SPELL_IDENT:
1482         return a->val.node == b->val.node;
1483       case SPELL_STRING:
1484         return (a->val.str.len == b->val.str.len
1485                 && !memcmp (a->val.str.text, b->val.str.text,
1486                             a->val.str.len));
1487       }
1488
1489   return 0;
1490 }
1491
1492 /* Determine whether two tokens can be pasted together, and if so,
1493    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1494    be pasted, or the appropriate type for the merged token if they
1495    can.  */
1496 enum cpp_ttype
1497 cpp_can_paste (pfile, token1, token2, digraph)
1498      cpp_reader * pfile;
1499      const cpp_token *token1, *token2;
1500      int* digraph;
1501 {
1502   enum cpp_ttype a = token1->type, b = token2->type;
1503   int cxx = CPP_OPTION (pfile, cplusplus);
1504
1505   /* Treat named operators as if they were ordinary NAMEs.  */
1506   if (token1->flags & NAMED_OP)
1507     a = CPP_NAME;
1508   if (token2->flags & NAMED_OP)
1509     b = CPP_NAME;
1510
1511   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1512     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1513
1514   switch (a)
1515     {
1516     case CPP_GREATER:
1517       if (b == a) return CPP_RSHIFT;
1518       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1519       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1520       break;
1521     case CPP_LESS:
1522       if (b == a) return CPP_LSHIFT;
1523       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1524       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1525       if (CPP_OPTION (pfile, digraphs))
1526         {
1527           if (b == CPP_COLON)
1528             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1529           if (b == CPP_MOD)
1530             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1531         }
1532       break;
1533
1534     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1535     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1536     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1537
1538     case CPP_MINUS:
1539       if (b == a)               return CPP_MINUS_MINUS;
1540       if (b == CPP_GREATER)     return CPP_DEREF;
1541       break;
1542     case CPP_COLON:
1543       if (b == a && cxx)        return CPP_SCOPE;
1544       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1545         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1546       break;
1547
1548     case CPP_MOD:
1549       if (CPP_OPTION (pfile, digraphs))
1550         {
1551           if (b == CPP_GREATER)
1552             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1553           if (b == CPP_COLON)
1554             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1555         }
1556       break;
1557     case CPP_DEREF:
1558       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1559       break;
1560     case CPP_DOT:
1561       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1562       if (b == CPP_NUMBER)      return CPP_NUMBER;
1563       break;
1564
1565     case CPP_HASH:
1566       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1567         /* %:%: digraph */
1568         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1569       break;
1570
1571     case CPP_NAME:
1572       if (b == CPP_NAME)        return CPP_NAME;
1573       if (b == CPP_NUMBER
1574           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1575       if (b == CPP_CHAR
1576           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1577       if (b == CPP_STRING
1578           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1579       break;
1580
1581     case CPP_NUMBER:
1582       if (b == CPP_NUMBER)      return CPP_NUMBER;
1583       if (b == CPP_NAME)        return CPP_NUMBER;
1584       if (b == CPP_DOT)         return CPP_NUMBER;
1585       /* Numbers cannot have length zero, so this is safe.  */
1586       if ((b == CPP_PLUS || b == CPP_MINUS)
1587           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1588         return CPP_NUMBER;
1589       break;
1590
1591     default:
1592       break;
1593     }
1594
1595   return CPP_EOF;
1596 }
1597
1598 /* Returns nonzero if a space should be inserted to avoid an
1599    accidental token paste for output.  For simplicity, it is
1600    conservative, and occasionally advises a space where one is not
1601    needed, e.g. "." and ".2".  */
1602
1603 int
1604 cpp_avoid_paste (pfile, token1, token2)
1605      cpp_reader *pfile;
1606      const cpp_token *token1, *token2;
1607 {
1608   enum cpp_ttype a = token1->type, b = token2->type;
1609   cppchar_t c;
1610
1611   if (token1->flags & NAMED_OP)
1612     a = CPP_NAME;
1613   if (token2->flags & NAMED_OP)
1614     b = CPP_NAME;
1615
1616   c = EOF;
1617   if (token2->flags & DIGRAPH)
1618     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1619   else if (token_spellings[b].category == SPELL_OPERATOR)
1620     c = token_spellings[b].name[0];
1621
1622   /* Quickly get everything that can paste with an '='.  */
1623   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1624     return 1;
1625
1626   switch (a)
1627     {
1628     case CPP_GREATER:   return c == '>' || c == '?';
1629     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1630     case CPP_PLUS:      return c == '+';
1631     case CPP_MINUS:     return c == '-' || c == '>';
1632     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1633     case CPP_MOD:       return c == ':' || c == '>';
1634     case CPP_AND:       return c == '&';
1635     case CPP_OR:        return c == '|';
1636     case CPP_COLON:     return c == ':' || c == '>';
1637     case CPP_DEREF:     return c == '*';
1638     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1639     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1640     case CPP_NAME:      return ((b == CPP_NUMBER
1641                                  && name_p (pfile, &token2->val.str))
1642                                 || b == CPP_NAME
1643                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1644     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1645                                 || c == '.' || c == '+' || c == '-');
1646     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1647                                 && token1->val.c == '@'
1648                                 && (b == CPP_NAME || b == CPP_STRING));
1649     default:            break;
1650     }
1651
1652   return 0;
1653 }
1654
1655 /* Output all the remaining tokens on the current line, and a newline
1656    character, to FP.  Leading whitespace is removed.  */
1657 void
1658 cpp_output_line (pfile, fp)
1659      cpp_reader *pfile;
1660      FILE *fp;
1661 {
1662   cpp_token token;
1663
1664   cpp_get_token (pfile, &token);
1665   token.flags &= ~PREV_WHITE;
1666   while (token.type != CPP_EOF)
1667     {
1668       cpp_output_token (&token, fp);
1669       cpp_get_token (pfile, &token);
1670     }
1671
1672   putc ('\n', fp);
1673 }
1674
/* Returns the value, 0 to 15, of the hexadecimal digit C.  Aborts if
   C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';

  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;

  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;

  /* Callers are expected to have checked C already.  */
  abort ();
}
1688
1689 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1690    failure if cpplib is not parsing C++ or C99.  Such failure is
1691    silent, and no variables are updated.  Otherwise returns 0, and
1692    warns if -Wtraditional.
1693
1694    [lex.charset]: The character designated by the universal character
1695    name \UNNNNNNNN is that character whose character short name in
1696    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1697    universal character name \uNNNN is that character whose character
1698    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1699    for a universal character name is less than 0x20 or in the range
1700    0x7F-0x9F (inclusive), or if the universal character name
1701    designates a character in the basic source character set, then the
1702    program is ill-formed.
1703
1704    We assume that wchar_t is Unicode, so we don't need to do any
1705    mapping.  Is this ever wrong?
1706
1707    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1708    LIMIT is the end of the string or charconst.  PSTR is updated to
1709    point after the UCS on return, and the UCS is written into PC.  */
1710
1711 static int
1712 maybe_read_ucs (pfile, pstr, limit, pc)
1713      cpp_reader *pfile;
1714      const unsigned char **pstr;
1715      const unsigned char *limit;
1716      unsigned int *pc;
1717 {
1718   const unsigned char *p = *pstr;
1719   unsigned int code = 0;
1720   unsigned int c = *pc, length;
1721
1722   /* Only attempt to interpret a UCS for C++ and C99.  */
1723   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1724     return 1;
1725
1726   if (CPP_WTRADITIONAL (pfile))
1727     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1728
1729   length = (c == 'u' ? 4: 8);
1730
1731   if ((size_t) (limit - p) < length)
1732     {
1733       cpp_error (pfile, "incomplete universal-character-name");
1734       /* Skip to the end to avoid more diagnostics.  */
1735       p = limit;
1736     }
1737   else
1738     {
1739       for (; length; length--, p++)
1740         {
1741           c = *p;
1742           if (ISXDIGIT (c))
1743             code = (code << 4) + hex_digit_value (c);
1744           else
1745             {
1746               cpp_error (pfile,
1747                          "non-hex digit '%c' in universal-character-name", c);
1748               /* We shouldn't skip in case there are multibyte chars.  */
1749               break;
1750             }
1751         }
1752     }
1753
1754 #ifdef TARGET_EBCDIC
1755   cpp_error (pfile, "universal-character-name on EBCDIC target");
1756   code = 0x3f;  /* EBCDIC invalid character */
1757 #else
1758  /* True extended characters are OK.  */
1759   if (code >= 0xa0
1760       && !(code & 0x80000000)
1761       && !(code >= 0xD800 && code <= 0xDFFF))
1762     ;
1763   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1764      hex escapes so that this also works with EBCDIC hosts.  */
1765   else if (code == 0x24 || code == 0x40 || code == 0x60)
1766     ;
1767   /* Don't give another error if one occurred above.  */
1768   else if (length == 0)
1769     cpp_error (pfile, "universal-character-name out of range");
1770 #endif
1771
1772   *pstr = p;
1773   *pc = code;
1774   return 0;
1775 }
1776
1777 /* Interpret an escape sequence, and return its value.  PSTR points to
1778    the input pointer, which is just after the backslash.  LIMIT is how
1779    much text we have.  MASK is a bitmask for the precision for the
1780    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1781    interpret escapes that did not exist in traditional C.
1782
1783    Handles all relevant diagnostics.  */
1784
1785 unsigned int
1786 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1787      cpp_reader *pfile;
1788      const unsigned char **pstr;
1789      const unsigned char *limit;
1790      unsigned HOST_WIDE_INT mask;
1791      int traditional;
1792 {
1793   int unknown = 0;
1794   const unsigned char *str = *pstr;
1795   unsigned int c = *str++;
1796
1797   switch (c)
1798     {
1799     case '\\': case '\'': case '"': case '?': break;
1800     case 'b': c = TARGET_BS;      break;
1801     case 'f': c = TARGET_FF;      break;
1802     case 'n': c = TARGET_NEWLINE; break;
1803     case 'r': c = TARGET_CR;      break;
1804     case 't': c = TARGET_TAB;     break;
1805     case 'v': c = TARGET_VT;      break;
1806
1807     case '(': case '{': case '[': case '%':
1808       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1809          '\%' is used to prevent SCCS from getting confused.  */
1810       unknown = CPP_PEDANTIC (pfile);
1811       break;
1812
1813     case 'a':
1814       if (CPP_WTRADITIONAL (pfile))
1815         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1816       if (!traditional)
1817         c = TARGET_BELL;
1818       break;
1819
1820     case 'e': case 'E':
1821       if (CPP_PEDANTIC (pfile))
1822         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1823       c = TARGET_ESC;
1824       break;
1825
1826     case 'u': case 'U':
1827       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1828       break;
1829
1830     case 'x':
1831       if (CPP_WTRADITIONAL (pfile))
1832         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1833
1834       if (!traditional)
1835         {
1836           unsigned int i = 0, overflow = 0;
1837           int digits_found = 0;
1838
1839           while (str < limit)
1840             {
1841               c = *str;
1842               if (! ISXDIGIT (c))
1843                 break;
1844               str++;
1845               overflow |= i ^ (i << 4 >> 4);
1846               i = (i << 4) + hex_digit_value (c);
1847               digits_found = 1;
1848             }
1849
1850           if (!digits_found)
1851             cpp_error (pfile, "\\x used with no following hex digits");
1852
1853           if (overflow | (i != (i & mask)))
1854             {
1855               cpp_pedwarn (pfile, "hex escape sequence out of range");
1856               i &= mask;
1857             }
1858           c = i;
1859         }
1860       break;
1861
1862     case '0':  case '1':  case '2':  case '3':
1863     case '4':  case '5':  case '6':  case '7':
1864       {
1865         unsigned int i = c - '0';
1866         int count = 0;
1867
1868         while (str < limit && ++count < 3)
1869           {
1870             c = *str;
1871             if (c < '0' || c > '7')
1872               break;
1873             str++;
1874             i = (i << 3) + c - '0';
1875           }
1876
1877         if (i != (i & mask))
1878           {
1879             cpp_pedwarn (pfile, "octal escape sequence out of range");
1880             i &= mask;
1881           }
1882         c = i;
1883       }
1884       break;
1885
1886     default:
1887       unknown = 1;
1888       break;
1889     }
1890
1891   if (unknown)
1892     {
1893       if (ISGRAPH (c))
1894         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1895       else
1896         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1897     }
1898
1899   if (c > mask)
1900     cpp_pedwarn (pfile, "escape sequence out of range for character");
1901
1902   *pstr = str;
1903   return c;
1904 }
1905
/* Widths, in bits, that cpp_interpret_charconst uses for narrow and
   wide character constants.  Each MAX_ macro falls back to the
   corresponding plain type size unless it has already been defined.  */
#ifndef MAX_CHAR_TYPE_SIZE
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#ifndef MAX_WCHAR_TYPE_SIZE
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1913
1914 /* Interpret a (possibly wide) character constant in TOKEN.
1915    WARN_MULTI warns about multi-character charconsts, if not
1916    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1917    that did not exist in traditional C.  PCHARS_SEEN points to a
1918    variable that is filled in with the number of characters seen.  */
1919 HOST_WIDE_INT
1920 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1921      cpp_reader *pfile;
1922      const cpp_token *token;
1923      int warn_multi;
1924      int traditional;
1925      unsigned int *pchars_seen;
1926 {
1927   const unsigned char *str = token->val.str.text;
1928   const unsigned char *limit = str + token->val.str.len;
1929   unsigned int chars_seen = 0;
1930   unsigned int width, max_chars, c;
1931   unsigned HOST_WIDE_INT mask;
1932   HOST_WIDE_INT result = 0;
1933
1934 #ifdef MULTIBYTE_CHARS
1935   (void) local_mbtowc (NULL, NULL, 0);
1936 #endif
1937
1938   /* Width in bits.  */
1939   if (token->type == CPP_CHAR)
1940     width = MAX_CHAR_TYPE_SIZE;
1941   else
1942     width = MAX_WCHAR_TYPE_SIZE;
1943
1944   if (width < HOST_BITS_PER_WIDE_INT)
1945     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1946   else
1947     mask = ~0;
1948   max_chars = HOST_BITS_PER_WIDE_INT / width;
1949
1950   while (str < limit)
1951     {
1952 #ifdef MULTIBYTE_CHARS
1953       wchar_t wc;
1954       int char_len;
1955
1956       char_len = local_mbtowc (&wc, str, limit - str);
1957       if (char_len == -1)
1958         {
1959           cpp_warning (pfile, "ignoring invalid multibyte character");
1960           c = *str++;
1961         }
1962       else
1963         {
1964           str += char_len;
1965           c = wc;
1966         }
1967 #else
1968       c = *str++;
1969 #endif
1970
1971       if (c == '\\')
1972         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1973
1974 #ifdef MAP_CHARACTER
1975       if (ISPRINT (c))
1976         c = MAP_CHARACTER (c);
1977 #endif
1978
1979       /* Merge character into result; ignore excess chars.  */
1980       if (++chars_seen <= max_chars)
1981         {
1982           if (width < HOST_BITS_PER_WIDE_INT)
1983             result = (result << width) | (c & mask);
1984           else
1985             result = c;
1986         }
1987     }
1988
1989   if (chars_seen == 0)
1990     cpp_error (pfile, "empty character constant");
1991   else if (chars_seen > max_chars)
1992     {
1993       chars_seen = max_chars;
1994       cpp_warning (pfile, "character constant too long");
1995     }
1996   else if (chars_seen > 1 && !traditional && warn_multi)
1997     cpp_warning (pfile, "multi-character character constant");
1998
1999   /* If char type is signed, sign-extend the constant.  The
2000      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2001   if (token->type == CPP_CHAR && chars_seen)
2002     {
2003       unsigned int nbits = chars_seen * width;
2004       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2005
2006       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2007           || ((result >> (nbits - 1)) & 1) == 0)
2008         result &= mask;
2009       else
2010         result |= ~mask;
2011     }
2012
2013   *pchars_seen = chars_seen;
2014   return result;
2015 }
2016
2017 /* Memory pools.  */
2018
/* Helper type for DEFAULT_ALIGNMENT: a single char followed by a
   union of a double and a pointer, so that the offset of the union
   is the padding the compiler needs to align it.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Offset of U within struct dummy, i.e. the alignment the compiler
   gives to a union of a double and a pointer.  new_chunk rounds chunk
   sizes up to this, and _cpp_init_pool uses it when passed an
   alignment of zero.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2030
2031 static int
2032 chunk_suitable (pool, chunk, size)
2033      cpp_pool *pool;
2034      cpp_chunk *chunk;
2035      unsigned int size;
2036 {
2037   /* Being at least twice SIZE means we can use memcpy in
2038      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2039      anyway.  */
2040   return (chunk && pool->locked != chunk
2041           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2042 }
2043
/* Make POOL continue in another chunk, carrying the POOL_ROOM bytes
   that start at POOL_FRONT of the current chunk over to the base of
   the new one.  The chunk after the current one is reused if
   chunk_suitable accepts it for LEN plus those carried bytes;
   otherwise a new chunk is allocated and linked in directly after
   the current one.

   If PTR is non-null, *PTR points to a char in the old pool, and is
   updated to point to the same char in the new pool.

   Returns POOL_LIMIT of the pool once the new chunk is current, i.e.
   the end of the new pool.

   NOTE(review): POOL_ROOM, POOL_FRONT, POOL_SIZE and POOL_LIMIT are
   defined outside this part of the file; presumably they read the
   unused byte count, first unused byte, total size and end of the
   pool's current chunk -- confirm against their definitions.  */
unsigned char *
_cpp_next_chunk (pool, len, ptr)
     cpp_pool *pool;
     unsigned int len;
     unsigned char **ptr;
{
  cpp_chunk *chunk = pool->cur->next;

  /* LEN is the minimum size we want in the new pool: the caller's
     request plus the bytes that will be copied across.  */
  len += POOL_ROOM (pool);
  if (! chunk_suitable (pool, chunk, len))
    {
      chunk = new_chunk (POOL_SIZE (pool) * 2 + len);

      /* Splice the new chunk in between the current chunk and its
	 successor.  */
      chunk->next = pool->cur->next;
      pool->cur->next = chunk;
    }

  /* Update the pointer before changing chunk's front.  CHUNK can be
     the current chunk itself when the pool's list is circular (see
     _cpp_init_pool), in which case POOL_FRONT reads the very field
     that is reset below.  */
  if (ptr)
    *ptr += chunk->base - POOL_FRONT (pool);

  /* Source and destination cannot overlap: chunk_suitable guarantees
     the destination spans at least twice the bytes copied, and
     new_chunk returns separate storage.  */
  memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
  chunk->front = chunk->base;

  pool->cur = chunk;
  return POOL_LIMIT (pool);
}
2074
2075 static cpp_chunk *
2076 new_chunk (size)
2077      unsigned int size;
2078 {
2079   unsigned char *base;
2080   cpp_chunk *result;
2081
2082   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2083   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2084   /* Put the chunk descriptor at the end.  Then chunk overruns will
2085      cause obvious chaos.  */
2086   result = (cpp_chunk *) (base + size);
2087   result->base = base;
2088   result->front = base;
2089   result->limit = base + size;
2090   result->next = 0;
2091
2092   return result;
2093 }
2094
2095 void
2096 _cpp_init_pool (pool, size, align, temp)
2097      cpp_pool *pool;
2098      unsigned int size, align, temp;
2099 {
2100   if (align == 0)
2101     align = DEFAULT_ALIGNMENT;
2102   if (align & (align - 1))
2103     abort ();
2104   pool->align = align;
2105   pool->cur = new_chunk (size);
2106   pool->locked = 0;
2107   pool->locks = 0;
2108   if (temp)
2109     pool->cur->next = pool->cur;
2110 }
2111
2112 void
2113 _cpp_lock_pool (pool)
2114      cpp_pool *pool;
2115 {
2116   if (pool->locks++ == 0)
2117     pool->locked = pool->cur;
2118 }
2119
2120 void
2121 _cpp_unlock_pool (pool)
2122      cpp_pool *pool;
2123 {
2124   if (--pool->locks == 0)
2125     pool->locked = 0;
2126 }
2127
2128 void
2129 _cpp_free_pool (pool)
2130      cpp_pool *pool;
2131 {
2132   cpp_chunk *chunk = pool->cur, *next;
2133
2134   do
2135     {
2136       next = chunk->next;
2137       free (chunk->base);
2138       chunk = next;
2139     }
2140   while (chunk && chunk != pool->cur);
2141 }
2142
2143 /* Reserve LEN bytes from a memory pool.  */
2144 unsigned char *
2145 _cpp_pool_reserve (pool, len)
2146      cpp_pool *pool;
2147      unsigned int len;
2148 {
2149   len = POOL_ALIGN (len, pool->align);
2150   if (len > (unsigned int) POOL_ROOM (pool))
2151     _cpp_next_chunk (pool, len, 0);
2152
2153   return POOL_FRONT (pool);
2154 }
2155
2156 /* Allocate LEN bytes from a memory pool.  */
2157 unsigned char *
2158 _cpp_pool_alloc (pool, len)
2159      cpp_pool *pool;
2160      unsigned int len;
2161 {
2162   unsigned char *result = _cpp_pool_reserve (pool, len);
2163
2164   POOL_COMMIT (pool, len);
2165   return result;
2166 }