gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 102                                    const unsigned char *, unsigned int *));
 103
 104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 106 static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine:
 109
 110    Compares, the token TOKEN to the NUL-terminated string STRING.
 111    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   if (token->type != CPP_NAME)
 119     return 0;
 120
 121   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 122 }
 123
 124 /* Call when meeting a newline.  Returns the character after the newline
 125    (or carriage-return newline combination), or EOF.  */
 126 static cppchar_t
 127 handle_newline (pfile, newline_char)
 128      cpp_reader *pfile;
 129      cppchar_t newline_char;
 130 {
 131   cpp_buffer *buffer;
 132   cppchar_t next = EOF;
 133
 134   pfile->line++;
 135   buffer = pfile->buffer;
 136   buffer->col_adjust = 0;
 137   buffer->line_base = buffer->cur;
 138
 139   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 140   if (buffer->cur < buffer->rlimit)
 141     {
 142       next = *buffer->cur++;
 143       if (next + newline_char == '\r' + '\n')
 144         {
 145           buffer->line_base = buffer->cur;
 146           if (buffer->cur < buffer->rlimit)
 147             next = *buffer->cur++;
 148           else
 149             next = EOF;
 150         }
 151     }
 152
 153   buffer->read_ahead = next;
 154   return next;
 155 }
 156
 157 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 158    encountered.  It warns if necessary, and returns true if the
 159    trigraph should be honoured.  FROM_CHAR is the third character of a
 160    trigraph, and presumed to be the previous character for position
 161    reporting.  */
 162 static int
 163 trigraph_ok (pfile, from_char)
 164      cpp_reader *pfile;
 165      cppchar_t from_char;
 166 {
 167   int accept = CPP_OPTION (pfile, trigraphs);
 168
 169   /* Don't warn about trigraphs in comments.  */
 170   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 171     {
 172       cpp_buffer *buffer = pfile->buffer;
 173
 174       if (accept)
 175         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 176                                "trigraph ??%c converted to %c",
 177                                (int) from_char,
 178                                (int) _cpp_trigraph_map[from_char]);
 179       else if (buffer->cur != buffer->last_Wtrigraphs)
 180         {
 181           buffer->last_Wtrigraphs = buffer->cur;
 182           cpp_warning_with_line (pfile, pfile->line,
 183                                  CPP_BUF_COL (buffer) - 2,
 184                                  "trigraph ??%c ignored", (int) from_char);
 185         }
 186     }
 187
 188   return accept;
 189 }
 190
 191 /* Assumes local variables buffer and result.  */
 192 #define ACCEPT_CHAR(t) \
 193   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 194
 195 /* When we move to multibyte character sets, add to these something
 196    that saves and restores the state of the multibyte conversion
 197    library.  This probably involves saving and restoring a "cookie".
 198    In the case of glibc it is an 8-byte structure, so is not a high
 199    overhead operation.  In any case, it's out of the fast path.  */
 200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 202
 203 /* Skips any escaped newlines introduced by NEXT, which is either a
 204    '?' or a '\\'.  Returns the next character, which will also have
 205    been placed in buffer->read_ahead.  This routine performs
 206    preprocessing stages 1 and 2 of the ISO C standard.  */
 207 static cppchar_t
 208 skip_escaped_newlines (buffer, next)
 209      cpp_buffer *buffer;
 210      cppchar_t next;
 211 {
 212   /* Only do this if we apply stages 1 and 2.  */
 213   if (!buffer->from_stage3)
 214     {
 215       cppchar_t next1;
 216       const unsigned char *saved_cur;
 217       int space;
 218
 219       do
 220         {
 221           if (buffer->cur == buffer->rlimit)
 222             break;
 223
 224           SAVE_STATE ();
 225           if (next == '?')
 226             {
 227               next1 = *buffer->cur++;
 228               if (next1 != '?' || buffer->cur == buffer->rlimit)
 229                 {
 230                   RESTORE_STATE ();
 231                   break;
 232                 }
 233
 234               next1 = *buffer->cur++;
 235               if (!_cpp_trigraph_map[next1]
 236                   || !trigraph_ok (buffer->pfile, next1))
 237                 {
 238                   RESTORE_STATE ();
 239                   break;
 240                 }
 241
 242               /* We have a full trigraph here.  */
 243               next = _cpp_trigraph_map[next1];
 244               if (next != '\\' || buffer->cur == buffer->rlimit)
 245                 break;
 246               SAVE_STATE ();
 247             }
 248
 249           /* We have a backslash, and room for at least one more character.  */
 250           space = 0;
 251           do
 252             {
 253               next1 = *buffer->cur++;
 254               if (!is_nvspace (next1))
 255                 break;
 256               space = 1;
 257             }
 258           while (buffer->cur < buffer->rlimit);
 259
 260           if (!is_vspace (next1))
 261             {
 262               RESTORE_STATE ();
 263               break;
 264             }
 265
 266           if (space && !buffer->pfile->state.lexing_comment)
 267             cpp_warning (buffer->pfile,
 268                          "backslash and newline separated by space");
 269
 270           next = handle_newline (buffer->pfile, next1);
 271           if (next == EOF)
 272             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 273         }
 274       while (next == '\\' || next == '?');
 275     }
 276
 277   buffer->read_ahead = next;
 278   return next;
 279 }
 280
 281 /* Obtain the next character, after trigraph conversion and skipping
 282    an arbitrary string of escaped newlines.  The common case of no
 283    trigraphs or escaped newlines falls through quickly.  */
 284 static cppchar_t
 285 get_effective_char (buffer)
 286      cpp_buffer *buffer;
 287 {
 288   cppchar_t next = EOF;
 289
 290   if (buffer->cur < buffer->rlimit)
 291     {
 292       next = *buffer->cur++;
 293
 294       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 295          can introduce escaped newlines, which we want to skip, or
 296          UCNs, which, depending upon lexer state, we will handle in
 297          the future.  */
 298       if (next == '?' || next == '\\')
 299         next = skip_escaped_newlines (buffer, next);
 300     }
 301
 302   buffer->read_ahead = next;
 303   return next;
 304 }
 305
 306 /* Skip a C-style block comment.  We find the end of the comment by
 307    seeing if an asterisk is before every '/' we encounter.  Returns
 308    non-zero if comment terminated by EOF, zero otherwise.  */
 309 static int
 310 skip_block_comment (pfile)
 311      cpp_reader *pfile;
 312 {
 313   cpp_buffer *buffer = pfile->buffer;
 314   cppchar_t c = EOF, prevc = EOF;
 315
 316   pfile->state.lexing_comment = 1;
 317   while (buffer->cur != buffer->rlimit)
 318     {
 319       prevc = c, c = *buffer->cur++;
 320
 321     next_char:
 322       /* FIXME: For speed, create a new character class of characters
 323          of interest inside block comments.  */
 324       if (c == '?' || c == '\\')
 325         c = skip_escaped_newlines (buffer, c);
 326
 327       /* People like decorating comments with '*', so check for '/'
 328          instead for efficiency.  */
 329       if (c == '/')
 330         {
 331           if (prevc == '*')
 332             break;
 333
 334           /* Warn about potential nested comments, but not if the '/'
 335              comes immediately before the true comment delimeter.
 336              Don't bother to get it right across escaped newlines.  */
 337           if (CPP_OPTION (pfile, warn_comments)
 338               && buffer->cur != buffer->rlimit)
 339             {
 340               prevc = c, c = *buffer->cur++;
 341               if (c == '*' && buffer->cur != buffer->rlimit)
 342                 {
 343                   prevc = c, c = *buffer->cur++;
 344                   if (c != '/')
 345                     cpp_warning_with_line (pfile, pfile->line,
 346                                            CPP_BUF_COL (buffer) - 2,
 347                                            "\"/*\" within comment");
 348                 }
 349               goto next_char;
 350             }
 351         }
 352       else if (is_vspace (c))
 353         {
 354           prevc = c, c = handle_newline (pfile, c);
 355           goto next_char;
 356         }
 357       else if (c == '\t')
 358         adjust_column (pfile);
 359     }
 360
 361   pfile->state.lexing_comment = 0;
 362   buffer->read_ahead = EOF;
 363   return c != '/' || prevc != '*';
 364 }
 365
 366 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 367    non-zero if a multiline comment.  The following new line, if any,
 368    is left in buffer->read_ahead.  */
 369 static int
 370 skip_line_comment (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_line = pfile->line;
 375   cppchar_t c;
 376
 377   pfile->state.lexing_comment = 1;
 378   do
 379     {
 380       c = EOF;
 381       if (buffer->cur == buffer->rlimit)
 382         break;
 383
 384       c = *buffer->cur++;
 385       if (c == '?' || c == '\\')
 386         c = skip_escaped_newlines (buffer, c);
 387     }
 388   while (!is_vspace (c));
 389
 390   pfile->state.lexing_comment = 0;
 391   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 392   return orig_line != pfile->line;
 393 }
 394
 395 /* pfile->buffer->cur is one beyond the \t character.  Update
 396    col_adjust so we track the column correctly.  */
 397 static void
 398 adjust_column (pfile)
 399      cpp_reader *pfile;
 400 {
 401   cpp_buffer *buffer = pfile->buffer;
 402   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 403
 404   /* Round it up to multiple of the tabstop, but subtract 1 since the
 405      tab itself occupies a character position.  */
 406   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 407                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 408 }
 409
 410 /* Skips whitespace, saving the next non-whitespace character.
 411    Adjusts pfile->col_adjust to account for tabs.  Without this,
 412    tokens might be assigned an incorrect column.  */
 413 static void
 414 skip_whitespace (pfile, c)
 415      cpp_reader *pfile;
 416      cppchar_t c;
 417 {
 418   cpp_buffer *buffer = pfile->buffer;
 419   unsigned int warned = 0;
 420
 421   do
 422     {
 423       /* Horizontal space always OK.  */
 424       if (c == ' ')
 425         ;
 426       else if (c == '\t')
 427         adjust_column (pfile);
 428       /* Just \f \v or \0 left.  */
 429       else if (c == '\0')
 430         {
 431           if (!warned)
 432             {
 433               cpp_warning (pfile, "null character(s) ignored");
 434               warned = 1;
 435             }
 436         }
 437       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 438         cpp_pedwarn_with_line (pfile, pfile->line,
 439                                CPP_BUF_COL (buffer),
 440                                "%s in preprocessing directive",
 441                                c == '\f' ? "form feed" : "vertical tab");
 442
 443       c = EOF;
 444       if (buffer->cur == buffer->rlimit)
 445         break;
 446       c = *buffer->cur++;
 447     }
 448   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 449   while (is_nvspace (c));
 450
 451   /* Remember the next character.  */
 452   buffer->read_ahead = c;
 453 }
 454
 455 /* See if the characters of a number token are valid in a name (no
 456    '.', '+' or '-').  */
 457 static int
 458 name_p (pfile, string)
 459      cpp_reader *pfile;
 460      const cpp_string *string;
 461 {
 462   unsigned int i;
 463
 464   for (i = 0; i < string->len; i++)
 465     if (!is_idchar (string->text[i]))
 466       return 0;
 467
 468   return 1;
 469 }
 470
 471 /* Parse an identifier, skipping embedded backslash-newlines.
 472    Calculate the hash value of the token while parsing, for improved
 473    performance.  The hashing algorithm *must* match cpp_lookup().  */
 474
 475 static cpp_hashnode *
 476 parse_identifier (pfile, c)
 477      cpp_reader *pfile;
 478      cppchar_t c;
 479 {
 480   cpp_hashnode *result;
 481   cpp_buffer *buffer = pfile->buffer;
 482   unsigned int saw_dollar = 0, len;
 483   struct obstack *stack = &pfile->hash_table->stack;
 484
 485   do
 486     {
 487       do
 488         {
 489           obstack_1grow (stack, c);
 490
 491           if (c == '$')
 492             saw_dollar++;
 493
 494           c = EOF;
 495           if (buffer->cur == buffer->rlimit)
 496             break;
 497
 498           c = *buffer->cur++;
 499         }
 500       while (is_idchar (c));
 501
 502       /* Potential escaped newline?  */
 503       if (c != '?' && c != '\\')
 504         break;
 505       c = skip_escaped_newlines (buffer, c);
 506     }
 507   while (is_idchar (c));
 508
 509   /* Remember the next character.  */
 510   buffer->read_ahead = c;
 511
 512   /* $ is not a identifier character in the standard, but is commonly
 513      accepted as an extension.  Don't warn about it in skipped
 514      conditional blocks.  */
 515   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 516     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 517
 518   /* Identifiers are null-terminated.  */
 519   len = obstack_object_size (stack);
 520   obstack_1grow (stack, '\0');
 521
 522   /* This routine commits the memory if necessary.  */
 523   result = (cpp_hashnode *)
 524     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 525
 526   /* Some identifiers require diagnostics when lexed.  */
 527   if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
 528     {
 529       /* It is allowed to poison the same identifier twice.  */
 530       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 531         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 532                    NODE_NAME (result));
 533
 534       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 535          replacement list of a variadic macro.  */
 536       if (result == pfile->spec_nodes.n__VA_ARGS__
 537           && !pfile->state.va_args_ok)
 538         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 539     }
 540
 541   return result;
 542 }
 543
 544 /* Parse a number, skipping embedded backslash-newlines.  */
 545 static void
 546 parse_number (pfile, number, c, leading_period)
 547      cpp_reader *pfile;
 548      cpp_string *number;
 549      cppchar_t c;
 550      int leading_period;
 551 {
 552   cpp_buffer *buffer = pfile->buffer;
 553   cpp_pool *pool = &pfile->ident_pool;
 554   unsigned char *dest, *limit;
 555
 556   dest = POOL_FRONT (pool);
 557   limit = POOL_LIMIT (pool);
 558
 559   /* Place a leading period.  */
 560   if (leading_period)
 561     {
 562       if (dest >= limit)
 563         limit = _cpp_next_chunk (pool, 0, &dest);
 564       *dest++ = '.';
 565     }
 566
 567   do
 568     {
 569       do
 570         {
 571           /* Need room for terminating null.  */
 572           if (dest + 1 >= limit)
 573             limit = _cpp_next_chunk (pool, 0, &dest);
 574           *dest++ = c;
 575
 576           c = EOF;
 577           if (buffer->cur == buffer->rlimit)
 578             break;
 579
 580           c = *buffer->cur++;
 581         }
 582       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 583
 584       /* Potential escaped newline?  */
 585       if (c != '?' && c != '\\')
 586         break;
 587       c = skip_escaped_newlines (buffer, c);
 588     }
 589   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 590
 591   /* Remember the next character.  */
 592   buffer->read_ahead = c;
 593
 594   /* Null-terminate the number.  */
 595   *dest = '\0';
 596
 597   number->text = POOL_FRONT (pool);
 598   number->len = dest - number->text;
 599   POOL_COMMIT (pool, number->len + 1);
 600 }
 601
 602 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 603 static void
 604 unterminated (pfile, term)
 605      cpp_reader *pfile;
 606      int term;
 607 {
 608   cpp_error (pfile, "missing terminating %c character", term);
 609
 610   if (term == '\"' && pfile->mlstring_pos.line
 611       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 612     {
 613       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 614                            pfile->mlstring_pos.col,
 615                            "possible start of unterminated string literal");
 616       pfile->mlstring_pos.line = 0;
 617     }
 618 }
 619
 620 /* Subroutine of parse_string.  */
 621 static int
 622 unescaped_terminator_p (pfile, dest)
 623      cpp_reader *pfile;
 624      const unsigned char *dest;
 625 {
 626   const unsigned char *start, *temp;
 627
 628   /* In #include-style directives, terminators are not escapeable.  */
 629   if (pfile->state.angled_headers)
 630     return 1;
 631
 632   start = POOL_FRONT (&pfile->ident_pool);
 633
 634   /* An odd number of consecutive backslashes represents an escaped
 635      terminator.  */
 636   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 637     ;
 638
 639   return ((dest - temp) & 1) == 0;
 640 }
 641
 642 /* Parses a string, character constant, or angle-bracketed header file
 643    name.  Handles embedded trigraphs and escaped newlines.  The stored
 644    string is guaranteed NUL-terminated, but it is not guaranteed that
 645    this is the first NUL since embedded NULs are preserved.
 646
 647    Multi-line strings are allowed, but they are deprecated.  */
 648 static void
 649 parse_string (pfile, token, terminator)
 650      cpp_reader *pfile;
 651      cpp_token *token;
 652      cppchar_t terminator;
 653 {
 654   cpp_buffer *buffer = pfile->buffer;
 655   cpp_pool *pool = &pfile->ident_pool;
 656   unsigned char *dest, *limit;
 657   cppchar_t c;
 658   bool warned_nulls = false, warned_multi = false;
 659
 660   dest = POOL_FRONT (pool);
 661   limit = POOL_LIMIT (pool);
 662
 663   for (;;)
 664     {
 665       if (buffer->cur == buffer->rlimit)
 666         c = EOF;
 667       else
 668         c = *buffer->cur++;
 669
 670     have_char:
 671       /* We need space for the terminating NUL.  */
 672       if (dest >= limit)
 673         limit = _cpp_next_chunk (pool, 0, &dest);
 674
 675       if (c == EOF)
 676         {
 677           unterminated (pfile, terminator);
 678           break;
 679         }
 680
 681       /* Handle trigraphs, escaped newlines etc.  */
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (buffer, c);
 684
 685       if (c == terminator && unescaped_terminator_p (pfile, dest))
 686         {
 687           c = EOF;
 688           break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* In assembly language, silently terminate string and
 693              character literals at end of line.  This is a kludge
 694              around not knowing where comments are.  */
 695           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 696             break;
 697
 698           /* Character constants and header names may not extend over
 699              multiple lines.  In Standard C, neither may strings.
 700              Unfortunately, we accept multiline strings as an
 701              extension, except in #include family directives.  */
 702           if (terminator != '"' || pfile->state.angled_headers)
 703             {
 704               unterminated (pfile, terminator);
 705               break;
 706             }
 707
 708           if (!warned_multi)
 709             {
 710               warned_multi = true;
 711               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 712             }
 713
 714           if (pfile->mlstring_pos.line == 0)
 715             pfile->mlstring_pos = pfile->lexer_pos;
 716
 717           c = handle_newline (pfile, c);
 718           *dest++ = '\n';
 719           goto have_char;
 720         }
 721       else if (c == '\0' && !warned_nulls)
 722         {
 723           warned_nulls = true;
 724           cpp_warning (pfile, "null character(s) preserved in literal");
 725         }
 726
 727       *dest++ = c;
 728     }
 729
 730   /* Remember the next character.  */
 731   buffer->read_ahead = c;
 732   *dest = '\0';
 733
 734   token->val.str.text = POOL_FRONT (pool);
 735   token->val.str.len = dest - token->val.str.text;
 736   POOL_COMMIT (pool, token->val.str.len + 1);
 737 }
 738
 739 /* The stored comment includes the comment start and any terminator.  */
 740 static void
 741 save_comment (pfile, token, from)
 742      cpp_reader *pfile;
 743      cpp_token *token;
 744      const unsigned char *from;
 745 {
 746   unsigned char *buffer;
 747   unsigned int len;
 748
 749   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 750   /* C++ comments probably (not definitely) have moved past a new
 751      line, which we don't want to save in the comment.  */
 752   if (pfile->buffer->read_ahead != EOF)
 753     len--;
 754   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 755
 756   token->type = CPP_COMMENT;
 757   token->val.str.len = len;
 758   token->val.str.text = buffer;
 759
 760   buffer[0] = '/';
 761   memcpy (buffer + 1, from, len - 1);
 762 }
 763
 764 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 765    want to avoid stepping back when lexing %:%X.  */
 766 static void
 767 lex_percent (buffer, result)
 768      cpp_buffer *buffer;
 769      cpp_token *result;
 770 {
 771   cppchar_t c;
 772
 773   result->type = CPP_MOD;
 774   /* Parsing %:%X could leave an extra character.  */
 775   if (buffer->extra_char == EOF)
 776     c = get_effective_char (buffer);
 777   else
 778     {
 779       c = buffer->read_ahead = buffer->extra_char;
 780       buffer->extra_char = EOF;
 781     }
 782
 783   if (c == '=')
 784     ACCEPT_CHAR (CPP_MOD_EQ);
 785   else if (CPP_OPTION (buffer->pfile, digraphs))
 786     {
 787       if (c == ':')
 788         {
 789           result->flags |= DIGRAPH;
 790           ACCEPT_CHAR (CPP_HASH);
 791           if (get_effective_char (buffer) == '%')
 792             {
 793               buffer->extra_char = get_effective_char (buffer);
 794               if (buffer->extra_char == ':')
 795                 {
 796                   buffer->extra_char = EOF;
 797                   ACCEPT_CHAR (CPP_PASTE);
 798                 }
 799               else
 800                 /* We'll catch the extra_char when we're called back.  */
 801                 buffer->read_ahead = '%';
 802             }
 803         }
 804       else if (c == '>')
 805         {
 806           result->flags |= DIGRAPH;
 807           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 808         }
 809     }
 810 }
 811
 812 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 813    want to avoid stepping back when lexing '...' or '.123'.  In the
 814    latter case we should also set a flag for parse_number.  */
 815 static void
 816 lex_dot (pfile, result)
 817      cpp_reader *pfile;
 818      cpp_token *result;
 819 {
 820   cpp_buffer *buffer = pfile->buffer;
 821   cppchar_t c;
 822
 823   /* Parsing ..X could leave an extra character.  */
 824   if (buffer->extra_char == EOF)
 825     c = get_effective_char (buffer);
 826   else
 827     {
 828       c = buffer->read_ahead = buffer->extra_char;
 829       buffer->extra_char = EOF;
 830     }
 831
 832   /* All known character sets have 0...9 contiguous.  */
 833   if (c >= '0' && c <= '9')
 834     {
 835       result->type = CPP_NUMBER;
 836       parse_number (pfile, &result->val.str, c, 1);
 837     }
 838   else
 839     {
 840       result->type = CPP_DOT;
 841       if (c == '.')
 842         {
 843           buffer->extra_char = get_effective_char (buffer);
 844           if (buffer->extra_char == '.')
 845             {
 846               buffer->extra_char = EOF;
 847               ACCEPT_CHAR (CPP_ELLIPSIS);
 848             }
 849           else
 850             /* We'll catch the extra_char when we're called back.  */
 851             buffer->read_ahead = '.';
 852         }
 853       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 854         ACCEPT_CHAR (CPP_DOT_STAR);
 855     }
 856 }
 857
     /* Lex the next token from PFILE's current buffer into RESULT,
        filling in its type, flags, line and column, plus a value for the
        token kinds that carry one.

        Horizontal whitespace is not returned; it only sets PREV_WHITE on
        the following token.  Comments are treated the same way unless
        pfile->state.save_comments is set, in which case save_comment
        turns the comment into a token.  A '#' found at the beginning of
        a line, outside macro arguments and directives, is passed to
        _cpp_handle_directive.  While pfile->state.skipping is set and
        we are not in a directive, lexed tokens are discarded and lexing
        continues.

        CPP_EOF is returned for the newline that ends a directive, for a
        line-initial '#' while state.parsing_args is 2 (after an error),
        and at the end of a buffer, unless that buffer could be popped
        and its return_at_eof flag was clear, in which case lexing
        restarts with whatever pfile->buffer is then.

        NOTE(review): ACCEPT_CHAR and get_effective_char are defined
        elsewhere; from their use here they appear to consume the
        look-ahead character (setting result->type) and to fetch the next
        character after escaped newlines, respectively -- confirm.  */
 858 void
 859 _cpp_lex_token (pfile, result)
 860      cpp_reader *pfile;
 861      cpp_token *result;
 862 {
 863   cppchar_t c;
 864   cpp_buffer *buffer;
 865   const unsigned char *comment_start;
 866   int bol;
 867
       /* Start of a fresh token: re-read the current buffer (it may have
          changed), move the flags saved for this token into RESULT, and
          record in BOL whether we are at the beginning of a line.  BOL
          is tested at do_hash to decide whether a '#' may introduce a
          directive.  */
 868  next_token:
 869   buffer = pfile->buffer;
 870   result->flags = buffer->saved_flags;
 871   buffer->saved_flags = 0;
 872   bol = (buffer->cur <= buffer->line_base + 1
 873          && pfile->lexer_pos.output_line == pfile->line);
 874  next_char:
 875   pfile->lexer_pos.line = pfile->line;
 876   result->line = pfile->line;
 877  next_char2:
 878   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 879
       /* Prefer a character left in the look-ahead slot; otherwise take
          the next one from the buffer.  If the buffer is exhausted too,
          C stays EOF and the EOF case below handles it.  */
 880   c = buffer->read_ahead;
 881   if (c == EOF && buffer->cur < buffer->rlimit)
 882     {
 883       c = *buffer->cur++;
 884       pfile->lexer_pos.col++;
 885     }
 886   result->col = pfile->lexer_pos.col;
 887
       /* Mark the look-ahead slot empty; C holds the character to
          dispatch on.  */
 888  do_switch:
 889   buffer->read_ahead = EOF;
 890   switch (c)
 891     {
 892     case EOF:
 893       /* Non-empty files should end in a newline.  Don't warn for
 894          command line and _Pragma buffers.  */
 895       if (pfile->lexer_pos.col != 0)
 896         {
 897           /* Account for the missing \n, prevent multiple warnings.  */
 898           pfile->line++;
 899           pfile->lexer_pos.col = 0;
 900           if (!buffer->from_stage3)
 901             cpp_pedwarn (pfile, "no newline at end of file");
 902         }
 903
 904       /* To prevent bogus diagnostics, only pop the buffer when
 905          in-progress directives and arguments have been taken care of.
 906          Decrement the line to terminate an in-progress directive.  */
 907       if (pfile->state.in_directive)
 908         pfile->lexer_pos.output_line = pfile->line--;
 909       else if (! pfile->state.parsing_args)
 910         {
 911           /* Don't pop the last buffer.  */
 912           if (buffer->prev)
 913             {
                   /* Read the flag before the pop; BUFFER is not used
                      again after _cpp_pop_buffer.  */
 914               unsigned char stop = buffer->return_at_eof;
 915
 916               _cpp_pop_buffer (pfile);
 917               /* Push the next -included file, if any.  */
 918               if (!pfile->buffer->prev)
 919                 _cpp_push_next_buffer (pfile);
 920               if (!stop)
 921                 goto next_token;
 922             }
 923         }
 924       result->type = CPP_EOF;
 925       return;
 926
           /* Horizontal whitespace (and NUL) is skipped; it only marks
              the token that follows with PREV_WHITE.  */
 927     case ' ': case '\t': case '\f': case '\v': case '\0':
 928       skip_whitespace (pfile, c);
 929       result->flags |= PREV_WHITE;
 930       goto next_char2;
 931
 932     case '\n': case '\r':
 933       if (pfile->state.in_directive)
 934         {
               /* A newline ends the directive: report it as CPP_EOF.
                  While parsing macro arguments the newline is put back
                  in the look-ahead slot instead of being consumed.  */
 935           result->type = CPP_EOF;
 936           if (pfile->state.parsing_args)
 937             buffer->read_ahead = c;
 938           else
 939             {
 940               handle_newline (pfile, c);
 941               /* Decrementing pfile->line allows directives to
 942                  recognise that the newline has been seen, and also
 943                  means that diagnostics don't point to the next line.  */
 944               pfile->lexer_pos.output_line = pfile->line--;
 945             }
 946           return;
 947         }
 948
 949       handle_newline (pfile, c);
 950       /* This is a new line, so clear any white space flag.  Newlines
 951          in arguments are white space (6.10.3.10); parse_arg takes
 952          care of that.  */
 953       result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 954       bol = 1;
 955       if (pfile->state.parsing_args != 2)
 956         pfile->lexer_pos.output_line = pfile->line;
 957       goto next_char;
 958
 959     case '?':
 960     case '\\':
 961       /* These could start an escaped newline, or '?' a trigraph.  Let
 962          skip_escaped_newlines do all the work.  */
 963       {
 964         unsigned int line = pfile->line;
 965
 966         c = skip_escaped_newlines (buffer, c);
 967         if (line != pfile->line)
 968           /* We had at least one escaped newline of some sort, and the
 969              next character is in buffer->read_ahead.  Update the
 970              token's line and column.  */
 971             goto next_char;
 972
 973         /* We are either the original '?' or '\\', or a trigraph.  */
 974         result->type = CPP_QUERY;
 975         buffer->read_ahead = EOF;
             /* A lone backslash becomes CPP_OTHER; any character other
                than '?' (a converted trigraph) is dispatched afresh;
                a plain '?' keeps the CPP_QUERY type set above.  */
 976         if (c == '\\')
 977           goto random_char;
 978         else if (c != '?')
 979           goto do_switch;
 980       }
 981       break;
 982
 983     case '0': case '1': case '2': case '3': case '4':
 984     case '5': case '6': case '7': case '8': case '9':
 985       result->type = CPP_NUMBER;
 986       parse_number (pfile, &result->val.str, c, 0);
 987       break;
 988
 989     case '$':
 990       if (!CPP_OPTION (pfile, dollars_in_ident))
 991         goto random_char;
 992       /* Fall through...  */
 993
 994     case '_':
 995     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 996     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 997     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 998     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 999     case 'y': case 'z':
1000     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1001     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1002     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1003     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1004     case 'Y': case 'Z':
1005       result->type = CPP_NAME;
1006       result->val.node = parse_identifier (pfile, c);
1007
1008       /* 'L' may introduce wide characters or strings.  */
1009       if (result->val.node == pfile->spec_nodes.n_L)
1010         {
1011           c = buffer->read_ahead; /* For make_string.  */
1012           if (c == '\'' || c == '"')
1013             {
1014               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1015               goto make_string;
1016             }
1017         }
1018       /* Convert named operators to their proper types.  */
1019       else if (result->val.node->flags & NODE_OPERATOR)
1020         {
               /* The type becomes the operator's, but val.node is kept
                  and NAMED_OP is set on the token.  */
1021           result->flags |= NAMED_OP;
1022           result->type = result->val.node->value.operator;
1023         }
1024       break;
1025
1026     case '\'':
1027     case '"':
1028       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Also reached for L'..'/L".." and header names; C is the
              terminating character passed to parse_string.  */
1029     make_string:
1030       parse_string (pfile, result, c);
1031       break;
1032
1033     case '/':
1034       /* A potential block or line comment.  */
1035       comment_start = buffer->cur;
1036       result->type = CPP_DIV;
1037       c = get_effective_char (buffer);
1038       if (c == '=')
1039         ACCEPT_CHAR (CPP_DIV_EQ);
           /* Not a comment opener: the token is '/' or '/='.  */
1040       if (c != '/' && c != '*')
1041         break;
1042
1043       if (c == '*')
1044         {
1045           if (skip_block_comment (pfile))
1046             cpp_error (pfile, "unterminated comment");
1047         }
1048       else
1049         {
               /* Line comments are only recognised when enabled or in a
                  system header; otherwise this is a plain CPP_DIV.  */
1050           if (!CPP_OPTION (pfile, cplusplus_comments)
1051               && !CPP_IN_SYSTEM_HEADER (pfile))
1052             break;
1053
1054           /* Warn about comments only if pedantically GNUC89, and not
1055              in system headers.  */
1056           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1057               && ! buffer->warned_cplusplus_comments)
1058             {
1059               cpp_pedwarn (pfile,
1060                            "C++ style comments are not allowed in ISO C89");
1061               cpp_pedwarn (pfile,
1062                            "(this will be reported only once per input file)");
1063               buffer->warned_cplusplus_comments = 1;
1064             }
1065
1066           /* Skip_line_comment updates buffer->read_ahead.  */
1067           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1068             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1069                                    pfile->lexer_pos.col,
1070                                    "multi-line comment");
1071         }
1072
1073       /* Skipping the comment has updated buffer->read_ahead.  */
1074       if (!pfile->state.save_comments)
1075         {
1076           result->flags |= PREV_WHITE;
1077           goto next_char;
1078         }
1079
1080       /* Save the comment as a token in its own right.  */
1081       save_comment (pfile, result, comment_start);
1082       /* Don't do MI optimisation.  */
1083       return;
1084
1085     case '<':
           /* While state.angled_headers is set, '<' opens a header name
              that runs up to the matching '>'.  */
1086       if (pfile->state.angled_headers)
1087         {
1088           result->type = CPP_HEADER_NAME;
1089           c = '>';              /* terminator.  */
1090           goto make_string;
1091         }
1092
1093       result->type = CPP_LESS;
1094       c = get_effective_char (buffer);
1095       if (c == '=')
1096         ACCEPT_CHAR (CPP_LESS_EQ);
1097       else if (c == '<')
1098         {
1099           ACCEPT_CHAR (CPP_LSHIFT);
1100           if (get_effective_char (buffer) == '=')
1101             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1102         }
1103       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1104         {
1105           ACCEPT_CHAR (CPP_MIN);
1106           if (get_effective_char (buffer) == '=')
1107             ACCEPT_CHAR (CPP_MIN_EQ);
1108         }
1109       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1110         {
1111           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1112           result->flags |= DIGRAPH;
1113         }
1114       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1115         {
1116           ACCEPT_CHAR (CPP_OPEN_BRACE);
1117           result->flags |= DIGRAPH;
1118         }
1119       break;
1120
1121     case '>':
1122       result->type = CPP_GREATER;
1123       c = get_effective_char (buffer);
1124       if (c == '=')
1125         ACCEPT_CHAR (CPP_GREATER_EQ);
1126       else if (c == '>')
1127         {
1128           ACCEPT_CHAR (CPP_RSHIFT);
1129           if (get_effective_char (buffer) == '=')
1130             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1131         }
1132       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133         {
1134           ACCEPT_CHAR (CPP_MAX);
1135           if (get_effective_char (buffer) == '=')
1136             ACCEPT_CHAR (CPP_MAX_EQ);
1137         }
1138       break;
1139
1140     case '%':
1141       lex_percent (buffer, result);
           /* lex_percent can yield CPP_HASH (the digraph spelling); it
              then gets the same directive checks as a plain '#'.  */
1142       if (result->type == CPP_HASH)
1143         goto do_hash;
1144       break;
1145
1146     case '.':
1147       lex_dot (pfile, result);
1148       break;
1149
1150     case '+':
1151       result->type = CPP_PLUS;
1152       c = get_effective_char (buffer);
1153       if (c == '=')
1154         ACCEPT_CHAR (CPP_PLUS_EQ);
1155       else if (c == '+')
1156         ACCEPT_CHAR (CPP_PLUS_PLUS);
1157       break;
1158
1159     case '-':
1160       result->type = CPP_MINUS;
1161       c = get_effective_char (buffer);
1162       if (c == '>')
1163         {
1164           ACCEPT_CHAR (CPP_DEREF);
1165           if (CPP_OPTION (pfile, cplusplus)
1166               && get_effective_char (buffer) == '*')
1167             ACCEPT_CHAR (CPP_DEREF_STAR);
1168         }
1169       else if (c == '=')
1170         ACCEPT_CHAR (CPP_MINUS_EQ);
1171       else if (c == '-')
1172         ACCEPT_CHAR (CPP_MINUS_MINUS);
1173       break;
1174
1175     case '*':
1176       result->type = CPP_MULT;
1177       if (get_effective_char (buffer) == '=')
1178         ACCEPT_CHAR (CPP_MULT_EQ);
1179       break;
1180
1181     case '=':
1182       result->type = CPP_EQ;
1183       if (get_effective_char (buffer) == '=')
1184         ACCEPT_CHAR (CPP_EQ_EQ);
1185       break;
1186
1187     case '!':
1188       result->type = CPP_NOT;
1189       if (get_effective_char (buffer) == '=')
1190         ACCEPT_CHAR (CPP_NOT_EQ);
1191       break;
1192
1193     case '&':
1194       result->type = CPP_AND;
1195       c = get_effective_char (buffer);
1196       if (c == '=')
1197         ACCEPT_CHAR (CPP_AND_EQ);
1198       else if (c == '&')
1199         ACCEPT_CHAR (CPP_AND_AND);
1200       break;
1201
1202     case '#':
1203       c = buffer->extra_char;   /* Can be set by error condition below.  */
1204       if (c != EOF)
1205         {
               /* Move the pending extra character into the look-ahead
                  slot and clear extra_char.  */
1206           buffer->read_ahead = c;
1207           buffer->extra_char = EOF;
1208         }
1209       else
1210         c = get_effective_char (buffer);
1211
1212       if (c == '#')
1213         {
1214           ACCEPT_CHAR (CPP_PASTE);
1215           break;
1216         }
1217
1218       result->type = CPP_HASH;
1219     do_hash:
           /* A '#' that is not first on its line is an ordinary token.  */
1220       if (!bol)
1221         break;
1222       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1223          tokens within the list of arguments that would otherwise act
1224          as preprocessing directives, the behavior is undefined.
1225
1226          This implementation will report a hard error, terminate the
1227          macro invocation, and proceed to process the directive.  */
1228       if (pfile->state.parsing_args)
1229         {
1230           pfile->lexer_pos.output_line = pfile->line;
1231           if (pfile->state.parsing_args == 2)
1232             {
1233               cpp_error (pfile,
1234                          "directives may not be used inside a macro argument");
1235               result->type = CPP_EOF;
1236             }
1237         }
1238       /* in_directive can be true inside a _Pragma.  */
1239       else if (!pfile->state.in_directive)
1240         {
1241           /* This is the hash introducing a directive.  If the return
1242              value is false, it is an assembler #.  */
1243           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1244             goto next_token;
1245         }
1246       break;
1247
1248     case '|':
1249       result->type = CPP_OR;
1250       c = get_effective_char (buffer);
1251       if (c == '=')
1252         ACCEPT_CHAR (CPP_OR_EQ);
1253       else if (c == '|')
1254         ACCEPT_CHAR (CPP_OR_OR);
1255       break;
1256
1257     case '^':
1258       result->type = CPP_XOR;
1259       if (get_effective_char (buffer) == '=')
1260         ACCEPT_CHAR (CPP_XOR_EQ);
1261       break;
1262
1263     case ':':
1264       result->type = CPP_COLON;
1265       c = get_effective_char (buffer);
1266       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1267         ACCEPT_CHAR (CPP_SCOPE);
1268       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1269         {
1270           result->flags |= DIGRAPH;
1271           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1272         }
1273       break;
1274
1275     case '~': result->type = CPP_COMPL; break;
1276     case ',': result->type = CPP_COMMA; break;
1277     case '(': result->type = CPP_OPEN_PAREN; break;
1278     case ')': result->type = CPP_CLOSE_PAREN; break;
1279     case '[': result->type = CPP_OPEN_SQUARE; break;
1280     case ']': result->type = CPP_CLOSE_SQUARE; break;
1281     case '{': result->type = CPP_OPEN_BRACE; break;
1282     case '}': result->type = CPP_CLOSE_BRACE; break;
1283     case ';': result->type = CPP_SEMICOLON; break;
1284
1285       /* @ is a punctuator in Objective C.  */
1286     case '@': result->type = CPP_ATSIGN; break;
1287
           /* Any other character is returned as CPP_OTHER, with the
              character itself stored in val.c.  */
1288     random_char:
1289     default:
1290       result->type = CPP_OTHER;
1291       result->val.c = c;
1292       break;
1293     }
1294
       /* Outside a directive, a token lexed while state.skipping is set
          is dropped: go round again for the next one.  */
1295   if (!pfile->state.in_directive && pfile->state.skipping)
1296     goto next_char;
1297
1298   /* If not in a directive, this token invalidates controlling macros.  */
1299   if (!pfile->state.in_directive)
1300     pfile->mi_valid = false;
1301 }
1302
1303 /* An upper bound on the number of bytes needed to spell a token,
1304    including preceding whitespace.  */
1305 unsigned int
1306 cpp_token_len (token)
1307      const cpp_token *token;
1308 {
1309   unsigned int len;
1310
1311   switch (TOKEN_SPELL (token))
1312     {
1313     default:            len = 0;                                break;
1314     case SPELL_STRING:  len = token->val.str.len;               break;
1315     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1316     }
1317   /* 1 for whitespace, 4 for comment delimeters.  */
1318   return len + 5;
1319 }
1320
1321 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1322    already contain the enough space to hold the token's spelling.
1323    Returns a pointer to the character after the last character
1324    written.  */
1325 unsigned char *
1326 cpp_spell_token (pfile, token, buffer)
1327      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1328      const cpp_token *token;
1329      unsigned char *buffer;
1330 {
1331   switch (TOKEN_SPELL (token))
1332     {
1333     case SPELL_OPERATOR:
1334       {
1335         const unsigned char *spelling;
1336         unsigned char c;
1337
1338         if (token->flags & DIGRAPH)
1339           spelling
1340             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1341         else if (token->flags & NAMED_OP)
1342           goto spell_ident;
1343         else
1344           spelling = TOKEN_NAME (token);
1345
1346         while ((c = *spelling++) != '\0')
1347           *buffer++ = c;
1348       }
1349       break;
1350
1351     case SPELL_IDENT:
1352       spell_ident:
1353       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1354       buffer += NODE_LEN (token->val.node);
1355       break;
1356
1357     case SPELL_STRING:
1358       {
1359         int left, right, tag;
1360         switch (token->type)
1361           {
1362           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1363           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1364           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1365           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1366           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1367           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1368           }
1369         if (tag) *buffer++ = tag;
1370         if (left) *buffer++ = left;
1371         memcpy (buffer, token->val.str.text, token->val.str.len);
1372         buffer += token->val.str.len;
1373         if (right) *buffer++ = right;
1374       }
1375       break;
1376
1377     case SPELL_CHAR:
1378       *buffer++ = token->val.c;
1379       break;
1380
1381     case SPELL_NONE:
1382       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1383       break;
1384     }
1385
1386   return buffer;
1387 }
1388
1389 /* Returns a token as a null-terminated string.  The string is
1390    temporary, and automatically freed later.  Useful for diagnostics.  */
1391 unsigned char *
1392 cpp_token_as_text (pfile, token)
1393      cpp_reader *pfile;
1394      const cpp_token *token;
1395 {
1396   unsigned int len = cpp_token_len (token);
1397   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1398
1399   end = cpp_spell_token (pfile, token, start);
1400   end[0] = '\0';
1401
1402   return start;
1403 }
1404
1405 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1406 const char *
1407 cpp_type2name (type)
1408      enum cpp_ttype type;
1409 {
1410   return (const char *) token_spellings[type].name;
1411 }
1412
1413 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1414    for efficiency - to avoid double-buffering.  Also, outputs a space
1415    if PREV_WHITE is flagged.  */
1416 void
1417 cpp_output_token (token, fp)
1418      const cpp_token *token;
1419      FILE *fp;
1420 {
1421   if (token->flags & PREV_WHITE)
1422     putc (' ', fp);
1423
1424   switch (TOKEN_SPELL (token))
1425     {
1426     case SPELL_OPERATOR:
1427       {
1428         const unsigned char *spelling;
1429
1430         if (token->flags & DIGRAPH)
1431           spelling
1432             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1433         else if (token->flags & NAMED_OP)
1434           goto spell_ident;
1435         else
1436           spelling = TOKEN_NAME (token);
1437
1438         ufputs (spelling, fp);
1439       }
1440       break;
1441
1442     spell_ident:
1443     case SPELL_IDENT:
1444       ufputs (NODE_NAME (token->val.node), fp);
1445     break;
1446
1447     case SPELL_STRING:
1448       {
1449         int left, right, tag;
1450         switch (token->type)
1451           {
1452           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1453           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1454           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1455           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1456           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1457           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1458           }
1459         if (tag) putc (tag, fp);
1460         if (left) putc (left, fp);
1461         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1462         if (right) putc (right, fp);
1463       }
1464       break;
1465
1466     case SPELL_CHAR:
1467       putc (token->val.c, fp);
1468       break;
1469
1470     case SPELL_NONE:
1471       /* An error, most probably.  */
1472       break;
1473     }
1474 }
1475
1476 /* Compare two tokens.  */
1477 int
1478 _cpp_equiv_tokens (a, b)
1479      const cpp_token *a, *b;
1480 {
1481   if (a->type == b->type && a->flags == b->flags)
1482     switch (TOKEN_SPELL (a))
1483       {
1484       default:                  /* Keep compiler happy.  */
1485       case SPELL_OPERATOR:
1486         return 1;
1487       case SPELL_CHAR:
1488         return a->val.c == b->val.c; /* Character.  */
1489       case SPELL_NONE:
1490         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1491       case SPELL_IDENT:
1492         return a->val.node == b->val.node;
1493       case SPELL_STRING:
1494         return (a->val.str.len == b->val.str.len
1495                 && !memcmp (a->val.str.text, b->val.str.text,
1496                             a->val.str.len));
1497       }
1498
1499   return 0;
1500 }
1501
1502 /* Determine whether two tokens can be pasted together, and if so,
1503    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1504    be pasted, or the appropriate type for the merged token if they
1505    can.  */
1506 enum cpp_ttype
1507 cpp_can_paste (pfile, token1, token2, digraph)
1508      cpp_reader * pfile;
1509      const cpp_token *token1, *token2;
1510      int* digraph;
1511 {
1512   enum cpp_ttype a = token1->type, b = token2->type;
1513   int cxx = CPP_OPTION (pfile, cplusplus);
1514
1515   /* Treat named operators as if they were ordinary NAMEs.  */
1516   if (token1->flags & NAMED_OP)
1517     a = CPP_NAME;
1518   if (token2->flags & NAMED_OP)
1519     b = CPP_NAME;
1520
1521   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1522     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1523
1524   switch (a)
1525     {
1526     case CPP_GREATER:
1527       if (b == a) return CPP_RSHIFT;
1528       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1529       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1530       break;
1531     case CPP_LESS:
1532       if (b == a) return CPP_LSHIFT;
1533       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1534       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1535       if (CPP_OPTION (pfile, digraphs))
1536         {
1537           if (b == CPP_COLON)
1538             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1539           if (b == CPP_MOD)
1540             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1541         }
1542       break;
1543
1544     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1545     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1546     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1547
1548     case CPP_MINUS:
1549       if (b == a)               return CPP_MINUS_MINUS;
1550       if (b == CPP_GREATER)     return CPP_DEREF;
1551       break;
1552     case CPP_COLON:
1553       if (b == a && cxx)        return CPP_SCOPE;
1554       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1555         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1556       break;
1557
1558     case CPP_MOD:
1559       if (CPP_OPTION (pfile, digraphs))
1560         {
1561           if (b == CPP_GREATER)
1562             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1563           if (b == CPP_COLON)
1564             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1565         }
1566       break;
1567     case CPP_DEREF:
1568       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1569       break;
1570     case CPP_DOT:
1571       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1572       if (b == CPP_NUMBER)      return CPP_NUMBER;
1573       break;
1574
1575     case CPP_HASH:
1576       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1577         /* %:%: digraph */
1578         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1579       break;
1580
1581     case CPP_NAME:
1582       if (b == CPP_NAME)        return CPP_NAME;
1583       if (b == CPP_NUMBER
1584           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1585       if (b == CPP_CHAR
1586           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1587       if (b == CPP_STRING
1588           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1589       break;
1590
1591     case CPP_NUMBER:
1592       if (b == CPP_NUMBER)      return CPP_NUMBER;
1593       if (b == CPP_NAME)        return CPP_NUMBER;
1594       if (b == CPP_DOT)         return CPP_NUMBER;
1595       /* Numbers cannot have length zero, so this is safe.  */
1596       if ((b == CPP_PLUS || b == CPP_MINUS)
1597           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1598         return CPP_NUMBER;
1599       break;
1600
1601     default:
1602       break;
1603     }
1604
1605   return CPP_EOF;
1606 }
1607
1608 /* Returns nonzero if a space should be inserted to avoid an
1609    accidental token paste for output.  For simplicity, it is
1610    conservative, and occasionally advises a space where one is not
1611    needed, e.g. "." and ".2".  */
1612
1613 int
1614 cpp_avoid_paste (pfile, token1, token2)
1615      cpp_reader *pfile;
1616      const cpp_token *token1, *token2;
1617 {
1618   enum cpp_ttype a = token1->type, b = token2->type;
1619   cppchar_t c;
1620
1621   if (token1->flags & NAMED_OP)
1622     a = CPP_NAME;
1623   if (token2->flags & NAMED_OP)
1624     b = CPP_NAME;
1625
1626   c = EOF;
1627   if (token2->flags & DIGRAPH)
1628     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1629   else if (token_spellings[b].category == SPELL_OPERATOR)
1630     c = token_spellings[b].name[0];
1631
1632   /* Quickly get everything that can paste with an '='.  */
1633   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1634     return 1;
1635
1636   switch (a)
1637     {
1638     case CPP_GREATER:   return c == '>' || c == '?';
1639     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1640     case CPP_PLUS:      return c == '+';
1641     case CPP_MINUS:     return c == '-' || c == '>';
1642     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1643     case CPP_MOD:       return c == ':' || c == '>';
1644     case CPP_AND:       return c == '&';
1645     case CPP_OR:        return c == '|';
1646     case CPP_COLON:     return c == ':' || c == '>';
1647     case CPP_DEREF:     return c == '*';
1648     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1649     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1650     case CPP_NAME:      return ((b == CPP_NUMBER
1651                                  && name_p (pfile, &token2->val.str))
1652                                 || b == CPP_NAME
1653                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1654     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1655                                 || c == '.' || c == '+' || c == '-');
1656     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1657                                 && token1->val.c == '@'
1658                                 && (b == CPP_NAME || b == CPP_STRING));
1659     default:            break;
1660     }
1661
1662   return 0;
1663 }
1664
1665 /* Output all the remaining tokens on the current line, and a newline
1666    character, to FP.  Leading whitespace is removed.  */
1667 void
1668 cpp_output_line (pfile, fp)
1669      cpp_reader *pfile;
1670      FILE *fp;
1671 {
1672   cpp_token token;
1673
1674   cpp_get_token (pfile, &token);
1675   token.flags &= ~PREV_WHITE;
1676   while (token.type != CPP_EOF)
1677     {
1678       cpp_output_token (&token, fp);
1679       cpp_get_token (pfile, &token);
1680     }
1681
1682   putc ('\n', fp);
1683 }
1684
/* Return the numeric value (0-15) of the hexadecimal digit C.
   Aborts if C is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  /* C is unsigned, so each subtraction wraps to a large value when C
     lies below the start of the range; one comparison per range is
     therefore enough.  */
  if (c - '0' <= 9)
    return c - '0';
  if (c - 'A' <= 5)
    return 10 + (c - 'A');
  if (c - 'a' <= 5)
    return 10 + (c - 'a');

  abort ();
}
1698
1699 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1700    failure if cpplib is not parsing C++ or C99.  Such failure is
1701    silent, and no variables are updated.  Otherwise returns 0, and
1702    warns if -Wtraditional.
1703
1704    [lex.charset]: The character designated by the universal character
1705    name \UNNNNNNNN is that character whose character short name in
1706    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707    universal character name \uNNNN is that character whose character
1708    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1709    for a universal character name is less than 0x20 or in the range
1710    0x7F-0x9F (inclusive), or if the universal character name
1711    designates a character in the basic source character set, then the
1712    program is ill-formed.
1713
1714    We assume that wchar_t is Unicode, so we don't need to do any
1715    mapping.  Is this ever wrong?
1716
1717    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1718    LIMIT is the end of the string or charconst.  PSTR is updated to
1719    point after the UCS on return, and the UCS is written into PC.  */
1720
1721 static int
1722 maybe_read_ucs (pfile, pstr, limit, pc)
1723      cpp_reader *pfile;
1724      const unsigned char **pstr;
1725      const unsigned char *limit;
1726      unsigned int *pc;
1727 {
1728   const unsigned char *p = *pstr;
1729   unsigned int code = 0;
1730   unsigned int c = *pc, length;
1731
1732   /* Only attempt to interpret a UCS for C++ and C99.  */
1733   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1734     return 1;
1735
1736   if (CPP_WTRADITIONAL (pfile))
1737     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1738
1739   length = (c == 'u' ? 4: 8);
1740
1741   if ((size_t) (limit - p) < length)
1742     {
1743       cpp_error (pfile, "incomplete universal-character-name");
1744       /* Skip to the end to avoid more diagnostics.  */
1745       p = limit;
1746     }
1747   else
1748     {
1749       for (; length; length--, p++)
1750         {
1751           c = *p;
1752           if (ISXDIGIT (c))
1753             code = (code << 4) + hex_digit_value (c);
1754           else
1755             {
1756               cpp_error (pfile,
1757                          "non-hex digit '%c' in universal-character-name", c);
1758               /* We shouldn't skip in case there are multibyte chars.  */
1759               break;
1760             }
1761         }
1762     }
1763
1764 #ifdef TARGET_EBCDIC
1765   cpp_error (pfile, "universal-character-name on EBCDIC target");
1766   code = 0x3f;  /* EBCDIC invalid character */
1767 #else
1768  /* True extended characters are OK.  */
1769   if (code >= 0xa0
1770       && !(code & 0x80000000)
1771       && !(code >= 0xD800 && code <= 0xDFFF))
1772     ;
1773   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1774      hex escapes so that this also works with EBCDIC hosts.  */
1775   else if (code == 0x24 || code == 0x40 || code == 0x60)
1776     ;
1777   /* Don't give another error if one occurred above.  */
1778   else if (length == 0)
1779     cpp_error (pfile, "universal-character-name out of range");
1780 #endif
1781
1782   *pstr = p;
1783   *pc = code;
1784   return 0;
1785 }
1786
1787 /* Interpret an escape sequence, and return its value.  PSTR points to
1788    the input pointer, which is just after the backslash.  LIMIT is how
1789    much text we have.  MASK is a bitmask for the precision for the
1790    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1791    interpret escapes that did not exist in traditional C.
1792
1793    Handles all relevant diagnostics.  */
1794
1795 unsigned int
1796 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1797      cpp_reader *pfile;
1798      const unsigned char **pstr;
1799      const unsigned char *limit;
1800      unsigned HOST_WIDE_INT mask;
1801      int traditional;
1802 {
1803   int unknown = 0;
1804   const unsigned char *str = *pstr;
1805   unsigned int c = *str++;
1806
1807   switch (c)
1808     {
1809     case '\\': case '\'': case '"': case '?': break;
1810     case 'b': c = TARGET_BS;      break;
1811     case 'f': c = TARGET_FF;      break;
1812     case 'n': c = TARGET_NEWLINE; break;
1813     case 'r': c = TARGET_CR;      break;
1814     case 't': c = TARGET_TAB;     break;
1815     case 'v': c = TARGET_VT;      break;
1816
1817     case '(': case '{': case '[': case '%':
1818       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819          '\%' is used to prevent SCCS from getting confused.  */
1820       unknown = CPP_PEDANTIC (pfile);
1821       break;
1822
1823     case 'a':
1824       if (CPP_WTRADITIONAL (pfile))
1825         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1826       if (!traditional)
1827         c = TARGET_BELL;
1828       break;
1829
1830     case 'e': case 'E':
1831       if (CPP_PEDANTIC (pfile))
1832         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1833       c = TARGET_ESC;
1834       break;
1835
1836     case 'u': case 'U':
1837       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1838       break;
1839
1840     case 'x':
1841       if (CPP_WTRADITIONAL (pfile))
1842         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1843
1844       if (!traditional)
1845         {
1846           unsigned int i = 0, overflow = 0;
1847           int digits_found = 0;
1848
1849           while (str < limit)
1850             {
1851               c = *str;
1852               if (! ISXDIGIT (c))
1853                 break;
1854               str++;
1855               overflow |= i ^ (i << 4 >> 4);
1856               i = (i << 4) + hex_digit_value (c);
1857               digits_found = 1;
1858             }
1859
1860           if (!digits_found)
1861             cpp_error (pfile, "\\x used with no following hex digits");
1862
1863           if (overflow | (i != (i & mask)))
1864             {
1865               cpp_pedwarn (pfile, "hex escape sequence out of range");
1866               i &= mask;
1867             }
1868           c = i;
1869         }
1870       break;
1871
1872     case '0':  case '1':  case '2':  case '3':
1873     case '4':  case '5':  case '6':  case '7':
1874       {
1875         unsigned int i = c - '0';
1876         int count = 0;
1877
1878         while (str < limit && ++count < 3)
1879           {
1880             c = *str;
1881             if (c < '0' || c > '7')
1882               break;
1883             str++;
1884             i = (i << 3) + c - '0';
1885           }
1886
1887         if (i != (i & mask))
1888           {
1889             cpp_pedwarn (pfile, "octal escape sequence out of range");
1890             i &= mask;
1891           }
1892         c = i;
1893       }
1894       break;
1895
1896     default:
1897       unknown = 1;
1898       break;
1899     }
1900
1901   if (unknown)
1902     {
1903       if (ISGRAPH (c))
1904         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1905       else
1906         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1907     }
1908
1909   if (c > mask)
1910     cpp_pedwarn (pfile, "escape sequence out of range for character");
1911
1912   *pstr = str;
1913   return c;
1914 }
1915
1916 #ifndef MAX_CHAR_TYPE_SIZE
1917 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1918 #endif
1919
1920 #ifndef MAX_WCHAR_TYPE_SIZE
1921 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1922 #endif
1923
1924 /* Interpret a (possibly wide) character constant in TOKEN.
1925    WARN_MULTI warns about multi-character charconsts, if not
1926    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1927    that did not exist in traditional C.  PCHARS_SEEN points to a
1928    variable that is filled in with the number of characters seen.  */
1929 HOST_WIDE_INT
1930 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1931      cpp_reader *pfile;
1932      const cpp_token *token;
1933      int warn_multi;
1934      int traditional;
1935      unsigned int *pchars_seen;
1936 {
1937   const unsigned char *str = token->val.str.text;
1938   const unsigned char *limit = str + token->val.str.len;
1939   unsigned int chars_seen = 0;
1940   unsigned int width, max_chars, c;
1941   unsigned HOST_WIDE_INT mask;
1942   HOST_WIDE_INT result = 0;
1943
1944 #ifdef MULTIBYTE_CHARS
1945   (void) local_mbtowc (NULL, NULL, 0);
1946 #endif
1947
1948   /* Width in bits.  */
1949   if (token->type == CPP_CHAR)
1950     width = MAX_CHAR_TYPE_SIZE;
1951   else
1952     width = MAX_WCHAR_TYPE_SIZE;
1953
1954   if (width < HOST_BITS_PER_WIDE_INT)
1955     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1956   else
1957     mask = ~0;
1958   max_chars = HOST_BITS_PER_WIDE_INT / width;
1959
1960   while (str < limit)
1961     {
1962 #ifdef MULTIBYTE_CHARS
1963       wchar_t wc;
1964       int char_len;
1965
1966       char_len = local_mbtowc (&wc, str, limit - str);
1967       if (char_len == -1)
1968         {
1969           cpp_warning (pfile, "ignoring invalid multibyte character");
1970           c = *str++;
1971         }
1972       else
1973         {
1974           str += char_len;
1975           c = wc;
1976         }
1977 #else
1978       c = *str++;
1979 #endif
1980
1981       if (c == '\\')
1982         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1983
1984 #ifdef MAP_CHARACTER
1985       if (ISPRINT (c))
1986         c = MAP_CHARACTER (c);
1987 #endif
1988
1989       /* Merge character into result; ignore excess chars.  */
1990       if (++chars_seen <= max_chars)
1991         {
1992           if (width < HOST_BITS_PER_WIDE_INT)
1993             result = (result << width) | (c & mask);
1994           else
1995             result = c;
1996         }
1997     }
1998
1999   if (chars_seen == 0)
2000     cpp_error (pfile, "empty character constant");
2001   else if (chars_seen > max_chars)
2002     {
2003       chars_seen = max_chars;
2004       cpp_warning (pfile, "character constant too long");
2005     }
2006   else if (chars_seen > 1 && !traditional && warn_multi)
2007     cpp_warning (pfile, "multi-character character constant");
2008
2009   /* If char type is signed, sign-extend the constant.  The
2010      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2011   if (token->type == CPP_CHAR && chars_seen)
2012     {
2013       unsigned int nbits = chars_seen * width;
2014       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2015
2016       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017           || ((result >> (nbits - 1)) & 1) == 0)
2018         result &= mask;
2019       else
2020         result |= ~mask;
2021     }
2022
2023   *pchars_seen = chars_seen;
2024   return result;
2025 }
2026
/* Memory pools.  */

/* Alignment probe.  The union follows a single char, so its offset
   within the structure is the strictest alignment demanded by any of
   its members.  Nothing is ever stored in an object of this type.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for pool chunks when the caller requests none.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2040
2041 static int
2042 chunk_suitable (pool, chunk, size)
2043      cpp_pool *pool;
2044      cpp_chunk *chunk;
2045      unsigned int size;
2046 {
2047   /* Being at least twice SIZE means we can use memcpy in
2048      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2049      anyway.  */
2050   return (chunk && pool->locked != chunk
2051           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2052 }
2053
2054 /* Returns the end of the new pool.  PTR points to a char in the old
2055    pool, and is updated to point to the same char in the new pool.  */
2056 unsigned char *
2057 _cpp_next_chunk (pool, len, ptr)
2058      cpp_pool *pool;
2059      unsigned int len;
2060      unsigned char **ptr;
2061 {
2062   cpp_chunk *chunk = pool->cur->next;
2063
2064   /* LEN is the minimum size we want in the new pool.  */
2065   len += POOL_ROOM (pool);
2066   if (! chunk_suitable (pool, chunk, len))
2067     {
2068       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2069
2070       chunk->next = pool->cur->next;
2071       pool->cur->next = chunk;
2072     }
2073
2074   /* Update the pointer before changing chunk's front.  */
2075   if (ptr)
2076     *ptr += chunk->base - POOL_FRONT (pool);
2077
2078   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2079   chunk->front = chunk->base;
2080
2081   pool->cur = chunk;
2082   return POOL_LIMIT (pool);
2083 }
2084
2085 static cpp_chunk *
2086 new_chunk (size)
2087      unsigned int size;
2088 {
2089   unsigned char *base;
2090   cpp_chunk *result;
2091
2092   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2093   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2094   /* Put the chunk descriptor at the end.  Then chunk overruns will
2095      cause obvious chaos.  */
2096   result = (cpp_chunk *) (base + size);
2097   result->base = base;
2098   result->front = base;
2099   result->limit = base + size;
2100   result->next = 0;
2101
2102   return result;
2103 }
2104
2105 void
2106 _cpp_init_pool (pool, size, align, temp)
2107      cpp_pool *pool;
2108      unsigned int size, align, temp;
2109 {
2110   if (align == 0)
2111     align = DEFAULT_ALIGNMENT;
2112   if (align & (align - 1))
2113     abort ();
2114   pool->align = align;
2115   pool->first = new_chunk (size);
2116   pool->cur = pool->first;
2117   pool->locked = 0;
2118   pool->locks = 0;
2119   if (temp)
2120     pool->cur->next = pool->cur;
2121 }
2122
2123 void
2124 _cpp_lock_pool (pool)
2125      cpp_pool *pool;
2126 {
2127   if (pool->locks++ == 0)
2128     pool->locked = pool->cur;
2129 }
2130
2131 void
2132 _cpp_unlock_pool (pool)
2133      cpp_pool *pool;
2134 {
2135   if (--pool->locks == 0)
2136     pool->locked = 0;
2137 }
2138
2139 void
2140 _cpp_free_pool (pool)
2141      cpp_pool *pool;
2142 {
2143   cpp_chunk *chunk = pool->first, *next;
2144
2145   do
2146     {
2147       next = chunk->next;
2148       free (chunk->base);
2149       chunk = next;
2150     }
2151   while (chunk && chunk != pool->first);
2152 }
2153
2154 /* Reserve LEN bytes from a memory pool.  */
2155 unsigned char *
2156 _cpp_pool_reserve (pool, len)
2157      cpp_pool *pool;
2158      unsigned int len;
2159 {
2160   len = POOL_ALIGN (len, pool->align);
2161   if (len > (unsigned int) POOL_ROOM (pool))
2162     _cpp_next_chunk (pool, len, 0);
2163
2164   return POOL_FRONT (pool);
2165 }
2166
2167 /* Allocate LEN bytes from a memory pool.  */
2168 unsigned char *
2169 _cpp_pool_alloc (pool, len)
2170      cpp_pool *pool;
2171      unsigned int len;
2172 {
2173   unsigned char *result = _cpp_pool_reserve (pool, len);
2174
2175   POOL_COMMIT (pool, len);
2176   return result;
2177 }