gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
/* This lexer works with a single pass of the file.  Recently I
   re-wrote it to minimize the places where we step backwards in the
   input stream, to make future changes to support multi-byte
   character sets fairly straightforward.

   There is now only one routine where we do step backwards:
   skip_escaped_newlines.  This routine could probably also be changed
   so that it doesn't need to step back.  One possibility is to use a
   trick similar to that used in lex_dot and lex_percent.  Two
   extra characters might be needed, but skip_escaped_newlines itself
   would probably be the only place that needs to be aware of that,
   and changes to the remaining routines would probably only be needed
   if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
/* Spelling categories, recorded for each token type in the
   token_spellings table.  Tokens with SPELL_STRING store their
   spelling in the token list, and its length in the
   token->val.name.len.
   NOTE(review): the lexing routines in this file record string
   lengths in token->val.str.len -- confirm which field name is
   current.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category given to every OP () table entry.  */
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
  64
/* One entry of the token_spellings table.  */
struct token_spelling
{
  /* Spelling category of the token type.  */
  enum spell_type category;
  /* For OP () entries the operator's spelling; for TK () entries the
     result of STRINGX on the enumerator.  */
  const unsigned char *name;
};
  70
/* Spellings of the six digraphs.  NOTE(review): users of this table
   are outside this view and presumably rely on the index order --
   confirm before reordering.  */
const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                             U":>", U"<%", U"%>"};
  73
/* Build token_spellings by expanding TTYPE_TABLE.  Each OP (e, s)
   entry becomes { SPELL_OPERATOR, s } and each TK (e, s) entry becomes
   { s, STRINGX (e) }, i.e. for TK the second argument is the spelling
   category.  OP and TK are only needed for this expansion and are
   undefined again straight away.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
#undef OP
#undef TK
  79
/* Fetch the spelling category and the spelling name recorded in
   token_spellings for the type of TOKEN (a pointer to a token).  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
/* Character fetching: newline accounting, and trigraph and escaped
   newline processing.  */
static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
static cppchar_t get_effective_char PARAMS ((cpp_buffer *));

/* Comment and whitespace skipping, and the per-token-kind lexing
   helpers.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static void unterminated PARAMS ((cpp_reader *, int));
static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
                                   const unsigned char *, unsigned int *));

/* NOTE(review): the definitions of the following helpers are not in
   the part of the file reviewed here.  */
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine:
 109
 110    Compares, the token TOKEN to the NUL-terminated string STRING.
 111    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   if (token->type != CPP_NAME)
 119     return 0;
 120
 121   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 122 }
 123
 124 /* Call when meeting a newline.  Returns the character after the newline
 125    (or carriage-return newline combination), or EOF.  */
 126 static cppchar_t
 127 handle_newline (pfile, newline_char)
 128      cpp_reader *pfile;
 129      cppchar_t newline_char;
 130 {
 131   cpp_buffer *buffer;
 132   cppchar_t next = EOF;
 133
 134   pfile->line++;
 135   buffer = pfile->buffer;
 136   buffer->col_adjust = 0;
 137   buffer->line_base = buffer->cur;
 138
 139   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 140   if (buffer->cur < buffer->rlimit)
 141     {
 142       next = *buffer->cur++;
 143       if (next + newline_char == '\r' + '\n')
 144         {
 145           buffer->line_base = buffer->cur;
 146           if (buffer->cur < buffer->rlimit)
 147             next = *buffer->cur++;
 148           else
 149             next = EOF;
 150         }
 151     }
 152
 153   buffer->read_ahead = next;
 154   return next;
 155 }
 156
 157 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 158    encountered.  It warns if necessary, and returns true if the
 159    trigraph should be honoured.  FROM_CHAR is the third character of a
 160    trigraph, and presumed to be the previous character for position
 161    reporting.  */
 162 static int
 163 trigraph_ok (pfile, from_char)
 164      cpp_reader *pfile;
 165      cppchar_t from_char;
 166 {
 167   int accept = CPP_OPTION (pfile, trigraphs);
 168
 169   /* Don't warn about trigraphs in comments.  */
 170   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 171     {
 172       cpp_buffer *buffer = pfile->buffer;
 173
 174       if (accept)
 175         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 176                                "trigraph ??%c converted to %c",
 177                                (int) from_char,
 178                                (int) _cpp_trigraph_map[from_char]);
 179       else if (buffer->cur != buffer->last_Wtrigraphs)
 180         {
 181           buffer->last_Wtrigraphs = buffer->cur;
 182           cpp_warning_with_line (pfile, pfile->line,
 183                                  CPP_BUF_COL (buffer) - 2,
 184                                  "trigraph ??%c ignored", (int) from_char);
 185         }
 186     }
 187
 188   return accept;
 189 }
 190
/* Set the type of the token being built to T and clear the
   read-ahead character.  Assumes local variables buffer and
   result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember, and later return to, the current read position.  Both
   assume local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 202
/* Skips any escaped newlines introduced by NEXT, which is either a
   '?' or a '\\'.  Returns the next character, which will also have
   been placed in buffer->read_ahead.  This routine performs
   preprocessing stages 1 and 2 of the ISO C standard.

   On entry buffer->cur is expected to point just past NEXT.  If NEXT
   starts neither an accepted trigraph nor an escaped newline,
   buffer->cur is put back where it was and NEXT itself is returned.
   An accepted trigraph that does not map to a backslash is consumed
   and its replacement character is returned.  Buffers marked
   from_stage3 are passed through untouched.  */
static cppchar_t
skip_escaped_newlines (buffer, next)
     cpp_buffer *buffer;
     cppchar_t next;
{
  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      cppchar_t next1;
      const unsigned char *saved_cur;
      int space;

      do
        {
          /* With nothing after NEXT it can start neither a trigraph
             nor an escaped newline.  */
          if (buffer->cur == buffer->rlimit)
            break;

          SAVE_STATE ();
          if (next == '?')
            {
              /* A trigraph needs a second '?' and then one more
                 character; otherwise rewind and return the '?'.  */
              next1 = *buffer->cur++;
              if (next1 != '?' || buffer->cur == buffer->rlimit)
                {
                  RESTORE_STATE ();
                  break;
                }

              /* The third character must be in the trigraph map, and
                 trigraph_ok must agree to the conversion.  */
              next1 = *buffer->cur++;
              if (!_cpp_trigraph_map[next1]
                  || !trigraph_ok (buffer->pfile, next1))
                {
                  RESTORE_STATE ();
                  break;
                }

              /* We have a full trigraph here.  */
              next = _cpp_trigraph_map[next1];
              if (next != '\\' || buffer->cur == buffer->rlimit)
                break;
              /* The trigraph produced a backslash.  From now on a
                 rewind must stop after the trigraph, not before it.  */
              SAVE_STATE ();
            }

          /* We have a backslash, and room for at least one more character.  */
          space = 0;
          /* Step over horizontal space between the backslash and a
             possible newline, noting whether there was any.  */
          do
            {
              next1 = *buffer->cur++;
              if (!is_nvspace (next1))
                break;
              space = 1;
            }
          while (buffer->cur < buffer->rlimit);

          /* Not a newline after all: this is an ordinary backslash.  */
          if (!is_vspace (next1))
            {
              RESTORE_STATE ();
              break;
            }

          if (space && !buffer->pfile->state.lexing_comment)
            cpp_warning (buffer->pfile,
                         "backslash and newline separated by space");

          /* Consume the newline.  The character after it may start a
             further trigraph or escaped newline, hence the loop.  */
          next = handle_newline (buffer->pfile, next1);
          if (next == EOF)
            cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
        }
      while (next == '\\' || next == '?');
    }

  buffer->read_ahead = next;
  return next;
}
 280
 281 /* Obtain the next character, after trigraph conversion and skipping
 282    an arbitrary string of escaped newlines.  The common case of no
 283    trigraphs or escaped newlines falls through quickly.  */
 284 static cppchar_t
 285 get_effective_char (buffer)
 286      cpp_buffer *buffer;
 287 {
 288   cppchar_t next = EOF;
 289
 290   if (buffer->cur < buffer->rlimit)
 291     {
 292       next = *buffer->cur++;
 293
 294       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 295          can introduce escaped newlines, which we want to skip, or
 296          UCNs, which, depending upon lexer state, we will handle in
 297          the future.  */
 298       if (next == '?' || next == '\\')
 299         next = skip_escaped_newlines (buffer, next);
 300     }
 301
 302   buffer->read_ahead = next;
 303   return next;
 304 }
 305
/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   non-zero if comment terminated by EOF, zero otherwise.

   pfile->state.lexing_comment is set for the duration of the scan,
   and buffer->read_ahead is EOF on return.  NOTE(review): the caller
   is presumably expected to have consumed the opening delimiter
   already -- confirm against the call site.  */
static int
skip_block_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  /* C is the current character and PREVC the one before it.  */
  cppchar_t c = EOF, prevc = EOF;

  pfile->state.lexing_comment = 1;
  while (buffer->cur != buffer->rlimit)
    {
      prevc = c, c = *buffer->cur++;

      /* Entered directly by the branches below that have already
         fetched the character to examine next.  */
    next_char:
      /* FIXME: For speed, create a new character class of characters
         of interest inside block comments.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (buffer, c);

      /* People like decorating comments with '*', so check for '/'
         instead for efficiency.  */
      if (c == '/')
        {
          /* A '/' preceded by '*' closes the comment.  */
          if (prevc == '*')
            break;

          /* Warn about potential nested comments, but not if the '/'
             comes immediately before the true comment delimiter.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments)
              && buffer->cur != buffer->rlimit)
            {
              prevc = c, c = *buffer->cur++;
              if (c == '*' && buffer->cur != buffer->rlimit)
                {
                  prevc = c, c = *buffer->cur++;
                  if (c != '/')
                    cpp_warning_with_line (pfile, pfile->line,
                                           CPP_BUF_COL (buffer) - 2,
                                           "\"/*\" within comment");
                }
              /* The look-ahead character still has to be examined.  */
              goto next_char;
            }
        }
      else if (is_vspace (c))
        {
          /* handle_newline returns the character after the newline,
             so examine it without fetching another.  */
          prevc = c, c = handle_newline (pfile, c);
          goto next_char;
        }
      else if (c == '\t')
        adjust_column (pfile);
    }

  pfile->state.lexing_comment = 0;
  buffer->read_ahead = EOF;
  /* The comment was closed only if the last two characters examined
     were '*' followed by '/'.  */
  return c != '/' || prevc != '*';
}
 365
 366 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 367    non-zero if a multiline comment.  The following new line, if any,
 368    is left in buffer->read_ahead.  */
 369 static int
 370 skip_line_comment (pfile)
 371      cpp_reader *pfile;
 372 {
 373   cpp_buffer *buffer = pfile->buffer;
 374   unsigned int orig_line = pfile->line;
 375   cppchar_t c;
 376
 377   pfile->state.lexing_comment = 1;
 378   do
 379     {
 380       c = EOF;
 381       if (buffer->cur == buffer->rlimit)
 382         break;
 383
 384       c = *buffer->cur++;
 385       if (c == '?' || c == '\\')
 386         c = skip_escaped_newlines (buffer, c);
 387     }
 388   while (!is_vspace (c));
 389
 390   pfile->state.lexing_comment = 0;
 391   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 392   return orig_line != pfile->line;
 393 }
 394
 395 /* pfile->buffer->cur is one beyond the \t character.  Update
 396    col_adjust so we track the column correctly.  */
 397 static void
 398 adjust_column (pfile)
 399      cpp_reader *pfile;
 400 {
 401   cpp_buffer *buffer = pfile->buffer;
 402   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 403
 404   /* Round it up to multiple of the tabstop, but subtract 1 since the
 405      tab itself occupies a character position.  */
 406   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 407                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 408 }
 409
 410 /* Skips whitespace, saving the next non-whitespace character.
 411    Adjusts pfile->col_adjust to account for tabs.  Without this,
 412    tokens might be assigned an incorrect column.  */
 413 static void
 414 skip_whitespace (pfile, c)
 415      cpp_reader *pfile;
 416      cppchar_t c;
 417 {
 418   cpp_buffer *buffer = pfile->buffer;
 419   unsigned int warned = 0;
 420
 421   do
 422     {
 423       /* Horizontal space always OK.  */
 424       if (c == ' ')
 425         ;
 426       else if (c == '\t')
 427         adjust_column (pfile);
 428       /* Just \f \v or \0 left.  */
 429       else if (c == '\0')
 430         {
 431           if (!warned)
 432             {
 433               cpp_warning (pfile, "null character(s) ignored");
 434               warned = 1;
 435             }
 436         }
 437       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 438         cpp_pedwarn_with_line (pfile, pfile->line,
 439                                CPP_BUF_COL (buffer),
 440                                "%s in preprocessing directive",
 441                                c == '\f' ? "form feed" : "vertical tab");
 442
 443       c = EOF;
 444       if (buffer->cur == buffer->rlimit)
 445         break;
 446       c = *buffer->cur++;
 447     }
 448   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 449   while (is_nvspace (c));
 450
 451   /* Remember the next character.  */
 452   buffer->read_ahead = c;
 453 }
 454
 455 /* See if the characters of a number token are valid in a name (no
 456    '.', '+' or '-').  */
 457 static int
 458 name_p (pfile, string)
 459      cpp_reader *pfile;
 460      const cpp_string *string;
 461 {
 462   unsigned int i;
 463
 464   for (i = 0; i < string->len; i++)
 465     if (!is_idchar (string->text[i]))
 466       return 0;
 467
 468   return 1;
 469 }
 470
 471 /* Parse an identifier, skipping embedded backslash-newlines.
 472    Calculate the hash value of the token while parsing, for improved
 473    performance.  The hashing algorithm *must* match cpp_lookup().  */
 474
 475 static cpp_hashnode *
 476 parse_identifier (pfile, c)
 477      cpp_reader *pfile;
 478      cppchar_t c;
 479 {
 480   cpp_hashnode *result;
 481   cpp_buffer *buffer = pfile->buffer;
 482   unsigned int saw_dollar = 0, len;
 483   struct obstack *stack = &pfile->hash_table->stack;
 484
 485   do
 486     {
 487       do
 488         {
 489           obstack_1grow (stack, c);
 490
 491           if (c == '$')
 492             saw_dollar++;
 493
 494           c = EOF;
 495           if (buffer->cur == buffer->rlimit)
 496             break;
 497
 498           c = *buffer->cur++;
 499         }
 500       while (is_idchar (c));
 501
 502       /* Potential escaped newline?  */
 503       if (c != '?' && c != '\\')
 504         break;
 505       c = skip_escaped_newlines (buffer, c);
 506     }
 507   while (is_idchar (c));
 508
 509   /* Remember the next character.  */
 510   buffer->read_ahead = c;
 511
 512   /* $ is not a identifier character in the standard, but is commonly
 513      accepted as an extension.  Don't warn about it in skipped
 514      conditional blocks.  */
 515   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 516     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 517
 518   /* Identifiers are null-terminated.  */
 519   len = obstack_object_size (stack);
 520   obstack_1grow (stack, '\0');
 521
 522   /* This routine commits the memory if necessary.  */
 523   result = (cpp_hashnode *)
 524     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 525
 526   /* Some identifiers require diagnostics when lexed.  */
 527   if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
 528     {
 529       /* It is allowed to poison the same identifier twice.  */
 530       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 531         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 532                    NODE_NAME (result));
 533
 534       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 535          replacement list of a variadic macro.  */
 536       if (result == pfile->spec_nodes.n__VA_ARGS__
 537           && !pfile->state.va_args_ok)
 538         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 539     }
 540
 541   return result;
 542 }
 543
 544 /* Parse a number, skipping embedded backslash-newlines.  */
 545 static void
 546 parse_number (pfile, number, c, leading_period)
 547      cpp_reader *pfile;
 548      cpp_string *number;
 549      cppchar_t c;
 550      int leading_period;
 551 {
 552   cpp_buffer *buffer = pfile->buffer;
 553   cpp_pool *pool = &pfile->ident_pool;
 554   unsigned char *dest, *limit;
 555
 556   dest = POOL_FRONT (pool);
 557   limit = POOL_LIMIT (pool);
 558
 559   /* Place a leading period.  */
 560   if (leading_period)
 561     {
 562       if (dest >= limit)
 563         limit = _cpp_next_chunk (pool, 0, &dest);
 564       *dest++ = '.';
 565     }
 566
 567   do
 568     {
 569       do
 570         {
 571           /* Need room for terminating null.  */
 572           if (dest + 1 >= limit)
 573             limit = _cpp_next_chunk (pool, 0, &dest);
 574           *dest++ = c;
 575
 576           c = EOF;
 577           if (buffer->cur == buffer->rlimit)
 578             break;
 579
 580           c = *buffer->cur++;
 581         }
 582       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 583
 584       /* Potential escaped newline?  */
 585       if (c != '?' && c != '\\')
 586         break;
 587       c = skip_escaped_newlines (buffer, c);
 588     }
 589   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 590
 591   /* Remember the next character.  */
 592   buffer->read_ahead = c;
 593
 594   /* Null-terminate the number.  */
 595   *dest = '\0';
 596
 597   number->text = POOL_FRONT (pool);
 598   number->len = dest - number->text;
 599   POOL_COMMIT (pool, number->len + 1);
 600 }
 601
 602 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 603 static void
 604 unterminated (pfile, term)
 605      cpp_reader *pfile;
 606      int term;
 607 {
 608   cpp_error (pfile, "missing terminating %c character", term);
 609
 610   if (term == '\"' && pfile->mlstring_pos.line
 611       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 612     {
 613       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 614                            pfile->mlstring_pos.col,
 615                            "possible start of unterminated string literal");
 616       pfile->mlstring_pos.line = 0;
 617     }
 618 }
 619
 620 /* Subroutine of parse_string.  */
 621 static int
 622 unescaped_terminator_p (pfile, dest)
 623      cpp_reader *pfile;
 624      const unsigned char *dest;
 625 {
 626   const unsigned char *start, *temp;
 627
 628   /* In #include-style directives, terminators are not escapeable.  */
 629   if (pfile->state.angled_headers)
 630     return 1;
 631
 632   start = POOL_FRONT (&pfile->ident_pool);
 633
 634   /* An odd number of consecutive backslashes represents an escaped
 635      terminator.  */
 636   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 637     ;
 638
 639   return ((dest - temp) & 1) == 0;
 640 }
 641
 642 /* Parses a string, character constant, or angle-bracketed header file
 643    name.  Handles embedded trigraphs and escaped newlines.  The stored
 644    string is guaranteed NUL-terminated, but it is not guaranteed that
 645    this is the first NUL since embedded NULs are preserved.
 646
 647    Multi-line strings are allowed, but they are deprecated.  */
 648 static void
 649 parse_string (pfile, token, terminator)
 650      cpp_reader *pfile;
 651      cpp_token *token;
 652      cppchar_t terminator;
 653 {
 654   cpp_buffer *buffer = pfile->buffer;
 655   cpp_pool *pool = &pfile->ident_pool;
 656   unsigned char *dest, *limit;
 657   cppchar_t c;
 658   bool warned_nulls = false, warned_multi = false;
 659
 660   dest = POOL_FRONT (pool);
 661   limit = POOL_LIMIT (pool);
 662
 663   for (;;)
 664     {
 665       if (buffer->cur == buffer->rlimit)
 666         c = EOF;
 667       else
 668         c = *buffer->cur++;
 669
 670     have_char:
 671       /* We need space for the terminating NUL.  */
 672       if (dest >= limit)
 673         limit = _cpp_next_chunk (pool, 0, &dest);
 674
 675       if (c == EOF)
 676         {
 677           unterminated (pfile, terminator);
 678           break;
 679         }
 680
 681       /* Handle trigraphs, escaped newlines etc.  */
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (buffer, c);
 684
 685       if (c == terminator && unescaped_terminator_p (pfile, dest))
 686         {
 687           c = EOF;
 688           break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* In assembly language, silently terminate string and
 693              character literals at end of line.  This is a kludge
 694              around not knowing where comments are.  */
 695           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 696             break;
 697
 698           /* Character constants and header names may not extend over
 699              multiple lines.  In Standard C, neither may strings.
 700              Unfortunately, we accept multiline strings as an
 701              extension, except in #include family directives.  */
 702           if (terminator != '"' || pfile->state.angled_headers)
 703             {
 704               unterminated (pfile, terminator);
 705               break;
 706             }
 707
 708           if (!warned_multi)
 709             {
 710               warned_multi = true;
 711               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 712             }
 713
 714           if (pfile->mlstring_pos.line == 0)
 715             pfile->mlstring_pos = pfile->lexer_pos;
 716
 717           c = handle_newline (pfile, c);
 718           *dest++ = '\n';
 719           goto have_char;
 720         }
 721       else if (c == '\0' && !warned_nulls)
 722         {
 723           warned_nulls = true;
 724           cpp_warning (pfile, "null character(s) preserved in literal");
 725         }
 726
 727       *dest++ = c;
 728     }
 729
 730   /* Remember the next character.  */
 731   buffer->read_ahead = c;
 732   *dest = '\0';
 733
 734   token->val.str.text = POOL_FRONT (pool);
 735   token->val.str.len = dest - token->val.str.text;
 736   POOL_COMMIT (pool, token->val.str.len + 1);
 737 }
 738
 739 /* The stored comment includes the comment start and any terminator.  */
 740 static void
 741 save_comment (pfile, token, from)
 742      cpp_reader *pfile;
 743      cpp_token *token;
 744      const unsigned char *from;
 745 {
 746   unsigned char *buffer;
 747   unsigned int len;
 748
 749   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 750   /* C++ comments probably (not definitely) have moved past a new
 751      line, which we don't want to save in the comment.  */
 752   if (pfile->buffer->read_ahead != EOF)
 753     len--;
 754   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 755
 756   token->type = CPP_COMMENT;
 757   token->val.str.len = len;
 758   token->val.str.text = buffer;
 759
 760   buffer[0] = '/';
 761   memcpy (buffer + 1, from, len - 1);
 762 }
 763
 764 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 765    want to avoid stepping back when lexing %:%X.  */
 766 static void
 767 lex_percent (buffer, result)
 768      cpp_buffer *buffer;
 769      cpp_token *result;
 770 {
 771   cppchar_t c;
 772
 773   result->type = CPP_MOD;
 774   /* Parsing %:%X could leave an extra character.  */
 775   if (buffer->extra_char == EOF)
 776     c = get_effective_char (buffer);
 777   else
 778     {
 779       c = buffer->read_ahead = buffer->extra_char;
 780       buffer->extra_char = EOF;
 781     }
 782
 783   if (c == '=')
 784     ACCEPT_CHAR (CPP_MOD_EQ);
 785   else if (CPP_OPTION (buffer->pfile, digraphs))
 786     {
 787       if (c == ':')
 788         {
 789           result->flags |= DIGRAPH;
 790           ACCEPT_CHAR (CPP_HASH);
 791           if (get_effective_char (buffer) == '%')
 792             {
 793               buffer->extra_char = get_effective_char (buffer);
 794               if (buffer->extra_char == ':')
 795                 {
 796                   buffer->extra_char = EOF;
 797                   ACCEPT_CHAR (CPP_PASTE);
 798                 }
 799               else
 800                 /* We'll catch the extra_char when we're called back.  */
 801                 buffer->read_ahead = '%';
 802             }
 803         }
 804       else if (c == '>')
 805         {
 806           result->flags |= DIGRAPH;
 807           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 808         }
 809     }
 810 }
 811
 812 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 813    want to avoid stepping back when lexing '...' or '.123'.  In the
 814    latter case we should also set a flag for parse_number.  */
 815 static void
 816 lex_dot (pfile, result)
 817      cpp_reader *pfile;
 818      cpp_token *result;
 819 {
 820   cpp_buffer *buffer = pfile->buffer;
 821   cppchar_t c;
 822
 823   /* Parsing ..X could leave an extra character.  */
 824   if (buffer->extra_char == EOF)
 825     c = get_effective_char (buffer);
 826   else
 827     {
 828       c = buffer->read_ahead = buffer->extra_char;
 829       buffer->extra_char = EOF;
 830     }
 831
 832   /* All known character sets have 0...9 contiguous.  */
 833   if (c >= '0' && c <= '9')
 834     {
 835       result->type = CPP_NUMBER;
 836       parse_number (pfile, &result->val.str, c, 1);
 837     }
 838   else
 839     {
 840       result->type = CPP_DOT;
 841       if (c == '.')
 842         {
 843           buffer->extra_char = get_effective_char (buffer);
 844           if (buffer->extra_char == '.')
 845             {
 846               buffer->extra_char = EOF;
 847               ACCEPT_CHAR (CPP_ELLIPSIS);
 848             }
 849           else
 850             /* We'll catch the extra_char when we're called back.  */
 851             buffer->read_ahead = '.';
 852         }
 853       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 854         ACCEPT_CHAR (CPP_DOT_STAR);
 855     }
 856 }
 857
 858 void
 859 _cpp_lex_token (pfile, result)
 860      cpp_reader *pfile;
 861      cpp_token *result;
 862 {
 863   cppchar_t c;
 864   cpp_buffer *buffer;
 865   const unsigned char *comment_start;
 866   int bol;
 867
 868  next_token:
 869   buffer = pfile->buffer;
 870   result->flags = buffer->saved_flags;
 871   buffer->saved_flags = 0;
 872   bol = (buffer->cur <= buffer->line_base + 1
 873          && pfile->lexer_pos.output_line == pfile->line);
 874  next_char:
 875   pfile->lexer_pos.line = pfile->line;
 876   result->line = pfile->line;
 877  next_char2:
 878   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 879
 880   c = buffer->read_ahead;
 881   if (c == EOF && buffer->cur < buffer->rlimit)
 882     {
 883       c = *buffer->cur++;
 884       pfile->lexer_pos.col++;
 885     }
 886   result->col = pfile->lexer_pos.col;
 887
 888  do_switch:
 889   buffer->read_ahead = EOF;
 890   switch (c)
 891     {
 892     case EOF:
 893       /* To prevent bogus diagnostics, only pop the buffer when
 894          in-progress directives and arguments have been taken care of.
 895          Decrement the line to terminate an in-progress directive.  */
 896       if (pfile->state.in_directive)
 897         pfile->lexer_pos.output_line = pfile->line--;
 898       else if (! pfile->state.parsing_args)
 899         {
 900           /* Non-empty files should end in a newline.  Don't warn for
 901              command line and _Pragma buffers.  */
 902           if (pfile->lexer_pos.col != 0)
 903             {
 904               /* Account for the missing \n, prevent multiple warnings.  */
 905               pfile->line++;
 906               pfile->lexer_pos.col = 0;
 907               if (!buffer->from_stage3)
 908                 cpp_pedwarn (pfile, "no newline at end of file");
 909             }
 910
 911           /* Don't pop the last file.  */
 912           if (buffer->prev)
 913             {
 914               unsigned char stop = buffer->return_at_eof;
 915
 916               _cpp_pop_buffer (pfile);
 917               if (!stop)
 918                 goto next_token;
 919             }
 920         }
 921       result->type = CPP_EOF;
 922       return;
 923
 924     case ' ': case '\t': case '\f': case '\v': case '\0':
 925       skip_whitespace (pfile, c);
 926       result->flags |= PREV_WHITE;
 927       goto next_char2;
 928
 929     case '\n': case '\r':
 930       if (pfile->state.in_directive)
 931         {
 932           result->type = CPP_EOF;
 933           if (pfile->state.parsing_args)
 934             buffer->read_ahead = c;
 935           else
 936             {
 937               handle_newline (pfile, c);
 938               /* Decrementing pfile->line allows directives to
 939                  recognise that the newline has been seen, and also
 940                  means that diagnostics don't point to the next line.  */
 941               pfile->lexer_pos.output_line = pfile->line--;
 942             }
 943           return;
 944         }
 945
 946       handle_newline (pfile, c);
 947       /* This is a new line, so clear any white space flag.  Newlines
 948          in arguments are white space (6.10.3.10); parse_arg takes
 949          care of that.  */
 950       result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 951       bol = 1;
 952       if (pfile->state.parsing_args != 2)
 953         pfile->lexer_pos.output_line = pfile->line;
 954       goto next_char;
 955
 956     case '?':
 957     case '\\':
 958       /* These could start an escaped newline, or '?' a trigraph.  Let
 959          skip_escaped_newlines do all the work.  */
 960       {
 961         unsigned int line = pfile->line;
 962
 963         c = skip_escaped_newlines (buffer, c);
 964         if (line != pfile->line)
 965           /* We had at least one escaped newline of some sort, and the
 966              next character is in buffer->read_ahead.  Update the
 967              token's line and column.  */
 968             goto next_char;
 969
 970         /* We are either the original '?' or '\\', or a trigraph.  */
 971         result->type = CPP_QUERY;
 972         buffer->read_ahead = EOF;
 973         if (c == '\\')
 974           goto random_char;
 975         else if (c != '?')
 976           goto do_switch;
 977       }
 978       break;
 979
 980     case '0': case '1': case '2': case '3': case '4':
 981     case '5': case '6': case '7': case '8': case '9':
 982       result->type = CPP_NUMBER;
 983       parse_number (pfile, &result->val.str, c, 0);
 984       break;
 985
 986     case '$':
 987       if (!CPP_OPTION (pfile, dollars_in_ident))
 988         goto random_char;
 989       /* Fall through...  */
 990
 991     case '_':
 992     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 993     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 994     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 995     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 996     case 'y': case 'z':
 997     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 998     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 999     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1000     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1001     case 'Y': case 'Z':
1002       result->type = CPP_NAME;
1003       result->val.node = parse_identifier (pfile, c);
1004
1005       /* 'L' may introduce wide characters or strings.  */
1006       if (result->val.node == pfile->spec_nodes.n_L)
1007         {
1008           c = buffer->read_ahead; /* For make_string.  */
1009           if (c == '\'' || c == '"')
1010             {
1011               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1012               goto make_string;
1013             }
1014         }
1015       /* Convert named operators to their proper types.  */
1016       else if (result->val.node->flags & NODE_OPERATOR)
1017         {
1018           result->flags |= NAMED_OP;
1019           result->type = result->val.node->value.operator;
1020         }
1021       break;
1022
1023     case '\'':
1024     case '"':
1025       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1026     make_string:
1027       parse_string (pfile, result, c);
1028       break;
1029
1030     case '/':
1031       /* A potential block or line comment.  */
1032       comment_start = buffer->cur;
1033       result->type = CPP_DIV;
1034       c = get_effective_char (buffer);
1035       if (c == '=')
1036         ACCEPT_CHAR (CPP_DIV_EQ);
1037       if (c != '/' && c != '*')
1038         break;
1039
1040       if (c == '*')
1041         {
1042           if (skip_block_comment (pfile))
1043             cpp_error (pfile, "unterminated comment");
1044         }
1045       else
1046         {
1047           if (!CPP_OPTION (pfile, cplusplus_comments)
1048               && !CPP_IN_SYSTEM_HEADER (pfile))
1049             break;
1050
1051           /* Warn about comments only if pedantically GNUC89, and not
1052              in system headers.  */
1053           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1054               && ! buffer->warned_cplusplus_comments)
1055             {
1056               cpp_pedwarn (pfile,
1057                            "C++ style comments are not allowed in ISO C89");
1058               cpp_pedwarn (pfile,
1059                            "(this will be reported only once per input file)");
1060               buffer->warned_cplusplus_comments = 1;
1061             }
1062
1063           /* Skip_line_comment updates buffer->read_ahead.  */
1064           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1065             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1066                                    pfile->lexer_pos.col,
1067                                    "multi-line comment");
1068         }
1069
1070       /* Skipping the comment has updated buffer->read_ahead.  */
1071       if (!pfile->state.save_comments)
1072         {
1073           result->flags |= PREV_WHITE;
1074           goto next_char;
1075         }
1076
1077       /* Save the comment as a token in its own right.  */
1078       save_comment (pfile, result, comment_start);
1079       /* Don't do MI optimisation.  */
1080       return;
1081
1082     case '<':
1083       if (pfile->state.angled_headers)
1084         {
1085           result->type = CPP_HEADER_NAME;
1086           c = '>';              /* terminator.  */
1087           goto make_string;
1088         }
1089
1090       result->type = CPP_LESS;
1091       c = get_effective_char (buffer);
1092       if (c == '=')
1093         ACCEPT_CHAR (CPP_LESS_EQ);
1094       else if (c == '<')
1095         {
1096           ACCEPT_CHAR (CPP_LSHIFT);
1097           if (get_effective_char (buffer) == '=')
1098             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1099         }
1100       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1101         {
1102           ACCEPT_CHAR (CPP_MIN);
1103           if (get_effective_char (buffer) == '=')
1104             ACCEPT_CHAR (CPP_MIN_EQ);
1105         }
1106       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1107         {
1108           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1109           result->flags |= DIGRAPH;
1110         }
1111       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1112         {
1113           ACCEPT_CHAR (CPP_OPEN_BRACE);
1114           result->flags |= DIGRAPH;
1115         }
1116       break;
1117
1118     case '>':
1119       result->type = CPP_GREATER;
1120       c = get_effective_char (buffer);
1121       if (c == '=')
1122         ACCEPT_CHAR (CPP_GREATER_EQ);
1123       else if (c == '>')
1124         {
1125           ACCEPT_CHAR (CPP_RSHIFT);
1126           if (get_effective_char (buffer) == '=')
1127             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1128         }
1129       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1130         {
1131           ACCEPT_CHAR (CPP_MAX);
1132           if (get_effective_char (buffer) == '=')
1133             ACCEPT_CHAR (CPP_MAX_EQ);
1134         }
1135       break;
1136
1137     case '%':
1138       lex_percent (buffer, result);
1139       if (result->type == CPP_HASH)
1140         goto do_hash;
1141       break;
1142
1143     case '.':
1144       lex_dot (pfile, result);
1145       break;
1146
1147     case '+':
1148       result->type = CPP_PLUS;
1149       c = get_effective_char (buffer);
1150       if (c == '=')
1151         ACCEPT_CHAR (CPP_PLUS_EQ);
1152       else if (c == '+')
1153         ACCEPT_CHAR (CPP_PLUS_PLUS);
1154       break;
1155
1156     case '-':
1157       result->type = CPP_MINUS;
1158       c = get_effective_char (buffer);
1159       if (c == '>')
1160         {
1161           ACCEPT_CHAR (CPP_DEREF);
1162           if (CPP_OPTION (pfile, cplusplus)
1163               && get_effective_char (buffer) == '*')
1164             ACCEPT_CHAR (CPP_DEREF_STAR);
1165         }
1166       else if (c == '=')
1167         ACCEPT_CHAR (CPP_MINUS_EQ);
1168       else if (c == '-')
1169         ACCEPT_CHAR (CPP_MINUS_MINUS);
1170       break;
1171
1172     case '*':
1173       result->type = CPP_MULT;
1174       if (get_effective_char (buffer) == '=')
1175         ACCEPT_CHAR (CPP_MULT_EQ);
1176       break;
1177
1178     case '=':
1179       result->type = CPP_EQ;
1180       if (get_effective_char (buffer) == '=')
1181         ACCEPT_CHAR (CPP_EQ_EQ);
1182       break;
1183
1184     case '!':
1185       result->type = CPP_NOT;
1186       if (get_effective_char (buffer) == '=')
1187         ACCEPT_CHAR (CPP_NOT_EQ);
1188       break;
1189
1190     case '&':
1191       result->type = CPP_AND;
1192       c = get_effective_char (buffer);
1193       if (c == '=')
1194         ACCEPT_CHAR (CPP_AND_EQ);
1195       else if (c == '&')
1196         ACCEPT_CHAR (CPP_AND_AND);
1197       break;
1198
1199     case '#':
1200       c = buffer->extra_char;   /* Can be set by error condition below.  */
1201       if (c != EOF)
1202         {
1203           buffer->read_ahead = c;
1204           buffer->extra_char = EOF;
1205         }
1206       else
1207         c = get_effective_char (buffer);
1208
1209       if (c == '#')
1210         {
1211           ACCEPT_CHAR (CPP_PASTE);
1212           break;
1213         }
1214
1215       result->type = CPP_HASH;
1216     do_hash:
1217       if (!bol)
1218         break;
1219       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1220          tokens within the list of arguments that would otherwise act
1221          as preprocessing directives, the behavior is undefined.
1222
1223          This implementation will report a hard error, terminate the
1224          macro invocation, and proceed to process the directive.  */
1225       if (pfile->state.parsing_args)
1226         {
1227           pfile->lexer_pos.output_line = pfile->line;
1228           if (pfile->state.parsing_args == 2)
1229             {
1230               cpp_error (pfile,
1231                          "directives may not be used inside a macro argument");
1232               result->type = CPP_EOF;
1233             }
1234         }
1235       /* in_directive can be true inside a _Pragma.  */
1236       else if (!pfile->state.in_directive)
1237         {
1238           /* This is the hash introducing a directive.  If the return
1239              value is false, it is an assembler #.  */
1240           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1241             goto next_token;
1242         }
1243       break;
1244
1245     case '|':
1246       result->type = CPP_OR;
1247       c = get_effective_char (buffer);
1248       if (c == '=')
1249         ACCEPT_CHAR (CPP_OR_EQ);
1250       else if (c == '|')
1251         ACCEPT_CHAR (CPP_OR_OR);
1252       break;
1253
1254     case '^':
1255       result->type = CPP_XOR;
1256       if (get_effective_char (buffer) == '=')
1257         ACCEPT_CHAR (CPP_XOR_EQ);
1258       break;
1259
1260     case ':':
1261       result->type = CPP_COLON;
1262       c = get_effective_char (buffer);
1263       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1264         ACCEPT_CHAR (CPP_SCOPE);
1265       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1266         {
1267           result->flags |= DIGRAPH;
1268           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1269         }
1270       break;
1271
1272     case '~': result->type = CPP_COMPL; break;
1273     case ',': result->type = CPP_COMMA; break;
1274     case '(': result->type = CPP_OPEN_PAREN; break;
1275     case ')': result->type = CPP_CLOSE_PAREN; break;
1276     case '[': result->type = CPP_OPEN_SQUARE; break;
1277     case ']': result->type = CPP_CLOSE_SQUARE; break;
1278     case '{': result->type = CPP_OPEN_BRACE; break;
1279     case '}': result->type = CPP_CLOSE_BRACE; break;
1280     case ';': result->type = CPP_SEMICOLON; break;
1281
1282       /* @ is a punctuator in Objective C.  */
1283     case '@': result->type = CPP_ATSIGN; break;
1284
1285     random_char:
1286     default:
1287       result->type = CPP_OTHER;
1288       result->val.c = c;
1289       break;
1290     }
1291
1292   if (!pfile->state.in_directive && pfile->state.skipping)
1293     goto next_char;
1294
1295   /* If not in a directive, this token invalidates controlling macros.  */
1296   if (!pfile->state.in_directive)
1297     pfile->mi_valid = false;
1298 }
1299
1300 /* An upper bound on the number of bytes needed to spell a token,
1301    including preceding whitespace.  */
1302 unsigned int
1303 cpp_token_len (token)
1304      const cpp_token *token;
1305 {
1306   unsigned int len;
1307
1308   switch (TOKEN_SPELL (token))
1309     {
1310     default:            len = 0;                                break;
1311     case SPELL_STRING:  len = token->val.str.len;               break;
1312     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1313     }
1314   /* 1 for whitespace, 4 for comment delimeters.  */
1315   return len + 5;
1316 }
1317
1318 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1319    already contain the enough space to hold the token's spelling.
1320    Returns a pointer to the character after the last character
1321    written.  */
1322 unsigned char *
1323 cpp_spell_token (pfile, token, buffer)
1324      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1325      const cpp_token *token;
1326      unsigned char *buffer;
1327 {
1328   switch (TOKEN_SPELL (token))
1329     {
1330     case SPELL_OPERATOR:
1331       {
1332         const unsigned char *spelling;
1333         unsigned char c;
1334
1335         if (token->flags & DIGRAPH)
1336           spelling
1337             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1338         else if (token->flags & NAMED_OP)
1339           goto spell_ident;
1340         else
1341           spelling = TOKEN_NAME (token);
1342
1343         while ((c = *spelling++) != '\0')
1344           *buffer++ = c;
1345       }
1346       break;
1347
1348     case SPELL_IDENT:
1349       spell_ident:
1350       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1351       buffer += NODE_LEN (token->val.node);
1352       break;
1353
1354     case SPELL_STRING:
1355       {
1356         int left, right, tag;
1357         switch (token->type)
1358           {
1359           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1360           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1361           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1362           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1363           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1364           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1365           }
1366         if (tag) *buffer++ = tag;
1367         if (left) *buffer++ = left;
1368         memcpy (buffer, token->val.str.text, token->val.str.len);
1369         buffer += token->val.str.len;
1370         if (right) *buffer++ = right;
1371       }
1372       break;
1373
1374     case SPELL_CHAR:
1375       *buffer++ = token->val.c;
1376       break;
1377
1378     case SPELL_NONE:
1379       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1380       break;
1381     }
1382
1383   return buffer;
1384 }
1385
1386 /* Returns a token as a null-terminated string.  The string is
1387    temporary, and automatically freed later.  Useful for diagnostics.  */
1388 unsigned char *
1389 cpp_token_as_text (pfile, token)
1390      cpp_reader *pfile;
1391      const cpp_token *token;
1392 {
1393   unsigned int len = cpp_token_len (token);
1394   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1395
1396   end = cpp_spell_token (pfile, token, start);
1397   end[0] = '\0';
1398
1399   return start;
1400 }
1401
1402 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1403 const char *
1404 cpp_type2name (type)
1405      enum cpp_ttype type;
1406 {
1407   return (const char *) token_spellings[type].name;
1408 }
1409
1410 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1411    for efficiency - to avoid double-buffering.  Also, outputs a space
1412    if PREV_WHITE is flagged.  */
1413 void
1414 cpp_output_token (token, fp)
1415      const cpp_token *token;
1416      FILE *fp;
1417 {
1418   if (token->flags & PREV_WHITE)
1419     putc (' ', fp);
1420
1421   switch (TOKEN_SPELL (token))
1422     {
1423     case SPELL_OPERATOR:
1424       {
1425         const unsigned char *spelling;
1426
1427         if (token->flags & DIGRAPH)
1428           spelling
1429             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1430         else if (token->flags & NAMED_OP)
1431           goto spell_ident;
1432         else
1433           spelling = TOKEN_NAME (token);
1434
1435         ufputs (spelling, fp);
1436       }
1437       break;
1438
1439     spell_ident:
1440     case SPELL_IDENT:
1441       ufputs (NODE_NAME (token->val.node), fp);
1442     break;
1443
1444     case SPELL_STRING:
1445       {
1446         int left, right, tag;
1447         switch (token->type)
1448           {
1449           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1450           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1451           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1452           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1453           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1454           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1455           }
1456         if (tag) putc (tag, fp);
1457         if (left) putc (left, fp);
1458         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1459         if (right) putc (right, fp);
1460       }
1461       break;
1462
1463     case SPELL_CHAR:
1464       putc (token->val.c, fp);
1465       break;
1466
1467     case SPELL_NONE:
1468       /* An error, most probably.  */
1469       break;
1470     }
1471 }
1472
1473 /* Compare two tokens.  */
1474 int
1475 _cpp_equiv_tokens (a, b)
1476      const cpp_token *a, *b;
1477 {
1478   if (a->type == b->type && a->flags == b->flags)
1479     switch (TOKEN_SPELL (a))
1480       {
1481       default:                  /* Keep compiler happy.  */
1482       case SPELL_OPERATOR:
1483         return 1;
1484       case SPELL_CHAR:
1485         return a->val.c == b->val.c; /* Character.  */
1486       case SPELL_NONE:
1487         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1488       case SPELL_IDENT:
1489         return a->val.node == b->val.node;
1490       case SPELL_STRING:
1491         return (a->val.str.len == b->val.str.len
1492                 && !memcmp (a->val.str.text, b->val.str.text,
1493                             a->val.str.len));
1494       }
1495
1496   return 0;
1497 }
1498
1499 /* Determine whether two tokens can be pasted together, and if so,
1500    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1501    be pasted, or the appropriate type for the merged token if they
1502    can.  */
1503 enum cpp_ttype
1504 cpp_can_paste (pfile, token1, token2, digraph)
1505      cpp_reader * pfile;
1506      const cpp_token *token1, *token2;
1507      int* digraph;
1508 {
1509   enum cpp_ttype a = token1->type, b = token2->type;
1510   int cxx = CPP_OPTION (pfile, cplusplus);
1511
1512   /* Treat named operators as if they were ordinary NAMEs.  */
1513   if (token1->flags & NAMED_OP)
1514     a = CPP_NAME;
1515   if (token2->flags & NAMED_OP)
1516     b = CPP_NAME;
1517
1518   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1519     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1520
1521   switch (a)
1522     {
1523     case CPP_GREATER:
1524       if (b == a) return CPP_RSHIFT;
1525       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1526       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1527       break;
1528     case CPP_LESS:
1529       if (b == a) return CPP_LSHIFT;
1530       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1531       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1532       if (CPP_OPTION (pfile, digraphs))
1533         {
1534           if (b == CPP_COLON)
1535             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1536           if (b == CPP_MOD)
1537             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1538         }
1539       break;
1540
1541     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1542     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1543     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1544
1545     case CPP_MINUS:
1546       if (b == a)               return CPP_MINUS_MINUS;
1547       if (b == CPP_GREATER)     return CPP_DEREF;
1548       break;
1549     case CPP_COLON:
1550       if (b == a && cxx)        return CPP_SCOPE;
1551       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1552         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1553       break;
1554
1555     case CPP_MOD:
1556       if (CPP_OPTION (pfile, digraphs))
1557         {
1558           if (b == CPP_GREATER)
1559             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1560           if (b == CPP_COLON)
1561             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1562         }
1563       break;
1564     case CPP_DEREF:
1565       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1566       break;
1567     case CPP_DOT:
1568       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1569       if (b == CPP_NUMBER)      return CPP_NUMBER;
1570       break;
1571
1572     case CPP_HASH:
1573       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1574         /* %:%: digraph */
1575         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1576       break;
1577
1578     case CPP_NAME:
1579       if (b == CPP_NAME)        return CPP_NAME;
1580       if (b == CPP_NUMBER
1581           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1582       if (b == CPP_CHAR
1583           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1584       if (b == CPP_STRING
1585           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1586       break;
1587
1588     case CPP_NUMBER:
1589       if (b == CPP_NUMBER)      return CPP_NUMBER;
1590       if (b == CPP_NAME)        return CPP_NUMBER;
1591       if (b == CPP_DOT)         return CPP_NUMBER;
1592       /* Numbers cannot have length zero, so this is safe.  */
1593       if ((b == CPP_PLUS || b == CPP_MINUS)
1594           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1595         return CPP_NUMBER;
1596       break;
1597
1598     default:
1599       break;
1600     }
1601
1602   return CPP_EOF;
1603 }
1604
1605 /* Returns nonzero if a space should be inserted to avoid an
1606    accidental token paste for output.  For simplicity, it is
1607    conservative, and occasionally advises a space where one is not
1608    needed, e.g. "." and ".2".  */
1609
1610 int
1611 cpp_avoid_paste (pfile, token1, token2)
1612      cpp_reader *pfile;
1613      const cpp_token *token1, *token2;
1614 {
1615   enum cpp_ttype a = token1->type, b = token2->type;
1616   cppchar_t c;
1617
1618   if (token1->flags & NAMED_OP)
1619     a = CPP_NAME;
1620   if (token2->flags & NAMED_OP)
1621     b = CPP_NAME;
1622
1623   c = EOF;
1624   if (token2->flags & DIGRAPH)
1625     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1626   else if (token_spellings[b].category == SPELL_OPERATOR)
1627     c = token_spellings[b].name[0];
1628
1629   /* Quickly get everything that can paste with an '='.  */
1630   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1631     return 1;
1632
1633   switch (a)
1634     {
1635     case CPP_GREATER:   return c == '>' || c == '?';
1636     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1637     case CPP_PLUS:      return c == '+';
1638     case CPP_MINUS:     return c == '-' || c == '>';
1639     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1640     case CPP_MOD:       return c == ':' || c == '>';
1641     case CPP_AND:       return c == '&';
1642     case CPP_OR:        return c == '|';
1643     case CPP_COLON:     return c == ':' || c == '>';
1644     case CPP_DEREF:     return c == '*';
1645     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1646     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1647     case CPP_NAME:      return ((b == CPP_NUMBER
1648                                  && name_p (pfile, &token2->val.str))
1649                                 || b == CPP_NAME
1650                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1651     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1652                                 || c == '.' || c == '+' || c == '-');
1653     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1654                                 && token1->val.c == '@'
1655                                 && (b == CPP_NAME || b == CPP_STRING));
1656     default:            break;
1657     }
1658
1659   return 0;
1660 }
1661
1662 /* Output all the remaining tokens on the current line, and a newline
1663    character, to FP.  Leading whitespace is removed.  */
1664 void
1665 cpp_output_line (pfile, fp)
1666      cpp_reader *pfile;
1667      FILE *fp;
1668 {
1669   cpp_token token;
1670
1671   cpp_get_token (pfile, &token);
1672   token.flags &= ~PREV_WHITE;
1673   while (token.type != CPP_EOF)
1674     {
1675       cpp_output_token (&token, fp);
1676       cpp_get_token (pfile, &token);
1677     }
1678
1679   putc ('\n', fp);
1680 }
1681
/* Return the numeric value (0 to 15) of the hexadecimal digit C,
   which may be a decimal digit or a letter a-f in either case.
   Aborts if C is not a hexadecimal digit, so callers must check
   first.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'A' && c <= 'F')
    value = 10 + (c - 'A');
  else if (c >= 'a' && c <= 'f')
    value = 10 + (c - 'a');
  else
    abort ();

  return value;
}
1695
1696 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1697    failure if cpplib is not parsing C++ or C99.  Such failure is
1698    silent, and no variables are updated.  Otherwise returns 0, and
1699    warns if -Wtraditional.
1700
1701    [lex.charset]: The character designated by the universal character
1702    name \UNNNNNNNN is that character whose character short name in
1703    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1704    universal character name \uNNNN is that character whose character
1705    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1706    for a universal character name is less than 0x20 or in the range
1707    0x7F-0x9F (inclusive), or if the universal character name
1708    designates a character in the basic source character set, then the
1709    program is ill-formed.
1710
1711    We assume that wchar_t is Unicode, so we don't need to do any
1712    mapping.  Is this ever wrong?
1713
1714    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1715    LIMIT is the end of the string or charconst.  PSTR is updated to
1716    point after the UCS on return, and the UCS is written into PC.  */
1717
1718 static int
1719 maybe_read_ucs (pfile, pstr, limit, pc)
1720      cpp_reader *pfile;
1721      const unsigned char **pstr;
1722      const unsigned char *limit;
1723      unsigned int *pc;
1724 {
1725   const unsigned char *p = *pstr;
1726   unsigned int code = 0;
1727   unsigned int c = *pc, length;
1728
1729   /* Only attempt to interpret a UCS for C++ and C99.  */
1730   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1731     return 1;
1732
1733   if (CPP_WTRADITIONAL (pfile))
1734     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1735
1736   length = (c == 'u' ? 4: 8);
1737
1738   if ((size_t) (limit - p) < length)
1739     {
1740       cpp_error (pfile, "incomplete universal-character-name");
1741       /* Skip to the end to avoid more diagnostics.  */
1742       p = limit;
1743     }
1744   else
1745     {
1746       for (; length; length--, p++)
1747         {
1748           c = *p;
1749           if (ISXDIGIT (c))
1750             code = (code << 4) + hex_digit_value (c);
1751           else
1752             {
1753               cpp_error (pfile,
1754                          "non-hex digit '%c' in universal-character-name", c);
1755               /* We shouldn't skip in case there are multibyte chars.  */
1756               break;
1757             }
1758         }
1759     }
1760
1761 #ifdef TARGET_EBCDIC
1762   cpp_error (pfile, "universal-character-name on EBCDIC target");
1763   code = 0x3f;  /* EBCDIC invalid character */
1764 #else
1765  /* True extended characters are OK.  */
1766   if (code >= 0xa0
1767       && !(code & 0x80000000)
1768       && !(code >= 0xD800 && code <= 0xDFFF))
1769     ;
1770   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1771      hex escapes so that this also works with EBCDIC hosts.  */
1772   else if (code == 0x24 || code == 0x40 || code == 0x60)
1773     ;
1774   /* Don't give another error if one occurred above.  */
1775   else if (length == 0)
1776     cpp_error (pfile, "universal-character-name out of range");
1777 #endif
1778
1779   *pstr = p;
1780   *pc = code;
1781   return 0;
1782 }
1783
1784 /* Interpret an escape sequence, and return its value.  PSTR points to
1785    the input pointer, which is just after the backslash.  LIMIT is how
1786    much text we have.  MASK is a bitmask for the precision for the
1787    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1788    interpret escapes that did not exist in traditional C.
1789
1790    Handles all relevant diagnostics.  */
1791
1792 unsigned int
1793 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1794      cpp_reader *pfile;
1795      const unsigned char **pstr;
1796      const unsigned char *limit;
1797      unsigned HOST_WIDE_INT mask;
1798      int traditional;
1799 {
1800   int unknown = 0;
1801   const unsigned char *str = *pstr;
1802   unsigned int c = *str++;
1803
1804   switch (c)
1805     {
1806     case '\\': case '\'': case '"': case '?': break;
1807     case 'b': c = TARGET_BS;      break;
1808     case 'f': c = TARGET_FF;      break;
1809     case 'n': c = TARGET_NEWLINE; break;
1810     case 'r': c = TARGET_CR;      break;
1811     case 't': c = TARGET_TAB;     break;
1812     case 'v': c = TARGET_VT;      break;
1813
1814     case '(': case '{': case '[': case '%':
1815       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1816          '\%' is used to prevent SCCS from getting confused.  */
1817       unknown = CPP_PEDANTIC (pfile);
1818       break;
1819
1820     case 'a':
1821       if (CPP_WTRADITIONAL (pfile))
1822         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1823       if (!traditional)
1824         c = TARGET_BELL;
1825       break;
1826
1827     case 'e': case 'E':
1828       if (CPP_PEDANTIC (pfile))
1829         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1830       c = TARGET_ESC;
1831       break;
1832
1833     case 'u': case 'U':
1834       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1835       break;
1836
1837     case 'x':
1838       if (CPP_WTRADITIONAL (pfile))
1839         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1840
1841       if (!traditional)
1842         {
1843           unsigned int i = 0, overflow = 0;
1844           int digits_found = 0;
1845
1846           while (str < limit)
1847             {
1848               c = *str;
1849               if (! ISXDIGIT (c))
1850                 break;
1851               str++;
1852               overflow |= i ^ (i << 4 >> 4);
1853               i = (i << 4) + hex_digit_value (c);
1854               digits_found = 1;
1855             }
1856
1857           if (!digits_found)
1858             cpp_error (pfile, "\\x used with no following hex digits");
1859
1860           if (overflow | (i != (i & mask)))
1861             {
1862               cpp_pedwarn (pfile, "hex escape sequence out of range");
1863               i &= mask;
1864             }
1865           c = i;
1866         }
1867       break;
1868
1869     case '0':  case '1':  case '2':  case '3':
1870     case '4':  case '5':  case '6':  case '7':
1871       {
1872         unsigned int i = c - '0';
1873         int count = 0;
1874
1875         while (str < limit && ++count < 3)
1876           {
1877             c = *str;
1878             if (c < '0' || c > '7')
1879               break;
1880             str++;
1881             i = (i << 3) + c - '0';
1882           }
1883
1884         if (i != (i & mask))
1885           {
1886             cpp_pedwarn (pfile, "octal escape sequence out of range");
1887             i &= mask;
1888           }
1889         c = i;
1890       }
1891       break;
1892
1893     default:
1894       unknown = 1;
1895       break;
1896     }
1897
1898   if (unknown)
1899     {
1900       if (ISGRAPH (c))
1901         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1902       else
1903         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1904     }
1905
1906   if (c > mask)
1907     cpp_pedwarn (pfile, "escape sequence out of range for character");
1908
1909   *pstr = str;
1910   return c;
1911 }
1912
/* Widths, in bits, used when interpreting narrow and wide character
   constants.  Unless something has already defined the MAX_ variants,
   they fall back to the plain CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1920
1921 /* Interpret a (possibly wide) character constant in TOKEN.
1922    WARN_MULTI warns about multi-character charconsts, if not
1923    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1924    that did not exist in traditional C.  PCHARS_SEEN points to a
1925    variable that is filled in with the number of characters seen.  */
1926 HOST_WIDE_INT
1927 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1928      cpp_reader *pfile;
1929      const cpp_token *token;
1930      int warn_multi;
1931      int traditional;
1932      unsigned int *pchars_seen;
1933 {
1934   const unsigned char *str = token->val.str.text;
1935   const unsigned char *limit = str + token->val.str.len;
1936   unsigned int chars_seen = 0;
1937   unsigned int width, max_chars, c;
1938   unsigned HOST_WIDE_INT mask;
1939   HOST_WIDE_INT result = 0;
1940
1941 #ifdef MULTIBYTE_CHARS
1942   (void) local_mbtowc (NULL, NULL, 0);
1943 #endif
1944
1945   /* Width in bits.  */
1946   if (token->type == CPP_CHAR)
1947     width = MAX_CHAR_TYPE_SIZE;
1948   else
1949     width = MAX_WCHAR_TYPE_SIZE;
1950
1951   if (width < HOST_BITS_PER_WIDE_INT)
1952     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1953   else
1954     mask = ~0;
1955   max_chars = HOST_BITS_PER_WIDE_INT / width;
1956
1957   while (str < limit)
1958     {
1959 #ifdef MULTIBYTE_CHARS
1960       wchar_t wc;
1961       int char_len;
1962
1963       char_len = local_mbtowc (&wc, str, limit - str);
1964       if (char_len == -1)
1965         {
1966           cpp_warning (pfile, "ignoring invalid multibyte character");
1967           c = *str++;
1968         }
1969       else
1970         {
1971           str += char_len;
1972           c = wc;
1973         }
1974 #else
1975       c = *str++;
1976 #endif
1977
1978       if (c == '\\')
1979         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1980
1981 #ifdef MAP_CHARACTER
1982       if (ISPRINT (c))
1983         c = MAP_CHARACTER (c);
1984 #endif
1985
1986       /* Merge character into result; ignore excess chars.  */
1987       if (++chars_seen <= max_chars)
1988         {
1989           if (width < HOST_BITS_PER_WIDE_INT)
1990             result = (result << width) | (c & mask);
1991           else
1992             result = c;
1993         }
1994     }
1995
1996   if (chars_seen == 0)
1997     cpp_error (pfile, "empty character constant");
1998   else if (chars_seen > max_chars)
1999     {
2000       chars_seen = max_chars;
2001       cpp_warning (pfile, "character constant too long");
2002     }
2003   else if (chars_seen > 1 && !traditional && warn_multi)
2004     cpp_warning (pfile, "multi-character character constant");
2005
2006   /* If char type is signed, sign-extend the constant.  The
2007      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2008   if (token->type == CPP_CHAR && chars_seen)
2009     {
2010       unsigned int nbits = chars_seen * width;
2011       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2012
2013       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2014           || ((result >> (nbits - 1)) & 1) == 0)
2015         result &= mask;
2016       else
2017         result |= ~mask;
2018     }
2019
2020   *pchars_seen = chars_seen;
2021   return result;
2022 }
2023
2024 /* Memory pools.  */
2025
/* Alignment probe.  A lone char is followed by a union of a pointer
   and a double, so the offset of U within the structure is the
   alignment that union requires.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used for chunk sizes, and for pools that do not request
   one of their own.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2037
2038 static int
2039 chunk_suitable (pool, chunk, size)
2040      cpp_pool *pool;
2041      cpp_chunk *chunk;
2042      unsigned int size;
2043 {
2044   /* Being at least twice SIZE means we can use memcpy in
2045      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2046      anyway.  */
2047   return (chunk && pool->locked != chunk
2048           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2049 }
2050
2051 /* Returns the end of the new pool.  PTR points to a char in the old
2052    pool, and is updated to point to the same char in the new pool.  */
2053 unsigned char *
2054 _cpp_next_chunk (pool, len, ptr)
2055      cpp_pool *pool;
2056      unsigned int len;
2057      unsigned char **ptr;
2058 {
2059   cpp_chunk *chunk = pool->cur->next;
2060
2061   /* LEN is the minimum size we want in the new pool.  */
2062   len += POOL_ROOM (pool);
2063   if (! chunk_suitable (pool, chunk, len))
2064     {
2065       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2066
2067       chunk->next = pool->cur->next;
2068       pool->cur->next = chunk;
2069     }
2070
2071   /* Update the pointer before changing chunk's front.  */
2072   if (ptr)
2073     *ptr += chunk->base - POOL_FRONT (pool);
2074
2075   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2076   chunk->front = chunk->base;
2077
2078   pool->cur = chunk;
2079   return POOL_LIMIT (pool);
2080 }
2081
2082 static cpp_chunk *
2083 new_chunk (size)
2084      unsigned int size;
2085 {
2086   unsigned char *base;
2087   cpp_chunk *result;
2088
2089   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2090   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2091   /* Put the chunk descriptor at the end.  Then chunk overruns will
2092      cause obvious chaos.  */
2093   result = (cpp_chunk *) (base + size);
2094   result->base = base;
2095   result->front = base;
2096   result->limit = base + size;
2097   result->next = 0;
2098
2099   return result;
2100 }
2101
2102 void
2103 _cpp_init_pool (pool, size, align, temp)
2104      cpp_pool *pool;
2105      unsigned int size, align, temp;
2106 {
2107   if (align == 0)
2108     align = DEFAULT_ALIGNMENT;
2109   if (align & (align - 1))
2110     abort ();
2111   pool->align = align;
2112   pool->first = new_chunk (size);
2113   pool->cur = pool->first;
2114   pool->locked = 0;
2115   pool->locks = 0;
2116   if (temp)
2117     pool->cur->next = pool->cur;
2118 }
2119
2120 void
2121 _cpp_lock_pool (pool)
2122      cpp_pool *pool;
2123 {
2124   if (pool->locks++ == 0)
2125     pool->locked = pool->cur;
2126 }
2127
2128 void
2129 _cpp_unlock_pool (pool)
2130      cpp_pool *pool;
2131 {
2132   if (--pool->locks == 0)
2133     pool->locked = 0;
2134 }
2135
2136 void
2137 _cpp_free_pool (pool)
2138      cpp_pool *pool;
2139 {
2140   cpp_chunk *chunk = pool->first, *next;
2141
2142   do
2143     {
2144       next = chunk->next;
2145       free (chunk->base);
2146       chunk = next;
2147     }
2148   while (chunk && chunk != pool->first);
2149 }
2150
2151 /* Reserve LEN bytes from a memory pool.  */
2152 unsigned char *
2153 _cpp_pool_reserve (pool, len)
2154      cpp_pool *pool;
2155      unsigned int len;
2156 {
2157   len = POOL_ALIGN (len, pool->align);
2158   if (len > (unsigned int) POOL_ROOM (pool))
2159     _cpp_next_chunk (pool, len, 0);
2160
2161   return POOL_FRONT (pool);
2162 }
2163
2164 /* Allocate LEN bytes from a memory pool.  */
2165 unsigned char *
2166 _cpp_pool_alloc (pool, len)
2167      cpp_pool *pool;
2168      unsigned int len;
2169 {
2170   unsigned char *result = _cpp_pool_reserve (pool, len);
2171
2172   POOL_COMMIT (pool, len);
2173   return result;
2174 }