gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64 /* One row of the token_spellings table below.  */
  65 struct token_spelling
  66 {
  67   enum spell_type category;     /* How a token of this type is spelled.  */
  68   const unsigned char *name;    /* Operator spelling, or enumerator name.  */
  69 };
  70 /* Spellings of the digraph forms of tokens.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73 /* OP and TK turn each TTYPE_TABLE row into a token_spellings entry.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79 /* Spelling category and name for TOKEN, indexed by its type.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86 /* Comment and whitespace skipping, and lexing of individual token kinds.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 102                                    const unsigned char *, unsigned int *));
 103 /* Helpers for cpp_pool chunks, plus hex_digit_value.  */
 104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 106 static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine: tests whether TOKEN spells the NUL-terminated
 109    string STRING.  Only a CPP_NAME token can match; any other token
 110    type yields 0.  Otherwise the identifier's spelling is compared
 111    with ustrcmp and 1 is returned on equality, 0 on a mismatch.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   const U_CHAR *wanted = (const U_CHAR *) string;
 119
 120   return (token->type == CPP_NAME
 121           && ustrcmp (NODE_NAME (token->val.node), wanted) == 0);
 122 }
 123
 124 /* Call when meeting a newline (NEWLINE_CHAR).  Updates the line and
 125    column bookkeeping and returns the character after the newline (or
 126    CR-LF / LF-CR combination), or EOF; it is also put in read_ahead.  */
 127 static cppchar_t
 128 handle_newline (pfile, newline_char)
 129      cpp_reader *pfile;
 130      cppchar_t newline_char;
 131 {
 132   cpp_buffer *const buf = pfile->buffer;
 133   cppchar_t following;
 134
 135   pfile->line++;
 136   pfile->pseudo_newlines++;
 137   buf->lineno++;
 138   buf->col_adjust = 0;
 139   buf->line_base = buf->cur;
 140
 141   if (buf->cur >= buf->rlimit)
 142     following = EOF;
 143   else
 144     {
 145       following = *buf->cur++;
 146       /* Assuming NEWLINE_CHAR is '\r' or '\n', the sums match only
 147          when FOLLOWING is the other one of the pair.  */
 148       if (following + newline_char == '\r' + '\n')
 149         {
 150           /* Swallow the second half; the line begins after it.  */
 151           buf->line_base = buf->cur;
 152           following = buf->cur < buf->rlimit ? *buf->cur++ : EOF;
 153         }
 154     }
 155
 156   buf->read_ahead = following;
 157   return following;
 158 }
 159
 160 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 161    encountered.  Issues any trigraph warning that is enabled, and
 162    returns true if the trigraph should be honoured.  FROM_CHAR is the
 163    third character of the trigraph, and is presumed to be the
 164    previous character for position reporting.  */
 165 static int
 166 trigraph_ok (pfile, from_char)
 167      cpp_reader *pfile;
 168      cppchar_t from_char;
 169 {
 170   const int honoured = CPP_OPTION (pfile, trigraphs);
 171   cpp_buffer *const buf = pfile->buffer;
 172
 173   /* Nothing to report if warnings are off, or inside a comment.  */
 174   if (!CPP_OPTION (pfile, warn_trigraphs) || pfile->state.lexing_comment)
 175     return honoured;
 176
 177   if (honoured)
 178     cpp_warning_with_line (pfile, buf->lineno, CPP_BUF_COL (buf) - 2,
 179                            "trigraph ??%c converted to %c",
 180                            (int) from_char,
 181                            (int) _cpp_trigraph_map[from_char]);
 182   else if (buf->last_Wtrigraphs != buf->cur)
 183     {
 184       /* Record the position so that a second look at the same spot
 185          stays silent.  */
 186       buf->last_Wtrigraphs = buf->cur;
 187       cpp_warning_with_line (pfile, buf->lineno, CPP_BUF_COL (buf) - 2,
 188                              "trigraph ??%c ignored", (int) from_char);
 189     }
 190   return honoured;
 191 }
 192
 193 /* Assumes local variables buffer and result.  Sets the token type of
        result to T and resets buffer->read_ahead to EOF.  */
 194 #define ACCEPT_CHAR(t) \
 195   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 196
 197 /* When we move to multibyte character sets, add to these something
 198    that saves and restores the state of the multibyte conversion
 199    library.  This probably involves saving and restoring a "cookie".
 200    In the case of glibc it is an 8-byte structure, so is not a high
 201    overhead operation.  In any case, it's out of the fast path.

        Both macros assume local variables buffer and saved_cur; for now
        the only state saved is the read position buffer->cur.  */
 202 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 203 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 204
 205 /* Skips any escaped newlines introduced by NEXT, which is either a
 206    '?' or a '\\'.  Returns the next character, which will also have
 207    been placed in buffer->read_ahead.  This routine performs
 208    preprocessing stages 1 and 2 of the ISO C standard.

        The callers in this file invoke it with buffer->cur just past
        NEXT.  A question mark pair followed by a character found in
        _cpp_trigraph_map is replaced by its mapping when trigraph_ok
        allows it.  A backslash (literal, or produced by a trigraph)
        followed by optional horizontal space and a newline is removed,
        and scanning continues with the character after that newline.
        When what follows turns out to be neither, buffer->cur is put
        back to the last saved position and the character obtained so
        far is returned.  Nothing is skipped for from_stage3 buffers.  */
 209 static cppchar_t
 210 skip_escaped_newlines (buffer, next)
 211      cpp_buffer *buffer;
 212      cppchar_t next;
 213 {
 214   /* Only do this if we apply stages 1 and 2.  */
 215   if (!buffer->from_stage3)
 216     {
 217       cppchar_t next1;
 218       const unsigned char *saved_cur;
 219       int space;
 220
 221       do
 222         {
 223           if (buffer->cur == buffer->rlimit)
 224             break;
 225           /* Remember where to rewind to if this is a false alarm.  */
 226           SAVE_STATE ();
 227           if (next == '?')
 228             {
 229               next1 = *buffer->cur++;
 230               if (next1 != '?' || buffer->cur == buffer->rlimit)
 231                 {
 232                   RESTORE_STATE ();
 233                   break;
 234                 }
 235               /* The third character decides whether this is a trigraph.  */
 236               next1 = *buffer->cur++;
 237               if (!_cpp_trigraph_map[next1]
 238                   || !trigraph_ok (buffer->pfile, next1))
 239                 {
 240                   RESTORE_STATE ();
 241                   break;
 242                 }
 243
 244               /* We have a full trigraph here.  */
 245               next = _cpp_trigraph_map[next1];
 246               if (next != '\\' || buffer->cur == buffer->rlimit)
 247                 break;
 248               SAVE_STATE ();    /* Later rewinds stop after the trigraph.  */
 249             }
 250
 251           /* We have a backslash, and room for at least one more character.  */
 252           space = 0;
 253           do
 254             {
 255               next1 = *buffer->cur++;
 256               if (!is_nvspace (next1))
 257                 break;
 258               space = 1;
 259             }
 260           while (buffer->cur < buffer->rlimit);
 261           /* Anything but vertical space means this is no escaped newline.  */
 262           if (!is_vspace (next1))
 263             {
 264               RESTORE_STATE ();
 265               break;
 266             }
 267
 268           if (space && !buffer->pfile->state.lexing_comment)
 269             cpp_warning (buffer->pfile,
 270                          "backslash and newline separated by space");
 271           /* Consume the newline; NEXT becomes the character after it.  */
 272           next = handle_newline (buffer->pfile, next1);
 273           if (next == EOF)
 274             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 275         }
 276       while (next == '\\' || next == '?');
 277     }
 278
 279   buffer->read_ahead = next;
 280   return next;
 281 }
 282
 283 /* Fetches the next character of BUFFER once trigraph conversion and
 284    removal of any run of escaped newlines have been applied, or EOF
 285    at the end of the buffer.  The result is also stored in
 286    buffer->read_ahead.  Ordinary characters take the fast path.  */
 287 static cppchar_t
 288 get_effective_char (buffer)
 289      cpp_buffer *buffer;
 290 {
 291   cppchar_t ch;
 292
 293   if (buffer->cur >= buffer->rlimit)
 294     ch = EOF;
 295   else
 296     {
 297       ch = *buffer->cur++;
 298       /* Only '?' (possible trigraph) and '\\' (possible escaped
 299          newline) need the slow path.  */
 300       if (ch == '\\' || ch == '?')
 301         ch = skip_escaped_newlines (buffer, ch);
 302     }
 303
 304   buffer->read_ahead = ch;
 305   return ch;
 306 }
 307
 308 /* Skip a C-style block comment.  We find the end of the comment by
 309    seeing if an asterisk is before every '/' we encounter.  Returns
 310    non-zero if comment terminated by EOF, zero otherwise.

        PREVC always trails C by one character so that the closing
        asterisk and slash pair can be recognised.  lexing_comment is
        set while scanning, which silences the trigraph and
        backslash-space warnings of the helper routines, and
        buffer->read_ahead is EOF on return.  */
 311 static int
 312 skip_block_comment (pfile)
 313      cpp_reader *pfile;
 314 {
 315   cpp_buffer *buffer = pfile->buffer;
 316   cppchar_t c = EOF, prevc = EOF;
 317
 318   pfile->state.lexing_comment = 1;
 319   while (buffer->cur != buffer->rlimit)
 320     {
 321       prevc = c, c = *buffer->cur++;
 322       /* Re-entered by goto when C has already been fetched.  */
 323     next_char:
 324       /* FIXME: For speed, create a new character class of characters
 325          of interest inside block comments.  */
 326       if (c == '?' || c == '\\')
 327         c = skip_escaped_newlines (buffer, c);
 328
 329       /* People like decorating comments with '*', so check for '/'
 330          instead for efficiency.  */
 331       if (c == '/')
 332         {
 333           if (prevc == '*')
 334             break;
 335
 336           /* Warn about potential nested comments, but not if the '/'
 337              comes immediately before the true comment delimiter.
 338              Don't bother to get it right across escaped newlines.  */
 339           if (CPP_OPTION (pfile, warn_comments)
 340               && buffer->cur != buffer->rlimit)
 341             {
 342               prevc = c, c = *buffer->cur++;
 343               if (c == '*' && buffer->cur != buffer->rlimit)
 344                 {
 345                   prevc = c, c = *buffer->cur++;
 346                   if (c != '/')
 347                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 348                                            CPP_BUF_COL (buffer),
 349                                            "\"/*\" within comment");
 350                 }
 351               goto next_char;
 352             }
 353         }
 354       else if (is_vspace (c))
 355         {
 356           prevc = c, c = handle_newline (pfile, c);
 357           goto next_char;
 358         }
 359       else if (c == '\t')
 360         adjust_column (pfile);
 361     }
 362
 363   pfile->state.lexing_comment = 0;
 364   buffer->read_ahead = EOF;
 365   return c != '/' || prevc != '*';
 366 }
 367
 368 /* Skip a C++ line comment, looking through escaped newlines.  The
 369    character that stopped the scan (the newline, or EOF) is left in
 370    buffer->read_ahead for the caller.  Returns non-zero if the
 371    comment spilled onto more than one line.  */
 372 static int
 373 skip_line_comment (pfile)
 374      cpp_reader *pfile;
 375 {
 376   cpp_buffer *const buf = pfile->buffer;
 377   const unsigned int first_line = buf->lineno;
 378   cppchar_t ch;
 379
 380   pfile->state.lexing_comment = 1;
 381   for (;;)
 382     {
 383       ch = EOF;
 384       if (buf->cur == buf->rlimit)
 385         break;
 386       ch = *buf->cur++;
 387       if (ch == '\\' || ch == '?')
 388         ch = skip_escaped_newlines (buf, ch);
 389       if (is_vspace (ch))
 390         break;
 391     }
 392   pfile->state.lexing_comment = 0;
 393   buf->read_ahead = ch;         /* Leave any newline for caller.  */
 394   return buf->lineno != first_line;
 395 }
 396
 397 /* Called with pfile->buffer->cur one beyond a \t character.  Grows
 398    col_adjust so that later columns reflect the tab's real width.  */
 399 static void
 400 adjust_column (pfile)
 401      cpp_reader *pfile;
 402 {
 403   cpp_buffer *const buf = pfile->buffer;
 404   /* Zero-based column of the tab itself.  */
 405   const unsigned int tab_col = CPP_BUF_COL (buf) - 1;
 406   /* Advance to the next multiple of the tabstop, less the single
 407      position the tab character already occupies.  */
 408   buf->col_adjust += (CPP_OPTION (pfile, tabstop)
 409                       - tab_col % CPP_OPTION (pfile, tabstop)) - 1;
 410 }
 411
 412 /* Skips whitespace, saving the next non-whitespace character.
 413    Adjusts pfile->buffer->col_adjust to account for tabs.  Without
 414    this, tokens might be assigned an incorrect column.

        C is the first whitespace character, already read.  The first
        character that is not horizontal whitespace (or EOF at the end
        of the buffer) is left in buffer->read_ahead.  A warning about
        NUL characters is given at most once per call; form feeds and
        vertical tabs draw a pedantic warning inside directives.  */
 415 static void
 416 skip_whitespace (pfile, c)
 417      cpp_reader *pfile;
 418      cppchar_t c;
 419 {
 420   cpp_buffer *buffer = pfile->buffer;
 421   unsigned int warned = 0;      /* Set once the NUL warning is issued.  */
 422
 423   do
 424     {
 425       /* Horizontal space always OK.  */
 426       if (c == ' ')
 427         ;
 428       else if (c == '\t')
 429         adjust_column (pfile);
 430       /* Just \f \v or \0 left.  */
 431       else if (c == '\0')
 432         {
 433           if (!warned)
 434             {
 435               cpp_warning (pfile, "null character(s) ignored");
 436               warned = 1;
 437             }
 438         }
 439       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 440         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 441                                CPP_BUF_COL (buffer),
 442                                "%s in preprocessing directive",
 443                                c == '\f' ? "form feed" : "vertical tab");
 444       /* Fetch the next character, or EOF at the end of the buffer.  */
 445       c = EOF;
 446       if (buffer->cur == buffer->rlimit)
 447         break;
 448       c = *buffer->cur++;
 449     }
 450   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 451   while (is_nvspace (c));
 452
 453   /* Remember the next character.  */
 454   buffer->read_ahead = c;
 455 }
 456
 457 /* Returns 1 if every character of STRING (a number token's spelling)
 458    is valid in a name, else 0.  PFILE is not referenced here.  */
 459 static int
 460 name_p (pfile, string)
 461      cpp_reader *pfile;
 462      const cpp_string *string;
 463 {
 464   const unsigned char *p = string->text;
 465   const unsigned char *const end = p + string->len;
 466
 467   while (p < end && is_idchar (*p))
 468     p++;
 469
 470   return p == end;
 471 }
 472
 473 /* Parse an identifier, skipping embedded backslash-newlines.  C is
 474    its first character, already read.  The spelling is accumulated
 475    on the hash table's obstack and then looked up with ht_lookup,
        whose result is returned as the identifier's node.  The
        character that ended the identifier is left in
        buffer->read_ahead.  Also issues the pedantic '$' warning and
        the poisoned-identifier and __VA_ARGS__ diagnostics, except in
        skipped blocks.  */
 476
 477 static cpp_hashnode *
 478 parse_identifier (pfile, c)
 479      cpp_reader *pfile;
 480      cppchar_t c;
 481 {
 482   cpp_hashnode *result;
 483   cpp_buffer *buffer = pfile->buffer;
 484   unsigned int saw_dollar = 0, len;
 485   struct obstack *stack = &pfile->hash_table->stack;
 486
 487   do                            /* Resume after each escaped newline.  */
 488     {
 489       do                        /* Run of ordinary identifier characters.  */
 490         {
 491           obstack_1grow (stack, c);
 492
 493           if (c == '$')
 494             saw_dollar++;
 495
 496           c = EOF;
 497           if (buffer->cur == buffer->rlimit)
 498             break;
 499
 500           c = *buffer->cur++;
 501         }
 502       while (is_idchar (c));
 503
 504       /* Potential escaped newline?  */
 505       if (c != '?' && c != '\\')
 506         break;
 507       c = skip_escaped_newlines (buffer, c);
 508     }
 509   while (is_idchar (c));
 510
 511   /* Remember the next character.  */
 512   buffer->read_ahead = c;
 513
 514   /* $ is not an identifier character in the standard, but is commonly
 515      accepted as an extension.  Don't warn about it in skipped
 516      conditional blocks.  */
 517   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 518     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 519
 520   /* Identifiers are null-terminated.  */
 521   len = obstack_object_size (stack);    /* Length without the NUL.  */
 522   obstack_1grow (stack, '\0');
 523
 524   /* This routine commits the memory if necessary.  */
 525   result = (cpp_hashnode *)
 526     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 527
 528   /* Some identifiers require diagnostics when lexed.  */
 529   if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
 530     {
 531       /* It is allowed to poison the same identifier twice.  */
 532       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 533         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 534                    NODE_NAME (result));
 535
 536       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 537          replacement list of a variadic macro.  */
 538       if (result == pfile->spec_nodes.n__VA_ARGS__
 539           && !pfile->state.va_args_ok)
 540         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 541     }
 542
 543   return result;
 544 }
 545
 546 /* Parse a number, skipping embedded backslash-newlines.  C is the
        first character of the number, already read; if LEADING_PERIOD
        is non-zero a '.' is stored in front of it.  The text is built
        in the ident pool, NUL-terminated, and described by NUMBER: text
        points at it and len is its length without the NUL.  The
        character that ended the number is left in buffer->read_ahead.  */
 547 static void
 548 parse_number (pfile, number, c, leading_period)
 549      cpp_reader *pfile;
 550      cpp_string *number;
 551      cppchar_t c;
 552      int leading_period;
 553 {
 554   cpp_buffer *buffer = pfile->buffer;
 555   cpp_pool *pool = &pfile->ident_pool;
 556   unsigned char *dest, *limit;
 557
 558   dest = POOL_FRONT (pool);
 559   limit = POOL_LIMIT (pool);
 560
 561   /* Place a leading period.  */
 562   if (leading_period)
 563     {
 564       if (dest >= limit)
 565         limit = _cpp_next_chunk (pool, 0, &dest);
 566       *dest++ = '.';
 567     }
 568
 569   do                            /* Resume after each escaped newline.  */
 570     {
 571       do                        /* Run of ordinary number characters.  */
 572         {
 573           /* Need room for terminating null.  */
 574           if (dest + 1 >= limit)
 575             limit = _cpp_next_chunk (pool, 0, &dest);
 576           *dest++ = c;
 577
 578           c = EOF;
 579           if (buffer->cur == buffer->rlimit)
 580             break;
 581
 582           c = *buffer->cur++;
 583         }
 584       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 585
 586       /* Potential escaped newline?  */
 587       if (c != '?' && c != '\\')
 588         break;
 589       c = skip_escaped_newlines (buffer, c);
 590     }
 591   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 592
 593   /* Remember the next character.  */
 594   buffer->read_ahead = c;
 595
 596   /* Null-terminate the number.  */
 597   *dest = '\0';
 598
 599   number->text = POOL_FRONT (pool);
 600   number->len = dest - number->text;
 601   POOL_COMMIT (pool, number->len + 1);  /* The + 1 covers the NUL.  */
 602 }
 603
 604 /* Subroutine of parse_string.  Reports a missing TERM terminator.  */
 605 static void
 606 unterminated (pfile, term)
 607      cpp_reader *pfile;
 608      int term;
 609 {
 610   cpp_error (pfile, "missing terminating %c character", term);
 611
 612   /* Only hint when a string start was recorded on another line.  */
 613   if (term != '\"' || pfile->mlstring_pos.line == 0
 614       || pfile->mlstring_pos.line == pfile->lexer_pos.line)
 615     return;
 616   cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 617                        pfile->mlstring_pos.col,
 618                        "possible start of unterminated string literal");
 619   pfile->mlstring_pos.line = 0;
 620 }
 621
 622 /* Subroutine of parse_string.  Returns 1 if a terminator arriving
 623    after the text stored up to DEST is not escaped, else 0.  */
 624 static int
 625 unescaped_terminator_p (pfile, dest)
 626      cpp_reader *pfile;
 627      const unsigned char *dest;
 628 {
 629   const unsigned char *base;
 630   unsigned int backslashes = 0;
 631
 632   /* In #include-style directives, terminators cannot be escaped.  */
 633   if (pfile->state.angled_headers)
 634     return 1;
 635
 636   /* Count the backslashes immediately preceding DEST; an odd count
 637      means the terminator itself is escaped.  */
 638   base = POOL_FRONT (&pfile->ident_pool);
 639   for (; dest > base && dest[-1] == '\\'; dest--)
 640     backslashes++;
 641   return (backslashes & 1) == 0;
 642 }
 643
 644 /* Parses a string, character constant, or angle-bracketed header file
 645    name.  Handles embedded trigraphs and escaped newlines.  The stored
 646    string is guaranteed NUL-terminated, but it is not guaranteed that
 647    this is the first NUL since embedded NULs are preserved.

        TERMINATOR is the closing character to look for.  The text is
        built in the ident pool and described by token->val.str; this
        routine stores neither delimiter.  buffer->read_ahead is left
        as EOF when the terminator was found, otherwise as the
        character (a newline, or EOF) that cut the literal short.
 648
 649    Multi-line strings are allowed, but they are deprecated.  */
 650 static void
 651 parse_string (pfile, token, terminator)
 652      cpp_reader *pfile;
 653      cpp_token *token;
 654      cppchar_t terminator;
 655 {
 656   cpp_buffer *buffer = pfile->buffer;
 657   cpp_pool *pool = &pfile->ident_pool;
 658   unsigned char *dest, *limit;
 659   cppchar_t c;
 660   unsigned int nulls = 0;       /* Count of NUL characters seen.  */
 661
 662   dest = POOL_FRONT (pool);
 663   limit = POOL_LIMIT (pool);
 664
 665   for (;;)
 666     {
 667       if (buffer->cur == buffer->rlimit)
 668         c = EOF;
 669       else
 670         c = *buffer->cur++;
 671       /* Re-entered by goto when C has already been fetched.  */
 672     have_char:
 673       /* We need space for the terminating NUL.  */
 674       if (dest >= limit)
 675         limit = _cpp_next_chunk (pool, 0, &dest);
 676
 677       if (c == EOF)
 678         {
 679           unterminated (pfile, terminator);
 680           break;
 681         }
 682
 683       /* Handle trigraphs, escaped newlines etc.  */
 684       if (c == '?' || c == '\\')
 685         c = skip_escaped_newlines (buffer, c);
 686
 687       if (c == terminator && unescaped_terminator_p (pfile, dest))
 688         {
 689           c = EOF;
 690           break;
 691         }
 692       else if (is_vspace (c))
 693         {
 694           /* In assembly language, silently terminate string and
 695              character literals at end of line.  This is a kludge
 696              around not knowing where comments are.  */
 697           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 698             break;
 699
 700           /* Character constants and header names may not extend over
 701              multiple lines.  In Standard C, neither may strings.
 702              Unfortunately, we accept multiline strings as an
 703              extension, except in #include family directives.  */
 704           if (terminator != '"' || pfile->state.angled_headers)
 705             {
 706               unterminated (pfile, terminator);
 707               break;
 708             }
 709
 710           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 711           if (pfile->mlstring_pos.line == 0)
 712             pfile->mlstring_pos = pfile->lexer_pos;
 713
 714           c = handle_newline (pfile, c);
 715           *dest++ = '\n';
 716           goto have_char;
 717         }
 718       else if (c == '\0')
 719         {
 720           if (nulls++ == 0)
 721             cpp_warning (pfile, "null character(s) preserved in literal");
 722         }
 723
 724       *dest++ = c;
 725     }
 726
 727   /* Remember the next character.  */
 728   buffer->read_ahead = c;
 729   *dest = '\0';
 730
 731   token->val.str.text = POOL_FRONT (pool);
 732   token->val.str.len = dest - token->val.str.text;
 733   POOL_COMMIT (pool, token->val.str.len + 1);
 734 }
 735
 736 /* Makes TOKEN a CPP_COMMENT whose spelling is copied into the ident
 737    pool.  The stored text is an initial '/' followed by the buffer
 738    contents from FROM onwards, including any comment terminator.  */
 739 static void
 740 save_comment (pfile, token, from)
 741      cpp_reader *pfile;
 742      cpp_token *token;
 743      const unsigned char *from;
 744 {
 745   cpp_buffer *const src = pfile->buffer;
 746   unsigned int tail_len = src->cur - from;
 747   unsigned char *text;
 748
 749   /* A pending read-ahead character (for a C++ comment, probably the
 750      newline that ended it) must not be stored with the comment.  */
 751   if (src->read_ahead != EOF)
 752     tail_len--;
 753   text = _cpp_pool_alloc (&pfile->ident_pool, tail_len + 1);
 754   text[0] = '/';
 755   memcpy (text + 1, from, tail_len);
 756   token->type = CPP_COMMENT;
 757   token->val.str.len = tail_len + 1;
 758   token->val.str.text = text;
 759 }
 760
 761 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 762    want to avoid stepping back when lexing %:%X.

        Sets RESULT to CPP_MOD, to CPP_MOD_EQ for %=, or, when digraphs
        are enabled, to CPP_HASH for %:, CPP_PASTE for %:%: and
        CPP_CLOSE_BRACE for %>, the last three flagged DIGRAPH.  A
        character that was read but not used stays in
        buffer->read_ahead.  For %:%X the X is parked in
        buffer->extra_char and a '%' is put in read_ahead, so that X is
        picked up when this routine is called again for that '%'.  */
 763 static void
 764 lex_percent (buffer, result)
 765      cpp_buffer *buffer;
 766      cpp_token *result;
 767 {
 768   cppchar_t c;
 769
 770   result->type = CPP_MOD;
 771   /* Parsing %:%X could leave an extra character.  */
 772   if (buffer->extra_char == EOF)
 773     c = get_effective_char (buffer);
 774   else
 775     {
 776       c = buffer->read_ahead = buffer->extra_char;
 777       buffer->extra_char = EOF;
 778     }
 779
 780   if (c == '=')
 781     ACCEPT_CHAR (CPP_MOD_EQ);
 782   else if (CPP_OPTION (buffer->pfile, digraphs))
 783     {
 784       if (c == ':')
 785         {
 786           result->flags |= DIGRAPH;
 787           ACCEPT_CHAR (CPP_HASH);
 788           if (get_effective_char (buffer) == '%')
 789             {
 790               buffer->extra_char = get_effective_char (buffer);
 791               if (buffer->extra_char == ':')
 792                 {
 793                   buffer->extra_char = EOF;
 794                   ACCEPT_CHAR (CPP_PASTE);
 795                 }
 796               else
 797                 /* We'll catch the extra_char when we're called back.  */
 798                 buffer->read_ahead = '%';
 799             }
 800         }
 801       else if (c == '>')
 802         {
 803           result->flags |= DIGRAPH;
 804           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 805         }
 806     }
 807 }
 808
 809 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 810    want to avoid stepping back when lexing '...' or '.123'.  In the
 811    latter case we should also set a flag for parse_number.

        Sets RESULT to CPP_NUMBER (via parse_number with its
        leading-period flag set) when a digit follows, to CPP_ELLIPSIS
        for three dots, to CPP_DOT_STAR for a dot and asterisk when the
        cplusplus option is set, and to CPP_DOT otherwise.  For two
        dots followed by some other character X, X is parked in
        buffer->extra_char and a '.' is put in read_ahead, so that X is
        picked up when this routine is called again for that '.'.  */
 812 static void
 813 lex_dot (pfile, result)
 814      cpp_reader *pfile;
 815      cpp_token *result;
 816 {
 817   cpp_buffer *buffer = pfile->buffer;
 818   cppchar_t c;
 819
 820   /* Parsing ..X could leave an extra character.  */
 821   if (buffer->extra_char == EOF)
 822     c = get_effective_char (buffer);
 823   else
 824     {
 825       c = buffer->read_ahead = buffer->extra_char;
 826       buffer->extra_char = EOF;
 827     }
 828
 829   /* All known character sets have 0...9 contiguous.  */
 830   if (c >= '0' && c <= '9')
 831     {
 832       result->type = CPP_NUMBER;
 833       parse_number (pfile, &result->val.str, c, 1);
 834     }
 835   else
 836     {
 837       result->type = CPP_DOT;
 838       if (c == '.')
 839         {
 840           buffer->extra_char = get_effective_char (buffer);
 841           if (buffer->extra_char == '.')
 842             {
 843               buffer->extra_char = EOF;
 844               ACCEPT_CHAR (CPP_ELLIPSIS);
 845             }
 846           else
 847             /* We'll catch the extra_char when we're called back.  */
 848             buffer->read_ahead = '.';
 849         }
 850       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 851         ACCEPT_CHAR (CPP_DOT_STAR);
 852     }
 853 }
 854
 855 void
 856 _cpp_lex_token (pfile, result)
 857      cpp_reader *pfile;
 858      cpp_token *result;
 859 {
 860   cppchar_t c;
 861   cpp_buffer *buffer;
 862   const unsigned char *comment_start;
 863   unsigned char bol;
 864
 865  skip:
 866   bol = pfile->state.next_bol;
 867  done_directive:
 868   buffer = pfile->buffer;
 869   pfile->state.next_bol = 0;
 870   result->flags = buffer->saved_flags;
 871   buffer->saved_flags = 0;
 872  next_char:
 873   pfile->lexer_pos.line = buffer->lineno;
 874   result->line = pfile->line;
 875  next_char2:
 876   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 877
 878   c = buffer->read_ahead;
 879   if (c == EOF && buffer->cur < buffer->rlimit)
 880     {
 881       c = *buffer->cur++;
 882       pfile->lexer_pos.col++;
 883     }
 884   result->col = pfile->lexer_pos.col;
 885
 886  do_switch:
 887   buffer->read_ahead = EOF;
 888   switch (c)
 889     {
 890     case EOF:
 891       /* Non-empty files should end in a newline.  Checking "bol" too
 892           prevents multiple warnings when hitting the EOF more than
 893           once, like in a directive.  Don't warn for command line and
 894           _Pragma buffers.  */
 895       if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
 896         cpp_pedwarn (pfile, "no newline at end of file");
 897       pfile->state.next_bol = 1;
 898       pfile->state.skipping = 0;        /* In case missing #endif.  */
 899       result->type = CPP_EOF;
 900       /* Don't do MI optimisation.  */
 901       return;
 902
 903     case ' ': case '\t': case '\f': case '\v': case '\0':
 904       skip_whitespace (pfile, c);
 905       result->flags |= PREV_WHITE;
 906       goto next_char2;
 907
 908     case '\n': case '\r':
 909       if (!pfile->state.in_directive)
 910         {
 911           handle_newline (pfile, c);
 912           if (!pfile->state.parsing_args)
 913             pfile->pseudo_newlines = 0;
 914           bol = 1;
 915           pfile->lexer_pos.output_line = buffer->lineno;
 916           /* This is a new line, so clear any white space flag.
 917              Newlines in arguments are white space (6.10.3.10);
 918              parse_arg takes care of that.  */
 919           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 920           goto next_char;
 921         }
 922
 923       /* Don't let directives spill over to the next line.  */
 924       buffer->read_ahead = c;
 925       pfile->state.next_bol = 1;
 926       result->type = CPP_EOF;
 927       /* Don't break; pfile->state.skipping might be true.  */
 928       return;
 929
 930     case '?':
 931     case '\\':
 932       /* These could start an escaped newline, or '?' a trigraph.  Let
 933          skip_escaped_newlines do all the work.  */
 934       {
 935         unsigned int lineno = buffer->lineno;
 936
 937         c = skip_escaped_newlines (buffer, c);
 938         if (lineno != buffer->lineno)
 939           /* We had at least one escaped newline of some sort, and the
 940              next character is in buffer->read_ahead.  Update the
 941              token's line and column.  */
 942             goto next_char;
 943
 944         /* We are either the original '?' or '\\', or a trigraph.  */
 945         result->type = CPP_QUERY;
 946         buffer->read_ahead = EOF;
 947         if (c == '\\')
 948           goto random_char;
 949         else if (c != '?')
 950           goto do_switch;
 951       }
 952       break;
 953
 954     case '0': case '1': case '2': case '3': case '4':
 955     case '5': case '6': case '7': case '8': case '9':
 956       result->type = CPP_NUMBER;
 957       parse_number (pfile, &result->val.str, c, 0);
 958       break;
 959
 960     case '$':
 961       if (!CPP_OPTION (pfile, dollars_in_ident))
 962         goto random_char;
 963       /* Fall through...  */
 964
 965     case '_':
 966     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 967     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 968     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 969     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 970     case 'y': case 'z':
 971     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 972     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 973     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 974     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 975     case 'Y': case 'Z':
 976       result->type = CPP_NAME;
 977       result->val.node = parse_identifier (pfile, c);
 978
 979       /* 'L' may introduce wide characters or strings.  */
 980       if (result->val.node == pfile->spec_nodes.n_L)
 981         {
 982           c = buffer->read_ahead; /* For make_string.  */
 983           if (c == '\'' || c == '"')
 984             {
 985               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 986               goto make_string;
 987             }
 988         }
 989       /* Convert named operators to their proper types.  */
 990       else if (result->val.node->flags & NODE_OPERATOR)
 991         {
 992           result->flags |= NAMED_OP;
 993           result->type = result->val.node->value.operator;
 994         }
 995       break;
 996
 997     case '\'':
 998     case '"':
 999       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1000     make_string:
1001       parse_string (pfile, result, c);
1002       break;
1003
1004     case '/':
1005       /* A potential block or line comment.  */
1006       comment_start = buffer->cur;
1007       result->type = CPP_DIV;
1008       c = get_effective_char (buffer);
1009       if (c == '=')
1010         ACCEPT_CHAR (CPP_DIV_EQ);
1011       if (c != '/' && c != '*')
1012         break;
1013
1014       if (c == '*')
1015         {
1016           if (skip_block_comment (pfile))
1017             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1018                                  pfile->lexer_pos.col,
1019                                  "unterminated comment");
1020         }
1021       else
1022         {
1023           if (!CPP_OPTION (pfile, cplusplus_comments)
1024               && !CPP_IN_SYSTEM_HEADER (pfile))
1025             break;
1026
1027           /* Warn about comments only if pedantically GNUC89, and not
1028              in system headers.  */
1029           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1030               && ! buffer->warned_cplusplus_comments)
1031             {
1032               cpp_pedwarn (pfile,
1033                            "C++ style comments are not allowed in ISO C89");
1034               cpp_pedwarn (pfile,
1035                            "(this will be reported only once per input file)");
1036               buffer->warned_cplusplus_comments = 1;
1037             }
1038
1039           /* Skip_line_comment updates buffer->read_ahead.  */
1040           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1041             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1042                                    pfile->lexer_pos.col,
1043                                    "multi-line comment");
1044         }
1045
1046       /* Skipping the comment has updated buffer->read_ahead.  */
1047       if (!pfile->state.save_comments)
1048         {
1049           result->flags |= PREV_WHITE;
1050           goto next_char;
1051         }
1052
1053       /* Save the comment as a token in its own right.  */
1054       save_comment (pfile, result, comment_start);
1055       /* Don't do MI optimisation.  */
1056       return;
1057
1058     case '<':
1059       if (pfile->state.angled_headers)
1060         {
1061           result->type = CPP_HEADER_NAME;
1062           c = '>';              /* terminator.  */
1063           goto make_string;
1064         }
1065
1066       result->type = CPP_LESS;
1067       c = get_effective_char (buffer);
1068       if (c == '=')
1069         ACCEPT_CHAR (CPP_LESS_EQ);
1070       else if (c == '<')
1071         {
1072           ACCEPT_CHAR (CPP_LSHIFT);
1073           if (get_effective_char (buffer) == '=')
1074             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1075         }
1076       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1077         {
1078           ACCEPT_CHAR (CPP_MIN);
1079           if (get_effective_char (buffer) == '=')
1080             ACCEPT_CHAR (CPP_MIN_EQ);
1081         }
1082       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1083         {
1084           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1085           result->flags |= DIGRAPH;
1086         }
1087       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1088         {
1089           ACCEPT_CHAR (CPP_OPEN_BRACE);
1090           result->flags |= DIGRAPH;
1091         }
1092       break;
1093
1094     case '>':
1095       result->type = CPP_GREATER;
1096       c = get_effective_char (buffer);
1097       if (c == '=')
1098         ACCEPT_CHAR (CPP_GREATER_EQ);
1099       else if (c == '>')
1100         {
1101           ACCEPT_CHAR (CPP_RSHIFT);
1102           if (get_effective_char (buffer) == '=')
1103             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1104         }
1105       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1106         {
1107           ACCEPT_CHAR (CPP_MAX);
1108           if (get_effective_char (buffer) == '=')
1109             ACCEPT_CHAR (CPP_MAX_EQ);
1110         }
1111       break;
1112
1113     case '%':
1114       lex_percent (buffer, result);
1115       if (result->type == CPP_HASH)
1116         goto do_hash;
1117       break;
1118
1119     case '.':
1120       lex_dot (pfile, result);
1121       break;
1122
1123     case '+':
1124       result->type = CPP_PLUS;
1125       c = get_effective_char (buffer);
1126       if (c == '=')
1127         ACCEPT_CHAR (CPP_PLUS_EQ);
1128       else if (c == '+')
1129         ACCEPT_CHAR (CPP_PLUS_PLUS);
1130       break;
1131
1132     case '-':
1133       result->type = CPP_MINUS;
1134       c = get_effective_char (buffer);
1135       if (c == '>')
1136         {
1137           ACCEPT_CHAR (CPP_DEREF);
1138           if (CPP_OPTION (pfile, cplusplus)
1139               && get_effective_char (buffer) == '*')
1140             ACCEPT_CHAR (CPP_DEREF_STAR);
1141         }
1142       else if (c == '=')
1143         ACCEPT_CHAR (CPP_MINUS_EQ);
1144       else if (c == '-')
1145         ACCEPT_CHAR (CPP_MINUS_MINUS);
1146       break;
1147
1148     case '*':
1149       result->type = CPP_MULT;
1150       if (get_effective_char (buffer) == '=')
1151         ACCEPT_CHAR (CPP_MULT_EQ);
1152       break;
1153
1154     case '=':
1155       result->type = CPP_EQ;
1156       if (get_effective_char (buffer) == '=')
1157         ACCEPT_CHAR (CPP_EQ_EQ);
1158       break;
1159
1160     case '!':
1161       result->type = CPP_NOT;
1162       if (get_effective_char (buffer) == '=')
1163         ACCEPT_CHAR (CPP_NOT_EQ);
1164       break;
1165
1166     case '&':
1167       result->type = CPP_AND;
1168       c = get_effective_char (buffer);
1169       if (c == '=')
1170         ACCEPT_CHAR (CPP_AND_EQ);
1171       else if (c == '&')
1172         ACCEPT_CHAR (CPP_AND_AND);
1173       break;
1174
1175     case '#':
1176       c = buffer->extra_char;   /* Can be set by error condition below.  */
1177       if (c != EOF)
1178         {
1179           buffer->read_ahead = c;
1180           buffer->extra_char = EOF;
1181         }
1182       else
1183         c = get_effective_char (buffer);
1184
1185       if (c == '#')
1186         {
1187           ACCEPT_CHAR (CPP_PASTE);
1188           break;
1189         }
1190
1191       result->type = CPP_HASH;
1192     do_hash:
1193       if (!bol)
1194         break;
1195       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1196          tokens within the list of arguments that would otherwise act
1197          as preprocessing directives, the behavior is undefined.
1198
1199          This implementation will report a hard error, terminate the
1200          macro invocation, and proceed to process the directive.  */
1201       if (pfile->state.parsing_args)
1202         {
1203           if (pfile->state.parsing_args == 2)
1204             cpp_error (pfile,
1205                        "directives may not be used inside a macro argument");
1206
1207           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1208           buffer->extra_char = buffer->read_ahead;
1209           buffer->read_ahead = '#';
1210           pfile->state.next_bol = 1;
1211           result->type = CPP_EOF;
1212
1213           /* Get whitespace right - newline_in_args sets it.  */
1214           if (pfile->lexer_pos.col == 1)
1215             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1216         }
1217       else
1218         {
1219           /* This is the hash introducing a directive.  */
1220           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1221             goto done_directive; /* bol still 1.  */
1222           /* This is in fact an assembler #.  */
1223         }
1224       break;
1225
1226     case '|':
1227       result->type = CPP_OR;
1228       c = get_effective_char (buffer);
1229       if (c == '=')
1230         ACCEPT_CHAR (CPP_OR_EQ);
1231       else if (c == '|')
1232         ACCEPT_CHAR (CPP_OR_OR);
1233       break;
1234
1235     case '^':
1236       result->type = CPP_XOR;
1237       if (get_effective_char (buffer) == '=')
1238         ACCEPT_CHAR (CPP_XOR_EQ);
1239       break;
1240
1241     case ':':
1242       result->type = CPP_COLON;
1243       c = get_effective_char (buffer);
1244       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1245         ACCEPT_CHAR (CPP_SCOPE);
1246       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1247         {
1248           result->flags |= DIGRAPH;
1249           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1250         }
1251       break;
1252
1253     case '~': result->type = CPP_COMPL; break;
1254     case ',': result->type = CPP_COMMA; break;
1255     case '(': result->type = CPP_OPEN_PAREN; break;
1256     case ')': result->type = CPP_CLOSE_PAREN; break;
1257     case '[': result->type = CPP_OPEN_SQUARE; break;
1258     case ']': result->type = CPP_CLOSE_SQUARE; break;
1259     case '{': result->type = CPP_OPEN_BRACE; break;
1260     case '}': result->type = CPP_CLOSE_BRACE; break;
1261     case ';': result->type = CPP_SEMICOLON; break;
1262
1263       /* @ is a punctuator in Objective C.  */
1264     case '@': result->type = CPP_ATSIGN; break;
1265
1266     random_char:
1267     default:
1268       result->type = CPP_OTHER;
1269       result->val.c = c;
1270       break;
1271     }
1272
1273   if (!pfile->state.in_directive && pfile->state.skipping)
1274     goto skip;
1275
1276   /* If not in a directive, this token invalidates controlling macros.  */
1277   if (!pfile->state.in_directive)
1278     pfile->mi_valid = false;
1279 }
1280
1281 /* An upper bound on the number of bytes needed to spell a token,
1282    including preceding whitespace.  */
1283 unsigned int
1284 cpp_token_len (token)
1285      const cpp_token *token;
1286 {
1287   unsigned int len;
1288
1289   switch (TOKEN_SPELL (token))
1290     {
1291     default:            len = 0;                                break;
1292     case SPELL_STRING:  len = token->val.str.len;               break;
1293     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1294     }
1295   /* 1 for whitespace, 4 for comment delimeters.  */
1296   return len + 5;
1297 }
1298
1299 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1300    already contain the enough space to hold the token's spelling.
1301    Returns a pointer to the character after the last character
1302    written.  */
1303 unsigned char *
1304 cpp_spell_token (pfile, token, buffer)
1305      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1306      const cpp_token *token;
1307      unsigned char *buffer;
1308 {
1309   switch (TOKEN_SPELL (token))
1310     {
1311     case SPELL_OPERATOR:
1312       {
1313         const unsigned char *spelling;
1314         unsigned char c;
1315
1316         if (token->flags & DIGRAPH)
1317           spelling
1318             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319         else if (token->flags & NAMED_OP)
1320           goto spell_ident;
1321         else
1322           spelling = TOKEN_NAME (token);
1323
1324         while ((c = *spelling++) != '\0')
1325           *buffer++ = c;
1326       }
1327       break;
1328
1329     case SPELL_IDENT:
1330       spell_ident:
1331       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1332       buffer += NODE_LEN (token->val.node);
1333       break;
1334
1335     case SPELL_STRING:
1336       {
1337         int left, right, tag;
1338         switch (token->type)
1339           {
1340           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1341           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1342           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1343           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1344           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1345           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1346           }
1347         if (tag) *buffer++ = tag;
1348         if (left) *buffer++ = left;
1349         memcpy (buffer, token->val.str.text, token->val.str.len);
1350         buffer += token->val.str.len;
1351         if (right) *buffer++ = right;
1352       }
1353       break;
1354
1355     case SPELL_CHAR:
1356       *buffer++ = token->val.c;
1357       break;
1358
1359     case SPELL_NONE:
1360       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1361       break;
1362     }
1363
1364   return buffer;
1365 }
1366
1367 /* Returns a token as a null-terminated string.  The string is
1368    temporary, and automatically freed later.  Useful for diagnostics.  */
1369 unsigned char *
1370 cpp_token_as_text (pfile, token)
1371      cpp_reader *pfile;
1372      const cpp_token *token;
1373 {
1374   unsigned int len = cpp_token_len (token);
1375   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1376
1377   end = cpp_spell_token (pfile, token, start);
1378   end[0] = '\0';
1379
1380   return start;
1381 }
1382
1383 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1384 const char *
1385 cpp_type2name (type)
1386      enum cpp_ttype type;
1387 {
1388   return (const char *) token_spellings[type].name;
1389 }
1390
1391 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1392    for efficiency - to avoid double-buffering.  Also, outputs a space
1393    if PREV_WHITE is flagged.  */
1394 void
1395 cpp_output_token (token, fp)
1396      const cpp_token *token;
1397      FILE *fp;
1398 {
1399   if (token->flags & PREV_WHITE)
1400     putc (' ', fp);
1401
1402   switch (TOKEN_SPELL (token))
1403     {
1404     case SPELL_OPERATOR:
1405       {
1406         const unsigned char *spelling;
1407
1408         if (token->flags & DIGRAPH)
1409           spelling
1410             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1411         else if (token->flags & NAMED_OP)
1412           goto spell_ident;
1413         else
1414           spelling = TOKEN_NAME (token);
1415
1416         ufputs (spelling, fp);
1417       }
1418       break;
1419
1420     spell_ident:
1421     case SPELL_IDENT:
1422       ufputs (NODE_NAME (token->val.node), fp);
1423     break;
1424
1425     case SPELL_STRING:
1426       {
1427         int left, right, tag;
1428         switch (token->type)
1429           {
1430           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1431           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1432           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1433           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1434           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1435           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1436           }
1437         if (tag) putc (tag, fp);
1438         if (left) putc (left, fp);
1439         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1440         if (right) putc (right, fp);
1441       }
1442       break;
1443
1444     case SPELL_CHAR:
1445       putc (token->val.c, fp);
1446       break;
1447
1448     case SPELL_NONE:
1449       /* An error, most probably.  */
1450       break;
1451     }
1452 }
1453
1454 /* Compare two tokens.  */
1455 int
1456 _cpp_equiv_tokens (a, b)
1457      const cpp_token *a, *b;
1458 {
1459   if (a->type == b->type && a->flags == b->flags)
1460     switch (TOKEN_SPELL (a))
1461       {
1462       default:                  /* Keep compiler happy.  */
1463       case SPELL_OPERATOR:
1464         return 1;
1465       case SPELL_CHAR:
1466         return a->val.c == b->val.c; /* Character.  */
1467       case SPELL_NONE:
1468         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1469       case SPELL_IDENT:
1470         return a->val.node == b->val.node;
1471       case SPELL_STRING:
1472         return (a->val.str.len == b->val.str.len
1473                 && !memcmp (a->val.str.text, b->val.str.text,
1474                             a->val.str.len));
1475       }
1476
1477   return 0;
1478 }
1479
1480 /* Determine whether two tokens can be pasted together, and if so,
1481    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1482    be pasted, or the appropriate type for the merged token if they
1483    can.  */
1484 enum cpp_ttype
1485 cpp_can_paste (pfile, token1, token2, digraph)
1486      cpp_reader * pfile;
1487      const cpp_token *token1, *token2;
1488      int* digraph;
1489 {
1490   enum cpp_ttype a = token1->type, b = token2->type;
1491   int cxx = CPP_OPTION (pfile, cplusplus);
1492
1493   /* Treat named operators as if they were ordinary NAMEs.  */
1494   if (token1->flags & NAMED_OP)
1495     a = CPP_NAME;
1496   if (token2->flags & NAMED_OP)
1497     b = CPP_NAME;
1498
1499   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1500     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1501
1502   switch (a)
1503     {
1504     case CPP_GREATER:
1505       if (b == a) return CPP_RSHIFT;
1506       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1507       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1508       break;
1509     case CPP_LESS:
1510       if (b == a) return CPP_LSHIFT;
1511       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1512       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1513       if (CPP_OPTION (pfile, digraphs))
1514         {
1515           if (b == CPP_COLON)
1516             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1517           if (b == CPP_MOD)
1518             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1519         }
1520       break;
1521
1522     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1523     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1524     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1525
1526     case CPP_MINUS:
1527       if (b == a)               return CPP_MINUS_MINUS;
1528       if (b == CPP_GREATER)     return CPP_DEREF;
1529       break;
1530     case CPP_COLON:
1531       if (b == a && cxx)        return CPP_SCOPE;
1532       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1533         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1534       break;
1535
1536     case CPP_MOD:
1537       if (CPP_OPTION (pfile, digraphs))
1538         {
1539           if (b == CPP_GREATER)
1540             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1541           if (b == CPP_COLON)
1542             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1543         }
1544       break;
1545     case CPP_DEREF:
1546       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1547       break;
1548     case CPP_DOT:
1549       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1550       if (b == CPP_NUMBER)      return CPP_NUMBER;
1551       break;
1552
1553     case CPP_HASH:
1554       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1555         /* %:%: digraph */
1556         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1557       break;
1558
1559     case CPP_NAME:
1560       if (b == CPP_NAME)        return CPP_NAME;
1561       if (b == CPP_NUMBER
1562           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1563       if (b == CPP_CHAR
1564           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1565       if (b == CPP_STRING
1566           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1567       break;
1568
1569     case CPP_NUMBER:
1570       if (b == CPP_NUMBER)      return CPP_NUMBER;
1571       if (b == CPP_NAME)        return CPP_NUMBER;
1572       if (b == CPP_DOT)         return CPP_NUMBER;
1573       /* Numbers cannot have length zero, so this is safe.  */
1574       if ((b == CPP_PLUS || b == CPP_MINUS)
1575           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1576         return CPP_NUMBER;
1577       break;
1578
1579     default:
1580       break;
1581     }
1582
1583   return CPP_EOF;
1584 }
1585
1586 /* Returns nonzero if a space should be inserted to avoid an
1587    accidental token paste for output.  For simplicity, it is
1588    conservative, and occasionally advises a space where one is not
1589    needed, e.g. "." and ".2".  */
1590
1591 int
1592 cpp_avoid_paste (pfile, token1, token2)
1593      cpp_reader *pfile;
1594      const cpp_token *token1, *token2;
1595 {
1596   enum cpp_ttype a = token1->type, b = token2->type;
1597   cppchar_t c;
1598
1599   if (token1->flags & NAMED_OP)
1600     a = CPP_NAME;
1601   if (token2->flags & NAMED_OP)
1602     b = CPP_NAME;
1603
1604   c = EOF;
1605   if (token2->flags & DIGRAPH)
1606     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1607   else if (token_spellings[b].category == SPELL_OPERATOR)
1608     c = token_spellings[b].name[0];
1609
1610   /* Quickly get everything that can paste with an '='.  */
1611   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1612     return 1;
1613
1614   switch (a)
1615     {
1616     case CPP_GREATER:   return c == '>' || c == '?';
1617     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1618     case CPP_PLUS:      return c == '+';
1619     case CPP_MINUS:     return c == '-' || c == '>';
1620     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1621     case CPP_MOD:       return c == ':' || c == '>';
1622     case CPP_AND:       return c == '&';
1623     case CPP_OR:        return c == '|';
1624     case CPP_COLON:     return c == ':' || c == '>';
1625     case CPP_DEREF:     return c == '*';
1626     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1627     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1628     case CPP_NAME:      return ((b == CPP_NUMBER
1629                                  && name_p (pfile, &token2->val.str))
1630                                 || b == CPP_NAME
1631                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1632     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1633                                 || c == '.' || c == '+' || c == '-');
1634     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1635                                 && token1->val.c == '@'
1636                                 && (b == CPP_NAME || b == CPP_STRING));
1637     default:            break;
1638     }
1639
1640   return 0;
1641 }
1642
1643 /* Output all the remaining tokens on the current line, and a newline
1644    character, to FP.  Leading whitespace is removed.  */
1645 void
1646 cpp_output_line (pfile, fp)
1647      cpp_reader *pfile;
1648      FILE *fp;
1649 {
1650   cpp_token token;
1651
1652   cpp_get_token (pfile, &token);
1653   token.flags &= ~PREV_WHITE;
1654   while (token.type != CPP_EOF)
1655     {
1656       cpp_output_token (&token, fp);
1657       cpp_get_token (pfile, &token);
1658     }
1659
1660   putc ('\n', fp);
1661 }
1662
/* Return the numeric value (0-15) of the hexadecimal digit C, which
   may be in either letter case.  Aborts if C is not a hexadecimal
   digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if ('a' <= c && c <= 'f')
    value = 10 + (c - 'a');
  else if ('A' <= c && c <= 'F')
    value = 10 + (c - 'A');
  else if ('0' <= c && c <= '9')
    value = c - '0';
  else
    abort ();

  return value;
}
1676
1677 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1678    failure if cpplib is not parsing C++ or C99.  Such failure is
1679    silent, and no variables are updated.  Otherwise returns 0, and
1680    warns if -Wtraditional.
1681
1682    [lex.charset]: The character designated by the universal character
1683    name \UNNNNNNNN is that character whose character short name in
1684    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1685    universal character name \uNNNN is that character whose character
1686    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1687    for a universal character name is less than 0x20 or in the range
1688    0x7F-0x9F (inclusive), or if the universal character name
1689    designates a character in the basic source character set, then the
1690    program is ill-formed.
1691
1692    We assume that wchar_t is Unicode, so we don't need to do any
1693    mapping.  Is this ever wrong?
1694
1695    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1696    LIMIT is the end of the string or charconst.  PSTR is updated to
1697    point after the UCS on return, and the UCS is written into PC.  */
1698
1699 static int
1700 maybe_read_ucs (pfile, pstr, limit, pc)
1701      cpp_reader *pfile;
1702      const unsigned char **pstr;
1703      const unsigned char *limit;
1704      unsigned int *pc;
1705 {
1706   const unsigned char *p = *pstr;
1707   unsigned int code = 0;
1708   unsigned int c = *pc, length;
1709
1710   /* Only attempt to interpret a UCS for C++ and C99.  */
1711   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1712     return 1;
1713
1714   if (CPP_WTRADITIONAL (pfile))
1715     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1716
1717   length = (c == 'u' ? 4: 8);
1718
1719   if ((size_t) (limit - p) < length)
1720     {
1721       cpp_error (pfile, "incomplete universal-character-name");
1722       /* Skip to the end to avoid more diagnostics.  */
1723       p = limit;
1724     }
1725   else
1726     {
1727       for (; length; length--, p++)
1728         {
1729           c = *p;
1730           if (ISXDIGIT (c))
1731             code = (code << 4) + hex_digit_value (c);
1732           else
1733             {
1734               cpp_error (pfile,
1735                          "non-hex digit '%c' in universal-character-name", c);
1736               /* We shouldn't skip in case there are multibyte chars.  */
1737               break;
1738             }
1739         }
1740     }
1741
1742 #ifdef TARGET_EBCDIC
1743   cpp_error (pfile, "universal-character-name on EBCDIC target");
1744   code = 0x3f;  /* EBCDIC invalid character */
1745 #else
1746  /* True extended characters are OK.  */
1747   if (code >= 0xa0
1748       && !(code & 0x80000000)
1749       && !(code >= 0xD800 && code <= 0xDFFF))
1750     ;
1751   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1752      hex escapes so that this also works with EBCDIC hosts.  */
1753   else if (code == 0x24 || code == 0x40 || code == 0x60)
1754     ;
1755   /* Don't give another error if one occurred above.  */
1756   else if (length == 0)
1757     cpp_error (pfile, "universal-character-name out of range");
1758 #endif
1759
1760   *pstr = p;
1761   *pc = code;
1762   return 0;
1763 }
1764
1765 /* Interpret an escape sequence, and return its value.  PSTR points to
1766    the input pointer, which is just after the backslash.  LIMIT is how
1767    much text we have.  MASK is a bitmask for the precision for the
1768    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1769    interpret escapes that did not exist in traditional C.
1770
1771    Handles all relevant diagnostics.  */
1772
1773 unsigned int
1774 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1775      cpp_reader *pfile;
1776      const unsigned char **pstr;
1777      const unsigned char *limit;
1778      unsigned HOST_WIDE_INT mask;
1779      int traditional;
1780 {
1781   int unknown = 0;
1782   const unsigned char *str = *pstr;
1783   unsigned int c = *str++;
1784
1785   switch (c)
1786     {
1787     case '\\': case '\'': case '"': case '?': break;
1788     case 'b': c = TARGET_BS;      break;
1789     case 'f': c = TARGET_FF;      break;
1790     case 'n': c = TARGET_NEWLINE; break;
1791     case 'r': c = TARGET_CR;      break;
1792     case 't': c = TARGET_TAB;     break;
1793     case 'v': c = TARGET_VT;      break;
1794
1795     case '(': case '{': case '[': case '%':
1796       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1797          '\%' is used to prevent SCCS from getting confused.  */
1798       unknown = CPP_PEDANTIC (pfile);
1799       break;
1800
1801     case 'a':
1802       if (CPP_WTRADITIONAL (pfile))
1803         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1804       if (!traditional)
1805         c = TARGET_BELL;
1806       break;
1807
1808     case 'e': case 'E':
1809       if (CPP_PEDANTIC (pfile))
1810         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1811       c = TARGET_ESC;
1812       break;
1813
1814     case 'u': case 'U':
1815       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1816       break;
1817
1818     case 'x':
1819       if (CPP_WTRADITIONAL (pfile))
1820         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1821
1822       if (!traditional)
1823         {
1824           unsigned int i = 0, overflow = 0;
1825           int digits_found = 0;
1826
1827           while (str < limit)
1828             {
1829               c = *str;
1830               if (! ISXDIGIT (c))
1831                 break;
1832               str++;
1833               overflow |= i ^ (i << 4 >> 4);
1834               i = (i << 4) + hex_digit_value (c);
1835               digits_found = 1;
1836             }
1837
1838           if (!digits_found)
1839             cpp_error (pfile, "\\x used with no following hex digits");
1840
1841           if (overflow | (i != (i & mask)))
1842             {
1843               cpp_pedwarn (pfile, "hex escape sequence out of range");
1844               i &= mask;
1845             }
1846           c = i;
1847         }
1848       break;
1849
1850     case '0':  case '1':  case '2':  case '3':
1851     case '4':  case '5':  case '6':  case '7':
1852       {
1853         unsigned int i = c - '0';
1854         int count = 0;
1855
1856         while (str < limit && ++count < 3)
1857           {
1858             c = *str;
1859             if (c < '0' || c > '7')
1860               break;
1861             str++;
1862             i = (i << 3) + c - '0';
1863           }
1864
1865         if (i != (i & mask))
1866           {
1867             cpp_pedwarn (pfile, "octal escape sequence out of range");
1868             i &= mask;
1869           }
1870         c = i;
1871       }
1872       break;
1873
1874     default:
1875       unknown = 1;
1876       break;
1877     }
1878
1879   if (unknown)
1880     {
1881       if (ISGRAPH (c))
1882         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1883       else
1884         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1885     }
1886
1887   if (c > mask)
1888     cpp_pedwarn (pfile, "escape sequence out of range for character");
1889
1890   *pstr = str;
1891   return c;
1892 }
1893
1894 #ifndef MAX_CHAR_TYPE_SIZE
1895 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1896 #endif
1897
1898 #ifndef MAX_WCHAR_TYPE_SIZE
1899 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1900 #endif
1901
1902 /* Interpret a (possibly wide) character constant in TOKEN.
1903    WARN_MULTI warns about multi-character charconsts, if not
1904    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1905    that did not exist in traditional C.  PCHARS_SEEN points to a
1906    variable that is filled in with the number of characters seen.  */
1907 HOST_WIDE_INT
1908 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1909      cpp_reader *pfile;
1910      const cpp_token *token;
1911      int warn_multi;
1912      int traditional;
1913      unsigned int *pchars_seen;
1914 {
1915   const unsigned char *str = token->val.str.text;
1916   const unsigned char *limit = str + token->val.str.len;
1917   unsigned int chars_seen = 0;
1918   unsigned int width, max_chars, c;
1919   unsigned HOST_WIDE_INT mask;
1920   HOST_WIDE_INT result = 0;
1921
1922 #ifdef MULTIBYTE_CHARS
1923   (void) local_mbtowc (NULL, NULL, 0);
1924 #endif
1925
1926   /* Width in bits.  */
1927   if (token->type == CPP_CHAR)
1928     width = MAX_CHAR_TYPE_SIZE;
1929   else
1930     width = MAX_WCHAR_TYPE_SIZE;
1931
1932   if (width < HOST_BITS_PER_WIDE_INT)
1933     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1934   else
1935     mask = ~0;
1936   max_chars = HOST_BITS_PER_WIDE_INT / width;
1937
1938   while (str < limit)
1939     {
1940 #ifdef MULTIBYTE_CHARS
1941       wchar_t wc;
1942       int char_len;
1943
1944       char_len = local_mbtowc (&wc, str, limit - str);
1945       if (char_len == -1)
1946         {
1947           cpp_warning (pfile, "ignoring invalid multibyte character");
1948           c = *str++;
1949         }
1950       else
1951         {
1952           str += char_len;
1953           c = wc;
1954         }
1955 #else
1956       c = *str++;
1957 #endif
1958
1959       if (c == '\\')
1960         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1961
1962 #ifdef MAP_CHARACTER
1963       if (ISPRINT (c))
1964         c = MAP_CHARACTER (c);
1965 #endif
1966
1967       /* Merge character into result; ignore excess chars.  */
1968       if (++chars_seen <= max_chars)
1969         {
1970           if (width < HOST_BITS_PER_WIDE_INT)
1971             result = (result << width) | (c & mask);
1972           else
1973             result = c;
1974         }
1975     }
1976
1977   if (chars_seen == 0)
1978     cpp_error (pfile, "empty character constant");
1979   else if (chars_seen > max_chars)
1980     {
1981       chars_seen = max_chars;
1982       cpp_warning (pfile, "character constant too long");
1983     }
1984   else if (chars_seen > 1 && !traditional && warn_multi)
1985     cpp_warning (pfile, "multi-character character constant");
1986
1987   /* If char type is signed, sign-extend the constant.  The
1988      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1989   if (token->type == CPP_CHAR && chars_seen)
1990     {
1991       unsigned int nbits = chars_seen * width;
1992       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1993
1994       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1995           || ((result >> (nbits - 1)) & 1) == 0)
1996         result &= mask;
1997       else
1998         result |= ~mask;
1999     }
2000
2001   *pchars_seen = chars_seen;
2002   return result;
2003 }
2004
2005 /* Memory pools.  */
2006
/* Never instantiated: the layout of this structure is only used to
   find the offset at which the compiler places a union of a double
   and a pointer when it follows a single char.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when a pool is not given an explicit one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2018
2019 static int
2020 chunk_suitable (pool, chunk, size)
2021      cpp_pool *pool;
2022      cpp_chunk *chunk;
2023      unsigned int size;
2024 {
2025   /* Being at least twice SIZE means we can use memcpy in
2026      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2027      anyway.  */
2028   return (chunk && pool->locked != chunk
2029           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2030 }
2031
2032 /* Returns the end of the new pool.  PTR points to a char in the old
2033    pool, and is updated to point to the same char in the new pool.  */
2034 unsigned char *
2035 _cpp_next_chunk (pool, len, ptr)
2036      cpp_pool *pool;
2037      unsigned int len;
2038      unsigned char **ptr;
2039 {
2040   cpp_chunk *chunk = pool->cur->next;
2041
2042   /* LEN is the minimum size we want in the new pool.  */
2043   len += POOL_ROOM (pool);
2044   if (! chunk_suitable (pool, chunk, len))
2045     {
2046       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2047
2048       chunk->next = pool->cur->next;
2049       pool->cur->next = chunk;
2050     }
2051
2052   /* Update the pointer before changing chunk's front.  */
2053   if (ptr)
2054     *ptr += chunk->base - POOL_FRONT (pool);
2055
2056   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2057   chunk->front = chunk->base;
2058
2059   pool->cur = chunk;
2060   return POOL_LIMIT (pool);
2061 }
2062
2063 static cpp_chunk *
2064 new_chunk (size)
2065      unsigned int size;
2066 {
2067   unsigned char *base;
2068   cpp_chunk *result;
2069
2070   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2071   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2072   /* Put the chunk descriptor at the end.  Then chunk overruns will
2073      cause obvious chaos.  */
2074   result = (cpp_chunk *) (base + size);
2075   result->base = base;
2076   result->front = base;
2077   result->limit = base + size;
2078   result->next = 0;
2079
2080   return result;
2081 }
2082
2083 void
2084 _cpp_init_pool (pool, size, align, temp)
2085      cpp_pool *pool;
2086      unsigned int size, align, temp;
2087 {
2088   if (align == 0)
2089     align = DEFAULT_ALIGNMENT;
2090   if (align & (align - 1))
2091     abort ();
2092   pool->align = align;
2093   pool->cur = new_chunk (size);
2094   pool->locked = 0;
2095   pool->locks = 0;
2096   if (temp)
2097     pool->cur->next = pool->cur;
2098 }
2099
2100 void
2101 _cpp_lock_pool (pool)
2102      cpp_pool *pool;
2103 {
2104   if (pool->locks++ == 0)
2105     pool->locked = pool->cur;
2106 }
2107
2108 void
2109 _cpp_unlock_pool (pool)
2110      cpp_pool *pool;
2111 {
2112   if (--pool->locks == 0)
2113     pool->locked = 0;
2114 }
2115
2116 void
2117 _cpp_free_pool (pool)
2118      cpp_pool *pool;
2119 {
2120   cpp_chunk *chunk = pool->cur, *next;
2121
2122   do
2123     {
2124       next = chunk->next;
2125       free (chunk->base);
2126       chunk = next;
2127     }
2128   while (chunk && chunk != pool->cur);
2129 }
2130
2131 /* Reserve LEN bytes from a memory pool.  */
2132 unsigned char *
2133 _cpp_pool_reserve (pool, len)
2134      cpp_pool *pool;
2135      unsigned int len;
2136 {
2137   len = POOL_ALIGN (len, pool->align);
2138   if (len > (unsigned int) POOL_ROOM (pool))
2139     _cpp_next_chunk (pool, len, 0);
2140
2141   return POOL_FRONT (pool);
2142 }
2143
2144 /* Allocate LEN bytes from a memory pool.  */
2145 unsigned char *
2146 _cpp_pool_alloc (pool, len)
2147      cpp_pool *pool;
2148      unsigned int len;
2149 {
2150   unsigned char *result = _cpp_pool_reserve (pool, len);
2151
2152   POOL_COMMIT (pool, len);
2153   return result;
2154 }