gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Spelling categories; every entry of the token_spellings table
  55    carries one of these in its category field.  Tokens with
        SPELL_STRING store their spelling in the token list, and its
        length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,	/* Category given to entries built by OP ().  */
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
     /* One entry of the token_spellings table: the spelling category of
        a token type together with the text recorded for it.  */
  65 struct token_spelling
  66 {
  67   enum spell_type category;
  68   const unsigned char *name;
  69 };
  70
     /* Spellings of the digraph forms of tokens.  NOTE(review): the code
        that indexes this table is not visible in this part of the file;
        confirm the expected ordering against its users before reordering
        or extending it.  */
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
     /* Build the table of token spellings by expanding TTYPE_TABLE with
        local definitions of OP and TK.  An OP (e, s) entry is given the
        category SPELL_OPERATOR and the text S; a TK (e, s) entry is given
        the category S and the text produced by STRINGX (e) (presumably
        the stringified enumerator name -- TODO confirm, STRINGX is
        defined elsewhere).  Both helper macros are undefined again once
        the table has been built.  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           },
  75 #define TK(e, s) { s,              U STRINGX (e) },
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
     /* Accessors for the table entry selected by TOKEN's type: its
        spelling category and its recorded text respectively.  */
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  86
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
  92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  95 static void unterminated PARAMS ((cpp_reader *, int));
  96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
  99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 102                                    const unsigned char *, unsigned int *));
 103
 104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
 106 static unsigned int hex_digit_value PARAMS ((unsigned int));
 107
 108 /* Utility routine:
 109
 110    Compares the token TOKEN to the NUL-terminated string STRING.
 111    Returns 1 if TOKEN is a CPP_NAME whose node name equals STRING, and
        0 otherwise; a TOKEN that is not a CPP_NAME always yields 0.  */
 112
 113 int
 114 cpp_ideq (token, string)
 115      const cpp_token *token;
 116      const char *string;
 117 {
 118   if (token->type != CPP_NAME)
 119     return 0;
 120
 121   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 122 }
 123
 124 /* Call when meeting a newline.  NEWLINE_CHAR is the newline character
 125    that was just read from BUFFER, so buffer->cur is one beyond it.
        Resets the column adjustment, increments the line number and
        records the start of the new line in buffer->line_base.  Returns
        the character after the newline (or carriage-return newline
        combination), or EOF; the same value is stored in
        buffer->read_ahead.  */
 126 static cppchar_t
 127 handle_newline (buffer, newline_char)
 128      cpp_buffer *buffer;
 129      cppchar_t newline_char;
 130 {
 131   cppchar_t next = EOF;
 132
 133   buffer->col_adjust = 0;
 134   buffer->lineno++;
 135   buffer->line_base = buffer->cur;
 136
 137   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 138   if (buffer->cur < buffer->rlimit)
 139     {
 140       next = *buffer->cur++;
           /* Assuming NEWLINE_CHAR is itself '\r' or '\n', the two sums
              are equal only when NEXT is the other member of the pair.  */
 141       if (next + newline_char == '\r' + '\n')
 142         {
               /* The line really starts after the second character.  */
 143           buffer->line_base = buffer->cur;
 144           if (buffer->cur < buffer->rlimit)
 145             next = *buffer->cur++;
 146           else
 147             next = EOF;
 148         }
 149     }
 150
 151   buffer->read_ahead = next;
 152   return next;
 153 }
 154
 155 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 156    encountered.  It warns if necessary, and returns true if the
 157    trigraph should be honoured.  FROM_CHAR is the third character of a
 158    trigraph, and presumed to be the previous character for position
 159    reporting.  The return value is just the setting of the trigraphs
        option; the warn_trigraphs option only controls whether a
        diagnostic is issued.  */
 160 static int
 161 trigraph_ok (pfile, from_char)
 162      cpp_reader *pfile;
 163      cppchar_t from_char;
 164 {
 165   int accept = CPP_OPTION (pfile, trigraphs);
 166
 167   /* Don't warn about trigraphs in comments.  */
 168   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 169     {
 170       cpp_buffer *buffer = pfile->buffer;
 171       if (accept)
 172         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 173                                "trigraph ??%c converted to %c",
 174                                (int) from_char,
 175                                (int) _cpp_trigraph_map[from_char]);
           /* For an ignored trigraph, remember the buffer position we
              warned at so that reaching the same position again does
              not repeat the warning.  */
 176       else if (buffer->cur != buffer->last_Wtrigraphs)
 177         {
 178           buffer->last_Wtrigraphs = buffer->cur;
 179           cpp_warning_with_line (pfile, buffer->lineno,
 180                                  CPP_BUF_COL (buffer) - 2,
 181                                  "trigraph ??%c ignored", (int) from_char);
 182         }
 183     }
 184
 185   return accept;
 186 }
 187
 188 /* Sets the type of the token being lexed to T and clears
        buffer->read_ahead, so that the look-ahead character is treated as
        part of this token rather than being handed to the next one.
        Assumes local variables buffer and result.  */
 189 #define ACCEPT_CHAR(t) \
 190   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 191
 192 /* Save and restore the read position of the buffer.  Both assume
        local variables buffer and saved_cur.

        When we move to multibyte character sets, add to these something
 193    that saves and restores the state of the multibyte conversion
 194    library.  This probably involves saving and restoring a "cookie".
 195    In the case of glibc it is an 8-byte structure, so is not a high
 196    overhead operation.  In any case, it's out of the fast path.  */
 197 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 198 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 199
 200 /* Skips any escaped newlines introduced by NEXT, which is either a
 201    '?' or a '\\'.  Returns the next character, which will also have
 202    been placed in buffer->read_ahead.  This routine performs
 203    preprocessing stages 1 and 2 of the ISO C standard.  A '?' is first
        examined as the possible start of a trigraph; an honoured trigraph
        is replaced by the character it maps to, and only one that maps to
        a backslash can go on to form an escaped newline.  Whenever the
        look-ahead fails to complete a trigraph or an escaped newline,
        buffer->cur is rewound to the most recently saved position.
        Buffers marked from_stage3 are left untouched and NEXT is
        returned as is.  */
 204 static cppchar_t
 205 skip_escaped_newlines (buffer, next)
 206      cpp_buffer *buffer;
 207      cppchar_t next;
 208 {
 209   /* Only do this if we apply stages 1 and 2.  */
 210   if (!buffer->from_stage3)
 211     {
 212       cppchar_t next1;
 213       const unsigned char *saved_cur;
 214       int space;
 215
 216       do
 217         {
 218           if (buffer->cur == buffer->rlimit)
 219             break;
 220
               /* Remember the position just past NEXT.  */
 221           SAVE_STATE ();
 222           if (next == '?')
 223             {
                   /* A trigraph needs a second '?' and a third character.  */
 224               next1 = *buffer->cur++;
 225               if (next1 != '?' || buffer->cur == buffer->rlimit)
 226                 {
 227                   RESTORE_STATE ();
 228                   break;
 229                 }
 230
                   /* The third character must be in the trigraph map, and
                      trigraphs must be enabled.  */
 231               next1 = *buffer->cur++;
 232               if (!_cpp_trigraph_map[next1]
 233                   || !trigraph_ok (buffer->pfile, next1))
 234                 {
 235                   RESTORE_STATE ();
 236                   break;
 237                 }
 238
 239               /* We have a full trigraph here.  */
 240               next = _cpp_trigraph_map[next1];
 241               if (next != '\\' || buffer->cur == buffer->rlimit)
 242                 break;
                   /* The trigraph is consumed for good; any later rewind
                      goes back only to the position just after it.  */
 243               SAVE_STATE ();
 244             }
 245
 246           /* We have a backslash, and room for at least one more character.  */
 247           space = 0;
               /* Skip non-vertical space between the backslash and a
                  possible newline, noting whether there was any.  */
 248           do
 249             {
 250               next1 = *buffer->cur++;
 251               if (!is_nvspace (next1))
 252                 break;
 253               space = 1;
 254             }
 255           while (buffer->cur < buffer->rlimit);
 256
               /* Not followed by a newline: this is an ordinary backslash.  */
 257           if (!is_vspace (next1))
 258             {
 259               RESTORE_STATE ();
 260               break;
 261             }
 262
 263           if (space && !buffer->pfile->state.lexing_comment)
 264             cpp_warning (buffer->pfile,
 265                          "backslash and newline separated by space");
 266
 267           next = handle_newline (buffer, next1);
 268           if (next == EOF)
 269             cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 270         }
           /* The character after the newline may itself start another
              trigraph or escaped newline.  */
 271       while (next == '\\' || next == '?');
 272     }
 273
 274   buffer->read_ahead = next;
 275   return next;
 276 }
 277
 278 /* Obtain the next character, after trigraph conversion and skipping
 279    an arbitrary string of escaped newlines.  The common case of no
 280    trigraphs or escaped newlines falls through quickly.  Returns EOF
        if BUFFER is exhausted.  The character returned is also stored in
        buffer->read_ahead.  */
 281 static cppchar_t
 282 get_effective_char (buffer)
 283      cpp_buffer *buffer;
 284 {
 285   cppchar_t next = EOF;
 286
 287   if (buffer->cur < buffer->rlimit)
 288     {
 289       next = *buffer->cur++;
 290
 291       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 292          can introduce escaped newlines, which we want to skip, or
 293          UCNs, which, depending upon lexer state, we will handle in
 294          the future.  */
 295       if (next == '?' || next == '\\')
 296         next = skip_escaped_newlines (buffer, next);
 297     }
 298
 299   buffer->read_ahead = next;
 300   return next;
 301 }
 302
 303 /* Skip a C-style block comment.  We find the end of the comment by
 304    seeing if an asterisk is before every '/' we encounter.  Returns
 305    non-zero if comment terminated by EOF, zero otherwise.  The
        lexing_comment state flag is set while the comment is scanned, and
        buffer->read_ahead is cleared before returning.  */
 306 static int
 307 skip_block_comment (pfile)
 308      cpp_reader *pfile;
 309 {
 310   cpp_buffer *buffer = pfile->buffer;
 311   cppchar_t c = EOF, prevc = EOF;
 312
 313   pfile->state.lexing_comment = 1;
 314   while (buffer->cur != buffer->rlimit)
 315     {
 316       prevc = c, c = *buffer->cur++;
 317
         /* Entered by goto when C has already been fetched.  */
 318     next_char:
 319       /* FIXME: For speed, create a new character class of characters
 320          of interest inside block comments.  */
 321       if (c == '?' || c == '\\')
 322         c = skip_escaped_newlines (buffer, c);
 323
 324       /* People like decorating comments with '*', so check for '/'
 325          instead for efficiency.  */
 326       if (c == '/')
 327         {
 328           if (prevc == '*')
 329             break;
 330
 331           /* Warn about potential nested comments, but not if the '/'
 332              comes immediately before the true comment delimiter.
 333              Don't bother to get it right across escaped newlines.  */
 334           if (CPP_OPTION (pfile, warn_comments)
 335               && buffer->cur != buffer->rlimit)
 336             {
 337               prevc = c, c = *buffer->cur++;
 338               if (c == '*' && buffer->cur != buffer->rlimit)
 339                 {
 340                   prevc = c, c = *buffer->cur++;
 341                   if (c != '/')
 342                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 343                                            CPP_BUF_COL (buffer),
 344                                            "\"/*\" within comment");
 345                 }
                   /* The character fetched last still has to be examined.  */
 346               goto next_char;
 347             }
 348         }
 349       else if (is_vspace (c))
 350         {
               /* handle_newline has already fetched the next character.  */
 351           prevc = c, c = handle_newline (buffer, c);
 352           goto next_char;
 353         }
 354       else if (c == '\t')
 355         adjust_column (pfile);
 356     }
 357
 358   pfile->state.lexing_comment = 0;
 359   buffer->read_ahead = EOF;
       /* The comment was closed only if the last two characters seen
          were an asterisk followed by a slash.  */
 360   return c != '/' || prevc != '*';
 361 }
 362
 363 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 364    non-zero if a multiline comment.  The following new line, if any,
 365    is left in buffer->read_ahead.  A comment counts as multiline when
        the buffer's line number changed while skipping it, which here can
        only be due to escaped newlines.  If the buffer ends first, EOF is
        left in buffer->read_ahead instead.  */
 366 static int
 367 skip_line_comment (pfile)
 368      cpp_reader *pfile;
 369 {
 370   cpp_buffer *buffer = pfile->buffer;
 371   unsigned int orig_lineno = buffer->lineno;
 372   cppchar_t c;
 373
 374   pfile->state.lexing_comment = 1;
 375   do
 376     {
 377       c = EOF;
 378       if (buffer->cur == buffer->rlimit)
 379         break;
 380
 381       c = *buffer->cur++;
 382       if (c == '?' || c == '\\')
 383         c = skip_escaped_newlines (buffer, c);
 384     }
 385   while (!is_vspace (c));
 386
 387   pfile->state.lexing_comment = 0;
 388   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 389   return orig_lineno != buffer->lineno;
 390 }
 391
 392 /* pfile->buffer->cur is one beyond the \t character.  Update
 393    col_adjust so we track the column correctly.  The amount added to
        buffer->col_adjust is the number of columns the tab advances
        beyond the single position it occupies in the buffer.  */
 394 static void
 395 adjust_column (pfile)
 396      cpp_reader *pfile;
 397 {
 398   cpp_buffer *buffer = pfile->buffer;
 399   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 400
 401   /* Round it up to multiple of the tabstop, but subtract 1 since the
 402      tab itself occupies a character position.  */
 403   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 404                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 405 }
 406
 407 /* Skips whitespace, saving the next non-whitespace character.
 408    Adjusts buffer->col_adjust to account for tabs.  Without this,
 409    tokens might be assigned an incorrect column.  C is the first
        whitespace character, which has already been read.  The character
        that ends the run (or EOF if the buffer is exhausted) is left in
        buffer->read_ahead.  */
 410 static void
 411 skip_whitespace (pfile, c)
 412      cpp_reader *pfile;
 413      cppchar_t c;
 414 {
 415   cpp_buffer *buffer = pfile->buffer;
 416   unsigned int warned = 0;
 417
 418   do
 419     {
 420       /* Horizontal space always OK.  */
 421       if (c == ' ')
 422         ;
 423       else if (c == '\t')
 424         adjust_column (pfile);
 425       /* Just \f \v or \0 left.  */
 426       else if (c == '\0')
 427         {
               /* Warn only once per run of whitespace.  */
 428           if (!warned)
 429             {
 430               cpp_warning (pfile, "null character(s) ignored");
 431               warned = 1;
 432             }
 433         }
 434       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 435         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 436                                CPP_BUF_COL (buffer),
 437                                "%s in preprocessing directive",
 438                                c == '\f' ? "form feed" : "vertical tab");
 439
 440       c = EOF;
 441       if (buffer->cur == buffer->rlimit)
 442         break;
 443       c = *buffer->cur++;
 444     }
 445   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 446   while (is_nvspace (c));
 447
 448   /* Remember the next character.  */
 449   buffer->read_ahead = c;
 450 }
 451
 452 /* See if the characters of a number token are valid in a name (no
 453    '.', '+' or '-').  Returns 1 if every one of the STRING->len
        characters of STRING satisfies is_idchar, and 0 as soon as one
        does not; an empty STRING therefore yields 1.
        NOTE(review): PFILE is not referenced explicitly in the body;
        presumably the is_idchar macro expands to something that uses
        it -- confirm before removing the parameter.  */
 454 static int
 455 name_p (pfile, string)
 456      cpp_reader *pfile;
 457      const cpp_string *string;
 458 {
 459   unsigned int i;
 460
 461   for (i = 0; i < string->len; i++)
 462     if (!is_idchar (string->text[i]))
 463       return 0;
 464
 465   return 1;
 466 }
 467
 468 /* Parse an identifier, skipping embedded backslash-newlines.  C is
 469    the first character of the identifier, which has already been
 470    read.  The spelling is accumulated on the obstack of the hash
        table, NUL-terminated, and then looked up with ht_lookup; the node
        found is returned.  The character following the identifier is
        left in buffer->read_ahead.  Diagnostics are issued here for '$'
        in identifiers when pedantic, for poisoned identifiers, and for
        misplaced uses of __VA_ARGS__.  */
 471
 472 static cpp_hashnode *
 473 parse_identifier (pfile, c)
 474      cpp_reader *pfile;
 475      cppchar_t c;
 476 {
 477   cpp_hashnode *result;
 478   cpp_buffer *buffer = pfile->buffer;
 479   unsigned int saw_dollar = 0, len;
 480   struct obstack *stack = &pfile->hash_table->stack;
 481
       /* The inner loop collects a run of identifier characters; the
          outer loop resumes it when the run was interrupted only by
          escaped newlines.  */
 482   do
 483     {
 484       do
 485         {
 486           obstack_1grow (stack, c);
 487
 488           if (c == '$')
 489             saw_dollar++;
 490
 491           c = EOF;
 492           if (buffer->cur == buffer->rlimit)
 493             break;
 494
 495           c = *buffer->cur++;
 496         }
 497       while (is_idchar (c));
 498
 499       /* Potential escaped newline?  */
 500       if (c != '?' && c != '\\')
 501         break;
 502       c = skip_escaped_newlines (buffer, c);
 503     }
 504   while (is_idchar (c));
 505
 506   /* Remember the next character.  */
 507   buffer->read_ahead = c;
 508
 509   /* $ is not an identifier character in the standard, but is commonly
 510      accepted as an extension.  Don't warn about it in skipped
 511      conditional blocks.  */
 512   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 513     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 514
 515   /* Identifiers are null-terminated.  */
       /* LEN is taken before the NUL is added, so it excludes it.  */
 516   len = obstack_object_size (stack);
 517   obstack_1grow (stack, '\0');
 518
 519   /* This routine commits the memory if necessary.  */
 520   result = (cpp_hashnode *)
 521     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 522
 523   /* Some identifiers require diagnostics when lexed.  */
 524   if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
 525     {
 526       /* It is allowed to poison the same identifier twice.  */
 527       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 528         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 529                    NODE_NAME (result));
 530
 531       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 532          replacement list of a variadic macro.  */
 533       if (result == pfile->spec_nodes.n__VA_ARGS__
 534           && !pfile->state.va_args_ok)
 535         cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 536     }
 537
 538   return result;
 539 }
 540
 541 /* Parse a number, skipping embedded backslash-newlines.  C is the
        first character of the number, which has already been read; if
        LEADING_PERIOD is non-zero a '.' is stored in front of it.  The
        NUL-terminated spelling is built in pfile->ident_pool and
        described by NUMBER, whose len excludes the NUL.  The character
        following the number is left in buffer->read_ahead.  */
 542 static void
 543 parse_number (pfile, number, c, leading_period)
 544      cpp_reader *pfile;
 545      cpp_string *number;
 546      cppchar_t c;
 547      int leading_period;
 548 {
 549   cpp_buffer *buffer = pfile->buffer;
 550   cpp_pool *pool = &pfile->ident_pool;
 551   unsigned char *dest, *limit;
 552
 553   dest = POOL_FRONT (pool);
 554   limit = POOL_LIMIT (pool);
 555
 556   /* Place a leading period.  */
 557   if (leading_period)
 558     {
 559       if (dest >= limit)
 560         limit = _cpp_next_chunk (pool, 0, &dest);
 561       *dest++ = '.';
 562     }
 563
       /* The inner loop collects a run of number characters; the outer
          loop resumes it when the run was interrupted only by escaped
          newlines.  */
 564   do
 565     {
 566       do
 567         {
 568           /* Need room for terminating null.  */
 569           if (dest + 1 >= limit)
 570             limit = _cpp_next_chunk (pool, 0, &dest);
 571           *dest++ = c;
 572
 573           c = EOF;
 574           if (buffer->cur == buffer->rlimit)
 575             break;
 576
 577           c = *buffer->cur++;
 578         }
           /* VALID_SIGN is given the character stored last, so whether a
              sign is accepted depends on what precedes it.  */
 579       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 580
 581       /* Potential escaped newline?  */
 582       if (c != '?' && c != '\\')
 583         break;
 584       c = skip_escaped_newlines (buffer, c);
 585     }
 586   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 587
 588   /* Remember the next character.  */
 589   buffer->read_ahead = c;
 590
 591   /* Null-terminate the number.  */
 592   *dest = '\0';
 593
 594   number->text = POOL_FRONT (pool);
 595   number->len = dest - number->text;
       /* Commit the spelling together with its terminating NUL.  */
 596   POOL_COMMIT (pool, number->len + 1);
 597 }
 598
 599 /* Subroutine of parse_string.  Emits error for unterminated strings.
        TERM is the terminating character that was expected.  For a
        double-quoted string, if the start of a multi-line string was
        recorded in pfile->mlstring_pos on a line other than the current
        lexer line, a second error points at that position as the possible
        start of the unterminated literal, and the record is cleared.  */
 600 static void
 601 unterminated (pfile, term)
 602      cpp_reader *pfile;
 603      int term;
 604 {
 605   cpp_error (pfile, "missing terminating %c character", term);
 606
 607   if (term == '\"' && pfile->mlstring_pos.line
 608       && pfile->mlstring_pos.line != pfile->lexer_pos.line)
 609     {
 610       cpp_error_with_line (pfile, pfile->mlstring_pos.line,
 611                            pfile->mlstring_pos.col,
 612                            "possible start of unterminated string literal");
 613       pfile->mlstring_pos.line = 0;
 614     }
 615 }
 616
 617 /* Subroutine of parse_string.  DEST points just beyond the characters
        stored so far.  Returns non-zero if a terminator met at this point
        really ends the literal: always when lexing angled headers,
        otherwise when the number of consecutive backslashes stored
        immediately before DEST is even.  The backward scan does not go
        past the front of pfile->ident_pool.  */
 618 static int
 619 unescaped_terminator_p (pfile, dest)
 620      cpp_reader *pfile;
 621      const unsigned char *dest;
 622 {
 623   const unsigned char *start, *temp;
 624
 625   /* In #include-style directives, terminators are not escapable.  */
 626   if (pfile->state.angled_headers)
 627     return 1;
 628
 629   start = POOL_FRONT (&pfile->ident_pool);
 630
 631   /* An odd number of consecutive backslashes represents an escaped
 632      terminator.  */
 633   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 634     ;
 635
       /* DEST - TEMP is the number of backslashes counted.  */
 636   return ((dest - temp) & 1) == 0;
 637 }
 638
 639 /* Parses a string, character constant, or angle-bracketed header file
 640    name.  Handles embedded trigraphs and escaped newlines.  The stored
 641    string is guaranteed NUL-terminated, but it is not guaranteed that
 642    this is the first NUL since embedded NULs are preserved.
 643
 644    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the character that ends the literal; it is not
        itself stored.  The text is built in pfile->ident_pool and
        recorded in TOKEN->val.str, with a length that excludes the final
        NUL.  On return buffer->read_ahead is EOF if the terminator was
        consumed or the buffer ran out, and otherwise holds the newline
        character that cut the literal short.  */
 645 static void
 646 parse_string (pfile, token, terminator)
 647      cpp_reader *pfile;
 648      cpp_token *token;
 649      cppchar_t terminator;
 650 {
 651   cpp_buffer *buffer = pfile->buffer;
 652   cpp_pool *pool = &pfile->ident_pool;
 653   unsigned char *dest, *limit;
 654   cppchar_t c;
 655   unsigned int nulls = 0;
 656
 657   dest = POOL_FRONT (pool);
 658   limit = POOL_LIMIT (pool);
 659
 660   for (;;)
 661     {
 662       if (buffer->cur == buffer->rlimit)
 663         c = EOF;
 664       else
 665         c = *buffer->cur++;
 666
         /* Entered by goto when C has already been fetched.  */
 667     have_char:
 668       /* We need space for the terminating NUL.  */
 669       if (dest >= limit)
 670         limit = _cpp_next_chunk (pool, 0, &dest);
 671
 672       if (c == EOF)
 673         {
 674           unterminated (pfile, terminator);
 675           break;
 676         }
 677
 678       /* Handle trigraphs, escaped newlines etc.  */
 679       if (c == '?' || c == '\\')
 680         c = skip_escaped_newlines (buffer, c);
 681
 682       if (c == terminator && unescaped_terminator_p (pfile, dest))
 683         {
               /* The terminator is consumed; leave no pending character.  */
 684           c = EOF;
 685           break;
 686         }
 687       else if (is_vspace (c))
 688         {
 689           /* In assembly language, silently terminate string and
 690              character literals at end of line.  This is a kludge
 691              around not knowing where comments are.  */
 692           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 693             break;
 694
 695           /* Character constants and header names may not extend over
 696              multiple lines.  In Standard C, neither may strings.
 697              Unfortunately, we accept multiline strings as an
 698              extension, except in #include family directives.  */
 699           if (terminator != '"' || pfile->state.angled_headers)
 700             {
 701               unterminated (pfile, terminator);
 702               break;
 703             }
 704
 705           cpp_pedwarn (pfile, "multi-line string literals are deprecated");
               /* Record where the first multi-line string began, for the
                  benefit of unterminated.  */
 706           if (pfile->mlstring_pos.line == 0)
 707             pfile->mlstring_pos = pfile->lexer_pos;
 708
               /* Whatever the newline sequence was, a single '\n' is
                  stored; handle_newline has fetched the next character.  */
 709           c = handle_newline (buffer, c);
 710           *dest++ = '\n';
 711           goto have_char;
 712         }
 713       else if (c == '\0')
 714         {
               /* Warn only for the first NUL in the literal.  */
 715           if (nulls++ == 0)
 716             cpp_warning (pfile, "null character(s) preserved in literal");
 717         }
 718
 719       *dest++ = c;
 720     }
 721
 722   /* Remember the next character.  */
 723   buffer->read_ahead = c;
 724   *dest = '\0';
 725
 726   token->val.str.text = POOL_FRONT (pool);
 727   token->val.str.len = dest - token->val.str.text;
       /* Commit the text together with its terminating NUL.  */
 728   POOL_COMMIT (pool, token->val.str.len + 1);
 729 }
 730
 731 /* The stored comment includes the comment start and any terminator.
        FROM points at the comment text that follows its initial '/'; the
        comment extends up to pfile->buffer->cur, less one character when
        a look-ahead character is pending.  TOKEN becomes a CPP_COMMENT
        whose text is allocated from pfile->ident_pool.  Exactly LEN bytes
        are allocated and filled, so no terminating NUL is stored.  */
 732 static void
 733 save_comment (pfile, token, from)
 734      cpp_reader *pfile;
 735      cpp_token *token;
 736      const unsigned char *from;
 737 {
 738   unsigned char *buffer;
 739   unsigned int len;
 740
 741   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 742   /* C++ comments probably (not definitely) have moved past a new
 743      line, which we don't want to save in the comment.  */
 744   if (pfile->buffer->read_ahead != EOF)
 745     len--;
 746   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 747
 748   token->type = CPP_COMMENT;
 749   token->val.str.len = len;
 750   token->val.str.text = buffer;
 751
       /* Re-create the initial '/', then copy the remainder from FROM.  */
 752   buffer[0] = '/';
 753   memcpy (buffer + 1, from, len - 1);
 754 }
 755
 756 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 757    want to avoid stepping back when lexing %:%X.  The type of RESULT
        is set to CPP_MOD, CPP_MOD_EQ, or, when digraphs are enabled, to
        CPP_HASH, CPP_PASTE or CPP_CLOSE_BRACE with the DIGRAPH flag set.
        When "%:%" turns out not to be followed by ':', the second '%' is
        left in buffer->read_ahead and the character after it is parked in
        buffer->extra_char, to be picked up on the next call.  */
 758 static void
 759 lex_percent (buffer, result)
 760      cpp_buffer *buffer;
 761      cpp_token *result;
 762 {
 763   cppchar_t c;
 764
 765   result->type = CPP_MOD;
 766   /* Parsing %:%X could leave an extra character.  */
 767   if (buffer->extra_char == EOF)
 768     c = get_effective_char (buffer);
 769   else
 770     {
 771       c = buffer->read_ahead = buffer->extra_char;
 772       buffer->extra_char = EOF;
 773     }
 774
 775   if (c == '=')
 776     ACCEPT_CHAR (CPP_MOD_EQ);
 777   else if (CPP_OPTION (buffer->pfile, digraphs))
 778     {
 779       if (c == ':')
 780         {
 781           result->flags |= DIGRAPH;
 782           ACCEPT_CHAR (CPP_HASH);
               /* Look further ahead for a second "%:".  */
 783           if (get_effective_char (buffer) == '%')
 784             {
 785               buffer->extra_char = get_effective_char (buffer);
 786               if (buffer->extra_char == ':')
 787                 {
 788                   buffer->extra_char = EOF;
 789                   ACCEPT_CHAR (CPP_PASTE);
 790                 }
 791               else
 792                 /* We'll catch the extra_char when we're called back.  */
 793                 buffer->read_ahead = '%';
 794             }
 795         }
 796       else if (c == '>')
 797         {
 798           result->flags |= DIGRAPH;
 799           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 800         }
 801     }
 802 }
 803
 804 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
 805    want to avoid stepping back when lexing '...' or '.123'.  In the
 806    latter case we should also set a flag for parse_number.  The type
        of RESULT is set to CPP_NUMBER, CPP_DOT, CPP_ELLIPSIS or, for C++,
        CPP_DOT_STAR.  When ".." turns out not to be followed by a third
        '.', the second '.' is left in buffer->read_ahead and the
        character after it is parked in buffer->extra_char, to be picked
        up on the next call.  */
 807 static void
 808 lex_dot (pfile, result)
 809      cpp_reader *pfile;
 810      cpp_token *result;
 811 {
 812   cpp_buffer *buffer = pfile->buffer;
 813   cppchar_t c;
 814
 815   /* Parsing ..X could leave an extra character.  */
 816   if (buffer->extra_char == EOF)
 817     c = get_effective_char (buffer);
 818   else
 819     {
 820       c = buffer->read_ahead = buffer->extra_char;
 821       buffer->extra_char = EOF;
 822     }
 823
 824   /* All known character sets have 0...9 contiguous.  */
 825   if (c >= '0' && c <= '9')
 826     {
 827       result->type = CPP_NUMBER;
           /* The final argument asks parse_number to store the '.'.  */
 828       parse_number (pfile, &result->val.str, c, 1);
 829     }
 830   else
 831     {
 832       result->type = CPP_DOT;
 833       if (c == '.')
 834         {
 835           buffer->extra_char = get_effective_char (buffer);
 836           if (buffer->extra_char == '.')
 837             {
 838               buffer->extra_char = EOF;
 839               ACCEPT_CHAR (CPP_ELLIPSIS);
 840             }
 841           else
 842             /* We'll catch the extra_char when we're called back.  */
 843             buffer->read_ahead = '.';
 844         }
 845       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 846         ACCEPT_CHAR (CPP_DOT_STAR);
 847     }
 848 }
 849
 850 void
 851 _cpp_lex_token (pfile, result)
 852      cpp_reader *pfile;
 853      cpp_token *result;
 854 {
 855   cppchar_t c;
 856   cpp_buffer *buffer;
 857   const unsigned char *comment_start;
 858   unsigned char bol;
 859
 860  skip:
 861   bol = pfile->state.next_bol;
 862  done_directive:
 863   buffer = pfile->buffer;
 864   pfile->state.next_bol = 0;
 865   result->flags = buffer->saved_flags;
 866   buffer->saved_flags = 0;
 867  next_char:
 868   pfile->lexer_pos.line = buffer->lineno;
 869  next_char2:
 870   pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
 871
 872   c = buffer->read_ahead;
 873   if (c == EOF && buffer->cur < buffer->rlimit)
 874     {
 875       c = *buffer->cur++;
 876       pfile->lexer_pos.col++;
 877     }
 878
 879  do_switch:
 880   buffer->read_ahead = EOF;
 881   switch (c)
 882     {
 883     case EOF:
 884       /* Non-empty files should end in a newline.  Checking "bol" too
 885           prevents multiple warnings when hitting the EOF more than
 886           once, like in a directive.  Don't warn for command line and
 887           _Pragma buffers.  */
 888       if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
 889         cpp_pedwarn (pfile, "no newline at end of file");
 890       pfile->state.next_bol = 1;
 891       pfile->skipping = 0;      /* In case missing #endif.  */
 892       result->type = CPP_EOF;
 893       /* Don't do MI optimisation.  */
 894       return;
 895
 896     case ' ': case '\t': case '\f': case '\v': case '\0':
 897       skip_whitespace (pfile, c);
 898       result->flags |= PREV_WHITE;
 899       goto next_char2;
 900
 901     case '\n': case '\r':
 902       if (!pfile->state.in_directive)
 903         {
 904           handle_newline (buffer, c);
 905           bol = 1;
 906           pfile->lexer_pos.output_line = buffer->lineno;
 907           /* This is a new line, so clear any white space flag.
 908              Newlines in arguments are white space (6.10.3.10);
 909              parse_arg takes care of that.  */
 910           result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
 911           goto next_char;
 912         }
 913
 914       /* Don't let directives spill over to the next line.  */
 915       buffer->read_ahead = c;
 916       pfile->state.next_bol = 1;
 917       result->type = CPP_EOF;
 918       /* Don't break; pfile->skipping might be true.  */
 919       return;
 920
 921     case '?':
 922     case '\\':
 923       /* These could start an escaped newline, or '?' a trigraph.  Let
 924          skip_escaped_newlines do all the work.  */
 925       {
 926         unsigned int lineno = buffer->lineno;
 927
 928         c = skip_escaped_newlines (buffer, c);
 929         if (lineno != buffer->lineno)
 930           /* We had at least one escaped newline of some sort, and the
 931              next character is in buffer->read_ahead.  Update the
 932              token's line and column.  */
 933             goto next_char;
 934
 935         /* We are either the original '?' or '\\', or a trigraph.  */
 936         result->type = CPP_QUERY;
 937         buffer->read_ahead = EOF;
 938         if (c == '\\')
 939           goto random_char;
 940         else if (c != '?')
 941           goto do_switch;
 942       }
 943       break;
 944
 945     case '0': case '1': case '2': case '3': case '4':
 946     case '5': case '6': case '7': case '8': case '9':
 947       result->type = CPP_NUMBER;
 948       parse_number (pfile, &result->val.str, c, 0);
 949       break;
 950
 951     case '$':
 952       if (!CPP_OPTION (pfile, dollars_in_ident))
 953         goto random_char;
 954       /* Fall through... */
 955
 956     case '_':
 957     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 958     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 959     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 960     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 961     case 'y': case 'z':
 962     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 963     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 964     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 965     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 966     case 'Y': case 'Z':
 967       result->type = CPP_NAME;
 968       result->val.node = parse_identifier (pfile, c);
 969
 970       /* 'L' may introduce wide characters or strings.  */
 971       if (result->val.node == pfile->spec_nodes.n_L)
 972         {
 973           c = buffer->read_ahead; /* For make_string.  */
 974           if (c == '\'' || c == '"')
 975             {
 976               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 977               goto make_string;
 978             }
 979         }
 980       /* Convert named operators to their proper types.  */
 981       else if (result->val.node->flags & NODE_OPERATOR)
 982         {
 983           result->flags |= NAMED_OP;
 984           result->type = result->val.node->value.operator;
 985         }
 986       break;
 987
 988     case '\'':
 989     case '"':
 990       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 991     make_string:
 992       parse_string (pfile, result, c);
 993       break;
 994
 995     case '/':
 996       /* A potential block or line comment.  */
 997       comment_start = buffer->cur;
 998       result->type = CPP_DIV;
 999       c = get_effective_char (buffer);
1000       if (c == '=')
1001         ACCEPT_CHAR (CPP_DIV_EQ);
1002       if (c != '/' && c != '*')
1003         break;
1004
1005       if (c == '*')
1006         {
1007           if (skip_block_comment (pfile))
1008             cpp_error_with_line (pfile, pfile->lexer_pos.line,
1009                                  pfile->lexer_pos.col,
1010                                  "unterminated comment");
1011         }
1012       else
1013         {
1014           if (!CPP_OPTION (pfile, cplusplus_comments)
1015               && !CPP_IN_SYSTEM_HEADER (pfile))
1016             break;
1017
1018           /* Warn about comments only if pedantically GNUC89, and not
1019              in system headers.  */
1020           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1021               && ! buffer->warned_cplusplus_comments)
1022             {
1023               cpp_pedwarn (pfile,
1024                            "C++ style comments are not allowed in ISO C89");
1025               cpp_pedwarn (pfile,
1026                            "(this will be reported only once per input file)");
1027               buffer->warned_cplusplus_comments = 1;
1028             }
1029
1030           /* Skip_line_comment updates buffer->read_ahead.  */
1031           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1032             cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1033                                    pfile->lexer_pos.col,
1034                                    "multi-line comment");
1035         }
1036
1037       /* Skipping the comment has updated buffer->read_ahead.  */
1038       if (!pfile->state.save_comments)
1039         {
1040           result->flags |= PREV_WHITE;
1041           goto next_char;
1042         }
1043
1044       /* Save the comment as a token in its own right.  */
1045       save_comment (pfile, result, comment_start);
1046       /* Don't do MI optimisation.  */
1047       return;
1048
1049     case '<':
1050       if (pfile->state.angled_headers)
1051         {
1052           result->type = CPP_HEADER_NAME;
1053           c = '>';              /* terminator.  */
1054           goto make_string;
1055         }
1056
1057       result->type = CPP_LESS;
1058       c = get_effective_char (buffer);
1059       if (c == '=')
1060         ACCEPT_CHAR (CPP_LESS_EQ);
1061       else if (c == '<')
1062         {
1063           ACCEPT_CHAR (CPP_LSHIFT);
1064           if (get_effective_char (buffer) == '=')
1065             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1066         }
1067       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1068         {
1069           ACCEPT_CHAR (CPP_MIN);
1070           if (get_effective_char (buffer) == '=')
1071             ACCEPT_CHAR (CPP_MIN_EQ);
1072         }
1073       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1074         {
1075           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1076           result->flags |= DIGRAPH;
1077         }
1078       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1079         {
1080           ACCEPT_CHAR (CPP_OPEN_BRACE);
1081           result->flags |= DIGRAPH;
1082         }
1083       break;
1084
1085     case '>':
1086       result->type = CPP_GREATER;
1087       c = get_effective_char (buffer);
1088       if (c == '=')
1089         ACCEPT_CHAR (CPP_GREATER_EQ);
1090       else if (c == '>')
1091         {
1092           ACCEPT_CHAR (CPP_RSHIFT);
1093           if (get_effective_char (buffer) == '=')
1094             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1095         }
1096       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1097         {
1098           ACCEPT_CHAR (CPP_MAX);
1099           if (get_effective_char (buffer) == '=')
1100             ACCEPT_CHAR (CPP_MAX_EQ);
1101         }
1102       break;
1103
1104     case '%':
1105       lex_percent (buffer, result);
1106       if (result->type == CPP_HASH)
1107         goto do_hash;
1108       break;
1109
1110     case '.':
1111       lex_dot (pfile, result);
1112       break;
1113
1114     case '+':
1115       result->type = CPP_PLUS;
1116       c = get_effective_char (buffer);
1117       if (c == '=')
1118         ACCEPT_CHAR (CPP_PLUS_EQ);
1119       else if (c == '+')
1120         ACCEPT_CHAR (CPP_PLUS_PLUS);
1121       break;
1122
1123     case '-':
1124       result->type = CPP_MINUS;
1125       c = get_effective_char (buffer);
1126       if (c == '>')
1127         {
1128           ACCEPT_CHAR (CPP_DEREF);
1129           if (CPP_OPTION (pfile, cplusplus)
1130               && get_effective_char (buffer) == '*')
1131             ACCEPT_CHAR (CPP_DEREF_STAR);
1132         }
1133       else if (c == '=')
1134         ACCEPT_CHAR (CPP_MINUS_EQ);
1135       else if (c == '-')
1136         ACCEPT_CHAR (CPP_MINUS_MINUS);
1137       break;
1138
1139     case '*':
1140       result->type = CPP_MULT;
1141       if (get_effective_char (buffer) == '=')
1142         ACCEPT_CHAR (CPP_MULT_EQ);
1143       break;
1144
1145     case '=':
1146       result->type = CPP_EQ;
1147       if (get_effective_char (buffer) == '=')
1148         ACCEPT_CHAR (CPP_EQ_EQ);
1149       break;
1150
1151     case '!':
1152       result->type = CPP_NOT;
1153       if (get_effective_char (buffer) == '=')
1154         ACCEPT_CHAR (CPP_NOT_EQ);
1155       break;
1156
1157     case '&':
1158       result->type = CPP_AND;
1159       c = get_effective_char (buffer);
1160       if (c == '=')
1161         ACCEPT_CHAR (CPP_AND_EQ);
1162       else if (c == '&')
1163         ACCEPT_CHAR (CPP_AND_AND);
1164       break;
1165
1166     case '#':
1167       c = buffer->extra_char;   /* Can be set by error condition below.  */
1168       if (c != EOF)
1169         {
1170           buffer->read_ahead = c;
1171           buffer->extra_char = EOF;
1172         }
1173       else
1174         c = get_effective_char (buffer);
1175
1176       if (c == '#')
1177         {
1178           ACCEPT_CHAR (CPP_PASTE);
1179           break;
1180         }
1181
1182       result->type = CPP_HASH;
1183     do_hash:
1184       if (!bol)
1185         break;
1186       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1187          tokens within the list of arguments that would otherwise act
1188          as preprocessing directives, the behavior is undefined.
1189
1190          This implementation will report a hard error, terminate the
1191          macro invocation, and proceed to process the directive.  */
1192       if (pfile->state.parsing_args)
1193         {
1194           if (pfile->state.parsing_args == 2)
1195             cpp_error (pfile,
1196                        "directives may not be used inside a macro argument");
1197
1198           /* Put a '#' in lookahead, return CPP_EOF for parse_arg.  */
1199           buffer->extra_char = buffer->read_ahead;
1200           buffer->read_ahead = '#';
1201           pfile->state.next_bol = 1;
1202           result->type = CPP_EOF;
1203
1204           /* Get whitespace right - newline_in_args sets it.  */
1205           if (pfile->lexer_pos.col == 1)
1206             result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1207         }
1208       else
1209         {
1210           /* This is the hash introducing a directive.  */
1211           if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1212             goto done_directive; /* bol still 1.  */
1213           /* This is in fact an assembler #.  */
1214         }
1215       break;
1216
1217     case '|':
1218       result->type = CPP_OR;
1219       c = get_effective_char (buffer);
1220       if (c == '=')
1221         ACCEPT_CHAR (CPP_OR_EQ);
1222       else if (c == '|')
1223         ACCEPT_CHAR (CPP_OR_OR);
1224       break;
1225
1226     case '^':
1227       result->type = CPP_XOR;
1228       if (get_effective_char (buffer) == '=')
1229         ACCEPT_CHAR (CPP_XOR_EQ);
1230       break;
1231
1232     case ':':
1233       result->type = CPP_COLON;
1234       c = get_effective_char (buffer);
1235       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1236         ACCEPT_CHAR (CPP_SCOPE);
1237       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1238         {
1239           result->flags |= DIGRAPH;
1240           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1241         }
1242       break;
1243
1244     case '~': result->type = CPP_COMPL; break;
1245     case ',': result->type = CPP_COMMA; break;
1246     case '(': result->type = CPP_OPEN_PAREN; break;
1247     case ')': result->type = CPP_CLOSE_PAREN; break;
1248     case '[': result->type = CPP_OPEN_SQUARE; break;
1249     case ']': result->type = CPP_CLOSE_SQUARE; break;
1250     case '{': result->type = CPP_OPEN_BRACE; break;
1251     case '}': result->type = CPP_CLOSE_BRACE; break;
1252     case ';': result->type = CPP_SEMICOLON; break;
1253
1254       /* @ is a punctuator in Objective C.  */
1255     case '@': result->type = CPP_ATSIGN; break;
1256
1257     random_char:
1258     default:
1259       result->type = CPP_OTHER;
1260       result->val.c = c;
1261       break;
1262     }
1263
1264   if (pfile->skipping)
1265     goto skip;
1266
1267   /* If not in a directive, this token invalidates controlling macros.  */
1268   if (!pfile->state.in_directive)
1269     pfile->mi_state = MI_FAILED;
1270 }
1271
1272 /* An upper bound on the number of bytes needed to spell a token,
1273    including preceding whitespace.  */
1274 unsigned int
1275 cpp_token_len (token)
1276      const cpp_token *token;
1277 {
1278   unsigned int len;
1279
1280   switch (TOKEN_SPELL (token))
1281     {
1282     default:            len = 0;                                break;
1283     case SPELL_STRING:  len = token->val.str.len;               break;
1284     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1285     }
1286   /* 1 for whitespace, 4 for comment delimeters.  */
1287   return len + 5;
1288 }
1289
1290 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1291    already contain the enough space to hold the token's spelling.
1292    Returns a pointer to the character after the last character
1293    written.  */
1294 unsigned char *
1295 cpp_spell_token (pfile, token, buffer)
1296      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1297      const cpp_token *token;
1298      unsigned char *buffer;
1299 {
1300   switch (TOKEN_SPELL (token))
1301     {
1302     case SPELL_OPERATOR:
1303       {
1304         const unsigned char *spelling;
1305         unsigned char c;
1306
1307         if (token->flags & DIGRAPH)
1308           spelling
1309             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1310         else if (token->flags & NAMED_OP)
1311           goto spell_ident;
1312         else
1313           spelling = TOKEN_NAME (token);
1314
1315         while ((c = *spelling++) != '\0')
1316           *buffer++ = c;
1317       }
1318       break;
1319
1320     case SPELL_IDENT:
1321       spell_ident:
1322       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1323       buffer += NODE_LEN (token->val.node);
1324       break;
1325
1326     case SPELL_STRING:
1327       {
1328         int left, right, tag;
1329         switch (token->type)
1330           {
1331           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1332           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1333           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1334           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1335           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1336           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1337           }
1338         if (tag) *buffer++ = tag;
1339         if (left) *buffer++ = left;
1340         memcpy (buffer, token->val.str.text, token->val.str.len);
1341         buffer += token->val.str.len;
1342         if (right) *buffer++ = right;
1343       }
1344       break;
1345
1346     case SPELL_CHAR:
1347       *buffer++ = token->val.c;
1348       break;
1349
1350     case SPELL_NONE:
1351       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1352       break;
1353     }
1354
1355   return buffer;
1356 }
1357
1358 /* Returns a token as a null-terminated string.  The string is
1359    temporary, and automatically freed later.  Useful for diagnostics.  */
1360 unsigned char *
1361 cpp_token_as_text (pfile, token)
1362      cpp_reader *pfile;
1363      const cpp_token *token;
1364 {
1365   unsigned int len = cpp_token_len (token);
1366   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1367
1368   end = cpp_spell_token (pfile, token, start);
1369   end[0] = '\0';
1370
1371   return start;
1372 }
1373
1374 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1375 const char *
1376 cpp_type2name (type)
1377      enum cpp_ttype type;
1378 {
1379   return (const char *) token_spellings[type].name;
1380 }
1381
1382 /* Writes the spelling of token to FP.  Separate from cpp_spell_token
1383    for efficiency - to avoid double-buffering.  Also, outputs a space
1384    if PREV_WHITE is flagged.  */
1385 void
1386 cpp_output_token (token, fp)
1387      const cpp_token *token;
1388      FILE *fp;
1389 {
1390   if (token->flags & PREV_WHITE)
1391     putc (' ', fp);
1392
1393   switch (TOKEN_SPELL (token))
1394     {
1395     case SPELL_OPERATOR:
1396       {
1397         const unsigned char *spelling;
1398
1399         if (token->flags & DIGRAPH)
1400           spelling
1401             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1402         else if (token->flags & NAMED_OP)
1403           goto spell_ident;
1404         else
1405           spelling = TOKEN_NAME (token);
1406
1407         ufputs (spelling, fp);
1408       }
1409       break;
1410
1411     spell_ident:
1412     case SPELL_IDENT:
1413       ufputs (NODE_NAME (token->val.node), fp);
1414     break;
1415
1416     case SPELL_STRING:
1417       {
1418         int left, right, tag;
1419         switch (token->type)
1420           {
1421           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1422           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1423           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1424           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1425           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1426           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1427           }
1428         if (tag) putc (tag, fp);
1429         if (left) putc (left, fp);
1430         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1431         if (right) putc (right, fp);
1432       }
1433       break;
1434
1435     case SPELL_CHAR:
1436       putc (token->val.c, fp);
1437       break;
1438
1439     case SPELL_NONE:
1440       /* An error, most probably.  */
1441       break;
1442     }
1443 }
1444
1445 /* Compare two tokens.  */
1446 int
1447 _cpp_equiv_tokens (a, b)
1448      const cpp_token *a, *b;
1449 {
1450   if (a->type == b->type && a->flags == b->flags)
1451     switch (TOKEN_SPELL (a))
1452       {
1453       default:                  /* Keep compiler happy.  */
1454       case SPELL_OPERATOR:
1455         return 1;
1456       case SPELL_CHAR:
1457         return a->val.c == b->val.c; /* Character.  */
1458       case SPELL_NONE:
1459         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1460       case SPELL_IDENT:
1461         return a->val.node == b->val.node;
1462       case SPELL_STRING:
1463         return (a->val.str.len == b->val.str.len
1464                 && !memcmp (a->val.str.text, b->val.str.text,
1465                             a->val.str.len));
1466       }
1467
1468   return 0;
1469 }
1470
1471 /* Determine whether two tokens can be pasted together, and if so,
1472    what the resulting token is.  Returns CPP_EOF if the tokens cannot
1473    be pasted, or the appropriate type for the merged token if they
1474    can.  */
1475 enum cpp_ttype
1476 cpp_can_paste (pfile, token1, token2, digraph)
1477      cpp_reader * pfile;
1478      const cpp_token *token1, *token2;
1479      int* digraph;
1480 {
1481   enum cpp_ttype a = token1->type, b = token2->type;
1482   int cxx = CPP_OPTION (pfile, cplusplus);
1483
1484   /* Treat named operators as if they were ordinary NAMEs.  */
1485   if (token1->flags & NAMED_OP)
1486     a = CPP_NAME;
1487   if (token2->flags & NAMED_OP)
1488     b = CPP_NAME;
1489
1490   if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1491     return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1492
1493   switch (a)
1494     {
1495     case CPP_GREATER:
1496       if (b == a) return CPP_RSHIFT;
1497       if (b == CPP_QUERY && cxx)        return CPP_MAX;
1498       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
1499       break;
1500     case CPP_LESS:
1501       if (b == a) return CPP_LSHIFT;
1502       if (b == CPP_QUERY && cxx)        return CPP_MIN;
1503       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
1504       if (CPP_OPTION (pfile, digraphs))
1505         {
1506           if (b == CPP_COLON)
1507             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1508           if (b == CPP_MOD)
1509             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
1510         }
1511       break;
1512
1513     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
1514     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
1515     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
1516
1517     case CPP_MINUS:
1518       if (b == a)               return CPP_MINUS_MINUS;
1519       if (b == CPP_GREATER)     return CPP_DEREF;
1520       break;
1521     case CPP_COLON:
1522       if (b == a && cxx)        return CPP_SCOPE;
1523       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1524         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1525       break;
1526
1527     case CPP_MOD:
1528       if (CPP_OPTION (pfile, digraphs))
1529         {
1530           if (b == CPP_GREATER)
1531             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
1532           if (b == CPP_COLON)
1533             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
1534         }
1535       break;
1536     case CPP_DEREF:
1537       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1538       break;
1539     case CPP_DOT:
1540       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1541       if (b == CPP_NUMBER)      return CPP_NUMBER;
1542       break;
1543
1544     case CPP_HASH:
1545       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1546         /* %:%: digraph */
1547         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1548       break;
1549
1550     case CPP_NAME:
1551       if (b == CPP_NAME)        return CPP_NAME;
1552       if (b == CPP_NUMBER
1553           && name_p (pfile, &token2->val.str)) return CPP_NAME;
1554       if (b == CPP_CHAR
1555           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1556       if (b == CPP_STRING
1557           && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1558       break;
1559
1560     case CPP_NUMBER:
1561       if (b == CPP_NUMBER)      return CPP_NUMBER;
1562       if (b == CPP_NAME)        return CPP_NUMBER;
1563       if (b == CPP_DOT)         return CPP_NUMBER;
1564       /* Numbers cannot have length zero, so this is safe.  */
1565       if ((b == CPP_PLUS || b == CPP_MINUS)
1566           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1567         return CPP_NUMBER;
1568       break;
1569
1570     default:
1571       break;
1572     }
1573
1574   return CPP_EOF;
1575 }
1576
1577 /* Returns nonzero if a space should be inserted to avoid an
1578    accidental token paste for output.  For simplicity, it is
1579    conservative, and occasionally advises a space where one is not
1580    needed, e.g. "." and ".2".  */
1581
1582 int
1583 cpp_avoid_paste (pfile, token1, token2)
1584      cpp_reader *pfile;
1585      const cpp_token *token1, *token2;
1586 {
1587   enum cpp_ttype a = token1->type, b = token2->type;
1588   cppchar_t c;
1589
1590   if (token1->flags & NAMED_OP)
1591     a = CPP_NAME;
1592   if (token2->flags & NAMED_OP)
1593     b = CPP_NAME;
1594
1595   c = EOF;
1596   if (token2->flags & DIGRAPH)
1597     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1598   else if (token_spellings[b].category == SPELL_OPERATOR)
1599     c = token_spellings[b].name[0];
1600
1601   /* Quickly get everything that can paste with an '='.  */
1602   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1603     return 1;
1604
1605   switch (a)
1606     {
1607     case CPP_GREATER:   return c == '>' || c == '?';
1608     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1609     case CPP_PLUS:      return c == '+';
1610     case CPP_MINUS:     return c == '-' || c == '>';
1611     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1612     case CPP_MOD:       return c == ':' || c == '>';
1613     case CPP_AND:       return c == '&';
1614     case CPP_OR:        return c == '|';
1615     case CPP_COLON:     return c == ':' || c == '>';
1616     case CPP_DEREF:     return c == '*';
1617     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1618     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1619     case CPP_NAME:      return ((b == CPP_NUMBER
1620                                  && name_p (pfile, &token2->val.str))
1621                                 || b == CPP_NAME
1622                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1623     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1624                                 || c == '.' || c == '+' || c == '-');
1625     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1626                                 && token1->val.c == '@'
1627                                 && (b == CPP_NAME || b == CPP_STRING));
1628     default:            break;
1629     }
1630
1631   return 0;
1632 }
1633
1634 /* Output all the remaining tokens on the current line, and a newline
1635    character, to FP.  Leading whitespace is removed.  */
1636 void
1637 cpp_output_line (pfile, fp)
1638      cpp_reader *pfile;
1639      FILE *fp;
1640 {
1641   cpp_token token;
1642
1643   cpp_get_token (pfile, &token);
1644   token.flags &= ~PREV_WHITE;
1645   while (token.type != CPP_EOF)
1646     {
1647       cpp_output_token (&token, fp);
1648       cpp_get_token (pfile, &token);
1649     }
1650
1651   putc ('\n', fp);
1652 }
1653
/* Returns the numeric value, 0 through 15, of the hexadecimal digit
   C.  Both upper and lower case letters are accepted.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';

  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');

  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  /* Not a hexadecimal digit at all.  */
  abort ();
}
1667
1668 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1669    failure if cpplib is not parsing C++ or C99.  Such failure is
1670    silent, and no variables are updated.  Otherwise returns 0, and
1671    warns if -Wtraditional.
1672
1673    [lex.charset]: The character designated by the universal character
1674    name \UNNNNNNNN is that character whose character short name in
1675    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676    universal character name \uNNNN is that character whose character
1677    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1678    for a universal character name is less than 0x20 or in the range
1679    0x7F-0x9F (inclusive), or if the universal character name
1680    designates a character in the basic source character set, then the
1681    program is ill-formed.
1682
1683    We assume that wchar_t is Unicode, so we don't need to do any
1684    mapping.  Is this ever wrong?
1685
1686    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1687    LIMIT is the end of the string or charconst.  PSTR is updated to
1688    point after the UCS on return, and the UCS is written into PC.  */
1689
1690 static int
1691 maybe_read_ucs (pfile, pstr, limit, pc)
1692      cpp_reader *pfile;
1693      const unsigned char **pstr;
1694      const unsigned char *limit;
1695      unsigned int *pc;
1696 {
1697   const unsigned char *p = *pstr;
1698   unsigned int code = 0;
1699   unsigned int c = *pc, length;
1700
1701   /* Only attempt to interpret a UCS for C++ and C99.  */
1702   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1703     return 1;
1704
1705   if (CPP_WTRADITIONAL (pfile))
1706     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1707
1708   length = (c == 'u' ? 4: 8);
1709
1710   if ((size_t) (limit - p) < length)
1711     {
1712       cpp_error (pfile, "incomplete universal-character-name");
1713       /* Skip to the end to avoid more diagnostics.  */
1714       p = limit;
1715     }
1716   else
1717     {
1718       for (; length; length--, p++)
1719         {
1720           c = *p;
1721           if (ISXDIGIT (c))
1722             code = (code << 4) + hex_digit_value (c);
1723           else
1724             {
1725               cpp_error (pfile,
1726                          "non-hex digit '%c' in universal-character-name", c);
1727               /* We shouldn't skip in case there are multibyte chars.  */
1728               break;
1729             }
1730         }
1731     }
1732
1733 #ifdef TARGET_EBCDIC
1734   cpp_error (pfile, "universal-character-name on EBCDIC target");
1735   code = 0x3f;  /* EBCDIC invalid character */
1736 #else
1737  /* True extended characters are OK.  */
1738   if (code >= 0xa0
1739       && !(code & 0x80000000)
1740       && !(code >= 0xD800 && code <= 0xDFFF))
1741     ;
1742   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1743      hex escapes so that this also works with EBCDIC hosts.  */
1744   else if (code == 0x24 || code == 0x40 || code == 0x60)
1745     ;
1746   /* Don't give another error if one occurred above.  */
1747   else if (length == 0)
1748     cpp_error (pfile, "universal-character-name out of range");
1749 #endif
1750
1751   *pstr = p;
1752   *pc = code;
1753   return 0;
1754 }
1755
1756 /* Interpret an escape sequence, and return its value.  PSTR points to
1757    the input pointer, which is just after the backslash.  LIMIT is how
1758    much text we have.  MASK is a bitmask for the precision for the
1759    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1760    interpret escapes that did not exist in traditional C.
1761
1762    Handles all relevant diagnostics.  */
1763
1764 unsigned int
1765 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1766      cpp_reader *pfile;
1767      const unsigned char **pstr;
1768      const unsigned char *limit;
1769      unsigned HOST_WIDE_INT mask;
1770      int traditional;
1771 {
1772   int unknown = 0;
1773   const unsigned char *str = *pstr;
1774   unsigned int c = *str++;
1775
1776   switch (c)
1777     {
1778     case '\\': case '\'': case '"': case '?': break;
1779     case 'b': c = TARGET_BS;      break;
1780     case 'f': c = TARGET_FF;      break;
1781     case 'n': c = TARGET_NEWLINE; break;
1782     case 'r': c = TARGET_CR;      break;
1783     case 't': c = TARGET_TAB;     break;
1784     case 'v': c = TARGET_VT;      break;
1785
1786     case '(': case '{': case '[': case '%':
1787       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1788          '\%' is used to prevent SCCS from getting confused.  */
1789       unknown = CPP_PEDANTIC (pfile);
1790       break;
1791
1792     case 'a':
1793       if (CPP_WTRADITIONAL (pfile))
1794         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1795       if (!traditional)
1796         c = TARGET_BELL;
1797       break;
1798
1799     case 'e': case 'E':
1800       if (CPP_PEDANTIC (pfile))
1801         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1802       c = TARGET_ESC;
1803       break;
1804
1805     case 'u': case 'U':
1806       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1807       break;
1808
1809     case 'x':
1810       if (CPP_WTRADITIONAL (pfile))
1811         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1812
1813       if (!traditional)
1814         {
1815           unsigned int i = 0, overflow = 0;
1816           int digits_found = 0;
1817
1818           while (str < limit)
1819             {
1820               c = *str;
1821               if (! ISXDIGIT (c))
1822                 break;
1823               str++;
1824               overflow |= i ^ (i << 4 >> 4);
1825               i = (i << 4) + hex_digit_value (c);
1826               digits_found = 1;
1827             }
1828
1829           if (!digits_found)
1830             cpp_error (pfile, "\\x used with no following hex digits");
1831
1832           if (overflow | (i != (i & mask)))
1833             {
1834               cpp_pedwarn (pfile, "hex escape sequence out of range");
1835               i &= mask;
1836             }
1837           c = i;
1838         }
1839       break;
1840
1841     case '0':  case '1':  case '2':  case '3':
1842     case '4':  case '5':  case '6':  case '7':
1843       {
1844         unsigned int i = c - '0';
1845         int count = 0;
1846
1847         while (str < limit && ++count < 3)
1848           {
1849             c = *str;
1850             if (c < '0' || c > '7')
1851               break;
1852             str++;
1853             i = (i << 3) + c - '0';
1854           }
1855
1856         if (i != (i & mask))
1857           {
1858             cpp_pedwarn (pfile, "octal escape sequence out of range");
1859             i &= mask;
1860           }
1861         c = i;
1862       }
1863       break;
1864
1865     default:
1866       unknown = 1;
1867       break;
1868     }
1869
1870   if (unknown)
1871     {
1872       if (ISGRAPH (c))
1873         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1874       else
1875         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1876     }
1877
1878   if (c > mask)
1879     cpp_pedwarn (pfile, "escape sequence out of range for character");
1880
1881   *pstr = str;
1882   return c;
1883 }
1884
/* Unless already defined, the maximum widths of char and wchar_t
   default to CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE respectively.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1892
1893 /* Interpret a (possibly wide) character constant in TOKEN.
1894    WARN_MULTI warns about multi-character charconsts, if not
1895    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1896    that did not exist in traditional C.  PCHARS_SEEN points to a
1897    variable that is filled in with the number of characters seen.  */
1898 HOST_WIDE_INT
1899 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1900      cpp_reader *pfile;
1901      const cpp_token *token;
1902      int warn_multi;
1903      int traditional;
1904      unsigned int *pchars_seen;
1905 {
1906   const unsigned char *str = token->val.str.text;
1907   const unsigned char *limit = str + token->val.str.len;
1908   unsigned int chars_seen = 0;
1909   unsigned int width, max_chars, c;
1910   unsigned HOST_WIDE_INT mask;
1911   HOST_WIDE_INT result = 0;
1912
1913 #ifdef MULTIBYTE_CHARS
1914   (void) local_mbtowc (NULL, NULL, 0);
1915 #endif
1916
1917   /* Width in bits.  */
1918   if (token->type == CPP_CHAR)
1919     width = MAX_CHAR_TYPE_SIZE;
1920   else
1921     width = MAX_WCHAR_TYPE_SIZE;
1922
1923   if (width < HOST_BITS_PER_WIDE_INT)
1924     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1925   else
1926     mask = ~0;
1927   max_chars = HOST_BITS_PER_WIDE_INT / width;
1928
1929   while (str < limit)
1930     {
1931 #ifdef MULTIBYTE_CHARS
1932       wchar_t wc;
1933       int char_len;
1934
1935       char_len = local_mbtowc (&wc, str, limit - str);
1936       if (char_len == -1)
1937         {
1938           cpp_warning (pfile, "ignoring invalid multibyte character");
1939           c = *str++;
1940         }
1941       else
1942         {
1943           str += char_len;
1944           c = wc;
1945         }
1946 #else
1947       c = *str++;
1948 #endif
1949
1950       if (c == '\\')
1951         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1952
1953 #ifdef MAP_CHARACTER
1954       if (ISPRINT (c))
1955         c = MAP_CHARACTER (c);
1956 #endif
1957
1958       /* Merge character into result; ignore excess chars.  */
1959       if (++chars_seen <= max_chars)
1960         {
1961           if (width < HOST_BITS_PER_WIDE_INT)
1962             result = (result << width) | (c & mask);
1963           else
1964             result = c;
1965         }
1966     }
1967
1968   if (chars_seen == 0)
1969     cpp_error (pfile, "empty character constant");
1970   else if (chars_seen > max_chars)
1971     {
1972       chars_seen = max_chars;
1973       cpp_warning (pfile, "character constant too long");
1974     }
1975   else if (chars_seen > 1 && !traditional && warn_multi)
1976     cpp_warning (pfile, "multi-character character constant");
1977
1978   /* If char type is signed, sign-extend the constant.  The
1979      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1980   if (token->type == CPP_CHAR && chars_seen)
1981     {
1982       unsigned int nbits = chars_seen * width;
1983       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1984
1985       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1986           || ((result >> (nbits - 1)) & 1) == 0)
1987         result &= mask;
1988       else
1989         result |= ~mask;
1990     }
1991
1992   *pchars_seen = chars_seen;
1993   return result;
1994 }
1995
1996 /* Memory pools.  */
1997
/* Never instantiated: the single leading char forces the compiler to
   pad before the union, so the union's offset gives the alignment
   needed by its members.  */
struct dummy
{
  char c;               /* One byte ahead of the union.  */
  union
  {
    double d;           /* Floating-point alignment candidate.  */
    int *p;             /* Pointer alignment candidate.  */
  } u;
};

/* Alignment used by new_chunk, and by _cpp_init_pool when its ALIGN
   argument is zero.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
2009
2010 static int
2011 chunk_suitable (pool, chunk, size)
2012      cpp_pool *pool;
2013      cpp_chunk *chunk;
2014      unsigned int size;
2015 {
2016   /* Being at least twice SIZE means we can use memcpy in
2017      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2018      anyway.  */
2019   return (chunk && pool->locked != chunk
2020           && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2021 }
2022
2023 /* Returns the end of the new pool.  PTR points to a char in the old
2024    pool, and is updated to point to the same char in the new pool.  */
2025 unsigned char *
2026 _cpp_next_chunk (pool, len, ptr)
2027      cpp_pool *pool;
2028      unsigned int len;
2029      unsigned char **ptr;
2030 {
2031   cpp_chunk *chunk = pool->cur->next;
2032
2033   /* LEN is the minimum size we want in the new pool.  */
2034   len += POOL_ROOM (pool);
2035   if (! chunk_suitable (pool, chunk, len))
2036     {
2037       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2038
2039       chunk->next = pool->cur->next;
2040       pool->cur->next = chunk;
2041     }
2042
2043   /* Update the pointer before changing chunk's front.  */
2044   if (ptr)
2045     *ptr += chunk->base - POOL_FRONT (pool);
2046
2047   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2048   chunk->front = chunk->base;
2049
2050   pool->cur = chunk;
2051   return POOL_LIMIT (pool);
2052 }
2053
2054 static cpp_chunk *
2055 new_chunk (size)
2056      unsigned int size;
2057 {
2058   unsigned char *base;
2059   cpp_chunk *result;
2060
2061   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2062   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2063   /* Put the chunk descriptor at the end.  Then chunk overruns will
2064      cause obvious chaos.  */
2065   result = (cpp_chunk *) (base + size);
2066   result->base = base;
2067   result->front = base;
2068   result->limit = base + size;
2069   result->next = 0;
2070
2071   return result;
2072 }
2073
2074 void
2075 _cpp_init_pool (pool, size, align, temp)
2076      cpp_pool *pool;
2077      unsigned int size, align, temp;
2078 {
2079   if (align == 0)
2080     align = DEFAULT_ALIGNMENT;
2081   if (align & (align - 1))
2082     abort ();
2083   pool->align = align;
2084   pool->cur = new_chunk (size);
2085   pool->locked = 0;
2086   pool->locks = 0;
2087   if (temp)
2088     pool->cur->next = pool->cur;
2089 }
2090
2091 void
2092 _cpp_lock_pool (pool)
2093      cpp_pool *pool;
2094 {
2095   if (pool->locks++ == 0)
2096     pool->locked = pool->cur;
2097 }
2098
2099 void
2100 _cpp_unlock_pool (pool)
2101      cpp_pool *pool;
2102 {
2103   if (--pool->locks == 0)
2104     pool->locked = 0;
2105 }
2106
2107 void
2108 _cpp_free_pool (pool)
2109      cpp_pool *pool;
2110 {
2111   cpp_chunk *chunk = pool->cur, *next;
2112
2113   do
2114     {
2115       next = chunk->next;
2116       free (chunk->base);
2117       chunk = next;
2118     }
2119   while (chunk && chunk != pool->cur);
2120 }
2121
2122 /* Reserve LEN bytes from a memory pool.  */
2123 unsigned char *
2124 _cpp_pool_reserve (pool, len)
2125      cpp_pool *pool;
2126      unsigned int len;
2127 {
2128   len = POOL_ALIGN (len, pool->align);
2129   if (len > (unsigned int) POOL_ROOM (pool))
2130     _cpp_next_chunk (pool, len, 0);
2131
2132   return POOL_FRONT (pool);
2133 }
2134
2135 /* Allocate LEN bytes from a memory pool.  */
2136 unsigned char *
2137 _cpp_pool_alloc (pool, len)
2138      cpp_pool *pool;
2139      unsigned int len;
2140 {
2141   unsigned char *result = _cpp_pool_reserve (pool, len);
2142
2143   POOL_COMMIT (pool, len);
2144   return result;
2145 }