gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* How a token type is spelled.  Tokens with SPELL_STRING store their
  55    spelling in the token list, together with its length.
        NOTE(review): this comment used to name token->val.name.len as the
        length field; parse_string in this file records the length in
        token->val.str.len -- confirm which field is current.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,   /* Category given to every OP () table entry.  */
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,         /* Spelling is kept with the token itself.  */
  62   SPELL_NONE
  63 };
  64
  65 struct token_spelling         /* One entry of the token_spellings table.  */
  66 {
  67   enum spell_type category;   /* Which spelling rule applies to the type.  */
  68   const unsigned char *name;  /* String from the OP () or TK () entry.  */
  69 };
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:", /* Digraph spellings.  */
  72                                              U":>", U"<%", U"%>"};
  73 /* Build token_spellings from TTYPE_TABLE (defined outside this file;
        presumably a list of OP () and TK () invocations -- confirm).  */
  74 #define OP(e, s) { SPELL_OPERATOR, U s           }, /* Operator: spelled by S.  */
  75 #define TK(e, s) { s,              U STRINGX (e) }, /* Category S; name made from E.  */
  76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
  77 #undef OP
  78 #undef TK
  79
  80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) /* Spelling category of TOKEN.  */
  81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name) /* Table string for TOKEN's type.  */
  82
  83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t)); /* Character-level input helpers.  */
  84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  86 /* Comment, whitespace and token-body scanners.  */
  87 static int skip_block_comment PARAMS ((cpp_reader *));
  88 static int skip_line_comment PARAMS ((cpp_reader *));
  89 static void adjust_column PARAMS ((cpp_reader *));
  90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  93                                                     const U_CHAR *));
  94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  97 static void unterminated PARAMS ((cpp_reader *, int));
  98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
  99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 104                                    const unsigned char *, unsigned int *));
 105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 106 /* Helpers declared here; their definitions are not in this part of the file.  */
 107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
 108 static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int));
 109 static unsigned int hex_digit_value PARAMS ((unsigned int));
 110 static _cpp_buff *new_buff PARAMS ((unsigned int));
 111
 112 /* Utility routine:
 113
 114    Compares, the token TOKEN to the NUL-terminated string STRING.
 115    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 116
 117 int
 118 cpp_ideq (token, string)
 119      const cpp_token *token;
 120      const char *string;
 121 {
 122   if (token->type != CPP_NAME)
 123     return 0;
 124
 125   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 126 }
 127
 128 /* Call when meeting a newline.  Returns the character after the newline
 129    (or carriage-return newline combination), or EOF.  */
 130 static cppchar_t
 131 handle_newline (pfile, newline_char)
 132      cpp_reader *pfile;
 133      cppchar_t newline_char;
 134 {
 135   cpp_buffer *buffer;
 136   cppchar_t next = EOF;
 137
 138   pfile->line++;
 139   buffer = pfile->buffer;
 140   buffer->col_adjust = 0;
 141   buffer->line_base = buffer->cur;
 142
 143   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 144   if (buffer->cur < buffer->rlimit)
 145     {
 146       next = *buffer->cur++;
 147       if (next + newline_char == '\r' + '\n')
 148         {
 149           buffer->line_base = buffer->cur;
 150           if (buffer->cur < buffer->rlimit)
 151             next = *buffer->cur++;
 152           else
 153             next = EOF;
 154         }
 155     }
 156
 157   buffer->read_ahead = next;
 158   return next;
 159 }
 160
 161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 162    encountered.  It warns if necessary, and returns true if the
 163    trigraph should be honoured.  FROM_CHAR is the third character of a
 164    trigraph, and presumed to be the previous character for position
 165    reporting.  */
 166 static int
 167 trigraph_ok (pfile, from_char)
 168      cpp_reader *pfile;
 169      cppchar_t from_char;
 170 {
 171   int accept = CPP_OPTION (pfile, trigraphs);
 172
 173   /* Don't warn about trigraphs in comments.  */
 174   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 175     {
 176       cpp_buffer *buffer = pfile->buffer;
 177
 178       if (accept)
 179         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 180                                "trigraph ??%c converted to %c",
 181                                (int) from_char,
 182                                (int) _cpp_trigraph_map[from_char]);
 183       else if (buffer->cur != buffer->last_Wtrigraphs)
 184         {
 185           buffer->last_Wtrigraphs = buffer->cur;
 186           cpp_warning_with_line (pfile, pfile->line,
 187                                  CPP_BUF_COL (buffer) - 2,
 188                                  "trigraph ??%c ignored", (int) from_char);
 189         }
 190     }
 191
 192   return accept;
 193 }
 194
 195 /* Record token type T in *result and discard the pending read-ahead
        character by resetting buffer->read_ahead to EOF.  Assumes local
        variables named buffer and result are in scope.  */
 196 #define ACCEPT_CHAR(t) \
 197   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 198
 199 /* SAVE_STATE remembers the current read position in the local
 200    variable saved_cur and RESTORE_STATE rewinds to it; both assume
 201    locals named buffer and saved_cur.  When we move to multibyte
 202    character sets, add to these something that saves and restores the
 203    state of the multibyte conversion library.  This probably involves
        saving and restoring a "cookie".  In the case of glibc it is an
        8-byte structure, so is not a high overhead operation.  In any
        case, it's out of the fast path.  */
 204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 206
 207 /* Skips any escaped newlines introduced by NEXT, which is either a
 208    '?' or a '\\'.  Returns the next character, which will also have
 209    been placed in buffer->read_ahead.  This routine performs
 210    preprocessing stages 1 and 2 of the ISO C standard.

        If buffer->from_stage3 is set nothing is skipped and NEXT is
        returned as given.  When a '?' does not begin an accepted
        trigraph, or a backslash is not followed (after optional
        non-vertical space) by a newline, the read position is rewound
        and the character in hand is returned.  An accepted trigraph
        that is not a backslash is returned converted, with the read
        position left after it.  */
 211 static cppchar_t
 212 skip_escaped_newlines (pfile, next)
 213      cpp_reader *pfile;
 214      cppchar_t next;
 215 {
 216   cpp_buffer *buffer = pfile->buffer;
 217
 218   /* Only do this if we apply stages 1 and 2.  */
 219   if (!buffer->from_stage3)
 220     {
 221       cppchar_t next1;                   /* Character being examined.  */
 222       const unsigned char *saved_cur;    /* Rewind point for RESTORE_STATE.  */
 223       int space;                         /* Saw space between '\\' and newline.  */
 224
 225       do
 226         {
 227           if (buffer->cur == buffer->rlimit) /* Nothing follows NEXT.  */
 228             break;
 229
 230           SAVE_STATE ();
 231           if (next == '?')
 232             {
 233               next1 = *buffer->cur++;
 234               if (next1 != '?' || buffer->cur == buffer->rlimit) /* Not "??" plus one more.  */
 235                 {
 236                   RESTORE_STATE ();
 237                   break;
 238                 }
 239
 240               next1 = *buffer->cur++;
 241               if (!_cpp_trigraph_map[next1]      /* Third character maps to nothing...  */
 242                   || !trigraph_ok (pfile, next1)) /* ...or trigraphs are not honoured.  */
 243                 {
 244                   RESTORE_STATE ();
 245                   break;
 246                 }
 247
 248               /* We have a full trigraph here.  */
 249               next = _cpp_trigraph_map[next1];
 250               if (next != '\\' || buffer->cur == buffer->rlimit) /* Return the converted character.  */
 251                 break;
 252               SAVE_STATE ();                 /* Rewind point is now just past the trigraph.  */
 253             }
 254
 255           /* We have a backslash, and room for at least one more character.  */
 256           space = 0;
 257           do
 258             {
 259               next1 = *buffer->cur++;
 260               if (!is_nvspace (next1))
 261                 break;
 262               space = 1;
 263             }
 264           while (buffer->cur < buffer->rlimit);
 265
 266           if (!is_vspace (next1))            /* Not an escaped newline after all.  */
 267             {
 268               RESTORE_STATE ();
 269               break;
 270             }
 271
 272           if (space && !pfile->state.lexing_comment)
 273             cpp_warning (pfile, "backslash and newline separated by space");
 274
 275           next = handle_newline (pfile, next1);
 276           if (next == EOF)
 277             cpp_pedwarn (pfile, "backslash-newline at end of file");
 278         }
 279       while (next == '\\' || next == '?');   /* Another splice may follow directly.  */
 280     }
 281
 282   buffer->read_ahead = next;
 283   return next;
 284 }
 285
 286 /* Obtain the next character, after trigraph conversion and skipping
 287    an arbitrary string of escaped newlines.  The common case of no
 288    trigraphs or escaped newlines falls through quickly.  */
 289 static cppchar_t
 290 get_effective_char (pfile)
 291      cpp_reader *pfile;
 292 {
 293   cpp_buffer *buffer = pfile->buffer;
 294   cppchar_t next = EOF;
 295
 296   if (buffer->cur < buffer->rlimit)
 297     {
 298       next = *buffer->cur++;
 299
 300       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 301          can introduce escaped newlines, which we want to skip, or
 302          UCNs, which, depending upon lexer state, we will handle in
 303          the future.  */
 304       if (next == '?' || next == '\\')
 305         next = skip_escaped_newlines (pfile, next);
 306     }
 307
 308   buffer->read_ahead = next;
 309   return next;
 310 }
 311
 312 /* Skip a C-style block comment.  We find the end of the comment by
 313    seeing if an asterisk is before every '/' we encounter.  Returns
 314    non-zero if comment terminated by EOF, zero otherwise.
        pfile->state.lexing_comment is set for the duration of the scan,
        and buffer->read_ahead is reset to EOF before returning.  */
 315 static int
 316 skip_block_comment (pfile)
 317      cpp_reader *pfile;
 318 {
 319   cpp_buffer *buffer = pfile->buffer;
 320   cppchar_t c = EOF, prevc = EOF;   /* Current and previous character.  */
 321
 322   pfile->state.lexing_comment = 1;
 323   while (buffer->cur != buffer->rlimit)
 324     {
 325       prevc = c, c = *buffer->cur++;
 326
 327     next_char:                      /* Examine C without reading another character.  */
 328       /* FIXME: For speed, create a new character class of characters
 329          of interest inside block comments.  */
 330       if (c == '?' || c == '\\')
 331         c = skip_escaped_newlines (pfile, c);
 332
 333       /* People like decorating comments with '*', so check for '/'
 334          instead for efficiency.  */
 335       if (c == '/')
 336         {
 337           if (prevc == '*')         /* Star then slash: end of comment.  */
 338             break;
 339
 340           /* Warn about potential nested comments, but not if the '/'
 341              comes immediately before the true comment delimiter.
 342              Don't bother to get it right across escaped newlines.  */
 343           if (CPP_OPTION (pfile, warn_comments)
 344               && buffer->cur != buffer->rlimit)
 345             {
 346               prevc = c, c = *buffer->cur++;
 347               if (c == '*' && buffer->cur != buffer->rlimit)
 348                 {
 349                   prevc = c, c = *buffer->cur++;
 350                   if (c != '/')
 351                     cpp_warning_with_line (pfile, pfile->line,
 352                                            CPP_BUF_COL (buffer) - 2,
 353                                            "\"/*\" within comment");
 354                 }
 355               goto next_char;       /* C holds a character not yet examined.  */
 356             }
 357         }
 358       else if (is_vspace (c))
 359         {
 360           prevc = c, c = handle_newline (pfile, c);
 361           goto next_char;           /* handle_newline already fetched the next one.  */
 362         }
 363       else if (c == '\t')
 364         adjust_column (pfile);
 365     }
 366
 367   pfile->state.lexing_comment = 0;
 368   buffer->read_ahead = EOF;
 369   return c != '/' || prevc != '*';  /* Zero only if the scan stopped on star-slash.  */
 370 }
 371
 372 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 373    non-zero if a multiline comment.  The following new line, if any,
 374    is left in buffer->read_ahead.  */
 375 static int
 376 skip_line_comment (pfile)
 377      cpp_reader *pfile;
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   unsigned int orig_line = pfile->line;
 381   cppchar_t c;
 382
 383   pfile->state.lexing_comment = 1;
 384   do
 385     {
 386       c = EOF;
 387       if (buffer->cur == buffer->rlimit)
 388         break;
 389
 390       c = *buffer->cur++;
 391       if (c == '?' || c == '\\')
 392         c = skip_escaped_newlines (pfile, c);
 393     }
 394   while (!is_vspace (c));
 395
 396   pfile->state.lexing_comment = 0;
 397   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 398   return orig_line != pfile->line;
 399 }
 400
 401 /* pfile->buffer->cur is one beyond the \t character.  Update
 402    col_adjust so we track the column correctly.  */
 403 static void
 404 adjust_column (pfile)
 405      cpp_reader *pfile;
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 409
 410   /* Round it up to multiple of the tabstop, but subtract 1 since the
 411      tab itself occupies a character position.  */
 412   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 413                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 414 }
 415
 416 /* Skips whitespace, saving the next non-whitespace character.
 417    Adjusts pfile->col_adjust to account for tabs.  Without this,
 418    tokens might be assigned an incorrect column.  */
 419 static void
 420 skip_whitespace (pfile, c)
 421      cpp_reader *pfile;
 422      cppchar_t c;
 423 {
 424   cpp_buffer *buffer = pfile->buffer;
 425   unsigned int warned = 0;
 426
 427   do
 428     {
 429       /* Horizontal space always OK.  */
 430       if (c == ' ')
 431         ;
 432       else if (c == '\t')
 433         adjust_column (pfile);
 434       /* Just \f \v or \0 left.  */
 435       else if (c == '\0')
 436         {
 437           if (!warned)
 438             {
 439               cpp_warning (pfile, "null character(s) ignored");
 440               warned = 1;
 441             }
 442         }
 443       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 444         cpp_pedwarn_with_line (pfile, pfile->line,
 445                                CPP_BUF_COL (buffer),
 446                                "%s in preprocessing directive",
 447                                c == '\f' ? "form feed" : "vertical tab");
 448
 449       c = EOF;
 450       if (buffer->cur == buffer->rlimit)
 451         break;
 452       c = *buffer->cur++;
 453     }
 454   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 455   while (is_nvspace (c));
 456
 457   /* Remember the next character.  */
 458   buffer->read_ahead = c;
 459 }
 460
 461 /* See if the characters of a number token are valid in a name (no
 462    '.', '+' or '-').  */
 463 static int
 464 name_p (pfile, string)
 465      cpp_reader *pfile;
 466      const cpp_string *string;
 467 {
 468   unsigned int i;
 469
 470   for (i = 0; i < string->len; i++)
 471     if (!is_idchar (string->text[i]))
 472       return 0;
 473
 474   return 1;
 475 }
 476
 477 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 478    a critical inner loop.  The common case is an identifier which has
 479    not been split by backslash-newline, does not contain a dollar
 480    sign, and has already been scanned (roughly 10:1 ratio of
 481    seen:unseen identifiers in normal code; the distribution is
 482    Poisson-like).  Second most common case is a new identifier, not
 483    split and no dollar sign.  The other possibilities are rare and
 484    have been relegated to parse_identifier_slow.  */
 485
 486 static cpp_hashnode *
 487 parse_identifier (pfile)
 488      cpp_reader *pfile;
 489 {
 490   cpp_hashnode *result;
 491   const U_CHAR *cur, *rlimit;
 492
 493   /* Fast-path loop.  Skim over a normal identifier.
 494      N.B. ISIDNUM does not include $.  */
 495   cur    = pfile->buffer->cur - 1;
 496   rlimit = pfile->buffer->rlimit;
 497   do
 498     cur++;
 499   while (cur < rlimit && ISIDNUM (*cur));
 500
 501   /* Check for slow-path cases.  */
 502   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 503     result = parse_identifier_slow (pfile, cur);
 504   else
 505     {
 506       const U_CHAR *base = pfile->buffer->cur - 1;
 507       result = (cpp_hashnode *)
 508         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 509       pfile->buffer->cur = cur;
 510     }
 511
 512   /* Rarely, identifiers require diagnostics when lexed.
 513      XXX Has to be forced out of the fast path.  */
 514   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 515                         && !pfile->state.skipping, 0))
 516     {
 517       /* It is allowed to poison the same identifier twice.  */
 518       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 519         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 520                    NODE_NAME (result));
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile,
 527         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Slow path.  This handles identifiers which have been split, and
 534    identifiers which contain dollar signs.  The part of the identifier
 535    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 536 static cpp_hashnode *
 537 parse_identifier_slow (pfile, cur)
 538      cpp_reader *pfile;
 539      const U_CHAR *cur;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   const U_CHAR *base = buffer->cur - 1;
 543   struct obstack *stack = &pfile->hash_table->stack;
 544   unsigned int c, saw_dollar = 0, len;
 545
 546   /* Copy the part of the token which is known to be okay.  */
 547   obstack_grow (stack, base, cur - base);
 548
 549   /* Now process the part which isn't.  We are looking at one of
 550      '$', '\\', or '?' on entry to this loop.  */
 551   c = *cur++;
 552   buffer->cur = cur;
 553   do
 554     {
 555       while (is_idchar (c))
 556         {
 557           obstack_1grow (stack, c);
 558
 559           if (c == '$')
 560             saw_dollar++;
 561
 562           c = EOF;
 563           if (buffer->cur == buffer->rlimit)
 564             break;
 565
 566           c = *buffer->cur++;
 567         }
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (pfile, c);
 573     }
 574   while (is_idchar (c));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* $ is not a identifier character in the standard, but is commonly
 580      accepted as an extension.  Don't warn about it in skipped
 581      conditional blocks.  */
 582   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 583     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 584
 585   /* Identifiers are null-terminated.  */
 586   len = obstack_object_size (stack);
 587   obstack_1grow (stack, '\0');
 588
 589   return (cpp_hashnode *)
 590     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 591 }
 592
 593 /* Parse a number, skipping embedded backslash-newlines.  */
 594 static void
 595 parse_number (pfile, number, c, leading_period)
 596      cpp_reader *pfile;
 597      cpp_string *number;
 598      cppchar_t c;
 599      int leading_period;
 600 {
 601   cpp_buffer *buffer = pfile->buffer;
 602   cpp_pool *pool = &pfile->ident_pool;
 603   unsigned char *dest, *limit;
 604
 605   dest = POOL_FRONT (pool);
 606   limit = POOL_LIMIT (pool);
 607
 608   /* Place a leading period.  */
 609   if (leading_period)
 610     {
 611       if (dest >= limit)
 612         limit = _cpp_next_chunk (pool, 0, &dest);
 613       *dest++ = '.';
 614     }
 615
 616   do
 617     {
 618       do
 619         {
 620           /* Need room for terminating null.  */
 621           if (dest + 1 >= limit)
 622             limit = _cpp_next_chunk (pool, 0, &dest);
 623           *dest++ = c;
 624
 625           c = EOF;
 626           if (buffer->cur == buffer->rlimit)
 627             break;
 628
 629           c = *buffer->cur++;
 630         }
 631       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 632
 633       /* Potential escaped newline?  */
 634       if (c != '?' && c != '\\')
 635         break;
 636       c = skip_escaped_newlines (pfile, c);
 637     }
 638   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 639
 640   /* Remember the next character.  */
 641   buffer->read_ahead = c;
 642
 643   /* Null-terminate the number.  */
 644   *dest = '\0';
 645
 646   number->text = POOL_FRONT (pool);
 647   number->len = dest - number->text;
 648   POOL_COMMIT (pool, number->len + 1);
 649 }
 650
 651 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 652 static void
 653 unterminated (pfile, term)
 654      cpp_reader *pfile;
 655      int term;
 656 {
 657   cpp_error (pfile, "missing terminating %c character", term);
 658
 659   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 660     {
 661       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 662                            "possible start of unterminated string literal");
 663       pfile->mls_line = 0;
 664     }
 665 }
 666
 667 /* Subroutine of parse_string.  */
 668 static int
 669 unescaped_terminator_p (pfile, dest)
 670      cpp_reader *pfile;
 671      const unsigned char *dest;
 672 {
 673   const unsigned char *start, *temp;
 674
 675   /* In #include-style directives, terminators are not escapeable.  */
 676   if (pfile->state.angled_headers)
 677     return 1;
 678
 679   start = POOL_FRONT (&pfile->ident_pool);
 680
 681   /* An odd number of consecutive backslashes represents an escaped
 682      terminator.  */
 683   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 684     ;
 685
 686   return ((dest - temp) & 1) == 0;
 687 }
 688
 689 /* Parses a string, character constant, or angle-bracketed header file
 690    name.  Handles embedded trigraphs and escaped newlines.  The stored
 691    string is guaranteed NUL-terminated, but it is not guaranteed that
 692    this is the first NUL since embedded NULs are preserved.
 693
 694    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the closing character to look for.  The text is
        built in the identifier pool and recorded in TOKEN->val.str (the
        length excludes the final NUL).  On return buffer->read_ahead is
        EOF when the terminator or end of input was reached, or holds
        the newline character when the literal was cut short at a line
        end.  */
 695 static void
 696 parse_string (pfile, token, terminator)
 697      cpp_reader *pfile;
 698      cpp_token *token;
 699      cppchar_t terminator;
 700 {
 701   cpp_buffer *buffer = pfile->buffer;
 702   cpp_pool *pool = &pfile->ident_pool;
 703   unsigned char *dest, *limit;      /* Write position and end of pool space.  */
 704   cppchar_t c;
 705   bool warned_nulls = false, warned_multi = false; /* Warn once per literal.  */
 706
 707   dest = POOL_FRONT (pool);
 708   limit = POOL_LIMIT (pool);
 709
 710   for (;;)
 711     {
 712       if (buffer->cur == buffer->rlimit)
 713         c = EOF;
 714       else
 715         c = *buffer->cur++;
 716
 717     have_char:                      /* Entered with C already fetched.  */
 718       /* We need space for the terminating NUL.  */
 719       if (dest >= limit)
 720         limit = _cpp_next_chunk (pool, 0, &dest);
 721
 722       if (c == EOF)
 723         {
 724           unterminated (pfile, terminator);
 725           break;
 726         }
 727
 728       /* Handle trigraphs, escaped newlines etc.  */
 729       if (c == '?' || c == '\\')
 730         c = skip_escaped_newlines (pfile, c);
 731
 732       if (c == terminator && unescaped_terminator_p (pfile, dest))
 733         {
 734           c = EOF;                  /* Terminator consumed; nothing to read ahead.  */
 735           break;
 736         }
 737       else if (is_vspace (c))
 738         {
 739           /* In assembly language, silently terminate string and
 740              character literals at end of line.  This is a kludge
 741              around not knowing where comments are.  */
 742           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 743             break;
 744
 745           /* Character constants and header names may not extend over
 746              multiple lines.  In Standard C, neither may strings.
 747              Unfortunately, we accept multiline strings as an
 748              extension, except in #include family directives.  */
 749           if (terminator != '"' || pfile->state.angled_headers)
 750             {
 751               unterminated (pfile, terminator);
 752               break;
 753             }
 754
 755           if (!warned_multi)
 756             {
 757               warned_multi = true;
 758               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 759             }
 760
 761           if (pfile->mls_line == 0) /* Record where the string began, once.  */
 762             {
 763               pfile->mls_line = token->line;
 764               pfile->mls_col = token->col;
 765             }
 766
 767           c = handle_newline (pfile, c);
 768           *dest++ = '\n';           /* Any newline form is stored as a single '\n'.  */
 769           goto have_char;           /* handle_newline already fetched the next one.  */
 770         }
 771       else if (c == '\0' && !warned_nulls)
 772         {
 773           warned_nulls = true;
 774           cpp_warning (pfile, "null character(s) preserved in literal");
 775         }
 776
 777       *dest++ = c;
 778     }
 779
 780   /* Remember the next character.  */
 781   buffer->read_ahead = c;
 782   *dest = '\0';
 783
 784   token->val.str.text = POOL_FRONT (pool);
 785   token->val.str.len = dest - token->val.str.text;
 786   POOL_COMMIT (pool, token->val.str.len + 1); /* Length plus the NUL.  */
 787 }
 788
 789 /* The stored comment includes the comment start and any terminator.  */
 790 static void
 791 save_comment (pfile, token, from)
 792      cpp_reader *pfile;
 793      cpp_token *token;
 794      const unsigned char *from;
 795 {
 796   unsigned char *buffer;
 797   unsigned int len;
 798
 799   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 800   /* C++ comments probably (not definitely) have moved past a new
 801      line, which we don't want to save in the comment.  */
 802   if (pfile->buffer->read_ahead != EOF)
 803     len--;
 804   buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
 805
 806   token->type = CPP_COMMENT;
 807   token->val.str.len = len;
 808   token->val.str.text = buffer;
 809
 810   buffer[0] = '/';
 811   memcpy (buffer + 1, from, len - 1);
 812 }
 813
 814 /* Subroutine of _cpp_lex_direct to handle '%'.  A little tricky, since we
 815    want to avoid stepping back when lexing %:%X.  Sets RESULT->type to
        CPP_MOD, CPP_MOD_EQ, or, when digraphs are enabled, CPP_HASH,
        CPP_PASTE or CPP_CLOSE_BRACE (the last three also get the DIGRAPH
        flag).  A character read past the token is kept in
        buffer->read_ahead, and a second one in buffer->extra_char.  */
 816 static void
 817 lex_percent (pfile, result)
 818      cpp_reader *pfile;
 819      cpp_token *result;
 820 {
 821   cpp_buffer *buffer= pfile->buffer;
 822   cppchar_t c;
 823
 824   result->type = CPP_MOD;           /* Default when nothing longer matches.  */
 825   /* Parsing %:%X could leave an extra character.  */
 826   if (buffer->extra_char == EOF)
 827     c = get_effective_char (pfile);
 828   else
 829     {
 830       c = buffer->read_ahead = buffer->extra_char; /* Use the character left over earlier.  */
 831       buffer->extra_char = EOF;
 832     }
 833
 834   if (c == '=')
 835     ACCEPT_CHAR (CPP_MOD_EQ);
 836   else if (CPP_OPTION (pfile, digraphs))
 837     {
 838       if (c == ':')
 839         {
 840           result->flags |= DIGRAPH;
 841           ACCEPT_CHAR (CPP_HASH);
 842           if (get_effective_char (pfile) == '%') /* Could be the start of %:%: .  */
 843             {
 844               buffer->extra_char = get_effective_char (pfile);
 845               if (buffer->extra_char == ':')
 846                 {
 847                   buffer->extra_char = EOF;
 848                   ACCEPT_CHAR (CPP_PASTE);
 849                 }
 850               else
 851                 /* We'll catch the extra_char when we're called back.  */
 852                 buffer->read_ahead = '%';
 853             }
 854         }
 855       else if (c == '>')
 856         {
 857           result->flags |= DIGRAPH;
 858           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 859         }
 860     }
 861 }
 862
 863 /* Subroutine of _cpp_lex_direct to handle '.'.  This is tricky, since we
 864    want to avoid stepping back when lexing '...' or '.123'.  In the
 865    latter case we should also set a flag for parse_number.  Sets
        RESULT->type to CPP_NUMBER, CPP_DOT, CPP_ELLIPSIS or, when the
        cplusplus option is set, CPP_DOT_STAR.  A character read past the
        token is kept in buffer->read_ahead, and a second one in
        buffer->extra_char.  */
 866 static void
 867 lex_dot (pfile, result)
 868      cpp_reader *pfile;
 869      cpp_token *result;
 870 {
 871   cpp_buffer *buffer = pfile->buffer;
 872   cppchar_t c;
 873
 874   /* Parsing ..X could leave an extra character.  */
 875   if (buffer->extra_char == EOF)
 876     c = get_effective_char (pfile);
 877   else
 878     {
 879       c = buffer->read_ahead = buffer->extra_char; /* Use the character left over earlier.  */
 880       buffer->extra_char = EOF;
 881     }
 882
 883   /* All known character sets have 0...9 contiguous.  */
 884   if (c >= '0' && c <= '9')
 885     {
 886       result->type = CPP_NUMBER;
 887       parse_number (pfile, &result->val.str, c, 1); /* 1: prepend the leading period.  */
 888     }
 889   else
 890     {
 891       result->type = CPP_DOT;       /* Default when nothing longer matches.  */
 892       if (c == '.')
 893         {
 894           buffer->extra_char = get_effective_char (pfile);
 895           if (buffer->extra_char == '.')
 896             {
 897               buffer->extra_char = EOF;
 898               ACCEPT_CHAR (CPP_ELLIPSIS);
 899             }
 900           else
 901             /* We'll catch the extra_char when we're called back.  */
 902             buffer->read_ahead = '.';
 903         }
 904       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 905         ACCEPT_CHAR (CPP_DOT_STAR);
 906     }
 907 }
 908
 909 /* Allocate COUNT tokens for RUN.  */
 910 void
 911 _cpp_init_tokenrun (run, count)
 912      tokenrun *run;
 913      unsigned int count;
 914 {
 915   run->base = xnewvec (cpp_token, count);
 916   run->limit = run->base + count;
 917   run->next = NULL;
 918 }
 919
 920 /* Returns the next tokenrun, or creates one if there is none.  */
 921 static tokenrun *
 922 next_tokenrun (run)
 923      tokenrun *run;
 924 {
 925   if (run->next == NULL)
 926     {
 927       run->next = xnew (tokenrun);
 928       run->next->prev = run;
 929       _cpp_init_tokenrun (run->next, 250);
 930     }
 931
 932   return run->next;
 933 }
 934
 935 /* Allocate a single token that is invalidated at the same time as the
 936    rest of the tokens on the line.  Has its line and col set to the
 937    same as the last lexed token, so that diagnostics appear in the
 938    right place.  */
 939 cpp_token *
 940 _cpp_temp_token (pfile)
 941      cpp_reader *pfile;
 942 {
 943   cpp_token *old, *result;
 944
 945   old = pfile->cur_token - 1;
 946   if (pfile->cur_token == pfile->cur_run->limit)
 947     {
 948       pfile->cur_run = next_tokenrun (pfile->cur_run);
 949       pfile->cur_token = pfile->cur_run->base;
 950     }
 951
 952   result = pfile->cur_token++;
 953   result->line = old->line;
 954   result->col = old->col;
 955   return result;
 956 }
 957
 958 /* Lex a token into RESULT (external interface).  Takes care of issues
 959    like directive handling, token lookahead, multiple include
 960    opimisation and skipping.  */
 961 const cpp_token *
 962 _cpp_lex_token (pfile)
 963      cpp_reader *pfile;
 964 {
 965   cpp_token *result;
 966
 967   for (;;)
 968     {
 969       if (pfile->cur_token == pfile->cur_run->limit)
 970         {
 971           pfile->cur_run = next_tokenrun (pfile->cur_run);
 972           pfile->cur_token = pfile->cur_run->base;
 973         }
 974
 975       if (pfile->lookaheads)
 976         {
 977           pfile->lookaheads--;
 978           result = pfile->cur_token++;
 979         }
 980       else
 981         result = _cpp_lex_direct (pfile);
 982
 983       if (result->flags & BOL)
 984         {
 985           /* Is this a directive.  If _cpp_handle_directive returns
 986              false, it is an assembler #.  */
 987           if (result->type == CPP_HASH
 988               && !pfile->state.parsing_args
 989               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 990             continue;
 991           if (pfile->cb.line_change && !pfile->state.skipping)
 992             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 993         }
 994
 995       /* We don't skip tokens in directives.  */
 996       if (pfile->state.in_directive)
 997         break;
 998
 999       /* Outside a directive, invalidate controlling macros.  At file
1000          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1001          get here and MI optimisation works.  */
1002       pfile->mi_valid = false;
1003
1004       if (!pfile->state.skipping || result->type == CPP_EOF)
1005         break;
1006     }
1007
1008   return result;
1009 }
1010
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimisation, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns the location of the lexed token.

   The body is a single switch on the first character of the token.
   The labels fresh_line, update_tokens_line, skipped_white and
   trigraph are re-entry points used to restart lexing after a
   newline, a comment or escaped newline, whitespace, and a replaced
   trigraph respectively.  */
cpp_token *
_cpp_lex_direct (pfile)
     cpp_reader *pfile;
{
  cppchar_t c;
  cpp_buffer *buffer;
  const unsigned char *comment_start;
  /* Claim the next token slot; cur_token is advanced past it.  */
  cpp_token *result = pfile->cur_token++;

  /* Start of a line or of a different buffer: take over the flags
     saved by the previous newline / EOF handling, and clear them.  */
 fresh_line:
  buffer = pfile->buffer;
  result->flags = buffer->saved_flags;
  buffer->saved_flags = 0;
 update_tokens_line:
  result->line = pfile->line;

  /* Use the pending read-ahead character if there is one; otherwise
     take the next byte, unless the buffer is exhausted, in which case
     C stays EOF.  */
 skipped_white:
  c = buffer->read_ahead;
  if (c == EOF && buffer->cur < buffer->rlimit)
    c = *buffer->cur++;
  result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
  buffer->read_ahead = EOF;

 trigraph:
  switch (c)
    {
    case EOF:
      /* Whatever token is lexed next starts a line.  */
      buffer->saved_flags = BOL;
      if (!pfile->state.parsing_args && !pfile->state.in_directive)
        {
          if (buffer->cur != buffer->line_base)
            {
              /* Non-empty files should end in a newline.  Don't warn
                 for command line and _Pragma buffers.  */
              if (!buffer->from_stage3)
                cpp_pedwarn (pfile, "no newline at end of file");
              handle_newline (pfile, '\n');
            }

          /* Don't pop the last buffer.  */
          if (buffer->prev)
            {
              /* Sample return_at_eof before the pop; BUFFER is
                 reloaded from pfile at fresh_line afterwards.  */
              unsigned char stop = buffer->return_at_eof;

              _cpp_pop_buffer (pfile);
              if (!stop)
                goto fresh_line;
            }
        }
      result->type = CPP_EOF;
      break;

    case ' ': case '\t': case '\f': case '\v': case '\0':
      skip_whitespace (pfile, c);
      result->flags |= PREV_WHITE;
      goto skipped_white;

    case '\n': case '\r':
      handle_newline (pfile, c);
      buffer->saved_flags = BOL;
      /* Outside a directive a newline is not a token in its own
         right: carry on and lex the first token of the next line.  */
      if (! pfile->state.in_directive)
        {
          if (pfile->state.parsing_args == 2)
            buffer->saved_flags |= PREV_WHITE;
          /* Unless tokens are being kept, rewind to the first slot of
             the base run so that token storage is reused.  */
          if (!pfile->keep_tokens)
            {
              pfile->cur_run = &pfile->base_run;
              result = pfile->base_run.base;
              pfile->cur_token = result + 1;
            }
          goto fresh_line;
        }
      /* Inside a directive the newline ends it.  */
      result->type = CPP_EOF;
      break;

    case '?':
    case '\\':
      /* These could start an escaped newline, or '?' a trigraph.  Let
         skip_escaped_newlines do all the work.  */
      {
        unsigned int line = pfile->line;

        c = skip_escaped_newlines (pfile, c);
        if (line != pfile->line)
          /* We had at least one escaped newline of some sort, and the
             next character is in buffer->read_ahead.  Update the
             token's line and column.  */
            goto update_tokens_line;

        /* We are either the original '?' or '\\', or a trigraph.  The
           type is set to CPP_QUERY provisionally; it is overwritten
           if we jump to random_char or re-dispatch via trigraph.  */
        result->type = CPP_QUERY;
        buffer->read_ahead = EOF;
        if (c == '\\')
          goto random_char;
        else if (c != '?')
          goto trigraph;
      }
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      parse_number (pfile, &result->val.str, c, 0);
      break;

    case '$':
      /* '$' only starts an identifier when the option allows it.  */
      if (!CPP_OPTION (pfile, dollars_in_ident))
        goto random_char;
      /* Fall through...  */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      result->val.node = parse_identifier (pfile);

      /* 'L' may introduce wide characters or strings.  */
      if (result->val.node == pfile->spec_nodes.n_L)
        {
          /* Peek at the character following the 'L' without consuming
             it unless it turns out to be a quote.  */
          c = buffer->read_ahead;
          if (c == EOF && buffer->cur < buffer->rlimit)
            c = *buffer->cur;
          if (c == '\'' || c == '"')
            {
              buffer->cur++;
              ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
              goto make_string;
            }
        }
      /* Convert named operators to their proper types.  */
      else if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = result->val.node->value.operator;
        }
      break;

    case '\'':
    case '"':
      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
      /* Also entered for wide literals and header names, with the
         type already set and C holding the terminating character.  */
    make_string:
      parse_string (pfile, result, c);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      result->type = CPP_DIV;
      c = get_effective_char (pfile);
      if (c == '=')
        ACCEPT_CHAR (CPP_DIV_EQ);
      /* Anything other than a comment opener ends the token here; this
         includes the '=' case just handled.  */
      if (c != '/' && c != '*')
        break;

      if (c == '*')
        {
          if (skip_block_comment (pfile))
            cpp_error (pfile, "unterminated comment");
        }
      else
        {
          /* When line comments are not enabled and we are not in a
             system header, '//' is not a comment: return the CPP_DIV
             already set up.  */
          if (!CPP_OPTION (pfile, cplusplus_comments)
              && !CPP_IN_SYSTEM_HEADER (pfile))
            break;

          /* Warn about comments only if pedantically GNUC89, and not
             in system headers.  */
          if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_pedwarn (pfile,
                           "C++ style comments are not allowed in ISO C89");
              cpp_pedwarn (pfile,
                           "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          /* Skip_line_comment updates buffer->read_ahead.  */
          if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
            cpp_warning (pfile, "multi-line comment");
        }

      /* Skipping the comment has updated buffer->read_ahead.  */
      if (!pfile->state.save_comments)
        {
          /* The comment counts as whitespace before the next token,
             which is lexed into the same slot.  */
          result->flags |= PREV_WHITE;
          goto update_tokens_line;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start);
      break;

    case '<':
      /* When angled header names are being accepted, '<' opens a
         header name that is lexed like a string ending in '>'.  */
      if (pfile->state.angled_headers)
        {
          result->type = CPP_HEADER_NAME;
          c = '>';              /* terminator.  */
          goto make_string;
        }

      result->type = CPP_LESS;
      c = get_effective_char (pfile);
      if (c == '=')
        ACCEPT_CHAR (CPP_LESS_EQ);
      else if (c == '<')
        {
          ACCEPT_CHAR (CPP_LSHIFT);
          if (get_effective_char (pfile) == '=')
            ACCEPT_CHAR (CPP_LSHIFT_EQ);
        }
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        {
          ACCEPT_CHAR (CPP_MIN);
          if (get_effective_char (pfile) == '=')
            ACCEPT_CHAR (CPP_MIN_EQ);
        }
      else if (c == ':' && CPP_OPTION (pfile, digraphs))
        {
          ACCEPT_CHAR (CPP_OPEN_SQUARE);
          result->flags |= DIGRAPH;
        }
      else if (c == '%' && CPP_OPTION (pfile, digraphs))
        {
          ACCEPT_CHAR (CPP_OPEN_BRACE);
          result->flags |= DIGRAPH;
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      c = get_effective_char (pfile);
      if (c == '=')
        ACCEPT_CHAR (CPP_GREATER_EQ);
      else if (c == '>')
        {
          ACCEPT_CHAR (CPP_RSHIFT);
          if (get_effective_char (pfile) == '=')
            ACCEPT_CHAR (CPP_RSHIFT_EQ);
        }
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        {
          ACCEPT_CHAR (CPP_MAX);
          if (get_effective_char (pfile) == '=')
            ACCEPT_CHAR (CPP_MAX_EQ);
        }
      break;

    case '%':
      lex_percent (pfile, result);
      break;

    case '.':
      lex_dot (pfile, result);
      break;

    case '+':
      result->type = CPP_PLUS;
      c = get_effective_char (pfile);
      if (c == '=')
        ACCEPT_CHAR (CPP_PLUS_EQ);
      else if (c == '+')
        ACCEPT_CHAR (CPP_PLUS_PLUS);
      break;

    case '-':
      result->type = CPP_MINUS;
      c = get_effective_char (pfile);
      if (c == '>')
        {
          ACCEPT_CHAR (CPP_DEREF);
          if (CPP_OPTION (pfile, cplusplus)
              && get_effective_char (pfile) == '*')
            ACCEPT_CHAR (CPP_DEREF_STAR);
        }
      else if (c == '=')
        ACCEPT_CHAR (CPP_MINUS_EQ);
      else if (c == '-')
        ACCEPT_CHAR (CPP_MINUS_MINUS);
      break;

    case '*':
      result->type = CPP_MULT;
      if (get_effective_char (pfile) == '=')
        ACCEPT_CHAR (CPP_MULT_EQ);
      break;

    case '=':
      result->type = CPP_EQ;
      if (get_effective_char (pfile) == '=')
        ACCEPT_CHAR (CPP_EQ_EQ);
      break;

    case '!':
      result->type = CPP_NOT;
      if (get_effective_char (pfile) == '=')
        ACCEPT_CHAR (CPP_NOT_EQ);
      break;

    case '&':
      result->type = CPP_AND;
      c = get_effective_char (pfile);
      if (c == '=')
        ACCEPT_CHAR (CPP_AND_EQ);
      else if (c == '&')
        ACCEPT_CHAR (CPP_AND_AND);
      break;

    case '#':
      result->type = CPP_HASH;
      if (get_effective_char (pfile) == '#')
          ACCEPT_CHAR (CPP_PASTE);
      break;

    case '|':
      result->type = CPP_OR;
      c = get_effective_char (pfile);
      if (c == '=')
        ACCEPT_CHAR (CPP_OR_EQ);
      else if (c == '|')
        ACCEPT_CHAR (CPP_OR_OR);
      break;

    case '^':
      result->type = CPP_XOR;
      if (get_effective_char (pfile) == '=')
        ACCEPT_CHAR (CPP_XOR_EQ);
      break;

    case ':':
      result->type = CPP_COLON;
      c = get_effective_char (pfile);
      if (c == ':' && CPP_OPTION (pfile, cplusplus))
        ACCEPT_CHAR (CPP_SCOPE);
      else if (c == '>' && CPP_OPTION (pfile, digraphs))
        {
          result->flags |= DIGRAPH;
          ACCEPT_CHAR (CPP_CLOSE_SQUARE);
        }
      break;

      /* Single-character punctuators that never combine.  */
    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

      /* @ is a punctuator in Objective C.  */
    case '@': result->type = CPP_ATSIGN; break;

      /* Any other character becomes a CPP_OTHER token carrying the
         character itself.  Also reached for a lone backslash and for
         '$' when it is not allowed in identifiers.  */
    random_char:
    default:
      result->type = CPP_OTHER;
      result->val.c = c;
      break;
    }

  return result;
}
1393
1394 /* An upper bound on the number of bytes needed to spell a token,
1395    including preceding whitespace.  */
1396 unsigned int
1397 cpp_token_len (token)
1398      const cpp_token *token;
1399 {
1400   unsigned int len;
1401
1402   switch (TOKEN_SPELL (token))
1403     {
1404     default:            len = 0;                                break;
1405     case SPELL_STRING:  len = token->val.str.len;               break;
1406     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1407     }
1408   /* 1 for whitespace, 4 for comment delimeters.  */
1409   return len + 5;
1410 }
1411
1412 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1413    already contain the enough space to hold the token's spelling.
1414    Returns a pointer to the character after the last character
1415    written.  */
1416 unsigned char *
1417 cpp_spell_token (pfile, token, buffer)
1418      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1419      const cpp_token *token;
1420      unsigned char *buffer;
1421 {
1422   switch (TOKEN_SPELL (token))
1423     {
1424     case SPELL_OPERATOR:
1425       {
1426         const unsigned char *spelling;
1427         unsigned char c;
1428
1429         if (token->flags & DIGRAPH)
1430           spelling
1431             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1432         else if (token->flags & NAMED_OP)
1433           goto spell_ident;
1434         else
1435           spelling = TOKEN_NAME (token);
1436
1437         while ((c = *spelling++) != '\0')
1438           *buffer++ = c;
1439       }
1440       break;
1441
1442     case SPELL_IDENT:
1443       spell_ident:
1444       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1445       buffer += NODE_LEN (token->val.node);
1446       break;
1447
1448     case SPELL_STRING:
1449       {
1450         int left, right, tag;
1451         switch (token->type)
1452           {
1453           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1454           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1455           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1456           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1457           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1458           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1459           }
1460         if (tag) *buffer++ = tag;
1461         if (left) *buffer++ = left;
1462         memcpy (buffer, token->val.str.text, token->val.str.len);
1463         buffer += token->val.str.len;
1464         if (right) *buffer++ = right;
1465       }
1466       break;
1467
1468     case SPELL_CHAR:
1469       *buffer++ = token->val.c;
1470       break;
1471
1472     case SPELL_NONE:
1473       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1474       break;
1475     }
1476
1477   return buffer;
1478 }
1479
1480 /* Returns a token as a null-terminated string.  The string is
1481    temporary, and automatically freed later.  Useful for diagnostics.  */
1482 unsigned char *
1483 cpp_token_as_text (pfile, token)
1484      cpp_reader *pfile;
1485      const cpp_token *token;
1486 {
1487   unsigned int len = cpp_token_len (token);
1488   unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
1489
1490   end = cpp_spell_token (pfile, token, start);
1491   end[0] = '\0';
1492
1493   return start;
1494 }
1495
1496 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1497 const char *
1498 cpp_type2name (type)
1499      enum cpp_ttype type;
1500 {
1501   return (const char *) token_spellings[type].name;
1502 }
1503
1504 /* Writes the spelling of token to FP, without any preceding space.
1505    Separated from cpp_spell_token for efficiency - to avoid stdio
1506    double-buffering.  */
1507 void
1508 cpp_output_token (token, fp)
1509      const cpp_token *token;
1510      FILE *fp;
1511 {
1512   switch (TOKEN_SPELL (token))
1513     {
1514     case SPELL_OPERATOR:
1515       {
1516         const unsigned char *spelling;
1517
1518         if (token->flags & DIGRAPH)
1519           spelling
1520             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1521         else if (token->flags & NAMED_OP)
1522           goto spell_ident;
1523         else
1524           spelling = TOKEN_NAME (token);
1525
1526         ufputs (spelling, fp);
1527       }
1528       break;
1529
1530     spell_ident:
1531     case SPELL_IDENT:
1532       ufputs (NODE_NAME (token->val.node), fp);
1533     break;
1534
1535     case SPELL_STRING:
1536       {
1537         int left, right, tag;
1538         switch (token->type)
1539           {
1540           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1541           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1542           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1543           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1544           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1545           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1546           }
1547         if (tag) putc (tag, fp);
1548         if (left) putc (left, fp);
1549         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1550         if (right) putc (right, fp);
1551       }
1552       break;
1553
1554     case SPELL_CHAR:
1555       putc (token->val.c, fp);
1556       break;
1557
1558     case SPELL_NONE:
1559       /* An error, most probably.  */
1560       break;
1561     }
1562 }
1563
1564 /* Compare two tokens.  */
1565 int
1566 _cpp_equiv_tokens (a, b)
1567      const cpp_token *a, *b;
1568 {
1569   if (a->type == b->type && a->flags == b->flags)
1570     switch (TOKEN_SPELL (a))
1571       {
1572       default:                  /* Keep compiler happy.  */
1573       case SPELL_OPERATOR:
1574         return 1;
1575       case SPELL_CHAR:
1576         return a->val.c == b->val.c; /* Character.  */
1577       case SPELL_NONE:
1578         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1579       case SPELL_IDENT:
1580         return a->val.node == b->val.node;
1581       case SPELL_STRING:
1582         return (a->val.str.len == b->val.str.len
1583                 && !memcmp (a->val.str.text, b->val.str.text,
1584                             a->val.str.len));
1585       }
1586
1587   return 0;
1588 }
1589
1590 /* Returns nonzero if a space should be inserted to avoid an
1591    accidental token paste for output.  For simplicity, it is
1592    conservative, and occasionally advises a space where one is not
1593    needed, e.g. "." and ".2".  */
1594
1595 int
1596 cpp_avoid_paste (pfile, token1, token2)
1597      cpp_reader *pfile;
1598      const cpp_token *token1, *token2;
1599 {
1600   enum cpp_ttype a = token1->type, b = token2->type;
1601   cppchar_t c;
1602
1603   if (token1->flags & NAMED_OP)
1604     a = CPP_NAME;
1605   if (token2->flags & NAMED_OP)
1606     b = CPP_NAME;
1607
1608   c = EOF;
1609   if (token2->flags & DIGRAPH)
1610     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1611   else if (token_spellings[b].category == SPELL_OPERATOR)
1612     c = token_spellings[b].name[0];
1613
1614   /* Quickly get everything that can paste with an '='.  */
1615   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1616     return 1;
1617
1618   switch (a)
1619     {
1620     case CPP_GREATER:   return c == '>' || c == '?';
1621     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1622     case CPP_PLUS:      return c == '+';
1623     case CPP_MINUS:     return c == '-' || c == '>';
1624     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1625     case CPP_MOD:       return c == ':' || c == '>';
1626     case CPP_AND:       return c == '&';
1627     case CPP_OR:        return c == '|';
1628     case CPP_COLON:     return c == ':' || c == '>';
1629     case CPP_DEREF:     return c == '*';
1630     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1631     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1632     case CPP_NAME:      return ((b == CPP_NUMBER
1633                                  && name_p (pfile, &token2->val.str))
1634                                 || b == CPP_NAME
1635                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1636     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1637                                 || c == '.' || c == '+' || c == '-');
1638     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1639                                 && token1->val.c == '@'
1640                                 && (b == CPP_NAME || b == CPP_STRING));
1641     default:            break;
1642     }
1643
1644   return 0;
1645 }
1646
1647 /* Output all the remaining tokens on the current line, and a newline
1648    character, to FP.  Leading whitespace is removed.  If there are
1649    macros, special token padding is not performed.  */
1650 void
1651 cpp_output_line (pfile, fp)
1652      cpp_reader *pfile;
1653      FILE *fp;
1654 {
1655   const cpp_token *token;
1656
1657   token = cpp_get_token (pfile);
1658   while (token->type != CPP_EOF)
1659     {
1660       cpp_output_token (token, fp);
1661       token = cpp_get_token (pfile);
1662       if (token->flags & PREV_WHITE)
1663         putc (' ', fp);
1664     }
1665
1666   putc ('\n', fp);
1667 }
1668
/* Return the numeric value, 0 to 15, of the hexadecimal digit C.
   Both upper and lower case letters are accepted.  Aborts if C is
   not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int first, bias;

  if (c >= '0' && c <= '9')
    {
      first = '0';
      bias = 0;
    }
  else if (c >= 'A' && c <= 'F')
    {
      first = 'A';
      bias = 10;
    }
  else if (c >= 'a' && c <= 'f')
    {
      first = 'a';
      bias = 10;
    }
  else
    abort ();

  return c - first + bias;
}
1682
1683 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1684    failure if cpplib is not parsing C++ or C99.  Such failure is
1685    silent, and no variables are updated.  Otherwise returns 0, and
1686    warns if -Wtraditional.
1687
1688    [lex.charset]: The character designated by the universal character
1689    name \UNNNNNNNN is that character whose character short name in
1690    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1691    universal character name \uNNNN is that character whose character
1692    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1693    for a universal character name is less than 0x20 or in the range
1694    0x7F-0x9F (inclusive), or if the universal character name
1695    designates a character in the basic source character set, then the
1696    program is ill-formed.
1697
1698    We assume that wchar_t is Unicode, so we don't need to do any
1699    mapping.  Is this ever wrong?
1700
1701    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1702    LIMIT is the end of the string or charconst.  PSTR is updated to
1703    point after the UCS on return, and the UCS is written into PC.  */
1704
1705 static int
1706 maybe_read_ucs (pfile, pstr, limit, pc)
1707      cpp_reader *pfile;
1708      const unsigned char **pstr;
1709      const unsigned char *limit;
1710      unsigned int *pc;
1711 {
1712   const unsigned char *p = *pstr;
1713   unsigned int code = 0;
1714   unsigned int c = *pc, length;
1715
1716   /* Only attempt to interpret a UCS for C++ and C99.  */
1717   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1718     return 1;
1719
1720   if (CPP_WTRADITIONAL (pfile))
1721     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1722
1723   length = (c == 'u' ? 4: 8);
1724
1725   if ((size_t) (limit - p) < length)
1726     {
1727       cpp_error (pfile, "incomplete universal-character-name");
1728       /* Skip to the end to avoid more diagnostics.  */
1729       p = limit;
1730     }
1731   else
1732     {
1733       for (; length; length--, p++)
1734         {
1735           c = *p;
1736           if (ISXDIGIT (c))
1737             code = (code << 4) + hex_digit_value (c);
1738           else
1739             {
1740               cpp_error (pfile,
1741                          "non-hex digit '%c' in universal-character-name", c);
1742               /* We shouldn't skip in case there are multibyte chars.  */
1743               break;
1744             }
1745         }
1746     }
1747
1748 #ifdef TARGET_EBCDIC
1749   cpp_error (pfile, "universal-character-name on EBCDIC target");
1750   code = 0x3f;  /* EBCDIC invalid character */
1751 #else
1752  /* True extended characters are OK.  */
1753   if (code >= 0xa0
1754       && !(code & 0x80000000)
1755       && !(code >= 0xD800 && code <= 0xDFFF))
1756     ;
1757   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1758      hex escapes so that this also works with EBCDIC hosts.  */
1759   else if (code == 0x24 || code == 0x40 || code == 0x60)
1760     ;
1761   /* Don't give another error if one occurred above.  */
1762   else if (length == 0)
1763     cpp_error (pfile, "universal-character-name out of range");
1764 #endif
1765
1766   *pstr = p;
1767   *pc = code;
1768   return 0;
1769 }
1770
/* Interpret an escape sequence, and return its value.  PSTR points to
   the input pointer, which is just after the backslash.  LIMIT points
   just past the end of the text we have.  MASK is a bitmask for the
   precision for the destination type (char or wchar_t).  TRADITIONAL,
   if true, does not interpret escapes that did not exist in
   traditional C.

   On return *PSTR has been advanced past the characters that made up
   the escape sequence.

   Handles all relevant diagnostics.  */

unsigned int
cpp_parse_escape (pfile, pstr, limit, mask, traditional)
     cpp_reader *pfile;
     const unsigned char **pstr;
     const unsigned char *limit;
     unsigned HOST_WIDE_INT mask;
     int traditional;
{
  int unknown = 0;
  const unsigned char *str = *pstr;
  /* C starts out as the character after the backslash and ends up as
     the value of the escape; where no case below replaces it, the
     character stands for itself.  */
  unsigned int c = *str++;

  switch (c)
    {
    case '\\': case '\'': case '"': case '?': break;
    case 'b': c = TARGET_BS;      break;
    case 'f': c = TARGET_FF;      break;
    case 'n': c = TARGET_NEWLINE; break;
    case 'r': c = TARGET_CR;      break;
    case 't': c = TARGET_TAB;     break;
    case 'v': c = TARGET_VT;      break;

    case '(': case '{': case '[': case '%':
      /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
         '\%' is used to prevent SCCS from getting confused.  */
      unknown = CPP_PEDANTIC (pfile);
      break;

    case 'a':
      if (CPP_WTRADITIONAL (pfile))
        cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
      /* When TRADITIONAL, C is left as the letter 'a'.  */
      if (!traditional)
        c = TARGET_BELL;
      break;

    case 'e': case 'E':
      if (CPP_PEDANTIC (pfile))
        cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
      c = TARGET_ESC;
      break;

    case 'u': case 'U':
      /* If universal character names are not interpreted, this returns
         1 and the sequence is diagnosed below as an unknown escape.  */
      unknown = maybe_read_ucs (pfile, &str, limit, &c);
      break;

    case 'x':
      if (CPP_WTRADITIONAL (pfile))
        cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");

      /* When TRADITIONAL, C is left as the letter 'x' and no digits
         are consumed.  */
      if (!traditional)
        {
          unsigned int i = 0, overflow = 0;
          int digits_found = 0;

          /* Consume every hex digit that follows, however many.  */
          while (str < limit)
            {
              c = *str;
              if (! ISXDIGIT (c))
                break;
              str++;
              /* Nonzero if any of the top four bits of I are set, i.e.
                 the shift below is about to lose them.  */
              overflow |= i ^ (i << 4 >> 4);
              i = (i << 4) + hex_digit_value (c);
              digits_found = 1;
            }

          if (!digits_found)
            cpp_error (pfile, "\\x used with no following hex digits");

          if (overflow | (i != (i & mask)))
            {
              cpp_pedwarn (pfile, "hex escape sequence out of range");
              i &= mask;
            }
          c = i;
        }
      break;

    case '0':  case '1':  case '2':  case '3':
    case '4':  case '5':  case '6':  case '7':
      {
        unsigned int i = c - '0';
        int count = 0;

        /* The first digit is already in I; take at most two more.  */
        while (str < limit && ++count < 3)
          {
            c = *str;
            if (c < '0' || c > '7')
              break;
            str++;
            i = (i << 3) + c - '0';
          }

        if (i != (i & mask))
          {
            cpp_pedwarn (pfile, "octal escape sequence out of range");
            i &= mask;
          }
        c = i;
      }
      break;

    default:
      unknown = 1;
      break;
    }

  /* For every path that sets UNKNOWN, C is still the character that
     followed the backslash; print it directly if it is a graphic
     character, and in octal otherwise.  */
  if (unknown)
    {
      if (ISGRAPH (c))
        cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
      else
        cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
    }

  if (c > mask)
    cpp_pedwarn (pfile, "escape sequence out of range for character");

  *pstr = str;
  return c;
}
1899
/* Fall back to the plain char width when no separate maximum has
   been configured.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

/* Likewise for the wide character width.  */
#if !defined (MAX_WCHAR_TYPE_SIZE)
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1907
1908 /* Interpret a (possibly wide) character constant in TOKEN.
1909    WARN_MULTI warns about multi-character charconsts, if not
1910    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1911    that did not exist in traditional C.  PCHARS_SEEN points to a
1912    variable that is filled in with the number of characters seen.  */
1913 HOST_WIDE_INT
1914 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1915      cpp_reader *pfile;
1916      const cpp_token *token;
1917      int warn_multi;
1918      int traditional;
1919      unsigned int *pchars_seen;
1920 {
1921   const unsigned char *str = token->val.str.text;
1922   const unsigned char *limit = str + token->val.str.len;
1923   unsigned int chars_seen = 0;
1924   unsigned int width, max_chars, c;
1925   unsigned HOST_WIDE_INT mask;
1926   HOST_WIDE_INT result = 0;
1927
1928 #ifdef MULTIBYTE_CHARS
1929   (void) local_mbtowc (NULL, NULL, 0);
1930 #endif
1931
1932   /* Width in bits.  */
1933   if (token->type == CPP_CHAR)
1934     width = MAX_CHAR_TYPE_SIZE;
1935   else
1936     width = MAX_WCHAR_TYPE_SIZE;
1937
1938   if (width < HOST_BITS_PER_WIDE_INT)
1939     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1940   else
1941     mask = ~0;
1942   max_chars = HOST_BITS_PER_WIDE_INT / width;
1943
1944   while (str < limit)
1945     {
1946 #ifdef MULTIBYTE_CHARS
1947       wchar_t wc;
1948       int char_len;
1949
1950       char_len = local_mbtowc (&wc, str, limit - str);
1951       if (char_len == -1)
1952         {
1953           cpp_warning (pfile, "ignoring invalid multibyte character");
1954           c = *str++;
1955         }
1956       else
1957         {
1958           str += char_len;
1959           c = wc;
1960         }
1961 #else
1962       c = *str++;
1963 #endif
1964
1965       if (c == '\\')
1966         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1967
1968 #ifdef MAP_CHARACTER
1969       if (ISPRINT (c))
1970         c = MAP_CHARACTER (c);
1971 #endif
1972
1973       /* Merge character into result; ignore excess chars.  */
1974       if (++chars_seen <= max_chars)
1975         {
1976           if (width < HOST_BITS_PER_WIDE_INT)
1977             result = (result << width) | (c & mask);
1978           else
1979             result = c;
1980         }
1981     }
1982
1983   if (chars_seen == 0)
1984     cpp_error (pfile, "empty character constant");
1985   else if (chars_seen > max_chars)
1986     {
1987       chars_seen = max_chars;
1988       cpp_warning (pfile, "character constant too long");
1989     }
1990   else if (chars_seen > 1 && !traditional && warn_multi)
1991     cpp_warning (pfile, "multi-character character constant");
1992
1993   /* If char type is signed, sign-extend the constant.  The
1994      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1995   if (token->type == CPP_CHAR && chars_seen)
1996     {
1997       unsigned int nbits = chars_seen * width;
1998       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1999
2000       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2001           || ((result >> (nbits - 1)) & 1) == 0)
2002         result &= mask;
2003       else
2004         result |= ~mask;
2005     }
2006
2007   *pchars_seen = chars_seen;
2008   return result;
2009 }
2010
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the size at which a free buffer
   is considered too big to hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the bytes between BUFF's cur
   and limit.  Every macro argument is parenthesized so that
   arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
2021
/* Layout probe: a single char followed by a union of a double and a
   pointer.  In this part of the file it is used only to derive
   DEFAULT_ALIGNMENT below.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* The offset at which the compiler places the union after the leading
   char, i.e. the alignment it applies to a double or a pointer.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic is only
   correct when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2034
2035 /* Create a new allocation buffer.  Place the control block at the end
2036    of the buffer, so that buffer overflows will cause immediate chaos.  */
2037 static _cpp_buff *
2038 new_buff (len)
2039      unsigned int len;
2040 {
2041   _cpp_buff *result;
2042   char *base;
2043
2044   if (len < MIN_BUFF_SIZE)
2045     len = MIN_BUFF_SIZE;
2046   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2047
2048   base = xmalloc (len + sizeof (_cpp_buff));
2049   result = (_cpp_buff *) (base + len);
2050   result->base = base;
2051   result->cur = base;
2052   result->limit = base + len;
2053   result->next = NULL;
2054   return result;
2055 }
2056
2057 /* Place a chain of unwanted allocation buffers on the free list.  */
2058 void
2059 _cpp_release_buff (pfile, buff)
2060      cpp_reader *pfile;
2061      _cpp_buff *buff;
2062 {
2063   _cpp_buff *end = buff;
2064
2065   while (end->next)
2066     end = end->next;
2067   end->next = pfile->free_buffs;
2068   pfile->free_buffs = buff;
2069 }
2070
2071 /* Return a free buffer of size at least MIN_SIZE.  */
2072 _cpp_buff *
2073 _cpp_get_buff (pfile, min_size)
2074      cpp_reader *pfile;
2075      unsigned int min_size;
2076 {
2077   _cpp_buff *result, **p;
2078
2079   for (p = &pfile->free_buffs;; p = &(*p)->next)
2080     {
2081       unsigned int size;
2082
2083       if (*p == NULL)
2084         return new_buff (min_size);
2085       result = *p;
2086       size = result->limit - result->base;
2087       /* Return a buffer that's big enough, but don't waste one that's
2088          way too big.  */
2089       if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
2090         break;
2091     }
2092
2093   *p = result->next;
2094   result->next = NULL;
2095   result->cur = result->base;
2096   return result;
2097 }
2098
2099 /* Return a buffer chained on the end of BUFF.  Copy to it the
2100    uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2101    bytes.  */
2102 _cpp_buff *
2103 _cpp_extend_buff (pfile, buff, min_extra)
2104      cpp_reader *pfile;
2105      _cpp_buff *buff;
2106      unsigned int min_extra;
2107 {
2108   unsigned int size = EXTENDED_BUFF_SIZE (buff, min_extra);
2109
2110   buff->next = _cpp_get_buff (pfile, size);
2111   memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2112   return buff->next;
2113 }
2114
2115 /* Free a chain of buffers starting at BUFF.  */
2116 void
2117 _cpp_free_buff (buff)
2118      _cpp_buff *buff;
2119 {
2120   _cpp_buff *next;
2121
2122   for (; buff; buff = next)
2123     {
2124       next = buff->next;
2125       free (buff->base);
2126     }
2127 }
2128
2129 static int
2130 chunk_suitable (chunk, size)
2131      cpp_chunk *chunk;
2132      unsigned int size;
2133 {
2134   /* Being at least twice SIZE means we can use memcpy in
2135      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2136      anyway.  */
2137   return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2138 }
2139
/* Make POOL continue in another chunk and return the end of the new
   pool.  LEN is the number of bytes wanted in addition to whatever
   POOL_ROOM reports for the current chunk; that existing content is
   copied across to the new chunk.  PTR, if non-null, points to a
   char in the old pool, and is updated to point to the same char in
   the new pool.  */
unsigned char *
_cpp_next_chunk (pool, len, ptr)
     cpp_pool *pool;
     unsigned int len;
     unsigned char **ptr;
{
  /* Try the chunk that already follows the current one first.  */
  cpp_chunk *chunk = pool->cur->next;

  /* LEN is the minimum size we want in the new pool.  */
  len += POOL_ROOM (pool);
  if (! chunk_suitable (chunk, len))
    {
      /* The follower is missing or too small: allocate a larger
	 chunk and link it in directly after the current one.  */
      chunk = new_chunk (POOL_SIZE (pool) * 2 + len);

      chunk->next = pool->cur->next;
      pool->cur->next = chunk;
    }

  /* Update the pointer before changing chunk's front.  */
  if (ptr)
    *ptr += chunk->base - POOL_FRONT (pool);

  /* Carry the POOL_ROOM bytes at the old front over to the start of
     the chosen chunk.  */
  memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
  chunk->front = chunk->base;

  /* From here on the POOL_* macros are applied to the new chunk.  */
  pool->cur = chunk;
  return POOL_LIMIT (pool);
}
2170
2171 static cpp_chunk *
2172 new_chunk (size)
2173      unsigned int size;
2174 {
2175   unsigned char *base;
2176   cpp_chunk *result;
2177
2178   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2179   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2180   /* Put the chunk descriptor at the end.  Then chunk overruns will
2181      cause obvious chaos.  */
2182   result = (cpp_chunk *) (base + size);
2183   result->base = base;
2184   result->front = base;
2185   result->limit = base + size;
2186   result->next = 0;
2187
2188   return result;
2189 }
2190
2191 void
2192 _cpp_init_pool (pool, size, align, temp)
2193      cpp_pool *pool;
2194      unsigned int size, align, temp;
2195 {
2196   if (align == 0)
2197     align = DEFAULT_ALIGNMENT;
2198   if (align & (align - 1))
2199     abort ();
2200   pool->align = align;
2201   pool->first = new_chunk (size);
2202   pool->cur = pool->first;
2203   if (temp)
2204     pool->cur->next = pool->cur;
2205 }
2206
2207 void
2208 _cpp_free_pool (pool)
2209      cpp_pool *pool;
2210 {
2211   cpp_chunk *chunk = pool->first, *next;
2212
2213   do
2214     {
2215       next = chunk->next;
2216       free (chunk->base);
2217       chunk = next;
2218     }
2219   while (chunk && chunk != pool->first);
2220 }
2221
2222 /* Reserve LEN bytes from a memory pool.  */
2223 unsigned char *
2224 _cpp_pool_reserve (pool, len)
2225      cpp_pool *pool;
2226      unsigned int len;
2227 {
2228   len = POOL_ALIGN (len, pool->align);
2229   if (len > (unsigned int) POOL_ROOM (pool))
2230     _cpp_next_chunk (pool, len, 0);
2231
2232   return POOL_FRONT (pool);
2233 }
2234
2235 /* Allocate LEN bytes from a memory pool.  */
2236 unsigned char *
2237 _cpp_pool_alloc (pool, len)
2238      cpp_pool *pool;
2239      unsigned int len;
2240 {
2241   unsigned char *result = _cpp_pool_reserve (pool, len);
2242
2243   POOL_COMMIT (pool, len);
2244   return result;
2245 }