gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
   31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and it's length in the token->val.name.len.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_STRING,
  62   SPELL_NONE
  63 };
  64
/* One entry of the token spelling table: the spelling category of a
   token type plus a fixed string associated with it.  The member
   order matters, because the table entries are written as positional
   initializers.  */
struct token_spelling
{
  /* Which SPELL_* category the token type belongs to.  */
  enum spell_type category;
  /* For operator entries the operator's spelling; for the other
     entries the stringified name of the token type enumerator.  */
  const unsigned char *name;
};
  70
  71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  72                                              U":>", U"<%", U"%>"};
  73
/* Build the spelling table by expanding TTYPE_TABLE.  An OP (e, s)
   entry becomes an operator whose name is the string S; a TK (e, s)
   entry gets the spelling category S and, as its name, the
   stringified enumerator E.  The helper macros are only needed for
   this one expansion and are undefined again straight away.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },
const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
#undef OP
#undef TK

/* Fetch the spelling category, and the name, recorded in the table
   for the type of TOKEN (a pointer to a token).  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  82
/* Forward declarations of this file's static functions.  The
   parameter lists are wrapped in PARAMS, a macro from the project
   headers (presumably so that pre-ANSI compilers can omit them --
   confirm there).  */

/* Reading characters: newline handling and translation of trigraphs
   and escaped newlines.  */
static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
static cppchar_t get_effective_char PARAMS ((cpp_reader *));

/* Skipping comments and whitespace, and lexing individual kinds of
   token.  */
static int skip_block_comment PARAMS ((cpp_reader *));
static int skip_line_comment PARAMS ((cpp_reader *));
static void adjust_column PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
                                                    const U_CHAR *));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static void unterminated PARAMS ((cpp_reader *, int));
static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
                                   const unsigned char *, unsigned int *));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));

/* Memory and miscellaneous helpers.  */
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int));
static unsigned int hex_digit_value PARAMS ((unsigned int));
static _cpp_buff *new_buff PARAMS ((unsigned int));
 111
 112 /* Utility routine:
 113
 114    Compares, the token TOKEN to the NUL-terminated string STRING.
 115    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 116
 117 int
 118 cpp_ideq (token, string)
 119      const cpp_token *token;
 120      const char *string;
 121 {
 122   if (token->type != CPP_NAME)
 123     return 0;
 124
 125   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 126 }
 127
 128 /* Call when meeting a newline.  Returns the character after the newline
 129    (or carriage-return newline combination), or EOF.  */
 130 static cppchar_t
 131 handle_newline (pfile, newline_char)
 132      cpp_reader *pfile;
 133      cppchar_t newline_char;
 134 {
 135   cpp_buffer *buffer;
 136   cppchar_t next = EOF;
 137
 138   pfile->line++;
 139   buffer = pfile->buffer;
 140   buffer->col_adjust = 0;
 141   buffer->line_base = buffer->cur;
 142
 143   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 144   if (buffer->cur < buffer->rlimit)
 145     {
 146       next = *buffer->cur++;
 147       if (next + newline_char == '\r' + '\n')
 148         {
 149           buffer->line_base = buffer->cur;
 150           if (buffer->cur < buffer->rlimit)
 151             next = *buffer->cur++;
 152           else
 153             next = EOF;
 154         }
 155     }
 156
 157   buffer->read_ahead = next;
 158   return next;
 159 }
 160
 161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 162    encountered.  It warns if necessary, and returns true if the
 163    trigraph should be honoured.  FROM_CHAR is the third character of a
 164    trigraph, and presumed to be the previous character for position
 165    reporting.  */
 166 static int
 167 trigraph_ok (pfile, from_char)
 168      cpp_reader *pfile;
 169      cppchar_t from_char;
 170 {
 171   int accept = CPP_OPTION (pfile, trigraphs);
 172
 173   /* Don't warn about trigraphs in comments.  */
 174   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 175     {
 176       cpp_buffer *buffer = pfile->buffer;
 177
 178       if (accept)
 179         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 180                                "trigraph ??%c converted to %c",
 181                                (int) from_char,
 182                                (int) _cpp_trigraph_map[from_char]);
 183       else if (buffer->cur != buffer->last_Wtrigraphs)
 184         {
 185           buffer->last_Wtrigraphs = buffer->cur;
 186           cpp_warning_with_line (pfile, pfile->line,
 187                                  CPP_BUF_COL (buffer) - 2,
 188                                  "trigraph ??%c ignored", (int) from_char);
 189         }
 190     }
 191
 192   return accept;
 193 }
 194
/* Consume the read-ahead character as part of the current token: set
   the token's type to T and clear buffer->read_ahead.  Assumes local
   variables buffer and result.  */
#define ACCEPT_CHAR(t) \
  do { result->type = t; buffer->read_ahead = EOF; } while (0)

/* Remember, and later return to, the current read position.  Both
   macros assume local variables buffer and saved_cur.

   When we move to multibyte character sets, add to these something
   that saves and restores the state of the multibyte conversion
   library.  This probably involves saving and restoring a "cookie".
   In the case of glibc it is an 8-byte structure, so is not a high
   overhead operation.  In any case, it's out of the fast path.  */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 206
/* Skips any escaped newlines introduced by NEXT, which is either a
   '?' or a '\\'.  Returns the next character, which will also have
   been placed in buffer->read_ahead.  This routine performs
   preprocessing stages 1 and 2 of the ISO C standard.

   NEXT has already been read, so buffer->cur points just past it.
   If NEXT starts neither an accepted trigraph nor an escaped newline,
   the read position is put back where it was and NEXT itself is
   returned.  An accepted trigraph that does not lead to an escaped
   newline is consumed and its replacement character returned.  */
static cppchar_t
skip_escaped_newlines (pfile, next)
     cpp_reader *pfile;
     cppchar_t next;
{
  cpp_buffer *buffer = pfile->buffer;

  /* Only do this if we apply stages 1 and 2.  */
  if (!buffer->from_stage3)
    {
      cppchar_t next1;
      const unsigned char *saved_cur;
      int space;

      do
        {
          /* Nothing can follow NEXT, so it stands for itself.  */
          if (buffer->cur == buffer->rlimit)
            break;

          SAVE_STATE ();
          if (next == '?')
            {
              /* A trigraph needs a second '?' and then a third
                 character; otherwise rewind to just after NEXT.  */
              next1 = *buffer->cur++;
              if (next1 != '?' || buffer->cur == buffer->rlimit)
                {
                  RESTORE_STATE ();
                  break;
                }

              /* The third character must be in the trigraph map, and
                 trigraph_ok must agree to honour the trigraph.  */
              next1 = *buffer->cur++;
              if (!_cpp_trigraph_map[next1]
                  || !trigraph_ok (pfile, next1))
                {
                  RESTORE_STATE ();
                  break;
                }

              /* We have a full trigraph here.  */
              next = _cpp_trigraph_map[next1];
              /* Unless it produced a backslash with something after
                 it, the replacement character is the result.  */
              if (next != '\\' || buffer->cur == buffer->rlimit)
                break;
              /* From now on a rewind goes to just after the trigraph.  */
              SAVE_STATE ();
            }

          /* We have a backslash, and room for at least one more character.  */
          space = 0;
          /* Read past any non-vertical whitespace after the
             backslash, noting whether there was any.  */
          do
            {
              next1 = *buffer->cur++;
              if (!is_nvspace (next1))
                break;
              space = 1;
            }
          while (buffer->cur < buffer->rlimit);

          /* Not followed by a newline: the backslash is an ordinary
             character, so rewind to just after it.  */
          if (!is_vspace (next1))
            {
              RESTORE_STATE ();
              break;
            }

          if (space && !pfile->state.lexing_comment)
            cpp_warning (pfile, "backslash and newline separated by space");

          /* Step over the newline; the character after it may itself
             begin another escaped newline, hence the loop.  */
          next = handle_newline (pfile, next1);
          if (next == EOF)
            cpp_pedwarn (pfile, "backslash-newline at end of file");
        }
      while (next == '\\' || next == '?');
    }

  buffer->read_ahead = next;
  return next;
}
 285
 286 /* Obtain the next character, after trigraph conversion and skipping
 287    an arbitrary string of escaped newlines.  The common case of no
 288    trigraphs or escaped newlines falls through quickly.  */
 289 static cppchar_t
 290 get_effective_char (pfile)
 291      cpp_reader *pfile;
 292 {
 293   cpp_buffer *buffer = pfile->buffer;
 294   cppchar_t next = EOF;
 295
 296   if (buffer->cur < buffer->rlimit)
 297     {
 298       next = *buffer->cur++;
 299
 300       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 301          can introduce escaped newlines, which we want to skip, or
 302          UCNs, which, depending upon lexer state, we will handle in
 303          the future.  */
 304       if (next == '?' || next == '\\')
 305         next = skip_escaped_newlines (pfile, next);
 306     }
 307
 308   buffer->read_ahead = next;
 309   return next;
 310 }
 311
/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   non-zero if comment terminated by EOF, zero otherwise.

   pfile->state.lexing_comment is set for the duration of the scan,
   and buffer->read_ahead is cleared on exit.  */
static int
skip_block_comment (pfile)
     cpp_reader *pfile;
{
  cpp_buffer *buffer = pfile->buffer;
  /* C is the character being examined, PREVC the one before it.  */
  cppchar_t c = EOF, prevc = EOF;

  pfile->state.lexing_comment = 1;
  while (buffer->cur != buffer->rlimit)
    {
      prevc = c, c = *buffer->cur++;

      /* Re-entered by goto whenever C has already been fetched by
         the code below.  */
    next_char:
      /* FIXME: For speed, create a new character class of characters
         of interest inside block comments.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (pfile, c);

      /* People like decorating comments with '*', so check for '/'
         instead for efficiency.  */
      if (c == '/')
        {
          /* Found the closing delimiter.  */
          if (prevc == '*')
            break;

          /* Warn about potential nested comments, but not if the '/'
             comes immediately before the true comment delimiter.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments)
              && buffer->cur != buffer->rlimit)
            {
              prevc = c, c = *buffer->cur++;
              if (c == '*' && buffer->cur != buffer->rlimit)
                {
                  prevc = c, c = *buffer->cur++;
                  if (c != '/')
                    cpp_warning_with_line (pfile, pfile->line,
                                           CPP_BUF_COL (buffer) - 2,
                                           "\"/*\" within comment");
                }
              /* The look-ahead character still has to be examined.  */
              goto next_char;
            }
        }
      else if (is_vspace (c))
        {
          /* handle_newline hands back the character after the
             newline, which is examined next.  */
          prevc = c, c = handle_newline (pfile, c);
          goto next_char;
        }
      else if (c == '\t')
        adjust_column (pfile);
    }

  pfile->state.lexing_comment = 0;
  buffer->read_ahead = EOF;
  /* Unless the last two characters seen were the closing delimiter,
     the comment ran into the end of the buffer.  */
  return c != '/' || prevc != '*';
}
 371
 372 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 373    non-zero if a multiline comment.  The following new line, if any,
 374    is left in buffer->read_ahead.  */
 375 static int
 376 skip_line_comment (pfile)
 377      cpp_reader *pfile;
 378 {
 379   cpp_buffer *buffer = pfile->buffer;
 380   unsigned int orig_line = pfile->line;
 381   cppchar_t c;
 382
 383   pfile->state.lexing_comment = 1;
 384   do
 385     {
 386       c = EOF;
 387       if (buffer->cur == buffer->rlimit)
 388         break;
 389
 390       c = *buffer->cur++;
 391       if (c == '?' || c == '\\')
 392         c = skip_escaped_newlines (pfile, c);
 393     }
 394   while (!is_vspace (c));
 395
 396   pfile->state.lexing_comment = 0;
 397   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 398   return orig_line != pfile->line;
 399 }
 400
 401 /* pfile->buffer->cur is one beyond the \t character.  Update
 402    col_adjust so we track the column correctly.  */
 403 static void
 404 adjust_column (pfile)
 405      cpp_reader *pfile;
 406 {
 407   cpp_buffer *buffer = pfile->buffer;
 408   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 409
 410   /* Round it up to multiple of the tabstop, but subtract 1 since the
 411      tab itself occupies a character position.  */
 412   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 413                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 414 }
 415
 416 /* Skips whitespace, saving the next non-whitespace character.
 417    Adjusts pfile->col_adjust to account for tabs.  Without this,
 418    tokens might be assigned an incorrect column.  */
 419 static void
 420 skip_whitespace (pfile, c)
 421      cpp_reader *pfile;
 422      cppchar_t c;
 423 {
 424   cpp_buffer *buffer = pfile->buffer;
 425   unsigned int warned = 0;
 426
 427   do
 428     {
 429       /* Horizontal space always OK.  */
 430       if (c == ' ')
 431         ;
 432       else if (c == '\t')
 433         adjust_column (pfile);
 434       /* Just \f \v or \0 left.  */
 435       else if (c == '\0')
 436         {
 437           if (!warned)
 438             {
 439               cpp_warning (pfile, "null character(s) ignored");
 440               warned = 1;
 441             }
 442         }
 443       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 444         cpp_pedwarn_with_line (pfile, pfile->line,
 445                                CPP_BUF_COL (buffer),
 446                                "%s in preprocessing directive",
 447                                c == '\f' ? "form feed" : "vertical tab");
 448
 449       c = EOF;
 450       if (buffer->cur == buffer->rlimit)
 451         break;
 452       c = *buffer->cur++;
 453     }
 454   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 455   while (is_nvspace (c));
 456
 457   /* Remember the next character.  */
 458   buffer->read_ahead = c;
 459 }
 460
 461 /* See if the characters of a number token are valid in a name (no
 462    '.', '+' or '-').  */
 463 static int
 464 name_p (pfile, string)
 465      cpp_reader *pfile;
 466      const cpp_string *string;
 467 {
 468   unsigned int i;
 469
 470   for (i = 0; i < string->len; i++)
 471     if (!is_idchar (string->text[i]))
 472       return 0;
 473
 474   return 1;
 475 }
 476
 477 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 478    a critical inner loop.  The common case is an identifier which has
 479    not been split by backslash-newline, does not contain a dollar
 480    sign, and has already been scanned (roughly 10:1 ratio of
 481    seen:unseen identifiers in normal code; the distribution is
 482    Poisson-like).  Second most common case is a new identifier, not
 483    split and no dollar sign.  The other possibilities are rare and
 484    have been relegated to parse_identifier_slow.  */
 485
 486 static cpp_hashnode *
 487 parse_identifier (pfile)
 488      cpp_reader *pfile;
 489 {
 490   cpp_hashnode *result;
 491   const U_CHAR *cur, *rlimit;
 492
 493   /* Fast-path loop.  Skim over a normal identifier.
 494      N.B. ISIDNUM does not include $.  */
 495   cur    = pfile->buffer->cur - 1;
 496   rlimit = pfile->buffer->rlimit;
 497   do
 498     cur++;
 499   while (cur < rlimit && ISIDNUM (*cur));
 500
 501   /* Check for slow-path cases.  */
 502   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 503     result = parse_identifier_slow (pfile, cur);
 504   else
 505     {
 506       const U_CHAR *base = pfile->buffer->cur - 1;
 507       result = (cpp_hashnode *)
 508         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 509       pfile->buffer->cur = cur;
 510     }
 511
 512   /* Rarely, identifiers require diagnostics when lexed.
 513      XXX Has to be forced out of the fast path.  */
 514   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 515                         && !pfile->state.skipping, 0))
 516     {
 517       /* It is allowed to poison the same identifier twice.  */
 518       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 519         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 520                    NODE_NAME (result));
 521
 522       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 523          replacement list of a variadic macro.  */
 524       if (result == pfile->spec_nodes.n__VA_ARGS__
 525           && !pfile->state.va_args_ok)
 526         cpp_pedwarn (pfile,
 527         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 528     }
 529
 530   return result;
 531 }
 532
 533 /* Slow path.  This handles identifiers which have been split, and
 534    identifiers which contain dollar signs.  The part of the identifier
 535    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 536 static cpp_hashnode *
 537 parse_identifier_slow (pfile, cur)
 538      cpp_reader *pfile;
 539      const U_CHAR *cur;
 540 {
 541   cpp_buffer *buffer = pfile->buffer;
 542   const U_CHAR *base = buffer->cur - 1;
 543   struct obstack *stack = &pfile->hash_table->stack;
 544   unsigned int c, saw_dollar = 0, len;
 545
 546   /* Copy the part of the token which is known to be okay.  */
 547   obstack_grow (stack, base, cur - base);
 548
 549   /* Now process the part which isn't.  We are looking at one of
 550      '$', '\\', or '?' on entry to this loop.  */
 551   c = *cur++;
 552   buffer->cur = cur;
 553   do
 554     {
 555       while (is_idchar (c))
 556         {
 557           obstack_1grow (stack, c);
 558
 559           if (c == '$')
 560             saw_dollar++;
 561
 562           c = EOF;
 563           if (buffer->cur == buffer->rlimit)
 564             break;
 565
 566           c = *buffer->cur++;
 567         }
 568
 569       /* Potential escaped newline?  */
 570       if (c != '?' && c != '\\')
 571         break;
 572       c = skip_escaped_newlines (pfile, c);
 573     }
 574   while (is_idchar (c));
 575
 576   /* Remember the next character.  */
 577   buffer->read_ahead = c;
 578
 579   /* $ is not a identifier character in the standard, but is commonly
 580      accepted as an extension.  Don't warn about it in skipped
 581      conditional blocks.  */
 582   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 583     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 584
 585   /* Identifiers are null-terminated.  */
 586   len = obstack_object_size (stack);
 587   obstack_1grow (stack, '\0');
 588
 589   return (cpp_hashnode *)
 590     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 591 }
 592
 593 /* Parse a number, skipping embedded backslash-newlines.  */
 594 static void
 595 parse_number (pfile, number, c, leading_period)
 596      cpp_reader *pfile;
 597      cpp_string *number;
 598      cppchar_t c;
 599      int leading_period;
 600 {
 601   cpp_buffer *buffer = pfile->buffer;
 602   unsigned char *dest, *limit;
 603
 604   dest = BUFF_FRONT (pfile->u_buff);
 605   limit = BUFF_LIMIT (pfile->u_buff);
 606
 607   /* Place a leading period.  */
 608   if (leading_period)
 609     {
 610       if (dest == limit)
 611         {
 612           pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 1);
 613           dest = BUFF_FRONT (pfile->u_buff);
 614           limit = BUFF_LIMIT (pfile->u_buff);
 615         }
 616       *dest++ = '.';
 617     }
 618
 619   do
 620     {
 621       do
 622         {
 623           /* Need room for terminating null.  */
 624           if ((size_t) (limit - dest) < 2)
 625             {
 626               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 627               pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2);
 628               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 629               limit = BUFF_LIMIT (pfile->u_buff);
 630             }
 631           *dest++ = c;
 632
 633           c = EOF;
 634           if (buffer->cur == buffer->rlimit)
 635             break;
 636
 637           c = *buffer->cur++;
 638         }
 639       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 640
 641       /* Potential escaped newline?  */
 642       if (c != '?' && c != '\\')
 643         break;
 644       c = skip_escaped_newlines (pfile, c);
 645     }
 646   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 647
 648   /* Remember the next character.  */
 649   buffer->read_ahead = c;
 650
 651   /* Null-terminate the number.  */
 652   *dest = '\0';
 653
 654   number->text = BUFF_FRONT (pfile->u_buff);
 655   number->len = dest - number->text;
 656   BUFF_FRONT (pfile->u_buff) = dest + 1;
 657 }
 658
 659 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 660 static void
 661 unterminated (pfile, term)
 662      cpp_reader *pfile;
 663      int term;
 664 {
 665   cpp_error (pfile, "missing terminating %c character", term);
 666
 667   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 668     {
 669       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 670                            "possible start of unterminated string literal");
 671       pfile->mls_line = 0;
 672     }
 673 }
 674
 675 /* Subroutine of parse_string.  */
 676 static int
 677 unescaped_terminator_p (pfile, dest)
 678      cpp_reader *pfile;
 679      const unsigned char *dest;
 680 {
 681   const unsigned char *start, *temp;
 682
 683   /* In #include-style directives, terminators are not escapeable.  */
 684   if (pfile->state.angled_headers)
 685     return 1;
 686
 687   start = BUFF_FRONT (pfile->u_buff);
 688
 689   /* An odd number of consecutive backslashes represents an escaped
 690      terminator.  */
 691   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 692     ;
 693
 694   return ((dest - temp) & 1) == 0;
 695 }
 696
/* Parses a string, character constant, or angle-bracketed header file
   name.  Handles embedded trigraphs and escaped newlines.  The stored
   string is guaranteed NUL-terminated, but it is not guaranteed that
   this is the first NUL since embedded NULs are preserved.

   Multi-line strings are allowed, but they are deprecated.

   TERMINATOR is the character that closes the literal.  The text is
   accumulated in pfile->u_buff and recorded, without the terminator,
   in TOKEN->val.str.  On exit buffer->read_ahead is EOF if the
   terminator was consumed or the input ran out, and otherwise holds
   the newline at which the literal was cut short.  */
static void
parse_string (pfile, token, terminator)
     cpp_reader *pfile;
     cpp_token *token;
     cppchar_t terminator;
{
  cpp_buffer *buffer = pfile->buffer;
  /* DEST is where the next character goes, LIMIT the end of the
     space currently available in pfile->u_buff.  */
  unsigned char *dest, *limit;
  cppchar_t c;
  /* Each of these warnings is given at most once per literal.  */
  bool warned_nulls = false, warned_multi = false;

  dest = BUFF_FRONT (pfile->u_buff);
  limit = BUFF_LIMIT (pfile->u_buff);

  for (;;)
    {
      if (buffer->cur == buffer->rlimit)
        c = EOF;
      else
        c = *buffer->cur++;

      /* Re-entered by goto after a newline inside a multi-line
         string, when C has already been fetched.  */
    have_char:
      /* We need space for the terminating NUL.  */
      /* Every path below stores at most one character before coming
         back here, so one free byte always remains for the NUL when
         the loop is left.  */
      if ((size_t) (limit - dest) < 1)
        {
          size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
          pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2);
          dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
          limit = BUFF_LIMIT (pfile->u_buff);
        }

      if (c == EOF)
        {
          unterminated (pfile, terminator);
          break;
        }

      /* Handle trigraphs, escaped newlines etc.  */
      if (c == '?' || c == '\\')
        c = skip_escaped_newlines (pfile, c);

      if (c == terminator && unescaped_terminator_p (pfile, dest))
        {
          /* The terminator is consumed, so there is no read-ahead.  */
          c = EOF;
          break;
        }
      else if (is_vspace (c))
        {
          /* In assembly language, silently terminate string and
             character literals at end of line.  This is a kludge
             around not knowing where comments are.  */
          if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
            break;

          /* Character constants and header names may not extend over
             multiple lines.  In Standard C, neither may strings.
             Unfortunately, we accept multiline strings as an
             extension, except in #include family directives.  */
          if (terminator != '"' || pfile->state.angled_headers)
            {
              unterminated (pfile, terminator);
              break;
            }

          if (!warned_multi)
            {
              warned_multi = true;
              cpp_pedwarn (pfile, "multi-line string literals are deprecated");
            }

          /* Remember where the first multi-line string started, for
             the diagnostic issued by unterminated.  */
          if (pfile->mls_line == 0)
            {
              pfile->mls_line = token->line;
              pfile->mls_col = token->col;
            }

          /* Whatever form the line ending took, a single '\n' is
             stored; handle_newline supplies the next character.  */
          c = handle_newline (pfile, c);
          *dest++ = '\n';
          goto have_char;
        }
      else if (c == '\0' && !warned_nulls)
        {
          warned_nulls = true;
          cpp_warning (pfile, "null character(s) preserved in literal");
        }

      *dest++ = c;
    }

  /* Remember the next character.  */
  buffer->read_ahead = c;
  *dest = '\0';

  /* Record the text and claim it, NUL included, from the buffer.  */
  token->val.str.text = BUFF_FRONT (pfile->u_buff);
  token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
  BUFF_FRONT (pfile->u_buff) = dest + 1;
}
 800
 801 /* The stored comment includes the comment start and any terminator.  */
 802 static void
 803 save_comment (pfile, token, from)
 804      cpp_reader *pfile;
 805      cpp_token *token;
 806      const unsigned char *from;
 807 {
 808   unsigned char *buffer;
 809   unsigned int len;
 810
 811   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 812   /* C++ comments probably (not definitely) have moved past a new
 813      line, which we don't want to save in the comment.  */
 814   if (pfile->buffer->read_ahead != EOF)
 815     len--;
 816   buffer = _cpp_unaligned_alloc (pfile, len);
 817
 818   token->type = CPP_COMMENT;
 819   token->val.str.len = len;
 820   token->val.str.text = buffer;
 821
 822   buffer[0] = '/';
 823   memcpy (buffer + 1, from, len - 1);
 824 }
 825
 826 /* Subroutine of _cpp_lex_direct to handle '%'.  A little tricky, since we
 827    want to avoid stepping back when lexing %:%X.  */
 828 static void
 829 lex_percent (pfile, result)
 830      cpp_reader *pfile;
 831      cpp_token *result;
 832 {
 833   cpp_buffer *buffer= pfile->buffer;
 834   cppchar_t c;
 835
 836   result->type = CPP_MOD;
 837   /* Parsing %:%X could leave an extra character.  */
 838   if (buffer->extra_char == EOF)
 839     c = get_effective_char (pfile);
 840   else
 841     {
 842       c = buffer->read_ahead = buffer->extra_char;
 843       buffer->extra_char = EOF;
 844     }
 845
 846   if (c == '=')
 847     ACCEPT_CHAR (CPP_MOD_EQ);
 848   else if (CPP_OPTION (pfile, digraphs))
 849     {
 850       if (c == ':')
 851         {
 852           result->flags |= DIGRAPH;
 853           ACCEPT_CHAR (CPP_HASH);
 854           if (get_effective_char (pfile) == '%')
 855             {
 856               buffer->extra_char = get_effective_char (pfile);
 857               if (buffer->extra_char == ':')
 858                 {
 859                   buffer->extra_char = EOF;
 860                   ACCEPT_CHAR (CPP_PASTE);
 861                 }
 862               else
 863                 /* We'll catch the extra_char when we're called back.  */
 864                 buffer->read_ahead = '%';
 865             }
 866         }
 867       else if (c == '>')
 868         {
 869           result->flags |= DIGRAPH;
 870           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 871         }
 872     }
 873 }
 874
 875 /* Subroutine of _cpp_lex_direct to handle '.'.  This is tricky, since we
 876    want to avoid stepping back when lexing '...' or '.123'.  In the
 877    latter case we should also set a flag for parse_number.  */
 878 static void
 879 lex_dot (pfile, result)
 880      cpp_reader *pfile;
 881      cpp_token *result;
 882 {
 883   cpp_buffer *buffer = pfile->buffer;
 884   cppchar_t c;
 885
 886   /* Parsing ..X could leave an extra character.  */
 887   if (buffer->extra_char == EOF)
 888     c = get_effective_char (pfile);
 889   else
 890     {
 891       c = buffer->read_ahead = buffer->extra_char;
 892       buffer->extra_char = EOF;
 893     }
 894
 895   /* All known character sets have 0...9 contiguous.  */
 896   if (c >= '0' && c <= '9')
 897     {
 898       result->type = CPP_NUMBER;
 899       parse_number (pfile, &result->val.str, c, 1);
 900     }
 901   else
 902     {
 903       result->type = CPP_DOT;
 904       if (c == '.')
 905         {
 906           buffer->extra_char = get_effective_char (pfile);
 907           if (buffer->extra_char == '.')
 908             {
 909               buffer->extra_char = EOF;
 910               ACCEPT_CHAR (CPP_ELLIPSIS);
 911             }
 912           else
 913             /* We'll catch the extra_char when we're called back.  */
 914             buffer->read_ahead = '.';
 915         }
 916       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 917         ACCEPT_CHAR (CPP_DOT_STAR);
 918     }
 919 }
 920
 921 /* Allocate COUNT tokens for RUN.  */
 922 void
 923 _cpp_init_tokenrun (run, count)
 924      tokenrun *run;
 925      unsigned int count;
 926 {
 927   run->base = xnewvec (cpp_token, count);
 928   run->limit = run->base + count;
 929   run->next = NULL;
 930 }
 931
 932 /* Returns the next tokenrun, or creates one if there is none.  */
 933 static tokenrun *
 934 next_tokenrun (run)
 935      tokenrun *run;
 936 {
 937   if (run->next == NULL)
 938     {
 939       run->next = xnew (tokenrun);
 940       run->next->prev = run;
 941       _cpp_init_tokenrun (run->next, 250);
 942     }
 943
 944   return run->next;
 945 }
 946
 947 /* Allocate a single token that is invalidated at the same time as the
 948    rest of the tokens on the line.  Has its line and col set to the
 949    same as the last lexed token, so that diagnostics appear in the
 950    right place.  */
 951 cpp_token *
 952 _cpp_temp_token (pfile)
 953      cpp_reader *pfile;
 954 {
 955   cpp_token *old, *result;
 956
 957   old = pfile->cur_token - 1;
 958   if (pfile->cur_token == pfile->cur_run->limit)
 959     {
 960       pfile->cur_run = next_tokenrun (pfile->cur_run);
 961       pfile->cur_token = pfile->cur_run->base;
 962     }
 963
 964   result = pfile->cur_token++;
 965   result->line = old->line;
 966   result->col = old->col;
 967   return result;
 968 }
 969
 970 /* Lex a token into RESULT (external interface).  Takes care of issues
 971    like directive handling, token lookahead, multiple include
 972    opimisation and skipping.  */
 973 const cpp_token *
 974 _cpp_lex_token (pfile)
 975      cpp_reader *pfile;
 976 {
 977   cpp_token *result;
 978
 979   for (;;)
 980     {
 981       if (pfile->cur_token == pfile->cur_run->limit)
 982         {
 983           pfile->cur_run = next_tokenrun (pfile->cur_run);
 984           pfile->cur_token = pfile->cur_run->base;
 985         }
 986
 987       if (pfile->lookaheads)
 988         {
 989           pfile->lookaheads--;
 990           result = pfile->cur_token++;
 991         }
 992       else
 993         result = _cpp_lex_direct (pfile);
 994
 995       if (result->flags & BOL)
 996         {
 997           /* Is this a directive.  If _cpp_handle_directive returns
 998              false, it is an assembler #.  */
 999           if (result->type == CPP_HASH
1000               && !pfile->state.parsing_args
1001               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1002             continue;
1003           if (pfile->cb.line_change && !pfile->state.skipping)
1004             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1005         }
1006
1007       /* We don't skip tokens in directives.  */
1008       if (pfile->state.in_directive)
1009         break;
1010
1011       /* Outside a directive, invalidate controlling macros.  At file
1012          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1013          get here and MI optimisation works.  */
1014       pfile->mi_valid = false;
1015
1016       if (!pfile->state.skipping || result->type == CPP_EOF)
1017         break;
1018     }
1019
1020   return result;
1021 }
1022
1023 /* Lex a token into pfile->cur_token, which is also incremented, to
1024    get diagnostics pointing to the correct location.
1025
1026    Does not handle issues such as token lookahead, multiple-include
1027    optimisation, directives, skipping etc.  This function is only
1028    suitable for use by _cpp_lex_token, and in special cases like
1029    lex_expansion_token which doesn't care for any of these issues.
1030
1031    When meeting a newline, returns CPP_EOF if parsing a directive,
1032    otherwise returns to the start of the token buffer if permissible.
1033    Returns the location of the lexed token.  */
1034 cpp_token *
1035 _cpp_lex_direct (pfile)
1036      cpp_reader *pfile;
1037 {
1038   cppchar_t c;
1039   cpp_buffer *buffer;
1040   const unsigned char *comment_start;
       /* Claim the next token slot.  RESULT may be redirected to the
          start of the base run when a newline is met (see below).  */
1041   cpp_token *result = pfile->cur_token++;
1042
       /* Entered at the start and again after each newline or popped
          buffer: reload the current buffer and seed the token's flags
          with whatever was saved when the previous line ended.  */
1043  fresh_line:
1044   buffer = pfile->buffer;
1045   result->flags = buffer->saved_flags;
1046   buffer->saved_flags = 0;
       /* Re-entered when the line number may have changed, after an
          escaped newline or a skipped comment.  */
1047  update_tokens_line:
1048   result->line = pfile->line;
1049
       /* Fetch the character to dispatch on: a pending read-ahead
          character if there is one, otherwise the next byte of the
          buffer.  C stays EOF when the buffer is exhausted.  */
1050  skipped_white:
1051   c = buffer->read_ahead;
1052   if (c == EOF && buffer->cur < buffer->rlimit)
1053     c = *buffer->cur++;
1054   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1055   buffer->read_ahead = EOF;
1056
       /* Also the re-dispatch point when skip_escaped_newlines turns
          a '?' into a different character.  */
1057  trigraph:
1058   switch (c)
1059     {
1060     case EOF:
           /* Whatever is lexed next from this buffer starts a line.  */
1061       buffer->saved_flags = BOL;
1062       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1063         {
1064           if (buffer->cur != buffer->line_base)
1065             {
1066               /* Non-empty files should end in a newline.  Don't warn
1067                  for command line and _Pragma buffers.  */
1068               if (!buffer->from_stage3)
1069                 cpp_pedwarn (pfile, "no newline at end of file");
1070               handle_newline (pfile, '\n');
1071             }
1072
1073           /* Don't pop the last buffer.  */
1074           if (buffer->prev)
1075             {
                   /* Read the flag before the buffer is popped.  */
1076               unsigned char stop = buffer->return_at_eof;
1077
1078               _cpp_pop_buffer (pfile);
1079               if (!stop)
1080                 goto fresh_line;
1081             }
1082         }
1083       result->type = CPP_EOF;
1084       break;
1085
1086     case ' ': case '\t': case '\f': case '\v': case '\0':
1087       skip_whitespace (pfile, c);
1088       result->flags |= PREV_WHITE;
1089       goto skipped_white;
1090
1091     case '\n': case '\r':
1092       handle_newline (pfile, c);
1093       buffer->saved_flags = BOL;
1094       if (! pfile->state.in_directive)
1095         {
1096           if (pfile->state.parsing_args == 2)
1097             buffer->saved_flags |= PREV_WHITE;
               /* Unless tokens must be kept, rewind to the first slot
                  of the base run so the new line reuses the storage.  */
1098           if (!pfile->keep_tokens)
1099             {
1100               pfile->cur_run = &pfile->base_run;
1101               result = pfile->base_run.base;
1102               pfile->cur_token = result + 1;
1103             }
1104           goto fresh_line;
1105         }
           /* Inside a directive the newline ends it.  */
1106       result->type = CPP_EOF;
1107       break;
1108
1109     case '?':
1110     case '\\':
1111       /* These could start an escaped newline, or '?' a trigraph.  Let
1112          skip_escaped_newlines do all the work.  */
1113       {
1114         unsigned int line = pfile->line;
1115
1116         c = skip_escaped_newlines (pfile, c);
1117         if (line != pfile->line)
1118           /* We had at least one escaped newline of some sort, and the
1119              next character is in buffer->read_ahead.  Update the
1120              token's line and column.  */
1121             goto update_tokens_line;
1122
1123         /* We are either the original '?' or '\\', or a trigraph.  */
1124         result->type = CPP_QUERY;
1125         buffer->read_ahead = EOF;
1126         if (c == '\\')
1127           goto random_char;
1128         else if (c != '?')
1129           goto trigraph;
1130       }
1131       break;
1132
1133     case '0': case '1': case '2': case '3': case '4':
1134     case '5': case '6': case '7': case '8': case '9':
1135       result->type = CPP_NUMBER;
1136       parse_number (pfile, &result->val.str, c, 0);
1137       break;
1138
1139     case '$':
1140       if (!CPP_OPTION (pfile, dollars_in_ident))
1141         goto random_char;
1142       /* Fall through...  */
1143
1144     case '_':
1145     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1146     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1147     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1148     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1149     case 'y': case 'z':
1150     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1151     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1152     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1153     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1154     case 'Y': case 'Z':
1155       result->type = CPP_NAME;
1156       result->val.node = parse_identifier (pfile);
1157
1158       /* 'L' may introduce wide characters or strings.  */
1159       if (result->val.node == pfile->spec_nodes.n_L)
1160         {
               /* Peek at the character after the 'L'; it is consumed
                  only if it turns out to be a quote.  */
1161           c = buffer->read_ahead;
1162           if (c == EOF && buffer->cur < buffer->rlimit)
1163             c = *buffer->cur;
1164           if (c == '\'' || c == '"')
1165             {
1166               buffer->cur++;
1167               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1168               goto make_string;
1169             }
1170         }
1171       /* Convert named operators to their proper types.  */
1172       else if (result->val.node->flags & NODE_OPERATOR)
1173         {
1174           result->flags |= NAMED_OP;
1175           result->type = result->val.node->value.operator;
1176         }
1177       break;
1178
1179     case '\'':
1180     case '"':
1181       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
           /* Shared by narrow and wide literals and by header names; C
              holds the terminating character on arrival.  */
1182     make_string:
1183       parse_string (pfile, result, c);
1184       break;
1185
1186     case '/':
1187       /* A potential block or line comment.  */
1188       comment_start = buffer->cur;
1189       result->type = CPP_DIV;
1190       c = get_effective_char (pfile);
1191       if (c == '=')
1192         ACCEPT_CHAR (CPP_DIV_EQ);
1193       if (c != '/' && c != '*')
1194         break;
1195
           /* From here on C is either '*' or '/'.  */
1196       if (c == '*')
1197         {
1198           if (skip_block_comment (pfile))
1199             cpp_error (pfile, "unterminated comment");
1200         }
1201       else
1202         {
               /* Line comments not enabled here: return a plain
                  CPP_DIV.  */
1203           if (!CPP_OPTION (pfile, cplusplus_comments)
1204               && !CPP_IN_SYSTEM_HEADER (pfile))
1205             break;
1206
1207           /* Warn about comments only if pedantically GNUC89, and not
1208              in system headers.  */
1209           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1210               && ! buffer->warned_cplusplus_comments)
1211             {
1212               cpp_pedwarn (pfile,
1213                            "C++ style comments are not allowed in ISO C89");
1214               cpp_pedwarn (pfile,
1215                            "(this will be reported only once per input file)");
1216               buffer->warned_cplusplus_comments = 1;
1217             }
1218
1219           /* Skip_line_comment updates buffer->read_ahead.  */
1220           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1221             cpp_warning (pfile, "multi-line comment");
1222         }
1223
1224       /* Skipping the comment has updated buffer->read_ahead.  */
1225       if (!pfile->state.save_comments)
1226         {
               /* A discarded comment counts as whitespace before the
                  next token.  */
1227           result->flags |= PREV_WHITE;
1228           goto update_tokens_line;
1229         }
1230
1231       /* Save the comment as a token in its own right.  */
1232       save_comment (pfile, result, comment_start);
1233       break;
1234
1235     case '<':
           /* While angled headers are being accepted, everything up to
              the closing '>' is lexed as one header-name token.  */
1236       if (pfile->state.angled_headers)
1237         {
1238           result->type = CPP_HEADER_NAME;
1239           c = '>';              /* terminator.  */
1240           goto make_string;
1241         }
1242
1243       result->type = CPP_LESS;
1244       c = get_effective_char (pfile);
1245       if (c == '=')
1246         ACCEPT_CHAR (CPP_LESS_EQ);
1247       else if (c == '<')
1248         {
1249           ACCEPT_CHAR (CPP_LSHIFT);
1250           if (get_effective_char (pfile) == '=')
1251             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1252         }
1253       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1254         {
1255           ACCEPT_CHAR (CPP_MIN);
1256           if (get_effective_char (pfile) == '=')
1257             ACCEPT_CHAR (CPP_MIN_EQ);
1258         }
1259       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1260         {
1261           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1262           result->flags |= DIGRAPH;
1263         }
1264       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1265         {
1266           ACCEPT_CHAR (CPP_OPEN_BRACE);
1267           result->flags |= DIGRAPH;
1268         }
1269       break;
1270
1271     case '>':
1272       result->type = CPP_GREATER;
1273       c = get_effective_char (pfile);
1274       if (c == '=')
1275         ACCEPT_CHAR (CPP_GREATER_EQ);
1276       else if (c == '>')
1277         {
1278           ACCEPT_CHAR (CPP_RSHIFT);
1279           if (get_effective_char (pfile) == '=')
1280             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1281         }
1282       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1283         {
1284           ACCEPT_CHAR (CPP_MAX);
1285           if (get_effective_char (pfile) == '=')
1286             ACCEPT_CHAR (CPP_MAX_EQ);
1287         }
1288       break;
1289
1290     case '%':
1291       lex_percent (pfile, result);
1292       break;
1293
1294     case '.':
1295       lex_dot (pfile, result);
1296       break;
1297
1298     case '+':
1299       result->type = CPP_PLUS;
1300       c = get_effective_char (pfile);
1301       if (c == '=')
1302         ACCEPT_CHAR (CPP_PLUS_EQ);
1303       else if (c == '+')
1304         ACCEPT_CHAR (CPP_PLUS_PLUS);
1305       break;
1306
1307     case '-':
1308       result->type = CPP_MINUS;
1309       c = get_effective_char (pfile);
1310       if (c == '>')
1311         {
1312           ACCEPT_CHAR (CPP_DEREF);
1313           if (CPP_OPTION (pfile, cplusplus)
1314               && get_effective_char (pfile) == '*')
1315             ACCEPT_CHAR (CPP_DEREF_STAR);
1316         }
1317       else if (c == '=')
1318         ACCEPT_CHAR (CPP_MINUS_EQ);
1319       else if (c == '-')
1320         ACCEPT_CHAR (CPP_MINUS_MINUS);
1321       break;
1322
1323     case '*':
1324       result->type = CPP_MULT;
1325       if (get_effective_char (pfile) == '=')
1326         ACCEPT_CHAR (CPP_MULT_EQ);
1327       break;
1328
1329     case '=':
1330       result->type = CPP_EQ;
1331       if (get_effective_char (pfile) == '=')
1332         ACCEPT_CHAR (CPP_EQ_EQ);
1333       break;
1334
1335     case '!':
1336       result->type = CPP_NOT;
1337       if (get_effective_char (pfile) == '=')
1338         ACCEPT_CHAR (CPP_NOT_EQ);
1339       break;
1340
1341     case '&':
1342       result->type = CPP_AND;
1343       c = get_effective_char (pfile);
1344       if (c == '=')
1345         ACCEPT_CHAR (CPP_AND_EQ);
1346       else if (c == '&')
1347         ACCEPT_CHAR (CPP_AND_AND);
1348       break;
1349
1350     case '#':
1351       result->type = CPP_HASH;
1352       if (get_effective_char (pfile) == '#')
1353           ACCEPT_CHAR (CPP_PASTE);
1354       break;
1355
1356     case '|':
1357       result->type = CPP_OR;
1358       c = get_effective_char (pfile);
1359       if (c == '=')
1360         ACCEPT_CHAR (CPP_OR_EQ);
1361       else if (c == '|')
1362         ACCEPT_CHAR (CPP_OR_OR);
1363       break;
1364
1365     case '^':
1366       result->type = CPP_XOR;
1367       if (get_effective_char (pfile) == '=')
1368         ACCEPT_CHAR (CPP_XOR_EQ);
1369       break;
1370
1371     case ':':
1372       result->type = CPP_COLON;
1373       c = get_effective_char (pfile);
1374       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1375         ACCEPT_CHAR (CPP_SCOPE);
1376       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1377         {
1378           result->flags |= DIGRAPH;
1379           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1380         }
1381       break;
1382
1383     case '~': result->type = CPP_COMPL; break;
1384     case ',': result->type = CPP_COMMA; break;
1385     case '(': result->type = CPP_OPEN_PAREN; break;
1386     case ')': result->type = CPP_CLOSE_PAREN; break;
1387     case '[': result->type = CPP_OPEN_SQUARE; break;
1388     case ']': result->type = CPP_CLOSE_SQUARE; break;
1389     case '{': result->type = CPP_OPEN_BRACE; break;
1390     case '}': result->type = CPP_CLOSE_BRACE; break;
1391     case ';': result->type = CPP_SEMICOLON; break;
1392
1393       /* @ is a punctuator in Objective C.  */
1394     case '@': result->type = CPP_ATSIGN; break;
1395
           /* Anything else, including a lone backslash and a '$' that
              is not allowed in identifiers, is returned as CPP_OTHER
              carrying the raw character.  */
1396     random_char:
1397     default:
1398       result->type = CPP_OTHER;
1399       result->val.c = c;
1400       break;
1401     }
1402
1403   return result;
1404 }
1405
1406 /* An upper bound on the number of bytes needed to spell a token,
1407    including preceding whitespace.  */
1408 unsigned int
1409 cpp_token_len (token)
1410      const cpp_token *token;
1411 {
1412   unsigned int len;
1413
1414   switch (TOKEN_SPELL (token))
1415     {
1416     default:            len = 0;                                break;
1417     case SPELL_STRING:  len = token->val.str.len;               break;
1418     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1419     }
1420   /* 1 for whitespace, 4 for comment delimeters.  */
1421   return len + 5;
1422 }
1423
1424 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1425    already contain the enough space to hold the token's spelling.
1426    Returns a pointer to the character after the last character
1427    written.  */
1428 unsigned char *
1429 cpp_spell_token (pfile, token, buffer)
1430      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1431      const cpp_token *token;
1432      unsigned char *buffer;
1433 {
1434   switch (TOKEN_SPELL (token))
1435     {
1436     case SPELL_OPERATOR:
1437       {
1438         const unsigned char *spelling;
1439         unsigned char c;
1440
1441         if (token->flags & DIGRAPH)
1442           spelling
1443             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1444         else if (token->flags & NAMED_OP)
1445           goto spell_ident;
1446         else
1447           spelling = TOKEN_NAME (token);
1448
1449         while ((c = *spelling++) != '\0')
1450           *buffer++ = c;
1451       }
1452       break;
1453
1454     case SPELL_IDENT:
1455       spell_ident:
1456       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1457       buffer += NODE_LEN (token->val.node);
1458       break;
1459
1460     case SPELL_STRING:
1461       {
1462         int left, right, tag;
1463         switch (token->type)
1464           {
1465           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1466           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1467           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1468           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1469           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1470           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1471           }
1472         if (tag) *buffer++ = tag;
1473         if (left) *buffer++ = left;
1474         memcpy (buffer, token->val.str.text, token->val.str.len);
1475         buffer += token->val.str.len;
1476         if (right) *buffer++ = right;
1477       }
1478       break;
1479
1480     case SPELL_CHAR:
1481       *buffer++ = token->val.c;
1482       break;
1483
1484     case SPELL_NONE:
1485       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1486       break;
1487     }
1488
1489   return buffer;
1490 }
1491
1492 /* Returns a token as a null-terminated string.  The string is
1493    temporary, and automatically freed later.  Useful for diagnostics.  */
1494 unsigned char *
1495 cpp_token_as_text (pfile, token)
1496      cpp_reader *pfile;
1497      const cpp_token *token;
1498 {
1499   unsigned int len = cpp_token_len (token);
1500   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1501
1502   end = cpp_spell_token (pfile, token, start);
1503   end[0] = '\0';
1504
1505   return start;
1506 }
1507
1508 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1509 const char *
1510 cpp_type2name (type)
1511      enum cpp_ttype type;
1512 {
1513   return (const char *) token_spellings[type].name;
1514 }
1515
1516 /* Writes the spelling of token to FP, without any preceding space.
1517    Separated from cpp_spell_token for efficiency - to avoid stdio
1518    double-buffering.  */
1519 void
1520 cpp_output_token (token, fp)
1521      const cpp_token *token;
1522      FILE *fp;
1523 {
1524   switch (TOKEN_SPELL (token))
1525     {
1526     case SPELL_OPERATOR:
1527       {
1528         const unsigned char *spelling;
1529         int c;
1530
1531         if (token->flags & DIGRAPH)
1532           spelling
1533             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1534         else if (token->flags & NAMED_OP)
1535           goto spell_ident;
1536         else
1537           spelling = TOKEN_NAME (token);
1538
1539         c = *spelling;
1540         do
1541           putc (c, fp);
1542         while ((c = *++spelling) != '\0');
1543       }
1544       break;
1545
1546     spell_ident:
1547     case SPELL_IDENT:
1548       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1549     break;
1550
1551     case SPELL_STRING:
1552       {
1553         int left, right, tag;
1554         switch (token->type)
1555           {
1556           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1557           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1558           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1559           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1560           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1561           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1562           }
1563         if (tag) putc (tag, fp);
1564         if (left) putc (left, fp);
1565         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1566         if (right) putc (right, fp);
1567       }
1568       break;
1569
1570     case SPELL_CHAR:
1571       putc (token->val.c, fp);
1572       break;
1573
1574     case SPELL_NONE:
1575       /* An error, most probably.  */
1576       break;
1577     }
1578 }
1579
1580 /* Compare two tokens.  */
1581 int
1582 _cpp_equiv_tokens (a, b)
1583      const cpp_token *a, *b;
1584 {
1585   if (a->type == b->type && a->flags == b->flags)
1586     switch (TOKEN_SPELL (a))
1587       {
1588       default:                  /* Keep compiler happy.  */
1589       case SPELL_OPERATOR:
1590         return 1;
1591       case SPELL_CHAR:
1592         return a->val.c == b->val.c; /* Character.  */
1593       case SPELL_NONE:
1594         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1595       case SPELL_IDENT:
1596         return a->val.node == b->val.node;
1597       case SPELL_STRING:
1598         return (a->val.str.len == b->val.str.len
1599                 && !memcmp (a->val.str.text, b->val.str.text,
1600                             a->val.str.len));
1601       }
1602
1603   return 0;
1604 }
1605
1606 /* Returns nonzero if a space should be inserted to avoid an
1607    accidental token paste for output.  For simplicity, it is
1608    conservative, and occasionally advises a space where one is not
1609    needed, e.g. "." and ".2".  */
1610
1611 int
1612 cpp_avoid_paste (pfile, token1, token2)
1613      cpp_reader *pfile;
1614      const cpp_token *token1, *token2;
1615 {
1616   enum cpp_ttype a = token1->type, b = token2->type;
1617   cppchar_t c;
1618
1619   if (token1->flags & NAMED_OP)
1620     a = CPP_NAME;
1621   if (token2->flags & NAMED_OP)
1622     b = CPP_NAME;
1623
1624   c = EOF;
1625   if (token2->flags & DIGRAPH)
1626     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1627   else if (token_spellings[b].category == SPELL_OPERATOR)
1628     c = token_spellings[b].name[0];
1629
1630   /* Quickly get everything that can paste with an '='.  */
1631   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1632     return 1;
1633
1634   switch (a)
1635     {
1636     case CPP_GREATER:   return c == '>' || c == '?';
1637     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1638     case CPP_PLUS:      return c == '+';
1639     case CPP_MINUS:     return c == '-' || c == '>';
1640     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1641     case CPP_MOD:       return c == ':' || c == '>';
1642     case CPP_AND:       return c == '&';
1643     case CPP_OR:        return c == '|';
1644     case CPP_COLON:     return c == ':' || c == '>';
1645     case CPP_DEREF:     return c == '*';
1646     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1647     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1648     case CPP_NAME:      return ((b == CPP_NUMBER
1649                                  && name_p (pfile, &token2->val.str))
1650                                 || b == CPP_NAME
1651                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1652     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1653                                 || c == '.' || c == '+' || c == '-');
1654     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1655                                 && token1->val.c == '@'
1656                                 && (b == CPP_NAME || b == CPP_STRING));
1657     default:            break;
1658     }
1659
1660   return 0;
1661 }
1662
1663 /* Output all the remaining tokens on the current line, and a newline
1664    character, to FP.  Leading whitespace is removed.  If there are
1665    macros, special token padding is not performed.  */
1666 void
1667 cpp_output_line (pfile, fp)
1668      cpp_reader *pfile;
1669      FILE *fp;
1670 {
1671   const cpp_token *token;
1672
1673   token = cpp_get_token (pfile);
1674   while (token->type != CPP_EOF)
1675     {
1676       cpp_output_token (token, fp);
1677       token = cpp_get_token (pfile);
1678       if (token->flags & PREV_WHITE)
1679         putc (' ', fp);
1680     }
1681
1682   putc ('\n', fp);
1683 }
1684
/* Returns the numeric value, 0 to 15, of the hexadecimal digit C.
   Both upper and lower case letters are accepted.  Aborts if C is
   not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  unsigned int value;

  if (c >= '0' && c <= '9')
    value = c - '0';
  else if (c >= 'A' && c <= 'F')
    value = c - 'A' + 10;
  else if (c >= 'a' && c <= 'f')
    value = c - 'a' + 10;
  else
    abort ();

  return value;
}
1698
1699 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1700    failure if cpplib is not parsing C++ or C99.  Such failure is
1701    silent, and no variables are updated.  Otherwise returns 0, and
1702    warns if -Wtraditional.
1703
1704    [lex.charset]: The character designated by the universal character
1705    name \UNNNNNNNN is that character whose character short name in
1706    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707    universal character name \uNNNN is that character whose character
1708    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1709    for a universal character name is less than 0x20 or in the range
1710    0x7F-0x9F (inclusive), or if the universal character name
1711    designates a character in the basic source character set, then the
1712    program is ill-formed.
1713
1714    We assume that wchar_t is Unicode, so we don't need to do any
1715    mapping.  Is this ever wrong?
1716
1717    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1718    LIMIT is the end of the string or charconst.  PSTR is updated to
1719    point after the UCS on return, and the UCS is written into PC.  */
1720
1721 static int
1722 maybe_read_ucs (pfile, pstr, limit, pc)
1723      cpp_reader *pfile;
1724      const unsigned char **pstr;
1725      const unsigned char *limit;
1726      unsigned int *pc;
1727 {
1728   const unsigned char *p = *pstr;
1729   unsigned int code = 0;
1730   unsigned int c = *pc, length;
1731
1732   /* Only attempt to interpret a UCS for C++ and C99.  */
1733   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1734     return 1;
1735
1736   if (CPP_WTRADITIONAL (pfile))
1737     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1738
1739   length = (c == 'u' ? 4: 8);
1740
1741   if ((size_t) (limit - p) < length)
1742     {
1743       cpp_error (pfile, "incomplete universal-character-name");
1744       /* Skip to the end to avoid more diagnostics.  */
1745       p = limit;
1746     }
1747   else
1748     {
1749       for (; length; length--, p++)
1750         {
1751           c = *p;
1752           if (ISXDIGIT (c))
1753             code = (code << 4) + hex_digit_value (c);
1754           else
1755             {
1756               cpp_error (pfile,
1757                          "non-hex digit '%c' in universal-character-name", c);
1758               /* We shouldn't skip in case there are multibyte chars.  */
1759               break;
1760             }
1761         }
1762     }
1763
1764 #ifdef TARGET_EBCDIC
1765   cpp_error (pfile, "universal-character-name on EBCDIC target");
1766   code = 0x3f;  /* EBCDIC invalid character */
1767 #else
1768  /* True extended characters are OK.  */
1769   if (code >= 0xa0
1770       && !(code & 0x80000000)
1771       && !(code >= 0xD800 && code <= 0xDFFF))
1772     ;
1773   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1774      hex escapes so that this also works with EBCDIC hosts.  */
1775   else if (code == 0x24 || code == 0x40 || code == 0x60)
1776     ;
1777   /* Don't give another error if one occurred above.  */
1778   else if (length == 0)
1779     cpp_error (pfile, "universal-character-name out of range");
1780 #endif
1781
1782   *pstr = p;
1783   *pc = code;
1784   return 0;
1785 }
1786
1787 /* Interpret an escape sequence, and return its value.  PSTR points to
1788    the input pointer, which is just after the backslash.  LIMIT is how
1789    much text we have.  MASK is a bitmask for the precision for the
1790    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1791    interpret escapes that did not exist in traditional C.
1792
1793    Handles all relevant diagnostics.  */
1794
1795 unsigned int
1796 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1797      cpp_reader *pfile;
1798      const unsigned char **pstr;
1799      const unsigned char *limit;
1800      unsigned HOST_WIDE_INT mask;
1801      int traditional;
1802 {
1803   int unknown = 0;
1804   const unsigned char *str = *pstr;
1805   unsigned int c = *str++;
1806
1807   switch (c)
1808     {
1809     case '\\': case '\'': case '"': case '?': break;
1810     case 'b': c = TARGET_BS;      break;
1811     case 'f': c = TARGET_FF;      break;
1812     case 'n': c = TARGET_NEWLINE; break;
1813     case 'r': c = TARGET_CR;      break;
1814     case 't': c = TARGET_TAB;     break;
1815     case 'v': c = TARGET_VT;      break;
1816
1817     case '(': case '{': case '[': case '%':
1818       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819          '\%' is used to prevent SCCS from getting confused.  */
1820       unknown = CPP_PEDANTIC (pfile);
1821       break;
1822
1823     case 'a':
1824       if (CPP_WTRADITIONAL (pfile))
1825         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1826       if (!traditional)
1827         c = TARGET_BELL;
1828       break;
1829
1830     case 'e': case 'E':
1831       if (CPP_PEDANTIC (pfile))
1832         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1833       c = TARGET_ESC;
1834       break;
1835
1836     case 'u': case 'U':
1837       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1838       break;
1839
1840     case 'x':
1841       if (CPP_WTRADITIONAL (pfile))
1842         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1843
1844       if (!traditional)
1845         {
1846           unsigned int i = 0, overflow = 0;
1847           int digits_found = 0;
1848
1849           while (str < limit)
1850             {
1851               c = *str;
1852               if (! ISXDIGIT (c))
1853                 break;
1854               str++;
1855               overflow |= i ^ (i << 4 >> 4);
1856               i = (i << 4) + hex_digit_value (c);
1857               digits_found = 1;
1858             }
1859
1860           if (!digits_found)
1861             cpp_error (pfile, "\\x used with no following hex digits");
1862
1863           if (overflow | (i != (i & mask)))
1864             {
1865               cpp_pedwarn (pfile, "hex escape sequence out of range");
1866               i &= mask;
1867             }
1868           c = i;
1869         }
1870       break;
1871
1872     case '0':  case '1':  case '2':  case '3':
1873     case '4':  case '5':  case '6':  case '7':
1874       {
1875         unsigned int i = c - '0';
1876         int count = 0;
1877
1878         while (str < limit && ++count < 3)
1879           {
1880             c = *str;
1881             if (c < '0' || c > '7')
1882               break;
1883             str++;
1884             i = (i << 3) + c - '0';
1885           }
1886
1887         if (i != (i & mask))
1888           {
1889             cpp_pedwarn (pfile, "octal escape sequence out of range");
1890             i &= mask;
1891           }
1892         c = i;
1893       }
1894       break;
1895
1896     default:
1897       unknown = 1;
1898       break;
1899     }
1900
1901   if (unknown)
1902     {
1903       if (ISGRAPH (c))
1904         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1905       else
1906         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1907     }
1908
1909   if (c > mask)
1910     cpp_pedwarn (pfile, "escape sequence out of range for character");
1911
1912   *pstr = str;
1913   return c;
1914 }
1915
1916 #ifndef MAX_CHAR_TYPE_SIZE
1917 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1918 #endif
1919
1920 #ifndef MAX_WCHAR_TYPE_SIZE
1921 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1922 #endif
1923
1924 /* Interpret a (possibly wide) character constant in TOKEN.
1925    WARN_MULTI warns about multi-character charconsts, if not
1926    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1927    that did not exist in traditional C.  PCHARS_SEEN points to a
1928    variable that is filled in with the number of characters seen.  */
1929 HOST_WIDE_INT
1930 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1931      cpp_reader *pfile;
1932      const cpp_token *token;
1933      int warn_multi;
1934      int traditional;
1935      unsigned int *pchars_seen;
1936 {
1937   const unsigned char *str = token->val.str.text;
1938   const unsigned char *limit = str + token->val.str.len;
1939   unsigned int chars_seen = 0;
1940   unsigned int width, max_chars, c;
1941   unsigned HOST_WIDE_INT mask;
1942   HOST_WIDE_INT result = 0;
1943
1944 #ifdef MULTIBYTE_CHARS
1945   (void) local_mbtowc (NULL, NULL, 0);
1946 #endif
1947
1948   /* Width in bits.  */
1949   if (token->type == CPP_CHAR)
1950     width = MAX_CHAR_TYPE_SIZE;
1951   else
1952     width = MAX_WCHAR_TYPE_SIZE;
1953
1954   if (width < HOST_BITS_PER_WIDE_INT)
1955     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1956   else
1957     mask = ~0;
1958   max_chars = HOST_BITS_PER_WIDE_INT / width;
1959
1960   while (str < limit)
1961     {
1962 #ifdef MULTIBYTE_CHARS
1963       wchar_t wc;
1964       int char_len;
1965
1966       char_len = local_mbtowc (&wc, str, limit - str);
1967       if (char_len == -1)
1968         {
1969           cpp_warning (pfile, "ignoring invalid multibyte character");
1970           c = *str++;
1971         }
1972       else
1973         {
1974           str += char_len;
1975           c = wc;
1976         }
1977 #else
1978       c = *str++;
1979 #endif
1980
1981       if (c == '\\')
1982         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1983
1984 #ifdef MAP_CHARACTER
1985       if (ISPRINT (c))
1986         c = MAP_CHARACTER (c);
1987 #endif
1988
1989       /* Merge character into result; ignore excess chars.  */
1990       if (++chars_seen <= max_chars)
1991         {
1992           if (width < HOST_BITS_PER_WIDE_INT)
1993             result = (result << width) | (c & mask);
1994           else
1995             result = c;
1996         }
1997     }
1998
1999   if (chars_seen == 0)
2000     cpp_error (pfile, "empty character constant");
2001   else if (chars_seen > max_chars)
2002     {
2003       chars_seen = max_chars;
2004       cpp_warning (pfile, "character constant too long");
2005     }
2006   else if (chars_seen > 1 && !traditional && warn_multi)
2007     cpp_warning (pfile, "multi-character character constant");
2008
2009   /* If char type is signed, sign-extend the constant.  The
2010      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2011   if (token->type == CPP_CHAR && chars_seen)
2012     {
2013       unsigned int nbits = chars_seen * width;
2014       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2015
2016       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017           || ((result >> (nbits - 1)) & 1) == 0)
2018         result &= mask;
2019       else
2020         result |= ~mask;
2021     }
2022
2023   *pchars_seen = chars_seen;
2024   return result;
2025 }
2026
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area new_buff will allocate.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the size at or above which a
   free buffer is considered too big to hand out for a request of
   MIN_SIZE bytes.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size
   requested when extending BUFF: MIN_EXTRA plus twice the bytes
   between BUFF's cur and limit.  Every macro argument is
   parenthesized so that arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

/* Never instantiated; exists only so that the offset of U can be
   measured.  That offset is the alignment the compiler gives a union
   of a double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN, which must be a power of
   two.  */
#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2050
2051 /* Create a new allocation buffer.  Place the control block at the end
2052    of the buffer, so that buffer overflows will cause immediate chaos.  */
2053 static _cpp_buff *
2054 new_buff (len)
2055      unsigned int len;
2056 {
2057   _cpp_buff *result;
2058   unsigned char *base;
2059
2060   if (len < MIN_BUFF_SIZE)
2061     len = MIN_BUFF_SIZE;
2062   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2063
2064   base = xmalloc (len + sizeof (_cpp_buff));
2065   result = (_cpp_buff *) (base + len);
2066   result->base = base;
2067   result->cur = base;
2068   result->limit = base + len;
2069   result->next = NULL;
2070   return result;
2071 }
2072
2073 /* Place a chain of unwanted allocation buffers on the free list.  */
2074 void
2075 _cpp_release_buff (pfile, buff)
2076      cpp_reader *pfile;
2077      _cpp_buff *buff;
2078 {
2079   _cpp_buff *end = buff;
2080
2081   while (end->next)
2082     end = end->next;
2083   end->next = pfile->free_buffs;
2084   pfile->free_buffs = buff;
2085 }
2086
2087 /* Return a free buffer of size at least MIN_SIZE.  */
2088 _cpp_buff *
2089 _cpp_get_buff (pfile, min_size)
2090      cpp_reader *pfile;
2091      unsigned int min_size;
2092 {
2093   _cpp_buff *result, **p;
2094
2095   for (p = &pfile->free_buffs;; p = &(*p)->next)
2096     {
2097       unsigned int size;
2098
2099       if (*p == NULL)
2100         return new_buff (min_size);
2101       result = *p;
2102       size = result->limit - result->base;
2103       /* Return a buffer that's big enough, but don't waste one that's
2104          way too big.  */
2105       if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
2106         break;
2107     }
2108
2109   *p = result->next;
2110   result->next = NULL;
2111   result->cur = result->base;
2112   return result;
2113 }
2114
2115 /* Return a buffer chained on the end of BUFF.  Copy to it the
2116    uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2117    bytes.  */
2118 _cpp_buff *
2119 _cpp_extend_buff (pfile, buff, min_extra)
2120      cpp_reader *pfile;
2121      _cpp_buff *buff;
2122      unsigned int min_extra;
2123 {
2124   unsigned int size = EXTENDED_BUFF_SIZE (buff, min_extra);
2125
2126   buff->next = _cpp_get_buff (pfile, size);
2127   memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2128   return buff->next;
2129 }
2130
2131 /* Free a chain of buffers starting at BUFF.  */
2132 void
2133 _cpp_free_buff (buff)
2134      _cpp_buff *buff;
2135 {
2136   _cpp_buff *next;
2137
2138   for (; buff; buff = next)
2139     {
2140       next = buff->next;
2141       free (buff->base);
2142     }
2143 }
2144
2145 /* Allocate permanent, unaligned storage of length LEN.  */
2146 unsigned char *
2147 _cpp_unaligned_alloc (pfile, len)
2148      cpp_reader *pfile;
2149      size_t len;
2150 {
2151   _cpp_buff *buff = pfile->u_buff;
2152   unsigned char *result = buff->cur;
2153
2154   if (len > (size_t) (buff->limit - result))
2155     {
2156       buff = _cpp_get_buff (pfile, len);
2157       buff->next = pfile->u_buff;
2158       pfile->u_buff = buff;
2159       result = buff->cur;
2160     }
2161
2162   buff->cur = result + len;
2163   return result;
2164 }
2165
2166 static int
2167 chunk_suitable (chunk, size)
2168      cpp_chunk *chunk;
2169      unsigned int size;
2170 {
2171   /* Being at least twice SIZE means we can use memcpy in
2172      _cpp_next_chunk rather than memmove.  Besides, it's a good idea
2173      anyway.  */
2174   return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2175 }
2176
2177 /* Returns the end of the new pool.  PTR points to a char in the old
2178    pool, and is updated to point to the same char in the new pool.  */
2179 unsigned char *
2180 _cpp_next_chunk (pool, len, ptr)
2181      cpp_pool *pool;
2182      unsigned int len;
2183      unsigned char **ptr;
2184 {
2185   cpp_chunk *chunk = pool->cur->next;
2186
2187   /* LEN is the minimum size we want in the new pool.  */
2188   len += POOL_ROOM (pool);
2189   if (! chunk_suitable (chunk, len))
2190     {
2191       chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2192
2193       chunk->next = pool->cur->next;
2194       pool->cur->next = chunk;
2195     }
2196
2197   /* Update the pointer before changing chunk's front.  */
2198   if (ptr)
2199     *ptr += chunk->base - POOL_FRONT (pool);
2200
2201   memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2202   chunk->front = chunk->base;
2203
2204   pool->cur = chunk;
2205   return POOL_LIMIT (pool);
2206 }
2207
2208 static cpp_chunk *
2209 new_chunk (size)
2210      unsigned int size;
2211 {
2212   unsigned char *base;
2213   cpp_chunk *result;
2214
2215   size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2216   base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2217   /* Put the chunk descriptor at the end.  Then chunk overruns will
2218      cause obvious chaos.  */
2219   result = (cpp_chunk *) (base + size);
2220   result->base = base;
2221   result->front = base;
2222   result->limit = base + size;
2223   result->next = 0;
2224
2225   return result;
2226 }
2227
2228 void
2229 _cpp_init_pool (pool, size, align, temp)
2230      cpp_pool *pool;
2231      unsigned int size, align, temp;
2232 {
2233   if (align == 0)
2234     align = DEFAULT_ALIGNMENT;
2235   if (align & (align - 1))
2236     abort ();
2237   pool->align = align;
2238   pool->first = new_chunk (size);
2239   pool->cur = pool->first;
2240   if (temp)
2241     pool->cur->next = pool->cur;
2242 }
2243
2244 void
2245 _cpp_free_pool (pool)
2246      cpp_pool *pool;
2247 {
2248   cpp_chunk *chunk = pool->first, *next;
2249
2250   do
2251     {
2252       next = chunk->next;
2253       free (chunk->base);
2254       chunk = next;
2255     }
2256   while (chunk && chunk != pool->first);
2257 }
2258
2259 /* Reserve LEN bytes from a memory pool.  */
2260 unsigned char *
2261 _cpp_pool_reserve (pool, len)
2262      cpp_pool *pool;
2263      unsigned int len;
2264 {
2265   len = POOL_ALIGN (len, pool->align);
2266   if (len > (unsigned int) POOL_ROOM (pool))
2267     _cpp_next_chunk (pool, len, 0);
2268
2269   return POOL_FRONT (pool);
2270 }
2271
2272 /* Allocate LEN bytes from a memory pool.  */
2273 unsigned char *
2274 _cpp_pool_alloc (pool, len)
2275      cpp_pool *pool;
2276      unsigned int len;
2277 {
2278   unsigned char *result = _cpp_pool_reserve (pool, len);
2279
2280   POOL_COMMIT (pool, len);
2281   return result;
2282 }