gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /* This lexer works with a single pass of the file.  Recently I
  24    re-wrote it to minimize the places where we step backwards in the
  25    input stream, to make future changes to support multi-byte
  26    character sets fairly straight-forward.
  27
  28    There is now only one routine where we do step backwards:
  29    skip_escaped_newlines.  This routine could probably also be changed
  30    so that it doesn't need to step back.  One possibility is to use a
  31    trick similar to that used in lex_dot and lex_percent.  Two
  32    extra characters might be needed, but skip_escaped_newlines itself
  33    would probably be the only place that needs to be aware of that,
  34    and changes to the remaining routines would probably only be needed
  35    if they process a backslash.  */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "cpplib.h"
  40 #include "cpphash.h"
  41
  42 /* MULTIBYTE_CHARS support only works for native compilers.
  43    ??? Ideally what we want is to model widechar support after
  44    the current floating point support.  */
  45 #ifdef CROSS_COMPILE
  46 #undef MULTIBYTE_CHARS
  47 #endif
  48
  49 #ifdef MULTIBYTE_CHARS
  50 #include "mbchar.h"
  51 #include <locale.h>
  52 #endif
  53
  54 /* Tokens with SPELL_STRING store their spelling in the token list,
  55    and its length in the token->val.name.len.
        NOTE(review): parse_string in this file records the length in
        token->val.str.len, so "val.name" here looks stale -- confirm.

        The category of each token type is recorded in the
        token_spellings table below.  */
  56 enum spell_type
  57 {
  58   SPELL_OPERATOR = 0,   /* Category given to every OP () table entry.  */
  59   SPELL_CHAR,
  60   SPELL_IDENT,
  61   SPELL_NUMBER,
  62   SPELL_STRING,
  63   SPELL_NONE
  64 };
  65
  66 struct token_spelling   /* One row of the token_spellings table.  */
  67 {
  68   enum spell_type category;     /* Spelling category of the token type.  */
  69   const unsigned char *name;    /* The S argument of an OP () row, or
                                        STRINGX () of the enumerator for a
                                        TK () row.  */
  70 };
  71
  72 static const unsigned char *const digraph_spellings[] =
  73 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };  /* Digraph spellings.  */
  74 /* Build token_spellings from TTYPE_TABLE (defined outside this file).
        Each OP (e, s) row becomes a SPELL_OPERATOR entry whose name is S;
        each TK (e, s) row becomes an entry of category S whose name is
        STRINGX (e).  The table has N_TTYPES entries and is indexed by
        token type (see TOKEN_SPELL and TOKEN_NAME).  */
  75 #define OP(e, s) { SPELL_OPERATOR, U s           },
  76 #define TK(e, s) { s,              U STRINGX (e) },
  77 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  78 #undef OP
  79 #undef TK
  80 /* Spelling category and name of TOKEN, looked up by TOKEN->type.  */
  81 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  82 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  83
  84 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
  85 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
  86 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  87
  88 static int skip_block_comment PARAMS ((cpp_reader *));
  89 static int skip_line_comment PARAMS ((cpp_reader *));
  90 static void adjust_column PARAMS ((cpp_reader *));
  91 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  92 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  93 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  94                                                     const U_CHAR *));
  95 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  96 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  97 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  98 static void unterminated PARAMS ((cpp_reader *, int));
  99 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
 100 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 101 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
 102 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 103 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 104 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
 105                                    const unsigned char *, unsigned int *));
 106 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
 107
 108 static unsigned int hex_digit_value PARAMS ((unsigned int));
 109 static _cpp_buff *new_buff PARAMS ((size_t));
 110
 111 /* Utility routine:
 112
 113    Compares the token TOKEN to the NUL-terminated string STRING.
 114    Returns 1 for equal, 0 for unequal.  A TOKEN whose type is not
        CPP_NAME never compares equal: 0 is returned without looking at
        STRING.  Otherwise the name of the token's hash node is compared
        with STRING using ustrcmp.  */
 115
 116 int
 117 cpp_ideq (token, string)
 118      const cpp_token *token;
 119      const char *string;
 120 {
 121   if (token->type != CPP_NAME)
 122     return 0;
 123
 124   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 125 }
 126
 127 /* Call when meeting a newline.  Returns the character after the newline
 128    (or carriage-return newline combination), or EOF.

        NEWLINE_CHAR is the newline character that has just been read from
        the buffer.  As side effects this increments pfile->line, resets
        the buffer's col_adjust to zero, moves line_base to the start of
        the new line, and stores the returned character in
        buffer->read_ahead.  */
 129 static cppchar_t
 130 handle_newline (pfile, newline_char)
 131      cpp_reader *pfile;
 132      cppchar_t newline_char;
 133 {
 134   cpp_buffer *buffer;
 135   cppchar_t next = EOF;
 136
 137   pfile->line++;
 138   buffer = pfile->buffer;
 139   buffer->col_adjust = 0;
 140   buffer->line_base = buffer->cur;
 141
 142   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 143   if (buffer->cur < buffer->rlimit)
 144     {
 145       next = *buffer->cur++;
           /* The sum equals '\r' + '\n' exactly when NEXT is the other
              member of a two-character newline pair; in that case the
              second character belongs to the same line ending.  */
 146       if (next + newline_char == '\r' + '\n')
 147         {
 148           buffer->line_base = buffer->cur;
 149           if (buffer->cur < buffer->rlimit)
 150             next = *buffer->cur++;
 151           else
 152             next = EOF;
 153         }
 154     }
 155
 156   buffer->read_ahead = next;
 157   return next;
 158 }
 159
 160 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 161    encountered.  It warns if necessary, and returns true if the
 162    trigraph should be honoured.  FROM_CHAR is the third character of a
 163    trigraph, and presumed to be the previous character for position
 164    reporting.

        The return value is simply the "trigraphs" option.  A warning is
        issued only when the "warn_trigraphs" option is set and we are not
        lexing a comment: "converted" if trigraphs are honoured, "ignored"
        otherwise.  The "ignored" warning is given at most once per buffer
        position, tracked in buffer->last_Wtrigraphs.  */
 165 static int
 166 trigraph_ok (pfile, from_char)
 167      cpp_reader *pfile;
 168      cppchar_t from_char;
 169 {
 170   int accept = CPP_OPTION (pfile, trigraphs);
 171
 172   /* Don't warn about trigraphs in comments.  */
 173   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 174     {
 175       cpp_buffer *buffer = pfile->buffer;
 176
 177       if (accept)
 178         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
 179                                "trigraph ??%c converted to %c",
 180                                (int) from_char,
 181                                (int) _cpp_trigraph_map[from_char]);
           /* An ignored trigraph leaves the buffer position to be
              restored by the caller, so the same spot can be seen again;
              remember it to avoid repeating the warning.  */
 182       else if (buffer->cur != buffer->last_Wtrigraphs)
 183         {
 184           buffer->last_Wtrigraphs = buffer->cur;
 185           cpp_warning_with_line (pfile, pfile->line,
 186                                  CPP_BUF_COL (buffer) - 2,
 187                                  "trigraph ??%c ignored", (int) from_char);
 188         }
 189     }
 190
 191   return accept;
 192 }
 193
 194 /* Sets the type of RESULT to T and clears the read-ahead character
        (sets buffer->read_ahead to EOF), i.e. the character just examined
        is taken as part of the token.  Assumes local variables buffer and
        result.  */
 195 #define ACCEPT_CHAR(t) \
 196   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 197
 198 /* When we move to multibyte character sets, add to these something
 199    that saves and restores the state of the multibyte conversion
 200    library.  This probably involves saving and restoring a "cookie".
 201    In the case of glibc it is an 8-byte structure, so is not a high
 202    overhead operation.  In any case, it's out of the fast path.

        Currently SAVE_STATE records buffer->cur in saved_cur and
        RESTORE_STATE puts it back.  Both assume local variables buffer
        and saved_cur.  */
 203 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 204 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 205
 206 /* Skips any escaped newlines introduced by NEXT, which is either a
 207    '?' or a '\\'.  Returns the next character, which will also have
 208    been placed in buffer->read_ahead.  This routine performs
 209    preprocessing stages 1 and 2 of the ISO C standard.

        If the buffer is marked from_stage3 nothing is skipped and NEXT is
        returned as is.  Otherwise:
        - if NEXT starts an honoured trigraph that does not map to a
          backslash, the mapped character is returned;
        - if NEXT is (or maps to) a backslash that is followed, after
          optional horizontal space, by a newline, the newline is skipped
          and the process repeats with the character that follows;
        - in every other case the buffer position is restored to the last
          saved state and the current NEXT is returned.  */
 210 static cppchar_t
 211 skip_escaped_newlines (pfile, next)
 212      cpp_reader *pfile;
 213      cppchar_t next;
 214 {
 215   cpp_buffer *buffer = pfile->buffer;
 216
 217   /* Only do this if we apply stages 1 and 2.  */
 218   if (!buffer->from_stage3)
 219     {
 220       cppchar_t next1;
 221       const unsigned char *saved_cur;
 222       int space;
 223
 224       do
 225         {
 226           if (buffer->cur == buffer->rlimit)
 227             break;
 228
 229           SAVE_STATE ();
 230           if (next == '?')
 231             {
                   /* Need "??" plus at least one more character to have a
                      trigraph; otherwise back up and return the '?'.  */
 232               next1 = *buffer->cur++;
 233               if (next1 != '?' || buffer->cur == buffer->rlimit)
 234                 {
 235                   RESTORE_STATE ();
 236                   break;
 237                 }
 238
 239               next1 = *buffer->cur++;
 240               if (!_cpp_trigraph_map[next1]
 241                   || !trigraph_ok (pfile, next1))
 242                 {
 243                   RESTORE_STATE ();
 244                   break;
 245                 }
 246
 247               /* We have a full trigraph here.  */
 248               next = _cpp_trigraph_map[next1];
 249               if (next != '\\' || buffer->cur == buffer->rlimit)
 250                 break;
                   /* The trigraph gave a backslash: from now on, backing
                      up must return to just after the trigraph.  */
 251               SAVE_STATE ();
 252             }
 253
 254           /* We have a backslash, and room for at least one more character.  */
 255           space = 0;
 256           do
 257             {
 258               next1 = *buffer->cur++;
 259               if (!is_nvspace (next1))
 260                 break;
 261               space = 1;
 262             }
 263           while (buffer->cur < buffer->rlimit);
 264
               /* Not followed by a newline: this backslash does not escape
                  one, so undo the scan and return the backslash.  */
 265           if (!is_vspace (next1))
 266             {
 267               RESTORE_STATE ();
 268               break;
 269             }
 270
 271           if (space && !pfile->state.lexing_comment)
 272             cpp_warning (pfile, "backslash and newline separated by space");
 273
 274           next = handle_newline (pfile, next1);
 275           if (next == EOF)
 276             cpp_pedwarn (pfile, "backslash-newline at end of file");
 277         }
 278       while (next == '\\' || next == '?');
 279     }
 280
 281   buffer->read_ahead = next;
 282   return next;
 283 }
 284
 285 /* Obtain the next character, after trigraph conversion and skipping
 286    an arbitrary string of escaped newlines.  The common case of no
 287    trigraphs or escaped newlines falls through quickly.

        Returns EOF if the buffer is exhausted.  The returned character is
        also stored in buffer->read_ahead.  */
 288 static cppchar_t
 289 get_effective_char (pfile)
 290      cpp_reader *pfile;
 291 {
 292   cpp_buffer *buffer = pfile->buffer;
 293   cppchar_t next = EOF;
 294
 295   if (buffer->cur < buffer->rlimit)
 296     {
 297       next = *buffer->cur++;
 298
 299       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 300          can introduce escaped newlines, which we want to skip, or
 301          UCNs, which, depending upon lexer state, we will handle in
 302          the future.  */
 303       if (next == '?' || next == '\\')
 304         next = skip_escaped_newlines (pfile, next);
 305     }
 306
 307   buffer->read_ahead = next;
 308   return next;
 309 }
 310
 311 /* Skip a C-style block comment.  We find the end of the comment by
 312    seeing if an asterisk is before every '/' we encounter.  Returns
 313    non-zero if comment terminated by EOF, zero otherwise.

        pfile->state.lexing_comment is set for the duration of the scan,
        which suppresses the trigraph and backslash-space warnings of
        skip_escaped_newlines.  Newlines inside the comment go through
        handle_newline and tabs through adjust_column, so line and column
        tracking stay correct.  On return buffer->read_ahead is EOF.  */
 314 static int
 315 skip_block_comment (pfile)
 316      cpp_reader *pfile;
 317 {
 318   cpp_buffer *buffer = pfile->buffer;
 319   cppchar_t c = EOF, prevc = EOF;
 320
 321   pfile->state.lexing_comment = 1;
 322   while (buffer->cur != buffer->rlimit)
 323     {
 324       prevc = c, c = *buffer->cur++;
 325
         /* Re-entered by goto when C has already been fetched.  */
 326     next_char:
 327       /* FIXME: For speed, create a new character class of characters
 328          of interest inside block comments.  */
 329       if (c == '?' || c == '\\')
 330         c = skip_escaped_newlines (pfile, c);
 331
 332       /* People like decorating comments with '*', so check for '/'
 333          instead for efficiency.  */
 334       if (c == '/')
 335         {
 336           if (prevc == '*')
 337             break;
 338
 339           /* Warn about potential nested comments, but not if the '/'
 340              comes immediately before the true comment delimiter.
 341              Don't bother to get it right across escaped newlines.  */
 342           if (CPP_OPTION (pfile, warn_comments)
 343               && buffer->cur != buffer->rlimit)
 344             {
 345               prevc = c, c = *buffer->cur++;
 346               if (c == '*' && buffer->cur != buffer->rlimit)
 347                 {
 348                   prevc = c, c = *buffer->cur++;
 349                   if (c != '/')
 350                     cpp_warning_with_line (pfile, pfile->line,
 351                                            CPP_BUF_COL (buffer) - 2,
 352                                            "\"/*\" within comment");
 353                 }
                   /* The last character read has not been examined yet.  */
 354               goto next_char;
 355             }
 356         }
 357       else if (is_vspace (c))
 358         {
 359           prevc = c, c = handle_newline (pfile, c);
 360           goto next_char;
 361         }
 362       else if (c == '\t')
 363         adjust_column (pfile);
 364     }
 365
 366   pfile->state.lexing_comment = 0;
 367   buffer->read_ahead = EOF;
       /* Anything other than a final "*" "/" pair means we ran out of
          buffer before the comment was closed.  */
 368   return c != '/' || prevc != '*';
 369 }
 370
 371 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 372    non-zero if a multiline comment.  The following new line, if any,
 373    is left in buffer->read_ahead.

        "Multiline" means pfile->line changed while skipping, which
        happens when the comment is continued by an escaped newline.  If
        the buffer ends before a newline is seen, buffer->read_ahead is
        EOF.  pfile->state.lexing_comment is set during the scan.  */
 374 static int
 375 skip_line_comment (pfile)
 376      cpp_reader *pfile;
 377 {
 378   cpp_buffer *buffer = pfile->buffer;
 379   unsigned int orig_line = pfile->line;
 380   cppchar_t c;
 381
 382   pfile->state.lexing_comment = 1;
 383   do
 384     {
 385       c = EOF;
 386       if (buffer->cur == buffer->rlimit)
 387         break;
 388
 389       c = *buffer->cur++;
 390       if (c == '?' || c == '\\')
 391         c = skip_escaped_newlines (pfile, c);
 392     }
 393   while (!is_vspace (c));
 394
 395   pfile->state.lexing_comment = 0;
 396   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 397   return orig_line != pfile->line;
 398 }
 399
 400 /* pfile->buffer->cur is one beyond the \t character.  Update
 401    col_adjust so we track the column correctly.

        buffer->col_adjust is increased by the number of columns the tab
        advances beyond the single column it already occupies, using the
        "tabstop" option as the tab width.  */
 402 static void
 403 adjust_column (pfile)
 404      cpp_reader *pfile;
 405 {
 406   cpp_buffer *buffer = pfile->buffer;
 407   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 408
 409   /* Round it up to multiple of the tabstop, but subtract 1 since the
 410      tab itself occupies a character position.  */
 411   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 412                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 413 }
 414
 415 /* Skips whitespace, saving the next non-whitespace character.
 416    Adjusts buffer->col_adjust to account for tabs.  Without this,
 417    tokens might be assigned an incorrect column.

        C is the first whitespace character, already read from the buffer.
        Only non-vertical space is skipped; the first character that is
        not non-vertical space (or EOF at the end of the buffer) is left
        in buffer->read_ahead.  Null characters are skipped with a single
        warning per call.  Form feeds and vertical tabs inside a directive
        draw a pedantic warning.  */
 418 static void
 419 skip_whitespace (pfile, c)
 420      cpp_reader *pfile;
 421      cppchar_t c;
 422 {
 423   cpp_buffer *buffer = pfile->buffer;
 424   unsigned int warned = 0;
 425
 426   do
 427     {
 428       /* Horizontal space always OK.  */
 429       if (c == ' ')
 430         ;
 431       else if (c == '\t')
 432         adjust_column (pfile);
 433       /* Just \f \v or \0 left.  */
 434       else if (c == '\0')
 435         {
 436           if (!warned)
 437             {
 438               cpp_warning (pfile, "null character(s) ignored");
 439               warned = 1;
 440             }
 441         }
 442       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 443         cpp_pedwarn_with_line (pfile, pfile->line,
 444                                CPP_BUF_COL (buffer),
 445                                "%s in preprocessing directive",
 446                                c == '\f' ? "form feed" : "vertical tab");
 447
 448       c = EOF;
 449       if (buffer->cur == buffer->rlimit)
 450         break;
 451       c = *buffer->cur++;
 452     }
 453   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 454   while (is_nvspace (c));
 455
 456   /* Remember the next character.  */
 457   buffer->read_ahead = c;
 458 }
 459
 460 /* See if the characters of a number token are valid in a name (no
 461    '.', '+' or '-').

        Returns 1 if every one of the STRING->len characters of
        STRING->text satisfies is_idchar, 0 otherwise; an empty string
        yields 1.  PFILE is not used by this implementation.  */
 462 static int
 463 name_p (pfile, string)
 464      cpp_reader *pfile;
 465      const cpp_string *string;
 466 {
 467   unsigned int i;
 468
 469   for (i = 0; i < string->len; i++)
 470     if (!is_idchar (string->text[i]))
 471       return 0;
 472
 473   return 1;
 474 }
 475
 476 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 477    a critical inner loop.  The common case is an identifier which has
 478    not been split by backslash-newline, does not contain a dollar
 479    sign, and has already been scanned (roughly 10:1 ratio of
 480    seen:unseen identifiers in normal code; the distribution is
 481    Poisson-like).  Second most common case is a new identifier, not
 482    split and no dollar sign.  The other possibilities are rare and
 483    have been relegated to parse_identifier_slow.

        On entry pfile->buffer->cur is one past the first character of the
        identifier.  Returns the hash node for the identifier.  On the
        fast path the spelling is looked up directly from the input buffer
        and buffer->cur is left just past the identifier.  Diagnostics for
        poisoned identifiers and misplaced __VA_ARGS__ are issued here
        unless the lexer is skipping.  */
 484
 485 static cpp_hashnode *
 486 parse_identifier (pfile)
 487      cpp_reader *pfile;
 488 {
 489   cpp_hashnode *result;
 490   const U_CHAR *cur, *rlimit;
 491
 492   /* Fast-path loop.  Skim over a normal identifier.
 493      N.B. ISIDNUM does not include $.  */
 494   cur    = pfile->buffer->cur - 1;
 495   rlimit = pfile->buffer->rlimit;
 496   do
 497     cur++;
 498   while (cur < rlimit && ISIDNUM (*cur));
 499
 500   /* Check for slow-path cases.  */
 501   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 502     result = parse_identifier_slow (pfile, cur);
 503   else
 504     {
 505       const U_CHAR *base = pfile->buffer->cur - 1;
 506       result = (cpp_hashnode *)
 507         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 508       pfile->buffer->cur = cur;
 509     }
 510
 511   /* Rarely, identifiers require diagnostics when lexed.
 512      XXX Has to be forced out of the fast path.  */
 513   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 514                         && !pfile->state.skipping, 0))
 515     {
 516       /* It is allowed to poison the same identifier twice.  */
 517       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 518         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 519                    NODE_NAME (result));
 520
 521       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 522          replacement list of a variadic macro.  */
 523       if (result == pfile->spec_nodes.n__VA_ARGS__
 524           && !pfile->state.va_args_ok)
 525         cpp_pedwarn (pfile,
 526         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 527     }
 528
 529   return result;
 530 }
 531
 532 /* Slow path.  This handles identifiers which have been split, and
 533    identifiers which contain dollar signs.  The part of the identifier
 534    from PFILE->buffer->cur-1 to CUR has already been scanned.

        The spelling is assembled on the hash table's obstack, since it
        may not be contiguous in the input buffer, and is NUL-terminated
        there before being looked up.  The first character that is not
        part of the identifier (or EOF) is left in buffer->read_ahead.
        Returns the hash node for the identifier.  */
 535 static cpp_hashnode *
 536 parse_identifier_slow (pfile, cur)
 537      cpp_reader *pfile;
 538      const U_CHAR *cur;
 539 {
 540   cpp_buffer *buffer = pfile->buffer;
 541   const U_CHAR *base = buffer->cur - 1;
 542   struct obstack *stack = &pfile->hash_table->stack;
 543   unsigned int c, saw_dollar = 0, len;
 544
 545   /* Copy the part of the token which is known to be okay.  */
 546   obstack_grow (stack, base, cur - base);
 547
 548   /* Now process the part which isn't.  We are looking at one of
 549      '$', '\\', or '?' on entry to this loop.  */
 550   c = *cur++;
 551   buffer->cur = cur;
 552   do
 553     {
 554       while (is_idchar (c))
 555         {
 556           obstack_1grow (stack, c);
 557
 558           if (c == '$')
 559             saw_dollar++;
 560
 561           c = EOF;
 562           if (buffer->cur == buffer->rlimit)
 563             break;
 564
 565           c = *buffer->cur++;
 566         }
 567
 568       /* Potential escaped newline?  */
 569       if (c != '?' && c != '\\')
 570         break;
 571       c = skip_escaped_newlines (pfile, c);
 572     }
       /* Continue only if what followed the escaped newline (or
          trigraph) is again an identifier character.  */
 573   while (is_idchar (c));
 574
 575   /* Remember the next character.  */
 576   buffer->read_ahead = c;
 577
 578   /* $ is not an identifier character in the standard, but is commonly
 579      accepted as an extension.  Don't warn about it in skipped
 580      conditional blocks.  */
 581   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 582     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 583
 584   /* Identifiers are null-terminated.  */
 585   len = obstack_object_size (stack);
 586   obstack_1grow (stack, '\0');
 587
 588   return (cpp_hashnode *)
 589     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 590 }
 591
 592 /* Parse a number, skipping embedded backslash-newlines.

        C is the first character of the number, already read from the
        buffer.  If LEADING_PERIOD is non-zero a '.' is stored in front of
        it.  The spelling is accumulated in pfile->u_buff (extended as
        needed) and NUL-terminated.  On return NUMBER->text points at it,
        NUMBER->len is its length excluding the NUL, and the front of
        u_buff has been advanced past the NUL.  The first character that
        is not part of the number (or EOF) is left in
        buffer->read_ahead.  */
 593 static void
 594 parse_number (pfile, number, c, leading_period)
 595      cpp_reader *pfile;
 596      cpp_string *number;
 597      cppchar_t c;
 598      int leading_period;
 599 {
 600   cpp_buffer *buffer = pfile->buffer;
 601   unsigned char *dest, *limit;
 602
 603   dest = BUFF_FRONT (pfile->u_buff);
 604   limit = BUFF_LIMIT (pfile->u_buff);
 605
 606   /* Place a leading period.  */
 607   if (leading_period)
 608     {
 609       if (dest == limit)
 610         {
 611           _cpp_extend_buff (pfile, &pfile->u_buff, 1);
 612           dest = BUFF_FRONT (pfile->u_buff);
 613           limit = BUFF_LIMIT (pfile->u_buff);
 614         }
 615       *dest++ = '.';
 616     }
 617
 618   do
 619     {
 620       do
 621         {
 622           /* Need room for terminating null.  */
 623           if ((size_t) (limit - dest) < 2)
 624             {
                   /* Extending may move the buffer, so re-derive DEST
                      from the length written so far.  */
 625               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 626               _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 627               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 628               limit = BUFF_LIMIT (pfile->u_buff);
 629             }
 630           *dest++ = c;
 631
 632           c = EOF;
 633           if (buffer->cur == buffer->rlimit)
 634             break;
 635
 636           c = *buffer->cur++;
 637         }
 638       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 639
 640       /* Potential escaped newline?  */
 641       if (c != '?' && c != '\\')
 642         break;
 643       c = skip_escaped_newlines (pfile, c);
 644     }
 645   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 646
 647   /* Remember the next character.  */
 648   buffer->read_ahead = c;
 649
 650   /* Null-terminate the number.  */
 651   *dest = '\0';
 652
 653   number->text = BUFF_FRONT (pfile->u_buff);
 654   number->len = dest - number->text;
 655   BUFF_FRONT (pfile->u_buff) = dest + 1;
 656 }
 657
 658 /* Subroutine of parse_string.  Emits error for unterminated strings.

        TERM is the terminator character that was expected.  For a string
        literal ('"') that follows a recorded multi-line string start
        (pfile->mls_line non-zero and on a different line), a second error
        points at that start, and the record is then cleared.  */
 659 static void
 660 unterminated (pfile, term)
 661      cpp_reader *pfile;
 662      int term;
 663 {
 664   cpp_error (pfile, "missing terminating %c character", term);
 665
 666   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 667     {
 668       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 669                            "possible start of unterminated string literal");
 670       pfile->mls_line = 0;
 671     }
 672 }
 673
 674 /* Subroutine of parse_string.  Returns non-zero if a terminator
        character about to be stored at DEST is not escaped.  DEST points
        one past the last character stored so far in pfile->u_buff; the
        backslashes immediately preceding it are counted, stopping at the
        front of the buffer.  Always returns 1 when lexing angled
        headers.  */
 675 static int
 676 unescaped_terminator_p (pfile, dest)
 677      cpp_reader *pfile;
 678      const unsigned char *dest;
 679 {
 680   const unsigned char *start, *temp;
 681
 682   /* In #include-style directives, terminators are not escapable.  */
 683   if (pfile->state.angled_headers)
 684     return 1;
 685
 686   start = BUFF_FRONT (pfile->u_buff);
 687
 688   /* An odd number of consecutive backslashes represents an escaped
 689      terminator.  */
 690   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 691     ;
 692
 693   return ((dest - temp) & 1) == 0;
 694 }
 695
 696 /* Parses a string, character constant, or angle-bracketed header file
 697    name.  Handles embedded trigraphs and escaped newlines.  The stored
 698    string is guaranteed NUL-terminated, but it is not guaranteed that
 699    this is the first NUL since embedded NULs are preserved.
 700
 701    Multi-line strings are allowed, but they are deprecated.

        TERMINATOR is the character that closes the literal.  The text
        between the delimiters is accumulated in pfile->u_buff.  On return
        TOKEN->val.str describes it (length excluding the final NUL) and
        the front of u_buff has been advanced past the NUL.
        buffer->read_ahead is EOF if the terminator was consumed or the
        buffer ran out, and holds the newline character if the literal
        was cut short at the end of a line.  */
 702 static void
 703 parse_string (pfile, token, terminator)
 704      cpp_reader *pfile;
 705      cpp_token *token;
 706      cppchar_t terminator;
 707 {
 708   cpp_buffer *buffer = pfile->buffer;
 709   unsigned char *dest, *limit;
 710   cppchar_t c;
 711   bool warned_nulls = false, warned_multi = false;
 712
 713   dest = BUFF_FRONT (pfile->u_buff);
 714   limit = BUFF_LIMIT (pfile->u_buff);
 715
 716   for (;;)
 717     {
 718       if (buffer->cur == buffer->rlimit)
 719         c = EOF;
 720       else
 721         c = *buffer->cur++;
 722
         /* Re-entered by goto after a newline inside a multi-line string,
            with C already holding the next character.  */
 723     have_char:
 724       /* We need space for the terminating NUL.  */
 725       if ((size_t) (limit - dest) < 1)
 726         {
 727           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 728           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 729           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 730           limit = BUFF_LIMIT (pfile->u_buff);
 731         }
 732
 733       if (c == EOF)
 734         {
 735           unterminated (pfile, terminator);
 736           break;
 737         }
 738
 739       /* Handle trigraphs, escaped newlines etc.  */
 740       if (c == '?' || c == '\\')
 741         c = skip_escaped_newlines (pfile, c);
 742
 743       if (c == terminator && unescaped_terminator_p (pfile, dest))
 744         {
               /* The terminator is consumed and not stored.  */
 745           c = EOF;
 746           break;
 747         }
 748       else if (is_vspace (c))
 749         {
 750           /* In assembly language, silently terminate string and
 751              character literals at end of line.  This is a kludge
 752              around not knowing where comments are.  */
 753           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 754             break;
 755
 756           /* Character constants and header names may not extend over
 757              multiple lines.  In Standard C, neither may strings.
 758              Unfortunately, we accept multiline strings as an
 759              extension, except in #include family directives.  */
 760           if (terminator != '"' || pfile->state.angled_headers)
 761             {
 762               unterminated (pfile, terminator);
 763               break;
 764             }
 765
 766           if (!warned_multi)
 767             {
 768               warned_multi = true;
 769               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 770             }
 771
               /* Record where the first multi-line string started, for
                  the diagnostic in unterminated.  */
 772           if (pfile->mls_line == 0)
 773             {
 774               pfile->mls_line = token->line;
 775               pfile->mls_col = token->col;
 776             }
 777
               /* Whatever the line ending was, a single '\n' is stored.  */
 778           c = handle_newline (pfile, c);
 779           *dest++ = '\n';
 780           goto have_char;
 781         }
 782       else if (c == '\0' && !warned_nulls)
 783         {
 784           warned_nulls = true;
 785           cpp_warning (pfile, "null character(s) preserved in literal");
 786         }
 787
 788       *dest++ = c;
 789     }
 790
 791   /* Remember the next character.  */
 792   buffer->read_ahead = c;
 793   *dest = '\0';
 794
 795   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 796   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 797   BUFF_FRONT (pfile->u_buff) = dest + 1;
 798 }
 799
 800 /* The stored comment includes the comment start and any terminator.

        Turns TOKEN into a CPP_COMMENT whose text is a '/' followed by the
        input starting at FROM and running up to the current buffer
        position.  One character fewer is copied when a read-ahead
        character is pending.  The text is copied into memory obtained
        from _cpp_unaligned_alloc and is not NUL-terminated here.  */
 801 static void
 802 save_comment (pfile, token, from)
 803      cpp_reader *pfile;
 804      cpp_token *token;
 805      const unsigned char *from;
 806 {
 807   unsigned char *buffer;
 808   unsigned int len;
 809
 810   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 811   /* C++ comments probably (not definitely) have moved past a new
 812      line, which we don't want to save in the comment.  */
 813   if (pfile->buffer->read_ahead != EOF)
 814     len--;
 815   buffer = _cpp_unaligned_alloc (pfile, len);
 816
 817   token->type = CPP_COMMENT;
 818   token->val.str.len = len;
 819   token->val.str.text = buffer;
 820
 821   buffer[0] = '/';
 822   memcpy (buffer + 1, from, len - 1);
 823 }
 824
 825 /* Subroutine of _cpp_lex_direct to handle '%'.  A little tricky, since we
 826    want to avoid stepping back when lexing %:%X.

        Sets RESULT->type as follows:
          "%="    CPP_MOD_EQ
          "%:"    CPP_HASH, flagged DIGRAPH       (digraphs enabled only)
          "%:%:"  CPP_PASTE, flagged DIGRAPH      (digraphs enabled only)
          "%>"    CPP_CLOSE_BRACE, flagged DIGRAPH (digraphs enabled only)
          otherwise CPP_MOD.
        When "%:%" is followed by some X other than ':', the token is
        CPP_HASH, '%' is left in buffer->read_ahead and X is parked in
        buffer->extra_char, where the next call picks it up instead of
        reading from the buffer.  */
 827 static void
 828 lex_percent (pfile, result)
 829      cpp_reader *pfile;
 830      cpp_token *result;
 831 {
 832   cpp_buffer *buffer= pfile->buffer;
 833   cppchar_t c;
 834
 835   result->type = CPP_MOD;
 836   /* Parsing %:%X could leave an extra character.  */
 837   if (buffer->extra_char == EOF)
 838     c = get_effective_char (pfile);
 839   else
 840     {
 841       c = buffer->read_ahead = buffer->extra_char;
 842       buffer->extra_char = EOF;
 843     }
 844
 845   if (c == '=')
 846     ACCEPT_CHAR (CPP_MOD_EQ);
 847   else if (CPP_OPTION (pfile, digraphs))
 848     {
 849       if (c == ':')
 850         {
 851           result->flags |= DIGRAPH;
 852           ACCEPT_CHAR (CPP_HASH);
 853           if (get_effective_char (pfile) == '%')
 854             {
 855               buffer->extra_char = get_effective_char (pfile);
 856               if (buffer->extra_char == ':')
 857                 {
 858                   buffer->extra_char = EOF;
 859                   ACCEPT_CHAR (CPP_PASTE);
 860                 }
 861               else
 862                 /* We'll catch the extra_char when we're called back.  */
 863                 buffer->read_ahead = '%';
 864             }
 865         }
 866       else if (c == '>')
 867         {
 868           result->flags |= DIGRAPH;
 869           ACCEPT_CHAR (CPP_CLOSE_BRACE);
 870         }
 871     }
 872 }
 873
 874 /* Subroutine of _cpp_lex_direct to handle '.'.  This is tricky, since we
 875    want to avoid stepping back when lexing '...' or '.123'.  In the
 876    latter case we should also set a flag for parse_number.

        Sets RESULT->type as follows:
          '.' followed by a digit   CPP_NUMBER (parse_number is called
                                    with its leading-period flag set)
          "..."                     CPP_ELLIPSIS
          ".*" when lexing C++      CPP_DOT_STAR
          otherwise                 CPP_DOT.
        When ".." is followed by some X other than '.', the token is
        CPP_DOT, '.' is left in buffer->read_ahead and X is parked in
        buffer->extra_char, where the next call picks it up instead of
        reading from the buffer.  */
 877 static void
 878 lex_dot (pfile, result)
 879      cpp_reader *pfile;
 880      cpp_token *result;
 881 {
 882   cpp_buffer *buffer = pfile->buffer;
 883   cppchar_t c;
 884
 885   /* Parsing ..X could leave an extra character.  */
 886   if (buffer->extra_char == EOF)
 887     c = get_effective_char (pfile);
 888   else
 889     {
 890       c = buffer->read_ahead = buffer->extra_char;
 891       buffer->extra_char = EOF;
 892     }
 893
 894   /* All known character sets have 0...9 contiguous.  */
 895   if (c >= '0' && c <= '9')
 896     {
 897       result->type = CPP_NUMBER;
 898       parse_number (pfile, &result->val.str, c, 1);
 899     }
 900   else
 901     {
 902       result->type = CPP_DOT;
 903       if (c == '.')
 904         {
 905           buffer->extra_char = get_effective_char (pfile);
 906           if (buffer->extra_char == '.')
 907             {
 908               buffer->extra_char = EOF;
 909               ACCEPT_CHAR (CPP_ELLIPSIS);
 910             }
 911           else
 912             /* We'll catch the extra_char when we're called back.  */
 913             buffer->read_ahead = '.';
 914         }
 915       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
 916         ACCEPT_CHAR (CPP_DOT_STAR);
 917     }
 918 }
 919
 920 /* Allocate COUNT tokens for RUN.  */
 921 void
 922 _cpp_init_tokenrun (run, count)
 923      tokenrun *run;
 924      unsigned int count;
 925 {
 926   run->base = xnewvec (cpp_token, count);
 927   run->limit = run->base + count;
 928   run->next = NULL;
 929 }
 930
 931 /* Returns the next tokenrun, or creates one if there is none.  */
 932 static tokenrun *
 933 next_tokenrun (run)
 934      tokenrun *run;
 935 {
 936   if (run->next == NULL)
 937     {
 938       run->next = xnew (tokenrun);
 939       run->next->prev = run;
 940       _cpp_init_tokenrun (run->next, 250);
 941     }
 942
 943   return run->next;
 944 }
 945
 946 /* Allocate a single token that is invalidated at the same time as the
 947    rest of the tokens on the line.  Has its line and col set to the
 948    same as the last lexed token, so that diagnostics appear in the
 949    right place.  */
 950 cpp_token *
 951 _cpp_temp_token (pfile)
 952      cpp_reader *pfile;
 953 {
 954   cpp_token *old, *result;
 955
 956   old = pfile->cur_token - 1;
 957   if (pfile->cur_token == pfile->cur_run->limit)
 958     {
 959       pfile->cur_run = next_tokenrun (pfile->cur_run);
 960       pfile->cur_token = pfile->cur_run->base;
 961     }
 962
 963   result = pfile->cur_token++;
 964   result->line = old->line;
 965   result->col = old->col;
 966   return result;
 967 }
 968
 969 /* Lex a token into RESULT (external interface).  Takes care of issues
 970    like directive handling, token lookahead, multiple include
 971    opimisation and skipping.  */
 972 const cpp_token *
 973 _cpp_lex_token (pfile)
 974      cpp_reader *pfile;
 975 {
 976   cpp_token *result;
 977
 978   for (;;)
 979     {
 980       if (pfile->cur_token == pfile->cur_run->limit)
 981         {
 982           pfile->cur_run = next_tokenrun (pfile->cur_run);
 983           pfile->cur_token = pfile->cur_run->base;
 984         }
 985
 986       if (pfile->lookaheads)
 987         {
 988           pfile->lookaheads--;
 989           result = pfile->cur_token++;
 990         }
 991       else
 992         result = _cpp_lex_direct (pfile);
 993
 994       if (result->flags & BOL)
 995         {
 996           /* Is this a directive.  If _cpp_handle_directive returns
 997              false, it is an assembler #.  */
 998           if (result->type == CPP_HASH
 999               && !pfile->state.parsing_args
1000               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1001             continue;
1002           if (pfile->cb.line_change && !pfile->state.skipping)
1003             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1004         }
1005
1006       /* We don't skip tokens in directives.  */
1007       if (pfile->state.in_directive)
1008         break;
1009
1010       /* Outside a directive, invalidate controlling macros.  At file
1011          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1012          get here and MI optimisation works.  */
1013       pfile->mi_valid = false;
1014
1015       if (!pfile->state.skipping || result->type == CPP_EOF)
1016         break;
1017     }
1018
1019   return result;
1020 }
1021
1022 /* Lex a token into pfile->cur_token, which is also incremented, to
1023    get diagnostics pointing to the correct location.
1024
1025    Does not handle issues such as token lookahead, multiple-include
1026    optimisation, directives, skipping etc.  This function is only
1027    suitable for use by _cpp_lex_token, and in special cases like
1028    lex_expansion_token which doesn't care for any of these issues.
1029
1030    When meeting a newline, returns CPP_EOF if parsing a directive,
1031    otherwise returns to the start of the token buffer if permissible.
1032    Returns the location of the lexed token.  */
1033 cpp_token *
1034 _cpp_lex_direct (pfile)
1035      cpp_reader *pfile;
1036 {
1037   cppchar_t c;
1038   cpp_buffer *buffer;
1039   const unsigned char *comment_start;
1040   cpp_token *result = pfile->cur_token++;
1041
1042  fresh_line:
1043   buffer = pfile->buffer;
1044   result->flags = buffer->saved_flags;
1045   buffer->saved_flags = 0;
1046  update_tokens_line:
1047   result->line = pfile->line;
1048
1049  skipped_white:
1050   c = buffer->read_ahead;
1051   if (c == EOF && buffer->cur < buffer->rlimit)
1052     c = *buffer->cur++;
1053   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1054   buffer->read_ahead = EOF;
1055
1056  trigraph:
1057   switch (c)
1058     {
1059     case EOF:
1060       buffer->saved_flags = BOL;
1061       if (!pfile->state.parsing_args && !pfile->state.in_directive)
1062         {
1063           if (buffer->cur != buffer->line_base)
1064             {
1065               /* Non-empty files should end in a newline.  Don't warn
1066                  for command line and _Pragma buffers.  */
1067               if (!buffer->from_stage3)
1068                 cpp_pedwarn (pfile, "no newline at end of file");
1069               handle_newline (pfile, '\n');
1070             }
1071
1072           /* Don't pop the last buffer.  */
1073           if (buffer->prev)
1074             {
1075               unsigned char stop = buffer->return_at_eof;
1076
1077               _cpp_pop_buffer (pfile);
1078               if (!stop)
1079                 goto fresh_line;
1080             }
1081         }
1082       result->type = CPP_EOF;
1083       break;
1084
1085     case ' ': case '\t': case '\f': case '\v': case '\0':
1086       skip_whitespace (pfile, c);
1087       result->flags |= PREV_WHITE;
1088       goto skipped_white;
1089
1090     case '\n': case '\r':
1091       handle_newline (pfile, c);
1092       buffer->saved_flags = BOL;
1093       if (! pfile->state.in_directive)
1094         {
1095           if (pfile->state.parsing_args == 2)
1096             buffer->saved_flags |= PREV_WHITE;
1097           if (!pfile->keep_tokens)
1098             {
1099               pfile->cur_run = &pfile->base_run;
1100               result = pfile->base_run.base;
1101               pfile->cur_token = result + 1;
1102             }
1103           goto fresh_line;
1104         }
1105       result->type = CPP_EOF;
1106       break;
1107
1108     case '?':
1109     case '\\':
1110       /* These could start an escaped newline, or '?' a trigraph.  Let
1111          skip_escaped_newlines do all the work.  */
1112       {
1113         unsigned int line = pfile->line;
1114
1115         c = skip_escaped_newlines (pfile, c);
1116         if (line != pfile->line)
1117           /* We had at least one escaped newline of some sort, and the
1118              next character is in buffer->read_ahead.  Update the
1119              token's line and column.  */
1120             goto update_tokens_line;
1121
1122         /* We are either the original '?' or '\\', or a trigraph.  */
1123         result->type = CPP_QUERY;
1124         buffer->read_ahead = EOF;
1125         if (c == '\\')
1126           goto random_char;
1127         else if (c != '?')
1128           goto trigraph;
1129       }
1130       break;
1131
1132     case '0': case '1': case '2': case '3': case '4':
1133     case '5': case '6': case '7': case '8': case '9':
1134       result->type = CPP_NUMBER;
1135       parse_number (pfile, &result->val.str, c, 0);
1136       break;
1137
1138     case '$':
1139       if (!CPP_OPTION (pfile, dollars_in_ident))
1140         goto random_char;
1141       /* Fall through...  */
1142
1143     case '_':
1144     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1145     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1146     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1147     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1148     case 'y': case 'z':
1149     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1150     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1151     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1152     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1153     case 'Y': case 'Z':
1154       result->type = CPP_NAME;
1155       result->val.node = parse_identifier (pfile);
1156
1157       /* 'L' may introduce wide characters or strings.  */
1158       if (result->val.node == pfile->spec_nodes.n_L)
1159         {
1160           c = buffer->read_ahead;
1161           if (c == EOF && buffer->cur < buffer->rlimit)
1162             c = *buffer->cur;
1163           if (c == '\'' || c == '"')
1164             {
1165               buffer->cur++;
1166               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1167               goto make_string;
1168             }
1169         }
1170       /* Convert named operators to their proper types.  */
1171       else if (result->val.node->flags & NODE_OPERATOR)
1172         {
1173           result->flags |= NAMED_OP;
1174           result->type = result->val.node->value.operator;
1175         }
1176       break;
1177
1178     case '\'':
1179     case '"':
1180       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1181     make_string:
1182       parse_string (pfile, result, c);
1183       break;
1184
1185     case '/':
1186       /* A potential block or line comment.  */
1187       comment_start = buffer->cur;
1188       result->type = CPP_DIV;
1189       c = get_effective_char (pfile);
1190       if (c == '=')
1191         ACCEPT_CHAR (CPP_DIV_EQ);
1192       if (c != '/' && c != '*')
1193         break;
1194
1195       if (c == '*')
1196         {
1197           if (skip_block_comment (pfile))
1198             cpp_error (pfile, "unterminated comment");
1199         }
1200       else
1201         {
1202           if (!CPP_OPTION (pfile, cplusplus_comments)
1203               && !CPP_IN_SYSTEM_HEADER (pfile))
1204             break;
1205
1206           /* Warn about comments only if pedantically GNUC89, and not
1207              in system headers.  */
1208           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1209               && ! buffer->warned_cplusplus_comments)
1210             {
1211               cpp_pedwarn (pfile,
1212                            "C++ style comments are not allowed in ISO C89");
1213               cpp_pedwarn (pfile,
1214                            "(this will be reported only once per input file)");
1215               buffer->warned_cplusplus_comments = 1;
1216             }
1217
1218           /* Skip_line_comment updates buffer->read_ahead.  */
1219           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1220             cpp_warning (pfile, "multi-line comment");
1221         }
1222
1223       /* Skipping the comment has updated buffer->read_ahead.  */
1224       if (!pfile->state.save_comments)
1225         {
1226           result->flags |= PREV_WHITE;
1227           goto update_tokens_line;
1228         }
1229
1230       /* Save the comment as a token in its own right.  */
1231       save_comment (pfile, result, comment_start);
1232       break;
1233
1234     case '<':
1235       if (pfile->state.angled_headers)
1236         {
1237           result->type = CPP_HEADER_NAME;
1238           c = '>';              /* terminator.  */
1239           goto make_string;
1240         }
1241
1242       result->type = CPP_LESS;
1243       c = get_effective_char (pfile);
1244       if (c == '=')
1245         ACCEPT_CHAR (CPP_LESS_EQ);
1246       else if (c == '<')
1247         {
1248           ACCEPT_CHAR (CPP_LSHIFT);
1249           if (get_effective_char (pfile) == '=')
1250             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1251         }
1252       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1253         {
1254           ACCEPT_CHAR (CPP_MIN);
1255           if (get_effective_char (pfile) == '=')
1256             ACCEPT_CHAR (CPP_MIN_EQ);
1257         }
1258       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1259         {
1260           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1261           result->flags |= DIGRAPH;
1262         }
1263       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1264         {
1265           ACCEPT_CHAR (CPP_OPEN_BRACE);
1266           result->flags |= DIGRAPH;
1267         }
1268       break;
1269
1270     case '>':
1271       result->type = CPP_GREATER;
1272       c = get_effective_char (pfile);
1273       if (c == '=')
1274         ACCEPT_CHAR (CPP_GREATER_EQ);
1275       else if (c == '>')
1276         {
1277           ACCEPT_CHAR (CPP_RSHIFT);
1278           if (get_effective_char (pfile) == '=')
1279             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1280         }
1281       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1282         {
1283           ACCEPT_CHAR (CPP_MAX);
1284           if (get_effective_char (pfile) == '=')
1285             ACCEPT_CHAR (CPP_MAX_EQ);
1286         }
1287       break;
1288
1289     case '%':
1290       lex_percent (pfile, result);
1291       break;
1292
1293     case '.':
1294       lex_dot (pfile, result);
1295       break;
1296
1297     case '+':
1298       result->type = CPP_PLUS;
1299       c = get_effective_char (pfile);
1300       if (c == '=')
1301         ACCEPT_CHAR (CPP_PLUS_EQ);
1302       else if (c == '+')
1303         ACCEPT_CHAR (CPP_PLUS_PLUS);
1304       break;
1305
1306     case '-':
1307       result->type = CPP_MINUS;
1308       c = get_effective_char (pfile);
1309       if (c == '>')
1310         {
1311           ACCEPT_CHAR (CPP_DEREF);
1312           if (CPP_OPTION (pfile, cplusplus)
1313               && get_effective_char (pfile) == '*')
1314             ACCEPT_CHAR (CPP_DEREF_STAR);
1315         }
1316       else if (c == '=')
1317         ACCEPT_CHAR (CPP_MINUS_EQ);
1318       else if (c == '-')
1319         ACCEPT_CHAR (CPP_MINUS_MINUS);
1320       break;
1321
1322     case '*':
1323       result->type = CPP_MULT;
1324       if (get_effective_char (pfile) == '=')
1325         ACCEPT_CHAR (CPP_MULT_EQ);
1326       break;
1327
1328     case '=':
1329       result->type = CPP_EQ;
1330       if (get_effective_char (pfile) == '=')
1331         ACCEPT_CHAR (CPP_EQ_EQ);
1332       break;
1333
1334     case '!':
1335       result->type = CPP_NOT;
1336       if (get_effective_char (pfile) == '=')
1337         ACCEPT_CHAR (CPP_NOT_EQ);
1338       break;
1339
1340     case '&':
1341       result->type = CPP_AND;
1342       c = get_effective_char (pfile);
1343       if (c == '=')
1344         ACCEPT_CHAR (CPP_AND_EQ);
1345       else if (c == '&')
1346         ACCEPT_CHAR (CPP_AND_AND);
1347       break;
1348
1349     case '#':
1350       result->type = CPP_HASH;
1351       if (get_effective_char (pfile) == '#')
1352           ACCEPT_CHAR (CPP_PASTE);
1353       break;
1354
1355     case '|':
1356       result->type = CPP_OR;
1357       c = get_effective_char (pfile);
1358       if (c == '=')
1359         ACCEPT_CHAR (CPP_OR_EQ);
1360       else if (c == '|')
1361         ACCEPT_CHAR (CPP_OR_OR);
1362       break;
1363
1364     case '^':
1365       result->type = CPP_XOR;
1366       if (get_effective_char (pfile) == '=')
1367         ACCEPT_CHAR (CPP_XOR_EQ);
1368       break;
1369
1370     case ':':
1371       result->type = CPP_COLON;
1372       c = get_effective_char (pfile);
1373       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1374         ACCEPT_CHAR (CPP_SCOPE);
1375       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1376         {
1377           result->flags |= DIGRAPH;
1378           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1379         }
1380       break;
1381
1382     case '~': result->type = CPP_COMPL; break;
1383     case ',': result->type = CPP_COMMA; break;
1384     case '(': result->type = CPP_OPEN_PAREN; break;
1385     case ')': result->type = CPP_CLOSE_PAREN; break;
1386     case '[': result->type = CPP_OPEN_SQUARE; break;
1387     case ']': result->type = CPP_CLOSE_SQUARE; break;
1388     case '{': result->type = CPP_OPEN_BRACE; break;
1389     case '}': result->type = CPP_CLOSE_BRACE; break;
1390     case ';': result->type = CPP_SEMICOLON; break;
1391
1392       /* @ is a punctuator in Objective C.  */
1393     case '@': result->type = CPP_ATSIGN; break;
1394
1395     random_char:
1396     default:
1397       result->type = CPP_OTHER;
1398       result->val.c = c;
1399       break;
1400     }
1401
1402   return result;
1403 }
1404
1405 /* An upper bound on the number of bytes needed to spell a token,
1406    including preceding whitespace.  */
1407 unsigned int
1408 cpp_token_len (token)
1409      const cpp_token *token;
1410 {
1411   unsigned int len;
1412
1413   switch (TOKEN_SPELL (token))
1414     {
1415     default:            len = 0;                                break;
1416     case SPELL_NUMBER:
1417     case SPELL_STRING:  len = token->val.str.len;               break;
1418     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1419     }
1420   /* 1 for whitespace, 4 for comment delimiters.  */
1421   return len + 5;
1422 }
1423
1424 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1425    already contain the enough space to hold the token's spelling.
1426    Returns a pointer to the character after the last character
1427    written.  */
1428 unsigned char *
1429 cpp_spell_token (pfile, token, buffer)
1430      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1431      const cpp_token *token;
1432      unsigned char *buffer;
1433 {
1434   switch (TOKEN_SPELL (token))
1435     {
1436     case SPELL_OPERATOR:
1437       {
1438         const unsigned char *spelling;
1439         unsigned char c;
1440
1441         if (token->flags & DIGRAPH)
1442           spelling
1443             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1444         else if (token->flags & NAMED_OP)
1445           goto spell_ident;
1446         else
1447           spelling = TOKEN_NAME (token);
1448
1449         while ((c = *spelling++) != '\0')
1450           *buffer++ = c;
1451       }
1452       break;
1453
1454     case SPELL_CHAR:
1455       *buffer++ = token->val.c;
1456       break;
1457
1458     spell_ident:
1459     case SPELL_IDENT:
1460       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1461       buffer += NODE_LEN (token->val.node);
1462       break;
1463
1464     case SPELL_NUMBER:
1465       memcpy (buffer, token->val.str.text, token->val.str.len);
1466       buffer += token->val.str.len;
1467       break;
1468
1469     case SPELL_STRING:
1470       {
1471         int left, right, tag;
1472         switch (token->type)
1473           {
1474           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1475           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1476           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1477           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1478           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1479           default:
1480             cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1481             return buffer;
1482           }
1483         if (tag) *buffer++ = tag;
1484         *buffer++ = left;
1485         memcpy (buffer, token->val.str.text, token->val.str.len);
1486         buffer += token->val.str.len;
1487         *buffer++ = right;
1488       }
1489       break;
1490
1491     case SPELL_NONE:
1492       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1493       break;
1494     }
1495
1496   return buffer;
1497 }
1498
1499 /* Returns a token as a null-terminated string.  The string is
1500    temporary, and automatically freed later.  Useful for diagnostics.  */
1501 unsigned char *
1502 cpp_token_as_text (pfile, token)
1503      cpp_reader *pfile;
1504      const cpp_token *token;
1505 {
1506   unsigned int len = cpp_token_len (token);
1507   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1508
1509   end = cpp_spell_token (pfile, token, start);
1510   end[0] = '\0';
1511
1512   return start;
1513 }
1514
1515 /* Used by C front ends.  Should really move to using cpp_token_as_text.  */
1516 const char *
1517 cpp_type2name (type)
1518      enum cpp_ttype type;
1519 {
1520   return (const char *) token_spellings[type].name;
1521 }
1522
1523 /* Writes the spelling of token to FP, without any preceding space.
1524    Separated from cpp_spell_token for efficiency - to avoid stdio
1525    double-buffering.  */
1526 void
1527 cpp_output_token (token, fp)
1528      const cpp_token *token;
1529      FILE *fp;
1530 {
1531   switch (TOKEN_SPELL (token))
1532     {
1533     case SPELL_OPERATOR:
1534       {
1535         const unsigned char *spelling;
1536         int c;
1537
1538         if (token->flags & DIGRAPH)
1539           spelling
1540             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1541         else if (token->flags & NAMED_OP)
1542           goto spell_ident;
1543         else
1544           spelling = TOKEN_NAME (token);
1545
1546         c = *spelling;
1547         do
1548           putc (c, fp);
1549         while ((c = *++spelling) != '\0');
1550       }
1551       break;
1552
1553     case SPELL_CHAR:
1554       putc (token->val.c, fp);
1555       break;
1556
1557     spell_ident:
1558     case SPELL_IDENT:
1559       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1560     break;
1561
1562     case SPELL_NUMBER:
1563       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1564       break;
1565
1566     case SPELL_STRING:
1567       {
1568         int left, right, tag;
1569         switch (token->type)
1570           {
1571           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1572           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1573           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1574           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1575           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1576           default:
1577             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1578             return;
1579           }
1580         if (tag) putc (tag, fp);
1581         putc (left, fp);
1582         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1583         putc (right, fp);
1584       }
1585       break;
1586
1587     case SPELL_NONE:
1588       /* An error, most probably.  */
1589       break;
1590     }
1591 }
1592
1593 /* Compare two tokens.  */
1594 int
1595 _cpp_equiv_tokens (a, b)
1596      const cpp_token *a, *b;
1597 {
1598   if (a->type == b->type && a->flags == b->flags)
1599     switch (TOKEN_SPELL (a))
1600       {
1601       default:                  /* Keep compiler happy.  */
1602       case SPELL_OPERATOR:
1603         return 1;
1604       case SPELL_CHAR:
1605         return a->val.c == b->val.c; /* Character.  */
1606       case SPELL_NONE:
1607         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1608       case SPELL_IDENT:
1609         return a->val.node == b->val.node;
1610       case SPELL_NUMBER:
1611       case SPELL_STRING:
1612         return (a->val.str.len == b->val.str.len
1613                 && !memcmp (a->val.str.text, b->val.str.text,
1614                             a->val.str.len));
1615       }
1616
1617   return 0;
1618 }
1619
1620 /* Returns nonzero if a space should be inserted to avoid an
1621    accidental token paste for output.  For simplicity, it is
1622    conservative, and occasionally advises a space where one is not
1623    needed, e.g. "." and ".2".  */
1624
1625 int
1626 cpp_avoid_paste (pfile, token1, token2)
1627      cpp_reader *pfile;
1628      const cpp_token *token1, *token2;
1629 {
1630   enum cpp_ttype a = token1->type, b = token2->type;
1631   cppchar_t c;
1632
1633   if (token1->flags & NAMED_OP)
1634     a = CPP_NAME;
1635   if (token2->flags & NAMED_OP)
1636     b = CPP_NAME;
1637
1638   c = EOF;
1639   if (token2->flags & DIGRAPH)
1640     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1641   else if (token_spellings[b].category == SPELL_OPERATOR)
1642     c = token_spellings[b].name[0];
1643
1644   /* Quickly get everything that can paste with an '='.  */
1645   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1646     return 1;
1647
1648   switch (a)
1649     {
1650     case CPP_GREATER:   return c == '>' || c == '?';
1651     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1652     case CPP_PLUS:      return c == '+';
1653     case CPP_MINUS:     return c == '-' || c == '>';
1654     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1655     case CPP_MOD:       return c == ':' || c == '>';
1656     case CPP_AND:       return c == '&';
1657     case CPP_OR:        return c == '|';
1658     case CPP_COLON:     return c == ':' || c == '>';
1659     case CPP_DEREF:     return c == '*';
1660     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1661     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1662     case CPP_NAME:      return ((b == CPP_NUMBER
1663                                  && name_p (pfile, &token2->val.str))
1664                                 || b == CPP_NAME
1665                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1666     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1667                                 || c == '.' || c == '+' || c == '-');
1668     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1669                                 && token1->val.c == '@'
1670                                 && (b == CPP_NAME || b == CPP_STRING));
1671     default:            break;
1672     }
1673
1674   return 0;
1675 }
1676
1677 /* Output all the remaining tokens on the current line, and a newline
1678    character, to FP.  Leading whitespace is removed.  If there are
1679    macros, special token padding is not performed.  */
1680 void
1681 cpp_output_line (pfile, fp)
1682      cpp_reader *pfile;
1683      FILE *fp;
1684 {
1685   const cpp_token *token;
1686
1687   token = cpp_get_token (pfile);
1688   while (token->type != CPP_EOF)
1689     {
1690       cpp_output_token (token, fp);
1691       token = cpp_get_token (pfile);
1692       if (token->flags & PREV_WHITE)
1693         putc (' ', fp);
1694     }
1695
1696   putc ('\n', fp);
1697 }
1698
/* Return the numeric value, 0 to 15, of the hexadecimal digit C.
   Both upper and lower case letters are accepted.  Aborts if C is
   not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;

  abort ();
}
1712
1713 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1714    failure if cpplib is not parsing C++ or C99.  Such failure is
1715    silent, and no variables are updated.  Otherwise returns 0, and
1716    warns if -Wtraditional.
1717
1718    [lex.charset]: The character designated by the universal character
1719    name \UNNNNNNNN is that character whose character short name in
1720    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1721    universal character name \uNNNN is that character whose character
1722    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1723    for a universal character name is less than 0x20 or in the range
1724    0x7F-0x9F (inclusive), or if the universal character name
1725    designates a character in the basic source character set, then the
1726    program is ill-formed.
1727
1728    We assume that wchar_t is Unicode, so we don't need to do any
1729    mapping.  Is this ever wrong?
1730
1731    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1732    LIMIT is the end of the string or charconst.  PSTR is updated to
1733    point after the UCS on return, and the UCS is written into PC.  */
1734
1735 static int
1736 maybe_read_ucs (pfile, pstr, limit, pc)
1737      cpp_reader *pfile;
1738      const unsigned char **pstr;
1739      const unsigned char *limit;
1740      unsigned int *pc;
1741 {
1742   const unsigned char *p = *pstr;
1743   unsigned int code = 0;
1744   unsigned int c = *pc, length;
1745
1746   /* Only attempt to interpret a UCS for C++ and C99.  */
1747   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1748     return 1;
1749
1750   if (CPP_WTRADITIONAL (pfile))
1751     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1752
1753   length = (c == 'u' ? 4: 8);
1754
1755   if ((size_t) (limit - p) < length)
1756     {
1757       cpp_error (pfile, "incomplete universal-character-name");
1758       /* Skip to the end to avoid more diagnostics.  */
1759       p = limit;
1760     }
1761   else
1762     {
1763       for (; length; length--, p++)
1764         {
1765           c = *p;
1766           if (ISXDIGIT (c))
1767             code = (code << 4) + hex_digit_value (c);
1768           else
1769             {
1770               cpp_error (pfile,
1771                          "non-hex digit '%c' in universal-character-name", c);
1772               /* We shouldn't skip in case there are multibyte chars.  */
1773               break;
1774             }
1775         }
1776     }
1777
1778 #ifdef TARGET_EBCDIC
1779   cpp_error (pfile, "universal-character-name on EBCDIC target");
1780   code = 0x3f;  /* EBCDIC invalid character */
1781 #else
1782  /* True extended characters are OK.  */
1783   if (code >= 0xa0
1784       && !(code & 0x80000000)
1785       && !(code >= 0xD800 && code <= 0xDFFF))
1786     ;
1787   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1788      hex escapes so that this also works with EBCDIC hosts.  */
1789   else if (code == 0x24 || code == 0x40 || code == 0x60)
1790     ;
1791   /* Don't give another error if one occurred above.  */
1792   else if (length == 0)
1793     cpp_error (pfile, "universal-character-name out of range");
1794 #endif
1795
1796   *pstr = p;
1797   *pc = code;
1798   return 0;
1799 }
1800
1801 /* Interpret an escape sequence, and return its value.  PSTR points to
1802    the input pointer, which is just after the backslash.  LIMIT is how
1803    much text we have.  MASK is a bitmask for the precision for the
1804    destination type (char or wchar_t).  TRADITIONAL, if true, does not
1805    interpret escapes that did not exist in traditional C.
1806
1807    Handles all relevant diagnostics.  */
1808
1809 unsigned int
1810 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1811      cpp_reader *pfile;
1812      const unsigned char **pstr;
1813      const unsigned char *limit;
1814      unsigned HOST_WIDE_INT mask;
1815      int traditional;
1816 {
1817   int unknown = 0;
1818   const unsigned char *str = *pstr;
1819   unsigned int c = *str++;
1820
1821   switch (c)
1822     {
1823     case '\\': case '\'': case '"': case '?': break;
1824     case 'b': c = TARGET_BS;      break;
1825     case 'f': c = TARGET_FF;      break;
1826     case 'n': c = TARGET_NEWLINE; break;
1827     case 'r': c = TARGET_CR;      break;
1828     case 't': c = TARGET_TAB;     break;
1829     case 'v': c = TARGET_VT;      break;
1830
1831     case '(': case '{': case '[': case '%':
1832       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1833          '\%' is used to prevent SCCS from getting confused.  */
1834       unknown = CPP_PEDANTIC (pfile);
1835       break;
1836
1837     case 'a':
1838       if (CPP_WTRADITIONAL (pfile))
1839         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1840       if (!traditional)
1841         c = TARGET_BELL;
1842       break;
1843
1844     case 'e': case 'E':
1845       if (CPP_PEDANTIC (pfile))
1846         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1847       c = TARGET_ESC;
1848       break;
1849
1850     case 'u': case 'U':
1851       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1852       break;
1853
1854     case 'x':
1855       if (CPP_WTRADITIONAL (pfile))
1856         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1857
1858       if (!traditional)
1859         {
1860           unsigned int i = 0, overflow = 0;
1861           int digits_found = 0;
1862
1863           while (str < limit)
1864             {
1865               c = *str;
1866               if (! ISXDIGIT (c))
1867                 break;
1868               str++;
1869               overflow |= i ^ (i << 4 >> 4);
1870               i = (i << 4) + hex_digit_value (c);
1871               digits_found = 1;
1872             }
1873
1874           if (!digits_found)
1875             cpp_error (pfile, "\\x used with no following hex digits");
1876
1877           if (overflow | (i != (i & mask)))
1878             {
1879               cpp_pedwarn (pfile, "hex escape sequence out of range");
1880               i &= mask;
1881             }
1882           c = i;
1883         }
1884       break;
1885
1886     case '0':  case '1':  case '2':  case '3':
1887     case '4':  case '5':  case '6':  case '7':
1888       {
1889         unsigned int i = c - '0';
1890         int count = 0;
1891
1892         while (str < limit && ++count < 3)
1893           {
1894             c = *str;
1895             if (c < '0' || c > '7')
1896               break;
1897             str++;
1898             i = (i << 3) + c - '0';
1899           }
1900
1901         if (i != (i & mask))
1902           {
1903             cpp_pedwarn (pfile, "octal escape sequence out of range");
1904             i &= mask;
1905           }
1906         c = i;
1907       }
1908       break;
1909
1910     default:
1911       unknown = 1;
1912       break;
1913     }
1914
1915   if (unknown)
1916     {
1917       if (ISGRAPH (c))
1918         cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1919       else
1920         cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1921     }
1922
1923   if (c > mask)
1924     cpp_pedwarn (pfile, "escape sequence out of range for character");
1925
1926   *pstr = str;
1927   return c;
1928 }
1929
/* Unless something earlier has already defined them, the maximum
   widths of the narrow and wide character types default to the
   ordinary CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE.  */
#if !defined (MAX_CHAR_TYPE_SIZE)
# define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
#endif

#if !defined (MAX_WCHAR_TYPE_SIZE)
# define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
#endif
1937
1938 /* Interpret a (possibly wide) character constant in TOKEN.
1939    WARN_MULTI warns about multi-character charconsts, if not
1940    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1941    that did not exist in traditional C.  PCHARS_SEEN points to a
1942    variable that is filled in with the number of characters seen.  */
1943 HOST_WIDE_INT
1944 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1945      cpp_reader *pfile;
1946      const cpp_token *token;
1947      int warn_multi;
1948      int traditional;
1949      unsigned int *pchars_seen;
1950 {
1951   const unsigned char *str = token->val.str.text;
1952   const unsigned char *limit = str + token->val.str.len;
1953   unsigned int chars_seen = 0;
1954   unsigned int width, max_chars, c;
1955   unsigned HOST_WIDE_INT mask;
1956   HOST_WIDE_INT result = 0;
1957
1958 #ifdef MULTIBYTE_CHARS
1959   (void) local_mbtowc (NULL, NULL, 0);
1960 #endif
1961
1962   /* Width in bits.  */
1963   if (token->type == CPP_CHAR)
1964     width = MAX_CHAR_TYPE_SIZE;
1965   else
1966     width = MAX_WCHAR_TYPE_SIZE;
1967
1968   if (width < HOST_BITS_PER_WIDE_INT)
1969     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1970   else
1971     mask = ~0;
1972   max_chars = HOST_BITS_PER_WIDE_INT / width;
1973
1974   while (str < limit)
1975     {
1976 #ifdef MULTIBYTE_CHARS
1977       wchar_t wc;
1978       int char_len;
1979
1980       char_len = local_mbtowc (&wc, str, limit - str);
1981       if (char_len == -1)
1982         {
1983           cpp_warning (pfile, "ignoring invalid multibyte character");
1984           c = *str++;
1985         }
1986       else
1987         {
1988           str += char_len;
1989           c = wc;
1990         }
1991 #else
1992       c = *str++;
1993 #endif
1994
1995       if (c == '\\')
1996         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1997
1998 #ifdef MAP_CHARACTER
1999       if (ISPRINT (c))
2000         c = MAP_CHARACTER (c);
2001 #endif
2002
2003       /* Merge character into result; ignore excess chars.  */
2004       if (++chars_seen <= max_chars)
2005         {
2006           if (width < HOST_BITS_PER_WIDE_INT)
2007             result = (result << width) | (c & mask);
2008           else
2009             result = c;
2010         }
2011     }
2012
2013   if (chars_seen == 0)
2014     cpp_error (pfile, "empty character constant");
2015   else if (chars_seen > max_chars)
2016     {
2017       chars_seen = max_chars;
2018       cpp_warning (pfile, "character constant too long");
2019     }
2020   else if (chars_seen > 1 && !traditional && warn_multi)
2021     cpp_warning (pfile, "multi-character character constant");
2022
2023   /* If char type is signed, sign-extend the constant.  The
2024      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
2025   if (token->type == CPP_CHAR && chars_seen)
2026     {
2027       unsigned int nbits = chars_seen * width;
2028       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2029
2030       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2031           || ((result >> (nbits - 1)) & 1) == 0)
2032         result &= mask;
2033       else
2034         result |= ~mask;
2035     }
2036
2037   *pchars_seen = chars_seen;
2038   return result;
2039 }
2040
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  Every macro argument is parenthesized so that an
   expression argument is grouped as the caller wrote it.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
2051
/* A structure that exists only so the compiler will tell us how far
   it must pad after a single char before it can place a union of a
   double and a pointer.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* The offset of the union within struct dummy, used as the alignment
   for buffer sizes.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit-masking only gives
   that result when ALIGN is a power of two.  */
#define CPP_ALIGN(size, align) \
  (((size) + ((align) - 1)) & ~((align) - 1))
2064
2065 /* Create a new allocation buffer.  Place the control block at the end
2066    of the buffer, so that buffer overflows will cause immediate chaos.  */
2067 static _cpp_buff *
2068 new_buff (len)
2069      size_t len;
2070 {
2071   _cpp_buff *result;
2072   unsigned char *base;
2073
2074   if (len < MIN_BUFF_SIZE)
2075     len = MIN_BUFF_SIZE;
2076   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
2077
2078   base = xmalloc (len + sizeof (_cpp_buff));
2079   result = (_cpp_buff *) (base + len);
2080   result->base = base;
2081   result->cur = base;
2082   result->limit = base + len;
2083   result->next = NULL;
2084   return result;
2085 }
2086
2087 /* Place a chain of unwanted allocation buffers on the free list.  */
2088 void
2089 _cpp_release_buff (pfile, buff)
2090      cpp_reader *pfile;
2091      _cpp_buff *buff;
2092 {
2093   _cpp_buff *end = buff;
2094
2095   while (end->next)
2096     end = end->next;
2097   end->next = pfile->free_buffs;
2098   pfile->free_buffs = buff;
2099 }
2100
2101 /* Return a free buffer of size at least MIN_SIZE.  */
2102 _cpp_buff *
2103 _cpp_get_buff (pfile, min_size)
2104      cpp_reader *pfile;
2105      size_t min_size;
2106 {
2107   _cpp_buff *result, **p;
2108
2109   for (p = &pfile->free_buffs;; p = &(*p)->next)
2110     {
2111       size_t size;
2112
2113       if (*p == NULL)
2114         return new_buff (min_size);
2115       result = *p;
2116       size = result->limit - result->base;
2117       /* Return a buffer that's big enough, but don't waste one that's
2118          way too big.  */
2119       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2120         break;
2121     }
2122
2123   *p = result->next;
2124   result->next = NULL;
2125   result->cur = result->base;
2126   return result;
2127 }
2128
2129 /* Creates a new buffer with enough space to hold the uncommitted
2130    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2131    the excess bytes to the new buffer.  Chains the new buffer after
2132    BUFF, and returns the new buffer.  */
2133 _cpp_buff *
2134 _cpp_append_extend_buff (pfile, buff, min_extra)
2135      cpp_reader *pfile;
2136      _cpp_buff *buff;
2137      size_t min_extra;
2138 {
2139   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2140   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2141
2142   buff->next = new_buff;
2143   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2144   return new_buff;
2145 }
2146
2147 /* Creates a new buffer with enough space to hold the uncommitted
2148    remaining bytes of the buffer pointed to by BUFF, and at least
2149    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2150    Chains the new buffer before the buffer pointed to by BUFF, and
2151    updates the pointer to point to the new buffer.  */
2152 void
2153 _cpp_extend_buff (pfile, pbuff, min_extra)
2154      cpp_reader *pfile;
2155      _cpp_buff **pbuff;
2156      size_t min_extra;
2157 {
2158   _cpp_buff *new_buff, *old_buff = *pbuff;
2159   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2160
2161   new_buff = _cpp_get_buff (pfile, size);
2162   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2163   new_buff->next = old_buff;
2164   *pbuff = new_buff;
2165 }
2166
2167 /* Free a chain of buffers starting at BUFF.  */
2168 void
2169 _cpp_free_buff (buff)
2170      _cpp_buff *buff;
2171 {
2172   _cpp_buff *next;
2173
2174   for (; buff; buff = next)
2175     {
2176       next = buff->next;
2177       free (buff->base);
2178     }
2179 }
2180
2181 /* Allocate permanent, unaligned storage of length LEN.  */
2182 unsigned char *
2183 _cpp_unaligned_alloc (pfile, len)
2184      cpp_reader *pfile;
2185      size_t len;
2186 {
2187   _cpp_buff *buff = pfile->u_buff;
2188   unsigned char *result = buff->cur;
2189
2190   if (len > (size_t) (buff->limit - result))
2191     {
2192       buff = _cpp_get_buff (pfile, len);
2193       buff->next = pfile->u_buff;
2194       pfile->u_buff = buff;
2195       result = buff->cur;
2196     }
2197
2198   buff->cur = result + len;
2199   return result;
2200 }
2201
2202 /* Allocate permanent, unaligned storage of length LEN.  */
2203 unsigned char *
2204 _cpp_aligned_alloc (pfile, len)
2205      cpp_reader *pfile;
2206      size_t len;
2207 {
2208   _cpp_buff *buff = pfile->a_buff;
2209   unsigned char *result = buff->cur;
2210
2211   if (len > (size_t) (buff->limit - result))
2212     {
2213       buff = _cpp_get_buff (pfile, len);
2214       buff->next = pfile->a_buff;
2215       pfile->a_buff = buff;
2216       result = buff->cur;
2217     }
2218
2219   buff->cur = result + len;
2220   return result;
2221 }