gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "cpplib.h"
  26 #include "cpphash.h"
  27
  28 /* MULTIBYTE_CHARS support only works for native compilers.
  29    ??? Ideally what we want is to model widechar support after
  30    the current floating point support.  */
  31 #ifdef CROSS_COMPILE
  32 #undef MULTIBYTE_CHARS
  33 #endif
  34
  35 #ifdef MULTIBYTE_CHARS
  36 #include "mbchar.h"
  37 #include <locale.h>
  38 #endif
  39
  40 /* Tokens with SPELL_STRING store their spelling in the token list,
  41    and it's length in the token->val.name.len.  */
  42 enum spell_type
  43 {
  44   SPELL_OPERATOR = 0,
  45   SPELL_CHAR,
  46   SPELL_IDENT,
  47   SPELL_NUMBER,
  48   SPELL_STRING,
  49   SPELL_NONE
  50 };
  51
  52 struct token_spelling
  53 {
  54   enum spell_type category;
  55   const unsigned char *name;
  56 };
  57
  58 static const unsigned char *const digraph_spellings[] =
  59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  60
  61 #define OP(e, s) { SPELL_OPERATOR, U s           },
  62 #define TK(e, s) { s,              U STRINGX (e) },
  63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  64 #undef OP
  65 #undef TK
  66
  67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  70
  71 static void handle_newline PARAMS ((cpp_reader *));
  72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
  73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  74
  75 static int skip_block_comment PARAMS ((cpp_reader *));
  76 static int skip_line_comment PARAMS ((cpp_reader *));
  77 static void adjust_column PARAMS ((cpp_reader *));
  78 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  80 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
  81                                                     const U_CHAR *));
  82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
  83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
  84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  85 static void unterminated PARAMS ((cpp_reader *, int));
  86 static bool trigraph_p PARAMS ((cpp_reader *));
  87 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
  88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  90                                    const unsigned char *, unsigned int *));
  91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  92
  93 static unsigned int hex_digit_value PARAMS ((unsigned int));
  94 static _cpp_buff *new_buff PARAMS ((size_t));
  95
  96 /* Utility routine:
  97
  98    Compares, the token TOKEN to the NUL-terminated string STRING.
  99    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 100
 101 int
 102 cpp_ideq (token, string)
 103      const cpp_token *token;
 104      const char *string;
 105 {
 106   if (token->type != CPP_NAME)
 107     return 0;
 108
 109   return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
 110 }
 111
 112 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
 113    Returns with buffer->cur pointing to the character immediately
 114    following the newline (combination).  */
 115 static void
 116 handle_newline (pfile)
 117      cpp_reader *pfile;
 118 {
 119   cpp_buffer *buffer = pfile->buffer;
 120
 121   /* Handle CR-LF and LF-CR.  Most other implementations (e.g. java)
 122      only accept CR-LF; maybe we should fall back to that behaviour?
 123
 124      NOTE: the EOF case in _cpp_lex_direct currently requires the
 125      buffer->cur != buffer->rlimit test here for 0-length files.  */
 126   if (buffer->cur != buffer->rlimit
 127       && buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
 128     buffer->cur++;
 129
 130   buffer->line_base = buffer->cur;
 131   buffer->col_adjust = 0;
 132   pfile->line++;
 133 }
 134
 135 /* Subroutine of skip_escaped_newlines; called when a 3-character
 136    sequence beginning with "??" is encountered.  buffer->cur points to
 137    the second '?'.
 138
 139    Warn if necessary, and returns true if the sequence forms a
 140    trigraph and the trigraph should be honoured.  */
 141 static bool
 142 trigraph_p (pfile)
 143      cpp_reader *pfile;
 144 {
 145   cpp_buffer *buffer = pfile->buffer;
 146   cppchar_t from_char = buffer->cur[1];
 147   bool accept;
 148
 149   if (!_cpp_trigraph_map[from_char])
 150     return false;
 151
 152   accept = CPP_OPTION (pfile, trigraphs);
 153
 154   /* Don't warn about trigraphs in comments.  */
 155   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 156     {
 157       if (accept)
 158         cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
 159                                "trigraph ??%c converted to %c",
 160                                (int) from_char,
 161                                (int) _cpp_trigraph_map[from_char]);
 162       else if (buffer->cur != buffer->last_Wtrigraphs)
 163         {
 164           buffer->last_Wtrigraphs = buffer->cur;
 165           cpp_warning_with_line (pfile, pfile->line,
 166                                  CPP_BUF_COL (buffer) - 1,
 167                                  "trigraph ??%c ignored", (int) from_char);
 168         }
 169     }
 170
 171   return accept;
 172 }
 173
 174 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
 175    lie in buffer->cur[-1].  Returns the next byte, which will be in
 176    buffer->cur[-1].  This routine performs preprocessing stages 1 and
 177    2 of the ISO C standard.  */
 178 static cppchar_t
 179 skip_escaped_newlines (pfile)
 180      cpp_reader *pfile;
 181 {
 182   cpp_buffer *buffer = pfile->buffer;
 183   cppchar_t next = buffer->cur[-1];
 184
 185   /* Only do this if we apply stages 1 and 2.  */
 186   if (!buffer->from_stage3)
 187     {
 188       const unsigned char *saved_cur;
 189       cppchar_t next1;
 190
 191       do
 192         {
 193           if (buffer->cur == buffer->rlimit)
 194             break;
 195
 196           if (next == '?')
 197             {
 198               if (buffer->cur[0] != '?' || buffer->cur + 1 == buffer->rlimit)
 199                 break;
 200
 201               if (!trigraph_p (pfile))
 202                 break;
 203
 204               /* Translate the trigraph.  */
 205               next = _cpp_trigraph_map[buffer->cur[1]];
 206               buffer->cur += 2;
 207               if (next != '\\' || buffer->cur == buffer->rlimit)
 208                 break;
 209             }
 210
 211           /* We have a backslash, and room for at least one more
 212              character.  Skip horizontal whitespace.  */
 213           saved_cur = buffer->cur;
 214           do
 215             next1 = *buffer->cur++;
 216           while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
 217
 218           if (!is_vspace (next1))
 219             {
 220               buffer->cur = saved_cur;
 221               break;
 222             }
 223
 224           if (saved_cur != buffer->cur - 1
 225               && !pfile->state.lexing_comment)
 226             cpp_warning (pfile, "backslash and newline separated by space");
 227
 228           handle_newline (pfile);
 229           buffer->backup_to = buffer->cur;
 230           if (buffer->cur == buffer->rlimit)
 231             {
 232               cpp_pedwarn (pfile, "backslash-newline at end of file");
 233               next = EOF;
 234             }
 235           else
 236             next = *buffer->cur++;
 237         }
 238       while (next == '\\' || next == '?');
 239     }
 240
 241   return next;
 242 }
 243
 244 /* Obtain the next character, after trigraph conversion and skipping
 245    an arbitrarily long string of escaped newlines.  The common case of
 246    no trigraphs or escaped newlines falls through quickly.  On return,
 247    buffer->backup_to points to where to return to if the character is
 248    not to be processed.  */
 249 static cppchar_t
 250 get_effective_char (pfile)
 251      cpp_reader *pfile;
 252 {
 253   cppchar_t next = EOF;
 254   cpp_buffer *buffer = pfile->buffer;
 255
 256   buffer->backup_to = buffer->cur;
 257   if (buffer->cur < buffer->rlimit)
 258     {
 259       next = *buffer->cur++;
 260       if (__builtin_expect (next == '?' || next == '\\', 0))
 261         next = skip_escaped_newlines (pfile);
 262     }
 263
 264    return next;
 265 }
 266
 267 /* Skip a C-style block comment.  We find the end of the comment by
 268    seeing if an asterisk is before every '/' we encounter.  Returns
 269    non-zero if comment terminated by EOF, zero otherwise.  */
 270 static int
 271 skip_block_comment (pfile)
 272      cpp_reader *pfile;
 273 {
 274   cpp_buffer *buffer = pfile->buffer;
 275   cppchar_t c = EOF, prevc = EOF;
 276
 277   pfile->state.lexing_comment = 1;
 278   while (buffer->cur != buffer->rlimit)
 279     {
 280       prevc = c, c = *buffer->cur++;
 281
 282       /* FIXME: For speed, create a new character class of characters
 283          of interest inside block comments.  */
 284       if (c == '?' || c == '\\')
 285         c = skip_escaped_newlines (pfile);
 286
 287       /* People like decorating comments with '*', so check for '/'
 288          instead for efficiency.  */
 289       if (c == '/')
 290         {
 291           if (prevc == '*')
 292             break;
 293
 294           /* Warn about potential nested comments, but not if the '/'
 295              comes immediately before the true comment delimeter.
 296              Don't bother to get it right across escaped newlines.  */
 297           if (CPP_OPTION (pfile, warn_comments)
 298               && buffer->cur + 1 < buffer->rlimit
 299               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 300             cpp_warning_with_line (pfile,
 301                                    pfile->line, CPP_BUF_COL (buffer),
 302                                    "\"/*\" within comment");
 303         }
 304       else if (is_vspace (c))
 305         handle_newline (pfile);
 306       else if (c == '\t')
 307         adjust_column (pfile);
 308     }
 309
 310   pfile->state.lexing_comment = 0;
 311   return c != '/' || prevc != '*';
 312 }
 313
 314 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 315    terminating newline.  Handles escaped newlines.  Returns non-zero
 316    if a multiline comment.  */
 317 static int
 318 skip_line_comment (pfile)
 319      cpp_reader *pfile;
 320 {
 321   cpp_buffer *buffer = pfile->buffer;
 322   unsigned int orig_line = pfile->line;
 323   cppchar_t c;
 324
 325   pfile->state.lexing_comment = 1;
 326   do
 327     {
 328       if (buffer->cur == buffer->rlimit)
 329         goto at_eof;
 330
 331       c = *buffer->cur++;
 332       if (c == '?' || c == '\\')
 333         c = skip_escaped_newlines (pfile);
 334     }
 335   while (!is_vspace (c));
 336
 337   /* Step back over the newline, except at EOF.  */
 338   buffer->cur--;
 339  at_eof:
 340
 341   pfile->state.lexing_comment = 0;
 342   return orig_line != pfile->line;
 343 }
 344
 345 /* pfile->buffer->cur is one beyond the \t character.  Update
 346    col_adjust so we track the column correctly.  */
 347 static void
 348 adjust_column (pfile)
 349      cpp_reader *pfile;
 350 {
 351   cpp_buffer *buffer = pfile->buffer;
 352   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 353
 354   /* Round it up to multiple of the tabstop, but subtract 1 since the
 355      tab itself occupies a character position.  */
 356   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 357                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 358 }
 359
 360 /* Skips whitespace, saving the next non-whitespace character.
 361    Adjusts pfile->col_adjust to account for tabs.  Without this,
 362    tokens might be assigned an incorrect column.  */
 363 static void
 364 skip_whitespace (pfile, c)
 365      cpp_reader *pfile;
 366      cppchar_t c;
 367 {
 368   cpp_buffer *buffer = pfile->buffer;
 369   unsigned int warned = 0;
 370
 371   do
 372     {
 373       /* Horizontal space always OK.  */
 374       if (c == ' ')
 375         ;
 376       else if (c == '\t')
 377         adjust_column (pfile);
 378       /* Just \f \v or \0 left.  */
 379       else if (c == '\0')
 380         {
 381           if (!warned)
 382             {
 383               cpp_warning (pfile, "null character(s) ignored");
 384               warned = 1;
 385             }
 386         }
 387       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 388         cpp_pedwarn_with_line (pfile, pfile->line,
 389                                CPP_BUF_COL (buffer),
 390                                "%s in preprocessing directive",
 391                                c == '\f' ? "form feed" : "vertical tab");
 392
 393       if (buffer->cur == buffer->rlimit)
 394         return;
 395       c = *buffer->cur++;
 396     }
 397   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 398   while (is_nvspace (c));
 399
 400   buffer->cur--;
 401 }
 402
 403 /* See if the characters of a number token are valid in a name (no
 404    '.', '+' or '-').  */
 405 static int
 406 name_p (pfile, string)
 407      cpp_reader *pfile;
 408      const cpp_string *string;
 409 {
 410   unsigned int i;
 411
 412   for (i = 0; i < string->len; i++)
 413     if (!is_idchar (string->text[i]))
 414       return 0;
 415
 416   return 1;
 417 }
 418
 419 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 420    a critical inner loop.  The common case is an identifier which has
 421    not been split by backslash-newline, does not contain a dollar
 422    sign, and has already been scanned (roughly 10:1 ratio of
 423    seen:unseen identifiers in normal code; the distribution is
 424    Poisson-like).  Second most common case is a new identifier, not
 425    split and no dollar sign.  The other possibilities are rare and
 426    have been relegated to parse_identifier_slow.  */
 427
 428 static cpp_hashnode *
 429 parse_identifier (pfile)
 430      cpp_reader *pfile;
 431 {
 432   cpp_hashnode *result;
 433   const U_CHAR *cur, *rlimit;
 434
 435   /* Fast-path loop.  Skim over a normal identifier.
 436      N.B. ISIDNUM does not include $.  */
 437   cur    = pfile->buffer->cur - 1;
 438   rlimit = pfile->buffer->rlimit;
 439   do
 440     cur++;
 441   while (cur < rlimit && ISIDNUM (*cur));
 442
 443   /* Check for slow-path cases.  */
 444   if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
 445     result = parse_identifier_slow (pfile, cur);
 446   else
 447     {
 448       const U_CHAR *base = pfile->buffer->cur - 1;
 449       result = (cpp_hashnode *)
 450         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 451       pfile->buffer->cur = cur;
 452     }
 453
 454   /* Rarely, identifiers require diagnostics when lexed.
 455      XXX Has to be forced out of the fast path.  */
 456   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 457                         && !pfile->state.skipping, 0))
 458     {
 459       /* It is allowed to poison the same identifier twice.  */
 460       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 461         cpp_error (pfile, "attempt to use poisoned \"%s\"",
 462                    NODE_NAME (result));
 463
 464       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 465          replacement list of a variadic macro.  */
 466       if (result == pfile->spec_nodes.n__VA_ARGS__
 467           && !pfile->state.va_args_ok)
 468         cpp_pedwarn (pfile,
 469         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 470     }
 471
 472   return result;
 473 }
 474
 475 /* Slow path.  This handles identifiers which have been split, and
 476    identifiers which contain dollar signs.  The part of the identifier
 477    from PFILE->buffer->cur-1 to CUR has already been scanned.  */
 478 static cpp_hashnode *
 479 parse_identifier_slow (pfile, cur)
 480      cpp_reader *pfile;
 481      const U_CHAR *cur;
 482 {
 483   cpp_buffer *buffer = pfile->buffer;
 484   const U_CHAR *base = buffer->cur - 1;
 485   struct obstack *stack = &pfile->hash_table->stack;
 486   unsigned int c, saw_dollar = 0, len;
 487
 488   /* Copy the part of the token which is known to be okay.  */
 489   obstack_grow (stack, base, cur - base);
 490
 491   /* Now process the part which isn't.  We are looking at one of
 492      '$', '\\', or '?' on entry to this loop.  */
 493   c = *cur++;
 494   buffer->cur = cur;
 495   do
 496     {
 497       while (is_idchar (c))
 498         {
 499           obstack_1grow (stack, c);
 500
 501           if (c == '$')
 502             saw_dollar++;
 503
 504           if (buffer->cur == buffer->rlimit)
 505             goto at_eof;
 506
 507           c = *buffer->cur++;
 508         }
 509
 510       /* Potential escaped newline?  */
 511       buffer->backup_to = buffer->cur - 1;
 512       if (c != '?' && c != '\\')
 513         break;
 514       c = skip_escaped_newlines (pfile);
 515     }
 516   while (is_idchar (c));
 517
 518   /* Step back over the unwanted char, except at EOF.  */
 519   BACKUP ();
 520  at_eof:
 521
 522   /* $ is not an identifier character in the standard, but is commonly
 523      accepted as an extension.  Don't warn about it in skipped
 524      conditional blocks.  */
 525   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 526     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 527
 528   /* Identifiers are null-terminated.  */
 529   len = obstack_object_size (stack);
 530   obstack_1grow (stack, '\0');
 531
 532   return (cpp_hashnode *)
 533     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
 534 }
 535
 536 /* Parse a number, skipping embedded backslash-newlines.  */
 537 static void
 538 parse_number (pfile, number, c, leading_period)
 539      cpp_reader *pfile;
 540      cpp_string *number;
 541      cppchar_t c;
 542      int leading_period;
 543 {
 544   cpp_buffer *buffer = pfile->buffer;
 545   unsigned char *dest, *limit;
 546
 547   dest = BUFF_FRONT (pfile->u_buff);
 548   limit = BUFF_LIMIT (pfile->u_buff);
 549
 550   /* Place a leading period.  */
 551   if (leading_period)
 552     {
 553       if (dest == limit)
 554         {
 555           _cpp_extend_buff (pfile, &pfile->u_buff, 1);
 556           dest = BUFF_FRONT (pfile->u_buff);
 557           limit = BUFF_LIMIT (pfile->u_buff);
 558         }
 559       *dest++ = '.';
 560     }
 561
 562   do
 563     {
 564       do
 565         {
 566           /* Need room for terminating null.  */
 567           if ((size_t) (limit - dest) < 2)
 568             {
 569               size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 570               _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 571               dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 572               limit = BUFF_LIMIT (pfile->u_buff);
 573             }
 574           *dest++ = c;
 575
 576           if (buffer->cur == buffer->rlimit)
 577             goto at_eof;
 578
 579           c = *buffer->cur++;
 580         }
 581       while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 582
 583       /* Potential escaped newline?  */
 584       buffer->backup_to = buffer->cur - 1;
 585       if (c != '?' && c != '\\')
 586         break;
 587       c = skip_escaped_newlines (pfile);
 588     }
 589   while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
 590
 591   /* Step back over the unwanted char, except at EOF.  */
 592   BACKUP ();
 593  at_eof:
 594
 595   /* Null-terminate the number.  */
 596   *dest = '\0';
 597
 598   number->text = BUFF_FRONT (pfile->u_buff);
 599   number->len = dest - number->text;
 600   BUFF_FRONT (pfile->u_buff) = dest + 1;
 601 }
 602
 603 /* Subroutine of parse_string.  Emits error for unterminated strings.  */
 604 static void
 605 unterminated (pfile, term)
 606      cpp_reader *pfile;
 607      int term;
 608 {
 609   cpp_error (pfile, "missing terminating %c character", term);
 610
 611   if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
 612     {
 613       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
 614                            "possible start of unterminated string literal");
 615       pfile->mls_line = 0;
 616     }
 617 }
 618
 619 /* Subroutine of parse_string.  */
 620 static int
 621 unescaped_terminator_p (pfile, dest)
 622      cpp_reader *pfile;
 623      const unsigned char *dest;
 624 {
 625   const unsigned char *start, *temp;
 626
 627   /* In #include-style directives, terminators are not escapeable.  */
 628   if (pfile->state.angled_headers)
 629     return 1;
 630
 631   start = BUFF_FRONT (pfile->u_buff);
 632
 633   /* An odd number of consecutive backslashes represents an escaped
 634      terminator.  */
 635   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 636     ;
 637
 638   return ((dest - temp) & 1) == 0;
 639 }
 640
 641 /* Parses a string, character constant, or angle-bracketed header file
 642    name.  Handles embedded trigraphs and escaped newlines.  The stored
 643    string is guaranteed NUL-terminated, but it is not guaranteed that
 644    this is the first NUL since embedded NULs are preserved.
 645    Multi-line strings are allowed, but they are deprecated.
 646
 647    When this function returns, buffer->cur points to the next
 648    character to be processed.  */
 649 static void
 650 parse_string (pfile, token, terminator)
 651      cpp_reader *pfile;
 652      cpp_token *token;
 653      cppchar_t terminator;
 654 {
 655   cpp_buffer *buffer = pfile->buffer;
 656   unsigned char *dest, *limit;
 657   cppchar_t c;
 658   bool warned_nulls = false, warned_multi = false;
 659
 660   dest = BUFF_FRONT (pfile->u_buff);
 661   limit = BUFF_LIMIT (pfile->u_buff);
 662
 663   for (;;)
 664     {
 665       /* We need room for another char, possibly the terminating NUL.  */
 666       if ((size_t) (limit - dest) < 1)
 667         {
 668           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 669           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 670           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 671           limit = BUFF_LIMIT (pfile->u_buff);
 672         }
 673
 674       if (buffer->cur == buffer->rlimit)
 675         {
 676           unterminated (pfile, terminator);
 677           break;
 678         }
 679
 680       /* Handle trigraphs, escaped newlines etc.  */
 681       c = *buffer->cur++;
 682       if (c == '?' || c == '\\')
 683         c = skip_escaped_newlines (pfile);
 684
 685       if (c == terminator)
 686         {
 687           if (unescaped_terminator_p (pfile, dest))
 688             break;
 689         }
 690       else if (is_vspace (c))
 691         {
 692           /* In assembly language, silently terminate string and
 693              character literals at end of line.  This is a kludge
 694              around not knowing where comments are.  */
 695           if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
 696             {
 697               buffer->cur--;
 698               break;
 699             }
 700
 701           /* Character constants and header names may not extend over
 702              multiple lines.  In Standard C, neither may strings.
 703              Unfortunately, we accept multiline strings as an
 704              extension, except in #include family directives.  */
 705           if (terminator != '"' || pfile->state.angled_headers)
 706             {
 707               unterminated (pfile, terminator);
 708               buffer->cur--;
 709               break;
 710             }
 711
 712           if (!warned_multi)
 713             {
 714               warned_multi = true;
 715               cpp_pedwarn (pfile, "multi-line string literals are deprecated");
 716             }
 717
 718           if (pfile->mls_line == 0)
 719             {
 720               pfile->mls_line = token->line;
 721               pfile->mls_col = token->col;
 722             }
 723
 724           handle_newline (pfile);
 725           c = '\n';
 726         }
 727       else if (c == '\0' && !warned_nulls)
 728         {
 729           warned_nulls = true;
 730           cpp_warning (pfile, "null character(s) preserved in literal");
 731         }
 732
 733       *dest++ = c;
 734     }
 735
 736   *dest = '\0';
 737
 738   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 739   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 740   BUFF_FRONT (pfile->u_buff) = dest + 1;
 741 }
 742
 743 /* The stored comment includes the comment start and any terminator.  */
 744 static void
 745 save_comment (pfile, token, from)
 746      cpp_reader *pfile;
 747      cpp_token *token;
 748      const unsigned char *from;
 749 {
 750   unsigned char *buffer;
 751   unsigned int len;
 752
 753   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 754
 755   /* C++ comments probably (not definitely) have moved past a new
 756      line, which we don't want to save in the comment.  */
 757   if (is_vspace (pfile->buffer->cur[-1]))
 758     len--;
 759   buffer = _cpp_unaligned_alloc (pfile, len);
 760
 761   token->type = CPP_COMMENT;
 762   token->val.str.len = len;
 763   token->val.str.text = buffer;
 764
 765   buffer[0] = '/';
 766   memcpy (buffer + 1, from, len - 1);
 767 }
 768
 769 /* Allocate COUNT tokens for RUN.  */
 770 void
 771 _cpp_init_tokenrun (run, count)
 772      tokenrun *run;
 773      unsigned int count;
 774 {
 775   run->base = xnewvec (cpp_token, count);
 776   run->limit = run->base + count;
 777   run->next = NULL;
 778 }
 779
 780 /* Returns the next tokenrun, or creates one if there is none.  */
 781 static tokenrun *
 782 next_tokenrun (run)
 783      tokenrun *run;
 784 {
 785   if (run->next == NULL)
 786     {
 787       run->next = xnew (tokenrun);
 788       run->next->prev = run;
 789       _cpp_init_tokenrun (run->next, 250);
 790     }
 791
 792   return run->next;
 793 }
 794
 795 /* Allocate a single token that is invalidated at the same time as the
 796    rest of the tokens on the line.  Has its line and col set to the
 797    same as the last lexed token, so that diagnostics appear in the
 798    right place.  */
 799 cpp_token *
 800 _cpp_temp_token (pfile)
 801      cpp_reader *pfile;
 802 {
 803   cpp_token *old, *result;
 804
 805   old = pfile->cur_token - 1;
 806   if (pfile->cur_token == pfile->cur_run->limit)
 807     {
 808       pfile->cur_run = next_tokenrun (pfile->cur_run);
 809       pfile->cur_token = pfile->cur_run->base;
 810     }
 811
 812   result = pfile->cur_token++;
 813   result->line = old->line;
 814   result->col = old->col;
 815   return result;
 816 }
 817
 818 /* Lex a token into RESULT (external interface).  Takes care of issues
 819    like directive handling, token lookahead, multiple include
 820    opimisation and skipping.  */
 821 const cpp_token *
 822 _cpp_lex_token (pfile)
 823      cpp_reader *pfile;
 824 {
 825   cpp_token *result;
 826
 827   for (;;)
 828     {
 829       if (pfile->cur_token == pfile->cur_run->limit)
 830         {
 831           pfile->cur_run = next_tokenrun (pfile->cur_run);
 832           pfile->cur_token = pfile->cur_run->base;
 833         }
 834
 835       if (pfile->lookaheads)
 836         {
 837           pfile->lookaheads--;
 838           result = pfile->cur_token++;
 839         }
 840       else
 841         result = _cpp_lex_direct (pfile);
 842
 843       if (result->flags & BOL)
 844         {
 845           /* Is this a directive.  If _cpp_handle_directive returns
 846              false, it is an assembler #.  */
 847           if (result->type == CPP_HASH
 848               && !pfile->state.parsing_args
 849               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 850             continue;
 851           if (pfile->cb.line_change && !pfile->state.skipping)
 852             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 853         }
 854
 855       /* We don't skip tokens in directives.  */
 856       if (pfile->state.in_directive)
 857         break;
 858
 859       /* Outside a directive, invalidate controlling macros.  At file
 860          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 861          get here and MI optimisation works.  */
 862       pfile->mi_valid = false;
 863
 864       if (!pfile->state.skipping || result->type == CPP_EOF)
 865         break;
 866     }
 867
 868   return result;
 869 }
 870
 871 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 872   do {                                          \
 873     if (get_effective_char (pfile) == CHAR)     \
 874       result->type = THEN_TYPE;                 \
 875     else                                        \
 876       {                                         \
 877         BACKUP ();                              \
 878         result->type = ELSE_TYPE;               \
 879       }                                         \
 880   } while (0)
 881
 882 /* Lex a token into pfile->cur_token, which is also incremented, to
 883    get diagnostics pointing to the correct location.
 884
 885    Does not handle issues such as token lookahead, multiple-include
 886    optimisation, directives, skipping etc.  This function is only
 887    suitable for use by _cpp_lex_token, and in special cases like
 888    lex_expansion_token which doesn't care for any of these issues.
 889
 890    When meeting a newline, returns CPP_EOF if parsing a directive,
 891    otherwise restarts at the base of the token run unless
 892    pfile->keep_tokens is set.  Returns a pointer to the lexed token.  */
 893 cpp_token *
 894 _cpp_lex_direct (pfile)
 895      cpp_reader *pfile;
 896 {
 897   cppchar_t c;
 898   cpp_buffer *buffer;
 899   const unsigned char *comment_start;
 900   cpp_token *result = pfile->cur_token++;
 901
 902  fresh_line:                /* Reload the buffer and apply its saved flags.  */
 903   buffer = pfile->buffer;
 904   result->flags = buffer->saved_flags;
 905   buffer->saved_flags = 0;
 906  update_tokens_line:        /* Re-stamp the token with the current line.  */
 907   result->line = pfile->line;
 908
 909  skipped_white:             /* Fetch the next character, if there is one.  */
 910   if (buffer->cur == buffer->rlimit)
 911     goto at_eof;
 912   c = *buffer->cur++;
 913   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 914
 915  trigraph:                  /* Also re-entered with a trigraph's value in C.  */
 916   switch (c)
 917     {
 918     at_eof:                 /* Reached only by goto, never by the switch.  */
 919       buffer->saved_flags = BOL;
 920       if (!pfile->state.parsing_args && !pfile->state.in_directive)
 921         {
 922           if (buffer->cur != buffer->line_base)
 923             {
 924               /* Non-empty files should end in a newline.  Don't warn
 925                  for command line and _Pragma buffers.  */
 926               if (!buffer->from_stage3)
 927                 cpp_pedwarn (pfile, "no newline at end of file");
 928               handle_newline (pfile);
 929             }
 930
 931           /* Don't pop the last buffer.  */
 932           if (buffer->prev)
 933             {
 934               unsigned char stop = buffer->return_at_eof; /* Read before the pop.  */
 935
 936               _cpp_pop_buffer (pfile);
 937               if (!stop)
 938                 goto fresh_line;
 939             }
 940         }
 941       result->type = CPP_EOF;
 942       break;
 943
 944     case ' ': case '\t': case '\f': case '\v': case '\0':
 945       skip_whitespace (pfile, c);
 946       result->flags |= PREV_WHITE;
 947       goto skipped_white;
 948
 949     case '\n': case '\r':
 950       handle_newline (pfile);
 951       buffer->saved_flags = BOL;
 952       if (! pfile->state.in_directive)
 953         {
 954           if (pfile->state.parsing_args == 2)
 955             buffer->saved_flags |= PREV_WHITE;
 956           if (!pfile->keep_tokens)
 957             {
 958               pfile->cur_run = &pfile->base_run;
 959               result = pfile->base_run.base;
 960               pfile->cur_token = result + 1;
 961             }
 962           goto fresh_line;
 963         }
 964       result->type = CPP_EOF;
 965       break;
 966
 967     case '?':
 968     case '\\':
 969       /* These could start an escaped newline, or '?' a trigraph.  Let
 970          skip_escaped_newlines do all the work.  */
 971       {
 972         unsigned int line = pfile->line;
 973
 974         c = skip_escaped_newlines (pfile);
 975         if (line != pfile->line)
 976           {
 977             buffer->cur--;     /* Un-read C so that it is fetched afresh.  */
 978             /* We had at least one escaped newline of some sort.
 979                Update the token's line and column.  */
 980             goto update_tokens_line;
 981           }
 982       }
 983
 984       /* We are either the original '?' or '\\', or a trigraph.  */
 985       if (c == '?')
 986         result->type = CPP_QUERY;
 987       else if (c == '\\')
 988         goto random_char;
 989       else
 990         goto trigraph;
 991       break;
 992
 993     case '0': case '1': case '2': case '3': case '4':
 994     case '5': case '6': case '7': case '8': case '9':
 995       result->type = CPP_NUMBER;
 996       parse_number (pfile, &result->val.str, c, 0);
 997       break;
 998
 999     case '$':
1000       if (!CPP_OPTION (pfile, dollars_in_ident))
1001         goto random_char;
1002       /* Fall through...  */
1003
1004     case '_':
1005     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1006     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1007     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1008     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1009     case 'y': case 'z':
1010     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1011     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1012     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1013     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1014     case 'Y': case 'Z':
1015       result->type = CPP_NAME;
1016       result->val.node = parse_identifier (pfile);
1017
1018       /* 'L' may introduce wide characters or strings.  */
1019       if (result->val.node == pfile->spec_nodes.n_L
1020           && buffer->cur < buffer->rlimit)
1021         {
1022           c = *buffer->cur;
1023           if (c == '\'' || c == '"')
1024             {
1025               buffer->cur++;
1026               result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1027               parse_string (pfile, result, c);
1028             }
1029         }
1030       /* Convert named operators to their proper types.  */
1031       else if (result->val.node->flags & NODE_OPERATOR)
1032         {
1033           result->flags |= NAMED_OP;
1034           result->type = result->val.node->value.operator;
1035         }
1036       break;
1037
1038     case '\'':
1039     case '"':
1040       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1041       parse_string (pfile, result, c);
1042       break;
1043
1044     case '/':
1045       /* A potential block or line comment.  */
1046       comment_start = buffer->cur;
1047       c = get_effective_char (pfile);
1048
1049       if (c == '*')
1050         {
1051           if (skip_block_comment (pfile))
1052             cpp_error (pfile, "unterminated comment");
1053         }
1054       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1055                             || CPP_IN_SYSTEM_HEADER (pfile)))
1056         {
1057           /* Warn about comments only if pedantically GNUC89, and not
1058              in system headers.  */
1059           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1060               && ! buffer->warned_cplusplus_comments)
1061             {
1062               cpp_pedwarn (pfile,
1063                            "C++ style comments are not allowed in ISO C89");
1064               cpp_pedwarn (pfile,
1065                            "(this will be reported only once per input file)");
1066               buffer->warned_cplusplus_comments = 1;
1067             }
1068
1069           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1070             cpp_warning (pfile, "multi-line comment");
1071         }
1072       else if (c == '=')
1073         {
1074           result->type = CPP_DIV_EQ;
1075           break;
1076         }
1077       else
1078         {
1079           BACKUP ();
1080           result->type = CPP_DIV;
1081           break;
1082         }
1083
1084       if (!pfile->state.save_comments) /* A dropped comment acts as whitespace.  */
1085         {
1086           result->flags |= PREV_WHITE;
1087           goto update_tokens_line;
1088         }
1089
1090       /* Save the comment as a token in its own right.  */
1091       save_comment (pfile, result, comment_start);
1092       break;
1093
1094     case '<':
1095       if (pfile->state.angled_headers)
1096         {
1097           result->type = CPP_HEADER_NAME;
1098           parse_string (pfile, result, '>');
1099           break;
1100         }
1101
1102       c = get_effective_char (pfile);
1103       if (c == '=')
1104         result->type = CPP_LESS_EQ;
1105       else if (c == '<')
1106         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1107       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1108         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1109       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1110         {
1111           result->type = CPP_OPEN_SQUARE;
1112           result->flags |= DIGRAPH;
1113         }
1114       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1115         {
1116           result->type = CPP_OPEN_BRACE;
1117           result->flags |= DIGRAPH;
1118         }
1119       else
1120         {
1121           BACKUP ();
1122           result->type = CPP_LESS;
1123         }
1124       break;
1125
1126     case '>':
1127       c = get_effective_char (pfile);
1128       if (c == '=')
1129         result->type = CPP_GREATER_EQ;
1130       else if (c == '>')
1131         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1132       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1134       else
1135         {
1136           BACKUP ();
1137           result->type = CPP_GREATER;
1138         }
1139       break;
1140
1141     case '%':
1142       c = get_effective_char (pfile);
1143       if (c == '=')
1144         result->type = CPP_MOD_EQ;
1145       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1146         {
1147           result->flags |= DIGRAPH;
1148           result->type = CPP_HASH;
1149           if (get_effective_char (pfile) == '%')
1150             {
1151               const unsigned char *pos = buffer->cur;
1152
1153               if (get_effective_char (pfile) == ':')
1154                 result->type = CPP_PASTE;
1155               else
1156                 buffer->cur = pos - 1; /* Not "%:%:"; resume one byte before POS.  */
1157             }
1158           else
1159             BACKUP ();
1160         }
1161       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1162         {
1163           result->flags |= DIGRAPH;
1164           result->type = CPP_CLOSE_BRACE;
1165         }
1166       else
1167         {
1168           BACKUP ();
1169           result->type = CPP_MOD;
1170         }
1171       break;
1172
1173     case '.':
1174       result->type = CPP_DOT;
1175       c = get_effective_char (pfile);
1176       if (c == '.')
1177         {
1178           const unsigned char *pos = buffer->cur;
1179
1180           if (get_effective_char (pfile) == '.')
1181             result->type = CPP_ELLIPSIS;
1182           else
1183             buffer->cur = pos - 1; /* Not "..."; resume one byte before POS.  */
1184         }
1185       /* All known character sets have 0...9 contiguous.  */
1186       else if (ISDIGIT (c))
1187         {
1188           result->type = CPP_NUMBER;
1189           parse_number (pfile, &result->val.str, c, 1);
1190         }
1191       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1192         result->type = CPP_DOT_STAR;
1193       else
1194         BACKUP ();
1195       break;
1196
1197     case '+':
1198       c = get_effective_char (pfile);
1199       if (c == '+')
1200         result->type = CPP_PLUS_PLUS;
1201       else if (c == '=')
1202         result->type = CPP_PLUS_EQ;
1203       else
1204         {
1205           BACKUP ();
1206           result->type = CPP_PLUS;
1207         }
1208       break;
1209
1210     case '-':
1211       c = get_effective_char (pfile);
1212       if (c == '>')
1213         {
1214           result->type = CPP_DEREF;
1215           if (CPP_OPTION (pfile, cplusplus))
1216             {
1217               if (get_effective_char (pfile) == '*')
1218                 result->type = CPP_DEREF_STAR;
1219               else
1220                 BACKUP ();
1221             }
1222         }
1223       else if (c == '-')
1224         result->type = CPP_MINUS_MINUS;
1225       else if (c == '=')
1226         result->type = CPP_MINUS_EQ;
1227       else
1228         {
1229           BACKUP ();
1230           result->type = CPP_MINUS;
1231         }
1232       break;
1233
1234     case '&':
1235       c = get_effective_char (pfile);
1236       if (c == '&')
1237         result->type = CPP_AND_AND;
1238       else if (c == '=')
1239         result->type = CPP_AND_EQ;
1240       else
1241         {
1242           BACKUP ();
1243           result->type = CPP_AND;
1244         }
1245       break;
1246
1247     case '|':
1248       c = get_effective_char (pfile);
1249       if (c == '|')
1250         result->type = CPP_OR_OR;
1251       else if (c == '=')
1252         result->type = CPP_OR_EQ;
1253       else
1254         {
1255           BACKUP ();
1256           result->type = CPP_OR;
1257         }
1258       break;
1259
1260     case ':':
1261       c = get_effective_char (pfile);
1262       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1263         result->type = CPP_SCOPE;
1264       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1265         {
1266           result->flags |= DIGRAPH;
1267           result->type = CPP_CLOSE_SQUARE;
1268         }
1269       else
1270         {
1271           BACKUP ();
1272           result->type = CPP_COLON;
1273         }
1274       break;
1275
1276     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1277     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1278     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1279     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1280     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1281
1282     case '~': result->type = CPP_COMPL; break;
1283     case ',': result->type = CPP_COMMA; break;
1284     case '(': result->type = CPP_OPEN_PAREN; break;
1285     case ')': result->type = CPP_CLOSE_PAREN; break;
1286     case '[': result->type = CPP_OPEN_SQUARE; break;
1287     case ']': result->type = CPP_CLOSE_SQUARE; break;
1288     case '{': result->type = CPP_OPEN_BRACE; break;
1289     case '}': result->type = CPP_CLOSE_BRACE; break;
1290     case ';': result->type = CPP_SEMICOLON; break;
1291
1292       /* @ is a punctuator in Objective C.  */
1293     case '@': result->type = CPP_ATSIGN; break;
1294
1295     random_char:            /* Also the target for a lone '\\' or a rejected '$'.  */
1296     default:
1297       result->type = CPP_OTHER;
1298       result->val.c = c;
1299       break;
1300     }
1301
1302   return result;
1303 }
1304
1305 /* An upper bound on the number of bytes needed to spell TOKEN,
1306    including preceding whitespace.  A named operator is spelled from
1307    its identifier node by cpp_spell_token, so it is sized likewise.  */
1308 unsigned int
1309 cpp_token_len (token)
1310      const cpp_token *token;
1311 {
1312   enum spell_type spell = TOKEN_SPELL (token);
1313   unsigned int len = 0;
1314
1315   if (spell == SPELL_IDENT
1316       || (spell == SPELL_OPERATOR && (token->flags & NAMED_OP)))
1317     len = NODE_LEN (token->val.node);
1318   else if (spell == SPELL_NUMBER || spell == SPELL_STRING)
1319     len = token->val.str.len;
1320   /* 1 for whitespace, 4 for comment delimiters.  */
1321   return len + 5;
1322 }
1323
1324 /* Spell TOKEN into BUFFER, which must already be large enough; no NUL
1325    is appended.  Returns a pointer just past the last byte written.  */
1326 unsigned char *
1327 cpp_spell_token (pfile, token, buffer)
1328      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1329      const cpp_token *token;
1330      unsigned char *buffer;
1331 {
1332   enum spell_type how = TOKEN_SPELL (token);
1333
1334   /* Named operators not flagged as digraphs are spelled as names.  */
1335   if (how == SPELL_OPERATOR
1336       && !(token->flags & DIGRAPH) && (token->flags & NAMED_OP))
1337     how = SPELL_IDENT;
1338
1339   switch (how)
1340     {
1341     case SPELL_OPERATOR:
1342       {
1343         const unsigned char *s;
1344
1345         if (token->flags & DIGRAPH)
1346           s = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1347         else
1348           s = TOKEN_NAME (token);
1349         for (; *s != '\0'; s++)
1350           *buffer++ = *s;
1351       }
1352       break;
1353
1354     case SPELL_CHAR:
1355       *buffer++ = token->val.c;
1356       break;
1357
1358     case SPELL_IDENT:
1359       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1360       buffer += NODE_LEN (token->val.node);
1361       break;
1362
1363     case SPELL_NUMBER:
1364       memcpy (buffer, token->val.str.text, token->val.str.len);
1365       buffer += token->val.str.len;
1366       break;
1367
1368     case SPELL_STRING:
1369       {
1370         int opener, closer, wide = 0;
1371
1372         switch (token->type)
1373           {
1374           case CPP_WSTRING:     wide = 1;       /* Fall through.  */
1375           case CPP_STRING:      opener = closer = '"';  break;
1376           case CPP_WCHAR:       wide = 1;       /* Fall through.  */
1377           case CPP_CHAR:        opener = closer = '\''; break;
1378           case CPP_HEADER_NAME: opener = '<'; closer = '>'; break;
1379           default:
1380             cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1381             return buffer;
1382           }
1383         if (wide) *buffer++ = 'L';
1384         *buffer++ = opener;
1385         memcpy (buffer, token->val.str.text, token->val.str.len);
1386         buffer += token->val.str.len;
1387         *buffer++ = closer;
1388       }
1389       break;
1390
1391     case SPELL_NONE:
1392       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1393       break;
1394     }
1395
1396   return buffer;
1397 }
1398
1399 /* Return the spelling of TOKEN as a NUL-terminated string held in
1400    temporary storage from _cpp_unaligned_alloc.  Handy for diagnostics.  */
1401 unsigned char *
1402 cpp_token_as_text (pfile, token)
1403      cpp_reader *pfile;
1404      const cpp_token *token;
1405 {
1406   unsigned char *text, *tail;
1407
1408   text = _cpp_unaligned_alloc (pfile, cpp_token_len (token));
1409   tail = cpp_spell_token (pfile, token, text);
1410   *tail = '\0';
1411
1412   return text;
1413 }
1414
1415 /* Spelling of TYPE, for C front ends; cpp_token_as_text is preferable.  */
1416 const char *
1417 cpp_type2name (type)
1418      enum cpp_ttype type;
1419 {
1420   return (const char *) (token_spellings + type)->name;
1421 }
1422
1423 /* Write the spelling of TOKEN to FP with no preceding space.  Kept
1424    apart from cpp_spell_token to avoid stdio double-buffering.  */
1425 void
1426 cpp_output_token (token, fp)
1427      const cpp_token *token;
1428      FILE *fp;
1429 {
1430   enum spell_type how = TOKEN_SPELL (token);
1431
1432   /* Named operators not flagged as digraphs are written as names.  */
1433   if (how == SPELL_OPERATOR
1434       && !(token->flags & DIGRAPH) && (token->flags & NAMED_OP))
1435     how = SPELL_IDENT;
1436
1437   switch (how)
1438     {
1439     case SPELL_OPERATOR:
1440       {
1441         const unsigned char *s;
1442
1443         if (token->flags & DIGRAPH)
1444           s = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1445         else
1446           s = TOKEN_NAME (token);
1447         putc (*s, fp);          /* Written even if it is a NUL.  */
1448         while (*++s != '\0')
1449           putc (*s, fp);
1450       }
1451       break;
1452
1453     case SPELL_CHAR:
1454       putc (token->val.c, fp);
1455       break;
1456
1457     case SPELL_IDENT:
1458       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1459       break;
1460
1461     case SPELL_NUMBER:
1462       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1463       break;
1464
1465     case SPELL_STRING:
1466       {
1467         int opener, closer, wide = 0;
1468
1469         switch (token->type)
1470           {
1471           case CPP_WSTRING:     wide = 1;       /* Fall through.  */
1472           case CPP_STRING:      opener = closer = '"';  break;
1473           case CPP_WCHAR:       wide = 1;       /* Fall through.  */
1474           case CPP_CHAR:        opener = closer = '\''; break;
1475           case CPP_HEADER_NAME: opener = '<'; closer = '>'; break;
1476           default:
1477             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1478             return;
1479           }
1480         if (wide) putc ('L', fp);
1481         putc (opener, fp);
1482         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1483         putc (closer, fp);
1484       }
1485       break;
1486
1487     case SPELL_NONE:
1488       /* An error, most probably.  */
1489       break;
1490     }
1491 }
1492
1493 /* Return nonzero if tokens A and B are equivalent: same type, same
1494    flags, and the same payload for the way that type is spelled.  */
1495 int
1496 _cpp_equiv_tokens (a, b)
1497      const cpp_token *a, *b;
1498 {
1499   if (a->type != b->type || a->flags != b->flags)
1500     return 0;
1501
1502   switch (TOKEN_SPELL (a))
1503     {
1504     case SPELL_CHAR:
1505       return a->val.c == b->val.c;
1506     case SPELL_IDENT:
1507       return a->val.node == b->val.node;
1508     case SPELL_NONE:
1509       return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1510     case SPELL_NUMBER:
1511     case SPELL_STRING:
1512       return (a->val.str.len == b->val.str.len
1513               && memcmp (a->val.str.text, b->val.str.text,
1514                          a->val.str.len) == 0);
1515     default:                    /* SPELL_OPERATOR and anything else.  */
1516       return 1;
1517     }
1518 }
1519
1520 /* Return nonzero if a space should be written between TOKEN1 and
1521    TOKEN2 on output so that they are not accidentally pasted together.
1522    The test is deliberately conservative, and occasionally advises a
1523    space where one is not needed, e.g. "." and ".2".  */
1524
1525 int
1526 cpp_avoid_paste (pfile, token1, token2)
1527      cpp_reader *pfile;
1528      const cpp_token *token1, *token2;
1529 {
1530   enum cpp_ttype lhs, rhs;
1531   cppchar_t ch = EOF;
1532
1533   /* Named operators are treated as CPP_NAME on either side.  */
1534   lhs = (token1->flags & NAMED_OP) ? CPP_NAME : token1->type;
1535   rhs = (token2->flags & NAMED_OP) ? CPP_NAME : token2->type;
1536
1537   /* CH is the first character of TOKEN2's spelling when it is a
1538      digraph or an operator, and stays EOF otherwise.  */
1539   if (token2->flags & DIGRAPH)
1540     ch = digraph_spellings[(int) rhs - (int) CPP_FIRST_DIGRAPH][0];
1541   else if (token_spellings[rhs].category == SPELL_OPERATOR)
1542     ch = token_spellings[rhs].name[0];
1543
1544   /* Quickly get everything that can paste with an '='.  */
1545   if (ch == '=' && (int) lhs <= (int) CPP_LAST_EQ)
1546     return 1;
1547
1548   switch (lhs)
1549     {
1550     case CPP_GREATER:   return ch == '>' || ch == '?';
1551     case CPP_LESS:      return ch == '<' || ch == '?' || ch == '%' || ch == ':';
1552     case CPP_PLUS:      return ch == '+';
1553     case CPP_MINUS:     return ch == '-' || ch == '>';
1554     case CPP_DIV:       return ch == '/' || ch == '*'; /* Comments.  */
1555     case CPP_MOD:       return ch == ':' || ch == '>';
1556     case CPP_AND:       return ch == '&';
1557     case CPP_OR:        return ch == '|';
1558     case CPP_COLON:     return ch == ':' || ch == '>';
1559     case CPP_DEREF:     return ch == '*';
1560     case CPP_DOT:       return ch == '.' || ch == '%' || rhs == CPP_NUMBER;
1561     case CPP_HASH:      return ch == '#' || ch == '%'; /* Digraph form.  */
1562     case CPP_NAME:      return (rhs == CPP_NAME
1563                                 || rhs == CPP_CHAR || rhs == CPP_STRING /* L */
1564                                 || (rhs == CPP_NUMBER
1565                                     && name_p (pfile, &token2->val.str)));
1566     case CPP_NUMBER:    return (rhs == CPP_NUMBER || rhs == CPP_NAME
1567                                 || ch == '.' || ch == '+' || ch == '-');
1568     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1569                                 && token1->val.c == '@'
1570                                 && (rhs == CPP_NAME || rhs == CPP_STRING));
1571     default:            break;
1572     }
1573
1574   return 0;
1575 }
1576
1577 /* Write the remaining tokens on the current line to FP, then a
1578    newline.  The first token's leading whitespace is dropped.  If
1579    there are macros, special token padding is not performed.  */
1580 void
1581 cpp_output_line (pfile, fp)
1582      cpp_reader *pfile;
1583      FILE *fp;
1584 {
1585   const cpp_token *token;
1586
1587   for (token = cpp_get_token (pfile); token->type != CPP_EOF; )
1588     {
1589       cpp_output_token (token, fp);
1590       /* The next token's PREV_WHITE flag decides the separator.  */
1591       token = cpp_get_token (pfile);
1592       if (token->flags & PREV_WHITE)
1593         putc (' ', fp);
1594     }
1595
1596   putc ('\n', fp);
1597 }
1598
1599 /* Return the numeric value of hex digit C; abort if C is not one.  */
1600 static unsigned int
1601 hex_digit_value (c)
1602      unsigned int c;
1603 {
1604   if (!hex_p (c))
1605     abort ();
1606
1607   return hex_value (c);
1608 }
1609
1610 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1611    failure if cpplib is not parsing C++ or C99.  Such failure is
1612    silent, and no variables are updated.  Otherwise returns 0, and
1613    warns if -Wtraditional.
1614
1615    [lex.charset]: The character designated by the universal character
1616    name \UNNNNNNNN is that character whose character short name in
1617    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1618    universal character name \uNNNN is that character whose character
1619    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1620    for a universal character name is less than 0x20 or in the range
1621    0x7F-0x9F (inclusive), or if the universal character name
1622    designates a character in the basic source character set, then the
1623    program is ill-formed.
1624
1625    We assume that wchar_t is Unicode, so we don't need to do any
1626    mapping.  Is this ever wrong?
1627
1628    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1629    LIMIT is the end of the string or charconst.  PSTR is updated to
1630    point after the UCS on return, and the UCS is written into PC.  */
1631
1632 static int
1633 maybe_read_ucs (pfile, pstr, limit, pc)
1634      cpp_reader *pfile;
1635      const unsigned char **pstr;
1636      const unsigned char *limit;
1637      unsigned int *pc;
1638 {
1639   const unsigned char *p = *pstr;
1640   unsigned int code = 0;
1641   unsigned int c = *pc, length;
1642
1643   /* Only attempt to interpret a UCS for C++ and C99.  */
1644   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1645     return 1;
1646
1647   if (CPP_WTRADITIONAL (pfile))
1648     cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1649
1650   length = (c == 'u' ? 4: 8);
1651
1652   if ((size_t) (limit - p) < length)
1653     {
1654       cpp_error (pfile, "incomplete universal-character-name");
1655       /* Skip to the end to avoid more diagnostics.  */
1656       p = limit;
1657     }
1658   else
1659     {
1660       for (; length; length--, p++)
1661         {
1662           c = *p;
1663           if (ISXDIGIT (c))
1664             code = (code << 4) + hex_digit_value (c);
1665           else
1666             {
1667               cpp_error (pfile,
1668                          "non-hex digit '%c' in universal-character-name", c);
1669               /* We shouldn't skip in case there are multibyte chars.  */
1670               break;
1671             }
1672         }
1673     }
1674
1675 #ifdef TARGET_EBCDIC
1676   cpp_error (pfile, "universal-character-name on EBCDIC target");
1677   code = 0x3f;  /* EBCDIC invalid character */
1678 #else
1679  /* True extended characters are OK.  */
1680   if (code >= 0xa0
1681       && !(code & 0x80000000)
1682       && !(code >= 0xD800 && code <= 0xDFFF))
1683     ;
1684   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1685      hex escapes so that this also works with EBCDIC hosts.  */
1686   else if (code == 0x24 || code == 0x40 || code == 0x60)
1687     ;
1688   /* Don't give another error if one occurred above.  */
1689   else if (length == 0)
1690     cpp_error (pfile, "universal-character-name out of range");
1691 #endif
1692
1693   *pstr = p;
1694   *pc = code;
1695   return 0;
1696 }
1697
1698 /* Interpret the escape sequence whose first character, the one just
1699    after the backslash, is at *PSTR, and return its value.  LIMIT is
1700    the end of the available text and MASK a bitmask giving the
1701    precision of the destination type (char or wchar_t).  TRADITIONAL,
1702    if true, suppresses escapes that did not exist in traditional C.
1703    *PSTR is advanced past the escape.  Handles all relevant
1704    diagnostics.  */
1705
1706 unsigned int
1707 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1708      cpp_reader *pfile;
1709      const unsigned char **pstr;
1710      const unsigned char *limit;
1711      unsigned HOST_WIDE_INT mask;
1712      int traditional;
1713 {
1714   const unsigned char *p = *pstr;
1715   unsigned int c = *p++;
1716   int unknown = 0;
1717
1718   switch (c)
1719     {
1720     /* Escapes that stand for themselves.  */
1721     case '\\': case '\'': case '"': case '?':
1722       break;
1723
1724     /* Simple escapes with a fixed target value.  */
1725     case 'b': c = TARGET_BS;      break;
1726     case 'f': c = TARGET_FF;      break;
1727     case 'n': c = TARGET_NEWLINE; break;
1728     case 'r': c = TARGET_CR;      break;
1729     case 't': c = TARGET_TAB;     break;
1730     case 'v': c = TARGET_VT;      break;
1731
1732     case 'a':
1733       if (CPP_WTRADITIONAL (pfile))
1734         cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1735       if (!traditional)
1736         c = TARGET_BELL;
1737       break;
1738
1739     case 'e': case 'E':
1740       if (CPP_PEDANTIC (pfile))
1741         cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1742       c = TARGET_ESC;
1743       break;
1744
1745     case '(': case '{': case '[': case '%':
1746       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1747          '\%' is used to prevent SCCS from getting confused.  */
1748       unknown = CPP_PEDANTIC (pfile);
1749       break;
1750
1751     case 'u': case 'U':
1752       unknown = maybe_read_ucs (pfile, &p, limit, &c);
1753       break;
1754
1755     case 'x':
1756       if (CPP_WTRADITIONAL (pfile))
1757         cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1758
1759       if (!traditional)
1760         {
1761           const unsigned char *first = p;
1762           unsigned int value = 0, lost = 0;
1763
1764           /* LOST collects any bits shifted out of the top of VALUE.  */
1765           for (; p < limit && ISXDIGIT (*p); p++)
1766             {
1767               lost |= value ^ (value << 4 >> 4);
1768               value = (value << 4) + hex_digit_value (*p);
1769             }
1770
1771           if (p == first)
1772             cpp_error (pfile, "\\x used with no following hex digits");
1773
1774           if (lost != 0 || value != (value & mask))
1775             {
1776               cpp_pedwarn (pfile, "hex escape sequence out of range");
1777               value &= mask;
1778             }
1779           c = value;
1780         }
1781       break;
1782
1783     case '0':  case '1':  case '2':  case '3':
1784     case '4':  case '5':  case '6':  case '7':
1785       {
1786         unsigned int value = c - '0';
1787         int extra;
1788
1789         /* At most two further octal digits belong to the escape.  */
1790         for (extra = 0; extra < 2 && p < limit; extra++)
1791           {
1792             if (*p < '0' || *p > '7')
1793               break;
1794             value = (value << 3) + *p - '0';
1795             p++;
1796           }
1797
1798         if (value != (value & mask))
1799           {
1800             cpp_pedwarn (pfile, "octal escape sequence out of range");
1801             value &= mask;
1802           }
1803         c = value;
1804       }
1805       break;
1806
1807     default:
1808       unknown = 1;
1809       break;
1810     }
1811
1812   /* Report an unrecognized escape, showing the character itself
1813      when ISGRAPH accepts it and its three-digit octal code
1814      otherwise.  */
1815   if (unknown && ISGRAPH (c))
1816     cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1817   else if (unknown)
1818     cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1819
1820   if (c > mask)
1821     cpp_pedwarn (pfile, "escape sequence out of range for character");
1822
1823   *pstr = p;
1824   return c;
1825 }
1826
1827 #ifndef MAX_CHAR_TYPE_SIZE      /* Fall back to CHAR_TYPE_SIZE when unset.  */
1828 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1829 #endif
1830
1831 #ifndef MAX_WCHAR_TYPE_SIZE     /* Likewise fall back to WCHAR_TYPE_SIZE.  */
1832 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1833 #endif
1834
1835 /* Interpret a (possibly wide) character constant in TOKEN.
1836    WARN_MULTI warns about multi-character charconsts, if not
1837    TRADITIONAL.  TRADITIONAL also indicates not to interpret escapes
1838    that did not exist in traditional C.  PCHARS_SEEN points to a
1839    variable that is filled in with the number of characters seen.  */
1840 HOST_WIDE_INT
1841 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1842      cpp_reader *pfile;
1843      const cpp_token *token;
1844      int warn_multi;
1845      int traditional;
1846      unsigned int *pchars_seen;
1847 {
1848   const unsigned char *str = token->val.str.text;
1849   const unsigned char *limit = str + token->val.str.len;
1850   unsigned int chars_seen = 0;
1851   unsigned int width, max_chars, c;
1852   unsigned HOST_WIDE_INT mask;
1853   HOST_WIDE_INT result = 0;
1854
1855 #ifdef MULTIBYTE_CHARS
1856   (void) local_mbtowc (NULL, NULL, 0);
1857 #endif
1858
1859   /* Width in bits.  */
1860   if (token->type == CPP_CHAR)
1861     width = MAX_CHAR_TYPE_SIZE;
1862   else
1863     width = MAX_WCHAR_TYPE_SIZE;
1864
1865   if (width < HOST_BITS_PER_WIDE_INT)
1866     mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1867   else
1868     mask = ~0;
1869   max_chars = HOST_BITS_PER_WIDE_INT / width;
1870
1871   while (str < limit)
1872     {
1873 #ifdef MULTIBYTE_CHARS
1874       wchar_t wc;
1875       int char_len;
1876
1877       char_len = local_mbtowc (&wc, str, limit - str);
1878       if (char_len == -1)
1879         {
1880           cpp_warning (pfile, "ignoring invalid multibyte character");
1881           c = *str++;
1882         }
1883       else
1884         {
1885           str += char_len;
1886           c = wc;
1887         }
1888 #else
1889       c = *str++;
1890 #endif
1891
1892       if (c == '\\')
1893         c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1894
1895 #ifdef MAP_CHARACTER
1896       if (ISPRINT (c))
1897         c = MAP_CHARACTER (c);
1898 #endif
1899
1900       /* Merge character into result; ignore excess chars.  */
1901       if (++chars_seen <= max_chars)
1902         {
1903           if (width < HOST_BITS_PER_WIDE_INT)
1904             result = (result << width) | (c & mask);
1905           else
1906             result = c;
1907         }
1908     }
1909
1910   if (chars_seen == 0)
1911     cpp_error (pfile, "empty character constant");
1912   else if (chars_seen > max_chars)
1913     {
1914       chars_seen = max_chars;
1915       cpp_warning (pfile, "character constant too long");
1916     }
1917   else if (chars_seen > 1 && !traditional && warn_multi)
1918     cpp_warning (pfile, "multi-character character constant");
1919
1920   /* If char type is signed, sign-extend the constant.  The
1921      __CHAR_UNSIGNED__ macro is set by the driver if appropriate.  */
1922   if (token->type == CPP_CHAR && chars_seen)
1923     {
1924       unsigned int nbits = chars_seen * width;
1925       unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
1926
1927       if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1928           || ((result >> (nbits - 1)) & 1) == 0)
1929         result &= mask;
1930       else
1931         result |= ~mask;
1932     }
1933
1934   *pchars_seen = chars_seen;
1935   return result;
1936 }
1937
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest data area a new buffer is given.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice BUFF's remaining room.  Every
   macro argument is parenthesized so that arbitrary expressions may
   be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1952
/* Probe type used to discover a default alignment: the padding the
   compiler inserts after the leading char places U at the alignment
   demanded by its most strictly aligned member.  */
struct dummy
{
  char c;
  union
  {
    double d;
    int *p;
  } u;
};

/* Offset of U within struct dummy, i.e. the alignment of a union of
   a double and a pointer.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))

/* Round SIZE up to a multiple of ALIGN.  The bit trick is only valid
   when ALIGN is a power of two.  ALIGN is evaluated twice.  */
#define CPP_ALIGN(size, align) (((size) + (align) - 1) & ~((align) - 1))
1965
1966 /* Create a new allocation buffer.  Place the control block at the end
1967    of the buffer, so that buffer overflows will cause immediate chaos.  */
1968 static _cpp_buff *
1969 new_buff (len)
1970      size_t len;
1971 {
1972   _cpp_buff *result;
1973   unsigned char *base;
1974
1975   if (len < MIN_BUFF_SIZE)
1976     len = MIN_BUFF_SIZE;
1977   len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1978
1979   base = xmalloc (len + sizeof (_cpp_buff));
1980   result = (_cpp_buff *) (base + len);
1981   result->base = base;
1982   result->cur = base;
1983   result->limit = base + len;
1984   result->next = NULL;
1985   return result;
1986 }
1987
1988 /* Place a chain of unwanted allocation buffers on the free list.  */
1989 void
1990 _cpp_release_buff (pfile, buff)
1991      cpp_reader *pfile;
1992      _cpp_buff *buff;
1993 {
1994   _cpp_buff *end = buff;
1995
1996   while (end->next)
1997     end = end->next;
1998   end->next = pfile->free_buffs;
1999   pfile->free_buffs = buff;
2000 }
2001
2002 /* Return a free buffer of size at least MIN_SIZE.  */
2003 _cpp_buff *
2004 _cpp_get_buff (pfile, min_size)
2005      cpp_reader *pfile;
2006      size_t min_size;
2007 {
2008   _cpp_buff *result, **p;
2009
2010   for (p = &pfile->free_buffs;; p = &(*p)->next)
2011     {
2012       size_t size;
2013
2014       if (*p == NULL)
2015         return new_buff (min_size);
2016       result = *p;
2017       size = result->limit - result->base;
2018       /* Return a buffer that's big enough, but don't waste one that's
2019          way too big.  */
2020       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2021         break;
2022     }
2023
2024   *p = result->next;
2025   result->next = NULL;
2026   result->cur = result->base;
2027   return result;
2028 }
2029
2030 /* Creates a new buffer with enough space to hold the uncommitted
2031    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
2032    the excess bytes to the new buffer.  Chains the new buffer after
2033    BUFF, and returns the new buffer.  */
2034 _cpp_buff *
2035 _cpp_append_extend_buff (pfile, buff, min_extra)
2036      cpp_reader *pfile;
2037      _cpp_buff *buff;
2038      size_t min_extra;
2039 {
2040   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2041   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2042
2043   buff->next = new_buff;
2044   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2045   return new_buff;
2046 }
2047
2048 /* Creates a new buffer with enough space to hold the uncommitted
2049    remaining bytes of the buffer pointed to by BUFF, and at least
2050    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
2051    Chains the new buffer before the buffer pointed to by BUFF, and
2052    updates the pointer to point to the new buffer.  */
2053 void
2054 _cpp_extend_buff (pfile, pbuff, min_extra)
2055      cpp_reader *pfile;
2056      _cpp_buff **pbuff;
2057      size_t min_extra;
2058 {
2059   _cpp_buff *new_buff, *old_buff = *pbuff;
2060   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2061
2062   new_buff = _cpp_get_buff (pfile, size);
2063   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2064   new_buff->next = old_buff;
2065   *pbuff = new_buff;
2066 }
2067
2068 /* Free a chain of buffers starting at BUFF.  */
2069 void
2070 _cpp_free_buff (buff)
2071      _cpp_buff *buff;
2072 {
2073   _cpp_buff *next;
2074
2075   for (; buff; buff = next)
2076     {
2077       next = buff->next;
2078       free (buff->base);
2079     }
2080 }
2081
2082 /* Allocate permanent, unaligned storage of length LEN.  */
2083 unsigned char *
2084 _cpp_unaligned_alloc (pfile, len)
2085      cpp_reader *pfile;
2086      size_t len;
2087 {
2088   _cpp_buff *buff = pfile->u_buff;
2089   unsigned char *result = buff->cur;
2090
2091   if (len > (size_t) (buff->limit - result))
2092     {
2093       buff = _cpp_get_buff (pfile, len);
2094       buff->next = pfile->u_buff;
2095       pfile->u_buff = buff;
2096       result = buff->cur;
2097     }
2098
2099   buff->cur = result + len;
2100   return result;
2101 }
2102
2103 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2104    That buffer is used for growing allocations when saving macro
2105    replacement lists in a #define, and when parsing an answer to an
2106    assertion in #assert, #unassert or #if (and therefore possibly
2107    whilst expanding macros).  It therefore must not be used by any
2108    code that they might call: specifically the lexer and the guts of
2109    the macro expander.
2110
2111    All existing other uses clearly fit this restriction: storing
2112    registered pragmas during initialization.  */
2113 unsigned char *
2114 _cpp_aligned_alloc (pfile, len)
2115      cpp_reader *pfile;
2116      size_t len;
2117 {
2118   _cpp_buff *buff = pfile->a_buff;
2119   unsigned char *result = buff->cur;
2120
2121   if (len > (size_t) (buff->limit - result))
2122     {
2123       buff = _cpp_get_buff (pfile, len);
2124       buff->next = pfile->a_buff;
2125       pfile->a_buff = buff;
2126       result = buff->cur;
2127     }
2128
2129   buff->cur = result + len;
2130   return result;
2131 }