gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 enum spell_type
  30 {
  31   SPELL_OPERATOR = 0,
  32   SPELL_IDENT,
  33   SPELL_LITERAL,
  34   SPELL_NONE
  35 };
  36
  37 struct token_spelling
  38 {
  39   enum spell_type category;
  40   const unsigned char *name;
  41 };
  42
  43 static const unsigned char *const digraph_spellings[] =
  44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  45
  46 #define OP(e, s) { SPELL_OPERATOR, U s           },
  47 #define TK(e, s) { s,              U STRINGX (e) },
  48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  49 #undef OP
  50 #undef TK
  51
  52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  54
  55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  56 static int skip_line_comment PARAMS ((cpp_reader *));
  57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
  59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
  60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
  61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  63                                   cppchar_t));
  64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  65                                     unsigned int, enum cpp_ttype));
  66 static bool warn_in_comment PARAMS ((cpp_reader *, _cpp_line_note *));
  67 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  68 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
  69 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  70
  71 static unsigned int hex_digit_value PARAMS ((unsigned int));
  72 static _cpp_buff *new_buff PARAMS ((size_t));
  73
  74
  75 /* Utility routine:
  76
  77    Compares, the token TOKEN to the NUL-terminated string STRING.
  78    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  79 int
  80 cpp_ideq (token, string)
  81      const cpp_token *token;
  82      const char *string;
  83 {
  84   if (token->type != CPP_NAME)
  85     return 0;
  86
  87   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  88 }
  89
  90 /* Record a note TYPE at byte POS into the current cleaned logical
  91    line.  */
  92 static void
  93 add_line_note (buffer, pos, type)
  94      cpp_buffer *buffer;
  95      const uchar *pos;
  96      unsigned int type;
  97 {
  98   if (buffer->notes_used == buffer->notes_cap)
  99     {
 100       buffer->notes_cap = buffer->notes_cap * 2 + 200;
 101       buffer->notes = (_cpp_line_note *)
 102         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
 103     }
 104
 105   buffer->notes[buffer->notes_used].pos = pos;
 106   buffer->notes[buffer->notes_used].type = type;
 107   buffer->notes_used++;
 108 }
 109
 110 /* Returns with a logical line that contains no escaped newlines or
 111    trigraphs.  This is a time-critical inner loop.  */
 112 void
 113 _cpp_clean_line (pfile)
 114      cpp_reader *pfile;
 115 {
 116   cpp_buffer *buffer;
 117   const uchar *s;
 118   uchar c, *d, *p;
 119
 120   buffer = pfile->buffer;
 121   buffer->cur_note = buffer->notes_used = 0;
 122   buffer->cur = buffer->line_base = buffer->next_line;
 123   buffer->need_line = false;
 124   s = buffer->next_line - 1;
 125
 126   if (!buffer->from_stage3)
 127     {
 128       d = (uchar *) s;
 129
 130       for (;;)
 131         {
 132           c = *++s;
 133           *++d = c;
 134
 135           if (c == '\n' || c == '\r')
 136             {
 137                   /* Handle DOS line endings.  */
 138               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 139                 s++;
 140               if (s == buffer->rlimit)
 141                 break;
 142
 143               /* Escaped?  */
 144               p = d;
 145               while (p != buffer->next_line && is_nvspace (p[-1]))
 146                 p--;
 147               if (p == buffer->next_line || p[-1] != '\\')
 148                 break;
 149
 150               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 151               d = p - 2;
 152               buffer->next_line = p - 1;
 153             }
 154           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 155             {
 156               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 157               add_line_note (buffer, d, s[2]);
 158               if (CPP_OPTION (pfile, trigraphs))
 159                 {
 160                   *d = _cpp_trigraph_map[s[2]];
 161                   s += 2;
 162                 }
 163             }
 164         }
 165     }
 166   else
 167     {
 168       do
 169         s++;
 170       while (*s != '\n' && *s != '\r');
 171       d = (uchar *) s;
 172
 173       /* Handle DOS line endings.  */
 174       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 175         s++;
 176     }
 177
 178   *d = '\n';
 179   /* A sentinel note that should never be processed.  */
 180   add_line_note (buffer, d + 1, '\n');
 181   buffer->next_line = s + 1;
 182 }
 183
 184 /* Return true if the trigraph indicated by NOTE should be warned
 185    about in a comment.  */
 186 static bool
 187 warn_in_comment (pfile, note)
 188      cpp_reader *pfile;
 189      _cpp_line_note *note;
 190 {
 191   const uchar *p;
 192
 193   /* Within comments we don't warn about trigraphs, unless the
 194      trigraph forms an escaped newline, as that may change
 195      behaviour.  */
 196   if (note->type != '/')
 197     return false;
 198
 199   /* If -trigraphs, then this was an escaped newline iff the next note
 200      is coincident.  */
 201   if (CPP_OPTION (pfile, trigraphs))
 202     return note[1].pos == note->pos;
 203
 204   /* Otherwise, see if this forms an escaped newline.  */
 205   p = note->pos + 3;
 206   while (is_nvspace (*p))
 207     p++;
 208
 209   /* There might have been escaped newlines between the trigraph and the
 210      newline we found.  Hence the position test.  */
 211   return (*p == '\n' && p < note[1].pos);
 212 }
 213
 214 /* Process the notes created by add_line_note as far as the current
 215    location.  */
 216 void
 217 _cpp_process_line_notes (pfile, in_comment)
 218      cpp_reader *pfile;
 219      int in_comment;
 220 {
 221   cpp_buffer *buffer = pfile->buffer;
 222
 223   for (;;)
 224     {
 225       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 226       unsigned int col;
 227
 228       if (note->pos > buffer->cur)
 229         break;
 230
 231       buffer->cur_note++;
 232       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 233
 234       if (note->type == '\\' || note->type == ' ')
 235         {
 236           if (note->type == ' ' && !in_comment)
 237             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 238                                  "backslash and newline separated by space");
 239
 240           if (buffer->next_line > buffer->rlimit)
 241             {
 242               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 243                                    "backslash-newline at end of file");
 244               /* Prevent "no newline at end of file" warning.  */
 245               buffer->next_line = buffer->rlimit;
 246             }
 247
 248           buffer->line_base = note->pos;
 249           pfile->line++;
 250         }
 251       else if (_cpp_trigraph_map[note->type])
 252         {
 253           if (CPP_OPTION (pfile, warn_trigraphs)
 254               && (!in_comment || warn_in_comment (pfile, note)))
 255             {
 256               if (CPP_OPTION (pfile, trigraphs))
 257                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 258                                      "trigraph ??%c converted to %c",
 259                                      note->type,
 260                                      (int) _cpp_trigraph_map[note->type]);
 261               else
 262                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 263                                      "trigraph ??%c ignored",
 264                                      note->type);
 265             }
 266         }
 267       else
 268         abort ();
 269     }
 270 }
 271
 272 /* Skip a C-style block comment.  We find the end of the comment by
 273    seeing if an asterisk is before every '/' we encounter.  Returns
 274    nonzero if comment terminated by EOF, zero otherwise.
 275
 276    Buffer->cur points to the initial asterisk of the comment.  */
 277 bool
 278 _cpp_skip_block_comment (pfile)
 279      cpp_reader *pfile;
 280 {
 281   cpp_buffer *buffer = pfile->buffer;
 282   cppchar_t c;
 283
 284   buffer->cur++;
 285   if (*buffer->cur == '/')
 286     buffer->cur++;
 287
 288   for (;;)
 289     {
 290       c = *buffer->cur++;
 291
 292       /* People like decorating comments with '*', so check for '/'
 293          instead for efficiency.  */
 294       if (c == '/')
 295         {
 296           if (buffer->cur[-2] == '*')
 297             break;
 298
 299           /* Warn about potential nested comments, but not if the '/'
 300              comes immediately before the true comment delimiter.
 301              Don't bother to get it right across escaped newlines.  */
 302           if (CPP_OPTION (pfile, warn_comments)
 303               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 304             cpp_error_with_line (pfile, DL_WARNING,
 305                                  pfile->line, CPP_BUF_COL (buffer),
 306                                  "\"/*\" within comment");
 307         }
 308       else if (c == '\n')
 309         {
 310           buffer->cur--;
 311           _cpp_process_line_notes (pfile, true);
 312           if (buffer->next_line >= buffer->rlimit)
 313             return true;
 314           _cpp_clean_line (pfile);
 315           pfile->line++;
 316         }
 317     }
 318
 319   _cpp_process_line_notes (pfile, true);
 320   return false;
 321 }
 322
 323 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 324    terminating newline.  Handles escaped newlines.  Returns nonzero
 325    if a multiline comment.  */
 326 static int
 327 skip_line_comment (pfile)
 328      cpp_reader *pfile;
 329 {
 330   cpp_buffer *buffer = pfile->buffer;
 331   unsigned int orig_line = pfile->line;
 332
 333   while (*buffer->cur != '\n')
 334     buffer->cur++;
 335
 336   _cpp_process_line_notes (pfile, true);
 337   return orig_line != pfile->line;
 338 }
 339
 340 /* Skips whitespace, saving the next non-whitespace character.  */
 341 static void
 342 skip_whitespace (pfile, c)
 343      cpp_reader *pfile;
 344      cppchar_t c;
 345 {
 346   cpp_buffer *buffer = pfile->buffer;
 347   bool saw_NUL = false;
 348
 349   do
 350     {
 351       /* Horizontal space always OK.  */
 352       if (c == ' ' || c == '\t')
 353         ;
 354       /* Just \f \v or \0 left.  */
 355       else if (c == '\0')
 356         saw_NUL = true;
 357       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 358         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 359                              CPP_BUF_COL (buffer),
 360                              "%s in preprocessing directive",
 361                              c == '\f' ? "form feed" : "vertical tab");
 362
 363       c = *buffer->cur++;
 364     }
 365   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 366   while (is_nvspace (c));
 367
 368   if (saw_NUL)
 369     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 370
 371   buffer->cur--;
 372 }
 373
 374 /* See if the characters of a number token are valid in a name (no
 375    '.', '+' or '-').  */
 376 static int
 377 name_p (pfile, string)
 378      cpp_reader *pfile;
 379      const cpp_string *string;
 380 {
 381   unsigned int i;
 382
 383   for (i = 0; i < string->len; i++)
 384     if (!is_idchar (string->text[i]))
 385       return 0;
 386
 387   return 1;
 388 }
 389
 390 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 391    an identifier.  FIRST is TRUE if this starts an identifier.  */
 392 static bool
 393 forms_identifier_p (pfile, first)
 394      cpp_reader *pfile;
 395      int first;
 396 {
 397   cpp_buffer *buffer = pfile->buffer;
 398
 399   if (*buffer->cur == '$')
 400     {
 401       if (!CPP_OPTION (pfile, dollars_in_ident))
 402         return false;
 403
 404       buffer->cur++;
 405       if (CPP_PEDANTIC (pfile)
 406           && !pfile->state.skipping
 407           && !pfile->warned_dollar)
 408         {
 409           pfile->warned_dollar = true;
 410           cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
 411         }
 412
 413       return true;
 414     }
 415
 416   /* Is this a syntactically valid UCN?  */
 417   if (0 && *buffer->cur == '\\'
 418       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 419     {
 420       buffer->cur += 2;
 421       if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
 422         return true;
 423       buffer->cur -= 2;
 424     }
 425
 426   return false;
 427 }
 428
 429 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 430 static cpp_hashnode *
 431 lex_identifier (pfile, base)
 432      cpp_reader *pfile;
 433      const uchar *base;
 434 {
 435   cpp_hashnode *result;
 436   const uchar *cur;
 437
 438   do
 439     {
 440       cur = pfile->buffer->cur;
 441
 442       /* N.B. ISIDNUM does not include $.  */
 443       while (ISIDNUM (*cur))
 444         cur++;
 445
 446       pfile->buffer->cur = cur;
 447     }
 448   while (forms_identifier_p (pfile, false));
 449
 450   result = (cpp_hashnode *)
 451     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 452
 453   /* Rarely, identifiers require diagnostics when lexed.  */
 454   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 455                         && !pfile->state.skipping, 0))
 456     {
 457       /* It is allowed to poison the same identifier twice.  */
 458       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 459         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 460                    NODE_NAME (result));
 461
 462       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 463          replacement list of a variadic macro.  */
 464       if (result == pfile->spec_nodes.n__VA_ARGS__
 465           && !pfile->state.va_args_ok)
 466         cpp_error (pfile, DL_PEDWARN,
 467         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 468     }
 469
 470   return result;
 471 }
 472
 473 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 474 static void
 475 lex_number (pfile, number)
 476      cpp_reader *pfile;
 477      cpp_string *number;
 478 {
 479   const uchar *cur;
 480   const uchar *base;
 481   uchar *dest;
 482
 483   base = pfile->buffer->cur - 1;
 484   do
 485     {
 486       cur = pfile->buffer->cur;
 487
 488       /* N.B. ISIDNUM does not include $.  */
 489       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 490         cur++;
 491
 492       pfile->buffer->cur = cur;
 493     }
 494   while (forms_identifier_p (pfile, false));
 495
 496   number->len = cur - base;
 497   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 498   memcpy (dest, base, number->len);
 499   dest[number->len] = '\0';
 500   number->text = dest;
 501 }
 502
 503 /* Create a token of type TYPE with a literal spelling.  */
 504 static void
 505 create_literal (pfile, token, base, len, type)
 506      cpp_reader *pfile;
 507      cpp_token *token;
 508      const uchar *base;
 509      unsigned int len;
 510      enum cpp_ttype type;
 511 {
 512   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 513
 514   memcpy (dest, base, len);
 515   dest[len] = '\0';
 516   token->type = type;
 517   token->val.str.len = len;
 518   token->val.str.text = dest;
 519 }
 520
 521 /* Lexes a string, character constant, or angle-bracketed header file
 522    name.  The stored string contains the spelling, including opening
 523    quote and leading any leading 'L'.  It returns the type of the
 524    literal, or CPP_OTHER if it was not properly terminated.
 525
 526    The spelling is NUL-terminated, but it is not guaranteed that this
 527    is the first NUL since embedded NULs are preserved.  */
 528 static void
 529 lex_string (pfile, token, base)
 530      cpp_reader *pfile;
 531      cpp_token *token;
 532      const uchar *base;
 533 {
 534   bool saw_NUL = false;
 535   const uchar *cur;
 536   cppchar_t terminator;
 537   enum cpp_ttype type;
 538
 539   cur = base;
 540   terminator = *cur++;
 541   if (terminator == 'L')
 542     terminator = *cur++;
 543   if (terminator == '\"')
 544     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 545   else if (terminator == '\'')
 546     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 547   else
 548     terminator = '>', type = CPP_HEADER_NAME;
 549
 550   for (;;)
 551     {
 552       cppchar_t c = *cur++;
 553
 554       /* In #include-style directives, terminators are not escapable.  */
 555       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 556         cur++;
 557       else if (c == terminator)
 558         break;
 559       else if (c == '\n')
 560         {
 561           cur--;
 562           type = CPP_OTHER;
 563           break;
 564         }
 565       else if (c == '\0')
 566         saw_NUL = true;
 567     }
 568
 569   if (saw_NUL && !pfile->state.skipping)
 570     cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 571
 572   pfile->buffer->cur = cur;
 573   create_literal (pfile, token, base, cur - base, type);
 574 }
 575
 576 /* The stored comment includes the comment start and any terminator.  */
 577 static void
 578 save_comment (pfile, token, from, type)
 579      cpp_reader *pfile;
 580      cpp_token *token;
 581      const unsigned char *from;
 582      cppchar_t type;
 583 {
 584   unsigned char *buffer;
 585   unsigned int len, clen;
 586
 587   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 588
 589   /* C++ comments probably (not definitely) have moved past a new
 590      line, which we don't want to save in the comment.  */
 591   if (is_vspace (pfile->buffer->cur[-1]))
 592     len--;
 593
 594   /* If we are currently in a directive, then we need to store all
 595      C++ comments as C comments internally, and so we need to
 596      allocate a little extra space in that case.
 597
 598      Note that the only time we encounter a directive here is
 599      when we are saving comments in a "#define".  */
 600   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 601
 602   buffer = _cpp_unaligned_alloc (pfile, clen);
 603
 604   token->type = CPP_COMMENT;
 605   token->val.str.len = clen;
 606   token->val.str.text = buffer;
 607
 608   buffer[0] = '/';
 609   memcpy (buffer + 1, from, len - 1);
 610
 611   /* Finish conversion to a C comment, if necessary.  */
 612   if (pfile->state.in_directive && type == '/')
 613     {
 614       buffer[1] = '*';
 615       buffer[clen - 2] = '*';
 616       buffer[clen - 1] = '/';
 617     }
 618 }
 619
 620 /* Allocate COUNT tokens for RUN.  */
 621 void
 622 _cpp_init_tokenrun (run, count)
 623      tokenrun *run;
 624      unsigned int count;
 625 {
 626   run->base = xnewvec (cpp_token, count);
 627   run->limit = run->base + count;
 628   run->next = NULL;
 629 }
 630
 631 /* Returns the next tokenrun, or creates one if there is none.  */
 632 static tokenrun *
 633 next_tokenrun (run)
 634      tokenrun *run;
 635 {
 636   if (run->next == NULL)
 637     {
 638       run->next = xnew (tokenrun);
 639       run->next->prev = run;
 640       _cpp_init_tokenrun (run->next, 250);
 641     }
 642
 643   return run->next;
 644 }
 645
 646 /* Allocate a single token that is invalidated at the same time as the
 647    rest of the tokens on the line.  Has its line and col set to the
 648    same as the last lexed token, so that diagnostics appear in the
 649    right place.  */
 650 cpp_token *
 651 _cpp_temp_token (pfile)
 652      cpp_reader *pfile;
 653 {
 654   cpp_token *old, *result;
 655
 656   old = pfile->cur_token - 1;
 657   if (pfile->cur_token == pfile->cur_run->limit)
 658     {
 659       pfile->cur_run = next_tokenrun (pfile->cur_run);
 660       pfile->cur_token = pfile->cur_run->base;
 661     }
 662
 663   result = pfile->cur_token++;
 664   result->line = old->line;
 665   result->col = old->col;
 666   return result;
 667 }
 668
 669 /* Lex a token into RESULT (external interface).  Takes care of issues
 670    like directive handling, token lookahead, multiple include
 671    optimization and skipping.  */
 672 const cpp_token *
 673 _cpp_lex_token (pfile)
 674      cpp_reader *pfile;
 675 {
 676   cpp_token *result;
 677
 678   for (;;)
 679     {
 680       if (pfile->cur_token == pfile->cur_run->limit)
 681         {
 682           pfile->cur_run = next_tokenrun (pfile->cur_run);
 683           pfile->cur_token = pfile->cur_run->base;
 684         }
 685
 686       if (pfile->lookaheads)
 687         {
 688           pfile->lookaheads--;
 689           result = pfile->cur_token++;
 690         }
 691       else
 692         result = _cpp_lex_direct (pfile);
 693
 694       if (result->flags & BOL)
 695         {
 696           /* Is this a directive.  If _cpp_handle_directive returns
 697              false, it is an assembler #.  */
 698           if (result->type == CPP_HASH
 699               /* 6.10.3 p 11: Directives in a list of macro arguments
 700                  gives undefined behavior.  This implementation
 701                  handles the directive as normal.  */
 702               && pfile->state.parsing_args != 1
 703               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 704             continue;
 705           if (pfile->cb.line_change && !pfile->state.skipping)
 706             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 707         }
 708
 709       /* We don't skip tokens in directives.  */
 710       if (pfile->state.in_directive)
 711         break;
 712
 713       /* Outside a directive, invalidate controlling macros.  At file
 714          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 715          get here and MI optimisation works.  */
 716       pfile->mi_valid = false;
 717
 718       if (!pfile->state.skipping || result->type == CPP_EOF)
 719         break;
 720     }
 721
 722   return result;
 723 }
 724
 725 /* Returns true if a fresh line has been loaded.  */
 726 bool
 727 _cpp_get_fresh_line (pfile)
 728      cpp_reader *pfile;
 729 {
 730   /* We can't get a new line until we leave the current directive.  */
 731   if (pfile->state.in_directive)
 732     return false;
 733
 734   for (;;)
 735     {
 736       cpp_buffer *buffer = pfile->buffer;
 737
 738       if (!buffer->need_line)
 739         return true;
 740
 741       if (buffer->next_line < buffer->rlimit)
 742         {
 743           _cpp_clean_line (pfile);
 744           return true;
 745         }
 746
 747       /* First, get out of parsing arguments state.  */
 748       if (pfile->state.parsing_args)
 749         return false;
 750
 751       /* End of buffer.  Non-empty files should end in a newline.  */
 752       if (buffer->buf != buffer->rlimit
 753           && buffer->next_line > buffer->rlimit
 754           && !buffer->from_stage3)
 755         {
 756           /* Only warn once.  */
 757           buffer->next_line = buffer->rlimit;
 758           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 759                                CPP_BUF_COLUMN (buffer, buffer->cur),
 760                                "no newline at end of file");
 761         }
 762
 763       if (!buffer->prev)
 764         return false;
 765
 766       if (buffer->return_at_eof)
 767         {
 768           _cpp_pop_buffer (pfile);
 769           return false;
 770         }
 771
 772       _cpp_pop_buffer (pfile);
 773     }
 774 }
 775
 776 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 777   do                                                    \
 778     {                                                   \
 779       result->type = ELSE_TYPE;                         \
 780       if (*buffer->cur == CHAR)                         \
 781         buffer->cur++, result->type = THEN_TYPE;        \
 782     }                                                   \
 783   while (0)
 784
 785 /* Lex a token into pfile->cur_token, which is also incremented, to
 786    get diagnostics pointing to the correct location.
 787
 788    Does not handle issues such as token lookahead, multiple-include
 789    optimisation, directives, skipping etc.  This function is only
 790    suitable for use by _cpp_lex_token, and in special cases like
 791    lex_expansion_token which doesn't care for any of these issues.
 792
 793    When meeting a newline, returns CPP_EOF if parsing a directive,
 794    otherwise returns to the start of the token buffer if permissible.
 795    Returns the location of the lexed token.  */
 796 cpp_token *
 797 _cpp_lex_direct (pfile)
 798      cpp_reader *pfile;
 799 {
 800   cppchar_t c;
 801   cpp_buffer *buffer;
 802   const unsigned char *comment_start;
 803   cpp_token *result = pfile->cur_token++;
 804
 805  fresh_line:
 806   result->flags = 0;
 807   if (pfile->buffer->need_line)
 808     {
 809       if (!_cpp_get_fresh_line (pfile))
 810         {
 811           result->type = CPP_EOF;
 812           if (!pfile->state.in_directive)
 813             {
 814               /* Tell the compiler the line number of the EOF token.  */
 815               result->line = pfile->line;
 816               result->flags = BOL;
 817             }
 818           return result;
 819         }
 820       if (!pfile->keep_tokens)
 821         {
 822           pfile->cur_run = &pfile->base_run;
 823           result = pfile->base_run.base;
 824           pfile->cur_token = result + 1;
 825         }
 826       result->flags = BOL;
 827       if (pfile->state.parsing_args == 2)
 828         result->flags |= PREV_WHITE;
 829     }
 830   buffer = pfile->buffer;
 831  update_tokens_line:
 832   result->line = pfile->line;
 833
 834  skipped_white:
 835   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 836       && !pfile->overlaid_buffer)
 837     {
 838       _cpp_process_line_notes (pfile, false);
 839       result->line = pfile->line;
 840     }
 841   c = *buffer->cur++;
 842   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 843
 844   switch (c)
 845     {
 846     case ' ': case '\t': case '\f': case '\v': case '\0':
 847       result->flags |= PREV_WHITE;
 848       skip_whitespace (pfile, c);
 849       goto skipped_white;
 850
 851     case '\n':
 852       pfile->line++;
 853       buffer->need_line = true;
 854       goto fresh_line;
 855
 856     case '0': case '1': case '2': case '3': case '4':
 857     case '5': case '6': case '7': case '8': case '9':
 858       result->type = CPP_NUMBER;
 859       lex_number (pfile, &result->val.str);
 860       break;
 861
 862     case 'L':
 863       /* 'L' may introduce wide characters or strings.  */
 864       if (*buffer->cur == '\'' || *buffer->cur == '"')
 865         {
 866           lex_string (pfile, result, buffer->cur - 1);
 867           break;
 868         }
 869       /* Fall through.  */
 870
 871     case '_':
 872     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 873     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 874     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 875     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 876     case 'y': case 'z':
 877     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 878     case 'G': case 'H': case 'I': case 'J': case 'K':
 879     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 880     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 881     case 'Y': case 'Z':
 882       result->type = CPP_NAME;
 883       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 884
 885       /* Convert named operators to their proper types.  */
 886       if (result->val.node->flags & NODE_OPERATOR)
 887         {
 888           result->flags |= NAMED_OP;
 889           result->type = result->val.node->directive_index;
 890         }
 891       break;
 892
 893     case '\'':
 894     case '"':
 895       lex_string (pfile, result, buffer->cur - 1);
 896       break;
 897
 898     case '/':
 899       /* A potential block or line comment.  */
 900       comment_start = buffer->cur;
 901       c = *buffer->cur;
 902
 903       if (c == '*')
 904         {
 905           if (_cpp_skip_block_comment (pfile))
 906             cpp_error (pfile, DL_ERROR, "unterminated comment");
 907         }
 908       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 909                             || CPP_IN_SYSTEM_HEADER (pfile)))
 910         {
 911           /* Warn about comments only if pedantically GNUC89, and not
 912              in system headers.  */
 913           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 914               && ! buffer->warned_cplusplus_comments)
 915             {
 916               cpp_error (pfile, DL_PEDWARN,
 917                          "C++ style comments are not allowed in ISO C90");
 918               cpp_error (pfile, DL_PEDWARN,
 919                          "(this will be reported only once per input file)");
 920               buffer->warned_cplusplus_comments = 1;
 921             }
 922
 923           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 924             cpp_error (pfile, DL_WARNING, "multi-line comment");
 925         }
 926       else if (c == '=')
 927         {
 928           buffer->cur++;
 929           result->type = CPP_DIV_EQ;
 930           break;
 931         }
 932       else
 933         {
 934           result->type = CPP_DIV;
 935           break;
 936         }
 937
 938       if (!pfile->state.save_comments)
 939         {
 940           result->flags |= PREV_WHITE;
 941           goto update_tokens_line;
 942         }
 943
 944       /* Save the comment as a token in its own right.  */
 945       save_comment (pfile, result, comment_start, c);
 946       break;
 947
 948     case '<':
 949       if (pfile->state.angled_headers)
 950         {
 951           lex_string (pfile, result, buffer->cur - 1);
 952           break;
 953         }
 954
 955       result->type = CPP_LESS;
 956       if (*buffer->cur == '=')
 957         buffer->cur++, result->type = CPP_LESS_EQ;
 958       else if (*buffer->cur == '<')
 959         {
 960           buffer->cur++;
 961           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 962         }
 963       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 964         {
 965           buffer->cur++;
 966           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 967         }
 968       else if (CPP_OPTION (pfile, digraphs))
 969         {
 970           if (*buffer->cur == ':')
 971             {
 972               buffer->cur++;
 973               result->flags |= DIGRAPH;
 974               result->type = CPP_OPEN_SQUARE;
 975             }
 976           else if (*buffer->cur == '%')
 977             {
 978               buffer->cur++;
 979               result->flags |= DIGRAPH;
 980               result->type = CPP_OPEN_BRACE;
 981             }
 982         }
 983       break;
 984
 985     case '>':
 986       result->type = CPP_GREATER;
 987       if (*buffer->cur == '=')
 988         buffer->cur++, result->type = CPP_GREATER_EQ;
 989       else if (*buffer->cur == '>')
 990         {
 991           buffer->cur++;
 992           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
 993         }
 994       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 995         {
 996           buffer->cur++;
 997           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
 998         }
 999       break;
1000
1001     case '%':
1002       result->type = CPP_MOD;
1003       if (*buffer->cur == '=')
1004         buffer->cur++, result->type = CPP_MOD_EQ;
1005       else if (CPP_OPTION (pfile, digraphs))
1006         {
1007           if (*buffer->cur == ':')
1008             {
1009               buffer->cur++;
1010               result->flags |= DIGRAPH;
1011               result->type = CPP_HASH;
1012               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1013                 buffer->cur += 2, result->type = CPP_PASTE;
1014             }
1015           else if (*buffer->cur == '>')
1016             {
1017               buffer->cur++;
1018               result->flags |= DIGRAPH;
1019               result->type = CPP_CLOSE_BRACE;
1020             }
1021         }
1022       break;
1023
1024     case '.':
1025       result->type = CPP_DOT;
1026       if (ISDIGIT (*buffer->cur))
1027         {
1028           result->type = CPP_NUMBER;
1029           lex_number (pfile, &result->val.str);
1030         }
1031       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1032         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1033       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1034         buffer->cur++, result->type = CPP_DOT_STAR;
1035       break;
1036
1037     case '+':
1038       result->type = CPP_PLUS;
1039       if (*buffer->cur == '+')
1040         buffer->cur++, result->type = CPP_PLUS_PLUS;
1041       else if (*buffer->cur == '=')
1042         buffer->cur++, result->type = CPP_PLUS_EQ;
1043       break;
1044
1045     case '-':
1046       result->type = CPP_MINUS;
1047       if (*buffer->cur == '>')
1048         {
1049           buffer->cur++;
1050           result->type = CPP_DEREF;
1051           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1052             buffer->cur++, result->type = CPP_DEREF_STAR;
1053         }
1054       else if (*buffer->cur == '-')
1055         buffer->cur++, result->type = CPP_MINUS_MINUS;
1056       else if (*buffer->cur == '=')
1057         buffer->cur++, result->type = CPP_MINUS_EQ;
1058       break;
1059
1060     case '&':
1061       result->type = CPP_AND;
1062       if (*buffer->cur == '&')
1063         buffer->cur++, result->type = CPP_AND_AND;
1064       else if (*buffer->cur == '=')
1065         buffer->cur++, result->type = CPP_AND_EQ;
1066       break;
1067
1068     case '|':
1069       result->type = CPP_OR;
1070       if (*buffer->cur == '|')
1071         buffer->cur++, result->type = CPP_OR_OR;
1072       else if (*buffer->cur == '=')
1073         buffer->cur++, result->type = CPP_OR_EQ;
1074       break;
1075
1076     case ':':
1077       result->type = CPP_COLON;
1078       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1079         buffer->cur++, result->type = CPP_SCOPE;
1080       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1081         {
1082           buffer->cur++;
1083           result->flags |= DIGRAPH;
1084           result->type = CPP_CLOSE_SQUARE;
1085         }
1086       break;
1087
1088     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1089     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1090     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1091     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1092     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1093
1094     case '?': result->type = CPP_QUERY; break;
1095     case '~': result->type = CPP_COMPL; break;
1096     case ',': result->type = CPP_COMMA; break;
1097     case '(': result->type = CPP_OPEN_PAREN; break;
1098     case ')': result->type = CPP_CLOSE_PAREN; break;
1099     case '[': result->type = CPP_OPEN_SQUARE; break;
1100     case ']': result->type = CPP_CLOSE_SQUARE; break;
1101     case '{': result->type = CPP_OPEN_BRACE; break;
1102     case '}': result->type = CPP_CLOSE_BRACE; break;
1103     case ';': result->type = CPP_SEMICOLON; break;
1104
1105       /* @ is a punctuator in Objective-C.  */
1106     case '@': result->type = CPP_ATSIGN; break;
1107
1108     case '$':
1109     case '\\':
1110       {
1111         const uchar *base = --buffer->cur;
1112
1113         if (forms_identifier_p (pfile, true))
1114           {
1115             result->type = CPP_NAME;
1116             result->val.node = lex_identifier (pfile, base);
1117             break;
1118           }
1119         buffer->cur++;
1120       }
1121
1122     default:
1123       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1124       break;
1125     }
1126
1127   return result;
1128 }
1129
1130 /* An upper bound on the number of bytes needed to spell TOKEN.
1131    Does not include preceding whitespace.  */
1132 unsigned int
1133 cpp_token_len (token)
1134      const cpp_token *token;
1135 {
1136   unsigned int len;
1137
1138   switch (TOKEN_SPELL (token))
1139     {
1140     default:            len = 4;                                break;
1141     case SPELL_LITERAL: len = token->val.str.len;               break;
1142     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1143     }
1144
1145   return len;
1146 }
1147
1148 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1149    already contain the enough space to hold the token's spelling.
1150    Returns a pointer to the character after the last character
1151    written.  */
1152 unsigned char *
1153 cpp_spell_token (pfile, token, buffer)
1154      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1155      const cpp_token *token;
1156      unsigned char *buffer;
1157 {
1158   switch (TOKEN_SPELL (token))
1159     {
1160     case SPELL_OPERATOR:
1161       {
1162         const unsigned char *spelling;
1163         unsigned char c;
1164
1165         if (token->flags & DIGRAPH)
1166           spelling
1167             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1168         else if (token->flags & NAMED_OP)
1169           goto spell_ident;
1170         else
1171           spelling = TOKEN_NAME (token);
1172
1173         while ((c = *spelling++) != '\0')
1174           *buffer++ = c;
1175       }
1176       break;
1177
1178     spell_ident:
1179     case SPELL_IDENT:
1180       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1181       buffer += NODE_LEN (token->val.node);
1182       break;
1183
1184     case SPELL_LITERAL:
1185       memcpy (buffer, token->val.str.text, token->val.str.len);
1186       buffer += token->val.str.len;
1187       break;
1188
1189     case SPELL_NONE:
1190       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1191       break;
1192     }
1193
1194   return buffer;
1195 }
1196
1197 /* Returns TOKEN spelt as a null-terminated string.  The string is
1198    freed when the reader is destroyed.  Useful for diagnostics.  */
1199 unsigned char *
1200 cpp_token_as_text (pfile, token)
1201      cpp_reader *pfile;
1202      const cpp_token *token;
1203 {
1204   unsigned int len = cpp_token_len (token) + 1;
1205   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1206
1207   end = cpp_spell_token (pfile, token, start);
1208   end[0] = '\0';
1209
1210   return start;
1211 }
1212
1213 /* Used by C front ends, which really should move to using
1214    cpp_token_as_text.  */
1215 const char *
1216 cpp_type2name (type)
1217      enum cpp_ttype type;
1218 {
1219   return (const char *) token_spellings[type].name;
1220 }
1221
1222 /* Writes the spelling of token to FP, without any preceding space.
1223    Separated from cpp_spell_token for efficiency - to avoid stdio
1224    double-buffering.  */
1225 void
1226 cpp_output_token (token, fp)
1227      const cpp_token *token;
1228      FILE *fp;
1229 {
1230   switch (TOKEN_SPELL (token))
1231     {
1232     case SPELL_OPERATOR:
1233       {
1234         const unsigned char *spelling;
1235         int c;
1236
1237         if (token->flags & DIGRAPH)
1238           spelling
1239             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1240         else if (token->flags & NAMED_OP)
1241           goto spell_ident;
1242         else
1243           spelling = TOKEN_NAME (token);
1244
1245         c = *spelling;
1246         do
1247           putc (c, fp);
1248         while ((c = *++spelling) != '\0');
1249       }
1250       break;
1251
1252     spell_ident:
1253     case SPELL_IDENT:
1254       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1255     break;
1256
1257     case SPELL_LITERAL:
1258       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1259       break;
1260
1261     case SPELL_NONE:
1262       /* An error, most probably.  */
1263       break;
1264     }
1265 }
1266
1267 /* Compare two tokens.  */
1268 int
1269 _cpp_equiv_tokens (a, b)
1270      const cpp_token *a, *b;
1271 {
1272   if (a->type == b->type && a->flags == b->flags)
1273     switch (TOKEN_SPELL (a))
1274       {
1275       default:                  /* Keep compiler happy.  */
1276       case SPELL_OPERATOR:
1277         return 1;
1278       case SPELL_NONE:
1279         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1280       case SPELL_IDENT:
1281         return a->val.node == b->val.node;
1282       case SPELL_LITERAL:
1283         return (a->val.str.len == b->val.str.len
1284                 && !memcmp (a->val.str.text, b->val.str.text,
1285                             a->val.str.len));
1286       }
1287
1288   return 0;
1289 }
1290
1291 /* Returns nonzero if a space should be inserted to avoid an
1292    accidental token paste for output.  For simplicity, it is
1293    conservative, and occasionally advises a space where one is not
1294    needed, e.g. "." and ".2".  */
1295 int
1296 cpp_avoid_paste (pfile, token1, token2)
1297      cpp_reader *pfile;
1298      const cpp_token *token1, *token2;
1299 {
1300   enum cpp_ttype a = token1->type, b = token2->type;
1301   cppchar_t c;
1302
1303   if (token1->flags & NAMED_OP)
1304     a = CPP_NAME;
1305   if (token2->flags & NAMED_OP)
1306     b = CPP_NAME;
1307
1308   c = EOF;
1309   if (token2->flags & DIGRAPH)
1310     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1311   else if (token_spellings[b].category == SPELL_OPERATOR)
1312     c = token_spellings[b].name[0];
1313
1314   /* Quickly get everything that can paste with an '='.  */
1315   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1316     return 1;
1317
1318   switch (a)
1319     {
1320     case CPP_GREATER:   return c == '>' || c == '?';
1321     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1322     case CPP_PLUS:      return c == '+';
1323     case CPP_MINUS:     return c == '-' || c == '>';
1324     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1325     case CPP_MOD:       return c == ':' || c == '>';
1326     case CPP_AND:       return c == '&';
1327     case CPP_OR:        return c == '|';
1328     case CPP_COLON:     return c == ':' || c == '>';
1329     case CPP_DEREF:     return c == '*';
1330     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1331     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1332     case CPP_NAME:      return ((b == CPP_NUMBER
1333                                  && name_p (pfile, &token2->val.str))
1334                                 || b == CPP_NAME
1335                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1336     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1337                                 || c == '.' || c == '+' || c == '-');
1338                                       /* UCNs */
1339     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1340                                  && b == CPP_NAME)
1341                                 || (CPP_OPTION (pfile, objc)
1342                                     && token1->val.str.text[0] == '@'
1343                                     && (b == CPP_NAME || b == CPP_STRING)));
1344     default:            break;
1345     }
1346
1347   return 0;
1348 }
1349
1350 /* Output all the remaining tokens on the current line, and a newline
1351    character, to FP.  Leading whitespace is removed.  If there are
1352    macros, special token padding is not performed.  */
1353 void
1354 cpp_output_line (pfile, fp)
1355      cpp_reader *pfile;
1356      FILE *fp;
1357 {
1358   const cpp_token *token;
1359
1360   token = cpp_get_token (pfile);
1361   while (token->type != CPP_EOF)
1362     {
1363       cpp_output_token (token, fp);
1364       token = cpp_get_token (pfile);
1365       if (token->flags & PREV_WHITE)
1366         putc (' ', fp);
1367     }
1368
1369   putc ('\n', fp);
1370 }
1371
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1382
1383 /* Read a possible universal character name starting at *PSTR.  */
1384 static cppchar_t
1385 maybe_read_ucn (pfile, pstr)
1386      cpp_reader *pfile;
1387      const uchar **pstr;
1388 {
1389   cppchar_t result, c = (*pstr)[-1];
1390
1391   result = _cpp_valid_ucn (pfile, pstr, false);
1392   if (result)
1393     {
1394       if (CPP_WTRADITIONAL (pfile))
1395         cpp_error (pfile, DL_WARNING,
1396                    "the meaning of '\\%c' is different in traditional C",
1397                    (int) c);
1398
1399       if (CPP_OPTION (pfile, EBCDIC))
1400         {
1401           cpp_error (pfile, DL_ERROR,
1402                      "universal character with an EBCDIC target");
1403           result = 0x3f;  /* EBCDIC invalid character */
1404         }
1405     }
1406
1407   return result;
1408 }
1409
1410 /* Returns the value of an escape sequence, truncated to the correct
1411    target precision.  PSTR points to the input pointer, which is just
1412    after the backslash.  LIMIT is how much text we have.  WIDE is true
1413    if the escape sequence is part of a wide character constant or
1414    string literal.  Handles all relevant diagnostics.  */
1415 cppchar_t
1416 cpp_parse_escape (pfile, pstr, limit, wide)
1417      cpp_reader *pfile;
1418      const unsigned char **pstr;
1419      const unsigned char *limit;
1420      int wide;
1421 {
1422   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1423   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1424   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1425
1426   int unknown = 0;
1427   const unsigned char *str = *pstr, *charconsts;
1428   cppchar_t c, ucn, mask;
1429   unsigned int width;
1430
1431   if (CPP_OPTION (pfile, EBCDIC))
1432     charconsts = ebcdic;
1433   else
1434     charconsts = ascii;
1435
1436   if (wide)
1437     width = CPP_OPTION (pfile, wchar_precision);
1438   else
1439     width = CPP_OPTION (pfile, char_precision);
1440   if (width < BITS_PER_CPPCHAR_T)
1441     mask = ((cppchar_t) 1 << width) - 1;
1442   else
1443     mask = ~0;
1444
1445   c = *str++;
1446   switch (c)
1447     {
1448     case '\\': case '\'': case '"': case '?': break;
1449     case 'b': c = charconsts[1];  break;
1450     case 'f': c = charconsts[3];  break;
1451     case 'n': c = charconsts[4];  break;
1452     case 'r': c = charconsts[5];  break;
1453     case 't': c = charconsts[6];  break;
1454     case 'v': c = charconsts[7];  break;
1455
1456     case '(': case '{': case '[': case '%':
1457       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1458          '\%' is used to prevent SCCS from getting confused.  */
1459       unknown = CPP_PEDANTIC (pfile);
1460       break;
1461
1462     case 'a':
1463       if (CPP_WTRADITIONAL (pfile))
1464         cpp_error (pfile, DL_WARNING,
1465                    "the meaning of '\\a' is different in traditional C");
1466       c = charconsts[0];
1467       break;
1468
1469     case 'e': case 'E':
1470       if (CPP_PEDANTIC (pfile))
1471         cpp_error (pfile, DL_PEDWARN,
1472                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1473       c = charconsts[2];
1474       break;
1475
1476     case 'u': case 'U':
1477       ucn = maybe_read_ucn (pfile, &str);
1478       if (ucn)
1479         c = ucn;
1480       else
1481         unknown = true;
1482       break;
1483
1484     case 'x':
1485       if (CPP_WTRADITIONAL (pfile))
1486         cpp_error (pfile, DL_WARNING,
1487                    "the meaning of '\\x' is different in traditional C");
1488
1489       {
1490         cppchar_t i = 0, overflow = 0;
1491         int digits_found = 0;
1492
1493         while (str < limit)
1494           {
1495             c = *str;
1496             if (! ISXDIGIT (c))
1497               break;
1498             str++;
1499             overflow |= i ^ (i << 4 >> 4);
1500             i = (i << 4) + hex_digit_value (c);
1501             digits_found = 1;
1502           }
1503
1504         if (!digits_found)
1505           cpp_error (pfile, DL_ERROR,
1506                        "\\x used with no following hex digits");
1507
1508         if (overflow | (i != (i & mask)))
1509           {
1510             cpp_error (pfile, DL_PEDWARN,
1511                        "hex escape sequence out of range");
1512             i &= mask;
1513           }
1514         c = i;
1515       }
1516       break;
1517
1518     case '0':  case '1':  case '2':  case '3':
1519     case '4':  case '5':  case '6':  case '7':
1520       {
1521         size_t count = 0;
1522         cppchar_t i = c - '0';
1523
1524         while (str < limit && ++count < 3)
1525           {
1526             c = *str;
1527             if (c < '0' || c > '7')
1528               break;
1529             str++;
1530             i = (i << 3) + c - '0';
1531           }
1532
1533         if (i != (i & mask))
1534           {
1535             cpp_error (pfile, DL_PEDWARN,
1536                        "octal escape sequence out of range");
1537             i &= mask;
1538           }
1539         c = i;
1540       }
1541       break;
1542
1543     default:
1544       unknown = 1;
1545       break;
1546     }
1547
1548   if (unknown)
1549     {
1550       if (ISGRAPH (c))
1551         cpp_error (pfile, DL_PEDWARN,
1552                    "unknown escape sequence '\\%c'", (int) c);
1553       else
1554         cpp_error (pfile, DL_PEDWARN,
1555                    "unknown escape sequence: '\\%03o'", (int) c);
1556     }
1557
1558   if (c > mask)
1559     {
1560       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1561       c &= mask;
1562     }
1563
1564   *pstr = str;
1565   return c;
1566 }
1567
1568 /* Interpret a (possibly wide) character constant in TOKEN.
1569    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1570    points to a variable that is filled in with the number of
1571    characters seen, and UNSIGNEDP to a variable that indicates whether
1572    the result has signed type.  */
1573 cppchar_t
1574 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1575      cpp_reader *pfile;
1576      const cpp_token *token;
1577      unsigned int *pchars_seen;
1578      int *unsignedp;
1579 {
1580   const unsigned char *str, *limit;
1581   unsigned int chars_seen = 0;
1582   size_t width, max_chars;
1583   cppchar_t c, mask, result = 0;
1584   bool unsigned_p;
1585
1586   str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1587   limit = token->val.str.text + token->val.str.len - 1;
1588
1589   if (token->type == CPP_CHAR)
1590     {
1591       width = CPP_OPTION (pfile, char_precision);
1592       max_chars = CPP_OPTION (pfile, int_precision) / width;
1593       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1594     }
1595   else
1596     {
1597       width = CPP_OPTION (pfile, wchar_precision);
1598       max_chars = 1;
1599       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1600     }
1601
1602   if (width < BITS_PER_CPPCHAR_T)
1603     mask = ((cppchar_t) 1 << width) - 1;
1604   else
1605     mask = ~0;
1606
1607   while (str < limit)
1608     {
1609       c = *str++;
1610
1611       if (c == '\\')
1612         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1613
1614 #ifdef MAP_CHARACTER
1615       if (ISPRINT (c))
1616         c = MAP_CHARACTER (c);
1617 #endif
1618
1619       chars_seen++;
1620
1621       /* Truncate the character, scale the result and merge the two.  */
1622       c &= mask;
1623       if (width < BITS_PER_CPPCHAR_T)
1624         result = (result << width) | c;
1625       else
1626         result = c;
1627     }
1628
1629   if (chars_seen == 0)
1630     cpp_error (pfile, DL_ERROR, "empty character constant");
1631   else if (chars_seen > 1)
1632     {
1633       /* Multichar charconsts are of type int and therefore signed.  */
1634       unsigned_p = 0;
1635
1636       if (chars_seen > max_chars)
1637         {
1638           chars_seen = max_chars;
1639           cpp_error (pfile, DL_WARNING,
1640                      "character constant too long for its type");
1641         }
1642       else if (CPP_OPTION (pfile, warn_multichar))
1643         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1644     }
1645
1646   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1647      in WIDTH bits, but for multi-char charconsts it's value is the
1648      full target type's width.  */
1649   if (chars_seen > 1)
1650     width *= max_chars;
1651   if (width < BITS_PER_CPPCHAR_T)
1652     {
1653       mask = ((cppchar_t) 1 << width) - 1;
1654       if (unsigned_p || !(result & (1 << (width - 1))))
1655         result &= mask;
1656       else
1657         result |= ~mask;
1658     }
1659
1660   *pchars_seen = chars_seen;
1661   *unsignedp = unsigned_p;
1662   return result;
1663 }
1664
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Both
   macro arguments are parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1679
1680 /* Create a new allocation buffer.  Place the control block at the end
1681    of the buffer, so that buffer overflows will cause immediate chaos.  */
1682 static _cpp_buff *
1683 new_buff (len)
1684      size_t len;
1685 {
1686   _cpp_buff *result;
1687   unsigned char *base;
1688
1689   if (len < MIN_BUFF_SIZE)
1690     len = MIN_BUFF_SIZE;
1691   len = CPP_ALIGN (len);
1692
1693   base = xmalloc (len + sizeof (_cpp_buff));
1694   result = (_cpp_buff *) (base + len);
1695   result->base = base;
1696   result->cur = base;
1697   result->limit = base + len;
1698   result->next = NULL;
1699   return result;
1700 }
1701
1702 /* Place a chain of unwanted allocation buffers on the free list.  */
1703 void
1704 _cpp_release_buff (pfile, buff)
1705      cpp_reader *pfile;
1706      _cpp_buff *buff;
1707 {
1708   _cpp_buff *end = buff;
1709
1710   while (end->next)
1711     end = end->next;
1712   end->next = pfile->free_buffs;
1713   pfile->free_buffs = buff;
1714 }
1715
1716 /* Return a free buffer of size at least MIN_SIZE.  */
1717 _cpp_buff *
1718 _cpp_get_buff (pfile, min_size)
1719      cpp_reader *pfile;
1720      size_t min_size;
1721 {
1722   _cpp_buff *result, **p;
1723
1724   for (p = &pfile->free_buffs;; p = &(*p)->next)
1725     {
1726       size_t size;
1727
1728       if (*p == NULL)
1729         return new_buff (min_size);
1730       result = *p;
1731       size = result->limit - result->base;
1732       /* Return a buffer that's big enough, but don't waste one that's
1733          way too big.  */
1734       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1735         break;
1736     }
1737
1738   *p = result->next;
1739   result->next = NULL;
1740   result->cur = result->base;
1741   return result;
1742 }
1743
1744 /* Creates a new buffer with enough space to hold the uncommitted
1745    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1746    the excess bytes to the new buffer.  Chains the new buffer after
1747    BUFF, and returns the new buffer.  */
1748 _cpp_buff *
1749 _cpp_append_extend_buff (pfile, buff, min_extra)
1750      cpp_reader *pfile;
1751      _cpp_buff *buff;
1752      size_t min_extra;
1753 {
1754   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1755   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1756
1757   buff->next = new_buff;
1758   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1759   return new_buff;
1760 }
1761
1762 /* Creates a new buffer with enough space to hold the uncommitted
1763    remaining bytes of the buffer pointed to by BUFF, and at least
1764    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1765    Chains the new buffer before the buffer pointed to by BUFF, and
1766    updates the pointer to point to the new buffer.  */
1767 void
1768 _cpp_extend_buff (pfile, pbuff, min_extra)
1769      cpp_reader *pfile;
1770      _cpp_buff **pbuff;
1771      size_t min_extra;
1772 {
1773   _cpp_buff *new_buff, *old_buff = *pbuff;
1774   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1775
1776   new_buff = _cpp_get_buff (pfile, size);
1777   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1778   new_buff->next = old_buff;
1779   *pbuff = new_buff;
1780 }
1781
1782 /* Free a chain of buffers starting at BUFF.  */
1783 void
1784 _cpp_free_buff (buff)
1785      _cpp_buff *buff;
1786 {
1787   _cpp_buff *next;
1788
1789   for (; buff; buff = next)
1790     {
1791       next = buff->next;
1792       free (buff->base);
1793     }
1794 }
1795
1796 /* Allocate permanent, unaligned storage of length LEN.  */
1797 unsigned char *
1798 _cpp_unaligned_alloc (pfile, len)
1799      cpp_reader *pfile;
1800      size_t len;
1801 {
1802   _cpp_buff *buff = pfile->u_buff;
1803   unsigned char *result = buff->cur;
1804
1805   if (len > (size_t) (buff->limit - result))
1806     {
1807       buff = _cpp_get_buff (pfile, len);
1808       buff->next = pfile->u_buff;
1809       pfile->u_buff = buff;
1810       result = buff->cur;
1811     }
1812
1813   buff->cur = result + len;
1814   return result;
1815 }
1816
1817 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1818    That buffer is used for growing allocations when saving macro
1819    replacement lists in a #define, and when parsing an answer to an
1820    assertion in #assert, #unassert or #if (and therefore possibly
1821    whilst expanding macros).  It therefore must not be used by any
1822    code that they might call: specifically the lexer and the guts of
1823    the macro expander.
1824
1825    All existing other uses clearly fit this restriction: storing
1826    registered pragmas during initialization.  */
1827 unsigned char *
1828 _cpp_aligned_alloc (pfile, len)
1829      cpp_reader *pfile;
1830      size_t len;
1831 {
1832   _cpp_buff *buff = pfile->a_buff;
1833   unsigned char *result = buff->cur;
1834
1835   if (len > (size_t) (buff->limit - result))
1836     {
1837       buff = _cpp_get_buff (pfile, len);
1838       buff->next = pfile->a_buff;
1839       pfile->a_buff = buff;
1840       result = buff->cur;
1841     }
1842
1843   buff->cur = result + len;
1844   return result;
1845 }