gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 enum spell_type
  30 {
  31   SPELL_OPERATOR = 0,
  32   SPELL_IDENT,
  33   SPELL_LITERAL,
  34   SPELL_NONE
  35 };
  36
  37 struct token_spelling
  38 {
  39   enum spell_type category;
  40   const unsigned char *name;
  41 };
  42
  43 static const unsigned char *const digraph_spellings[] =
  44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  45
  46 #define OP(e, s) { SPELL_OPERATOR, U s           },
  47 #define TK(e, s) { s,              U STRINGX (e) },
  48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  49 #undef OP
  50 #undef TK
  51
  52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  54
  55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  56 static int skip_line_comment PARAMS ((cpp_reader *));
  57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
  59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
  60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
  61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  63                                   cppchar_t));
  64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  65                                     unsigned int, enum cpp_ttype));
  66 static bool warn_in_comment PARAMS ((cpp_reader *, _cpp_line_note *));
  67 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  68 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
  69 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  70
  71 static unsigned int hex_digit_value PARAMS ((unsigned int));
  72 static _cpp_buff *new_buff PARAMS ((size_t));
  73
  74
  75 /* Utility routine:
  76
  77    Compares, the token TOKEN to the NUL-terminated string STRING.
  78    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  79 int
  80 cpp_ideq (token, string)
  81      const cpp_token *token;
  82      const char *string;
  83 {
  84   if (token->type != CPP_NAME)
  85     return 0;
  86
  87   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  88 }
  89
  90 /* Record a note TYPE at byte POS into the current cleaned logical
  91    line.  */
  92 static void
  93 add_line_note (buffer, pos, type)
  94      cpp_buffer *buffer;
  95      const uchar *pos;
  96      unsigned int type;
  97 {
  98   if (buffer->notes_used == buffer->notes_cap)
  99     {
 100       buffer->notes_cap = buffer->notes_cap * 2 + 200;
 101       buffer->notes = (_cpp_line_note *)
 102         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
 103     }
 104
 105   buffer->notes[buffer->notes_used].pos = pos;
 106   buffer->notes[buffer->notes_used].type = type;
 107   buffer->notes_used++;
 108 }
 109
 110 /* Returns with a logical line that contains no escaped newlines or
 111    trigraphs.  This is a time-critical inner loop.  */
 112 void
 113 _cpp_clean_line (pfile)
 114      cpp_reader *pfile;
 115 {
 116   cpp_buffer *buffer;
 117   const uchar *s;
 118   uchar c, *d, *p;
 119
 120   buffer = pfile->buffer;
 121   buffer->cur_note = buffer->notes_used = 0;
 122   buffer->cur = buffer->line_base = buffer->next_line;
 123   buffer->need_line = false;
 124   s = buffer->next_line - 1;
 125
 126   if (!buffer->from_stage3)
 127     {
 128       d = (uchar *) s;
 129
 130       for (;;)
 131         {
 132           c = *++s;
 133           *++d = c;
 134
 135           if (c == '\n' || c == '\r')
 136             {
 137                   /* Handle DOS line endings.  */
 138               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 139                 s++;
 140               if (s == buffer->rlimit)
 141                 break;
 142
 143               /* Escaped?  */
 144               p = d;
 145               while (p != buffer->next_line && is_nvspace (p[-1]))
 146                 p--;
 147               if (p == buffer->next_line || p[-1] != '\\')
 148                 break;
 149
 150               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 151               d = p - 2;
 152               buffer->next_line = p - 1;
 153             }
 154           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 155             {
 156               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 157               add_line_note (buffer, d, s[2]);
 158               if (CPP_OPTION (pfile, trigraphs))
 159                 {
 160                   *d = _cpp_trigraph_map[s[2]];
 161                   s += 2;
 162                 }
 163             }
 164         }
 165     }
 166   else
 167     {
 168       do
 169         s++;
 170       while (*s != '\n' && *s != '\r');
 171       d = (uchar *) s;
 172
 173       /* Handle DOS line endings.  */
 174       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 175         s++;
 176     }
 177
 178   *d = '\n';
 179   /* A sentinel note that should never be processed.  */
 180   add_line_note (buffer, d + 1, '\n');
 181   buffer->next_line = s + 1;
 182 }
 183
 184 /* Return true if the trigraph indicated by NOTE should be warned
 185    about in a comment.  */
 186 static bool
 187 warn_in_comment (pfile, note)
 188      cpp_reader *pfile;
 189      _cpp_line_note *note;
 190 {
 191   const uchar *p;
 192
 193   /* Within comments we don't warn about trigraphs, unless the
 194      trigraph forms an escaped newline, as that may change
 195      behaviour.  */
 196   if (note->type != '/')
 197     return false;
 198
 199   /* If -trigraphs, then this was an escaped newline iff the next note
 200      is coincident.  */
 201   if (CPP_OPTION (pfile, trigraphs))
 202     return note[1].pos == note->pos;
 203
 204   /* Otherwise, see if this forms an escaped newline.  */
 205   p = note->pos + 3;
 206   while (is_nvspace (*p))
 207     p++;
 208
 209   /* There might have been escaped newlines between the trigraph and the
 210      newline we found.  Hence the position test.  */
 211   return (*p == '\n' && p < note[1].pos);
 212 }
 213
 214 /* Process the notes created by add_line_note as far as the current
 215    location.  */
 216 void
 217 _cpp_process_line_notes (pfile, in_comment)
 218      cpp_reader *pfile;
 219      int in_comment;
 220 {
 221   cpp_buffer *buffer = pfile->buffer;
 222
 223   for (;;)
 224     {
 225       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 226       unsigned int col;
 227
 228       if (note->pos > buffer->cur)
 229         break;
 230
 231       buffer->cur_note++;
 232       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 233
 234       if (note->type == '\\' || note->type == ' ')
 235         {
 236           if (note->type == ' ' && !in_comment)
 237             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 238                                  "backslash and newline separated by space");
 239
 240           if (buffer->next_line > buffer->rlimit)
 241             {
 242               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 243                                    "backslash-newline at end of file");
 244               /* Prevent "no newline at end of file" warning.  */
 245               buffer->next_line = buffer->rlimit;
 246             }
 247
 248           buffer->line_base = note->pos;
 249           pfile->line++;
 250         }
 251       else if (_cpp_trigraph_map[note->type])
 252         {
 253           if (CPP_OPTION (pfile, warn_trigraphs)
 254               && (!in_comment || warn_in_comment (pfile, note)))
 255             {
 256               if (CPP_OPTION (pfile, trigraphs))
 257                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 258                                      "trigraph ??%c converted to %c",
 259                                      note->type,
 260                                      (int) _cpp_trigraph_map[note->type]);
 261               else
 262                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 263                                      "trigraph ??%c ignored",
 264                                      note->type);
 265             }
 266         }
 267       else
 268         abort ();
 269     }
 270 }
 271
 272 /* Skip a C-style block comment.  We find the end of the comment by
 273    seeing if an asterisk is before every '/' we encounter.  Returns
 274    nonzero if comment terminated by EOF, zero otherwise.
 275
 276    Buffer->cur points to the initial asterisk of the comment.  */
 277 bool
 278 _cpp_skip_block_comment (pfile)
 279      cpp_reader *pfile;
 280 {
 281   cpp_buffer *buffer = pfile->buffer;
 282   cppchar_t c;
 283
 284   buffer->cur++;
 285   if (*buffer->cur == '/')
 286     buffer->cur++;
 287
 288   for (;;)
 289     {
 290       c = *buffer->cur++;
 291
 292       /* People like decorating comments with '*', so check for '/'
 293          instead for efficiency.  */
 294       if (c == '/')
 295         {
 296           if (buffer->cur[-2] == '*')
 297             break;
 298
 299           /* Warn about potential nested comments, but not if the '/'
 300              comes immediately before the true comment delimiter.
 301              Don't bother to get it right across escaped newlines.  */
 302           if (CPP_OPTION (pfile, warn_comments)
 303               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 304             cpp_error_with_line (pfile, DL_WARNING,
 305                                  pfile->line, CPP_BUF_COL (buffer),
 306                                  "\"/*\" within comment");
 307         }
 308       else if (c == '\n')
 309         {
 310           buffer->cur--;
 311           _cpp_process_line_notes (pfile, true);
 312           if (buffer->next_line >= buffer->rlimit)
 313             return true;
 314           _cpp_clean_line (pfile);
 315           pfile->line++;
 316         }
 317     }
 318
 319   _cpp_process_line_notes (pfile, true);
 320   return false;
 321 }
 322
 323 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 324    terminating newline.  Handles escaped newlines.  Returns nonzero
 325    if a multiline comment.  */
 326 static int
 327 skip_line_comment (pfile)
 328      cpp_reader *pfile;
 329 {
 330   cpp_buffer *buffer = pfile->buffer;
 331   unsigned int orig_line = pfile->line;
 332
 333   while (*buffer->cur != '\n')
 334     buffer->cur++;
 335
 336   _cpp_process_line_notes (pfile, true);
 337   return orig_line != pfile->line;
 338 }
 339
 340 /* Skips whitespace, saving the next non-whitespace character.  */
 341 static void
 342 skip_whitespace (pfile, c)
 343      cpp_reader *pfile;
 344      cppchar_t c;
 345 {
 346   cpp_buffer *buffer = pfile->buffer;
 347   bool saw_NUL = false;
 348
 349   do
 350     {
 351       /* Horizontal space always OK.  */
 352       if (c == ' ' || c == '\t')
 353         ;
 354       /* Just \f \v or \0 left.  */
 355       else if (c == '\0')
 356         saw_NUL = true;
 357       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 358         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 359                              CPP_BUF_COL (buffer),
 360                              "%s in preprocessing directive",
 361                              c == '\f' ? "form feed" : "vertical tab");
 362
 363       c = *buffer->cur++;
 364     }
 365   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 366   while (is_nvspace (c));
 367
 368   if (saw_NUL)
 369     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 370
 371   buffer->cur--;
 372 }
 373
 374 /* See if the characters of a number token are valid in a name (no
 375    '.', '+' or '-').  */
 376 static int
 377 name_p (pfile, string)
 378      cpp_reader *pfile;
 379      const cpp_string *string;
 380 {
 381   unsigned int i;
 382
 383   for (i = 0; i < string->len; i++)
 384     if (!is_idchar (string->text[i]))
 385       return 0;
 386
 387   return 1;
 388 }
 389
 390 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 391    an identifier.  FIRST is TRUE if this starts an identifier.  */
 392 static bool
 393 forms_identifier_p (pfile, first)
 394      cpp_reader *pfile;
 395      int first;
 396 {
 397   cpp_buffer *buffer = pfile->buffer;
 398
 399   if (*buffer->cur == '$')
 400     {
 401       if (!CPP_OPTION (pfile, dollars_in_ident))
 402         return false;
 403
 404       buffer->cur++;
 405       if (pfile->warn_dollars && !pfile->state.skipping)
 406         {
 407           pfile->warn_dollars = false;
 408           cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
 409         }
 410
 411       return true;
 412     }
 413
 414   /* Is this a syntactically valid UCN?  */
 415   if (0 && *buffer->cur == '\\'
 416       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 417     {
 418       buffer->cur += 2;
 419       if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
 420         return true;
 421       buffer->cur -= 2;
 422     }
 423
 424   return false;
 425 }
 426
 427 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 428 static cpp_hashnode *
 429 lex_identifier (pfile, base)
 430      cpp_reader *pfile;
 431      const uchar *base;
 432 {
 433   cpp_hashnode *result;
 434   const uchar *cur;
 435
 436   do
 437     {
 438       cur = pfile->buffer->cur;
 439
 440       /* N.B. ISIDNUM does not include $.  */
 441       while (ISIDNUM (*cur))
 442         cur++;
 443
 444       pfile->buffer->cur = cur;
 445     }
 446   while (forms_identifier_p (pfile, false));
 447
 448   result = (cpp_hashnode *)
 449     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 450
 451   /* Rarely, identifiers require diagnostics when lexed.  */
 452   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 453                         && !pfile->state.skipping, 0))
 454     {
 455       /* It is allowed to poison the same identifier twice.  */
 456       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 457         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 458                    NODE_NAME (result));
 459
 460       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 461          replacement list of a variadic macro.  */
 462       if (result == pfile->spec_nodes.n__VA_ARGS__
 463           && !pfile->state.va_args_ok)
 464         cpp_error (pfile, DL_PEDWARN,
 465         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 466     }
 467
 468   return result;
 469 }
 470
 471 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 472 static void
 473 lex_number (pfile, number)
 474      cpp_reader *pfile;
 475      cpp_string *number;
 476 {
 477   const uchar *cur;
 478   const uchar *base;
 479   uchar *dest;
 480
 481   base = pfile->buffer->cur - 1;
 482   do
 483     {
 484       cur = pfile->buffer->cur;
 485
 486       /* N.B. ISIDNUM does not include $.  */
 487       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 488         cur++;
 489
 490       pfile->buffer->cur = cur;
 491     }
 492   while (forms_identifier_p (pfile, false));
 493
 494   number->len = cur - base;
 495   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 496   memcpy (dest, base, number->len);
 497   dest[number->len] = '\0';
 498   number->text = dest;
 499 }
 500
 501 /* Create a token of type TYPE with a literal spelling.  */
 502 static void
 503 create_literal (pfile, token, base, len, type)
 504      cpp_reader *pfile;
 505      cpp_token *token;
 506      const uchar *base;
 507      unsigned int len;
 508      enum cpp_ttype type;
 509 {
 510   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 511
 512   memcpy (dest, base, len);
 513   dest[len] = '\0';
 514   token->type = type;
 515   token->val.str.len = len;
 516   token->val.str.text = dest;
 517 }
 518
 519 /* Lexes a string, character constant, or angle-bracketed header file
 520    name.  The stored string contains the spelling, including opening
 521    quote and leading any leading 'L'.  It returns the type of the
 522    literal, or CPP_OTHER if it was not properly terminated.
 523
 524    The spelling is NUL-terminated, but it is not guaranteed that this
 525    is the first NUL since embedded NULs are preserved.  */
 526 static void
 527 lex_string (pfile, token, base)
 528      cpp_reader *pfile;
 529      cpp_token *token;
 530      const uchar *base;
 531 {
 532   bool saw_NUL = false;
 533   const uchar *cur;
 534   cppchar_t terminator;
 535   enum cpp_ttype type;
 536
 537   cur = base;
 538   terminator = *cur++;
 539   if (terminator == 'L')
 540     terminator = *cur++;
 541   if (terminator == '\"')
 542     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 543   else if (terminator == '\'')
 544     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 545   else
 546     terminator = '>', type = CPP_HEADER_NAME;
 547
 548   for (;;)
 549     {
 550       cppchar_t c = *cur++;
 551
 552       /* In #include-style directives, terminators are not escapable.  */
 553       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 554         cur++;
 555       else if (c == terminator)
 556         break;
 557       else if (c == '\n')
 558         {
 559           cur--;
 560           type = CPP_OTHER;
 561           break;
 562         }
 563       else if (c == '\0')
 564         saw_NUL = true;
 565     }
 566
 567   if (saw_NUL && !pfile->state.skipping)
 568     cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 569
 570   pfile->buffer->cur = cur;
 571   create_literal (pfile, token, base, cur - base, type);
 572 }
 573
 574 /* The stored comment includes the comment start and any terminator.  */
 575 static void
 576 save_comment (pfile, token, from, type)
 577      cpp_reader *pfile;
 578      cpp_token *token;
 579      const unsigned char *from;
 580      cppchar_t type;
 581 {
 582   unsigned char *buffer;
 583   unsigned int len, clen;
 584
 585   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 586
 587   /* C++ comments probably (not definitely) have moved past a new
 588      line, which we don't want to save in the comment.  */
 589   if (is_vspace (pfile->buffer->cur[-1]))
 590     len--;
 591
 592   /* If we are currently in a directive, then we need to store all
 593      C++ comments as C comments internally, and so we need to
 594      allocate a little extra space in that case.
 595
 596      Note that the only time we encounter a directive here is
 597      when we are saving comments in a "#define".  */
 598   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 599
 600   buffer = _cpp_unaligned_alloc (pfile, clen);
 601
 602   token->type = CPP_COMMENT;
 603   token->val.str.len = clen;
 604   token->val.str.text = buffer;
 605
 606   buffer[0] = '/';
 607   memcpy (buffer + 1, from, len - 1);
 608
 609   /* Finish conversion to a C comment, if necessary.  */
 610   if (pfile->state.in_directive && type == '/')
 611     {
 612       buffer[1] = '*';
 613       buffer[clen - 2] = '*';
 614       buffer[clen - 1] = '/';
 615     }
 616 }
 617
 618 /* Allocate COUNT tokens for RUN.  */
 619 void
 620 _cpp_init_tokenrun (run, count)
 621      tokenrun *run;
 622      unsigned int count;
 623 {
 624   run->base = xnewvec (cpp_token, count);
 625   run->limit = run->base + count;
 626   run->next = NULL;
 627 }
 628
 629 /* Returns the next tokenrun, or creates one if there is none.  */
 630 static tokenrun *
 631 next_tokenrun (run)
 632      tokenrun *run;
 633 {
 634   if (run->next == NULL)
 635     {
 636       run->next = xnew (tokenrun);
 637       run->next->prev = run;
 638       _cpp_init_tokenrun (run->next, 250);
 639     }
 640
 641   return run->next;
 642 }
 643
 644 /* Allocate a single token that is invalidated at the same time as the
 645    rest of the tokens on the line.  Has its line and col set to the
 646    same as the last lexed token, so that diagnostics appear in the
 647    right place.  */
 648 cpp_token *
 649 _cpp_temp_token (pfile)
 650      cpp_reader *pfile;
 651 {
 652   cpp_token *old, *result;
 653
 654   old = pfile->cur_token - 1;
 655   if (pfile->cur_token == pfile->cur_run->limit)
 656     {
 657       pfile->cur_run = next_tokenrun (pfile->cur_run);
 658       pfile->cur_token = pfile->cur_run->base;
 659     }
 660
 661   result = pfile->cur_token++;
 662   result->line = old->line;
 663   result->col = old->col;
 664   return result;
 665 }
 666
 667 /* Lex a token into RESULT (external interface).  Takes care of issues
 668    like directive handling, token lookahead, multiple include
 669    optimization and skipping.  */
 670 const cpp_token *
 671 _cpp_lex_token (pfile)
 672      cpp_reader *pfile;
 673 {
 674   cpp_token *result;
 675
 676   for (;;)
 677     {
 678       if (pfile->cur_token == pfile->cur_run->limit)
 679         {
 680           pfile->cur_run = next_tokenrun (pfile->cur_run);
 681           pfile->cur_token = pfile->cur_run->base;
 682         }
 683
 684       if (pfile->lookaheads)
 685         {
 686           pfile->lookaheads--;
 687           result = pfile->cur_token++;
 688         }
 689       else
 690         result = _cpp_lex_direct (pfile);
 691
 692       if (result->flags & BOL)
 693         {
 694           /* Is this a directive.  If _cpp_handle_directive returns
 695              false, it is an assembler #.  */
 696           if (result->type == CPP_HASH
 697               /* 6.10.3 p 11: Directives in a list of macro arguments
 698                  gives undefined behavior.  This implementation
 699                  handles the directive as normal.  */
 700               && pfile->state.parsing_args != 1
 701               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 702             continue;
 703           if (pfile->cb.line_change && !pfile->state.skipping)
 704             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 705         }
 706
 707       /* We don't skip tokens in directives.  */
 708       if (pfile->state.in_directive)
 709         break;
 710
 711       /* Outside a directive, invalidate controlling macros.  At file
 712          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 713          get here and MI optimisation works.  */
 714       pfile->mi_valid = false;
 715
 716       if (!pfile->state.skipping || result->type == CPP_EOF)
 717         break;
 718     }
 719
 720   return result;
 721 }
 722
 723 /* Returns true if a fresh line has been loaded.  */
 724 bool
 725 _cpp_get_fresh_line (pfile)
 726      cpp_reader *pfile;
 727 {
 728   /* We can't get a new line until we leave the current directive.  */
 729   if (pfile->state.in_directive)
 730     return false;
 731
 732   for (;;)
 733     {
 734       cpp_buffer *buffer = pfile->buffer;
 735
 736       if (!buffer->need_line)
 737         return true;
 738
 739       if (buffer->next_line < buffer->rlimit)
 740         {
 741           _cpp_clean_line (pfile);
 742           return true;
 743         }
 744
 745       /* First, get out of parsing arguments state.  */
 746       if (pfile->state.parsing_args)
 747         return false;
 748
 749       /* End of buffer.  Non-empty files should end in a newline.  */
 750       if (buffer->buf != buffer->rlimit
 751           && buffer->next_line > buffer->rlimit
 752           && !buffer->from_stage3)
 753         {
 754           /* Only warn once.  */
 755           buffer->next_line = buffer->rlimit;
 756           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 757                                CPP_BUF_COLUMN (buffer, buffer->cur),
 758                                "no newline at end of file");
 759         }
 760
 761       if (!buffer->prev)
 762         return false;
 763
 764       if (buffer->return_at_eof)
 765         {
 766           _cpp_pop_buffer (pfile);
 767           return false;
 768         }
 769
 770       _cpp_pop_buffer (pfile);
 771     }
 772 }
 773
 774 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 775   do                                                    \
 776     {                                                   \
 777       result->type = ELSE_TYPE;                         \
 778       if (*buffer->cur == CHAR)                         \
 779         buffer->cur++, result->type = THEN_TYPE;        \
 780     }                                                   \
 781   while (0)
 782
 783 /* Lex a token into pfile->cur_token, which is also incremented, to
 784    get diagnostics pointing to the correct location.
 785
 786    Does not handle issues such as token lookahead, multiple-include
 787    optimisation, directives, skipping etc.  This function is only
 788    suitable for use by _cpp_lex_token, and in special cases like
 789    lex_expansion_token which doesn't care for any of these issues.
 790
 791    When meeting a newline, returns CPP_EOF if parsing a directive,
 792    otherwise returns to the start of the token buffer if permissible.
 793    Returns the location of the lexed token.  */
 794 cpp_token *
 795 _cpp_lex_direct (pfile)
 796      cpp_reader *pfile;
 797 {
 798   cppchar_t c;
 799   cpp_buffer *buffer;
 800   const unsigned char *comment_start;
 801   cpp_token *result = pfile->cur_token++;
 802
 803  fresh_line:
 804   result->flags = 0;
 805   if (pfile->buffer->need_line)
 806     {
 807       if (!_cpp_get_fresh_line (pfile))
 808         {
 809           result->type = CPP_EOF;
 810           if (!pfile->state.in_directive)
 811             {
 812               /* Tell the compiler the line number of the EOF token.  */
 813               result->line = pfile->line;
 814               result->flags = BOL;
 815             }
 816           return result;
 817         }
 818       if (!pfile->keep_tokens)
 819         {
 820           pfile->cur_run = &pfile->base_run;
 821           result = pfile->base_run.base;
 822           pfile->cur_token = result + 1;
 823         }
 824       result->flags = BOL;
 825       if (pfile->state.parsing_args == 2)
 826         result->flags |= PREV_WHITE;
 827     }
 828   buffer = pfile->buffer;
 829  update_tokens_line:
 830   result->line = pfile->line;
 831
 832  skipped_white:
 833   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 834       && !pfile->overlaid_buffer)
 835     {
 836       _cpp_process_line_notes (pfile, false);
 837       result->line = pfile->line;
 838     }
 839   c = *buffer->cur++;
 840   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 841
 842   switch (c)
 843     {
 844     case ' ': case '\t': case '\f': case '\v': case '\0':
 845       result->flags |= PREV_WHITE;
 846       skip_whitespace (pfile, c);
 847       goto skipped_white;
 848
 849     case '\n':
 850       pfile->line++;
 851       buffer->need_line = true;
 852       goto fresh_line;
 853
 854     case '0': case '1': case '2': case '3': case '4':
 855     case '5': case '6': case '7': case '8': case '9':
 856       result->type = CPP_NUMBER;
 857       lex_number (pfile, &result->val.str);
 858       break;
 859
 860     case 'L':
 861       /* 'L' may introduce wide characters or strings.  */
 862       if (*buffer->cur == '\'' || *buffer->cur == '"')
 863         {
 864           lex_string (pfile, result, buffer->cur - 1);
 865           break;
 866         }
 867       /* Fall through.  */
 868
 869     case '_':
 870     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 871     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 872     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 873     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 874     case 'y': case 'z':
 875     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 876     case 'G': case 'H': case 'I': case 'J': case 'K':
 877     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 878     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 879     case 'Y': case 'Z':
 880       result->type = CPP_NAME;
 881       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 882
 883       /* Convert named operators to their proper types.  */
 884       if (result->val.node->flags & NODE_OPERATOR)
 885         {
 886           result->flags |= NAMED_OP;
 887           result->type = result->val.node->directive_index;
 888         }
 889       break;
 890
 891     case '\'':
 892     case '"':
 893       lex_string (pfile, result, buffer->cur - 1);
 894       break;
 895
 896     case '/':
 897       /* A potential block or line comment.  */
 898       comment_start = buffer->cur;
 899       c = *buffer->cur;
 900
 901       if (c == '*')
 902         {
 903           if (_cpp_skip_block_comment (pfile))
 904             cpp_error (pfile, DL_ERROR, "unterminated comment");
 905         }
 906       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 907                             || CPP_IN_SYSTEM_HEADER (pfile)))
 908         {
 909           /* Warn about comments only if pedantically GNUC89, and not
 910              in system headers.  */
 911           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 912               && ! buffer->warned_cplusplus_comments)
 913             {
 914               cpp_error (pfile, DL_PEDWARN,
 915                          "C++ style comments are not allowed in ISO C90");
 916               cpp_error (pfile, DL_PEDWARN,
 917                          "(this will be reported only once per input file)");
 918               buffer->warned_cplusplus_comments = 1;
 919             }
 920
 921           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 922             cpp_error (pfile, DL_WARNING, "multi-line comment");
 923         }
 924       else if (c == '=')
 925         {
 926           buffer->cur++;
 927           result->type = CPP_DIV_EQ;
 928           break;
 929         }
 930       else
 931         {
 932           result->type = CPP_DIV;
 933           break;
 934         }
 935
 936       if (!pfile->state.save_comments)
 937         {
 938           result->flags |= PREV_WHITE;
 939           goto update_tokens_line;
 940         }
 941
 942       /* Save the comment as a token in its own right.  */
 943       save_comment (pfile, result, comment_start, c);
 944       break;
 945
 946     case '<':
 947       if (pfile->state.angled_headers)
 948         {
 949           lex_string (pfile, result, buffer->cur - 1);
 950           break;
 951         }
 952
 953       result->type = CPP_LESS;
 954       if (*buffer->cur == '=')
 955         buffer->cur++, result->type = CPP_LESS_EQ;
 956       else if (*buffer->cur == '<')
 957         {
 958           buffer->cur++;
 959           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 960         }
 961       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 962         {
 963           buffer->cur++;
 964           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 965         }
 966       else if (CPP_OPTION (pfile, digraphs))
 967         {
 968           if (*buffer->cur == ':')
 969             {
 970               buffer->cur++;
 971               result->flags |= DIGRAPH;
 972               result->type = CPP_OPEN_SQUARE;
 973             }
 974           else if (*buffer->cur == '%')
 975             {
 976               buffer->cur++;
 977               result->flags |= DIGRAPH;
 978               result->type = CPP_OPEN_BRACE;
 979             }
 980         }
 981       break;
 982
 983     case '>':
 984       result->type = CPP_GREATER;
 985       if (*buffer->cur == '=')
 986         buffer->cur++, result->type = CPP_GREATER_EQ;
 987       else if (*buffer->cur == '>')
 988         {
 989           buffer->cur++;
 990           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
 991         }
 992       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 993         {
 994           buffer->cur++;
 995           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
 996         }
 997       break;
 998
 999     case '%':
1000       result->type = CPP_MOD;
1001       if (*buffer->cur == '=')
1002         buffer->cur++, result->type = CPP_MOD_EQ;
1003       else if (CPP_OPTION (pfile, digraphs))
1004         {
1005           if (*buffer->cur == ':')
1006             {
1007               buffer->cur++;
1008               result->flags |= DIGRAPH;
1009               result->type = CPP_HASH;
1010               if (*buffer->cur == '%' && buffer->cur[1] == ':')
1011                 buffer->cur += 2, result->type = CPP_PASTE;
1012             }
1013           else if (*buffer->cur == '>')
1014             {
1015               buffer->cur++;
1016               result->flags |= DIGRAPH;
1017               result->type = CPP_CLOSE_BRACE;
1018             }
1019         }
1020       break;
1021
1022     case '.':
1023       result->type = CPP_DOT;
1024       if (ISDIGIT (*buffer->cur))
1025         {
1026           result->type = CPP_NUMBER;
1027           lex_number (pfile, &result->val.str);
1028         }
1029       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1030         buffer->cur += 2, result->type = CPP_ELLIPSIS;
1031       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1032         buffer->cur++, result->type = CPP_DOT_STAR;
1033       break;
1034
1035     case '+':
1036       result->type = CPP_PLUS;
1037       if (*buffer->cur == '+')
1038         buffer->cur++, result->type = CPP_PLUS_PLUS;
1039       else if (*buffer->cur == '=')
1040         buffer->cur++, result->type = CPP_PLUS_EQ;
1041       break;
1042
1043     case '-':
1044       result->type = CPP_MINUS;
1045       if (*buffer->cur == '>')
1046         {
1047           buffer->cur++;
1048           result->type = CPP_DEREF;
1049           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1050             buffer->cur++, result->type = CPP_DEREF_STAR;
1051         }
1052       else if (*buffer->cur == '-')
1053         buffer->cur++, result->type = CPP_MINUS_MINUS;
1054       else if (*buffer->cur == '=')
1055         buffer->cur++, result->type = CPP_MINUS_EQ;
1056       break;
1057
1058     case '&':
1059       result->type = CPP_AND;
1060       if (*buffer->cur == '&')
1061         buffer->cur++, result->type = CPP_AND_AND;
1062       else if (*buffer->cur == '=')
1063         buffer->cur++, result->type = CPP_AND_EQ;
1064       break;
1065
1066     case '|':
1067       result->type = CPP_OR;
1068       if (*buffer->cur == '|')
1069         buffer->cur++, result->type = CPP_OR_OR;
1070       else if (*buffer->cur == '=')
1071         buffer->cur++, result->type = CPP_OR_EQ;
1072       break;
1073
1074     case ':':
1075       result->type = CPP_COLON;
1076       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1077         buffer->cur++, result->type = CPP_SCOPE;
1078       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1079         {
1080           buffer->cur++;
1081           result->flags |= DIGRAPH;
1082           result->type = CPP_CLOSE_SQUARE;
1083         }
1084       break;
1085
1086     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1087     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1088     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1089     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1090     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1091
1092     case '?': result->type = CPP_QUERY; break;
1093     case '~': result->type = CPP_COMPL; break;
1094     case ',': result->type = CPP_COMMA; break;
1095     case '(': result->type = CPP_OPEN_PAREN; break;
1096     case ')': result->type = CPP_CLOSE_PAREN; break;
1097     case '[': result->type = CPP_OPEN_SQUARE; break;
1098     case ']': result->type = CPP_CLOSE_SQUARE; break;
1099     case '{': result->type = CPP_OPEN_BRACE; break;
1100     case '}': result->type = CPP_CLOSE_BRACE; break;
1101     case ';': result->type = CPP_SEMICOLON; break;
1102
1103       /* @ is a punctuator in Objective-C.  */
1104     case '@': result->type = CPP_ATSIGN; break;
1105
1106     case '$':
1107     case '\\':
1108       {
1109         const uchar *base = --buffer->cur;
1110
1111         if (forms_identifier_p (pfile, true))
1112           {
1113             result->type = CPP_NAME;
1114             result->val.node = lex_identifier (pfile, base);
1115             break;
1116           }
1117         buffer->cur++;
1118       }
1119
1120     default:
1121       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1122       break;
1123     }
1124
1125   return result;
1126 }
1127
1128 /* An upper bound on the number of bytes needed to spell TOKEN.
1129    Does not include preceding whitespace.  */
1130 unsigned int
1131 cpp_token_len (token)
1132      const cpp_token *token;
1133 {
1134   unsigned int len;
1135
1136   switch (TOKEN_SPELL (token))
1137     {
1138     default:            len = 4;                                break;
1139     case SPELL_LITERAL: len = token->val.str.len;               break;
1140     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1141     }
1142
1143   return len;
1144 }
1145
1146 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1147    already contain the enough space to hold the token's spelling.
1148    Returns a pointer to the character after the last character
1149    written.  */
1150 unsigned char *
1151 cpp_spell_token (pfile, token, buffer)
1152      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1153      const cpp_token *token;
1154      unsigned char *buffer;
1155 {
1156   switch (TOKEN_SPELL (token))
1157     {
1158     case SPELL_OPERATOR:
1159       {
1160         const unsigned char *spelling;
1161         unsigned char c;
1162
1163         if (token->flags & DIGRAPH)
1164           spelling
1165             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1166         else if (token->flags & NAMED_OP)
1167           goto spell_ident;
1168         else
1169           spelling = TOKEN_NAME (token);
1170
1171         while ((c = *spelling++) != '\0')
1172           *buffer++ = c;
1173       }
1174       break;
1175
1176     spell_ident:
1177     case SPELL_IDENT:
1178       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1179       buffer += NODE_LEN (token->val.node);
1180       break;
1181
1182     case SPELL_LITERAL:
1183       memcpy (buffer, token->val.str.text, token->val.str.len);
1184       buffer += token->val.str.len;
1185       break;
1186
1187     case SPELL_NONE:
1188       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1189       break;
1190     }
1191
1192   return buffer;
1193 }
1194
1195 /* Returns TOKEN spelt as a null-terminated string.  The string is
1196    freed when the reader is destroyed.  Useful for diagnostics.  */
1197 unsigned char *
1198 cpp_token_as_text (pfile, token)
1199      cpp_reader *pfile;
1200      const cpp_token *token;
1201 {
1202   unsigned int len = cpp_token_len (token) + 1;
1203   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1204
1205   end = cpp_spell_token (pfile, token, start);
1206   end[0] = '\0';
1207
1208   return start;
1209 }
1210
1211 /* Used by C front ends, which really should move to using
1212    cpp_token_as_text.  */
1213 const char *
1214 cpp_type2name (type)
1215      enum cpp_ttype type;
1216 {
1217   return (const char *) token_spellings[type].name;
1218 }
1219
1220 /* Writes the spelling of token to FP, without any preceding space.
1221    Separated from cpp_spell_token for efficiency - to avoid stdio
1222    double-buffering.  */
1223 void
1224 cpp_output_token (token, fp)
1225      const cpp_token *token;
1226      FILE *fp;
1227 {
1228   switch (TOKEN_SPELL (token))
1229     {
1230     case SPELL_OPERATOR:
1231       {
1232         const unsigned char *spelling;
1233         int c;
1234
1235         if (token->flags & DIGRAPH)
1236           spelling
1237             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1238         else if (token->flags & NAMED_OP)
1239           goto spell_ident;
1240         else
1241           spelling = TOKEN_NAME (token);
1242
1243         c = *spelling;
1244         do
1245           putc (c, fp);
1246         while ((c = *++spelling) != '\0');
1247       }
1248       break;
1249
1250     spell_ident:
1251     case SPELL_IDENT:
1252       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1253     break;
1254
1255     case SPELL_LITERAL:
1256       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1257       break;
1258
1259     case SPELL_NONE:
1260       /* An error, most probably.  */
1261       break;
1262     }
1263 }
1264
1265 /* Compare two tokens.  */
1266 int
1267 _cpp_equiv_tokens (a, b)
1268      const cpp_token *a, *b;
1269 {
1270   if (a->type == b->type && a->flags == b->flags)
1271     switch (TOKEN_SPELL (a))
1272       {
1273       default:                  /* Keep compiler happy.  */
1274       case SPELL_OPERATOR:
1275         return 1;
1276       case SPELL_NONE:
1277         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1278       case SPELL_IDENT:
1279         return a->val.node == b->val.node;
1280       case SPELL_LITERAL:
1281         return (a->val.str.len == b->val.str.len
1282                 && !memcmp (a->val.str.text, b->val.str.text,
1283                             a->val.str.len));
1284       }
1285
1286   return 0;
1287 }
1288
1289 /* Returns nonzero if a space should be inserted to avoid an
1290    accidental token paste for output.  For simplicity, it is
1291    conservative, and occasionally advises a space where one is not
1292    needed, e.g. "." and ".2".  */
1293 int
1294 cpp_avoid_paste (pfile, token1, token2)
1295      cpp_reader *pfile;
1296      const cpp_token *token1, *token2;
1297 {
1298   enum cpp_ttype a = token1->type, b = token2->type;
1299   cppchar_t c;
1300
1301   if (token1->flags & NAMED_OP)
1302     a = CPP_NAME;
1303   if (token2->flags & NAMED_OP)
1304     b = CPP_NAME;
1305
1306   c = EOF;
1307   if (token2->flags & DIGRAPH)
1308     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1309   else if (token_spellings[b].category == SPELL_OPERATOR)
1310     c = token_spellings[b].name[0];
1311
1312   /* Quickly get everything that can paste with an '='.  */
1313   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1314     return 1;
1315
1316   switch (a)
1317     {
1318     case CPP_GREATER:   return c == '>' || c == '?';
1319     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1320     case CPP_PLUS:      return c == '+';
1321     case CPP_MINUS:     return c == '-' || c == '>';
1322     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1323     case CPP_MOD:       return c == ':' || c == '>';
1324     case CPP_AND:       return c == '&';
1325     case CPP_OR:        return c == '|';
1326     case CPP_COLON:     return c == ':' || c == '>';
1327     case CPP_DEREF:     return c == '*';
1328     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1329     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1330     case CPP_NAME:      return ((b == CPP_NUMBER
1331                                  && name_p (pfile, &token2->val.str))
1332                                 || b == CPP_NAME
1333                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1334     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1335                                 || c == '.' || c == '+' || c == '-');
1336                                       /* UCNs */
1337     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1338                                  && b == CPP_NAME)
1339                                 || (CPP_OPTION (pfile, objc)
1340                                     && token1->val.str.text[0] == '@'
1341                                     && (b == CPP_NAME || b == CPP_STRING)));
1342     default:            break;
1343     }
1344
1345   return 0;
1346 }
1347
1348 /* Output all the remaining tokens on the current line, and a newline
1349    character, to FP.  Leading whitespace is removed.  If there are
1350    macros, special token padding is not performed.  */
1351 void
1352 cpp_output_line (pfile, fp)
1353      cpp_reader *pfile;
1354      FILE *fp;
1355 {
1356   const cpp_token *token;
1357
1358   token = cpp_get_token (pfile);
1359   while (token->type != CPP_EOF)
1360     {
1361       cpp_output_token (token, fp);
1362       token = cpp_get_token (pfile);
1363       if (token->flags & PREV_WHITE)
1364         putc (' ', fp);
1365     }
1366
1367   putc ('\n', fp);
1368 }
1369
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1380
1381 /* Read a possible universal character name starting at *PSTR.  */
1382 static cppchar_t
1383 maybe_read_ucn (pfile, pstr)
1384      cpp_reader *pfile;
1385      const uchar **pstr;
1386 {
1387   cppchar_t result, c = (*pstr)[-1];
1388
1389   result = _cpp_valid_ucn (pfile, pstr, false);
1390   if (result)
1391     {
1392       if (CPP_WTRADITIONAL (pfile))
1393         cpp_error (pfile, DL_WARNING,
1394                    "the meaning of '\\%c' is different in traditional C",
1395                    (int) c);
1396
1397       if (CPP_OPTION (pfile, EBCDIC))
1398         {
1399           cpp_error (pfile, DL_ERROR,
1400                      "universal character with an EBCDIC target");
1401           result = 0x3f;  /* EBCDIC invalid character */
1402         }
1403     }
1404
1405   return result;
1406 }
1407
1408 /* Returns the value of an escape sequence, truncated to the correct
1409    target precision.  PSTR points to the input pointer, which is just
1410    after the backslash.  LIMIT is how much text we have.  WIDE is true
1411    if the escape sequence is part of a wide character constant or
1412    string literal.  Handles all relevant diagnostics.  */
1413 cppchar_t
1414 cpp_parse_escape (pfile, pstr, limit, wide)
1415      cpp_reader *pfile;
1416      const unsigned char **pstr;
1417      const unsigned char *limit;
1418      int wide;
1419 {
1420   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1421   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1422   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1423
1424   int unknown = 0;
1425   const unsigned char *str = *pstr, *charconsts;
1426   cppchar_t c, ucn, mask;
1427   unsigned int width;
1428
1429   if (CPP_OPTION (pfile, EBCDIC))
1430     charconsts = ebcdic;
1431   else
1432     charconsts = ascii;
1433
1434   if (wide)
1435     width = CPP_OPTION (pfile, wchar_precision);
1436   else
1437     width = CPP_OPTION (pfile, char_precision);
1438   if (width < BITS_PER_CPPCHAR_T)
1439     mask = ((cppchar_t) 1 << width) - 1;
1440   else
1441     mask = ~0;
1442
1443   c = *str++;
1444   switch (c)
1445     {
1446     case '\\': case '\'': case '"': case '?': break;
1447     case 'b': c = charconsts[1];  break;
1448     case 'f': c = charconsts[3];  break;
1449     case 'n': c = charconsts[4];  break;
1450     case 'r': c = charconsts[5];  break;
1451     case 't': c = charconsts[6];  break;
1452     case 'v': c = charconsts[7];  break;
1453
1454     case '(': case '{': case '[': case '%':
1455       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1456          '\%' is used to prevent SCCS from getting confused.  */
1457       unknown = CPP_PEDANTIC (pfile);
1458       break;
1459
1460     case 'a':
1461       if (CPP_WTRADITIONAL (pfile))
1462         cpp_error (pfile, DL_WARNING,
1463                    "the meaning of '\\a' is different in traditional C");
1464       c = charconsts[0];
1465       break;
1466
1467     case 'e': case 'E':
1468       if (CPP_PEDANTIC (pfile))
1469         cpp_error (pfile, DL_PEDWARN,
1470                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1471       c = charconsts[2];
1472       break;
1473
1474     case 'u': case 'U':
1475       ucn = maybe_read_ucn (pfile, &str);
1476       if (ucn)
1477         c = ucn;
1478       else
1479         unknown = true;
1480       break;
1481
1482     case 'x':
1483       if (CPP_WTRADITIONAL (pfile))
1484         cpp_error (pfile, DL_WARNING,
1485                    "the meaning of '\\x' is different in traditional C");
1486
1487       {
1488         cppchar_t i = 0, overflow = 0;
1489         int digits_found = 0;
1490
1491         while (str < limit)
1492           {
1493             c = *str;
1494             if (! ISXDIGIT (c))
1495               break;
1496             str++;
1497             overflow |= i ^ (i << 4 >> 4);
1498             i = (i << 4) + hex_digit_value (c);
1499             digits_found = 1;
1500           }
1501
1502         if (!digits_found)
1503           cpp_error (pfile, DL_ERROR,
1504                        "\\x used with no following hex digits");
1505
1506         if (overflow | (i != (i & mask)))
1507           {
1508             cpp_error (pfile, DL_PEDWARN,
1509                        "hex escape sequence out of range");
1510             i &= mask;
1511           }
1512         c = i;
1513       }
1514       break;
1515
1516     case '0':  case '1':  case '2':  case '3':
1517     case '4':  case '5':  case '6':  case '7':
1518       {
1519         size_t count = 0;
1520         cppchar_t i = c - '0';
1521
1522         while (str < limit && ++count < 3)
1523           {
1524             c = *str;
1525             if (c < '0' || c > '7')
1526               break;
1527             str++;
1528             i = (i << 3) + c - '0';
1529           }
1530
1531         if (i != (i & mask))
1532           {
1533             cpp_error (pfile, DL_PEDWARN,
1534                        "octal escape sequence out of range");
1535             i &= mask;
1536           }
1537         c = i;
1538       }
1539       break;
1540
1541     default:
1542       unknown = 1;
1543       break;
1544     }
1545
1546   if (unknown)
1547     {
1548       if (ISGRAPH (c))
1549         cpp_error (pfile, DL_PEDWARN,
1550                    "unknown escape sequence '\\%c'", (int) c);
1551       else
1552         cpp_error (pfile, DL_PEDWARN,
1553                    "unknown escape sequence: '\\%03o'", (int) c);
1554     }
1555
1556   if (c > mask)
1557     {
1558       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1559       c &= mask;
1560     }
1561
1562   *pstr = str;
1563   return c;
1564 }
1565
1566 /* Interpret a (possibly wide) character constant in TOKEN.
1567    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1568    points to a variable that is filled in with the number of
1569    characters seen, and UNSIGNEDP to a variable that indicates whether
1570    the result has signed type.  */
1571 cppchar_t
1572 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1573      cpp_reader *pfile;
1574      const cpp_token *token;
1575      unsigned int *pchars_seen;
1576      int *unsignedp;
1577 {
1578   const unsigned char *str, *limit;
1579   unsigned int chars_seen = 0;
1580   size_t width, max_chars;
1581   cppchar_t c, mask, result = 0;
1582   bool unsigned_p;
1583
1584   str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1585   limit = token->val.str.text + token->val.str.len - 1;
1586
1587   if (token->type == CPP_CHAR)
1588     {
1589       width = CPP_OPTION (pfile, char_precision);
1590       max_chars = CPP_OPTION (pfile, int_precision) / width;
1591       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1592     }
1593   else
1594     {
1595       width = CPP_OPTION (pfile, wchar_precision);
1596       max_chars = 1;
1597       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1598     }
1599
1600   if (width < BITS_PER_CPPCHAR_T)
1601     mask = ((cppchar_t) 1 << width) - 1;
1602   else
1603     mask = ~0;
1604
1605   while (str < limit)
1606     {
1607       c = *str++;
1608
1609       if (c == '\\')
1610         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1611
1612 #ifdef MAP_CHARACTER
1613       if (ISPRINT (c))
1614         c = MAP_CHARACTER (c);
1615 #endif
1616
1617       chars_seen++;
1618
1619       /* Truncate the character, scale the result and merge the two.  */
1620       c &= mask;
1621       if (width < BITS_PER_CPPCHAR_T)
1622         result = (result << width) | c;
1623       else
1624         result = c;
1625     }
1626
1627   if (chars_seen == 0)
1628     cpp_error (pfile, DL_ERROR, "empty character constant");
1629   else if (chars_seen > 1)
1630     {
1631       /* Multichar charconsts are of type int and therefore signed.  */
1632       unsigned_p = 0;
1633
1634       if (chars_seen > max_chars)
1635         {
1636           chars_seen = max_chars;
1637           cpp_error (pfile, DL_WARNING,
1638                      "character constant too long for its type");
1639         }
1640       else if (CPP_OPTION (pfile, warn_multichar))
1641         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1642     }
1643
1644   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1645      in WIDTH bits, but for multi-char charconsts it's value is the
1646      full target type's width.  */
1647   if (chars_seen > 1)
1648     width *= max_chars;
1649   if (width < BITS_PER_CPPCHAR_T)
1650     {
1651       mask = ((cppchar_t) 1 << width) - 1;
1652       if (unsigned_p || !(result & (1 << (width - 1))))
1653         result &= mask;
1654       else
1655         result |= ~mask;
1656     }
1657
1658   *pchars_seen = chars_seen;
1659   *unsignedp = unsigned_p;
1660   return result;
1661 }
1662
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer worth
   handing out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro argument is parenthesized so that arbitrary expressions may
   be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1677
1678 /* Create a new allocation buffer.  Place the control block at the end
1679    of the buffer, so that buffer overflows will cause immediate chaos.  */
1680 static _cpp_buff *
1681 new_buff (len)
1682      size_t len;
1683 {
1684   _cpp_buff *result;
1685   unsigned char *base;
1686
1687   if (len < MIN_BUFF_SIZE)
1688     len = MIN_BUFF_SIZE;
1689   len = CPP_ALIGN (len);
1690
1691   base = xmalloc (len + sizeof (_cpp_buff));
1692   result = (_cpp_buff *) (base + len);
1693   result->base = base;
1694   result->cur = base;
1695   result->limit = base + len;
1696   result->next = NULL;
1697   return result;
1698 }
1699
1700 /* Place a chain of unwanted allocation buffers on the free list.  */
1701 void
1702 _cpp_release_buff (pfile, buff)
1703      cpp_reader *pfile;
1704      _cpp_buff *buff;
1705 {
1706   _cpp_buff *end = buff;
1707
1708   while (end->next)
1709     end = end->next;
1710   end->next = pfile->free_buffs;
1711   pfile->free_buffs = buff;
1712 }
1713
1714 /* Return a free buffer of size at least MIN_SIZE.  */
1715 _cpp_buff *
1716 _cpp_get_buff (pfile, min_size)
1717      cpp_reader *pfile;
1718      size_t min_size;
1719 {
1720   _cpp_buff *result, **p;
1721
1722   for (p = &pfile->free_buffs;; p = &(*p)->next)
1723     {
1724       size_t size;
1725
1726       if (*p == NULL)
1727         return new_buff (min_size);
1728       result = *p;
1729       size = result->limit - result->base;
1730       /* Return a buffer that's big enough, but don't waste one that's
1731          way too big.  */
1732       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1733         break;
1734     }
1735
1736   *p = result->next;
1737   result->next = NULL;
1738   result->cur = result->base;
1739   return result;
1740 }
1741
1742 /* Creates a new buffer with enough space to hold the uncommitted
1743    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1744    the excess bytes to the new buffer.  Chains the new buffer after
1745    BUFF, and returns the new buffer.  */
1746 _cpp_buff *
1747 _cpp_append_extend_buff (pfile, buff, min_extra)
1748      cpp_reader *pfile;
1749      _cpp_buff *buff;
1750      size_t min_extra;
1751 {
1752   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1753   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1754
1755   buff->next = new_buff;
1756   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1757   return new_buff;
1758 }
1759
1760 /* Creates a new buffer with enough space to hold the uncommitted
1761    remaining bytes of the buffer pointed to by BUFF, and at least
1762    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1763    Chains the new buffer before the buffer pointed to by BUFF, and
1764    updates the pointer to point to the new buffer.  */
1765 void
1766 _cpp_extend_buff (pfile, pbuff, min_extra)
1767      cpp_reader *pfile;
1768      _cpp_buff **pbuff;
1769      size_t min_extra;
1770 {
1771   _cpp_buff *new_buff, *old_buff = *pbuff;
1772   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1773
1774   new_buff = _cpp_get_buff (pfile, size);
1775   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1776   new_buff->next = old_buff;
1777   *pbuff = new_buff;
1778 }
1779
1780 /* Free a chain of buffers starting at BUFF.  */
1781 void
1782 _cpp_free_buff (buff)
1783      _cpp_buff *buff;
1784 {
1785   _cpp_buff *next;
1786
1787   for (; buff; buff = next)
1788     {
1789       next = buff->next;
1790       free (buff->base);
1791     }
1792 }
1793
1794 /* Allocate permanent, unaligned storage of length LEN.  */
1795 unsigned char *
1796 _cpp_unaligned_alloc (pfile, len)
1797      cpp_reader *pfile;
1798      size_t len;
1799 {
1800   _cpp_buff *buff = pfile->u_buff;
1801   unsigned char *result = buff->cur;
1802
1803   if (len > (size_t) (buff->limit - result))
1804     {
1805       buff = _cpp_get_buff (pfile, len);
1806       buff->next = pfile->u_buff;
1807       pfile->u_buff = buff;
1808       result = buff->cur;
1809     }
1810
1811   buff->cur = result + len;
1812   return result;
1813 }
1814
1815 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1816    That buffer is used for growing allocations when saving macro
1817    replacement lists in a #define, and when parsing an answer to an
1818    assertion in #assert, #unassert or #if (and therefore possibly
1819    whilst expanding macros).  It therefore must not be used by any
1820    code that they might call: specifically the lexer and the guts of
1821    the macro expander.
1822
1823    All existing other uses clearly fit this restriction: storing
1824    registered pragmas during initialization.  */
1825 unsigned char *
1826 _cpp_aligned_alloc (pfile, len)
1827      cpp_reader *pfile;
1828      size_t len;
1829 {
1830   _cpp_buff *buff = pfile->a_buff;
1831   unsigned char *result = buff->cur;
1832
1833   if (len > (size_t) (buff->limit - result))
1834     {
1835       buff = _cpp_get_buff (pfile, len);
1836       buff->next = pfile->a_buff;
1837       pfile->a_buff = buff;
1838       result = buff->cur;
1839     }
1840
1841   buff->cur = result + len;
1842   return result;
1843 }