gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  /* How the spelling of a token type is recorded in token_spellings.
     NOTE(review): apart from SPELL_OPERATOR, what each category means to
     its consumers is not visible in this part of the file -- confirm
     against the users of TOKEN_SPELL.  */
  29 enum spell_type
  30 {
  31   SPELL_OPERATOR = 0,   /* Category given to every OP entry of TTYPE_TABLE.  */
  32   SPELL_IDENT,
  33   SPELL_LITERAL,
  34   SPELL_NONE
  35 };
  36
     /* One row of the token_spellings table.  */
  37 struct token_spelling
  38 {
  39   enum spell_type category;      /* Spelling category of the token type.  */
  40   const unsigned char *name;     /* OP rows: the operator's spelling;
                                        TK rows: the stringified enumerator.  */
  41 };
  42
  /* Spellings of the digraph forms of punctuators.  NOTE(review): the
     code that indexes this table is outside the part of the file
     reviewed here.  */
  43 static const unsigned char *const digraph_spellings[] =
  44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  45
     /* Expand each TTYPE_TABLE entry into one token_spelling initializer:
        OP (e, s) gives category SPELL_OPERATOR with S as the name, and
        TK (e, s) gives category S with the stringified E as the name.
        Both macros are undefined again once the table has been built.  */
  46 #define OP(e, s) { SPELL_OPERATOR, U s           },
  47 #define TK(e, s) { s,              U STRINGX (e) },
  48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  49 #undef OP
  50 #undef TK
  51
     /* Category and name recorded in token_spellings for TOKEN's type.  */
  52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  54
  /* Forward declarations of this file's static helpers; PARAMS wraps
     each parameter list.  NOTE(review): maybe_read_ucn, hex_digit_value
     and new_buff are defined outside the part of the file reviewed
     here.  */
  55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  56 static int skip_line_comment PARAMS ((cpp_reader *));
  57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
  59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
  60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
  61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
  62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  63                                   cppchar_t));
  64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  65                                     unsigned int, enum cpp_ttype));
  66 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  67 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
  68 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  69
  70 static unsigned int hex_digit_value PARAMS ((unsigned int));
  71 static _cpp_buff *new_buff PARAMS ((size_t));
  72
  73
  74 /* Utility routine:
  75
  76    Compares, the token TOKEN to the NUL-terminated string STRING.
  77    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  78 int
  79 cpp_ideq (token, string)
  80      const cpp_token *token;
  81      const char *string;
  82 {
  83   if (token->type != CPP_NAME)
  84     return 0;
  85
  86   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  87 }
  88
  89 /* Record a note TYPE at byte POS into the current cleaned logical
  90    line.  */
  91 static void
  92 add_line_note (buffer, pos, type)
  93      cpp_buffer *buffer;
  94      const uchar *pos;
  95      unsigned int type;
  96 {
  97   if (buffer->notes_used == buffer->notes_cap)
  98     {
  99       buffer->notes_cap = buffer->notes_cap * 2 + 200;
 100       buffer->notes = (_cpp_line_note *)
 101         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
 102     }
 103
 104   buffer->notes[buffer->notes_used].pos = pos;
 105   buffer->notes[buffer->notes_used].type = type;
 106   buffer->notes_used++;
 107 }
 108
 109 /* Returns with a logical line that contains no escaped newlines or
 110    trigraphs.  This is a time-critical inner loop.

        The line starting at buffer->next_line is rewritten in place: S
        reads the raw text while D, which never runs ahead of S, writes
        the cleaned text.  Every backslash-newline that is removed and
        every trigraph that is seen is recorded with add_line_note.  On
        return buffer->cur and buffer->line_base address the start of
        the line, the line is terminated by '\n', the note array ends
        with a sentinel note, and buffer->next_line addresses the text
        following the line terminator that was consumed.

        For a buffer flagged from_stage3 no escaped newlines or
        trigraphs are looked for; only the end of the line is located.  */
 111 void
 112 _cpp_clean_line (pfile)
 113      cpp_reader *pfile;
 114 {
 115   cpp_buffer *buffer;
 116   const uchar *s;
 117   uchar c, *d, *p;
 118
 119   buffer = pfile->buffer;
 120   buffer->cur_note = buffer->notes_used = 0;   /* Drop the previous line's notes.  */
 121   buffer->cur = buffer->line_base = buffer->next_line;
 122   buffer->need_line = false;
 123   s = buffer->next_line - 1;   /* One short, because the loops pre-increment.  */
 124
 125   if (!buffer->from_stage3)
 126     {
 127       d = (uchar *) s;   /* Reader and writer start at the same place.  */
 128
 129       for (;;)
 130         {
 131           c = *++s;
 132           *++d = c;
 133
 134           if (c == '\n' || c == '\r')
 135             {
 136                   /* Handle DOS line endings.  */
 137               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 138                 s++;
 139               if (s == buffer->rlimit)   /* S has reached the buffer's limit.  */
 140                 break;
 141
 142               /* Escaped?  */
 143               p = d;
 144               while (p != buffer->next_line && is_nvspace (p[-1]))   /* Back over trailing space.  */
 145                 p--;
 146               if (p == buffer->next_line || p[-1] != '\\')   /* No backslash: a real line end.  */
 147                 break;
 148
                   /* The note sits on the backslash.  Its type is ' ' when
                      space separated the backslash from the newline and
                      '\\' when nothing did.  */
 149               add_line_note (buffer, p - 1, p != d ? ' ': '\\');
 150               d = p - 2;   /* The next *++d overwrites the backslash.  */
                   /* NOTE(review): this appears to become the lower bound of
                      the backward scan above for the following physical
                      line -- confirm.  */
 151               buffer->next_line = p - 1;
 152             }
 153           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 154             {
 155               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 156               add_line_note (buffer, d, s[2]);
 157               if (CPP_OPTION (pfile, trigraphs))
 158                 {
 159                   *d = _cpp_trigraph_map[s[2]];   /* Replace the '?' just copied.  */
 160                   s += 2;                         /* Skip the rest of the trigraph.  */
 161                 }
 162             }
 163         }
 164     }
 165   else
 166     {
 167       do
 168         s++;
 169       while (*s != '\n' && *s != '\r');
 170       d = (uchar *) s;
 171
 172       /* Handle DOS line endings.  */
 173       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 174         s++;
 175     }
 176
 177   *d = '\n';   /* Terminate the cleaned line.  */
 178   /* A sentinel note that should never be processed.  */
 179   add_line_note (buffer, d + 1, '\n');
 180   buffer->next_line = s + 1;
 181 }
 182
 183 /* Process the notes created by add_line_note as far as the current
 184    location.  */
 185 void
 186 _cpp_process_line_notes (pfile, in_comment)
 187      cpp_reader *pfile;
 188      int in_comment;
 189 {
 190   cpp_buffer *buffer = pfile->buffer;
 191
 192   for (;;)
 193     {
 194       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 195       unsigned int col;
 196
 197       if (note->pos > buffer->cur)
 198         break;
 199
 200       buffer->cur_note++;
 201       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 202
 203       if (note->type == '\\' || note->type == ' ')
 204         {
 205           if (note->type == ' ' && !in_comment)
 206             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 207                                  "backslash and newline separated by space");
 208
 209           if (buffer->next_line > buffer->rlimit)
 210             {
 211               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 212                                    "backslash-newline at end of file");
 213               /* Prevent "no newline at end of file" warning.  */
 214               buffer->next_line = buffer->rlimit;
 215             }
 216
 217           buffer->line_base = note->pos;
 218           pfile->line++;
 219         }
 220       else if (_cpp_trigraph_map[note->type])
 221         {
 222           if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
 223             {
 224               if (CPP_OPTION (pfile, trigraphs))
 225                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 226                                      "trigraph ??%c converted to %c",
 227                                      note->type,
 228                                      (int) _cpp_trigraph_map[note->type]);
 229               else
 230                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 231                                      "trigraph ??%c ignored",
 232                                      note->type);
 233             }
 234         }
 235       else
 236         abort ();
 237     }
 238 }
 239
 240 /* Skip a C-style block comment.  We find the end of the comment by
 241    seeing if an asterisk is before every '/' we encounter.  Returns
 242    nonzero if comment terminated by EOF, zero otherwise.
 243
 244    Buffer->cur points to the initial asterisk of the comment.  */
 245 bool
 246 _cpp_skip_block_comment (pfile)
 247      cpp_reader *pfile;
 248 {
 249   cpp_buffer *buffer = pfile->buffer;
 250   cppchar_t c;
 251
 252   buffer->cur++;
 253   if (*buffer->cur == '/')
 254     buffer->cur++;
 255
 256   for (;;)
 257     {
 258       c = *buffer->cur++;
 259
 260       /* People like decorating comments with '*', so check for '/'
 261          instead for efficiency.  */
 262       if (c == '/')
 263         {
 264           if (buffer->cur[-2] == '*')
 265             break;
 266
 267           /* Warn about potential nested comments, but not if the '/'
 268              comes immediately before the true comment delimiter.
 269              Don't bother to get it right across escaped newlines.  */
 270           if (CPP_OPTION (pfile, warn_comments)
 271               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 272             cpp_error_with_line (pfile, DL_WARNING,
 273                                  pfile->line, CPP_BUF_COL (buffer),
 274                                  "\"/*\" within comment");
 275         }
 276       else if (c == '\n')
 277         {
 278           buffer->cur--;
 279           _cpp_process_line_notes (pfile, true);
 280           if (buffer->next_line >= buffer->rlimit)
 281             return true;
 282           _cpp_clean_line (pfile);
 283           pfile->line++;
 284         }
 285     }
 286
 287   return false;
 288 }
 289
 290 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 291    terminating newline.  Handles escaped newlines.  Returns nonzero
 292    if a multiline comment.  */
 293 static int
 294 skip_line_comment (pfile)
 295      cpp_reader *pfile;
 296 {
 297   cpp_buffer *buffer = pfile->buffer;
 298   unsigned int orig_line = pfile->line;
 299
 300   while (*buffer->cur != '\n')
 301     buffer->cur++;
 302
 303   _cpp_process_line_notes (pfile, true);
 304   return orig_line != pfile->line;
 305 }
 306
 307 /* Skips whitespace, saving the next non-whitespace character.  */
 308 static void
 309 skip_whitespace (pfile, c)
 310      cpp_reader *pfile;
 311      cppchar_t c;
 312 {
 313   cpp_buffer *buffer = pfile->buffer;
 314   bool saw_NUL = false;
 315
 316   do
 317     {
 318       /* Horizontal space always OK.  */
 319       if (c == ' ' || c == '\t')
 320         ;
 321       /* Just \f \v or \0 left.  */
 322       else if (c == '\0')
 323         saw_NUL = true;
 324       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 325         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 326                              CPP_BUF_COL (buffer),
 327                              "%s in preprocessing directive",
 328                              c == '\f' ? "form feed" : "vertical tab");
 329
 330       c = *buffer->cur++;
 331     }
 332   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 333   while (is_nvspace (c));
 334
 335   if (saw_NUL)
 336     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 337
 338   buffer->cur--;
 339 }
 340
 341 /* See if the characters of a number token are valid in a name (no
 342    '.', '+' or '-').  */
 343 static int
 344 name_p (pfile, string)
 345      cpp_reader *pfile;
 346      const cpp_string *string;
 347 {
 348   unsigned int i;
 349
 350   for (i = 0; i < string->len; i++)
 351     if (!is_idchar (string->text[i]))
 352       return 0;
 353
 354   return 1;
 355 }
 356
 357 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 358    an identifier.  FIRST is TRUE if this starts an identifier.  */
 359 static bool
 360 forms_identifier_p (pfile, first)
 361      cpp_reader *pfile;
 362      int first;
 363 {
 364   cpp_buffer *buffer = pfile->buffer;
 365
 366   if (*buffer->cur == '$')
 367     {
 368       if (!CPP_OPTION (pfile, dollars_in_ident))
 369         return false;
 370
 371       buffer->cur++;
 372       if (CPP_PEDANTIC (pfile)
 373           && !pfile->state.skipping
 374           && !pfile->warned_dollar)
 375         {
 376           pfile->warned_dollar = true;
 377           cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
 378         }
 379
 380       return true;
 381     }
 382
 383   /* Is this a syntactically valid UCN?  */
 384   if (0 && *buffer->cur == '\\'
 385       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 386     {
 387       buffer->cur += 2;
 388       if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
 389         return true;
 390       buffer->cur -= 2;
 391     }
 392
 393   return false;
 394 }
 395
 396 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 397 static cpp_hashnode *
 398 lex_identifier (pfile, base)
 399      cpp_reader *pfile;
 400      const uchar *base;
 401 {
 402   cpp_hashnode *result;
 403   const uchar *cur;
 404
 405   do
 406     {
 407       cur = pfile->buffer->cur;
 408
 409       /* N.B. ISIDNUM does not include $.  */
 410       while (ISIDNUM (*cur))
 411         cur++;
 412
 413       pfile->buffer->cur = cur;
 414     }
 415   while (forms_identifier_p (pfile, false));
 416
 417   result = (cpp_hashnode *)
 418     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 419
 420   /* Rarely, identifiers require diagnostics when lexed.  */
 421   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 422                         && !pfile->state.skipping, 0))
 423     {
 424       /* It is allowed to poison the same identifier twice.  */
 425       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 426         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 427                    NODE_NAME (result));
 428
 429       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 430          replacement list of a variadic macro.  */
 431       if (result == pfile->spec_nodes.n__VA_ARGS__
 432           && !pfile->state.va_args_ok)
 433         cpp_error (pfile, DL_PEDWARN,
 434         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 435     }
 436
 437   return result;
 438 }
 439
 440 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 441 static void
 442 lex_number (pfile, number)
 443      cpp_reader *pfile;
 444      cpp_string *number;
 445 {
 446   const uchar *cur;
 447   const uchar *base;
 448   uchar *dest;
 449
 450   base = pfile->buffer->cur - 1;
 451   do
 452     {
 453       cur = pfile->buffer->cur;
 454
 455       /* N.B. ISIDNUM does not include $.  */
 456       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 457         cur++;
 458
 459       pfile->buffer->cur = cur;
 460     }
 461   while (forms_identifier_p (pfile, false));
 462
 463   number->len = cur - base;
 464   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 465   memcpy (dest, base, number->len);
 466   dest[number->len] = '\0';
 467   number->text = dest;
 468 }
 469
 470 /* Create a token of type TYPE with a literal spelling.  */
 471 static void
 472 create_literal (pfile, token, base, len, type)
 473      cpp_reader *pfile;
 474      cpp_token *token;
 475      const uchar *base;
 476      unsigned int len;
 477      enum cpp_ttype type;
 478 {
 479   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 480
 481   memcpy (dest, base, len);
 482   dest[len] = '\0';
 483   token->type = type;
 484   token->val.str.len = len;
 485   token->val.str.text = dest;
 486 }
 487
 488 /* Lexes a string, character constant, or angle-bracketed header file
 489    name.  The stored string contains the spelling, including opening
 490    quote and leading any leading 'L'.  It returns the type of the
 491    literal, or CPP_OTHER if it was not properly terminated.
 492
 493    The spelling is NUL-terminated, but it is not guaranteed that this
 494    is the first NUL since embedded NULs are preserved.  */
 495 static void
 496 lex_string (pfile, token, base)
 497      cpp_reader *pfile;
 498      cpp_token *token;
 499      const uchar *base;
 500 {
 501   bool saw_NUL = false;
 502   const uchar *cur;
 503   cppchar_t terminator;
 504   enum cpp_ttype type;
 505
 506   cur = base;
 507   terminator = *cur++;
 508   if (terminator == 'L')
 509     terminator = *cur++;
 510   if (terminator == '\"')
 511     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 512   else if (terminator == '\'')
 513     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 514   else
 515     terminator = '>', type = CPP_HEADER_NAME;
 516
 517   for (;;)
 518     {
 519       cppchar_t c = *cur++;
 520
 521       /* In #include-style directives, terminators are not escapable.  */
 522       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 523         cur++;
 524       else if (c == terminator)
 525         break;
 526       else if (c == '\n')
 527         {
 528           cur--;
 529           type = CPP_OTHER;
 530           break;
 531         }
 532       else if (c == '\0')
 533         saw_NUL = true;
 534     }
 535
 536   if (saw_NUL && !pfile->state.skipping)
 537     cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 538
 539   pfile->buffer->cur = cur;
 540   create_literal (pfile, token, base, cur - base, type);
 541 }
 542
 543 /* The stored comment includes the comment start and any terminator.  */
 544 static void
 545 save_comment (pfile, token, from, type)
 546      cpp_reader *pfile;
 547      cpp_token *token;
 548      const unsigned char *from;
 549      cppchar_t type;
 550 {
 551   unsigned char *buffer;
 552   unsigned int len, clen;
 553
 554   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 555
 556   /* C++ comments probably (not definitely) have moved past a new
 557      line, which we don't want to save in the comment.  */
 558   if (is_vspace (pfile->buffer->cur[-1]))
 559     len--;
 560
 561   /* If we are currently in a directive, then we need to store all
 562      C++ comments as C comments internally, and so we need to
 563      allocate a little extra space in that case.
 564
 565      Note that the only time we encounter a directive here is
 566      when we are saving comments in a "#define".  */
 567   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 568
 569   buffer = _cpp_unaligned_alloc (pfile, clen);
 570
 571   token->type = CPP_COMMENT;
 572   token->val.str.len = clen;
 573   token->val.str.text = buffer;
 574
 575   buffer[0] = '/';
 576   memcpy (buffer + 1, from, len - 1);
 577
 578   /* Finish conversion to a C comment, if necessary.  */
 579   if (pfile->state.in_directive && type == '/')
 580     {
 581       buffer[1] = '*';
 582       buffer[clen - 2] = '*';
 583       buffer[clen - 1] = '/';
 584     }
 585 }
 586
 587 /* Allocate COUNT tokens for RUN.  */
 588 void
 589 _cpp_init_tokenrun (run, count)
 590      tokenrun *run;
 591      unsigned int count;
 592 {
 593   run->base = xnewvec (cpp_token, count);
 594   run->limit = run->base + count;
 595   run->next = NULL;
 596 }
 597
 598 /* Returns the next tokenrun, or creates one if there is none.  */
 599 static tokenrun *
 600 next_tokenrun (run)
 601      tokenrun *run;
 602 {
 603   if (run->next == NULL)
 604     {
 605       run->next = xnew (tokenrun);
 606       run->next->prev = run;
 607       _cpp_init_tokenrun (run->next, 250);
 608     }
 609
 610   return run->next;
 611 }
 612
 613 /* Allocate a single token that is invalidated at the same time as the
 614    rest of the tokens on the line.  Has its line and col set to the
 615    same as the last lexed token, so that diagnostics appear in the
 616    right place.  */
 617 cpp_token *
 618 _cpp_temp_token (pfile)
 619      cpp_reader *pfile;
 620 {
 621   cpp_token *old, *result;
 622
 623   old = pfile->cur_token - 1;
 624   if (pfile->cur_token == pfile->cur_run->limit)
 625     {
 626       pfile->cur_run = next_tokenrun (pfile->cur_run);
 627       pfile->cur_token = pfile->cur_run->base;
 628     }
 629
 630   result = pfile->cur_token++;
 631   result->line = old->line;
 632   result->col = old->col;
 633   return result;
 634 }
 635
 636 /* Lex a token into RESULT (external interface).  Takes care of issues
 637    like directive handling, token lookahead, multiple include
 638    optimization and skipping.  */
 639 const cpp_token *
 640 _cpp_lex_token (pfile)
 641      cpp_reader *pfile;
 642 {
 643   cpp_token *result;
 644
 645   for (;;)
 646     {
 647       if (pfile->cur_token == pfile->cur_run->limit)
 648         {
 649           pfile->cur_run = next_tokenrun (pfile->cur_run);
 650           pfile->cur_token = pfile->cur_run->base;
 651         }
 652
 653       if (pfile->lookaheads)
 654         {
 655           pfile->lookaheads--;
 656           result = pfile->cur_token++;
 657         }
 658       else
 659         result = _cpp_lex_direct (pfile);
 660
 661       if (result->flags & BOL)
 662         {
 663           /* Is this a directive.  If _cpp_handle_directive returns
 664              false, it is an assembler #.  */
 665           if (result->type == CPP_HASH
 666               /* 6.10.3 p 11: Directives in a list of macro arguments
 667                  gives undefined behavior.  This implementation
 668                  handles the directive as normal.  */
 669               && pfile->state.parsing_args != 1
 670               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 671             continue;
 672           if (pfile->cb.line_change && !pfile->state.skipping)
 673             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 674         }
 675
 676       /* We don't skip tokens in directives.  */
 677       if (pfile->state.in_directive)
 678         break;
 679
 680       /* Outside a directive, invalidate controlling macros.  At file
 681          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 682          get here and MI optimisation works.  */
 683       pfile->mi_valid = false;
 684
 685       if (!pfile->state.skipping || result->type == CPP_EOF)
 686         break;
 687     }
 688
 689   return result;
 690 }
 691
 692 /* Returns true if a fresh line has been loaded.  */
 693 bool
 694 _cpp_get_fresh_line (pfile)
 695      cpp_reader *pfile;
 696 {
 697   /* We can't get a new line until we leave the current directive.  */
 698   if (pfile->state.in_directive)
 699     return false;
 700
 701   for (;;)
 702     {
 703       cpp_buffer *buffer = pfile->buffer;
 704
 705       if (!buffer->need_line)
 706         return true;
 707
 708       if (buffer->next_line < buffer->rlimit)
 709         {
 710           _cpp_clean_line (pfile);
 711           return true;
 712         }
 713
 714       /* First, get out of parsing arguments state.  */
 715       if (pfile->state.parsing_args)
 716         return false;
 717
 718       /* End of buffer.  Non-empty files should end in a newline.  */
 719       if (buffer->buf != buffer->rlimit
 720           && buffer->next_line > buffer->rlimit
 721           && !buffer->from_stage3)
 722         {
 723           /* Only warn once.  */
 724           buffer->next_line = buffer->rlimit;
 725           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 726                                CPP_BUF_COLUMN (buffer, buffer->cur),
 727                                "no newline at end of file");
 728         }
 729
 730       if (!buffer->prev)
 731         return false;
 732
 733       if (buffer->return_at_eof)
 734         {
 735           _cpp_pop_buffer (pfile);
 736           return false;
 737         }
 738
 739       _cpp_pop_buffer (pfile);
 740     }
 741 }
 742
 /* Set result->type to THEN_TYPE, and step buffer->cur over the
    character, if the next character in the buffer equals CHAR;
    otherwise set result->type to ELSE_TYPE and leave buffer->cur
    alone.  Relies on variables named `buffer' and `result' being in
    scope at the point of use.  Every parameter is parenthesized so
    that an argument containing a low-precedence operator is still
    used as a whole.  */
 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
   do                                                    \
     {                                                   \
       result->type = (ELSE_TYPE);                       \
       if (*buffer->cur == (CHAR))                       \
         buffer->cur++, result->type = (THEN_TYPE);      \
     }                                                   \
   while (0)
 751
 752 /* Lex a token into pfile->cur_token, which is also incremented, to
 753    get diagnostics pointing to the correct location.
 754
 755    Does not handle issues such as token lookahead, multiple-include
 756    optimisation, directives, skipping etc.  This function is only
 757    suitable for use by _cpp_lex_token, and in special cases like
 758    lex_expansion_token which doesn't care for any of these issues.
 759
 760    When meeting a newline, returns CPP_EOF if parsing a directive,
 761    otherwise returns to the start of the token buffer if permissible.
 762    Returns the location of the lexed token.  */
 763 cpp_token *
 764 _cpp_lex_direct (pfile)
 765      cpp_reader *pfile;
 766 {
 767   cppchar_t c;
 768   cpp_buffer *buffer;
 769   const unsigned char *comment_start;
 770   cpp_token *result = pfile->cur_token++;
 771
 772  fresh_line:
 773   result->flags = 0;
 774   if (pfile->buffer->need_line)
 775     {
 776       if (!_cpp_get_fresh_line (pfile))
 777         {
 778           result->type = CPP_EOF;
 779           return result;
 780         }
 781       if (!pfile->keep_tokens)
 782         {
 783           pfile->cur_run = &pfile->base_run;
 784           result = pfile->base_run.base;
 785           pfile->cur_token = result + 1;
 786         }
 787       result->flags = BOL;
 788       if (pfile->state.parsing_args == 2)
 789         result->flags |= PREV_WHITE;
 790     }
 791   buffer = pfile->buffer;
 792  update_tokens_line:
 793   result->line = pfile->line;
 794
 795  skipped_white:
 796   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 797       && !pfile->overlaid_buffer)
 798     {
 799       _cpp_process_line_notes (pfile, false);
 800       result->line = pfile->line;
 801     }
 802   c = *buffer->cur++;
 803   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 804
 805   switch (c)
 806     {
 807     case ' ': case '\t': case '\f': case '\v': case '\0':
 808       result->flags |= PREV_WHITE;
 809       skip_whitespace (pfile, c);
 810       goto skipped_white;
 811
 812     case '\n':
 813       pfile->line++;
 814       buffer->need_line = true;
 815       goto fresh_line;
 816
 817     case '0': case '1': case '2': case '3': case '4':
 818     case '5': case '6': case '7': case '8': case '9':
 819       result->type = CPP_NUMBER;
 820       lex_number (pfile, &result->val.str);
 821       break;
 822
 823     case 'L':
 824       /* 'L' may introduce wide characters or strings.  */
 825       if (*buffer->cur == '\'' || *buffer->cur == '"')
 826         {
 827           lex_string (pfile, result, buffer->cur - 1);
 828           break;
 829         }
 830       /* Fall through.  */
 831
 832     case '_':
 833     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 834     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 835     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 836     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 837     case 'y': case 'z':
 838     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 839     case 'G': case 'H': case 'I': case 'J': case 'K':
 840     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 841     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 842     case 'Y': case 'Z':
 843       result->type = CPP_NAME;
 844       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 845
 846       /* Convert named operators to their proper types.  */
 847       if (result->val.node->flags & NODE_OPERATOR)
 848         {
 849           result->flags |= NAMED_OP;
 850           result->type = result->val.node->directive_index;
 851         }
 852       break;
 853
 854     case '\'':
 855     case '"':
 856       lex_string (pfile, result, buffer->cur - 1);
 857       break;
 858
 859     case '/':
 860       /* A potential block or line comment.  */
 861       comment_start = buffer->cur;
 862       c = *buffer->cur;
 863
 864       if (c == '*')
 865         {
 866           if (_cpp_skip_block_comment (pfile))
 867             cpp_error (pfile, DL_ERROR, "unterminated comment");
 868         }
 869       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 870                             || CPP_IN_SYSTEM_HEADER (pfile)))
 871         {
 872           /* Warn about comments only if pedantically GNUC89, and not
 873              in system headers.  */
 874           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 875               && ! buffer->warned_cplusplus_comments)
 876             {
 877               cpp_error (pfile, DL_PEDWARN,
 878                          "C++ style comments are not allowed in ISO C90");
 879               cpp_error (pfile, DL_PEDWARN,
 880                          "(this will be reported only once per input file)");
 881               buffer->warned_cplusplus_comments = 1;
 882             }
 883
 884           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 885             cpp_error (pfile, DL_WARNING, "multi-line comment");
 886         }
 887       else if (c == '=')
 888         {
 889           buffer->cur++;
 890           result->type = CPP_DIV_EQ;
 891           break;
 892         }
 893       else
 894         {
 895           result->type = CPP_DIV;
 896           break;
 897         }
 898
 899       if (!pfile->state.save_comments)
 900         {
 901           result->flags |= PREV_WHITE;
 902           goto update_tokens_line;
 903         }
 904
 905       /* Save the comment as a token in its own right.  */
 906       save_comment (pfile, result, comment_start, c);
 907       break;
 908
 909     case '<':
 910       if (pfile->state.angled_headers)
 911         {
 912           lex_string (pfile, result, buffer->cur - 1);
 913           break;
 914         }
 915
 916       result->type = CPP_LESS;
 917       if (*buffer->cur == '=')
 918         buffer->cur++, result->type = CPP_LESS_EQ;
 919       else if (*buffer->cur == '<')
 920         {
 921           buffer->cur++;
 922           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 923         }
 924       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 925         {
 926           buffer->cur++;
 927           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 928         }
 929       else if (CPP_OPTION (pfile, digraphs))
 930         {
 931           if (*buffer->cur == ':')
 932             {
 933               buffer->cur++;
 934               result->flags |= DIGRAPH;
 935               result->type = CPP_OPEN_SQUARE;
 936             }
 937           else if (*buffer->cur == '%')
 938             {
 939               buffer->cur++;
 940               result->flags |= DIGRAPH;
 941               result->type = CPP_OPEN_BRACE;
 942             }
 943         }
 944       break;
 945
 946     case '>':
 947       result->type = CPP_GREATER;
 948       if (*buffer->cur == '=')
 949         buffer->cur++, result->type = CPP_GREATER_EQ;
 950       else if (*buffer->cur == '>')
 951         {
 952           buffer->cur++;
 953           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
 954         }
 955       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 956         {
 957           buffer->cur++;
 958           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
 959         }
 960       break;
 961
 962     case '%':
 963       result->type = CPP_MOD;
 964       if (*buffer->cur == '=')
 965         buffer->cur++, result->type = CPP_MOD_EQ;
 966       else if (CPP_OPTION (pfile, digraphs))
 967         {
 968           if (*buffer->cur == ':')
 969             {
 970               buffer->cur++;
 971               result->flags |= DIGRAPH;
 972               result->type = CPP_HASH;
 973               if (*buffer->cur == '%' && buffer->cur[1] == ':')
 974                 buffer->cur += 2, result->type = CPP_PASTE;
 975             }
 976           else if (*buffer->cur == '>')
 977             {
 978               buffer->cur++;
 979               result->flags |= DIGRAPH;
 980               result->type = CPP_CLOSE_BRACE;
 981             }
 982         }
 983       break;
 984
 985     case '.':
 986       result->type = CPP_DOT;
 987       if (ISDIGIT (*buffer->cur))
 988         {
 989           result->type = CPP_NUMBER;
 990           lex_number (pfile, &result->val.str);
 991         }
 992       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
 993         buffer->cur += 2, result->type = CPP_ELLIPSIS;
 994       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
 995         buffer->cur++, result->type = CPP_DOT_STAR;
 996       break;
 997
 998     case '+':
 999       result->type = CPP_PLUS;
1000       if (*buffer->cur == '+')
1001         buffer->cur++, result->type = CPP_PLUS_PLUS;
1002       else if (*buffer->cur == '=')
1003         buffer->cur++, result->type = CPP_PLUS_EQ;
1004       break;
1005
1006     case '-':
1007       result->type = CPP_MINUS;
1008       if (*buffer->cur == '>')
1009         {
1010           buffer->cur++;
1011           result->type = CPP_DEREF;
1012           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1013             buffer->cur++, result->type = CPP_DEREF_STAR;
1014         }
1015       else if (*buffer->cur == '-')
1016         buffer->cur++, result->type = CPP_MINUS_MINUS;
1017       else if (*buffer->cur == '=')
1018         buffer->cur++, result->type = CPP_MINUS_EQ;
1019       break;
1020
1021     case '&':
1022       result->type = CPP_AND;
1023       if (*buffer->cur == '&')
1024         buffer->cur++, result->type = CPP_AND_AND;
1025       else if (*buffer->cur == '=')
1026         buffer->cur++, result->type = CPP_AND_EQ;
1027       break;
1028
1029     case '|':
1030       result->type = CPP_OR;
1031       if (*buffer->cur == '|')
1032         buffer->cur++, result->type = CPP_OR_OR;
1033       else if (*buffer->cur == '=')
1034         buffer->cur++, result->type = CPP_OR_EQ;
1035       break;
1036
1037     case ':':
1038       result->type = CPP_COLON;
1039       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1040         buffer->cur++, result->type = CPP_SCOPE;
1041       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1042         {
1043           buffer->cur++;
1044           result->flags |= DIGRAPH;
1045           result->type = CPP_CLOSE_SQUARE;
1046         }
1047       break;
1048
1049     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1050     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1051     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1052     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1053     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1054
1055     case '?': result->type = CPP_QUERY; break;
1056     case '~': result->type = CPP_COMPL; break;
1057     case ',': result->type = CPP_COMMA; break;
1058     case '(': result->type = CPP_OPEN_PAREN; break;
1059     case ')': result->type = CPP_CLOSE_PAREN; break;
1060     case '[': result->type = CPP_OPEN_SQUARE; break;
1061     case ']': result->type = CPP_CLOSE_SQUARE; break;
1062     case '{': result->type = CPP_OPEN_BRACE; break;
1063     case '}': result->type = CPP_CLOSE_BRACE; break;
1064     case ';': result->type = CPP_SEMICOLON; break;
1065
1066       /* @ is a punctuator in Objective-C.  */
1067     case '@': result->type = CPP_ATSIGN; break;
1068
1069     case '$':
1070     case '\\':
1071       {
1072         const uchar *base = --buffer->cur;
1073
1074         if (forms_identifier_p (pfile, true))
1075           {
1076             result->type = CPP_NAME;
1077             result->val.node = lex_identifier (pfile, base);
1078             break;
1079           }
1080         buffer->cur++;
1081       }
1082
1083     default:
1084       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1085       break;
1086     }
1087
1088   return result;
1089 }
1090
1091 /* An upper bound on the number of bytes needed to spell TOKEN.
1092    Does not include preceding whitespace.  */
1093 unsigned int
1094 cpp_token_len (token)
1095      const cpp_token *token;
1096 {
1097   unsigned int len;
1098
1099   switch (TOKEN_SPELL (token))
1100     {
1101     default:            len = 4;                                break;
1102     case SPELL_LITERAL: len = token->val.str.len;               break;
1103     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1104     }
1105
1106   return len;
1107 }
1108
1109 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1110    already contain the enough space to hold the token's spelling.
1111    Returns a pointer to the character after the last character
1112    written.  */
1113 unsigned char *
1114 cpp_spell_token (pfile, token, buffer)
1115      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1116      const cpp_token *token;
1117      unsigned char *buffer;
1118 {
1119   switch (TOKEN_SPELL (token))
1120     {
1121     case SPELL_OPERATOR:
1122       {
1123         const unsigned char *spelling;
1124         unsigned char c;
1125
1126         if (token->flags & DIGRAPH)
1127           spelling
1128             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1129         else if (token->flags & NAMED_OP)
1130           goto spell_ident;
1131         else
1132           spelling = TOKEN_NAME (token);
1133
1134         while ((c = *spelling++) != '\0')
1135           *buffer++ = c;
1136       }
1137       break;
1138
1139     spell_ident:
1140     case SPELL_IDENT:
1141       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1142       buffer += NODE_LEN (token->val.node);
1143       break;
1144
1145     case SPELL_LITERAL:
1146       memcpy (buffer, token->val.str.text, token->val.str.len);
1147       buffer += token->val.str.len;
1148       break;
1149
1150     case SPELL_NONE:
1151       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1152       break;
1153     }
1154
1155   return buffer;
1156 }
1157
1158 /* Returns TOKEN spelt as a null-terminated string.  The string is
1159    freed when the reader is destroyed.  Useful for diagnostics.  */
1160 unsigned char *
1161 cpp_token_as_text (pfile, token)
1162      cpp_reader *pfile;
1163      const cpp_token *token;
1164 {
1165   unsigned int len = cpp_token_len (token) + 1;
1166   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1167
1168   end = cpp_spell_token (pfile, token, start);
1169   end[0] = '\0';
1170
1171   return start;
1172 }
1173
1174 /* Used by C front ends, which really should move to using
1175    cpp_token_as_text.  */
1176 const char *
1177 cpp_type2name (type)
1178      enum cpp_ttype type;
1179 {
1180   return (const char *) token_spellings[type].name;
1181 }
1182
1183 /* Writes the spelling of token to FP, without any preceding space.
1184    Separated from cpp_spell_token for efficiency - to avoid stdio
1185    double-buffering.  */
1186 void
1187 cpp_output_token (token, fp)
1188      const cpp_token *token;
1189      FILE *fp;
1190 {
1191   switch (TOKEN_SPELL (token))
1192     {
1193     case SPELL_OPERATOR:
1194       {
1195         const unsigned char *spelling;
1196         int c;
1197
1198         if (token->flags & DIGRAPH)
1199           spelling
1200             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1201         else if (token->flags & NAMED_OP)
1202           goto spell_ident;
1203         else
1204           spelling = TOKEN_NAME (token);
1205
1206         c = *spelling;
1207         do
1208           putc (c, fp);
1209         while ((c = *++spelling) != '\0');
1210       }
1211       break;
1212
1213     spell_ident:
1214     case SPELL_IDENT:
1215       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1216     break;
1217
1218     case SPELL_LITERAL:
1219       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1220       break;
1221
1222     case SPELL_NONE:
1223       /* An error, most probably.  */
1224       break;
1225     }
1226 }
1227
1228 /* Compare two tokens.  */
1229 int
1230 _cpp_equiv_tokens (a, b)
1231      const cpp_token *a, *b;
1232 {
1233   if (a->type == b->type && a->flags == b->flags)
1234     switch (TOKEN_SPELL (a))
1235       {
1236       default:                  /* Keep compiler happy.  */
1237       case SPELL_OPERATOR:
1238         return 1;
1239       case SPELL_NONE:
1240         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1241       case SPELL_IDENT:
1242         return a->val.node == b->val.node;
1243       case SPELL_LITERAL:
1244         return (a->val.str.len == b->val.str.len
1245                 && !memcmp (a->val.str.text, b->val.str.text,
1246                             a->val.str.len));
1247       }
1248
1249   return 0;
1250 }
1251
1252 /* Returns nonzero if a space should be inserted to avoid an
1253    accidental token paste for output.  For simplicity, it is
1254    conservative, and occasionally advises a space where one is not
1255    needed, e.g. "." and ".2".  */
1256 int
1257 cpp_avoid_paste (pfile, token1, token2)
1258      cpp_reader *pfile;
1259      const cpp_token *token1, *token2;
1260 {
1261   enum cpp_ttype a = token1->type, b = token2->type;
1262   cppchar_t c;
1263
1264   if (token1->flags & NAMED_OP)
1265     a = CPP_NAME;
1266   if (token2->flags & NAMED_OP)
1267     b = CPP_NAME;
1268
1269   c = EOF;
1270   if (token2->flags & DIGRAPH)
1271     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1272   else if (token_spellings[b].category == SPELL_OPERATOR)
1273     c = token_spellings[b].name[0];
1274
1275   /* Quickly get everything that can paste with an '='.  */
1276   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1277     return 1;
1278
1279   switch (a)
1280     {
1281     case CPP_GREATER:   return c == '>' || c == '?';
1282     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1283     case CPP_PLUS:      return c == '+';
1284     case CPP_MINUS:     return c == '-' || c == '>';
1285     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1286     case CPP_MOD:       return c == ':' || c == '>';
1287     case CPP_AND:       return c == '&';
1288     case CPP_OR:        return c == '|';
1289     case CPP_COLON:     return c == ':' || c == '>';
1290     case CPP_DEREF:     return c == '*';
1291     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1292     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1293     case CPP_NAME:      return ((b == CPP_NUMBER
1294                                  && name_p (pfile, &token2->val.str))
1295                                 || b == CPP_NAME
1296                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1297     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1298                                 || c == '.' || c == '+' || c == '-');
1299                                       /* UCNs */
1300     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1301                                  && b == CPP_NAME)
1302                                 || (CPP_OPTION (pfile, objc)
1303                                     && token1->val.str.text[0] == '@'
1304                                     && (b == CPP_NAME || b == CPP_STRING)));
1305     default:            break;
1306     }
1307
1308   return 0;
1309 }
1310
1311 /* Output all the remaining tokens on the current line, and a newline
1312    character, to FP.  Leading whitespace is removed.  If there are
1313    macros, special token padding is not performed.  */
1314 void
1315 cpp_output_line (pfile, fp)
1316      cpp_reader *pfile;
1317      FILE *fp;
1318 {
1319   const cpp_token *token;
1320
1321   token = cpp_get_token (pfile);
1322   while (token->type != CPP_EOF)
1323     {
1324       cpp_output_token (token, fp);
1325       token = cpp_get_token (pfile);
1326       if (token->flags & PREV_WHITE)
1327         putc (' ', fp);
1328     }
1329
1330   putc ('\n', fp);
1331 }
1332
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1343
1344 /* Read a possible universal character name starting at *PSTR.  */
1345 static cppchar_t
1346 maybe_read_ucn (pfile, pstr)
1347      cpp_reader *pfile;
1348      const uchar **pstr;
1349 {
1350   cppchar_t result, c = (*pstr)[-1];
1351
1352   result = _cpp_valid_ucn (pfile, pstr, false);
1353   if (result)
1354     {
1355       if (CPP_WTRADITIONAL (pfile))
1356         cpp_error (pfile, DL_WARNING,
1357                    "the meaning of '\\%c' is different in traditional C",
1358                    (int) c);
1359
1360       if (CPP_OPTION (pfile, EBCDIC))
1361         {
1362           cpp_error (pfile, DL_ERROR,
1363                      "universal character with an EBCDIC target");
1364           result = 0x3f;  /* EBCDIC invalid character */
1365         }
1366     }
1367
1368   return result;
1369 }
1370
1371 /* Returns the value of an escape sequence, truncated to the correct
1372    target precision.  PSTR points to the input pointer, which is just
1373    after the backslash.  LIMIT is how much text we have.  WIDE is true
1374    if the escape sequence is part of a wide character constant or
1375    string literal.  Handles all relevant diagnostics.  */
1376 cppchar_t
1377 cpp_parse_escape (pfile, pstr, limit, wide)
1378      cpp_reader *pfile;
1379      const unsigned char **pstr;
1380      const unsigned char *limit;
1381      int wide;
1382 {
1383   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1384   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1385   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1386
1387   int unknown = 0;
1388   const unsigned char *str = *pstr, *charconsts;
1389   cppchar_t c, ucn, mask;
1390   unsigned int width;
1391
1392   if (CPP_OPTION (pfile, EBCDIC))
1393     charconsts = ebcdic;
1394   else
1395     charconsts = ascii;
1396
1397   if (wide)
1398     width = CPP_OPTION (pfile, wchar_precision);
1399   else
1400     width = CPP_OPTION (pfile, char_precision);
1401   if (width < BITS_PER_CPPCHAR_T)
1402     mask = ((cppchar_t) 1 << width) - 1;
1403   else
1404     mask = ~0;
1405
1406   c = *str++;
1407   switch (c)
1408     {
1409     case '\\': case '\'': case '"': case '?': break;
1410     case 'b': c = charconsts[1];  break;
1411     case 'f': c = charconsts[3];  break;
1412     case 'n': c = charconsts[4];  break;
1413     case 'r': c = charconsts[5];  break;
1414     case 't': c = charconsts[6];  break;
1415     case 'v': c = charconsts[7];  break;
1416
1417     case '(': case '{': case '[': case '%':
1418       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1419          '\%' is used to prevent SCCS from getting confused.  */
1420       unknown = CPP_PEDANTIC (pfile);
1421       break;
1422
1423     case 'a':
1424       if (CPP_WTRADITIONAL (pfile))
1425         cpp_error (pfile, DL_WARNING,
1426                    "the meaning of '\\a' is different in traditional C");
1427       c = charconsts[0];
1428       break;
1429
1430     case 'e': case 'E':
1431       if (CPP_PEDANTIC (pfile))
1432         cpp_error (pfile, DL_PEDWARN,
1433                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1434       c = charconsts[2];
1435       break;
1436
1437     case 'u': case 'U':
1438       ucn = maybe_read_ucn (pfile, &str);
1439       if (ucn)
1440         c = ucn;
1441       else
1442         unknown = true;
1443       break;
1444
1445     case 'x':
1446       if (CPP_WTRADITIONAL (pfile))
1447         cpp_error (pfile, DL_WARNING,
1448                    "the meaning of '\\x' is different in traditional C");
1449
1450       {
1451         cppchar_t i = 0, overflow = 0;
1452         int digits_found = 0;
1453
1454         while (str < limit)
1455           {
1456             c = *str;
1457             if (! ISXDIGIT (c))
1458               break;
1459             str++;
1460             overflow |= i ^ (i << 4 >> 4);
1461             i = (i << 4) + hex_digit_value (c);
1462             digits_found = 1;
1463           }
1464
1465         if (!digits_found)
1466           cpp_error (pfile, DL_ERROR,
1467                        "\\x used with no following hex digits");
1468
1469         if (overflow | (i != (i & mask)))
1470           {
1471             cpp_error (pfile, DL_PEDWARN,
1472                        "hex escape sequence out of range");
1473             i &= mask;
1474           }
1475         c = i;
1476       }
1477       break;
1478
1479     case '0':  case '1':  case '2':  case '3':
1480     case '4':  case '5':  case '6':  case '7':
1481       {
1482         size_t count = 0;
1483         cppchar_t i = c - '0';
1484
1485         while (str < limit && ++count < 3)
1486           {
1487             c = *str;
1488             if (c < '0' || c > '7')
1489               break;
1490             str++;
1491             i = (i << 3) + c - '0';
1492           }
1493
1494         if (i != (i & mask))
1495           {
1496             cpp_error (pfile, DL_PEDWARN,
1497                        "octal escape sequence out of range");
1498             i &= mask;
1499           }
1500         c = i;
1501       }
1502       break;
1503
1504     default:
1505       unknown = 1;
1506       break;
1507     }
1508
1509   if (unknown)
1510     {
1511       if (ISGRAPH (c))
1512         cpp_error (pfile, DL_PEDWARN,
1513                    "unknown escape sequence '\\%c'", (int) c);
1514       else
1515         cpp_error (pfile, DL_PEDWARN,
1516                    "unknown escape sequence: '\\%03o'", (int) c);
1517     }
1518
1519   if (c > mask)
1520     {
1521       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1522       c &= mask;
1523     }
1524
1525   *pstr = str;
1526   return c;
1527 }
1528
1529 /* Interpret a (possibly wide) character constant in TOKEN.
1530    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1531    points to a variable that is filled in with the number of
1532    characters seen, and UNSIGNEDP to a variable that indicates whether
1533    the result has signed type.  */
1534 cppchar_t
1535 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1536      cpp_reader *pfile;
1537      const cpp_token *token;
1538      unsigned int *pchars_seen;
1539      int *unsignedp;
1540 {
1541   const unsigned char *str, *limit;
1542   unsigned int chars_seen = 0;
1543   size_t width, max_chars;
1544   cppchar_t c, mask, result = 0;
1545   bool unsigned_p;
1546
1547   str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1548   limit = token->val.str.text + token->val.str.len - 1;
1549
1550   if (token->type == CPP_CHAR)
1551     {
1552       width = CPP_OPTION (pfile, char_precision);
1553       max_chars = CPP_OPTION (pfile, int_precision) / width;
1554       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1555     }
1556   else
1557     {
1558       width = CPP_OPTION (pfile, wchar_precision);
1559       max_chars = 1;
1560       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1561     }
1562
1563   if (width < BITS_PER_CPPCHAR_T)
1564     mask = ((cppchar_t) 1 << width) - 1;
1565   else
1566     mask = ~0;
1567
1568   while (str < limit)
1569     {
1570       c = *str++;
1571
1572       if (c == '\\')
1573         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1574
1575 #ifdef MAP_CHARACTER
1576       if (ISPRINT (c))
1577         c = MAP_CHARACTER (c);
1578 #endif
1579
1580       chars_seen++;
1581
1582       /* Truncate the character, scale the result and merge the two.  */
1583       c &= mask;
1584       if (width < BITS_PER_CPPCHAR_T)
1585         result = (result << width) | c;
1586       else
1587         result = c;
1588     }
1589
1590   if (chars_seen == 0)
1591     cpp_error (pfile, DL_ERROR, "empty character constant");
1592   else if (chars_seen > 1)
1593     {
1594       /* Multichar charconsts are of type int and therefore signed.  */
1595       unsigned_p = 0;
1596
1597       if (chars_seen > max_chars)
1598         {
1599           chars_seen = max_chars;
1600           cpp_error (pfile, DL_WARNING,
1601                      "character constant too long for its type");
1602         }
1603       else if (CPP_OPTION (pfile, warn_multichar))
1604         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1605     }
1606
1607   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1608      in WIDTH bits, but for multi-char charconsts it's value is the
1609      full target type's width.  */
1610   if (chars_seen > 1)
1611     width *= max_chars;
1612   if (width < BITS_PER_CPPCHAR_T)
1613     {
1614       mask = ((cppchar_t) 1 << width) - 1;
1615       if (unsigned_p || !(result & (1 << (width - 1))))
1616         result &= mask;
1617       else
1618         result |= ~mask;
1619     }
1620
1621   *pchars_seen = chars_seen;
1622   *unsignedp = unsigned_p;
1623   return result;
1624 }
1625
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice BUFF's remaining room.  Each
   macro argument is parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1640
1641 /* Create a new allocation buffer.  Place the control block at the end
1642    of the buffer, so that buffer overflows will cause immediate chaos.  */
1643 static _cpp_buff *
1644 new_buff (len)
1645      size_t len;
1646 {
1647   _cpp_buff *result;
1648   unsigned char *base;
1649
1650   if (len < MIN_BUFF_SIZE)
1651     len = MIN_BUFF_SIZE;
1652   len = CPP_ALIGN (len);
1653
1654   base = xmalloc (len + sizeof (_cpp_buff));
1655   result = (_cpp_buff *) (base + len);
1656   result->base = base;
1657   result->cur = base;
1658   result->limit = base + len;
1659   result->next = NULL;
1660   return result;
1661 }
1662
1663 /* Place a chain of unwanted allocation buffers on the free list.  */
1664 void
1665 _cpp_release_buff (pfile, buff)
1666      cpp_reader *pfile;
1667      _cpp_buff *buff;
1668 {
1669   _cpp_buff *end = buff;
1670
1671   while (end->next)
1672     end = end->next;
1673   end->next = pfile->free_buffs;
1674   pfile->free_buffs = buff;
1675 }
1676
1677 /* Return a free buffer of size at least MIN_SIZE.  */
1678 _cpp_buff *
1679 _cpp_get_buff (pfile, min_size)
1680      cpp_reader *pfile;
1681      size_t min_size;
1682 {
1683   _cpp_buff *result, **p;
1684
1685   for (p = &pfile->free_buffs;; p = &(*p)->next)
1686     {
1687       size_t size;
1688
1689       if (*p == NULL)
1690         return new_buff (min_size);
1691       result = *p;
1692       size = result->limit - result->base;
1693       /* Return a buffer that's big enough, but don't waste one that's
1694          way too big.  */
1695       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1696         break;
1697     }
1698
1699   *p = result->next;
1700   result->next = NULL;
1701   result->cur = result->base;
1702   return result;
1703 }
1704
1705 /* Creates a new buffer with enough space to hold the uncommitted
1706    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1707    the excess bytes to the new buffer.  Chains the new buffer after
1708    BUFF, and returns the new buffer.  */
1709 _cpp_buff *
1710 _cpp_append_extend_buff (pfile, buff, min_extra)
1711      cpp_reader *pfile;
1712      _cpp_buff *buff;
1713      size_t min_extra;
1714 {
1715   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1716   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1717
1718   buff->next = new_buff;
1719   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1720   return new_buff;
1721 }
1722
1723 /* Creates a new buffer with enough space to hold the uncommitted
1724    remaining bytes of the buffer pointed to by BUFF, and at least
1725    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1726    Chains the new buffer before the buffer pointed to by BUFF, and
1727    updates the pointer to point to the new buffer.  */
1728 void
1729 _cpp_extend_buff (pfile, pbuff, min_extra)
1730      cpp_reader *pfile;
1731      _cpp_buff **pbuff;
1732      size_t min_extra;
1733 {
1734   _cpp_buff *new_buff, *old_buff = *pbuff;
1735   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1736
1737   new_buff = _cpp_get_buff (pfile, size);
1738   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1739   new_buff->next = old_buff;
1740   *pbuff = new_buff;
1741 }
1742
1743 /* Free a chain of buffers starting at BUFF.  */
1744 void
1745 _cpp_free_buff (buff)
1746      _cpp_buff *buff;
1747 {
1748   _cpp_buff *next;
1749
1750   for (; buff; buff = next)
1751     {
1752       next = buff->next;
1753       free (buff->base);
1754     }
1755 }
1756
1757 /* Allocate permanent, unaligned storage of length LEN.  */
1758 unsigned char *
1759 _cpp_unaligned_alloc (pfile, len)
1760      cpp_reader *pfile;
1761      size_t len;
1762 {
1763   _cpp_buff *buff = pfile->u_buff;
1764   unsigned char *result = buff->cur;
1765
1766   if (len > (size_t) (buff->limit - result))
1767     {
1768       buff = _cpp_get_buff (pfile, len);
1769       buff->next = pfile->u_buff;
1770       pfile->u_buff = buff;
1771       result = buff->cur;
1772     }
1773
1774   buff->cur = result + len;
1775   return result;
1776 }
1777
1778 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1779    That buffer is used for growing allocations when saving macro
1780    replacement lists in a #define, and when parsing an answer to an
1781    assertion in #assert, #unassert or #if (and therefore possibly
1782    whilst expanding macros).  It therefore must not be used by any
1783    code that they might call: specifically the lexer and the guts of
1784    the macro expander.
1785
1786    All existing other uses clearly fit this restriction: storing
1787    registered pragmas during initialization.  */
1788 unsigned char *
1789 _cpp_aligned_alloc (pfile, len)
1790      cpp_reader *pfile;
1791      size_t len;
1792 {
1793   _cpp_buff *buff = pfile->a_buff;
1794   unsigned char *result = buff->cur;
1795
1796   if (len > (size_t) (buff->limit - result))
1797     {
1798       buff = _cpp_get_buff (pfile, len);
1799       buff->next = pfile->a_buff;
1800       pfile->a_buff = buff;
1801       result = buff->cur;
1802     }
1803
1804   buff->cur = result + len;
1805   return result;
1806 }