gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 enum spell_type
  30 {
  31   SPELL_OPERATOR = 0,
  32   SPELL_IDENT,
  33   SPELL_LITERAL,
  34   SPELL_NONE
  35 };
  36
/* One entry of the token_spellings table: a spelling category paired
   with a name string.  The OP and TK macros initialize entries
   positionally, so the order of the two fields must not change.  */
struct token_spelling
{
  enum spell_type category;     /* Read back by TOKEN_SPELL.  */
  const unsigned char *name;    /* Read back by TOKEN_NAME.  */
};
  42
/* Spellings of the six digraphs.
   NOTE(review): the order of the entries presumably matters to the
   code that indexes this table (not visible in this part of the
   file) -- confirm before reordering.  */
static const unsigned char *const digraph_spellings[] =
{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  45
/* Build token_spellings by expanding TTYPE_TABLE with OP and TK
   temporarily defined: OP (e, s) yields an entry whose category is
   SPELL_OPERATOR and whose name is S; TK (e, s) yields an entry whose
   category is S and whose name is the stringified E.  TTYPE_TABLE is
   not defined in this part of the file; presumably it comes from one
   of the headers included above.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U #e },
static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#undef OP
#undef TK

/* Look up the spelling category and the name of TOKEN, indexing the
   table by TOKEN's type.  */
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  54
/* Forward declarations of the file-local helpers, so that they can be
   used before their definitions.  */
static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
static int skip_line_comment (cpp_reader *);
static void skip_whitespace (cpp_reader *, cppchar_t);
static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
static void lex_number (cpp_reader *, cpp_string *);
static bool forms_identifier_p (cpp_reader *, int);
static void lex_string (cpp_reader *, cpp_token *, const uchar *);
static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
static void create_literal (cpp_reader *, cpp_token *, const uchar *,
                            unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
static tokenrun *next_tokenrun (tokenrun *);

static unsigned int hex_digit_value (unsigned int);
static _cpp_buff *new_buff (size_t);
  72
  73
  74 /* Utility routine:
  75
  76    Compares, the token TOKEN to the NUL-terminated string STRING.
  77    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  78 int
  79 cpp_ideq (const cpp_token *token, const char *string)
  80 {
  81   if (token->type != CPP_NAME)
  82     return 0;
  83
  84   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  85 }
  86
  87 /* Record a note TYPE at byte POS into the current cleaned logical
  88    line.  */
  89 static void
  90 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
  91 {
  92   if (buffer->notes_used == buffer->notes_cap)
  93     {
  94       buffer->notes_cap = buffer->notes_cap * 2 + 200;
  95       buffer->notes = (_cpp_line_note *)
  96         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
  97     }
  98
  99   buffer->notes[buffer->notes_used].pos = pos;
 100   buffer->notes[buffer->notes_used].type = type;
 101   buffer->notes_used++;
 102 }
 103
/* Returns with a logical line that contains no escaped newlines or
   trigraphs.  This is a time-critical inner loop.

   The line starting at buffer->next_line is cleaned in place: S reads
   the raw text while D writes the cleaned text back into the same
   storage.  On return buffer->cur and buffer->line_base point to the
   start of the line, the cleaned line is terminated by '\n', and
   buffer->next_line points just past the raw text consumed.  The
   notes array is reset; a note is recorded for every removed
   backslash-newline and for every trigraph seen, followed by a
   sentinel note.  When buffer->from_stage3 is set the text is only
   scanned for its end and nothing but the sentinel is recorded.  */
void
_cpp_clean_line (cpp_reader *pfile)
{
  cpp_buffer *buffer;
  const uchar *s;
  uchar c, *d, *p;

  buffer = pfile->buffer;
  buffer->cur_note = buffer->notes_used = 0;
  buffer->cur = buffer->line_base = buffer->next_line;
  buffer->need_line = false;
  /* S and D are pre-incremented in the loops below, so start one
     byte before the line.  */
  s = buffer->next_line - 1;

  if (!buffer->from_stage3)
    {
      d = (uchar *) s;

      for (;;)
        {
          /* Copy one character from the read to the write position.  */
          c = *++s;
          *++d = c;

          if (c == '\n' || c == '\r')
            {
              /* Handle DOS line endings.  */
              if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
                s++;
              /* Stop once S has reached buffer->rlimit.  */
              if (s == buffer->rlimit)
                break;

              /* Escaped?  Scan backwards over non-vertical whitespace
                 looking for a backslash; the scan never goes below
                 buffer->next_line.  */
              p = d;
              while (p != buffer->next_line && is_nvspace (p[-1]))
                p--;
              if (p == buffer->next_line || p[-1] != '\\')
                break;

              /* Note the backslash position.  The type is ' ' when
                 whitespace separated the backslash from the newline,
                 and '\\' otherwise.  */
              add_line_note (buffer, p - 1, p != d ? ' ': '\\');
              /* Back D up so that the next character stored
                 overwrites the backslash.  */
              d = p - 2;
              /* Later backward scans stop at this splice point.  */
              buffer->next_line = p - 1;
            }
          else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
            {
              /* Add a note regardless, for the benefit of -Wtrigraphs.  */
              add_line_note (buffer, d, s[2]);
              if (CPP_OPTION (pfile, trigraphs))
                {
                  /* Replace the '?' just stored with the mapped
                     character and skip the other two characters.  */
                  *d = _cpp_trigraph_map[s[2]];
                  s += 2;
                }
            }
        }
    }
  else
    {
      /* Just find the end of the line; the text is left untouched.  */
      do
        s++;
      while (*s != '\n' && *s != '\r');
      d = (uchar *) s;

      /* Handle DOS line endings.  */
      if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
        s++;
    }

  /* Terminate the cleaned line with a plain newline.  */
  *d = '\n';
  /* A sentinel note that should never be processed.  */
  add_line_note (buffer, d + 1, '\n');
  buffer->next_line = s + 1;
}
 176
 177 /* Return true if the trigraph indicated by NOTE should be warned
 178    about in a comment.  */
 179 static bool
 180 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
 181 {
 182   const uchar *p;
 183
 184   /* Within comments we don't warn about trigraphs, unless the
 185      trigraph forms an escaped newline, as that may change
 186      behavior.  */
 187   if (note->type != '/')
 188     return false;
 189
 190   /* If -trigraphs, then this was an escaped newline iff the next note
 191      is coincident.  */
 192   if (CPP_OPTION (pfile, trigraphs))
 193     return note[1].pos == note->pos;
 194
 195   /* Otherwise, see if this forms an escaped newline.  */
 196   p = note->pos + 3;
 197   while (is_nvspace (*p))
 198     p++;
 199
 200   /* There might have been escaped newlines between the trigraph and the
 201      newline we found.  Hence the position test.  */
 202   return (*p == '\n' && p < note[1].pos);
 203 }
 204
 205 /* Process the notes created by add_line_note as far as the current
 206    location.  */
 207 void
 208 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
 209 {
 210   cpp_buffer *buffer = pfile->buffer;
 211
 212   for (;;)
 213     {
 214       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 215       unsigned int col;
 216
 217       if (note->pos > buffer->cur)
 218         break;
 219
 220       buffer->cur_note++;
 221       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 222
 223       if (note->type == '\\' || note->type == ' ')
 224         {
 225           if (note->type == ' ' && !in_comment)
 226             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 227                                  "backslash and newline separated by space");
 228
 229           if (buffer->next_line > buffer->rlimit)
 230             {
 231               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 232                                    "backslash-newline at end of file");
 233               /* Prevent "no newline at end of file" warning.  */
 234               buffer->next_line = buffer->rlimit;
 235             }
 236
 237           buffer->line_base = note->pos;
 238           pfile->line++;
 239         }
 240       else if (_cpp_trigraph_map[note->type])
 241         {
 242           if (CPP_OPTION (pfile, warn_trigraphs)
 243               && (!in_comment || warn_in_comment (pfile, note)))
 244             {
 245               if (CPP_OPTION (pfile, trigraphs))
 246                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 247                                      "trigraph ??%c converted to %c",
 248                                      note->type,
 249                                      (int) _cpp_trigraph_map[note->type]);
 250               else
 251                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 252                                      "trigraph ??%c ignored",
 253                                      note->type);
 254             }
 255         }
 256       else
 257         abort ();
 258     }
 259 }
 260
 261 /* Skip a C-style block comment.  We find the end of the comment by
 262    seeing if an asterisk is before every '/' we encounter.  Returns
 263    nonzero if comment terminated by EOF, zero otherwise.
 264
 265    Buffer->cur points to the initial asterisk of the comment.  */
 266 bool
 267 _cpp_skip_block_comment (cpp_reader *pfile)
 268 {
 269   cpp_buffer *buffer = pfile->buffer;
 270   cppchar_t c;
 271
 272   buffer->cur++;
 273   if (*buffer->cur == '/')
 274     buffer->cur++;
 275
 276   for (;;)
 277     {
 278       c = *buffer->cur++;
 279
 280       /* People like decorating comments with '*', so check for '/'
 281          instead for efficiency.  */
 282       if (c == '/')
 283         {
 284           if (buffer->cur[-2] == '*')
 285             break;
 286
 287           /* Warn about potential nested comments, but not if the '/'
 288              comes immediately before the true comment delimiter.
 289              Don't bother to get it right across escaped newlines.  */
 290           if (CPP_OPTION (pfile, warn_comments)
 291               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 292             cpp_error_with_line (pfile, DL_WARNING,
 293                                  pfile->line, CPP_BUF_COL (buffer),
 294                                  "\"/*\" within comment");
 295         }
 296       else if (c == '\n')
 297         {
 298           buffer->cur--;
 299           _cpp_process_line_notes (pfile, true);
 300           if (buffer->next_line >= buffer->rlimit)
 301             return true;
 302           _cpp_clean_line (pfile);
 303           pfile->line++;
 304         }
 305     }
 306
 307   _cpp_process_line_notes (pfile, true);
 308   return false;
 309 }
 310
 311 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 312    terminating newline.  Handles escaped newlines.  Returns nonzero
 313    if a multiline comment.  */
 314 static int
 315 skip_line_comment (cpp_reader *pfile)
 316 {
 317   cpp_buffer *buffer = pfile->buffer;
 318   unsigned int orig_line = pfile->line;
 319
 320   while (*buffer->cur != '\n')
 321     buffer->cur++;
 322
 323   _cpp_process_line_notes (pfile, true);
 324   return orig_line != pfile->line;
 325 }
 326
 327 /* Skips whitespace, saving the next non-whitespace character.  */
 328 static void
 329 skip_whitespace (cpp_reader *pfile, cppchar_t c)
 330 {
 331   cpp_buffer *buffer = pfile->buffer;
 332   bool saw_NUL = false;
 333
 334   do
 335     {
 336       /* Horizontal space always OK.  */
 337       if (c == ' ' || c == '\t')
 338         ;
 339       /* Just \f \v or \0 left.  */
 340       else if (c == '\0')
 341         saw_NUL = true;
 342       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 343         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 344                              CPP_BUF_COL (buffer),
 345                              "%s in preprocessing directive",
 346                              c == '\f' ? "form feed" : "vertical tab");
 347
 348       c = *buffer->cur++;
 349     }
 350   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 351   while (is_nvspace (c));
 352
 353   if (saw_NUL)
 354     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 355
 356   buffer->cur--;
 357 }
 358
 359 /* See if the characters of a number token are valid in a name (no
 360    '.', '+' or '-').  */
 361 static int
 362 name_p (cpp_reader *pfile, const cpp_string *string)
 363 {
 364   unsigned int i;
 365
 366   for (i = 0; i < string->len; i++)
 367     if (!is_idchar (string->text[i]))
 368       return 0;
 369
 370   return 1;
 371 }
 372
 373 /* Returns TRUE if the sequence starting at buffer->cur is invalid in
 374    an identifier.  FIRST is TRUE if this starts an identifier.  */
 375 static bool
 376 forms_identifier_p (cpp_reader *pfile, int first)
 377 {
 378   cpp_buffer *buffer = pfile->buffer;
 379
 380   if (*buffer->cur == '$')
 381     {
 382       if (!CPP_OPTION (pfile, dollars_in_ident))
 383         return false;
 384
 385       buffer->cur++;
 386       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
 387         {
 388           CPP_OPTION (pfile, warn_dollars) = 0;
 389           cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
 390         }
 391
 392       return true;
 393     }
 394
 395   /* Is this a syntactically valid UCN?  */
 396   if (0 && *buffer->cur == '\\'
 397       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
 398     {
 399       buffer->cur += 2;
 400       if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
 401         return true;
 402       buffer->cur -= 2;
 403     }
 404
 405   return false;
 406 }
 407
 408 /* Lex an identifier starting at BUFFER->CUR - 1.  */
 409 static cpp_hashnode *
 410 lex_identifier (cpp_reader *pfile, const uchar *base)
 411 {
 412   cpp_hashnode *result;
 413   const uchar *cur;
 414
 415   do
 416     {
 417       cur = pfile->buffer->cur;
 418
 419       /* N.B. ISIDNUM does not include $.  */
 420       while (ISIDNUM (*cur))
 421         cur++;
 422
 423       pfile->buffer->cur = cur;
 424     }
 425   while (forms_identifier_p (pfile, false));
 426
 427   result = (cpp_hashnode *)
 428     ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 429
 430   /* Rarely, identifiers require diagnostics when lexed.  */
 431   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 432                         && !pfile->state.skipping, 0))
 433     {
 434       /* It is allowed to poison the same identifier twice.  */
 435       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 436         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 437                    NODE_NAME (result));
 438
 439       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 440          replacement list of a variadic macro.  */
 441       if (result == pfile->spec_nodes.n__VA_ARGS__
 442           && !pfile->state.va_args_ok)
 443         cpp_error (pfile, DL_PEDWARN,
 444                    "__VA_ARGS__ can only appear in the expansion"
 445                    " of a C99 variadic macro");
 446     }
 447
 448   return result;
 449 }
 450
 451 /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
 452 static void
 453 lex_number (cpp_reader *pfile, cpp_string *number)
 454 {
 455   const uchar *cur;
 456   const uchar *base;
 457   uchar *dest;
 458
 459   base = pfile->buffer->cur - 1;
 460   do
 461     {
 462       cur = pfile->buffer->cur;
 463
 464       /* N.B. ISIDNUM does not include $.  */
 465       while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 466         cur++;
 467
 468       pfile->buffer->cur = cur;
 469     }
 470   while (forms_identifier_p (pfile, false));
 471
 472   number->len = cur - base;
 473   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 474   memcpy (dest, base, number->len);
 475   dest[number->len] = '\0';
 476   number->text = dest;
 477 }
 478
 479 /* Create a token of type TYPE with a literal spelling.  */
 480 static void
 481 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 482                 unsigned int len, enum cpp_ttype type)
 483 {
 484   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
 485
 486   memcpy (dest, base, len);
 487   dest[len] = '\0';
 488   token->type = type;
 489   token->val.str.len = len;
 490   token->val.str.text = dest;
 491 }
 492
 493 /* Lexes a string, character constant, or angle-bracketed header file
 494    name.  The stored string contains the spelling, including opening
 495    quote and leading any leading 'L'.  It returns the type of the
 496    literal, or CPP_OTHER if it was not properly terminated.
 497
 498    The spelling is NUL-terminated, but it is not guaranteed that this
 499    is the first NUL since embedded NULs are preserved.  */
 500 static void
 501 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 502 {
 503   bool saw_NUL = false;
 504   const uchar *cur;
 505   cppchar_t terminator;
 506   enum cpp_ttype type;
 507
 508   cur = base;
 509   terminator = *cur++;
 510   if (terminator == 'L')
 511     terminator = *cur++;
 512   if (terminator == '\"')
 513     type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
 514   else if (terminator == '\'')
 515     type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
 516   else
 517     terminator = '>', type = CPP_HEADER_NAME;
 518
 519   for (;;)
 520     {
 521       cppchar_t c = *cur++;
 522
 523       /* In #include-style directives, terminators are not escapable.  */
 524       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
 525         cur++;
 526       else if (c == terminator)
 527         break;
 528       else if (c == '\n')
 529         {
 530           cur--;
 531           type = CPP_OTHER;
 532           break;
 533         }
 534       else if (c == '\0')
 535         saw_NUL = true;
 536     }
 537
 538   if (saw_NUL && !pfile->state.skipping)
 539     cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 540
 541   pfile->buffer->cur = cur;
 542   create_literal (pfile, token, base, cur - base, type);
 543 }
 544
 545 /* The stored comment includes the comment start and any terminator.  */
 546 static void
 547 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
 548               cppchar_t type)
 549 {
 550   unsigned char *buffer;
 551   unsigned int len, clen;
 552
 553   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 554
 555   /* C++ comments probably (not definitely) have moved past a new
 556      line, which we don't want to save in the comment.  */
 557   if (is_vspace (pfile->buffer->cur[-1]))
 558     len--;
 559
 560   /* If we are currently in a directive, then we need to store all
 561      C++ comments as C comments internally, and so we need to
 562      allocate a little extra space in that case.
 563
 564      Note that the only time we encounter a directive here is
 565      when we are saving comments in a "#define".  */
 566   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 567
 568   buffer = _cpp_unaligned_alloc (pfile, clen);
 569
 570   token->type = CPP_COMMENT;
 571   token->val.str.len = clen;
 572   token->val.str.text = buffer;
 573
 574   buffer[0] = '/';
 575   memcpy (buffer + 1, from, len - 1);
 576
 577   /* Finish conversion to a C comment, if necessary.  */
 578   if (pfile->state.in_directive && type == '/')
 579     {
 580       buffer[1] = '*';
 581       buffer[clen - 2] = '*';
 582       buffer[clen - 1] = '/';
 583     }
 584 }
 585
 586 /* Allocate COUNT tokens for RUN.  */
 587 void
 588 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
 589 {
 590   run->base = xnewvec (cpp_token, count);
 591   run->limit = run->base + count;
 592   run->next = NULL;
 593 }
 594
 595 /* Returns the next tokenrun, or creates one if there is none.  */
 596 static tokenrun *
 597 next_tokenrun (tokenrun *run)
 598 {
 599   if (run->next == NULL)
 600     {
 601       run->next = xnew (tokenrun);
 602       run->next->prev = run;
 603       _cpp_init_tokenrun (run->next, 250);
 604     }
 605
 606   return run->next;
 607 }
 608
 609 /* Allocate a single token that is invalidated at the same time as the
 610    rest of the tokens on the line.  Has its line and col set to the
 611    same as the last lexed token, so that diagnostics appear in the
 612    right place.  */
 613 cpp_token *
 614 _cpp_temp_token (cpp_reader *pfile)
 615 {
 616   cpp_token *old, *result;
 617
 618   old = pfile->cur_token - 1;
 619   if (pfile->cur_token == pfile->cur_run->limit)
 620     {
 621       pfile->cur_run = next_tokenrun (pfile->cur_run);
 622       pfile->cur_token = pfile->cur_run->base;
 623     }
 624
 625   result = pfile->cur_token++;
 626   result->line = old->line;
 627   result->col = old->col;
 628   return result;
 629 }
 630
 631 /* Lex a token into RESULT (external interface).  Takes care of issues
 632    like directive handling, token lookahead, multiple include
 633    optimization and skipping.  */
 634 const cpp_token *
 635 _cpp_lex_token (cpp_reader *pfile)
 636 {
 637   cpp_token *result;
 638
 639   for (;;)
 640     {
 641       if (pfile->cur_token == pfile->cur_run->limit)
 642         {
 643           pfile->cur_run = next_tokenrun (pfile->cur_run);
 644           pfile->cur_token = pfile->cur_run->base;
 645         }
 646
 647       if (pfile->lookaheads)
 648         {
 649           pfile->lookaheads--;
 650           result = pfile->cur_token++;
 651         }
 652       else
 653         result = _cpp_lex_direct (pfile);
 654
 655       if (result->flags & BOL)
 656         {
 657           /* Is this a directive.  If _cpp_handle_directive returns
 658              false, it is an assembler #.  */
 659           if (result->type == CPP_HASH
 660               /* 6.10.3 p 11: Directives in a list of macro arguments
 661                  gives undefined behavior.  This implementation
 662                  handles the directive as normal.  */
 663               && pfile->state.parsing_args != 1
 664               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 665             continue;
 666           if (pfile->cb.line_change && !pfile->state.skipping)
 667             pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
 668         }
 669
 670       /* We don't skip tokens in directives.  */
 671       if (pfile->state.in_directive)
 672         break;
 673
 674       /* Outside a directive, invalidate controlling macros.  At file
 675          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 676          get here and MI optimization works.  */
 677       pfile->mi_valid = false;
 678
 679       if (!pfile->state.skipping || result->type == CPP_EOF)
 680         break;
 681     }
 682
 683   return result;
 684 }
 685
 686 /* Returns true if a fresh line has been loaded.  */
 687 bool
 688 _cpp_get_fresh_line (cpp_reader *pfile)
 689 {
 690   /* We can't get a new line until we leave the current directive.  */
 691   if (pfile->state.in_directive)
 692     return false;
 693
 694   for (;;)
 695     {
 696       cpp_buffer *buffer = pfile->buffer;
 697
 698       if (!buffer->need_line)
 699         return true;
 700
 701       if (buffer->next_line < buffer->rlimit)
 702         {
 703           _cpp_clean_line (pfile);
 704           return true;
 705         }
 706
 707       /* First, get out of parsing arguments state.  */
 708       if (pfile->state.parsing_args)
 709         return false;
 710
 711       /* End of buffer.  Non-empty files should end in a newline.  */
 712       if (buffer->buf != buffer->rlimit
 713           && buffer->next_line > buffer->rlimit
 714           && !buffer->from_stage3)
 715         {
 716           /* Only warn once.  */
 717           buffer->next_line = buffer->rlimit;
 718           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 719                                CPP_BUF_COLUMN (buffer, buffer->cur),
 720                                "no newline at end of file");
 721         }
 722
 723       if (!buffer->prev)
 724         return false;
 725
 726       if (buffer->return_at_eof)
 727         {
 728           _cpp_pop_buffer (pfile);
 729           return false;
 730         }
 731
 732       _cpp_pop_buffer (pfile);
 733     }
 734 }
 735
 736 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
 737   do                                                    \
 738     {                                                   \
 739       result->type = ELSE_TYPE;                         \
 740       if (*buffer->cur == CHAR)                         \
 741         buffer->cur++, result->type = THEN_TYPE;        \
 742     }                                                   \
 743   while (0)
 744
 745 /* Lex a token into pfile->cur_token, which is also incremented, to
 746    get diagnostics pointing to the correct location.
 747
 748    Does not handle issues such as token lookahead, multiple-include
 749    optimisation, directives, skipping etc.  This function is only
 750    suitable for use by _cpp_lex_token, and in special cases like
 751    lex_expansion_token which doesn't care for any of these issues.
 752
 753    When meeting a newline, returns CPP_EOF if parsing a directive,
 754    otherwise returns to the start of the token buffer if permissible.
 755    Returns the location of the lexed token.  */
 756 cpp_token *
 757 _cpp_lex_direct (cpp_reader *pfile)
 758 {
 759   cppchar_t c;
 760   cpp_buffer *buffer;
 761   const unsigned char *comment_start;
 762   cpp_token *result = pfile->cur_token++;
 763
 764  fresh_line:
 765   result->flags = 0;
 766   if (pfile->buffer->need_line)
 767     {
 768       if (!_cpp_get_fresh_line (pfile))
 769         {
 770           result->type = CPP_EOF;
 771           if (!pfile->state.in_directive)
 772             {
 773               /* Tell the compiler the line number of the EOF token.  */
 774               result->line = pfile->line;
 775               result->flags = BOL;
 776             }
 777           return result;
 778         }
 779       if (!pfile->keep_tokens)
 780         {
 781           pfile->cur_run = &pfile->base_run;
 782           result = pfile->base_run.base;
 783           pfile->cur_token = result + 1;
 784         }
 785       result->flags = BOL;
 786       if (pfile->state.parsing_args == 2)
 787         result->flags |= PREV_WHITE;
 788     }
 789   buffer = pfile->buffer;
 790  update_tokens_line:
 791   result->line = pfile->line;
 792
 793  skipped_white:
 794   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 795       && !pfile->overlaid_buffer)
 796     {
 797       _cpp_process_line_notes (pfile, false);
 798       result->line = pfile->line;
 799     }
 800   c = *buffer->cur++;
 801   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 802
 803   switch (c)
 804     {
 805     case ' ': case '\t': case '\f': case '\v': case '\0':
 806       result->flags |= PREV_WHITE;
 807       skip_whitespace (pfile, c);
 808       goto skipped_white;
 809
 810     case '\n':
 811       pfile->line++;
 812       buffer->need_line = true;
 813       goto fresh_line;
 814
 815     case '0': case '1': case '2': case '3': case '4':
 816     case '5': case '6': case '7': case '8': case '9':
 817       result->type = CPP_NUMBER;
 818       lex_number (pfile, &result->val.str);
 819       break;
 820
 821     case 'L':
 822       /* 'L' may introduce wide characters or strings.  */
 823       if (*buffer->cur == '\'' || *buffer->cur == '"')
 824         {
 825           lex_string (pfile, result, buffer->cur - 1);
 826           break;
 827         }
 828       /* Fall through.  */
 829
 830     case '_':
 831     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 832     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 833     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 834     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 835     case 'y': case 'z':
 836     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 837     case 'G': case 'H': case 'I': case 'J': case 'K':
 838     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 839     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 840     case 'Y': case 'Z':
 841       result->type = CPP_NAME;
 842       result->val.node = lex_identifier (pfile, buffer->cur - 1);
 843
 844       /* Convert named operators to their proper types.  */
 845       if (result->val.node->flags & NODE_OPERATOR)
 846         {
 847           result->flags |= NAMED_OP;
 848           result->type = result->val.node->directive_index;
 849         }
 850       break;
 851
 852     case '\'':
 853     case '"':
 854       lex_string (pfile, result, buffer->cur - 1);
 855       break;
 856
 857     case '/':
 858       /* A potential block or line comment.  */
 859       comment_start = buffer->cur;
 860       c = *buffer->cur;
 861
 862       if (c == '*')
 863         {
 864           if (_cpp_skip_block_comment (pfile))
 865             cpp_error (pfile, DL_ERROR, "unterminated comment");
 866         }
 867       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 868                             || CPP_IN_SYSTEM_HEADER (pfile)))
 869         {
 870           /* Warn about comments only if pedantically GNUC89, and not
 871              in system headers.  */
 872           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 873               && ! buffer->warned_cplusplus_comments)
 874             {
 875               cpp_error (pfile, DL_PEDWARN,
 876                          "C++ style comments are not allowed in ISO C90");
 877               cpp_error (pfile, DL_PEDWARN,
 878                          "(this will be reported only once per input file)");
 879               buffer->warned_cplusplus_comments = 1;
 880             }
 881
 882           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 883             cpp_error (pfile, DL_WARNING, "multi-line comment");
 884         }
 885       else if (c == '=')
 886         {
 887           buffer->cur++;
 888           result->type = CPP_DIV_EQ;
 889           break;
 890         }
 891       else
 892         {
 893           result->type = CPP_DIV;
 894           break;
 895         }
 896
 897       if (!pfile->state.save_comments)
 898         {
 899           result->flags |= PREV_WHITE;
 900           goto update_tokens_line;
 901         }
 902
 903       /* Save the comment as a token in its own right.  */
 904       save_comment (pfile, result, comment_start, c);
 905       break;
 906
 907     case '<':
 908       if (pfile->state.angled_headers)
 909         {
 910           lex_string (pfile, result, buffer->cur - 1);
 911           break;
 912         }
 913
 914       result->type = CPP_LESS;
 915       if (*buffer->cur == '=')
 916         buffer->cur++, result->type = CPP_LESS_EQ;
 917       else if (*buffer->cur == '<')
 918         {
 919           buffer->cur++;
 920           IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
 921         }
 922       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 923         {
 924           buffer->cur++;
 925           IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
 926         }
 927       else if (CPP_OPTION (pfile, digraphs))
 928         {
 929           if (*buffer->cur == ':')
 930             {
 931               buffer->cur++;
 932               result->flags |= DIGRAPH;
 933               result->type = CPP_OPEN_SQUARE;
 934             }
 935           else if (*buffer->cur == '%')
 936             {
 937               buffer->cur++;
 938               result->flags |= DIGRAPH;
 939               result->type = CPP_OPEN_BRACE;
 940             }
 941         }
 942       break;
 943
 944     case '>':
 945       result->type = CPP_GREATER;
 946       if (*buffer->cur == '=')
 947         buffer->cur++, result->type = CPP_GREATER_EQ;
 948       else if (*buffer->cur == '>')
 949         {
 950           buffer->cur++;
 951           IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
 952         }
 953       else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
 954         {
 955           buffer->cur++;
 956           IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
 957         }
 958       break;
 959
 960     case '%':
 961       result->type = CPP_MOD;
 962       if (*buffer->cur == '=')
 963         buffer->cur++, result->type = CPP_MOD_EQ;
 964       else if (CPP_OPTION (pfile, digraphs))
 965         {
 966           if (*buffer->cur == ':')
 967             {
 968               buffer->cur++;
 969               result->flags |= DIGRAPH;
 970               result->type = CPP_HASH;
 971               if (*buffer->cur == '%' && buffer->cur[1] == ':')
 972                 buffer->cur += 2, result->type = CPP_PASTE;
 973             }
 974           else if (*buffer->cur == '>')
 975             {
 976               buffer->cur++;
 977               result->flags |= DIGRAPH;
 978               result->type = CPP_CLOSE_BRACE;
 979             }
 980         }
 981       break;
 982
 983     case '.':
 984       result->type = CPP_DOT;
 985       if (ISDIGIT (*buffer->cur))
 986         {
 987           result->type = CPP_NUMBER;
 988           lex_number (pfile, &result->val.str);
 989         }
 990       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
 991         buffer->cur += 2, result->type = CPP_ELLIPSIS;
 992       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
 993         buffer->cur++, result->type = CPP_DOT_STAR;
 994       break;
 995
 996     case '+':
 997       result->type = CPP_PLUS;
 998       if (*buffer->cur == '+')
 999         buffer->cur++, result->type = CPP_PLUS_PLUS;
1000       else if (*buffer->cur == '=')
1001         buffer->cur++, result->type = CPP_PLUS_EQ;
1002       break;
1003
1004     case '-':
1005       result->type = CPP_MINUS;
1006       if (*buffer->cur == '>')
1007         {
1008           buffer->cur++;
1009           result->type = CPP_DEREF;
1010           if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1011             buffer->cur++, result->type = CPP_DEREF_STAR;
1012         }
1013       else if (*buffer->cur == '-')
1014         buffer->cur++, result->type = CPP_MINUS_MINUS;
1015       else if (*buffer->cur == '=')
1016         buffer->cur++, result->type = CPP_MINUS_EQ;
1017       break;
1018
1019     case '&':
1020       result->type = CPP_AND;
1021       if (*buffer->cur == '&')
1022         buffer->cur++, result->type = CPP_AND_AND;
1023       else if (*buffer->cur == '=')
1024         buffer->cur++, result->type = CPP_AND_EQ;
1025       break;
1026
1027     case '|':
1028       result->type = CPP_OR;
1029       if (*buffer->cur == '|')
1030         buffer->cur++, result->type = CPP_OR_OR;
1031       else if (*buffer->cur == '=')
1032         buffer->cur++, result->type = CPP_OR_EQ;
1033       break;
1034
1035     case ':':
1036       result->type = CPP_COLON;
1037       if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1038         buffer->cur++, result->type = CPP_SCOPE;
1039       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1040         {
1041           buffer->cur++;
1042           result->flags |= DIGRAPH;
1043           result->type = CPP_CLOSE_SQUARE;
1044         }
1045       break;
1046
1047     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1048     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1049     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1050     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1051     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1052
1053     case '?': result->type = CPP_QUERY; break;
1054     case '~': result->type = CPP_COMPL; break;
1055     case ',': result->type = CPP_COMMA; break;
1056     case '(': result->type = CPP_OPEN_PAREN; break;
1057     case ')': result->type = CPP_CLOSE_PAREN; break;
1058     case '[': result->type = CPP_OPEN_SQUARE; break;
1059     case ']': result->type = CPP_CLOSE_SQUARE; break;
1060     case '{': result->type = CPP_OPEN_BRACE; break;
1061     case '}': result->type = CPP_CLOSE_BRACE; break;
1062     case ';': result->type = CPP_SEMICOLON; break;
1063
1064       /* @ is a punctuator in Objective-C.  */
1065     case '@': result->type = CPP_ATSIGN; break;
1066
1067     case '$':
1068     case '\\':
1069       {
1070         const uchar *base = --buffer->cur;
1071
1072         if (forms_identifier_p (pfile, true))
1073           {
1074             result->type = CPP_NAME;
1075             result->val.node = lex_identifier (pfile, base);
1076             break;
1077           }
1078         buffer->cur++;
1079       }
1080
1081     default:
1082       create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1083       break;
1084     }
1085
1086   return result;
1087 }
1088
1089 /* An upper bound on the number of bytes needed to spell TOKEN.
1090    Does not include preceding whitespace.  */
1091 unsigned int
1092 cpp_token_len (const cpp_token *token)
1093 {
1094   unsigned int len;
1095
1096   switch (TOKEN_SPELL (token))
1097     {
1098     default:            len = 4;                                break;
1099     case SPELL_LITERAL: len = token->val.str.len;               break;
1100     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1101     }
1102
1103   return len;
1104 }
1105
1106 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1107    already contain the enough space to hold the token's spelling.
1108    Returns a pointer to the character after the last character written.
1109    FIXME: Would be nice if we didn't need the PFILE argument.  */
1110 unsigned char *
1111 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1112                  unsigned char *buffer)
1113 {
1114   switch (TOKEN_SPELL (token))
1115     {
1116     case SPELL_OPERATOR:
1117       {
1118         const unsigned char *spelling;
1119         unsigned char c;
1120
1121         if (token->flags & DIGRAPH)
1122           spelling
1123             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1124         else if (token->flags & NAMED_OP)
1125           goto spell_ident;
1126         else
1127           spelling = TOKEN_NAME (token);
1128
1129         while ((c = *spelling++) != '\0')
1130           *buffer++ = c;
1131       }
1132       break;
1133
1134     spell_ident:
1135     case SPELL_IDENT:
1136       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1137       buffer += NODE_LEN (token->val.node);
1138       break;
1139
1140     case SPELL_LITERAL:
1141       memcpy (buffer, token->val.str.text, token->val.str.len);
1142       buffer += token->val.str.len;
1143       break;
1144
1145     case SPELL_NONE:
1146       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1147       break;
1148     }
1149
1150   return buffer;
1151 }
1152
1153 /* Returns TOKEN spelt as a null-terminated string.  The string is
1154    freed when the reader is destroyed.  Useful for diagnostics.  */
1155 unsigned char *
1156 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1157 {
1158   unsigned int len = cpp_token_len (token) + 1;
1159   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1160
1161   end = cpp_spell_token (pfile, token, start);
1162   end[0] = '\0';
1163
1164   return start;
1165 }
1166
1167 /* Used by C front ends, which really should move to using
1168    cpp_token_as_text.  */
1169 const char *
1170 cpp_type2name (enum cpp_ttype type)
1171 {
1172   return (const char *) token_spellings[type].name;
1173 }
1174
1175 /* Writes the spelling of token to FP, without any preceding space.
1176    Separated from cpp_spell_token for efficiency - to avoid stdio
1177    double-buffering.  */
1178 void
1179 cpp_output_token (const cpp_token *token, FILE *fp)
1180 {
1181   switch (TOKEN_SPELL (token))
1182     {
1183     case SPELL_OPERATOR:
1184       {
1185         const unsigned char *spelling;
1186         int c;
1187
1188         if (token->flags & DIGRAPH)
1189           spelling
1190             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1191         else if (token->flags & NAMED_OP)
1192           goto spell_ident;
1193         else
1194           spelling = TOKEN_NAME (token);
1195
1196         c = *spelling;
1197         do
1198           putc (c, fp);
1199         while ((c = *++spelling) != '\0');
1200       }
1201       break;
1202
1203     spell_ident:
1204     case SPELL_IDENT:
1205       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1206     break;
1207
1208     case SPELL_LITERAL:
1209       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1210       break;
1211
1212     case SPELL_NONE:
1213       /* An error, most probably.  */
1214       break;
1215     }
1216 }
1217
1218 /* Compare two tokens.  */
1219 int
1220 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1221 {
1222   if (a->type == b->type && a->flags == b->flags)
1223     switch (TOKEN_SPELL (a))
1224       {
1225       default:                  /* Keep compiler happy.  */
1226       case SPELL_OPERATOR:
1227         return 1;
1228       case SPELL_NONE:
1229         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1230       case SPELL_IDENT:
1231         return a->val.node == b->val.node;
1232       case SPELL_LITERAL:
1233         return (a->val.str.len == b->val.str.len
1234                 && !memcmp (a->val.str.text, b->val.str.text,
1235                             a->val.str.len));
1236       }
1237
1238   return 0;
1239 }
1240
1241 /* Returns nonzero if a space should be inserted to avoid an
1242    accidental token paste for output.  For simplicity, it is
1243    conservative, and occasionally advises a space where one is not
1244    needed, e.g. "." and ".2".  */
1245 int
1246 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1247                  const cpp_token *token2)
1248 {
1249   enum cpp_ttype a = token1->type, b = token2->type;
1250   cppchar_t c;
1251
1252   if (token1->flags & NAMED_OP)
1253     a = CPP_NAME;
1254   if (token2->flags & NAMED_OP)
1255     b = CPP_NAME;
1256
1257   c = EOF;
1258   if (token2->flags & DIGRAPH)
1259     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1260   else if (token_spellings[b].category == SPELL_OPERATOR)
1261     c = token_spellings[b].name[0];
1262
1263   /* Quickly get everything that can paste with an '='.  */
1264   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1265     return 1;
1266
1267   switch (a)
1268     {
1269     case CPP_GREATER:   return c == '>' || c == '?';
1270     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1271     case CPP_PLUS:      return c == '+';
1272     case CPP_MINUS:     return c == '-' || c == '>';
1273     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1274     case CPP_MOD:       return c == ':' || c == '>';
1275     case CPP_AND:       return c == '&';
1276     case CPP_OR:        return c == '|';
1277     case CPP_COLON:     return c == ':' || c == '>';
1278     case CPP_DEREF:     return c == '*';
1279     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1280     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1281     case CPP_NAME:      return ((b == CPP_NUMBER
1282                                  && name_p (pfile, &token2->val.str))
1283                                 || b == CPP_NAME
1284                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1285     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1286                                 || c == '.' || c == '+' || c == '-');
1287                                       /* UCNs */
1288     case CPP_OTHER:     return ((token1->val.str.text[0] == '\\'
1289                                  && b == CPP_NAME)
1290                                 || (CPP_OPTION (pfile, objc)
1291                                     && token1->val.str.text[0] == '@'
1292                                     && (b == CPP_NAME || b == CPP_STRING)));
1293     default:            break;
1294     }
1295
1296   return 0;
1297 }
1298
1299 /* Output all the remaining tokens on the current line, and a newline
1300    character, to FP.  Leading whitespace is removed.  If there are
1301    macros, special token padding is not performed.  */
1302 void
1303 cpp_output_line (cpp_reader *pfile, FILE *fp)
1304 {
1305   const cpp_token *token;
1306
1307   token = cpp_get_token (pfile);
1308   while (token->type != CPP_EOF)
1309     {
1310       cpp_output_token (token, fp);
1311       token = cpp_get_token (pfile);
1312       if (token->flags & PREV_WHITE)
1313         putc (' ', fp);
1314     }
1315
1316   putc ('\n', fp);
1317 }
1318
/* Return the numeric value of hexadecimal digit C.  Aborts if C is
   not a hexadecimal digit.  */
static unsigned int
hex_digit_value (unsigned int c)
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1328
1329 /* Read a possible universal character name starting at *PSTR.  */
1330 static cppchar_t
1331 maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
1332 {
1333   cppchar_t result, c = (*pstr)[-1];
1334
1335   result = _cpp_valid_ucn (pfile, pstr, false);
1336   if (result)
1337     {
1338       if (CPP_WTRADITIONAL (pfile))
1339         cpp_error (pfile, DL_WARNING,
1340                    "the meaning of '\\%c' is different in traditional C",
1341                    (int) c);
1342
1343       if (CPP_OPTION (pfile, EBCDIC))
1344         {
1345           cpp_error (pfile, DL_ERROR,
1346                      "universal character with an EBCDIC target");
1347           result = 0x3f;  /* EBCDIC invalid character */
1348         }
1349     }
1350
1351   return result;
1352 }
1353
1354 /* Returns the value of an escape sequence, truncated to the correct
1355    target precision.  PSTR points to the input pointer, which is just
1356    after the backslash.  LIMIT is how much text we have.  WIDE is true
1357    if the escape sequence is part of a wide character constant or
1358    string literal.  Handles all relevant diagnostics.  */
1359 cppchar_t
1360 cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
1361                   const unsigned char *limit, int wide)
1362 {
1363   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1364   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1365   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1366
1367   int unknown = 0;
1368   const unsigned char *str = *pstr, *charconsts;
1369   cppchar_t c, ucn, mask;
1370   unsigned int width;
1371
1372   if (CPP_OPTION (pfile, EBCDIC))
1373     charconsts = ebcdic;
1374   else
1375     charconsts = ascii;
1376
1377   if (wide)
1378     width = CPP_OPTION (pfile, wchar_precision);
1379   else
1380     width = CPP_OPTION (pfile, char_precision);
1381   if (width < BITS_PER_CPPCHAR_T)
1382     mask = ((cppchar_t) 1 << width) - 1;
1383   else
1384     mask = ~0;
1385
1386   c = *str++;
1387   switch (c)
1388     {
1389     case '\\': case '\'': case '"': case '?': break;
1390     case 'b': c = charconsts[1];  break;
1391     case 'f': c = charconsts[3];  break;
1392     case 'n': c = charconsts[4];  break;
1393     case 'r': c = charconsts[5];  break;
1394     case 't': c = charconsts[6];  break;
1395     case 'v': c = charconsts[7];  break;
1396
1397     case '(': case '{': case '[': case '%':
1398       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1399          '\%' is used to prevent SCCS from getting confused.  */
1400       unknown = CPP_PEDANTIC (pfile);
1401       break;
1402
1403     case 'a':
1404       if (CPP_WTRADITIONAL (pfile))
1405         cpp_error (pfile, DL_WARNING,
1406                    "the meaning of '\\a' is different in traditional C");
1407       c = charconsts[0];
1408       break;
1409
1410     case 'e': case 'E':
1411       if (CPP_PEDANTIC (pfile))
1412         cpp_error (pfile, DL_PEDWARN,
1413                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1414       c = charconsts[2];
1415       break;
1416
1417     case 'u': case 'U':
1418       ucn = maybe_read_ucn (pfile, &str);
1419       if (ucn)
1420         c = ucn;
1421       else
1422         unknown = true;
1423       break;
1424
1425     case 'x':
1426       if (CPP_WTRADITIONAL (pfile))
1427         cpp_error (pfile, DL_WARNING,
1428                    "the meaning of '\\x' is different in traditional C");
1429
1430       {
1431         cppchar_t i = 0, overflow = 0;
1432         int digits_found = 0;
1433
1434         while (str < limit)
1435           {
1436             c = *str;
1437             if (! ISXDIGIT (c))
1438               break;
1439             str++;
1440             overflow |= i ^ (i << 4 >> 4);
1441             i = (i << 4) + hex_digit_value (c);
1442             digits_found = 1;
1443           }
1444
1445         if (!digits_found)
1446           cpp_error (pfile, DL_ERROR,
1447                        "\\x used with no following hex digits");
1448
1449         if (overflow | (i != (i & mask)))
1450           {
1451             cpp_error (pfile, DL_PEDWARN,
1452                        "hex escape sequence out of range");
1453             i &= mask;
1454           }
1455         c = i;
1456       }
1457       break;
1458
1459     case '0':  case '1':  case '2':  case '3':
1460     case '4':  case '5':  case '6':  case '7':
1461       {
1462         size_t count = 0;
1463         cppchar_t i = c - '0';
1464
1465         while (str < limit && ++count < 3)
1466           {
1467             c = *str;
1468             if (c < '0' || c > '7')
1469               break;
1470             str++;
1471             i = (i << 3) + c - '0';
1472           }
1473
1474         if (i != (i & mask))
1475           {
1476             cpp_error (pfile, DL_PEDWARN,
1477                        "octal escape sequence out of range");
1478             i &= mask;
1479           }
1480         c = i;
1481       }
1482       break;
1483
1484     default:
1485       unknown = 1;
1486       break;
1487     }
1488
1489   if (unknown)
1490     {
1491       if (ISGRAPH (c))
1492         cpp_error (pfile, DL_PEDWARN,
1493                    "unknown escape sequence '\\%c'", (int) c);
1494       else
1495         cpp_error (pfile, DL_PEDWARN,
1496                    "unknown escape sequence: '\\%03o'", (int) c);
1497     }
1498
1499   if (c > mask)
1500     {
1501       cpp_error (pfile, DL_PEDWARN,
1502                  "escape sequence out of range for its type");
1503       c &= mask;
1504     }
1505
1506   *pstr = str;
1507   return c;
1508 }
1509
1510 /* Interpret a (possibly wide) character constant in TOKEN.
1511    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1512    points to a variable that is filled in with the number of
1513    characters seen, and UNSIGNEDP to a variable that indicates whether
1514    the result has signed type.  */
1515 cppchar_t
1516 cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
1517                          unsigned int *pchars_seen, int *unsignedp)
1518 {
1519   const unsigned char *str, *limit;
1520   unsigned int chars_seen = 0;
1521   size_t width, max_chars;
1522   cppchar_t c, mask, result = 0;
1523   bool unsigned_p;
1524
1525   str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1526   limit = token->val.str.text + token->val.str.len - 1;
1527
1528   if (token->type == CPP_CHAR)
1529     {
1530       width = CPP_OPTION (pfile, char_precision);
1531       max_chars = CPP_OPTION (pfile, int_precision) / width;
1532       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1533     }
1534   else
1535     {
1536       width = CPP_OPTION (pfile, wchar_precision);
1537       max_chars = 1;
1538       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1539     }
1540
1541   if (width < BITS_PER_CPPCHAR_T)
1542     mask = ((cppchar_t) 1 << width) - 1;
1543   else
1544     mask = ~0;
1545
1546   while (str < limit)
1547     {
1548       c = *str++;
1549
1550       if (c == '\\')
1551         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1552
1553 #ifdef MAP_CHARACTER
1554       if (ISPRINT (c))
1555         c = MAP_CHARACTER (c);
1556 #endif
1557
1558       chars_seen++;
1559
1560       /* Truncate the character, scale the result and merge the two.  */
1561       c &= mask;
1562       if (width < BITS_PER_CPPCHAR_T)
1563         result = (result << width) | c;
1564       else
1565         result = c;
1566     }
1567
1568   if (chars_seen == 0)
1569     cpp_error (pfile, DL_ERROR, "empty character constant");
1570   else if (chars_seen > 1)
1571     {
1572       /* Multichar charconsts are of type int and therefore signed.  */
1573       unsigned_p = 0;
1574
1575       if (chars_seen > max_chars)
1576         {
1577           chars_seen = max_chars;
1578           cpp_error (pfile, DL_WARNING,
1579                      "character constant too long for its type");
1580         }
1581       else if (CPP_OPTION (pfile, warn_multichar))
1582         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1583     }
1584
1585   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1586      in WIDTH bits, but for multi-char charconsts it's value is the
1587      full target type's width.  */
1588   if (chars_seen > 1)
1589     width *= max_chars;
1590   if (width < BITS_PER_CPPCHAR_T)
1591     {
1592       mask = ((cppchar_t) 1 << width) - 1;
1593       if (unsigned_p || !(result & (1 << (width - 1))))
1594         result &= mask;
1595       else
1596         result |= ~mask;
1597     }
1598
1599   *pchars_seen = chars_seen;
1600   *unsignedp = unsigned_p;
1601   return result;
1602 }
1603
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  Every macro argument is parenthesized so that
   arbitrary expressions may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1618
1619 /* Create a new allocation buffer.  Place the control block at the end
1620    of the buffer, so that buffer overflows will cause immediate chaos.  */
1621 static _cpp_buff *
1622 new_buff (size_t len)
1623 {
1624   _cpp_buff *result;
1625   unsigned char *base;
1626
1627   if (len < MIN_BUFF_SIZE)
1628     len = MIN_BUFF_SIZE;
1629   len = CPP_ALIGN (len);
1630
1631   base = xmalloc (len + sizeof (_cpp_buff));
1632   result = (_cpp_buff *) (base + len);
1633   result->base = base;
1634   result->cur = base;
1635   result->limit = base + len;
1636   result->next = NULL;
1637   return result;
1638 }
1639
1640 /* Place a chain of unwanted allocation buffers on the free list.  */
1641 void
1642 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1643 {
1644   _cpp_buff *end = buff;
1645
1646   while (end->next)
1647     end = end->next;
1648   end->next = pfile->free_buffs;
1649   pfile->free_buffs = buff;
1650 }
1651
1652 /* Return a free buffer of size at least MIN_SIZE.  */
1653 _cpp_buff *
1654 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1655 {
1656   _cpp_buff *result, **p;
1657
1658   for (p = &pfile->free_buffs;; p = &(*p)->next)
1659     {
1660       size_t size;
1661
1662       if (*p == NULL)
1663         return new_buff (min_size);
1664       result = *p;
1665       size = result->limit - result->base;
1666       /* Return a buffer that's big enough, but don't waste one that's
1667          way too big.  */
1668       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1669         break;
1670     }
1671
1672   *p = result->next;
1673   result->next = NULL;
1674   result->cur = result->base;
1675   return result;
1676 }
1677
1678 /* Creates a new buffer with enough space to hold the uncommitted
1679    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1680    the excess bytes to the new buffer.  Chains the new buffer after
1681    BUFF, and returns the new buffer.  */
1682 _cpp_buff *
1683 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1684 {
1685   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1686   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1687
1688   buff->next = new_buff;
1689   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1690   return new_buff;
1691 }
1692
1693 /* Creates a new buffer with enough space to hold the uncommitted
1694    remaining bytes of the buffer pointed to by BUFF, and at least
1695    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1696    Chains the new buffer before the buffer pointed to by BUFF, and
1697    updates the pointer to point to the new buffer.  */
1698 void
1699 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1700 {
1701   _cpp_buff *new_buff, *old_buff = *pbuff;
1702   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1703
1704   new_buff = _cpp_get_buff (pfile, size);
1705   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1706   new_buff->next = old_buff;
1707   *pbuff = new_buff;
1708 }
1709
1710 /* Free a chain of buffers starting at BUFF.  */
1711 void
1712 _cpp_free_buff (buff)
1713      _cpp_buff *buff;
1714 {
1715   _cpp_buff *next;
1716
1717   for (; buff; buff = next)
1718     {
1719       next = buff->next;
1720       free (buff->base);
1721     }
1722 }
1723
1724 /* Allocate permanent, unaligned storage of length LEN.  */
1725 unsigned char *
1726 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1727 {
1728   _cpp_buff *buff = pfile->u_buff;
1729   unsigned char *result = buff->cur;
1730
1731   if (len > (size_t) (buff->limit - result))
1732     {
1733       buff = _cpp_get_buff (pfile, len);
1734       buff->next = pfile->u_buff;
1735       pfile->u_buff = buff;
1736       result = buff->cur;
1737     }
1738
1739   buff->cur = result + len;
1740   return result;
1741 }
1742
1743 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1744    That buffer is used for growing allocations when saving macro
1745    replacement lists in a #define, and when parsing an answer to an
1746    assertion in #assert, #unassert or #if (and therefore possibly
1747    whilst expanding macros).  It therefore must not be used by any
1748    code that they might call: specifically the lexer and the guts of
1749    the macro expander.
1750
1751    All existing other uses clearly fit this restriction: storing
1752    registered pragmas during initialization.  */
1753 unsigned char *
1754 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1755 {
1756   _cpp_buff *buff = pfile->a_buff;
1757   unsigned char *result = buff->cur;
1758
1759   if (len > (size_t) (buff->limit - result))
1760     {
1761       buff = _cpp_get_buff (pfile, len);
1762       buff->next = pfile->a_buff;
1763       pfile->a_buff = buff;
1764       result = buff->cur;
1765     }
1766
1767   buff->cur = result + len;
1768   return result;
1769 }