gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 /* Tokens with SPELL_STRING store their spelling in the token list,
  30    and it's length in the token->val.name.len.  */
  31 enum spell_type
  32 {
  33   SPELL_OPERATOR = 0,
  34   SPELL_CHAR,
  35   SPELL_IDENT,
  36   SPELL_NUMBER,
  37   SPELL_STRING,
  38   SPELL_NONE
  39 };
  40
  41 struct token_spelling
  42 {
  43   enum spell_type category;
  44   const unsigned char *name;
  45 };
  46
  47 static const unsigned char *const digraph_spellings[] =
  48 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  49
  50 #define OP(e, s) { SPELL_OPERATOR, U s           },
  51 #define TK(e, s) { s,              U STRINGX (e) },
  52 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  53 #undef OP
  54 #undef TK
  55
  56 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  57 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
  58 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  59
  60 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  61 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  62
  63 static int skip_line_comment PARAMS ((cpp_reader *));
  64 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  65 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  66 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  67                                   unsigned int *));
  68 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  69 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  70 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  71 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  72                                   cppchar_t));
  73 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  74 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  75                                    const unsigned char *, cppchar_t *));
  76 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  77
  78 static unsigned int hex_digit_value PARAMS ((unsigned int));
  79 static _cpp_buff *new_buff PARAMS ((size_t));
  80
  81
  82 /* Utility routine:
  83
  84    Compares, the token TOKEN to the NUL-terminated string STRING.
  85    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  86 int
  87 cpp_ideq (token, string)
  88      const cpp_token *token;
  89      const char *string;
  90 {
  91   if (token->type != CPP_NAME)
  92     return 0;
  93
  94   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  95 }
  96
  97 /* Record a note TYPE at byte POS into the current cleaned logical
  98    line.  */
  99 static void
 100 add_line_note (buffer, pos, type)
 101      cpp_buffer *buffer;
 102      const uchar *pos;
 103      unsigned int type;
 104 {
 105   if (buffer->notes_used == buffer->notes_cap)
 106     {
 107       buffer->notes_cap = buffer->notes_cap * 2 + 200;
 108       buffer->notes = (_cpp_line_note *)
 109         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
 110     }
 111
 112   buffer->notes[buffer->notes_used].pos = pos;
 113   buffer->notes[buffer->notes_used].type = type;
 114   buffer->notes_used++;
 115 }
 116
 117 /* Returns with a logical line that contains no escaped newlines or
 118    trigraphs.  This is a time-critical inner loop.  */
 119 void
 120 _cpp_clean_line (pfile)
 121      cpp_reader *pfile;
 122 {
 123   cpp_buffer *buffer;
 124   const uchar *s;
 125   uchar c, *d, *p;
 126
 127   buffer = pfile->buffer;
 128   buffer->cur_note = buffer->notes_used = 0;
 129   buffer->cur = buffer->line_base = buffer->next_line;
 130   buffer->need_line = false;
 131   s = buffer->next_line - 1;
 132
 133   if (!buffer->from_stage3)
 134     {
 135       d = (uchar *) s;
 136
 137       for (;;)
 138         {
 139           c = *++s;
 140           *++d = c;
 141
 142           if (c == '\n' || c == '\r')
 143             {
 144                   /* Handle DOS line endings.  */
 145               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 146                 s++;
 147               if (s == buffer->rlimit)
 148                 break;
 149
 150               /* Escaped?  */
 151               p = d;
 152               while (p != buffer->next_line && is_nvspace (p[-1]))
 153                 p--;
 154               if (p == buffer->next_line || p[-1] != '\\')
 155                 break;
 156
 157               add_line_note (buffer, p - 1,
 158                              p != d ? NOTE_ESC_SPACE_NL: NOTE_ESC_NL);
 159               d = p - 2;
 160               buffer->next_line = p - 1;
 161             }
 162           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 163             {
 164               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 165               add_line_note (buffer, d, NOTE_TRIGRAPH);
 166               if (CPP_OPTION (pfile, trigraphs))
 167                 {
 168                   *d = _cpp_trigraph_map[s[2]];
 169                   s += 2;
 170                 }
 171             }
 172         }
 173     }
 174   else
 175     {
 176       do
 177         s++;
 178       while (*s != '\n' && *s != '\r');
 179       d = (uchar *) s;
 180
 181       /* Handle DOS line endings.  */
 182       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 183         s++;
 184     }
 185
 186   *d = '\n';
 187   add_line_note (buffer, d + 1, NOTE_NEWLINE);
 188   buffer->next_line = s + 1;
 189 }
 190
 191 /* Process the notes created by add_line_note as far as the current
 192    location.  */
 193 void
 194 _cpp_process_line_notes (pfile, in_comment)
 195      cpp_reader *pfile;
 196      int in_comment;
 197 {
 198   cpp_buffer *buffer = pfile->buffer;
 199
 200   for (;;)
 201     {
 202       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 203       unsigned int col;
 204
 205       if (note->pos > buffer->cur)
 206         break;
 207
 208       buffer->cur_note++;
 209       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 210
 211       switch (note->type)
 212         {
 213         case NOTE_NEWLINE:
 214           /* This note is a kind of sentinel we should never reach.  */
 215           abort ();
 216
 217         case NOTE_TRIGRAPH:
 218           if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
 219             {
 220               if (CPP_OPTION (pfile, trigraphs))
 221                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 222                                      "trigraph converted to %c",
 223                                      (int) note->pos[0]);
 224               else
 225                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 226                                      "trigraph ??%c ignored",
 227                                      (int) note->pos[2]);
 228             }
 229           break;
 230
 231         case NOTE_ESC_SPACE_NL:
 232           if (!in_comment)
 233             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 234                                  "backslash and newline separated by space");
 235           /* Fall through... */
 236         case NOTE_ESC_NL:
 237           if (buffer->next_line > buffer->rlimit)
 238             {
 239               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 240                                    "backslash-newline at end of file");
 241               /* Prevent "no newline at end of file" warning.  */
 242               buffer->next_line = buffer->rlimit;
 243             }
 244
 245           buffer->line_base = note->pos;
 246           pfile->line++;
 247         }
 248     }
 249 }
 250
 251 /* Obtain the next character, after trigraph conversion and skipping
 252    an arbitrarily long string of escaped newlines.  The common case of
 253    no trigraphs or escaped newlines falls through quickly.  On return,
 254    buffer->backup_to points to where to return to if the character is
 255    not to be processed.  */
 256 static cppchar_t
 257 get_effective_char (pfile)
 258      cpp_reader *pfile;
 259 {
 260   cpp_buffer *buffer = pfile->buffer;
 261
 262   buffer->backup_to = buffer->cur;
 263   return *buffer->cur++;
 264 }
 265
 266 /* Skip a C-style block comment.  We find the end of the comment by
 267    seeing if an asterisk is before every '/' we encounter.  Returns
 268    nonzero if comment terminated by EOF, zero otherwise.  */
 269 bool
 270 _cpp_skip_block_comment (pfile)
 271      cpp_reader *pfile;
 272 {
 273   cpp_buffer *buffer = pfile->buffer;
 274   cppchar_t c;
 275
 276   if (*buffer->cur == '/')
 277     buffer->cur++;
 278
 279   for (;;)
 280     {
 281       c = *buffer->cur++;
 282
 283       /* People like decorating comments with '*', so check for '/'
 284          instead for efficiency.  */
 285       if (c == '/')
 286         {
 287           if (buffer->cur[-2] == '*')
 288             break;
 289
 290           /* Warn about potential nested comments, but not if the '/'
 291              comes immediately before the true comment delimiter.
 292              Don't bother to get it right across escaped newlines.  */
 293           if (CPP_OPTION (pfile, warn_comments)
 294               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 295             cpp_error_with_line (pfile, DL_WARNING,
 296                                  pfile->line, CPP_BUF_COL (buffer),
 297                                  "\"/*\" within comment");
 298         }
 299       else if (c == '\n')
 300         {
 301           buffer->cur--;
 302           _cpp_process_line_notes (pfile, true);
 303           if (buffer->next_line >= buffer->rlimit)
 304             return true;
 305           _cpp_clean_line (pfile);
 306           pfile->line++;
 307         }
 308     }
 309
 310   return false;
 311 }
 312
 313 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 314    terminating newline.  Handles escaped newlines.  Returns nonzero
 315    if a multiline comment.  */
 316 static int
 317 skip_line_comment (pfile)
 318      cpp_reader *pfile;
 319 {
 320   cpp_buffer *buffer = pfile->buffer;
 321   unsigned int orig_line = pfile->line;
 322
 323   while (*buffer->cur != '\n')
 324     buffer->cur++;
 325
 326   _cpp_process_line_notes (pfile, true);
 327   return orig_line != pfile->line;
 328 }
 329
 330 /* Skips whitespace, saving the next non-whitespace character.  */
 331 static void
 332 skip_whitespace (pfile, c)
 333      cpp_reader *pfile;
 334      cppchar_t c;
 335 {
 336   cpp_buffer *buffer = pfile->buffer;
 337   bool saw_NUL = false;
 338
 339   do
 340     {
 341       /* Horizontal space always OK.  */
 342       if (c == ' ' || c == '\t')
 343         ;
 344       /* Just \f \v or \0 left.  */
 345       else if (c == '\0')
 346         saw_NUL = true;
 347       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 348         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 349                              CPP_BUF_COL (buffer),
 350                              "%s in preprocessing directive",
 351                              c == '\f' ? "form feed" : "vertical tab");
 352
 353       c = *buffer->cur++;
 354     }
 355   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 356   while (is_nvspace (c));
 357
 358   if (saw_NUL)
 359     cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 360
 361   buffer->cur--;
 362 }
 363
 364 /* See if the characters of a number token are valid in a name (no
 365    '.', '+' or '-').  */
 366 static int
 367 name_p (pfile, string)
 368      cpp_reader *pfile;
 369      const cpp_string *string;
 370 {
 371   unsigned int i;
 372
 373   for (i = 0; i < string->len; i++)
 374     if (!is_idchar (string->text[i]))
 375       return 0;
 376
 377   return 1;
 378 }
 379
 380 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 381    a critical inner loop.  The common case is an identifier which has
 382    not been split by backslash-newline, does not contain a dollar
 383    sign, and has already been scanned (roughly 10:1 ratio of
 384    seen:unseen identifiers in normal code; the distribution is
 385    Poisson-like).  Second most common case is a new identifier, not
 386    split and no dollar sign.  The other possibilities are rare and
 387    have been relegated to parse_slow.  */
 388 static cpp_hashnode *
 389 parse_identifier (pfile)
 390      cpp_reader *pfile;
 391 {
 392   cpp_hashnode *result;
 393   const uchar *cur, *base;
 394
 395   /* Fast-path loop.  Skim over a normal identifier.
 396      N.B. ISIDNUM does not include $.  */
 397   cur = pfile->buffer->cur;
 398   while (ISIDNUM (*cur))
 399     cur++;
 400
 401   /* Check for slow-path cases.  */
 402   if (*cur == '$')
 403     {
 404       unsigned int len;
 405
 406       base = parse_slow (pfile, cur, 0, &len);
 407       result = (cpp_hashnode *)
 408         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 409     }
 410   else
 411     {
 412       base = pfile->buffer->cur - 1;
 413       pfile->buffer->cur = cur;
 414       result = (cpp_hashnode *)
 415         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 416     }
 417
 418   /* Rarely, identifiers require diagnostics when lexed.
 419      XXX Has to be forced out of the fast path.  */
 420   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 421                         && !pfile->state.skipping, 0))
 422     {
 423       /* It is allowed to poison the same identifier twice.  */
 424       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 425         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 426                    NODE_NAME (result));
 427
 428       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 429          replacement list of a variadic macro.  */
 430       if (result == pfile->spec_nodes.n__VA_ARGS__
 431           && !pfile->state.va_args_ok)
 432         cpp_error (pfile, DL_PEDWARN,
 433         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 434     }
 435
 436   return result;
 437 }
 438
 439 /* Slow path.  This handles numbers and identifiers which have been
 440    split, or contain dollar signs.  The part of the token from
 441    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 442    1 if it's a number, and 2 if it has a leading period.  Returns a
 443    pointer to the token's NUL-terminated spelling in permanent
 444    storage, and sets PLEN to its length.  */
 445 static uchar *
 446 parse_slow (pfile, cur, number_p, plen)
 447      cpp_reader *pfile;
 448      const uchar *cur;
 449      int number_p;
 450      unsigned int *plen;
 451 {
 452   cpp_buffer *buffer = pfile->buffer;
 453   const uchar *base = buffer->cur - 1;
 454   struct obstack *stack = &pfile->hash_table->stack;
 455   unsigned int c, prevc, saw_dollar = 0;
 456
 457   /* Place any leading period.  */
 458   if (number_p == 2)
 459     obstack_1grow (stack, '.');
 460
 461   /* Copy the part of the token which is known to be okay.  */
 462   obstack_grow (stack, base, cur - base);
 463
 464   /* Now process the part which isn't.  We are looking at one of
 465      '$', '\\', or '?' on entry to this loop.  */
 466   prevc = cur[-1];
 467   c = *cur++;
 468   buffer->cur = cur;
 469   for (;;)
 470     {
 471       /* Potential escaped newline?  */
 472       buffer->backup_to = buffer->cur - 1;
 473
 474       if (!is_idchar (c))
 475         {
 476           if (!number_p)
 477             break;
 478           if (c != '.' && !VALID_SIGN (c, prevc))
 479             break;
 480         }
 481
 482       /* Handle normal identifier characters in this loop.  */
 483       do
 484         {
 485           prevc = c;
 486           obstack_1grow (stack, c);
 487
 488           if (c == '$')
 489             saw_dollar++;
 490
 491           c = *buffer->cur++;
 492         }
 493       while (is_idchar (c));
 494     }
 495
 496   /* Step back over the unwanted char.  */
 497   BACKUP ();
 498
 499   /* $ is not an identifier character in the standard, but is commonly
 500      accepted as an extension.  Don't warn about it in skipped
 501      conditional blocks.  */
 502   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 503     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 504
 505   /* Identifiers and numbers are null-terminated.  */
 506   *plen = obstack_object_size (stack);
 507   obstack_1grow (stack, '\0');
 508   return obstack_finish (stack);
 509 }
 510
 511 /* Parse a number, beginning with character C, skipping embedded
 512    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 513    before C.  Place the result in NUMBER.  */
 514 static void
 515 parse_number (pfile, number, leading_period)
 516      cpp_reader *pfile;
 517      cpp_string *number;
 518      int leading_period;
 519 {
 520   const uchar *cur;
 521
 522   /* Fast-path loop.  Skim over a normal number.
 523      N.B. ISIDNUM does not include $.  */
 524   cur = pfile->buffer->cur;
 525   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 526     cur++;
 527
 528   /* Check for slow-path cases.  */
 529   if (*cur == '$')
 530     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 531   else
 532     {
 533       const uchar *base = pfile->buffer->cur - 1;
 534       uchar *dest;
 535
 536       number->len = cur - base + leading_period;
 537       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 538       dest[number->len] = '\0';
 539       number->text = dest;
 540
 541       if (leading_period)
 542         *dest++ = '.';
 543       memcpy (dest, base, cur - base);
 544       pfile->buffer->cur = cur;
 545     }
 546 }
 547
 548 /* Subroutine of parse_string.  */
 549 static int
 550 unescaped_terminator_p (pfile, dest)
 551      cpp_reader *pfile;
 552      const unsigned char *dest;
 553 {
 554   const unsigned char *start, *temp;
 555
 556   /* In #include-style directives, terminators are not escapable.  */
 557   if (pfile->state.angled_headers)
 558     return 1;
 559
 560   start = BUFF_FRONT (pfile->u_buff);
 561
 562   /* An odd number of consecutive backslashes represents an escaped
 563      terminator.  */
 564   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 565     ;
 566
 567   return ((dest - temp) & 1) == 0;
 568 }
 569
 570 /* Parses a string, character constant, or angle-bracketed header file
 571    name.  Handles embedded trigraphs and escaped newlines.  The stored
 572    string is guaranteed NUL-terminated, but it is not guaranteed that
 573    this is the first NUL since embedded NULs are preserved.
 574
 575    When this function returns, buffer->cur points to the next
 576    character to be processed.  */
 577 static void
 578 parse_string (pfile, token, terminator)
 579      cpp_reader *pfile;
 580      cpp_token *token;
 581      cppchar_t terminator;
 582 {
 583   cpp_buffer *buffer = pfile->buffer;
 584   unsigned char *dest, *limit;
 585   cppchar_t c;
 586   bool warned_nulls = false;
 587
 588   dest = BUFF_FRONT (pfile->u_buff);
 589   limit = BUFF_LIMIT (pfile->u_buff);
 590
 591   for (;;)
 592     {
 593       /* We need room for another char, possibly the terminating NUL.  */
 594       if ((size_t) (limit - dest) < 1)
 595         {
 596           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 597           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 598           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 599           limit = BUFF_LIMIT (pfile->u_buff);
 600         }
 601
 602       c = *buffer->cur++;
 603
 604       if (c == terminator)
 605         {
 606           if (unescaped_terminator_p (pfile, dest))
 607             break;
 608         }
 609       else if (c == '\n')
 610         {
 611           /* No string literal may extend over multiple lines.  In
 612              assembly language, suppress the error except for <>
 613              includes.  This is a kludge around not knowing where
 614              comments are.  */
 615           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 616             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 617                        (int) terminator);
 618           buffer->cur--;
 619           break;
 620         }
 621       else if (c == '\0')
 622         {
 623           if (!warned_nulls)
 624             {
 625               warned_nulls = true;
 626               cpp_error (pfile, DL_WARNING,
 627                          "null character(s) preserved in literal");
 628             }
 629         }
 630         *dest++ = c;
 631     }
 632
 633   *dest = '\0';
 634
 635   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 636   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 637   BUFF_FRONT (pfile->u_buff) = dest + 1;
 638 }
 639
 640 /* The stored comment includes the comment start and any terminator.  */
 641 static void
 642 save_comment (pfile, token, from, type)
 643      cpp_reader *pfile;
 644      cpp_token *token;
 645      const unsigned char *from;
 646      cppchar_t type;
 647 {
 648   unsigned char *buffer;
 649   unsigned int len, clen;
 650
 651   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 652
 653   /* C++ comments probably (not definitely) have moved past a new
 654      line, which we don't want to save in the comment.  */
 655   if (is_vspace (pfile->buffer->cur[-1]))
 656     len--;
 657
 658   /* If we are currently in a directive, then we need to store all
 659      C++ comments as C comments internally, and so we need to
 660      allocate a little extra space in that case.
 661
 662      Note that the only time we encounter a directive here is
 663      when we are saving comments in a "#define".  */
 664   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 665
 666   buffer = _cpp_unaligned_alloc (pfile, clen);
 667
 668   token->type = CPP_COMMENT;
 669   token->val.str.len = clen;
 670   token->val.str.text = buffer;
 671
 672   buffer[0] = '/';
 673   memcpy (buffer + 1, from, len - 1);
 674
 675   /* Finish conversion to a C comment, if necessary.  */
 676   if (pfile->state.in_directive && type == '/')
 677     {
 678       buffer[1] = '*';
 679       buffer[clen - 2] = '*';
 680       buffer[clen - 1] = '/';
 681     }
 682 }
 683
 684 /* Allocate COUNT tokens for RUN.  */
 685 void
 686 _cpp_init_tokenrun (run, count)
 687      tokenrun *run;
 688      unsigned int count;
 689 {
 690   run->base = xnewvec (cpp_token, count);
 691   run->limit = run->base + count;
 692   run->next = NULL;
 693 }
 694
 695 /* Returns the next tokenrun, or creates one if there is none.  */
 696 static tokenrun *
 697 next_tokenrun (run)
 698      tokenrun *run;
 699 {
 700   if (run->next == NULL)
 701     {
 702       run->next = xnew (tokenrun);
 703       run->next->prev = run;
 704       _cpp_init_tokenrun (run->next, 250);
 705     }
 706
 707   return run->next;
 708 }
 709
 710 /* Allocate a single token that is invalidated at the same time as the
 711    rest of the tokens on the line.  Has its line and col set to the
 712    same as the last lexed token, so that diagnostics appear in the
 713    right place.  */
 714 cpp_token *
 715 _cpp_temp_token (pfile)
 716      cpp_reader *pfile;
 717 {
 718   cpp_token *old, *result;
 719
 720   old = pfile->cur_token - 1;
 721   if (pfile->cur_token == pfile->cur_run->limit)
 722     {
 723       pfile->cur_run = next_tokenrun (pfile->cur_run);
 724       pfile->cur_token = pfile->cur_run->base;
 725     }
 726
 727   result = pfile->cur_token++;
 728   result->line = old->line;
 729   result->col = old->col;
 730   return result;
 731 }
 732
 733 /* Lex a token into RESULT (external interface).  Takes care of issues
 734    like directive handling, token lookahead, multiple include
 735    optimization and skipping.  */
 736 const cpp_token *
 737 _cpp_lex_token (pfile)
 738      cpp_reader *pfile;
 739 {
 740   cpp_token *result;
 741
 742   for (;;)
 743     {
 744       if (pfile->cur_token == pfile->cur_run->limit)
 745         {
 746           pfile->cur_run = next_tokenrun (pfile->cur_run);
 747           pfile->cur_token = pfile->cur_run->base;
 748         }
 749
 750       if (pfile->lookaheads)
 751         {
 752           pfile->lookaheads--;
 753           result = pfile->cur_token++;
 754         }
 755       else
 756         result = _cpp_lex_direct (pfile);
 757
 758       if (result->flags & BOL)
 759         {
 760           /* Is this a directive.  If _cpp_handle_directive returns
 761              false, it is an assembler #.  */
 762           if (result->type == CPP_HASH
 763               /* 6.10.3 p 11: Directives in a list of macro arguments
 764                  gives undefined behavior.  This implementation
 765                  handles the directive as normal.  */
 766               && pfile->state.parsing_args != 1
 767               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 768             continue;
 769           if (pfile->cb.line_change && !pfile->state.skipping)
 770             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 771         }
 772
 773       /* We don't skip tokens in directives.  */
 774       if (pfile->state.in_directive)
 775         break;
 776
 777       /* Outside a directive, invalidate controlling macros.  At file
 778          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 779          get here and MI optimisation works.  */
 780       pfile->mi_valid = false;
 781
 782       if (!pfile->state.skipping || result->type == CPP_EOF)
 783         break;
 784     }
 785
 786   return result;
 787 }
 788
 789 /* Returns true if a fresh line has been loaded.  */
 790 bool
 791 _cpp_get_fresh_line (pfile)
 792      cpp_reader *pfile;
 793 {
 794   /* We can't get a new line until we leave the current directive.  */
 795   if (pfile->state.in_directive)
 796     return false;
 797
 798   for (;;)
 799     {
 800       cpp_buffer *buffer = pfile->buffer;
 801
 802       if (!buffer->need_line)
 803         return true;
 804
 805       if (buffer->next_line < buffer->rlimit)
 806         {
 807           _cpp_clean_line (pfile);
 808           return true;
 809         }
 810
 811       /* First, get out of parsing arguments state.  */
 812       if (pfile->state.parsing_args)
 813         return false;
 814
 815       /* End of buffer.  Non-empty files should end in a newline.  */
 816       if (buffer->buf != buffer->rlimit
 817           && buffer->next_line > buffer->rlimit
 818           && !buffer->from_stage3)
 819         {
 820           /* Only warn once.  */
 821           buffer->next_line = buffer->rlimit;
 822           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 823                                CPP_BUF_COLUMN (buffer, buffer->cur),
 824                                "no newline at end of file");
 825         }
 826
 827       if (buffer->return_at_eof)
 828         {
 829           buffer->return_at_eof = false;
 830           return false;
 831         }
 832
 833       if (!buffer->prev)
 834         return false;
 835
 836       _cpp_pop_buffer (pfile);
 837     }
 838 }
 839
 840 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 841   do {                                          \
 842     if (get_effective_char (pfile) == CHAR)     \
 843       result->type = THEN_TYPE;                 \
 844     else                                        \
 845       {                                         \
 846         BACKUP ();                              \
 847         result->type = ELSE_TYPE;               \
 848       }                                         \
 849   } while (0)
 850
 851 /* Lex a token into pfile->cur_token, which is also incremented, to
 852    get diagnostics pointing to the correct location.
 853
 854    Does not handle issues such as token lookahead, multiple-include
 855    optimisation, directives, skipping etc.  This function is only
 856    suitable for use by _cpp_lex_token, and in special cases like
 857    lex_expansion_token which doesn't care for any of these issues.
 858
 859    When meeting a newline, returns CPP_EOF if parsing a directive,
 860    otherwise returns to the start of the token buffer if permissible.
 861    Returns the location of the lexed token.  */
 862 cpp_token *
 863 _cpp_lex_direct (pfile)
 864      cpp_reader *pfile;
 865 {
 866   cppchar_t c;
 867   cpp_buffer *buffer;
 868   const unsigned char *comment_start;
 869   cpp_token *result = pfile->cur_token++;
 870
 871  fresh_line:
 872   result->flags = 0;
 873   if (pfile->buffer->need_line)
 874     {
 875       if (!_cpp_get_fresh_line (pfile))
 876         {
 877           result->type = CPP_EOF;
 878           return result;
 879         }
 880       if (!pfile->keep_tokens)
 881         {
 882           pfile->cur_run = &pfile->base_run;
 883           result = pfile->base_run.base;
 884           pfile->cur_token = result + 1;
 885         }
 886       result->flags = BOL;
 887       if (pfile->state.parsing_args == 2)
 888         result->flags |= PREV_WHITE;
 889     }
 890   buffer = pfile->buffer;
 891  update_tokens_line:
 892   result->line = pfile->line;
 893
 894  skipped_white:
 895   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 896       && !pfile->overlaid_buffer)
 897     {
 898       _cpp_process_line_notes (pfile, false);
 899       result->line = pfile->line;
 900     }
 901   c = *buffer->cur++;
 902   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 903
 904   switch (c)
 905     {
 906     case ' ': case '\t': case '\f': case '\v': case '\0':
 907       result->flags |= PREV_WHITE;
 908       skip_whitespace (pfile, c);
 909       goto skipped_white;
 910
 911     case '\n':
 912       pfile->line++;
 913       buffer->need_line = true;
 914       goto fresh_line;
 915
 916     case '0': case '1': case '2': case '3': case '4':
 917     case '5': case '6': case '7': case '8': case '9':
 918       result->type = CPP_NUMBER;
 919       parse_number (pfile, &result->val.str, 0);
 920       break;
 921
 922     case 'L':
 923       /* 'L' may introduce wide characters or strings.  */
 924       {
 925         const unsigned char *pos = buffer->cur;
 926
 927         c = get_effective_char (pfile);
 928         if (c == '\'' || c == '"')
 929           {
 930             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 931             parse_string (pfile, result, c);
 932             break;
 933           }
 934         buffer->cur = pos;
 935       }
 936       /* Fall through.  */
 937
 938     start_ident:
 939     case '_':
 940     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 941     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 942     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 943     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 944     case 'y': case 'z':
 945     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 946     case 'G': case 'H': case 'I': case 'J': case 'K':
 947     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 948     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 949     case 'Y': case 'Z':
 950       result->type = CPP_NAME;
 951       result->val.node = parse_identifier (pfile);
 952
 953       /* Convert named operators to their proper types.  */
 954       if (result->val.node->flags & NODE_OPERATOR)
 955         {
 956           result->flags |= NAMED_OP;
 957           result->type = result->val.node->directive_index;
 958         }
 959       break;
 960
 961     case '\'':
 962     case '"':
 963       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 964       parse_string (pfile, result, c);
 965       break;
 966
 967     case '/':
 968       /* A potential block or line comment.  */
 969       comment_start = buffer->cur;
 970       c = get_effective_char (pfile);
 971
 972       if (c == '*')
 973         {
 974           if (_cpp_skip_block_comment (pfile))
 975             cpp_error (pfile, DL_ERROR, "unterminated comment");
 976         }
 977       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 978                             || CPP_IN_SYSTEM_HEADER (pfile)))
 979         {
 980           /* Warn about comments only if pedantically GNUC89, and not
 981              in system headers.  */
 982           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 983               && ! buffer->warned_cplusplus_comments)
 984             {
 985               cpp_error (pfile, DL_PEDWARN,
 986                          "C++ style comments are not allowed in ISO C90");
 987               cpp_error (pfile, DL_PEDWARN,
 988                          "(this will be reported only once per input file)");
 989               buffer->warned_cplusplus_comments = 1;
 990             }
 991
 992           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 993             cpp_error (pfile, DL_WARNING, "multi-line comment");
 994         }
 995       else if (c == '=')
 996         {
 997           result->type = CPP_DIV_EQ;
 998           break;
 999         }
1000       else
1001         {
1002           BACKUP ();
1003           result->type = CPP_DIV;
1004           break;
1005         }
1006
1007       if (!pfile->state.save_comments)
1008         {
1009           result->flags |= PREV_WHITE;
1010           goto update_tokens_line;
1011         }
1012
1013       /* Save the comment as a token in its own right.  */
1014       save_comment (pfile, result, comment_start, c);
1015       break;
1016
1017     case '<':
1018       if (pfile->state.angled_headers)
1019         {
1020           result->type = CPP_HEADER_NAME;
1021           parse_string (pfile, result, '>');
1022           break;
1023         }
1024
1025       c = get_effective_char (pfile);
1026       if (c == '=')
1027         result->type = CPP_LESS_EQ;
1028       else if (c == '<')
1029         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1030       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1031         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1032       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1033         {
1034           result->type = CPP_OPEN_SQUARE;
1035           result->flags |= DIGRAPH;
1036         }
1037       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1038         {
1039           result->type = CPP_OPEN_BRACE;
1040           result->flags |= DIGRAPH;
1041         }
1042       else
1043         {
1044           BACKUP ();
1045           result->type = CPP_LESS;
1046         }
1047       break;
1048
1049     case '>':
1050       c = get_effective_char (pfile);
1051       if (c == '=')
1052         result->type = CPP_GREATER_EQ;
1053       else if (c == '>')
1054         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1055       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1056         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1057       else
1058         {
1059           BACKUP ();
1060           result->type = CPP_GREATER;
1061         }
1062       break;
1063
1064     case '%':
1065       c = get_effective_char (pfile);
1066       if (c == '=')
1067         result->type = CPP_MOD_EQ;
1068       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1069         {
1070           result->flags |= DIGRAPH;
1071           result->type = CPP_HASH;
1072           if (get_effective_char (pfile) == '%')
1073             {
1074               const unsigned char *pos = buffer->cur;
1075
1076               if (get_effective_char (pfile) == ':')
1077                 result->type = CPP_PASTE;
1078               else
1079                 buffer->cur = pos - 1;
1080             }
1081           else
1082             BACKUP ();
1083         }
1084       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1085         {
1086           result->flags |= DIGRAPH;
1087           result->type = CPP_CLOSE_BRACE;
1088         }
1089       else
1090         {
1091           BACKUP ();
1092           result->type = CPP_MOD;
1093         }
1094       break;
1095
1096     case '.':
1097       result->type = CPP_DOT;
1098       c = get_effective_char (pfile);
1099       if (c == '.')
1100         {
1101           const unsigned char *pos = buffer->cur;
1102
1103           if (get_effective_char (pfile) == '.')
1104             result->type = CPP_ELLIPSIS;
1105           else
1106             buffer->cur = pos - 1;
1107         }
1108       /* All known character sets have 0...9 contiguous.  */
1109       else if (ISDIGIT (c))
1110         {
1111           result->type = CPP_NUMBER;
1112           parse_number (pfile, &result->val.str, 1);
1113         }
1114       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1115         result->type = CPP_DOT_STAR;
1116       else
1117         BACKUP ();
1118       break;
1119
1120     case '+':
1121       c = get_effective_char (pfile);
1122       if (c == '+')
1123         result->type = CPP_PLUS_PLUS;
1124       else if (c == '=')
1125         result->type = CPP_PLUS_EQ;
1126       else
1127         {
1128           BACKUP ();
1129           result->type = CPP_PLUS;
1130         }
1131       break;
1132
1133     case '-':
1134       c = get_effective_char (pfile);
1135       if (c == '>')
1136         {
1137           result->type = CPP_DEREF;
1138           if (CPP_OPTION (pfile, cplusplus))
1139             {
1140               if (get_effective_char (pfile) == '*')
1141                 result->type = CPP_DEREF_STAR;
1142               else
1143                 BACKUP ();
1144             }
1145         }
1146       else if (c == '-')
1147         result->type = CPP_MINUS_MINUS;
1148       else if (c == '=')
1149         result->type = CPP_MINUS_EQ;
1150       else
1151         {
1152           BACKUP ();
1153           result->type = CPP_MINUS;
1154         }
1155       break;
1156
1157     case '&':
1158       c = get_effective_char (pfile);
1159       if (c == '&')
1160         result->type = CPP_AND_AND;
1161       else if (c == '=')
1162         result->type = CPP_AND_EQ;
1163       else
1164         {
1165           BACKUP ();
1166           result->type = CPP_AND;
1167         }
1168       break;
1169
1170     case '|':
1171       c = get_effective_char (pfile);
1172       if (c == '|')
1173         result->type = CPP_OR_OR;
1174       else if (c == '=')
1175         result->type = CPP_OR_EQ;
1176       else
1177         {
1178           BACKUP ();
1179           result->type = CPP_OR;
1180         }
1181       break;
1182
1183     case ':':
1184       c = get_effective_char (pfile);
1185       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1186         result->type = CPP_SCOPE;
1187       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1188         {
1189           result->flags |= DIGRAPH;
1190           result->type = CPP_CLOSE_SQUARE;
1191         }
1192       else
1193         {
1194           BACKUP ();
1195           result->type = CPP_COLON;
1196         }
1197       break;
1198
1199     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1200     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1201     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1202     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1203     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1204
1205     case '?': result->type = CPP_QUERY; break;
1206     case '~': result->type = CPP_COMPL; break;
1207     case ',': result->type = CPP_COMMA; break;
1208     case '(': result->type = CPP_OPEN_PAREN; break;
1209     case ')': result->type = CPP_CLOSE_PAREN; break;
1210     case '[': result->type = CPP_OPEN_SQUARE; break;
1211     case ']': result->type = CPP_CLOSE_SQUARE; break;
1212     case '{': result->type = CPP_OPEN_BRACE; break;
1213     case '}': result->type = CPP_CLOSE_BRACE; break;
1214     case ';': result->type = CPP_SEMICOLON; break;
1215
1216       /* @ is a punctuator in Objective-C.  */
1217     case '@': result->type = CPP_ATSIGN; break;
1218
1219     case '$':
1220       if (CPP_OPTION (pfile, dollars_in_ident))
1221         goto start_ident;
1222       /* Fall through...  */
1223
1224     default:
1225       result->type = CPP_OTHER;
1226       result->val.c = c;
1227       break;
1228     }
1229
1230   return result;
1231 }
1232
1233 /* An upper bound on the number of bytes needed to spell TOKEN,
1234    including preceding whitespace.  */
1235 unsigned int
1236 cpp_token_len (token)
1237      const cpp_token *token;
1238 {
1239   unsigned int len;
1240
1241   switch (TOKEN_SPELL (token))
1242     {
1243     default:            len = 0;                                break;
1244     case SPELL_NUMBER:
1245     case SPELL_STRING:  len = token->val.str.len;               break;
1246     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1247     }
1248   /* 1 for whitespace, 4 for comment delimiters.  */
1249   return len + 5;
1250 }
1251
1252 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1253    already contain the enough space to hold the token's spelling.
1254    Returns a pointer to the character after the last character
1255    written.  */
1256 unsigned char *
1257 cpp_spell_token (pfile, token, buffer)
1258      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1259      const cpp_token *token;
1260      unsigned char *buffer;
1261 {
1262   switch (TOKEN_SPELL (token))
1263     {
1264     case SPELL_OPERATOR:
1265       {
1266         const unsigned char *spelling;
1267         unsigned char c;
1268
1269         if (token->flags & DIGRAPH)
1270           spelling
1271             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1272         else if (token->flags & NAMED_OP)
1273           goto spell_ident;
1274         else
1275           spelling = TOKEN_NAME (token);
1276
1277         while ((c = *spelling++) != '\0')
1278           *buffer++ = c;
1279       }
1280       break;
1281
1282     case SPELL_CHAR:
1283       *buffer++ = token->val.c;
1284       break;
1285
1286     spell_ident:
1287     case SPELL_IDENT:
1288       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1289       buffer += NODE_LEN (token->val.node);
1290       break;
1291
1292     case SPELL_NUMBER:
1293       memcpy (buffer, token->val.str.text, token->val.str.len);
1294       buffer += token->val.str.len;
1295       break;
1296
1297     case SPELL_STRING:
1298       {
1299         int left, right, tag;
1300         switch (token->type)
1301           {
1302           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1303           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1304           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1305           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1306           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1307           default:
1308             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1309                        TOKEN_NAME (token));
1310             return buffer;
1311           }
1312         if (tag) *buffer++ = tag;
1313         *buffer++ = left;
1314         memcpy (buffer, token->val.str.text, token->val.str.len);
1315         buffer += token->val.str.len;
1316         *buffer++ = right;
1317       }
1318       break;
1319
1320     case SPELL_NONE:
1321       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1322       break;
1323     }
1324
1325   return buffer;
1326 }
1327
1328 /* Returns TOKEN spelt as a null-terminated string.  The string is
1329    freed when the reader is destroyed.  Useful for diagnostics.  */
1330 unsigned char *
1331 cpp_token_as_text (pfile, token)
1332      cpp_reader *pfile;
1333      const cpp_token *token;
1334 {
1335   unsigned int len = cpp_token_len (token);
1336   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1337
1338   end = cpp_spell_token (pfile, token, start);
1339   end[0] = '\0';
1340
1341   return start;
1342 }
1343
1344 /* Used by C front ends, which really should move to using
1345    cpp_token_as_text.  */
1346 const char *
1347 cpp_type2name (type)
1348      enum cpp_ttype type;
1349 {
1350   return (const char *) token_spellings[type].name;
1351 }
1352
1353 /* Writes the spelling of token to FP, without any preceding space.
1354    Separated from cpp_spell_token for efficiency - to avoid stdio
1355    double-buffering.  */
1356 void
1357 cpp_output_token (token, fp)
1358      const cpp_token *token;
1359      FILE *fp;
1360 {
1361   switch (TOKEN_SPELL (token))
1362     {
1363     case SPELL_OPERATOR:
1364       {
1365         const unsigned char *spelling;
1366         int c;
1367
1368         if (token->flags & DIGRAPH)
1369           spelling
1370             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1371         else if (token->flags & NAMED_OP)
1372           goto spell_ident;
1373         else
1374           spelling = TOKEN_NAME (token);
1375
1376         c = *spelling;
1377         do
1378           putc (c, fp);
1379         while ((c = *++spelling) != '\0');
1380       }
1381       break;
1382
1383     case SPELL_CHAR:
1384       putc (token->val.c, fp);
1385       break;
1386
1387     spell_ident:
1388     case SPELL_IDENT:
1389       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1390     break;
1391
1392     case SPELL_NUMBER:
1393       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1394       break;
1395
1396     case SPELL_STRING:
1397       {
1398         int left, right, tag;
1399         switch (token->type)
1400           {
1401           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1402           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1403           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1404           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1405           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1406           default:
1407             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1408             return;
1409           }
1410         if (tag) putc (tag, fp);
1411         putc (left, fp);
1412         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1413         putc (right, fp);
1414       }
1415       break;
1416
1417     case SPELL_NONE:
1418       /* An error, most probably.  */
1419       break;
1420     }
1421 }
1422
1423 /* Compare two tokens.  */
1424 int
1425 _cpp_equiv_tokens (a, b)
1426      const cpp_token *a, *b;
1427 {
1428   if (a->type == b->type && a->flags == b->flags)
1429     switch (TOKEN_SPELL (a))
1430       {
1431       default:                  /* Keep compiler happy.  */
1432       case SPELL_OPERATOR:
1433         return 1;
1434       case SPELL_CHAR:
1435         return a->val.c == b->val.c; /* Character.  */
1436       case SPELL_NONE:
1437         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1438       case SPELL_IDENT:
1439         return a->val.node == b->val.node;
1440       case SPELL_NUMBER:
1441       case SPELL_STRING:
1442         return (a->val.str.len == b->val.str.len
1443                 && !memcmp (a->val.str.text, b->val.str.text,
1444                             a->val.str.len));
1445       }
1446
1447   return 0;
1448 }
1449
1450 /* Returns nonzero if a space should be inserted to avoid an
1451    accidental token paste for output.  For simplicity, it is
1452    conservative, and occasionally advises a space where one is not
1453    needed, e.g. "." and ".2".  */
1454 int
1455 cpp_avoid_paste (pfile, token1, token2)
1456      cpp_reader *pfile;
1457      const cpp_token *token1, *token2;
1458 {
1459   enum cpp_ttype a = token1->type, b = token2->type;
1460   cppchar_t c;
1461
1462   if (token1->flags & NAMED_OP)
1463     a = CPP_NAME;
1464   if (token2->flags & NAMED_OP)
1465     b = CPP_NAME;
1466
1467   c = EOF;
1468   if (token2->flags & DIGRAPH)
1469     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1470   else if (token_spellings[b].category == SPELL_OPERATOR)
1471     c = token_spellings[b].name[0];
1472
1473   /* Quickly get everything that can paste with an '='.  */
1474   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1475     return 1;
1476
1477   switch (a)
1478     {
1479     case CPP_GREATER:   return c == '>' || c == '?';
1480     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1481     case CPP_PLUS:      return c == '+';
1482     case CPP_MINUS:     return c == '-' || c == '>';
1483     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1484     case CPP_MOD:       return c == ':' || c == '>';
1485     case CPP_AND:       return c == '&';
1486     case CPP_OR:        return c == '|';
1487     case CPP_COLON:     return c == ':' || c == '>';
1488     case CPP_DEREF:     return c == '*';
1489     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1490     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1491     case CPP_NAME:      return ((b == CPP_NUMBER
1492                                  && name_p (pfile, &token2->val.str))
1493                                 || b == CPP_NAME
1494                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1495     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1496                                 || c == '.' || c == '+' || c == '-');
1497     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1498                                 && token1->val.c == '@'
1499                                 && (b == CPP_NAME || b == CPP_STRING));
1500     default:            break;
1501     }
1502
1503   return 0;
1504 }
1505
1506 /* Output all the remaining tokens on the current line, and a newline
1507    character, to FP.  Leading whitespace is removed.  If there are
1508    macros, special token padding is not performed.  */
1509 void
1510 cpp_output_line (pfile, fp)
1511      cpp_reader *pfile;
1512      FILE *fp;
1513 {
1514   const cpp_token *token;
1515
1516   token = cpp_get_token (pfile);
1517   while (token->type != CPP_EOF)
1518     {
1519       cpp_output_token (token, fp);
1520       token = cpp_get_token (pfile);
1521       if (token->flags & PREV_WHITE)
1522         putc (' ', fp);
1523     }
1524
1525   putc ('\n', fp);
1526 }
1527
/* Returns the value of the hexadecimal digit C.  Aborts if C is not
   a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1538
1539 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1540    failure if cpplib is not parsing C++ or C99.  Such failure is
1541    silent, and no variables are updated.  Otherwise returns 0, and
1542    warns if -Wtraditional.
1543
1544    [lex.charset]: The character designated by the universal character
1545    name \UNNNNNNNN is that character whose character short name in
1546    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1547    universal character name \uNNNN is that character whose character
1548    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1549    for a universal character name is less than 0x20 or in the range
1550    0x7F-0x9F (inclusive), or if the universal character name
1551    designates a character in the basic source character set, then the
1552    program is ill-formed.
1553
1554    We assume that wchar_t is Unicode, so we don't need to do any
1555    mapping.  Is this ever wrong?
1556
1557    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1558    LIMIT is the end of the string or charconst.  PSTR is updated to
1559    point after the UCS on return, and the UCS is written into PC.  */
1560
1561 static int
1562 maybe_read_ucs (pfile, pstr, limit, pc)
1563      cpp_reader *pfile;
1564      const unsigned char **pstr;
1565      const unsigned char *limit;
1566      cppchar_t *pc;
1567 {
1568   const unsigned char *p = *pstr;
1569   unsigned int code = 0;
1570   unsigned int c = *pc, length;
1571
1572   /* Only attempt to interpret a UCS for C++ and C99.  */
1573   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1574     return 1;
1575
1576   if (CPP_WTRADITIONAL (pfile))
1577     cpp_error (pfile, DL_WARNING,
1578                "the meaning of '\\%c' is different in traditional C", c);
1579
1580   length = (c == 'u' ? 4: 8);
1581
1582   if ((size_t) (limit - p) < length)
1583     {
1584       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1585       /* Skip to the end to avoid more diagnostics.  */
1586       p = limit;
1587     }
1588   else
1589     {
1590       for (; length; length--, p++)
1591         {
1592           c = *p;
1593           if (ISXDIGIT (c))
1594             code = (code << 4) + hex_digit_value (c);
1595           else
1596             {
1597               cpp_error (pfile, DL_ERROR,
1598                          "non-hex digit '%c' in universal-character-name", c);
1599               /* We shouldn't skip in case there are multibyte chars.  */
1600               break;
1601             }
1602         }
1603     }
1604
1605   if (CPP_OPTION (pfile, EBCDIC))
1606     {
1607       cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1608       code = 0x3f;  /* EBCDIC invalid character */
1609     }
1610   /* True extended characters are OK.  */
1611   else if (code >= 0xa0
1612            && !(code & 0x80000000)
1613            && !(code >= 0xD800 && code <= 0xDFFF))
1614     ;
1615   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1616      hex escapes so that this also works with EBCDIC hosts.  */
1617   else if (code == 0x24 || code == 0x40 || code == 0x60)
1618     ;
1619   /* Don't give another error if one occurred above.  */
1620   else if (length == 0)
1621     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1622
1623   *pstr = p;
1624   *pc = code;
1625   return 0;
1626 }
1627
1628 /* Returns the value of an escape sequence, truncated to the correct
1629    target precision.  PSTR points to the input pointer, which is just
1630    after the backslash.  LIMIT is how much text we have.  WIDE is true
1631    if the escape sequence is part of a wide character constant or
1632    string literal.  Handles all relevant diagnostics.  */
1633 cppchar_t
1634 cpp_parse_escape (pfile, pstr, limit, wide)
1635      cpp_reader *pfile;
1636      const unsigned char **pstr;
1637      const unsigned char *limit;
1638      int wide;
1639 {
1640   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1641   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1642   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1643
1644   int unknown = 0;
1645   const unsigned char *str = *pstr, *charconsts;
1646   cppchar_t c, mask;
1647   unsigned int width;
1648
1649   if (CPP_OPTION (pfile, EBCDIC))
1650     charconsts = ebcdic;
1651   else
1652     charconsts = ascii;
1653
1654   if (wide)
1655     width = CPP_OPTION (pfile, wchar_precision);
1656   else
1657     width = CPP_OPTION (pfile, char_precision);
1658   if (width < BITS_PER_CPPCHAR_T)
1659     mask = ((cppchar_t) 1 << width) - 1;
1660   else
1661     mask = ~0;
1662
1663   c = *str++;
1664   switch (c)
1665     {
1666     case '\\': case '\'': case '"': case '?': break;
1667     case 'b': c = charconsts[1];  break;
1668     case 'f': c = charconsts[3];  break;
1669     case 'n': c = charconsts[4];  break;
1670     case 'r': c = charconsts[5];  break;
1671     case 't': c = charconsts[6];  break;
1672     case 'v': c = charconsts[7];  break;
1673
1674     case '(': case '{': case '[': case '%':
1675       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1676          '\%' is used to prevent SCCS from getting confused.  */
1677       unknown = CPP_PEDANTIC (pfile);
1678       break;
1679
1680     case 'a':
1681       if (CPP_WTRADITIONAL (pfile))
1682         cpp_error (pfile, DL_WARNING,
1683                    "the meaning of '\\a' is different in traditional C");
1684       c = charconsts[0];
1685       break;
1686
1687     case 'e': case 'E':
1688       if (CPP_PEDANTIC (pfile))
1689         cpp_error (pfile, DL_PEDWARN,
1690                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1691       c = charconsts[2];
1692       break;
1693
1694     case 'u': case 'U':
1695       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1696       break;
1697
1698     case 'x':
1699       if (CPP_WTRADITIONAL (pfile))
1700         cpp_error (pfile, DL_WARNING,
1701                    "the meaning of '\\x' is different in traditional C");
1702
1703       {
1704         cppchar_t i = 0, overflow = 0;
1705         int digits_found = 0;
1706
1707         while (str < limit)
1708           {
1709             c = *str;
1710             if (! ISXDIGIT (c))
1711               break;
1712             str++;
1713             overflow |= i ^ (i << 4 >> 4);
1714             i = (i << 4) + hex_digit_value (c);
1715             digits_found = 1;
1716           }
1717
1718         if (!digits_found)
1719           cpp_error (pfile, DL_ERROR,
1720                        "\\x used with no following hex digits");
1721
1722         if (overflow | (i != (i & mask)))
1723           {
1724             cpp_error (pfile, DL_PEDWARN,
1725                        "hex escape sequence out of range");
1726             i &= mask;
1727           }
1728         c = i;
1729       }
1730       break;
1731
1732     case '0':  case '1':  case '2':  case '3':
1733     case '4':  case '5':  case '6':  case '7':
1734       {
1735         size_t count = 0;
1736         cppchar_t i = c - '0';
1737
1738         while (str < limit && ++count < 3)
1739           {
1740             c = *str;
1741             if (c < '0' || c > '7')
1742               break;
1743             str++;
1744             i = (i << 3) + c - '0';
1745           }
1746
1747         if (i != (i & mask))
1748           {
1749             cpp_error (pfile, DL_PEDWARN,
1750                        "octal escape sequence out of range");
1751             i &= mask;
1752           }
1753         c = i;
1754       }
1755       break;
1756
1757     default:
1758       unknown = 1;
1759       break;
1760     }
1761
1762   if (unknown)
1763     {
1764       if (ISGRAPH (c))
1765         cpp_error (pfile, DL_PEDWARN,
1766                    "unknown escape sequence '\\%c'", (int) c);
1767       else
1768         cpp_error (pfile, DL_PEDWARN,
1769                    "unknown escape sequence: '\\%03o'", (int) c);
1770     }
1771
1772   if (c > mask)
1773     {
1774       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1775       c &= mask;
1776     }
1777
1778   *pstr = str;
1779   return c;
1780 }
1781
1782 /* Interpret a (possibly wide) character constant in TOKEN.
1783    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1784    points to a variable that is filled in with the number of
1785    characters seen, and UNSIGNEDP to a variable that indicates whether
1786    the result has signed type.  */
1787 cppchar_t
1788 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1789      cpp_reader *pfile;
1790      const cpp_token *token;
1791      unsigned int *pchars_seen;
1792      int *unsignedp;
1793 {
1794   const unsigned char *str = token->val.str.text;
1795   const unsigned char *limit = str + token->val.str.len;
1796   unsigned int chars_seen = 0;
1797   size_t width, max_chars;
1798   cppchar_t c, mask, result = 0;
1799   bool unsigned_p;
1800
1801   /* Width in bits.  */
1802   if (token->type == CPP_CHAR)
1803     {
1804       width = CPP_OPTION (pfile, char_precision);
1805       max_chars = CPP_OPTION (pfile, int_precision) / width;
1806       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1807     }
1808   else
1809     {
1810       width = CPP_OPTION (pfile, wchar_precision);
1811       max_chars = 1;
1812       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1813     }
1814
1815   if (width < BITS_PER_CPPCHAR_T)
1816     mask = ((cppchar_t) 1 << width) - 1;
1817   else
1818     mask = ~0;
1819
1820   while (str < limit)
1821     {
1822       c = *str++;
1823
1824       if (c == '\\')
1825         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1826
1827 #ifdef MAP_CHARACTER
1828       if (ISPRINT (c))
1829         c = MAP_CHARACTER (c);
1830 #endif
1831
1832       chars_seen++;
1833
1834       /* Truncate the character, scale the result and merge the two.  */
1835       c &= mask;
1836       if (width < BITS_PER_CPPCHAR_T)
1837         result = (result << width) | c;
1838       else
1839         result = c;
1840     }
1841
1842   if (chars_seen == 0)
1843     cpp_error (pfile, DL_ERROR, "empty character constant");
1844   else if (chars_seen > 1)
1845     {
1846       /* Multichar charconsts are of type int and therefore signed.  */
1847       unsigned_p = 0;
1848
1849       if (chars_seen > max_chars)
1850         {
1851           chars_seen = max_chars;
1852           cpp_error (pfile, DL_WARNING,
1853                      "character constant too long for its type");
1854         }
1855       else if (CPP_OPTION (pfile, warn_multichar))
1856         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1857     }
1858
1859   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1860      in WIDTH bits, but for multi-char charconsts it's value is the
1861      full target type's width.  */
1862   if (chars_seen > 1)
1863     width *= max_chars;
1864   if (width < BITS_PER_CPPCHAR_T)
1865     {
1866       mask = ((cppchar_t) 1 << width) - 1;
1867       if (unsigned_p || !(result & (1 << (width - 1))))
1868         result &= mask;
1869       else
1870         result |= ~mask;
1871     }
1872
1873   *pchars_seen = chars_seen;
1874   *unsignedp = unsigned_p;
1875   return result;
1876 }
1877
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest size given to a new buffer.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is MIN_BUFF_SIZE plus one and a
   half times MIN_SIZE.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is
   MIN_EXTRA plus twice the space between BUFF's cur and limit
   pointers.  Every macro argument is parenthesized so that an
   argument which is itself an expression is evaluated as a unit.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1892
1893 /* Create a new allocation buffer.  Place the control block at the end
1894    of the buffer, so that buffer overflows will cause immediate chaos.  */
1895 static _cpp_buff *
1896 new_buff (len)
1897      size_t len;
1898 {
1899   _cpp_buff *result;
1900   unsigned char *base;
1901
1902   if (len < MIN_BUFF_SIZE)
1903     len = MIN_BUFF_SIZE;
1904   len = CPP_ALIGN (len);
1905
1906   base = xmalloc (len + sizeof (_cpp_buff));
1907   result = (_cpp_buff *) (base + len);
1908   result->base = base;
1909   result->cur = base;
1910   result->limit = base + len;
1911   result->next = NULL;
1912   return result;
1913 }
1914
1915 /* Place a chain of unwanted allocation buffers on the free list.  */
1916 void
1917 _cpp_release_buff (pfile, buff)
1918      cpp_reader *pfile;
1919      _cpp_buff *buff;
1920 {
1921   _cpp_buff *end = buff;
1922
1923   while (end->next)
1924     end = end->next;
1925   end->next = pfile->free_buffs;
1926   pfile->free_buffs = buff;
1927 }
1928
1929 /* Return a free buffer of size at least MIN_SIZE.  */
1930 _cpp_buff *
1931 _cpp_get_buff (pfile, min_size)
1932      cpp_reader *pfile;
1933      size_t min_size;
1934 {
1935   _cpp_buff *result, **p;
1936
1937   for (p = &pfile->free_buffs;; p = &(*p)->next)
1938     {
1939       size_t size;
1940
1941       if (*p == NULL)
1942         return new_buff (min_size);
1943       result = *p;
1944       size = result->limit - result->base;
1945       /* Return a buffer that's big enough, but don't waste one that's
1946          way too big.  */
1947       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1948         break;
1949     }
1950
1951   *p = result->next;
1952   result->next = NULL;
1953   result->cur = result->base;
1954   return result;
1955 }
1956
1957 /* Creates a new buffer with enough space to hold the uncommitted
1958    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1959    the excess bytes to the new buffer.  Chains the new buffer after
1960    BUFF, and returns the new buffer.  */
1961 _cpp_buff *
1962 _cpp_append_extend_buff (pfile, buff, min_extra)
1963      cpp_reader *pfile;
1964      _cpp_buff *buff;
1965      size_t min_extra;
1966 {
1967   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1968   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1969
1970   buff->next = new_buff;
1971   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1972   return new_buff;
1973 }
1974
1975 /* Creates a new buffer with enough space to hold the uncommitted
1976    remaining bytes of the buffer pointed to by BUFF, and at least
1977    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1978    Chains the new buffer before the buffer pointed to by BUFF, and
1979    updates the pointer to point to the new buffer.  */
1980 void
1981 _cpp_extend_buff (pfile, pbuff, min_extra)
1982      cpp_reader *pfile;
1983      _cpp_buff **pbuff;
1984      size_t min_extra;
1985 {
1986   _cpp_buff *new_buff, *old_buff = *pbuff;
1987   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1988
1989   new_buff = _cpp_get_buff (pfile, size);
1990   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1991   new_buff->next = old_buff;
1992   *pbuff = new_buff;
1993 }
1994
1995 /* Free a chain of buffers starting at BUFF.  */
1996 void
1997 _cpp_free_buff (buff)
1998      _cpp_buff *buff;
1999 {
2000   _cpp_buff *next;
2001
2002   for (; buff; buff = next)
2003     {
2004       next = buff->next;
2005       free (buff->base);
2006     }
2007 }
2008
2009 /* Allocate permanent, unaligned storage of length LEN.  */
2010 unsigned char *
2011 _cpp_unaligned_alloc (pfile, len)
2012      cpp_reader *pfile;
2013      size_t len;
2014 {
2015   _cpp_buff *buff = pfile->u_buff;
2016   unsigned char *result = buff->cur;
2017
2018   if (len > (size_t) (buff->limit - result))
2019     {
2020       buff = _cpp_get_buff (pfile, len);
2021       buff->next = pfile->u_buff;
2022       pfile->u_buff = buff;
2023       result = buff->cur;
2024     }
2025
2026   buff->cur = result + len;
2027   return result;
2028 }
2029
2030 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2031    That buffer is used for growing allocations when saving macro
2032    replacement lists in a #define, and when parsing an answer to an
2033    assertion in #assert, #unassert or #if (and therefore possibly
2034    whilst expanding macros).  It therefore must not be used by any
2035    code that they might call: specifically the lexer and the guts of
2036    the macro expander.
2037
2038    All existing other uses clearly fit this restriction: storing
2039    registered pragmas during initialization.  */
2040 unsigned char *
2041 _cpp_aligned_alloc (pfile, len)
2042      cpp_reader *pfile;
2043      size_t len;
2044 {
2045   _cpp_buff *buff = pfile->a_buff;
2046   unsigned char *result = buff->cur;
2047
2048   if (len > (size_t) (buff->limit - result))
2049     {
2050       buff = _cpp_get_buff (pfile, len);
2051       buff->next = pfile->a_buff;
2052       pfile->a_buff = buff;
2053       result = buff->cur;
2054     }
2055
2056   buff->cur = result + len;
2057   return result;
2058 }