gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7
   8 This program is free software; you can redistribute it and/or modify it
   9 under the terms of the GNU General Public License as published by the
  10 Free Software Foundation; either version 2, or (at your option) any
  11 later version.
  12
  13 This program is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with this program; if not, write to the Free Software
  20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 /* Tokens with SPELL_STRING store their spelling in the token list,
  30    and its length in the token->val.name.len.
        NOTE(review): the string lexer in this file fills
        token->val.str.len; confirm whether "val.name.len" is stale.

        The enumerators below classify how a token type is spelled; one
        of them is stored in the CATEGORY field of every token_spellings
        entry.  */
  31 enum spell_type
  32 {
  33   SPELL_OPERATOR = 0,  /* Category given to every OP () table entry.  */
  34   SPELL_CHAR,
  35   SPELL_IDENT,
  36   SPELL_NUMBER,
  37   SPELL_STRING,
  38   SPELL_NONE
  39 };
  40
     /* One row of the token_spellings table: the spelling category of
        a token type together with a string naming or spelling it.  */
  41 struct token_spelling
  42 {
  43   enum spell_type category;    /* How tokens of this type are spelled.  */
  44   const unsigned char *name;   /* Operator spelling (OP entries) or the
                                       stringified enumerator (TK entries).  */
  45 };
  46
  47 static const unsigned char *const digraph_spellings[] =
  48 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
  49
     /* Build the spelling table, one entry per token type, by expanding
        TTYPE_TABLE (defined outside this file) with OP and TK
        temporarily defined as initializer generators:
          OP (e, s) -> category SPELL_OPERATOR, spelling S;
          TK (e, s) -> category S, name = the stringified enumerator E.  */
  50 #define OP(e, s) { SPELL_OPERATOR, U s           },
  51 #define TK(e, s) { s,              U STRINGX (e) },
  52 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
  53 #undef OP
  54 #undef TK
  55
     /* Look up the spelling category / name of TOKEN by its type.  */
  56 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
  57 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     /* Un-read: move buffer->cur back to the position last saved in
        buffer->backup_to.  Requires a variable named `buffer' to be in
        scope at the point of use.  */
  58 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
  59
     /* Prototypes of this file's static helper functions.  They are
        wrapped in PARAMS, presumably so that pre-ANSI compilers can
        still parse them (confirm against the definition of PARAMS).
        maybe_read_ucs, hex_digit_value and new_buff are defined further
        down the file.  */
  60 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
  61 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
  62
  63 static int skip_line_comment PARAMS ((cpp_reader *));
  64 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
  65 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
  66 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
  67                                   unsigned int *));
  68 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
  69 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
  70 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
  71 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
  72                                   cppchar_t));
  73 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
  74 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
  75                                    const unsigned char *, cppchar_t *));
  76 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
  77
  78 static unsigned int hex_digit_value PARAMS ((unsigned int));
  79 static _cpp_buff *new_buff PARAMS ((size_t));
  80
  81
  82 /* Utility routine:
  83
  84    Compares, the token TOKEN to the NUL-terminated string STRING.
  85    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
  86 int
  87 cpp_ideq (token, string)
  88      const cpp_token *token;
  89      const char *string;
  90 {
  91   if (token->type != CPP_NAME)
  92     return 0;
  93
  94   return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
  95 }
  96
  97 /* Record a note TYPE at byte POS into the current cleaned logical
  98    line.  */
  99 static void
 100 add_line_note (buffer, pos, type)
 101      cpp_buffer *buffer;
 102      const uchar *pos;
 103      unsigned int type;
 104 {
 105   if (buffer->notes_used == buffer->notes_cap)
 106     {
 107       buffer->notes_cap = buffer->notes_cap * 2 + 200;
 108       buffer->notes = (_cpp_line_note *)
 109         xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
 110     }
 111
 112   buffer->notes[buffer->notes_used].pos = pos;
 113   buffer->notes[buffer->notes_used].type = type;
 114   buffer->notes_used++;
 115 }
 116
 117 /* Returns with a logical line that contains no escaped newlines or
 118    trigraphs.  This is a time-critical inner loop.

        The line that starts at buffer->next_line is rewritten in place:
        S reads the raw bytes and D stores the cleaned ones.  Every
        backslash-newline splice and every trigraph is recorded with
        add_line_note.  On return buffer->cur and buffer->line_base
        point at the start of the line, the byte at D has been set to
        '\n', a NOTE_NEWLINE note sits one byte past it, and
        buffer->next_line points just past the raw line ending.  */
 119 void
 120 _cpp_clean_line (pfile)
 121      cpp_reader *pfile;
 122 {
 123   cpp_buffer *buffer;
 124   const uchar *s;
 125   uchar c, *d, *p;
 126
 127   buffer = pfile->buffer;
       /* Forget the notes recorded for the previous line.  */
 128   buffer->cur_note = buffer->notes_used = 0;
 129   buffer->cur = buffer->line_base = buffer->next_line;
 130   buffer->need_line = false;
       /* Start one byte early: both loops below pre-increment S (and D)
          before each access.  */
 131   s = buffer->next_line - 1;
 132
 133   if (!buffer->from_stage3)
 134     {
 135       d = (uchar *) s;
 136
 137       for (;;)
 138         {
               /* Copy one byte from the read cursor to the write cursor.  */
 139           c = *++s;
 140           *++d = c;
 141
 142           if (c == '\n' || c == '\r')
 143             {
 144               /* Handle DOS line endings.  */
 145               if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
 146                 s++;
                   /* A line ending at the very end of the buffer always
                      terminates the line, escaped or not.  */
 147               if (s == buffer->rlimit)
 148                 break;
 149
 150               /* Escaped?  Walk P back from D over any non-vertical
                      whitespace to see whether a backslash precedes the
                      line ending.  buffer->next_line is the lower bound
                      of this backward scan.  */
 151               p = d;
 152               while (p != buffer->next_line && is_nvspace (p[-1]))
 153                 p--;
 154               if (p == buffer->next_line || p[-1] != '\\')
 155                 break;
 156
                   /* Splice: note the backslash position (and whether
                      whitespace separated it from the newline), then
                      rewind D so that the next byte stored overwrites
                      the backslash.  */
 157               add_line_note (buffer, p - 1,
 158                              p != d ? NOTE_ESC_SPACE_NL: NOTE_ESC_NL);
 159               d = p - 2;
                   /* Raise the scan bound to the splice point so a later
                      backward scan cannot cross it.  */
 160               buffer->next_line = p - 1;
 161             }
 162           else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
 163             {
 164               /* Add a note regardless, for the benefit of -Wtrigraphs.  */
 165               add_line_note (buffer, d, NOTE_TRIGRAPH);
 166               if (CPP_OPTION (pfile, trigraphs))
 167                 {
                       /* Replace the '?' just stored with the mapped
                          character and skip the other two source bytes.  */
 168                   *d = _cpp_trigraph_map[s[2]];
 169                   s += 2;
 170                 }
 171             }
 172         }
 173     }
 174   else
 175     {
           /* No splicing or trigraph handling on this path: only locate
              the end of the line.  */
 176       do
 177         s++;
 178       while (*s != '\n' && *s != '\r');
 179       d = (uchar *) s;
 180
 181       /* Handle DOS line endings.  */
 182       if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
 183         s++;
 184     }
 185
       /* Normalize the terminator to '\n' and plant the end-of-line
          note just past it.  */
 186   *d = '\n';
 187   add_line_note (buffer, d + 1, NOTE_NEWLINE);
 188   buffer->next_line = s + 1;
 189 }
 190
 191 /* Process the notes created by add_line_note as far as the current
 192    location.  */
 193 void
 194 _cpp_process_line_notes (pfile, in_comment)
 195      cpp_reader *pfile;
 196      int in_comment;
 197 {
 198   cpp_buffer *buffer = pfile->buffer;
 199
 200   for (;;)
 201     {
 202       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
 203       unsigned int col;
 204
 205       if (note->pos > buffer->cur)
 206         break;
 207
 208       buffer->cur_note++;
 209       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
 210
 211       switch (note->type)
 212         {
 213         case NOTE_NEWLINE:
 214           /* This note is a kind of sentinel we should never reach.  */
 215           abort ();
 216
 217         case NOTE_TRIGRAPH:
 218           if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
 219             {
 220               if (CPP_OPTION (pfile, trigraphs))
 221                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 222                                      "trigraph converted to %c",
 223                                      (int) note->pos[0]);
 224               else
 225                 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 226                                      "trigraph ??%c ignored",
 227                                      (int) note->pos[2]);
 228             }
 229           break;
 230
 231         case NOTE_ESC_SPACE_NL:
 232           if (!in_comment)
 233             cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
 234                                  "backslash and newline separated by space");
 235           /* Fall through... */
 236         case NOTE_ESC_NL:
 237           if (buffer->next_line > buffer->rlimit)
 238             {
 239               cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
 240                                    "backslash-newline at end of file");
 241               /* Prevent "no newline at end of file" warning.  */
 242               buffer->next_line = buffer->rlimit;
 243             }
 244
 245           buffer->line_base = note->pos;
 246           pfile->line++;
 247         }
 248     }
 249 }
 250
 251 /* Obtain the next character, after trigraph conversion and skipping
 252    an arbitrarily long string of escaped newlines.  The common case of
 253    no trigraphs or escaped newlines falls through quickly.  On return,
 254    buffer->backup_to points to where to return to if the character is
 255    not to be processed.  */
 256 static cppchar_t
 257 get_effective_char (pfile)
 258      cpp_reader *pfile;
 259 {
 260   cpp_buffer *buffer = pfile->buffer;
 261
 262   buffer->backup_to = buffer->cur;
 263   return *buffer->cur++;
 264 }
 265
 266 /* Skip a C-style block comment.  We find the end of the comment by
 267    seeing if an asterisk is before every '/' we encounter.  Returns
 268    nonzero if comment terminated by EOF, zero otherwise.  */
 269 bool
 270 _cpp_skip_block_comment (pfile)
 271      cpp_reader *pfile;
 272 {
 273   cpp_buffer *buffer = pfile->buffer;
 274   cppchar_t c;
 275
 276   if (*buffer->cur == '/')
 277     buffer->cur++;
 278
 279   for (;;)
 280     {
 281       c = *buffer->cur++;
 282
 283       /* People like decorating comments with '*', so check for '/'
 284          instead for efficiency.  */
 285       if (c == '/')
 286         {
 287           if (buffer->cur[-2] == '*')
 288             break;
 289
 290           /* Warn about potential nested comments, but not if the '/'
 291              comes immediately before the true comment delimiter.
 292              Don't bother to get it right across escaped newlines.  */
 293           if (CPP_OPTION (pfile, warn_comments)
 294               && buffer->cur[0] == '*' && buffer->cur[1] != '/')
 295             cpp_error_with_line (pfile, DL_WARNING,
 296                                  pfile->line, CPP_BUF_COL (buffer),
 297                                  "\"/*\" within comment");
 298         }
 299       else if (c == '\n')
 300         {
 301           buffer->cur--;
 302           _cpp_process_line_notes (pfile, true);
 303           if (buffer->next_line >= buffer->rlimit)
 304             return true;
 305           _cpp_clean_line (pfile);
 306           pfile->line++;
 307         }
 308     }
 309
 310   return false;
 311 }
 312
 313 /* Skip a C++ line comment, leaving buffer->cur pointing to the
 314    terminating newline.  Handles escaped newlines.  Returns nonzero
 315    if a multiline comment.  */
 316 static int
 317 skip_line_comment (pfile)
 318      cpp_reader *pfile;
 319 {
 320   cpp_buffer *buffer = pfile->buffer;
 321   unsigned int orig_line = pfile->line;
 322
 323   while (*buffer->cur != '\n')
 324     buffer->cur++;
 325
 326   _cpp_process_line_notes (pfile, true);
 327   return orig_line != pfile->line;
 328 }
 329
 330 /* Skips whitespace, saving the next non-whitespace character.  */
 331 static void
 332 skip_whitespace (pfile, c)
 333      cpp_reader *pfile;
 334      cppchar_t c;
 335 {
 336   cpp_buffer *buffer = pfile->buffer;
 337   unsigned int warned = 0;
 338
 339   do
 340     {
 341       /* Horizontal space always OK.  */
 342       if (c == ' ' || c == '\t')
 343         ;
 344       /* Just \f \v or \0 left.  */
 345       else if (c == '\0')
 346         {
 347           if (!warned)
 348             {
 349               cpp_error (pfile, DL_WARNING, "null character(s) ignored");
 350               warned = 1;
 351             }
 352         }
 353       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
 354         cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
 355                              CPP_BUF_COL (buffer),
 356                              "%s in preprocessing directive",
 357                              c == '\f' ? "form feed" : "vertical tab");
 358
 359       c = *buffer->cur++;
 360     }
 361   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
 362   while (is_nvspace (c));
 363
 364   buffer->cur--;
 365 }
 366
 367 /* See if the characters of a number token are valid in a name (no
 368    '.', '+' or '-').  */
 369 static int
 370 name_p (pfile, string)
 371      cpp_reader *pfile;
 372      const cpp_string *string;
 373 {
 374   unsigned int i;
 375
 376   for (i = 0; i < string->len; i++)
 377     if (!is_idchar (string->text[i]))
 378       return 0;
 379
 380   return 1;
 381 }
 382
 383 /* Parse an identifier, skipping embedded backslash-newlines.  This is
 384    a critical inner loop.  The common case is an identifier which has
 385    not been split by backslash-newline, does not contain a dollar
 386    sign, and has already been scanned (roughly 10:1 ratio of
 387    seen:unseen identifiers in normal code; the distribution is
 388    Poisson-like).  Second most common case is a new identifier, not
 389    split and no dollar sign.  The other possibilities are rare and
 390    have been relegated to parse_slow.  */
 391 static cpp_hashnode *
 392 parse_identifier (pfile)
 393      cpp_reader *pfile;
 394 {
 395   cpp_hashnode *result;
 396   const uchar *cur, *base;
 397
 398   /* Fast-path loop.  Skim over a normal identifier.
 399      N.B. ISIDNUM does not include $.  */
 400   cur = pfile->buffer->cur;
 401   while (ISIDNUM (*cur))
 402     cur++;
 403
 404   /* Check for slow-path cases.  */
 405   if (*cur == '$')
 406     {
 407       unsigned int len;
 408
 409       base = parse_slow (pfile, cur, 0, &len);
 410       result = (cpp_hashnode *)
 411         ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
 412     }
 413   else
 414     {
 415       base = pfile->buffer->cur - 1;
 416       pfile->buffer->cur = cur;
 417       result = (cpp_hashnode *)
 418         ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
 419     }
 420
 421   /* Rarely, identifiers require diagnostics when lexed.
 422      XXX Has to be forced out of the fast path.  */
 423   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
 424                         && !pfile->state.skipping, 0))
 425     {
 426       /* It is allowed to poison the same identifier twice.  */
 427       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
 428         cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
 429                    NODE_NAME (result));
 430
 431       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
 432          replacement list of a variadic macro.  */
 433       if (result == pfile->spec_nodes.n__VA_ARGS__
 434           && !pfile->state.va_args_ok)
 435         cpp_error (pfile, DL_PEDWARN,
 436         "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
 437     }
 438
 439   return result;
 440 }
 441
 442 /* Slow path.  This handles numbers and identifiers which have been
 443    split, or contain dollar signs.  The part of the token from
 444    PFILE->buffer->cur-1 to CUR has already been scanned.  NUMBER_P is
 445    1 if it's a number, and 2 if it has a leading period.  Returns a
 446    pointer to the token's NUL-terminated spelling in permanent
 447    storage, and sets PLEN to its length.  */
 448 static uchar *
 449 parse_slow (pfile, cur, number_p, plen)
 450      cpp_reader *pfile;
 451      const uchar *cur;
 452      int number_p;
 453      unsigned int *plen;
 454 {
 455   cpp_buffer *buffer = pfile->buffer;
 456   const uchar *base = buffer->cur - 1;
 457   struct obstack *stack = &pfile->hash_table->stack;
 458   unsigned int c, prevc, saw_dollar = 0;
 459
 460   /* Place any leading period.  */
 461   if (number_p == 2)
 462     obstack_1grow (stack, '.');
 463
 464   /* Copy the part of the token which is known to be okay.  */
 465   obstack_grow (stack, base, cur - base);
 466
 467   /* Now process the part which isn't.  We are looking at one of
 468      '$', '\\', or '?' on entry to this loop.  */
 469   prevc = cur[-1];
 470   c = *cur++;
 471   buffer->cur = cur;
 472   for (;;)
 473     {
 474       /* Potential escaped newline?  */
 475       buffer->backup_to = buffer->cur - 1;
 476
 477       if (!is_idchar (c))
 478         {
 479           if (!number_p)
 480             break;
 481           if (c != '.' && !VALID_SIGN (c, prevc))
 482             break;
 483         }
 484
 485       /* Handle normal identifier characters in this loop.  */
 486       do
 487         {
 488           prevc = c;
 489           obstack_1grow (stack, c);
 490
 491           if (c == '$')
 492             saw_dollar++;
 493
 494           c = *buffer->cur++;
 495         }
 496       while (is_idchar (c));
 497     }
 498
 499   /* Step back over the unwanted char.  */
 500   BACKUP ();
 501
 502   /* $ is not an identifier character in the standard, but is commonly
 503      accepted as an extension.  Don't warn about it in skipped
 504      conditional blocks.  */
 505   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
 506     cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
 507
 508   /* Identifiers and numbers are null-terminated.  */
 509   *plen = obstack_object_size (stack);
 510   obstack_1grow (stack, '\0');
 511   return obstack_finish (stack);
 512 }
 513
 514 /* Parse a number, beginning with character C, skipping embedded
 515    backslash-newlines.  LEADING_PERIOD is nonzero if there was a "."
 516    before C.  Place the result in NUMBER.  */
 517 static void
 518 parse_number (pfile, number, leading_period)
 519      cpp_reader *pfile;
 520      cpp_string *number;
 521      int leading_period;
 522 {
 523   const uchar *cur;
 524
 525   /* Fast-path loop.  Skim over a normal number.
 526      N.B. ISIDNUM does not include $.  */
 527   cur = pfile->buffer->cur;
 528   while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
 529     cur++;
 530
 531   /* Check for slow-path cases.  */
 532   if (*cur == '$')
 533     number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
 534   else
 535     {
 536       const uchar *base = pfile->buffer->cur - 1;
 537       uchar *dest;
 538
 539       number->len = cur - base + leading_period;
 540       dest = _cpp_unaligned_alloc (pfile, number->len + 1);
 541       dest[number->len] = '\0';
 542       number->text = dest;
 543
 544       if (leading_period)
 545         *dest++ = '.';
 546       memcpy (dest, base, cur - base);
 547       pfile->buffer->cur = cur;
 548     }
 549 }
 550
 551 /* Subroutine of parse_string.  */
 552 static int
 553 unescaped_terminator_p (pfile, dest)
 554      cpp_reader *pfile;
 555      const unsigned char *dest;
 556 {
 557   const unsigned char *start, *temp;
 558
 559   /* In #include-style directives, terminators are not escapable.  */
 560   if (pfile->state.angled_headers)
 561     return 1;
 562
 563   start = BUFF_FRONT (pfile->u_buff);
 564
 565   /* An odd number of consecutive backslashes represents an escaped
 566      terminator.  */
 567   for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
 568     ;
 569
 570   return ((dest - temp) & 1) == 0;
 571 }
 572
 573 /* Parses a string, character constant, or angle-bracketed header file
 574    name.  Handles embedded trigraphs and escaped newlines.  The stored
 575    string is guaranteed NUL-terminated, but it is not guaranteed that
 576    this is the first NUL since embedded NULs are preserved.
 577
 578    When this function returns, buffer->cur points to the next
 579    character to be processed.  */
 580 static void
 581 parse_string (pfile, token, terminator)
 582      cpp_reader *pfile;
 583      cpp_token *token;
 584      cppchar_t terminator;
 585 {
 586   cpp_buffer *buffer = pfile->buffer;
 587   unsigned char *dest, *limit;
 588   cppchar_t c;
 589   bool warned_nulls = false;
 590
 591   dest = BUFF_FRONT (pfile->u_buff);
 592   limit = BUFF_LIMIT (pfile->u_buff);
 593
 594   for (;;)
 595     {
 596       /* We need room for another char, possibly the terminating NUL.  */
 597       if ((size_t) (limit - dest) < 1)
 598         {
 599           size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
 600           _cpp_extend_buff (pfile, &pfile->u_buff, 2);
 601           dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
 602           limit = BUFF_LIMIT (pfile->u_buff);
 603         }
 604
 605       c = *buffer->cur++;
 606
 607       if (c == terminator)
 608         {
 609           if (unescaped_terminator_p (pfile, dest))
 610             break;
 611         }
 612       else if (c == '\n')
 613         {
 614           /* No string literal may extend over multiple lines.  In
 615              assembly language, suppress the error except for <>
 616              includes.  This is a kludge around not knowing where
 617              comments are.  */
 618           if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
 619             cpp_error (pfile, DL_ERROR, "missing terminating %c character",
 620                        (int) terminator);
 621           buffer->cur--;
 622           break;
 623         }
 624       else if (c == '\0')
 625         {
 626           if (!warned_nulls)
 627             {
 628               warned_nulls = true;
 629               cpp_error (pfile, DL_WARNING,
 630                          "null character(s) preserved in literal");
 631             }
 632         }
 633         *dest++ = c;
 634     }
 635
 636   *dest = '\0';
 637
 638   token->val.str.text = BUFF_FRONT (pfile->u_buff);
 639   token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
 640   BUFF_FRONT (pfile->u_buff) = dest + 1;
 641 }
 642
 643 /* The stored comment includes the comment start and any terminator.  */
 644 static void
 645 save_comment (pfile, token, from, type)
 646      cpp_reader *pfile;
 647      cpp_token *token;
 648      const unsigned char *from;
 649      cppchar_t type;
 650 {
 651   unsigned char *buffer;
 652   unsigned int len, clen;
 653
 654   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 655
 656   /* C++ comments probably (not definitely) have moved past a new
 657      line, which we don't want to save in the comment.  */
 658   if (is_vspace (pfile->buffer->cur[-1]))
 659     len--;
 660
 661   /* If we are currently in a directive, then we need to store all
 662      C++ comments as C comments internally, and so we need to
 663      allocate a little extra space in that case.
 664
 665      Note that the only time we encounter a directive here is
 666      when we are saving comments in a "#define".  */
 667   clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
 668
 669   buffer = _cpp_unaligned_alloc (pfile, clen);
 670
 671   token->type = CPP_COMMENT;
 672   token->val.str.len = clen;
 673   token->val.str.text = buffer;
 674
 675   buffer[0] = '/';
 676   memcpy (buffer + 1, from, len - 1);
 677
 678   /* Finish conversion to a C comment, if necessary.  */
 679   if (pfile->state.in_directive && type == '/')
 680     {
 681       buffer[1] = '*';
 682       buffer[clen - 2] = '*';
 683       buffer[clen - 1] = '/';
 684     }
 685 }
 686
 687 /* Allocate COUNT tokens for RUN.  */
 688 void
 689 _cpp_init_tokenrun (run, count)
 690      tokenrun *run;
 691      unsigned int count;
 692 {
 693   run->base = xnewvec (cpp_token, count);
 694   run->limit = run->base + count;
 695   run->next = NULL;
 696 }
 697
 698 /* Returns the next tokenrun, or creates one if there is none.  */
 699 static tokenrun *
 700 next_tokenrun (run)
 701      tokenrun *run;
 702 {
 703   if (run->next == NULL)
 704     {
 705       run->next = xnew (tokenrun);
 706       run->next->prev = run;
 707       _cpp_init_tokenrun (run->next, 250);
 708     }
 709
 710   return run->next;
 711 }
 712
 713 /* Allocate a single token that is invalidated at the same time as the
 714    rest of the tokens on the line.  Has its line and col set to the
 715    same as the last lexed token, so that diagnostics appear in the
 716    right place.  */
 717 cpp_token *
 718 _cpp_temp_token (pfile)
 719      cpp_reader *pfile;
 720 {
 721   cpp_token *old, *result;
 722
 723   old = pfile->cur_token - 1;
 724   if (pfile->cur_token == pfile->cur_run->limit)
 725     {
 726       pfile->cur_run = next_tokenrun (pfile->cur_run);
 727       pfile->cur_token = pfile->cur_run->base;
 728     }
 729
 730   result = pfile->cur_token++;
 731   result->line = old->line;
 732   result->col = old->col;
 733   return result;
 734 }
 735
 736 /* Lex a token into RESULT (external interface).  Takes care of issues
 737    like directive handling, token lookahead, multiple include
 738    optimization and skipping.  */
 739 const cpp_token *
 740 _cpp_lex_token (pfile)
 741      cpp_reader *pfile;
 742 {
 743   cpp_token *result;
 744
 745   for (;;)
 746     {
 747       if (pfile->cur_token == pfile->cur_run->limit)
 748         {
 749           pfile->cur_run = next_tokenrun (pfile->cur_run);
 750           pfile->cur_token = pfile->cur_run->base;
 751         }
 752
 753       if (pfile->lookaheads)
 754         {
 755           pfile->lookaheads--;
 756           result = pfile->cur_token++;
 757         }
 758       else
 759         result = _cpp_lex_direct (pfile);
 760
 761       if (result->flags & BOL)
 762         {
 763           /* Is this a directive.  If _cpp_handle_directive returns
 764              false, it is an assembler #.  */
 765           if (result->type == CPP_HASH
 766               /* 6.10.3 p 11: Directives in a list of macro arguments
 767                  gives undefined behavior.  This implementation
 768                  handles the directive as normal.  */
 769               && pfile->state.parsing_args != 1
 770               && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
 771             continue;
 772           if (pfile->cb.line_change && !pfile->state.skipping)
 773             (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
 774         }
 775
 776       /* We don't skip tokens in directives.  */
 777       if (pfile->state.in_directive)
 778         break;
 779
 780       /* Outside a directive, invalidate controlling macros.  At file
 781          EOF, _cpp_lex_direct takes care of popping the buffer, so we never
 782          get here and MI optimisation works.  */
 783       pfile->mi_valid = false;
 784
 785       if (!pfile->state.skipping || result->type == CPP_EOF)
 786         break;
 787     }
 788
 789   return result;
 790 }
 791
 792 /* Returns true if a fresh line has been loaded.  */
 793 bool
 794 _cpp_get_fresh_line (pfile)
 795      cpp_reader *pfile;
 796 {
 797   /* We can't get a new line until we leave the current directive.  */
 798   if (pfile->state.in_directive)
 799     return false;
 800
 801   for (;;)
 802     {
 803       cpp_buffer *buffer = pfile->buffer;
 804
 805       if (!buffer->need_line)
 806         return true;
 807
 808       if (buffer->next_line < buffer->rlimit)
 809         {
 810           _cpp_clean_line (pfile);
 811           return true;
 812         }
 813
 814       /* First, get out of parsing arguments state.  */
 815       if (pfile->state.parsing_args)
 816         return false;
 817
 818       /* End of buffer.  Non-empty files should end in a newline.  */
 819       if (buffer->buf != buffer->rlimit
 820           && buffer->next_line > buffer->rlimit
 821           && !buffer->from_stage3)
 822         {
 823           /* Only warn once.  */
 824           buffer->next_line = buffer->rlimit;
 825           cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
 826                                CPP_BUF_COLUMN (buffer, buffer->cur),
 827                                "no newline at end of file");
 828         }
 829
 830       if (buffer->return_at_eof)
 831         {
 832           buffer->return_at_eof = false;
 833           return false;
 834         }
 835
 836       if (!buffer->prev)
 837         return false;
 838
 839       _cpp_pop_buffer (pfile);
 840     }
 841 }
 842
     /* Peek at the next character with get_effective_char.  If it is
        CHAR, consume it and set result->type to THEN_TYPE; otherwise
        un-read it with BACKUP () and set result->type to ELSE_TYPE.
        Expects `pfile', `buffer' (used by BACKUP) and `result' to be in
        scope at the point of use.  */
 843 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
 844   do {                                          \
 845     if (get_effective_char (pfile) == CHAR)     \
 846       result->type = THEN_TYPE;                 \
 847     else                                        \
 848       {                                         \
 849         BACKUP ();                              \
 850         result->type = ELSE_TYPE;               \
 851       }                                         \
 852   } while (0)
 853
 854 /* Lex a token into pfile->cur_token, which is also incremented, to
 855    get diagnostics pointing to the correct location.
 856
 857    Does not handle issues such as token lookahead, multiple-include
 858    optimisation, directives, skipping etc.  This function is only
 859    suitable for use by _cpp_lex_token, and in special cases like
 860    lex_expansion_token which doesn't care for any of these issues.
 861
 862    When meeting a newline, returns CPP_EOF if parsing a directive,
 863    otherwise returns to the start of the token buffer if permissible.
 864    Returns the location of the lexed token.  */
 865 cpp_token *
 866 _cpp_lex_direct (pfile)
 867      cpp_reader *pfile;
 868 {
       /* Lex one token directly from PFILE's current buffer into the
          next free slot at pfile->cur_token and return that slot.  A
          token of type CPP_EOF is returned when _cpp_get_fresh_line
          reports that no further line is available.  */
 869   cppchar_t c;
 870   cpp_buffer *buffer;
 871   const unsigned char *comment_start;
 872   cpp_token *result = pfile->cur_token++;
 873
       /* Entered when lexing starts and again after every newline:
          clear the token's flags and, if the buffer needs one, fetch a
          new line before any character is read.  */
 874  fresh_line:
 875   result->flags = 0;
 876   if (pfile->buffer->need_line)
 877     {
 878       if (!_cpp_get_fresh_line (pfile))
 879         {
 880           result->type = CPP_EOF;
 881           return result;
 882         }
           /* Tokens are not being kept: go back to the first slot of
              the base token run and lex into that instead of the slot
              taken on entry.  */
 883       if (!pfile->keep_tokens)
 884         {
 885           pfile->cur_run = &pfile->base_run;
 886           result = pfile->base_run.base;
 887           pfile->cur_token = result + 1;
 888         }
           /* The token starts a line.  While parsing_args is 2 it is
              also flagged PREV_WHITE.  NOTE(review): presumably so that
              a newline inside macro arguments counts as whitespace --
              confirm against the macro expander.  */
 889       result->flags = BOL;
 890       if (pfile->state.parsing_args == 2)
 891         result->flags |= PREV_WHITE;
 892     }
 893   buffer = pfile->buffer;
       /* Record the current line number in the token.  Re-entered after
          a comment has been skipped.  */
 894  update_tokens_line:
 895   result->line = pfile->line;
 896
       /* Re-entered after horizontal whitespace.  Once the read position
          has reached the position of the next pending line note (and no
          buffer is overlaid), process the notes and refresh the token's
          line number.  */
 897  skipped_white:
 898   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
 899       && !pfile->overlaid_buffer)
 900     {
 901       _cpp_process_line_notes (pfile, false);
 902       result->line = pfile->line;
 903     }
       /* Consume one character; the column is computed from the read
          position just past it.  */
 904   c = *buffer->cur++;
 905   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
 906
       /* NOTE(review): BACKUP and IF_NEXT_IS are macros defined outside
          this function.  From their use below, BACKUP () appears to
          un-read one character and IF_NEXT_IS (CH, A, B) appears to
          give the token type A when the next character is CH and B
          otherwise -- confirm against their definitions.  */
 907   switch (c)
 908     {
 909     case ' ': case '\t': case '\f': case '\v': case '\0':
 910       result->flags |= PREV_WHITE;
 911       skip_whitespace (pfile, c);
 912       goto skipped_white;
 913
         /* End of line: count it, request a new line and start over.  */
 914     case '\n':
 915       pfile->line++;
 916       buffer->need_line = true;
 917       goto fresh_line;
 918
 919     case '0': case '1': case '2': case '3': case '4':
 920     case '5': case '6': case '7': case '8': case '9':
 921       result->type = CPP_NUMBER;
 922       parse_number (pfile, &result->val.str, 0);
 923       break;
 924
 925     case 'L':
 926       /* 'L' may introduce wide characters or strings.  */
 927       {
 928         const unsigned char *pos = buffer->cur;
 929
 930         c = get_effective_char (pfile);
 931         if (c == '\'' || c == '"')
 932           {
 933             result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 934             parse_string (pfile, result, c);
 935             break;
 936           }
             /* Not a wide literal: restore the read position saved
                above and treat the 'L' as the start of an identifier.  */
 937         buffer->cur = pos;
 938       }
 939       /* Fall through.  */
 940
         /* Every letter except 'L' (handled above) and '_' starts an
            identifier; '$' jumps here when dollars_in_ident is set.  */
 941     start_ident:
 942     case '_':
 943     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 944     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 945     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 946     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 947     case 'y': case 'z':
 948     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 949     case 'G': case 'H': case 'I': case 'J': case 'K':
 950     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 951     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 952     case 'Y': case 'Z':
 953       result->type = CPP_NAME;
 954       result->val.node = parse_identifier (pfile);
 955
 956       /* Convert named operators to their proper types.  The token
              keeps its identifier node and is flagged NAMED_OP; its new
              type is taken from the node's directive_index field.  */
 957       if (result->val.node->flags & NODE_OPERATOR)
 958         {
 959           result->flags |= NAMED_OP;
 960           result->type = result->val.node->directive_index;
 961         }
 962       break;
 963
 964     case '\'':
 965     case '"':
 966       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
 967       parse_string (pfile, result, c);
 968       break;
 969
 970     case '/':
 971       /* A potential block or line comment.  */
 972       comment_start = buffer->cur;
 973       c = get_effective_char (pfile);
 974
 975       if (c == '*')
 976         {
 977           if (_cpp_skip_block_comment (pfile))
 978             cpp_error (pfile, DL_ERROR, "unterminated comment");
 979         }
 980       else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
 981                             || CPP_IN_SYSTEM_HEADER (pfile)))
 982         {
 983           /* Warn about comments only if pedantically GNUC89, and not
 984              in system headers.  */
 985           if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
 986               && ! buffer->warned_cplusplus_comments)
 987             {
 988               cpp_error (pfile, DL_PEDWARN,
 989                          "C++ style comments are not allowed in ISO C90");
 990               cpp_error (pfile, DL_PEDWARN,
 991                          "(this will be reported only once per input file)");
 992               buffer->warned_cplusplus_comments = 1;
 993             }
 994
 995           if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
 996             cpp_error (pfile, DL_WARNING, "multi-line comment");
 997         }
 998       else if (c == '=')
 999         {
1000           result->type = CPP_DIV_EQ;
1001           break;
1002         }
1003       else
1004         {
1005           BACKUP ();
1006           result->type = CPP_DIV;
1007           break;
1008         }
1009
           /* Reached only after a block or line comment was skipped; the
              two operator branches above leave the switch.  Unless
              comments are being saved, flag the upcoming token as
              preceded by whitespace and lex again.  */
1010       if (!pfile->state.save_comments)
1011         {
1012           result->flags |= PREV_WHITE;
1013           goto update_tokens_line;
1014         }
1015
1016       /* Save the comment as a token in its own right.  */
1017       save_comment (pfile, result, comment_start, c);
1018       break;
1019
1020     case '<':
           /* With the angled_headers state set, '<' opens a header name
              that parse_string reads up to the closing '>'.  */
1021       if (pfile->state.angled_headers)
1022         {
1023           result->type = CPP_HEADER_NAME;
1024           parse_string (pfile, result, '>');
1025           break;
1026         }
1027
1028       c = get_effective_char (pfile);
1029       if (c == '=')
1030         result->type = CPP_LESS_EQ;
1031       else if (c == '<')
1032         IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1033       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1034         IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1035       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1036         {
1037           result->type = CPP_OPEN_SQUARE;
1038           result->flags |= DIGRAPH;
1039         }
1040       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1041         {
1042           result->type = CPP_OPEN_BRACE;
1043           result->flags |= DIGRAPH;
1044         }
1045       else
1046         {
1047           BACKUP ();
1048           result->type = CPP_LESS;
1049         }
1050       break;
1051
1052     case '>':
1053       c = get_effective_char (pfile);
1054       if (c == '=')
1055         result->type = CPP_GREATER_EQ;
1056       else if (c == '>')
1057         IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1058       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1059         IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1060       else
1061         {
1062           BACKUP ();
1063           result->type = CPP_GREATER;
1064         }
1065       break;
1066
1067     case '%':
1068       c = get_effective_char (pfile);
1069       if (c == '=')
1070         result->type = CPP_MOD_EQ;
1071       else if (CPP_OPTION (pfile, digraphs) && c == ':')
1072         {
               /* "%:" is the digraph for '#'; a second "%:" directly
                  after it makes the paste operator.  */
1073           result->flags |= DIGRAPH;
1074           result->type = CPP_HASH;
1075           if (get_effective_char (pfile) == '%')
1076             {
1077               const unsigned char *pos = buffer->cur;
1078
1079               if (get_effective_char (pfile) == ':')
1080                 result->type = CPP_PASTE;
1081               else
                     /* No ':' followed: move the read position to one
                        byte before the position saved after the second
                        '%', so that '%' is read again.  */
1082                 buffer->cur = pos - 1;
1083             }
1084           else
1085             BACKUP ();
1086         }
1087       else if (CPP_OPTION (pfile, digraphs) && c == '>')
1088         {
1089           result->flags |= DIGRAPH;
1090           result->type = CPP_CLOSE_BRACE;
1091         }
1092       else
1093         {
1094           BACKUP ();
1095           result->type = CPP_MOD;
1096         }
1097       break;
1098
1099     case '.':
           /* Start from CPP_DOT and upgrade the type if more follows.  */
1100       result->type = CPP_DOT;
1101       c = get_effective_char (pfile);
1102       if (c == '.')
1103         {
1104           const unsigned char *pos = buffer->cur;
1105
1106           if (get_effective_char (pfile) == '.')
1107             result->type = CPP_ELLIPSIS;
1108           else
                 /* Only two dots: move the read position to one byte
                    before the position saved after the second '.', so
                    that '.' is read again.  */
1109             buffer->cur = pos - 1;
1110         }
1111       /* All known character sets have 0...9 contiguous.  */
1112       else if (ISDIGIT (c))
1113         {
1114           result->type = CPP_NUMBER;
1115           parse_number (pfile, &result->val.str, 1);
1116         }
1117       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1118         result->type = CPP_DOT_STAR;
1119       else
1120         BACKUP ();
1121       break;
1122
1123     case '+':
1124       c = get_effective_char (pfile);
1125       if (c == '+')
1126         result->type = CPP_PLUS_PLUS;
1127       else if (c == '=')
1128         result->type = CPP_PLUS_EQ;
1129       else
1130         {
1131           BACKUP ();
1132           result->type = CPP_PLUS;
1133         }
1134       break;
1135
1136     case '-':
1137       c = get_effective_char (pfile);
1138       if (c == '>')
1139         {
               /* "->" is CPP_DEREF; "->*" is looked for only when the
                  cplusplus option is set.  */
1140           result->type = CPP_DEREF;
1141           if (CPP_OPTION (pfile, cplusplus))
1142             {
1143               if (get_effective_char (pfile) == '*')
1144                 result->type = CPP_DEREF_STAR;
1145               else
1146                 BACKUP ();
1147             }
1148         }
1149       else if (c == '-')
1150         result->type = CPP_MINUS_MINUS;
1151       else if (c == '=')
1152         result->type = CPP_MINUS_EQ;
1153       else
1154         {
1155           BACKUP ();
1156           result->type = CPP_MINUS;
1157         }
1158       break;
1159
1160     case '&':
1161       c = get_effective_char (pfile);
1162       if (c == '&')
1163         result->type = CPP_AND_AND;
1164       else if (c == '=')
1165         result->type = CPP_AND_EQ;
1166       else
1167         {
1168           BACKUP ();
1169           result->type = CPP_AND;
1170         }
1171       break;
1172
1173     case '|':
1174       c = get_effective_char (pfile);
1175       if (c == '|')
1176         result->type = CPP_OR_OR;
1177       else if (c == '=')
1178         result->type = CPP_OR_EQ;
1179       else
1180         {
1181           BACKUP ();
1182           result->type = CPP_OR;
1183         }
1184       break;
1185
1186     case ':':
1187       c = get_effective_char (pfile);
1188       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1189         result->type = CPP_SCOPE;
1190       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1191         {
1192           result->flags |= DIGRAPH;
1193           result->type = CPP_CLOSE_SQUARE;
1194         }
1195       else
1196         {
1197           BACKUP ();
1198           result->type = CPP_COLON;
1199         }
1200       break;
1201
1202     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1203     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1204     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1205     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1206     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1207
         /* Punctuators that are complete after a single character.  */
1208     case '?': result->type = CPP_QUERY; break;
1209     case '~': result->type = CPP_COMPL; break;
1210     case ',': result->type = CPP_COMMA; break;
1211     case '(': result->type = CPP_OPEN_PAREN; break;
1212     case ')': result->type = CPP_CLOSE_PAREN; break;
1213     case '[': result->type = CPP_OPEN_SQUARE; break;
1214     case ']': result->type = CPP_CLOSE_SQUARE; break;
1215     case '{': result->type = CPP_OPEN_BRACE; break;
1216     case '}': result->type = CPP_CLOSE_BRACE; break;
1217     case ';': result->type = CPP_SEMICOLON; break;
1218
1219       /* @ is a punctuator in Objective-C.  */
1220     case '@': result->type = CPP_ATSIGN; break;
1221
1222     case '$':
1223       if (CPP_OPTION (pfile, dollars_in_ident))
1224         goto start_ident;
1225       /* Fall through...  */
1226
         /* Any other character becomes a CPP_OTHER token that carries
            the character itself in val.c.  */
1227     default:
1228       result->type = CPP_OTHER;
1229       result->val.c = c;
1230       break;
1231     }
1232
1233   return result;
1234 }
1235
1236 /* An upper bound on the number of bytes needed to spell TOKEN,
1237    including preceding whitespace.  */
1238 unsigned int
1239 cpp_token_len (token)
1240      const cpp_token *token;
1241 {
1242   unsigned int len;
1243
1244   switch (TOKEN_SPELL (token))
1245     {
1246     default:            len = 0;                                break;
1247     case SPELL_NUMBER:
1248     case SPELL_STRING:  len = token->val.str.len;               break;
1249     case SPELL_IDENT:   len = NODE_LEN (token->val.node);       break;
1250     }
1251   /* 1 for whitespace, 4 for comment delimiters.  */
1252   return len + 5;
1253 }
1254
1255 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1256    already contain the enough space to hold the token's spelling.
1257    Returns a pointer to the character after the last character
1258    written.  */
1259 unsigned char *
1260 cpp_spell_token (pfile, token, buffer)
1261      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1262      const cpp_token *token;
1263      unsigned char *buffer;
1264 {
1265   switch (TOKEN_SPELL (token))
1266     {
1267     case SPELL_OPERATOR:
1268       {
1269         const unsigned char *spelling;
1270         unsigned char c;
1271
1272         if (token->flags & DIGRAPH)
1273           spelling
1274             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1275         else if (token->flags & NAMED_OP)
1276           goto spell_ident;
1277         else
1278           spelling = TOKEN_NAME (token);
1279
1280         while ((c = *spelling++) != '\0')
1281           *buffer++ = c;
1282       }
1283       break;
1284
1285     case SPELL_CHAR:
1286       *buffer++ = token->val.c;
1287       break;
1288
1289     spell_ident:
1290     case SPELL_IDENT:
1291       memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1292       buffer += NODE_LEN (token->val.node);
1293       break;
1294
1295     case SPELL_NUMBER:
1296       memcpy (buffer, token->val.str.text, token->val.str.len);
1297       buffer += token->val.str.len;
1298       break;
1299
1300     case SPELL_STRING:
1301       {
1302         int left, right, tag;
1303         switch (token->type)
1304           {
1305           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1306           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1307           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1308           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1309           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1310           default:
1311             cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1312                        TOKEN_NAME (token));
1313             return buffer;
1314           }
1315         if (tag) *buffer++ = tag;
1316         *buffer++ = left;
1317         memcpy (buffer, token->val.str.text, token->val.str.len);
1318         buffer += token->val.str.len;
1319         *buffer++ = right;
1320       }
1321       break;
1322
1323     case SPELL_NONE:
1324       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1325       break;
1326     }
1327
1328   return buffer;
1329 }
1330
1331 /* Returns TOKEN spelt as a null-terminated string.  The string is
1332    freed when the reader is destroyed.  Useful for diagnostics.  */
1333 unsigned char *
1334 cpp_token_as_text (pfile, token)
1335      cpp_reader *pfile;
1336      const cpp_token *token;
1337 {
1338   unsigned int len = cpp_token_len (token);
1339   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1340
1341   end = cpp_spell_token (pfile, token, start);
1342   end[0] = '\0';
1343
1344   return start;
1345 }
1346
1347 /* Used by C front ends, which really should move to using
1348    cpp_token_as_text.  */
1349 const char *
1350 cpp_type2name (type)
1351      enum cpp_ttype type;
1352 {
1353   return (const char *) token_spellings[type].name;
1354 }
1355
1356 /* Writes the spelling of token to FP, without any preceding space.
1357    Separated from cpp_spell_token for efficiency - to avoid stdio
1358    double-buffering.  */
1359 void
1360 cpp_output_token (token, fp)
1361      const cpp_token *token;
1362      FILE *fp;
1363 {
1364   switch (TOKEN_SPELL (token))
1365     {
1366     case SPELL_OPERATOR:
1367       {
1368         const unsigned char *spelling;
1369         int c;
1370
1371         if (token->flags & DIGRAPH)
1372           spelling
1373             = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1374         else if (token->flags & NAMED_OP)
1375           goto spell_ident;
1376         else
1377           spelling = TOKEN_NAME (token);
1378
1379         c = *spelling;
1380         do
1381           putc (c, fp);
1382         while ((c = *++spelling) != '\0');
1383       }
1384       break;
1385
1386     case SPELL_CHAR:
1387       putc (token->val.c, fp);
1388       break;
1389
1390     spell_ident:
1391     case SPELL_IDENT:
1392       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1393     break;
1394
1395     case SPELL_NUMBER:
1396       fwrite (token->val.str.text, 1, token->val.str.len, fp);
1397       break;
1398
1399     case SPELL_STRING:
1400       {
1401         int left, right, tag;
1402         switch (token->type)
1403           {
1404           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1405           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1406           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1407           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1408           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1409           default:
1410             fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1411             return;
1412           }
1413         if (tag) putc (tag, fp);
1414         putc (left, fp);
1415         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1416         putc (right, fp);
1417       }
1418       break;
1419
1420     case SPELL_NONE:
1421       /* An error, most probably.  */
1422       break;
1423     }
1424 }
1425
1426 /* Compare two tokens.  */
1427 int
1428 _cpp_equiv_tokens (a, b)
1429      const cpp_token *a, *b;
1430 {
1431   if (a->type == b->type && a->flags == b->flags)
1432     switch (TOKEN_SPELL (a))
1433       {
1434       default:                  /* Keep compiler happy.  */
1435       case SPELL_OPERATOR:
1436         return 1;
1437       case SPELL_CHAR:
1438         return a->val.c == b->val.c; /* Character.  */
1439       case SPELL_NONE:
1440         return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1441       case SPELL_IDENT:
1442         return a->val.node == b->val.node;
1443       case SPELL_NUMBER:
1444       case SPELL_STRING:
1445         return (a->val.str.len == b->val.str.len
1446                 && !memcmp (a->val.str.text, b->val.str.text,
1447                             a->val.str.len));
1448       }
1449
1450   return 0;
1451 }
1452
1453 /* Returns nonzero if a space should be inserted to avoid an
1454    accidental token paste for output.  For simplicity, it is
1455    conservative, and occasionally advises a space where one is not
1456    needed, e.g. "." and ".2".  */
1457 int
1458 cpp_avoid_paste (pfile, token1, token2)
1459      cpp_reader *pfile;
1460      const cpp_token *token1, *token2;
1461 {
1462   enum cpp_ttype a = token1->type, b = token2->type;
1463   cppchar_t c;
1464
1465   if (token1->flags & NAMED_OP)
1466     a = CPP_NAME;
1467   if (token2->flags & NAMED_OP)
1468     b = CPP_NAME;
1469
1470   c = EOF;
1471   if (token2->flags & DIGRAPH)
1472     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1473   else if (token_spellings[b].category == SPELL_OPERATOR)
1474     c = token_spellings[b].name[0];
1475
1476   /* Quickly get everything that can paste with an '='.  */
1477   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1478     return 1;
1479
1480   switch (a)
1481     {
1482     case CPP_GREATER:   return c == '>' || c == '?';
1483     case CPP_LESS:      return c == '<' || c == '?' || c == '%' || c == ':';
1484     case CPP_PLUS:      return c == '+';
1485     case CPP_MINUS:     return c == '-' || c == '>';
1486     case CPP_DIV:       return c == '/' || c == '*'; /* Comments.  */
1487     case CPP_MOD:       return c == ':' || c == '>';
1488     case CPP_AND:       return c == '&';
1489     case CPP_OR:        return c == '|';
1490     case CPP_COLON:     return c == ':' || c == '>';
1491     case CPP_DEREF:     return c == '*';
1492     case CPP_DOT:       return c == '.' || c == '%' || b == CPP_NUMBER;
1493     case CPP_HASH:      return c == '#' || c == '%'; /* Digraph form.  */
1494     case CPP_NAME:      return ((b == CPP_NUMBER
1495                                  && name_p (pfile, &token2->val.str))
1496                                 || b == CPP_NAME
1497                                 || b == CPP_CHAR || b == CPP_STRING); /* L */
1498     case CPP_NUMBER:    return (b == CPP_NUMBER || b == CPP_NAME
1499                                 || c == '.' || c == '+' || c == '-');
1500     case CPP_OTHER:     return (CPP_OPTION (pfile, objc)
1501                                 && token1->val.c == '@'
1502                                 && (b == CPP_NAME || b == CPP_STRING));
1503     default:            break;
1504     }
1505
1506   return 0;
1507 }
1508
1509 /* Output all the remaining tokens on the current line, and a newline
1510    character, to FP.  Leading whitespace is removed.  If there are
1511    macros, special token padding is not performed.  */
1512 void
1513 cpp_output_line (pfile, fp)
1514      cpp_reader *pfile;
1515      FILE *fp;
1516 {
1517   const cpp_token *token;
1518
1519   token = cpp_get_token (pfile);
1520   while (token->type != CPP_EOF)
1521     {
1522       cpp_output_token (token, fp);
1523       token = cpp_get_token (pfile);
1524       if (token->flags & PREV_WHITE)
1525         putc (' ', fp);
1526     }
1527
1528   putc ('\n', fp);
1529 }
1530
/* Return the numeric value of the hexadecimal digit C.  Aborts if C
   is not a hexadecimal digit according to hex_p.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (!hex_p (c))
    abort ();

  return hex_value (c);
}
1541
1542 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.  Returns 1 to indicate
1543    failure if cpplib is not parsing C++ or C99.  Such failure is
1544    silent, and no variables are updated.  Otherwise returns 0, and
1545    warns if -Wtraditional.
1546
1547    [lex.charset]: The character designated by the universal character
1548    name \UNNNNNNNN is that character whose character short name in
1549    ISO/IEC 10646 is NNNNNNNN; the character designated by the
1550    universal character name \uNNNN is that character whose character
1551    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
1552    for a universal character name is less than 0x20 or in the range
1553    0x7F-0x9F (inclusive), or if the universal character name
1554    designates a character in the basic source character set, then the
1555    program is ill-formed.
1556
1557    We assume that wchar_t is Unicode, so we don't need to do any
1558    mapping.  Is this ever wrong?
1559
1560    PC points to the 'u' or 'U', PSTR is points to the byte after PC,
1561    LIMIT is the end of the string or charconst.  PSTR is updated to
1562    point after the UCS on return, and the UCS is written into PC.  */
1563
1564 static int
1565 maybe_read_ucs (pfile, pstr, limit, pc)
1566      cpp_reader *pfile;
1567      const unsigned char **pstr;
1568      const unsigned char *limit;
1569      cppchar_t *pc;
1570 {
1571   const unsigned char *p = *pstr;
1572   unsigned int code = 0;
1573   unsigned int c = *pc, length;
1574
1575   /* Only attempt to interpret a UCS for C++ and C99.  */
1576   if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1577     return 1;
1578
1579   if (CPP_WTRADITIONAL (pfile))
1580     cpp_error (pfile, DL_WARNING,
1581                "the meaning of '\\%c' is different in traditional C", c);
1582
1583   length = (c == 'u' ? 4: 8);
1584
1585   if ((size_t) (limit - p) < length)
1586     {
1587       cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1588       /* Skip to the end to avoid more diagnostics.  */
1589       p = limit;
1590     }
1591   else
1592     {
1593       for (; length; length--, p++)
1594         {
1595           c = *p;
1596           if (ISXDIGIT (c))
1597             code = (code << 4) + hex_digit_value (c);
1598           else
1599             {
1600               cpp_error (pfile, DL_ERROR,
1601                          "non-hex digit '%c' in universal-character-name", c);
1602               /* We shouldn't skip in case there are multibyte chars.  */
1603               break;
1604             }
1605         }
1606     }
1607
1608   if (CPP_OPTION (pfile, EBCDIC))
1609     {
1610       cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1611       code = 0x3f;  /* EBCDIC invalid character */
1612     }
1613   /* True extended characters are OK.  */
1614   else if (code >= 0xa0
1615            && !(code & 0x80000000)
1616            && !(code >= 0xD800 && code <= 0xDFFF))
1617     ;
1618   /* The standard permits $, @ and ` to be specified as UCNs.  We use
1619      hex escapes so that this also works with EBCDIC hosts.  */
1620   else if (code == 0x24 || code == 0x40 || code == 0x60)
1621     ;
1622   /* Don't give another error if one occurred above.  */
1623   else if (length == 0)
1624     cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1625
1626   *pstr = p;
1627   *pc = code;
1628   return 0;
1629 }
1630
1631 /* Returns the value of an escape sequence, truncated to the correct
1632    target precision.  PSTR points to the input pointer, which is just
1633    after the backslash.  LIMIT is how much text we have.  WIDE is true
1634    if the escape sequence is part of a wide character constant or
1635    string literal.  Handles all relevant diagnostics.  */
1636 cppchar_t
1637 cpp_parse_escape (pfile, pstr, limit, wide)
1638      cpp_reader *pfile;
1639      const unsigned char **pstr;
1640      const unsigned char *limit;
1641      int wide;
1642 {
1643   /* Values of \a \b \e \f \n \r \t \v respectively.  */
1644   static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
1645   static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
1646
1647   int unknown = 0;
1648   const unsigned char *str = *pstr, *charconsts;
1649   cppchar_t c, mask;
1650   unsigned int width;
1651
1652   if (CPP_OPTION (pfile, EBCDIC))
1653     charconsts = ebcdic;
1654   else
1655     charconsts = ascii;
1656
1657   if (wide)
1658     width = CPP_OPTION (pfile, wchar_precision);
1659   else
1660     width = CPP_OPTION (pfile, char_precision);
1661   if (width < BITS_PER_CPPCHAR_T)
1662     mask = ((cppchar_t) 1 << width) - 1;
1663   else
1664     mask = ~0;
1665
1666   c = *str++;
1667   switch (c)
1668     {
1669     case '\\': case '\'': case '"': case '?': break;
1670     case 'b': c = charconsts[1];  break;
1671     case 'f': c = charconsts[3];  break;
1672     case 'n': c = charconsts[4];  break;
1673     case 'r': c = charconsts[5];  break;
1674     case 't': c = charconsts[6];  break;
1675     case 'v': c = charconsts[7];  break;
1676
1677     case '(': case '{': case '[': case '%':
1678       /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1679          '\%' is used to prevent SCCS from getting confused.  */
1680       unknown = CPP_PEDANTIC (pfile);
1681       break;
1682
1683     case 'a':
1684       if (CPP_WTRADITIONAL (pfile))
1685         cpp_error (pfile, DL_WARNING,
1686                    "the meaning of '\\a' is different in traditional C");
1687       c = charconsts[0];
1688       break;
1689
1690     case 'e': case 'E':
1691       if (CPP_PEDANTIC (pfile))
1692         cpp_error (pfile, DL_PEDWARN,
1693                    "non-ISO-standard escape sequence, '\\%c'", (int) c);
1694       c = charconsts[2];
1695       break;
1696
1697     case 'u': case 'U':
1698       unknown = maybe_read_ucs (pfile, &str, limit, &c);
1699       break;
1700
1701     case 'x':
1702       if (CPP_WTRADITIONAL (pfile))
1703         cpp_error (pfile, DL_WARNING,
1704                    "the meaning of '\\x' is different in traditional C");
1705
1706       {
1707         cppchar_t i = 0, overflow = 0;
1708         int digits_found = 0;
1709
1710         while (str < limit)
1711           {
1712             c = *str;
1713             if (! ISXDIGIT (c))
1714               break;
1715             str++;
1716             overflow |= i ^ (i << 4 >> 4);
1717             i = (i << 4) + hex_digit_value (c);
1718             digits_found = 1;
1719           }
1720
1721         if (!digits_found)
1722           cpp_error (pfile, DL_ERROR,
1723                        "\\x used with no following hex digits");
1724
1725         if (overflow | (i != (i & mask)))
1726           {
1727             cpp_error (pfile, DL_PEDWARN,
1728                        "hex escape sequence out of range");
1729             i &= mask;
1730           }
1731         c = i;
1732       }
1733       break;
1734
1735     case '0':  case '1':  case '2':  case '3':
1736     case '4':  case '5':  case '6':  case '7':
1737       {
1738         size_t count = 0;
1739         cppchar_t i = c - '0';
1740
1741         while (str < limit && ++count < 3)
1742           {
1743             c = *str;
1744             if (c < '0' || c > '7')
1745               break;
1746             str++;
1747             i = (i << 3) + c - '0';
1748           }
1749
1750         if (i != (i & mask))
1751           {
1752             cpp_error (pfile, DL_PEDWARN,
1753                        "octal escape sequence out of range");
1754             i &= mask;
1755           }
1756         c = i;
1757       }
1758       break;
1759
1760     default:
1761       unknown = 1;
1762       break;
1763     }
1764
1765   if (unknown)
1766     {
1767       if (ISGRAPH (c))
1768         cpp_error (pfile, DL_PEDWARN,
1769                    "unknown escape sequence '\\%c'", (int) c);
1770       else
1771         cpp_error (pfile, DL_PEDWARN,
1772                    "unknown escape sequence: '\\%03o'", (int) c);
1773     }
1774
1775   if (c > mask)
1776     {
1777       cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1778       c &= mask;
1779     }
1780
1781   *pstr = str;
1782   return c;
1783 }
1784
1785 /* Interpret a (possibly wide) character constant in TOKEN.
1786    WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
1787    points to a variable that is filled in with the number of
1788    characters seen, and UNSIGNEDP to a variable that indicates whether
1789    the result has signed type.  */
1790 cppchar_t
1791 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1792      cpp_reader *pfile;
1793      const cpp_token *token;
1794      unsigned int *pchars_seen;
1795      int *unsignedp;
1796 {
1797   const unsigned char *str = token->val.str.text;
1798   const unsigned char *limit = str + token->val.str.len;
1799   unsigned int chars_seen = 0;
1800   size_t width, max_chars;
1801   cppchar_t c, mask, result = 0;
1802   bool unsigned_p;
1803
1804   /* Width in bits.  */
1805   if (token->type == CPP_CHAR)
1806     {
1807       width = CPP_OPTION (pfile, char_precision);
1808       max_chars = CPP_OPTION (pfile, int_precision) / width;
1809       unsigned_p = CPP_OPTION (pfile, unsigned_char);
1810     }
1811   else
1812     {
1813       width = CPP_OPTION (pfile, wchar_precision);
1814       max_chars = 1;
1815       unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1816     }
1817
1818   if (width < BITS_PER_CPPCHAR_T)
1819     mask = ((cppchar_t) 1 << width) - 1;
1820   else
1821     mask = ~0;
1822
1823   while (str < limit)
1824     {
1825       c = *str++;
1826
1827       if (c == '\\')
1828         c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1829
1830 #ifdef MAP_CHARACTER
1831       if (ISPRINT (c))
1832         c = MAP_CHARACTER (c);
1833 #endif
1834
1835       chars_seen++;
1836
1837       /* Truncate the character, scale the result and merge the two.  */
1838       c &= mask;
1839       if (width < BITS_PER_CPPCHAR_T)
1840         result = (result << width) | c;
1841       else
1842         result = c;
1843     }
1844
1845   if (chars_seen == 0)
1846     cpp_error (pfile, DL_ERROR, "empty character constant");
1847   else if (chars_seen > 1)
1848     {
1849       /* Multichar charconsts are of type int and therefore signed.  */
1850       unsigned_p = 0;
1851
1852       if (chars_seen > max_chars)
1853         {
1854           chars_seen = max_chars;
1855           cpp_error (pfile, DL_WARNING,
1856                      "character constant too long for its type");
1857         }
1858       else if (CPP_OPTION (pfile, warn_multichar))
1859         cpp_error (pfile, DL_WARNING, "multi-character character constant");
1860     }
1861
1862   /* Sign-extend or truncate the constant to cppchar_t.  The value is
1863      in WIDTH bits, but for multi-char charconsts it's value is the
1864      full target type's width.  */
1865   if (chars_seen > 1)
1866     width *= max_chars;
1867   if (width < BITS_PER_CPPCHAR_T)
1868     {
1869       mask = ((cppchar_t) 1 << width) - 1;
1870       if (unsigned_p || !(result & (1 << (width - 1))))
1871         result &= mask;
1872       else
1873         result |= ~mask;
1874     }
1875
1876   *pchars_seen = chars_seen;
1877   *unsignedp = unsigned_p;
1878   return result;
1879 }
1880
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer size used.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is MIN_BUFF_SIZE plus one and a
   half times MIN_SIZE.  EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is
   MIN_EXTRA plus twice the distance from BUFF->cur to BUFF->limit.
   Every macro argument is parenthesized so that an argument
   containing operators is used as a single operand.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1895
1896 /* Create a new allocation buffer.  Place the control block at the end
1897    of the buffer, so that buffer overflows will cause immediate chaos.  */
1898 static _cpp_buff *
1899 new_buff (len)
1900      size_t len;
1901 {
1902   _cpp_buff *result;
1903   unsigned char *base;
1904
1905   if (len < MIN_BUFF_SIZE)
1906     len = MIN_BUFF_SIZE;
1907   len = CPP_ALIGN (len);
1908
1909   base = xmalloc (len + sizeof (_cpp_buff));
1910   result = (_cpp_buff *) (base + len);
1911   result->base = base;
1912   result->cur = base;
1913   result->limit = base + len;
1914   result->next = NULL;
1915   return result;
1916 }
1917
1918 /* Place a chain of unwanted allocation buffers on the free list.  */
1919 void
1920 _cpp_release_buff (pfile, buff)
1921      cpp_reader *pfile;
1922      _cpp_buff *buff;
1923 {
1924   _cpp_buff *end = buff;
1925
1926   while (end->next)
1927     end = end->next;
1928   end->next = pfile->free_buffs;
1929   pfile->free_buffs = buff;
1930 }
1931
1932 /* Return a free buffer of size at least MIN_SIZE.  */
1933 _cpp_buff *
1934 _cpp_get_buff (pfile, min_size)
1935      cpp_reader *pfile;
1936      size_t min_size;
1937 {
1938   _cpp_buff *result, **p;
1939
1940   for (p = &pfile->free_buffs;; p = &(*p)->next)
1941     {
1942       size_t size;
1943
1944       if (*p == NULL)
1945         return new_buff (min_size);
1946       result = *p;
1947       size = result->limit - result->base;
1948       /* Return a buffer that's big enough, but don't waste one that's
1949          way too big.  */
1950       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1951         break;
1952     }
1953
1954   *p = result->next;
1955   result->next = NULL;
1956   result->cur = result->base;
1957   return result;
1958 }
1959
1960 /* Creates a new buffer with enough space to hold the uncommitted
1961    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
1962    the excess bytes to the new buffer.  Chains the new buffer after
1963    BUFF, and returns the new buffer.  */
1964 _cpp_buff *
1965 _cpp_append_extend_buff (pfile, buff, min_extra)
1966      cpp_reader *pfile;
1967      _cpp_buff *buff;
1968      size_t min_extra;
1969 {
1970   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1971   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1972
1973   buff->next = new_buff;
1974   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1975   return new_buff;
1976 }
1977
1978 /* Creates a new buffer with enough space to hold the uncommitted
1979    remaining bytes of the buffer pointed to by BUFF, and at least
1980    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
1981    Chains the new buffer before the buffer pointed to by BUFF, and
1982    updates the pointer to point to the new buffer.  */
1983 void
1984 _cpp_extend_buff (pfile, pbuff, min_extra)
1985      cpp_reader *pfile;
1986      _cpp_buff **pbuff;
1987      size_t min_extra;
1988 {
1989   _cpp_buff *new_buff, *old_buff = *pbuff;
1990   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1991
1992   new_buff = _cpp_get_buff (pfile, size);
1993   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1994   new_buff->next = old_buff;
1995   *pbuff = new_buff;
1996 }
1997
1998 /* Free a chain of buffers starting at BUFF.  */
1999 void
2000 _cpp_free_buff (buff)
2001      _cpp_buff *buff;
2002 {
2003   _cpp_buff *next;
2004
2005   for (; buff; buff = next)
2006     {
2007       next = buff->next;
2008       free (buff->base);
2009     }
2010 }
2011
2012 /* Allocate permanent, unaligned storage of length LEN.  */
2013 unsigned char *
2014 _cpp_unaligned_alloc (pfile, len)
2015      cpp_reader *pfile;
2016      size_t len;
2017 {
2018   _cpp_buff *buff = pfile->u_buff;
2019   unsigned char *result = buff->cur;
2020
2021   if (len > (size_t) (buff->limit - result))
2022     {
2023       buff = _cpp_get_buff (pfile, len);
2024       buff->next = pfile->u_buff;
2025       pfile->u_buff = buff;
2026       result = buff->cur;
2027     }
2028
2029   buff->cur = result + len;
2030   return result;
2031 }
2032
2033 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2034    That buffer is used for growing allocations when saving macro
2035    replacement lists in a #define, and when parsing an answer to an
2036    assertion in #assert, #unassert or #if (and therefore possibly
2037    whilst expanding macros).  It therefore must not be used by any
2038    code that they might call: specifically the lexer and the guts of
2039    the macro expander.
2040
2041    All existing other uses clearly fit this restriction: storing
2042    registered pragmas during initialization.  */
2043 unsigned char *
2044 _cpp_aligned_alloc (pfile, len)
2045      cpp_reader *pfile;
2046      size_t len;
2047 {
2048   _cpp_buff *buff = pfile->a_buff;
2049   unsigned char *result = buff->cur;
2050
2051   if (len > (size_t) (buff->limit - result))
2052     {
2053       buff = _cpp_get_buff (pfile, len);
2054       buff->next = pfile->a_buff;
2055       pfile->a_buff = buff;
2056       result = buff->cur;
2057     }
2058
2059   buff->cur = result + len;
2060   return result;
2061 }