gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Check line numbers assigned to all errors.
  28 o Replace strncmp with memcmp almost everywhere.
  29 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  30 o Distinguish integers, floats, and 'other' pp-numbers.
  31 o Store ints and char constants as binary values.
  32 o New command-line assertion syntax.
  33 o Work towards functions in cpperror.c taking a message level parameter.
  34   If we do this, merge the common code of do_warning and do_error.
  35 o Comment all functions, and describe macro expansion algorithm.
  36 o Move as much out of header files as possible.
  37 o Remove single quote pairs `', and some '', from diagnostics.
  38 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  39
  40 */
  41
  42 #include "config.h"
  43 #include "system.h"
  44 #include "intl.h"
  45 #include "cpplib.h"
  46 #include "cpphash.h"
  47 #include "symcat.h"
  48
     /* File-scope constant tokens, initialised with the token types
        CPP_PLACEMARKER and CPP_EOF respectively.  */
  49 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
  50 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  51
  52 /* Flags for cpp_context.  These are bit values stored in the `flags'
        member of struct cpp_context.  */
  53 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  54 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  55 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  56 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  57
     /* One entry in the reader's array of contexts (see CURRENT_CONTEXT).
        A context supplies tokens either from a token list (macro
        context) or from an array of token pointers (argument context);
        IS_ARG_CONTEXT tells the two apart via the CONTEXT_ARG flag.  */
  58 typedef struct cpp_context cpp_context;
  59 struct cpp_context
  60 {
  61   union
  62   {
  63     const cpp_toklist *list;    /* Used for macro contexts only.  */
  64     const cpp_token **arg;      /* Used for arg contexts only.  */
  65   } u;
  66
  67   /* Pushed token to be returned by next call to get_raw_token.  */
  68   const cpp_token *pushed_token;
  69
  70   struct macro_args *args;      /* The arguments for a function-like
  71                                    macro.  NULL otherwise.  */
  72   unsigned short posn;          /* Current posn, index into u.  */
  73   unsigned short count;         /* No. of tokens in u.  */
  74   unsigned short level;         /* NOTE(review): meaning not shown in
                                        this part of the file; presumably a
                                        nesting level -- confirm.  */
  75   unsigned char flags;          /* Bitwise OR of the CONTEXT_* flags.  */
  76 };
  77
     /* Storage for the arguments of a function-like macro; a context
        refers to one of these through cpp_context.args.  The per-field
        notes below are assumptions drawn from the names and from the
        parse_arg / save_token prototypes -- TODO confirm against the
        code that fills the structure in.  */
  78 typedef struct macro_args macro_args;
  79 struct macro_args
  80 {
  81   unsigned int *ends;           /* Presumably one end index per argument.  */
  82   const cpp_token **tokens;     /* Array of pointers to saved tokens.  */
  83   unsigned int capacity;        /* Presumably allocated size of TOKENS.  */
  84   unsigned int used;            /* Presumably entries of TOKENS in use.  */
  85   unsigned short level;         /* NOTE(review): meaning not shown here.  */
  86 };
  87
     /* Forward declarations of the file-local functions.  PARAMS wraps
        the parameter lists so that the prototypes can be dropped for
        compilers without prototype support.  */
  88 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  89 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  90                                            macro_args *, unsigned int *));
  91 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  92 static void save_token PARAMS ((macro_args *, const cpp_token *));
  93 static int pop_context PARAMS ((cpp_reader *));
  94 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  95 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  96 static void free_macro_args PARAMS ((macro_args *));
  97
     /* Grow LIST's name buffer by half its current capacity plus one.  */
  98 #define auto_expand_name_space(list) \
  99     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
 100 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
 101                                          unsigned int));
 102 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 103                                          unsigned int));
 104
 105 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 106 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 107                                                 unsigned char *));
 108 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 109                                                      const unsigned char *));
 110 static int skip_block_comment PARAMS ((cpp_reader *));
 111 static int skip_line_comment PARAMS ((cpp_reader *));
 112 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 113 static void skip_whitespace PARAMS ((cpp_reader *, int));
 114 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 115                                    const U_CHAR *, const U_CHAR *));
 116 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 117 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 118                                   unsigned int));
 119 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 120 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 121                                   const unsigned char *,
 122                                   unsigned int, unsigned int));
 123 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 124 static int lex_next PARAMS ((cpp_reader *, int));
 125 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 126                                       const cpp_token *));
 127
 128 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 129 static void expand_context_stack PARAMS ((cpp_reader *));
 130 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 131                                             unsigned char *));
 132 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
 133                                   const cpp_token *, int));
 134 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 135                                           cpp_token *));
 136 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 137                                             unsigned int));
 138 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 139 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 140                                                 const cpp_token *));
 141 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 142 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 143                                                        const cpp_token *));
 144 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 145                                          const cpp_token *, int *));
 146 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 147 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 148 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 149 static void release_temp_tokens         PARAMS ((cpp_reader *));
 150 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
     /* NOTE(review): process_directive is already declared earlier in
        this list with the same signature; the declaration below is a
        harmless duplicate.  */
 151 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 152
     /* Begin an empty string value in TOKEN: its length is zero and its
        text points at the first unused byte of LIST's name buffer.  */
 153 #define INIT_TOKEN_STR(list, token) \
 154   do {(token)->val.str.len = 0; \
 155       (token)->val.str.text = (list)->namebuf + (list)->name_used; \
 156   } while (0)
 157
     /* Non-zero if C is '+' or '-' and PREVC is 'e' or 'E', or is 'p' or
        'P' while the c89 option is off.  Relies on a variable named
        `pfile' being in scope at the point of use.  */
 158 #define VALID_SIGN(c, prevc) \
 159   (((c) == '+' || (c) == '-') && \
 160    ((prevc) == 'e' || (prevc) == 'E' \
 161     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 162
 163 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 164    character, if any, is in buffer.  C is the newline character that
        has just been read; `'\r' + '\n' - c' is the other member of the
        CR/LF pair, which is consumed too when it comes next.  The
        buffer's line number is then incremented, its line_base is set to
        CUR and pfile->col_adjust is reset.  Relies on a variable named
        `pfile' being in scope at the point of use.  */
 165
 166 #define handle_newline(cur, limit, c) \
 167  do { \
 168   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 169     (cur)++; \
 170   pfile->buffer->lineno++; \
 171   pfile->buffer->line_base = (cur); \
 172   pfile->col_adjust = 0; \
 173  } while (0)
 174
     /* The macros below all operate on a pointer named `cur_token' that
        must be in scope at the point of use.  IMMED_TOKEN is true when
        the token at cur_token does not have PREV_WHITE set;
        PREV_TOKEN_TYPE is the type of the token before cur_token.  */
 175 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 176 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 177
     /* PUSH_TOKEN stores TTYPE in the token at cur_token and advances
        cur_token; REVISE_TOKEN overwrites the type of the previous
        token; BACKUP_TOKEN steps cur_token back one token and sets that
        token's type; BACKUP_DIGRAPH does the same and also sets the
        DIGRAPH flag on that token.  */
 178 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
 179 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
 180 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
 181 #define BACKUP_DIGRAPH(ttype) do { \
 182   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 183
 184 /* An upper bound on the number of bytes needed to spell a token,
 185    including preceding whitespace.  */
 186 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 187 static inline size_t
 188 TOKEN_LEN (token)
 189      const cpp_token *token;
 190 {
 191   size_t len;
 192
 193   switch (TOKEN_SPELL (token))
 194     {
 195     default:            len = 0;                        break;
 196     case SPELL_STRING:  len = token->val.str.len;       break;
 197     case SPELL_IDENT:   len = token->val.node->length;  break;
 198     }
 199   return len + 5;
 200 }
 201
     /* IS_ARG_CONTEXT is non-zero if context C has CONTEXT_ARG set.
        CURRENT_CONTEXT yields the address of the entry at index
        cur_context in PFILE's contexts array.  ON_REST_ARG is true when
        C's flags include VAR_ARGS and the val.aux of the token at C's
        current position in its token list equals the index of the last
        parameter (paramc - 1).  */
 202 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 203 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 204 #define ON_REST_ARG(c) \
 205  (((c)->flags & VAR_ARGS) \
 206   && (c)->u.list->tokens[(c)->posn].val.aux \
 207       == (unsigned int) ((c)->u.list->paramc - 1))
 208
     /* Set D's flags to the PREV_WHITE, BOL and PASTE_LEFT bits of S's
        flags (all other bits of D are cleared); if BOL is among them,
        also copy S's column and line into D.  */
 209 #define ASSIGN_FLAGS_AND_POS(d, s) \
 210   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 211       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 212   } while (0)
 213
 214 /* f is flags, just consisting of PREV_WHITE | BOL.  The PREV_WHITE
        and BOL bits of D are replaced by F, leaving D's other flags
        alone; if F includes BOL, S's column and line are copied into
        D.  */
 215 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 216   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 217       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 218   } while (0)
 219
     /* Build the token spelling table by expanding TTYPE_TABLE.  An
        OP (e, s) entry becomes { SPELL_OPERATOR, s }; a TK (e, s) entry
        becomes { s, "e" }, i.e. spelling class S paired with the
        stringified enumerator name.  Both helper macros are undefined
        again once the table has been built.  */
 220 #define OP(e, s) { SPELL_OPERATOR, U s           },
 221 #define TK(e, s) { s,              U STRINGX (e) },
 222
 223 const struct token_spelling
 224 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
 225
 226 #undef OP
 227 #undef TK
 228
 229 /* The following table is used by trigraph_ok/trigraph_replace.  If we
 230    have designated initializers, it can be constant data; otherwise,
 231    it is set up at runtime by _cpp_init_input_buffer.  */
 232
     /* The helper macros TRIGRAPH_MAP, END and s let one list of entries
        serve both forms.  With GCC_VERSION >= 2007 the list becomes a
        designated initializer of a const array and init_trigraph_map ()
        expands to nothing.  Otherwise the array is zero-initialized and
        the list becomes the body of a function init_trigraph_map that
        stores each entry.  */
 233 #if (GCC_VERSION >= 2007)
 234 #define init_trigraph_map()  /* nothing */
 235 #define TRIGRAPH_MAP \
 236 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
 237 #define END };
 238 #define s(p, v) [p] = v,
 239 #else
 240 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 241  static void init_trigraph_map PARAMS ((void)) { \
 242  unsigned char *x = trigraph_map;
 243 #define END }
 244 #define s(p, v) x[p] = v;
 245 #endif
 246
     /* Each s (p, v) entry maps P, the third character of the trigraph
        ??P, to its replacement V.  Every other element stays zero, which
        is how trigraph_replace recognises a non-trigraph.  */
 247 TRIGRAPH_MAP
 248   s('=', '#')   s(')', ']')     s('!', '|')
 249   s('(', '[')   s('\'', '^')    s('>', '}')
 250   s('/', '\\')  s('<', '{')     s('-', '~')
 251 END
 252
 253 #undef TRIGRAPH_MAP
 254 #undef END
 255 #undef s
 256
 257 /* Notify the compiler proper that the current line number has jumped,
 258    or the current file name has changed.  */
 259
 260 static void
 261 output_line_command (pfile, print, line)
 262      cpp_reader *pfile;
 263      cpp_printer *print;
 264      unsigned int line;
 265 {
 266   cpp_buffer *ip = CPP_BUFFER (pfile);
 267
 268   if (line == 0)
 269     return;
 270
 271   /* End the previous line of text.  */
 272   if (pfile->need_newline)
 273     {
 274       putc ('\n', print->outf);
 275       print->lineno++;
 276     }
 277   pfile->need_newline = 0;
 278
 279   if (CPP_OPTION (pfile, no_line_commands))
 280     return;
 281
 282   /* If the current file has not changed, we can output a few newlines
 283      instead if we want to increase the line number by a small amount.
 284      We cannot do this if print->lineno is zero, because that means we
 285      haven't output any line commands yet.  (The very first line
 286      command output is a `same_file' command.)
 287
 288      'nominal_fname' values are unique, so they can be compared by
 289      comparing pointers.  */
 290   if (ip->nominal_fname == print->last_fname && print->lineno > 0
 291       && line >= print->lineno && line < print->lineno + 8)
 292     {
 293       while (line > print->lineno)
 294         {
 295           putc ('\n', print->outf);
 296           print->lineno++;
 297         }
 298       return;
 299     }
 300
 301   fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
 302            cpp_syshdr_flags (pfile, ip));
 303
 304   print->last_fname = ip->nominal_fname;
 305   print->lineno = line;
 306 }
 307
 308 /* Like fprintf, but writes to a printer object.  You should be sure
 309    always to generate a complete line when you use this function.  */
 310 void
 311 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
 312                      const char *fmt, ...))
 313 {
 314   va_list ap;
 315 #ifndef ANSI_PROTOTYPES
 316   cpp_reader *pfile;
 317   cpp_printer *print;
 318   const char *fmt;
 319 #endif
 320
 321   VA_START (ap, fmt);
 322
 323 #ifndef ANSI_PROTOTYPES
 324   pfile = va_arg (ap, cpp_reader *);
 325   print = va_arg (ap, cpp_printer *);
 326   fmt = va_arg (ap, const char *);
 327 #endif
 328
 329   /* End the previous line of text.  */
 330   if (pfile->need_newline)
 331     putc ('\n', print->outf);
 332   pfile->need_newline = 0;
 333
 334   vfprintf (print->outf, fmt, ap);
 335   va_end (ap);
 336 }
 337
 338 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 339
 340 void
 341 cpp_scan_buffer_nooutput (pfile)
 342      cpp_reader *pfile;
 343 {
 344   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 345   const cpp_token *token;
 346
 347   /* In no-output mode, we can ignore everything but directives.  */
 348   for (;;)
 349     {
 350       token = _cpp_get_token (pfile);
 351
 352       if (token->type == CPP_EOF)
 353         {
 354           cpp_pop_buffer (pfile);
 355           if (CPP_BUFFER (pfile) == stop)
 356             break;
 357         }
 358
 359       if (token->type == CPP_HASH && token->flags & BOL
 360           && pfile->token_list.directive)
 361         {
 362           process_directive (pfile, token);
 363           continue;
 364         }
 365
 366       _cpp_skip_rest_of_line (pfile);
 367     }
 368 }
 369
 370 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 371 void
 372 cpp_scan_buffer (pfile, print)
 373      cpp_reader *pfile;
 374      cpp_printer *print;
 375 {
 376   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 377   const cpp_token *token, *prev = 0;
 378
 379   for (;;)
 380     {
 381       token = _cpp_get_token (pfile);
 382       if (token->type == CPP_EOF)
 383         {
 384           cpp_pop_buffer (pfile);
 385
 386           if (CPP_BUFFER (pfile) == stop)
 387             return;
 388
 389           prev = 0;
 390           continue;
 391         }
 392
 393       if (token->flags & BOL)
 394         {
 395           if (token->type == CPP_HASH && pfile->token_list.directive)
 396             {
 397               process_directive (pfile, token);
 398               continue;
 399             }
 400
 401           output_line_command (pfile, print, pfile->token_list.line);
 402           prev = 0;
 403         }
 404
 405       if (token->type != CPP_PLACEMARKER)
 406         {
 407           output_token (pfile, print->outf, token, prev, 1);
 408           pfile->need_newline = 1;
 409         }
 410
 411       prev = token;
 412     }
 413 }
 414
 415 /* Helper routine used by parse_include, which can't see spell_token.
 416    Reinterpret the current line as an h-char-sequence (< ... >); we are
 417    looking at the first token after the <.  */
 418 const cpp_token *
 419 _cpp_glue_header_name (pfile)
 420      cpp_reader *pfile;
 421 {
 422   const cpp_token *t;
 423   cpp_token *hdr;
 424   U_CHAR *buf, *p;
 425   size_t len, avail;
 426
 427   avail = 40;
 428   len = 0;
 429   buf = xmalloc (avail);
 430
 431   for (;;)
 432     {
 433       t = _cpp_get_token (pfile);
 434       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 435         break;
 436
 437       if (len + TOKEN_LEN (t) > avail)
 438         {
 439           avail = len + TOKEN_LEN (t) + 40;
 440           buf = xrealloc (buf, avail);
 441         }
 442
 443       if (t->flags & PREV_WHITE)
 444         buf[len++] = ' ';
 445
 446       p = spell_token (pfile, t, buf + len);
 447       len = (size_t) (p - buf);  /* p known >= buf */
 448     }
 449
 450   if (t->type == CPP_EOF)
 451     cpp_error (pfile, "missing terminating > character");
 452
 453   buf = xrealloc (buf, len);
 454
 455   hdr = get_temp_token (pfile);
 456   hdr->type = CPP_HEADER_NAME;
 457   hdr->flags = 0;
 458   hdr->val.str.text = buf;
 459   hdr->val.str.len = len;
 460   return hdr;
 461 }
 462
 463 /* Token-buffer helper functions.  */
 464
 465 /* Expand a token list's string space. It is *vital* that
 466    list->tokens_used is correct, to get pointer fix-up right.  */
 467 void
 468 _cpp_expand_name_space (list, len)
 469      cpp_toklist *list;
 470      unsigned int len;
 471 {
 472   const U_CHAR *old_namebuf;
 473
 474   old_namebuf = list->namebuf;
 475   list->name_cap += len;
 476   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 477
 478   /* Fix up token text pointers.  */
 479   if (list->namebuf != old_namebuf)
 480     {
 481       unsigned int i;
 482
 483       for (i = 0; i < list->tokens_used; i++)
 484         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
 485           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 486     }
 487 }
 488
 489 /* If there is not enough room for LEN more characters, expand the
 490    list by just enough to have room for LEN characters.  */
 491 void
 492 _cpp_reserve_name_space (list, len)
 493      cpp_toklist *list;
 494      unsigned int len;
 495 {
 496   unsigned int room = list->name_cap - list->name_used;
 497
 498   if (room < len)
 499     _cpp_expand_name_space (list, len - room);
 500 }
 501
 502 /* Expand the number of tokens in a list.  */
 503 void
 504 _cpp_expand_token_space (list, count)
 505      cpp_toklist *list;
 506      unsigned int count;
 507 {
 508   unsigned int n;
 509
 510   list->tokens_cap += count;
 511   n = list->tokens_cap;
 512   if (list->flags & LIST_OFFSET)
 513     list->tokens--, n++;
 514   list->tokens = (cpp_token *)
 515     xrealloc (list->tokens, n * sizeof (cpp_token));
 516   if (list->flags & LIST_OFFSET)
 517     list->tokens++;             /* Skip the dummy.  */
 518 }
 519
 520 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 521    an extra token in front of the token list, as this allows the lexer
 522    to always peek at the previous token without worrying about
 523    underflowing the list, and some initial space.  Otherwise, no
 524    token- or name-space is allocated, and there is no dummy token.  */
 525 void
 526 _cpp_init_toklist (list, flags)
 527      cpp_toklist *list;
 528      int flags;
 529 {
 530   if (flags == NO_DUMMY_TOKEN)
 531     {
 532       list->tokens_cap = 0;
 533       list->tokens = 0;
 534       list->name_cap = 0;
 535       list->namebuf = 0;
 536       list->flags = 0;
 537     }
 538   else
 539     {
 540       /* Initialize token space.  Put a dummy token before the start
 541          that will fail matches.  */
 542       list->tokens_cap = 256;   /* 4K's worth.  */
 543       list->tokens = (cpp_token *)
 544         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 545       list->tokens[0].type = CPP_EOF;
 546       list->tokens++;
 547
 548       /* Initialize name space.  */
 549       list->name_cap = 1024;
 550       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 551       list->flags = LIST_OFFSET;
 552     }
 553
 554   _cpp_clear_toklist (list);
 555 }
 556
 557 /* Clear a token list.  */
 558 void
 559 _cpp_clear_toklist (list)
 560      cpp_toklist *list;
 561 {
 562   list->tokens_used = 0;
 563   list->name_used = 0;
 564   list->directive = 0;
 565   list->paramc = 0;
 566   list->params_len = 0;
 567   list->flags &= LIST_OFFSET;  /* clear all but that one */
 568 }
 569
 570 /* Free a token list.  Does not free the list itself, which may be
 571    embedded in a larger structure.  */
 572 void
 573 _cpp_free_toklist (list)
 574      const cpp_toklist *list;
 575 {
 576   if (list->flags & LIST_OFFSET)
 577     free (list->tokens - 1);    /* Backup over dummy token.  */
 578   else
 579     free (list->tokens);
 580   free (list->namebuf);
 581 }
 582
 583 /* Compare two tokens.  */
 584 int
 585 _cpp_equiv_tokens (a, b)
 586      const cpp_token *a, *b;
 587 {
 588   if (a->type == b->type && a->flags == b->flags)
 589     switch (TOKEN_SPELL (a))
 590       {
 591       default:                  /* Keep compiler happy.  */
 592       case SPELL_OPERATOR:
 593         return 1;
 594       case SPELL_CHAR:
 595       case SPELL_NONE:
 596         return a->val.aux == b->val.aux; /* arg_no or character.  */
 597       case SPELL_IDENT:
 598         return a->val.node == b->val.node;
 599       case SPELL_STRING:
 600         return (a->val.str.len == b->val.str.len
 601                 && !memcmp (a->val.str.text, b->val.str.text,
 602                             a->val.str.len));
 603       }
 604
 605   return 0;
 606 }
 607
 608 /* Compare two token lists.  */
 609 int
 610 _cpp_equiv_toklists (a, b)
 611      const cpp_toklist *a, *b;
 612 {
 613   unsigned int i;
 614
 615   if (a->tokens_used != b->tokens_used
 616       || a->flags != b->flags
 617       || a->paramc != b->paramc)
 618     return 0;
 619
 620   for (i = 0; i < a->tokens_used; i++)
 621     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 622       return 0;
 623   return 1;
 624 }
 625
 626 /* Utility routine:
 627
 628    Compares, the token TOKEN to the NUL-terminated string STRING.
 629    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 630
 631 int
 632 cpp_ideq (token, string)
 633      const cpp_token *token;
 634      const char *string;
 635 {
 636   if (token->type != CPP_NAME)
 637     return 0;
 638
 639   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 640 }
 641
 642 /* Lexing algorithm.
 643
 644  The original lexer in cpplib was made up of two passes: a first pass
 645  that replaced trigraphs and deleted escaped newlines, and a second
 646  pass that tokenized the result of the first pass.  Tokenisation was
 647  performed by peeking at the next character in the input stream.  For
 648  example, if the input stream contained "!=", the handler for the !
 649  character would peek at the next character, and if it were a '='
 650  would skip over it, and return a "!=" token, otherwise it would
 651  return just the "!" token.
 652
 653  To implement a single-pass lexer, this peeking ahead is unworkable.
 654  An arbitrary number of escaped newlines, and trigraphs (in particular
 655  ??/ which translates to the escape \), could separate the '!' and '='
 656  in the input stream, yet the next token is still a "!=".
 657
 658  Suppose instead that we lex by one logical line at a time, producing
 659  a token list or stack for each logical line, and when seeing the '!'
 660  push a CPP_NOT token on the list.  Then if the '!' is part of a
 661  longer token ("!=") we know we must see the remainder of the token by
 662  the time we reach the end of the logical line.  Thus we can have the
 663  '=' handler look at the previous token (at the end of the list / top
 664  of the stack) and see if it is a "!" token, and if so, instead of
 665  pushing a "=" token revise the existing token to be a "!=" token.
 666
 667  This works in the presence of escaped newlines, because the '\' would
 668  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 669  newline ('\n' or '\r') handler looks at the token at the top of the
 670  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 671  Hence the '=' handler would never see any intervening tokens.
 672
 673  To make trigraphs work in this context, as in precedence trigraphs
 674  are highest and converted before anything else, the '?' handler does
 675  lookahead to see if it is a trigraph, and if so skips the trigraph
 676  and pushes the token it represents onto the top of the stack.  This
 677  also works in the particular case of a CPP_BACKSLASH trigraph.
 678
 679  To the preprocessor, whitespace is only significant to the point of
 680  knowing whether whitespace precedes a particular token.  For example,
 681  the '=' handler needs to know whether there was whitespace between it
 682  and a "!" token on the top of the stack, to make the token conversion
 683  decision correctly.  So each token has a PREV_WHITE flag to
 684  indicate this - the standard permits consecutive whitespace to be
 685  regarded as a single space.  The compiler front ends are not
 686  interested in whitespace at all; they just require a token stream.
 687  Another place where whitespace is significant to the preprocessor is
 688  a #define statement - if there is whitespace between the macro name
 689  and an initial "(" token the macro is "object-like", otherwise it is
 690  a function-like macro that takes arguments.
 691
 692  However, all is not rosy.  Parsing of identifiers, numbers, comments
 693  and strings becomes trickier because of the possibility of raw
 694  trigraphs and escaped newlines in the input stream.
 695
 696  The trigraphs are three consecutive characters beginning with two
 697  question marks.  A question mark is not valid as part of a number or
 698  identifier, so parsing of a number or identifier terminates normally
 699  upon reaching it, returning to the mainloop which handles the
 700  trigraph just like it would in any other position.  Similarly for the
 701  backslash of a backslash-newline combination.  So we just need the
 702  escaped-newline dropper in the mainloop to check if the token on the
 703  top of the stack after dropping the escaped newline is a number or
 704  identifier, and if so to continue the processing it as if nothing had
 705  happened.
 706
 707  For strings, we replace trigraphs whenever we reach a quote or
 708  newline, because there might be a backslash trigraph escaping them.
 709  We need to be careful that we start trigraph replacing from where we
 710  left off previously, because it is possible for a first scan to leave
 711  "fake" trigraphs that a second scan would pick up as real (e.g. the
 712  sequence "????/\n=" would find a fake ??= trigraph after removing the
 713  escaped newline.)
 714
 715  For line comments, on reaching a newline we scan the previous
 716  character(s) to see if it is escaped, and continue if it is.  Block
 717  comments ignore everything and just focus on finding the comment
 718  termination mark.  The only difficult thing, and it is surprisingly
 719  tricky, is checking if an asterisk precedes the final slash since
 720  they could be separated by escaped newlines.  If the preprocessor is
 721  invoked with the output comments option, we don't bother removing
 722  escaped newlines and replacing trigraphs for output.
 723
 724  Finally, numbers can begin with a period, which is pushed initially
 725  as a CPP_DOT token in its own right.  The digit handler checks if the
 726  previous token was a CPP_DOT not separated by whitespace, and if so
 727  pops it off the stack and pushes a period into the number's buffer
 728  before calling the number parser.
 729
 730 */
 731
     /* Spellings of the six digraphs.  NOTE(review): the code that
        indexes this array is outside this part of the file, so the
        order is presumably significant -- confirm before reordering.  */
 732 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 733                                                     U":>", U"<%", U"%>"};
 734
 735 /* Call when a trigraph is encountered.  It warns if necessary, and
 736    returns true if the trigraph should be honoured.  END is the third
 737    character of a trigraph in the input stream.  */
 738 static int
 739 trigraph_ok (pfile, end)
 740      cpp_reader *pfile;
 741      const unsigned char *end;
 742 {
 743   int accept = CPP_OPTION (pfile, trigraphs);
 744
 745   if (CPP_OPTION (pfile, warn_trigraphs))
 746     {
 747       unsigned int col = end - 1 - pfile->buffer->line_base;
 748       if (accept)
 749         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 750                                "trigraph ??%c converted to %c",
 751                                (int) *end, (int) trigraph_map[*end]);
 752       else
 753         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 754                                "trigraph ??%c ignored", (int) *end);
 755     }
 756   return accept;
 757 }
 758
 759 /* Scan a string for trigraphs, warning or replacing them inline as
 760    appropriate.  When parsing a string, we must call this routine
 761    before processing a newline character (if trigraphs are enabled),
 762    since the newline might be escaped by a preceding backslash
 763    trigraph sequence.  Returns a pointer to the end of the name after
 764    replacement.  */
 765
 766 static unsigned char *
 767 trigraph_replace (pfile, src, limit)
 768      cpp_reader *pfile;
 769      unsigned char *src;
 770      unsigned char *limit;
 771 {
 772   unsigned char *dest;
 773
 774   /* Starting with src[1], find two consecutive '?'.  The case of no
 775      trigraphs is streamlined.  */
 776
 777   for (src++; src + 1 < limit; src += 2)
 778     {
 779       if (src[0] != '?')
 780         continue;
 781
 782       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 783       if (src[-1] == '?')
 784         src--;
 785       else if (src + 2 == limit || src[1] != '?')
 786         continue;
 787
 788       /* Check if it really is a trigraph.  */
 789       if (trigraph_map[src[2]] == 0)
 790         continue;
 791
 792       dest = src;
 793       goto trigraph_found;
 794     }
 795   return limit;
 796
 797   /* Now we have a trigraph, we need to scan the remaining buffer, and
 798      copy-shifting its contents left if replacement is enabled.  */
 799   for (; src + 2 < limit; dest++, src++)
 800     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 801       {
 802       trigraph_found:
 803         src += 2;
 804         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 805           *dest = trigraph_map[*src];
 806       }
 807
 808   /* Copy remaining (at most 2) characters.  */
 809   while (src < limit)
 810     *dest++ = *src++;
 811   return dest;
 812 }
 813
 814 /* If CUR is a backslash or the end of a trigraphed backslash, return
 815    a pointer to its beginning, otherwise NULL.  We don't read beyond
 816    the buffer start, because there is the start of the comment in the
 817    buffer.  */
 818 static const unsigned char *
 819 backslash_start (pfile, cur)
 820      cpp_reader *pfile;
 821      const unsigned char *cur;
 822 {
 823   if (cur[0] == '\\')
 824     return cur;
 825   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 826       && trigraph_ok (pfile, cur))
 827     return cur - 2;
 828   return 0;
 829 }
 830
 831 /* Skip a C-style block comment.  This is probably the trickiest
 832    handler.  We find the end of the comment by seeing if an asterisk
 833    is before every '/' we encounter.  The nasty complication is that a
 834    previous asterisk may be separated by one or more escaped newlines.
 835    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 836 static int
 837 skip_block_comment (pfile)
 838      cpp_reader *pfile;
 839 {
 840   cpp_buffer *buffer = pfile->buffer;
 841   const unsigned char *char_after_star = 0;
 842   const unsigned char *cur = buffer->cur;
 843
 844   for (; cur < buffer->rlimit; )
 845     {
 846       unsigned char c = *cur++;
 847
 848       /* People like decorating comments with '*', so check for
 849          '/' instead for efficiency.  */
 850       if (c == '/')
 851         {
 852           /* Don't view / then * then / as finishing the comment.  */
 853           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 854               || cur - 1 == char_after_star)
 855             {
 856               buffer->cur = cur;
 857               return 0;
 858             }
 859
 860           /* Warn about potential nested comments, but not when
 861              the final character inside the comment is a '/'.
 862              Don't bother to get it right across escaped newlines.  */
 863           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 864               && cur[0] == '*' && cur[1] != '/')
 865             {
 866               buffer->cur = cur;
 867               cpp_warning (pfile, "'/*' within comment");
 868             }
 869         }
 870       else if (is_vspace (c))
 871         {
 872           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 873
 874           handle_newline (cur, buffer->rlimit, c);
 875           /* Work correctly if there is an asterisk before an
 876              arbirtrarily long sequence of escaped newlines.  */
 877           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 878             char_after_star = cur;
 879           else
 880             char_after_star = 0;
 881         }
 882       else if (c == '\t')
 883         adjust_column (pfile, cur - 1);
 884     }
 885
 886   buffer->cur = cur;
 887   return 1;
 888 }
 889
 890 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 891    non-zero if a multiline comment.  */
 892 static int
 893 skip_line_comment (pfile)
 894      cpp_reader *pfile;
 895 {
 896   cpp_buffer *buffer = pfile->buffer;
 897   register const unsigned char *cur = buffer->cur;
 898   int multiline = 0;
 899
 900   for (; cur < buffer->rlimit; )
 901     {
 902       unsigned char c = *cur++;
 903
 904       if (is_vspace (c))
 905         {
 906           /* Check for a (trigaph?) backslash escaping the newline.  */
 907           if (!backslash_start (pfile, cur - 2))
 908             goto out;
 909           multiline = 1;
 910           handle_newline (cur, buffer->rlimit, c);
 911         }
 912     }
 913   cur++;
 914
 915  out:
 916   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 917   return multiline;
 918 }
 919
 920 /* TAB points to a \t character.  Update col_adjust so we track the
 921    column correctly.  */
 922 static void
 923 adjust_column (pfile, tab)
 924      cpp_reader *pfile;
 925      const U_CHAR *tab;
 926 {
 927   /* Zero-based column.  */
 928   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
 929
 930   /* Round it up to multiple of the tabstop, but subtract 1 since the
 931      tab itself occupies a character position.  */
 932   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
 933                         - col % CPP_OPTION (pfile, tabstop)) - 1;
 934 }
 935
 936 /* Skips whitespace, stopping at next non-whitespace character.
 937    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
 938    to be assigned the correct column.  */
 939 static void
 940 skip_whitespace (pfile, in_directive)
 941      cpp_reader *pfile;
 942      int in_directive;
 943 {
 944   cpp_buffer *buffer = pfile->buffer;
 945   unsigned short warned = 0;
 946
 947   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 948   while (buffer->cur < buffer->rlimit)
 949     {
 950       unsigned char c = *buffer->cur;
 951
 952       if (!is_nvspace (c))
 953         break;
 954
 955       buffer->cur++;
 956       /* Horizontal space always OK.  */
 957       if (c == ' ')
 958         continue;
 959       else if (c == '\t')
 960         adjust_column (pfile, buffer->cur - 1);
 961       /* Must be \f \v or \0.  */
 962       else if (c == '\0')
 963         {
 964           if (!warned)
 965             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 966                                    CPP_BUF_COL (buffer),
 967                                    "embedded null character ignored");
 968           warned = 1;
 969         }
 970       else if (in_directive && CPP_PEDANTIC (pfile))
 971         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 972                                CPP_BUF_COL (buffer),
 973                                "%s in preprocessing directive",
 974                                c == '\f' ? "form feed" : "vertical tab");
 975     }
 976 }
 977
 978 /* Parse (append) an identifier.  Calculates the hash value of the
 979    token while parsing, for performance.  The algorithm *must* match
 980    cpp_lookup().  */
 981 static const U_CHAR *
 982 parse_name (pfile, tok, cur, rlimit)
 983      cpp_reader *pfile;
 984      cpp_token *tok;
 985      const U_CHAR *cur, *rlimit;
 986 {
 987   const U_CHAR *name;
 988   unsigned int len;
 989   unsigned int r;
 990
 991   name = cur;
 992   r = 0;
 993   while (cur < rlimit)
 994     {
 995       if (! is_idchar (*cur))
 996         break;
 997       /* $ is not a identifier character in the standard, but is
 998          commonly accepted as an extension.  Don't warn about it in
 999          skipped conditional blocks. */
1000       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1001         {
1002           CPP_BUFFER (pfile)->cur = cur;
1003           cpp_pedwarn (pfile, "'$' character in identifier");
1004         }
1005
1006       r = HASHSTEP (r, cur);
1007       cur++;
1008     }
1009   len = cur - name;
1010
1011   if (tok->type == CPP_NAME && tok->val.node == 0)
1012     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
1013   else
1014     {
1015       unsigned int oldlen;
1016       U_CHAR *newname;
1017
1018       if (tok->type == CPP_NAME)
1019         oldlen = tok->val.node->length;
1020       else
1021         oldlen = 1;
1022
1023       newname = alloca (oldlen + len);
1024
1025       if (tok->type == CPP_NAME)
1026         memcpy (newname, tok->val.node->name, oldlen);
1027       else
1028         newname[0] = tok->val.aux;
1029       memcpy (newname + oldlen, name, len);
1030       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1031       tok->type = CPP_NAME;
1032     }
1033
1034   return cur;
1035 }
1036
1037 /* Parse (append) a number.  */
1038 static void
1039 parse_number (pfile, list, name)
1040      cpp_reader *pfile;
1041      cpp_toklist *list;
1042      cpp_string *name;
1043 {
1044   const unsigned char *name_limit;
1045   unsigned char *namebuf;
1046   cpp_buffer *buffer = pfile->buffer;
1047   register const unsigned char *cur = buffer->cur;
1048
1049  expanded:
1050   name_limit = list->namebuf + list->name_cap;
1051   namebuf = list->namebuf + list->name_used;
1052
1053   for (; cur < buffer->rlimit && namebuf < name_limit; )
1054     {
1055       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1056
1057       /* Perhaps we should accept '$' here if we accept it for
1058          identifiers.  We know namebuf[-1] is safe, because for c to
1059          be a sign we must have pushed at least one character.  */
1060       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1061         goto out;
1062
1063       namebuf++;
1064       cur++;
1065     }
1066
1067   /* Run out of name space?  */
1068   if (cur < buffer->rlimit)
1069     {
1070       list->name_used = namebuf - list->namebuf;
1071       auto_expand_name_space (list);
1072       goto expanded;
1073     }
1074
1075  out:
1076   buffer->cur = cur;
1077   name->len = namebuf - name->text;
1078   list->name_used = namebuf - list->namebuf;
1079 }
1080
1081 /* Places a string terminated by an unescaped TERMINATOR into a
1082    cpp_string, which should be expandable and thus at the top of the
1083    list's stack.  Handles embedded trigraphs, if necessary, and
1084    escaped newlines.
1085
1086    Can be used for character constants (terminator = '\''), string
1087    constants ('"') and angled headers ('>').  Multi-line strings are
1088    allowed, except for within directives.  */
1089
1090 static void
1091 parse_string (pfile, list, token, terminator)
1092      cpp_reader *pfile;
1093      cpp_toklist *list;
1094      cpp_token *token;
1095      unsigned int terminator;
1096 {
1097   cpp_buffer *buffer = pfile->buffer;
1098   cpp_string *name = &token->val.str;
1099   register const unsigned char *cur = buffer->cur;
1100   const unsigned char *name_limit;
1101   unsigned char *namebuf;
1102   unsigned int null_count = 0;
1103   unsigned int trigraphed = list->name_used;
1104
1105  expanded:
1106   name_limit = list->namebuf + list->name_cap;
1107   namebuf = list->namebuf + list->name_used;
1108
1109   for (; cur < buffer->rlimit && namebuf < name_limit; )
1110     {
1111       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1112
1113       if (c == '\0')
1114         null_count++;
1115       else if (c == terminator || is_vspace (c))
1116         {
1117           /* Needed for trigraph_replace and multiline string warning.  */
1118           buffer->cur = cur;
1119
1120           /* Scan for trigraphs before checking if backslash-escaped.  */
1121           if ((CPP_OPTION (pfile, trigraphs)
1122                || CPP_OPTION (pfile, warn_trigraphs))
1123               && namebuf - (list->namebuf + trigraphed) >= 3)
1124             {
1125               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1126                                           namebuf);
1127               /* The test above guarantees trigraphed will be positive.  */
1128               trigraphed = namebuf - list->namebuf - 2;
1129             }
1130
1131           namebuf--;     /* Drop the newline / terminator from the name.  */
1132           if (is_vspace (c))
1133             {
1134               /* Drop a backslash newline, and continue. */
1135               if (namebuf[-1] == '\\')
1136                 {
1137                   handle_newline (cur, buffer->rlimit, c);
1138                   namebuf--;
1139                   continue;
1140                 }
1141
1142               cur--;
1143
1144               /* In assembly language, silently terminate strings of
1145                  either variety at end of line.  This is a kludge
1146                  around not knowing where comments are.  */
1147               if (CPP_OPTION (pfile, lang_asm))
1148                 goto out;
1149
1150               /* Character constants and header names may not extend
1151                  over multiple lines.  In Standard C, neither may
1152                  strings.  We accept multiline strings as an
1153                  extension.  (Even in directives - otherwise, glibc's
1154                  longlong.h breaks.)  */
1155               if (terminator != '"')
1156                 goto unterminated;
1157
1158               cur++;  /* Move forwards again.  */
1159
1160               if (pfile->multiline_string_line == 0)
1161                 {
1162                   pfile->multiline_string_line = token->line;
1163                   pfile->multiline_string_column = token->col;
1164                   if (CPP_PEDANTIC (pfile))
1165                     cpp_pedwarn (pfile, "multi-line string constant");
1166                 }
1167
1168               *namebuf++ = '\n';
1169               handle_newline (cur, buffer->rlimit, c);
1170             }
1171           else
1172             {
1173               unsigned char *temp;
1174
1175               /* An odd number of consecutive backslashes represents
1176                  an escaped terminator.  */
1177               temp = namebuf - 1;
1178               while (temp >= name->text && *temp == '\\')
1179                 temp--;
1180
1181               if ((namebuf - temp) & 1)
1182                 goto out;
1183               namebuf++;
1184             }
1185         }
1186     }
1187
1188   /* Run out of name space?  */
1189   if (cur < buffer->rlimit)
1190     {
1191       list->name_used = namebuf - list->namebuf;
1192       auto_expand_name_space (list);
1193       goto expanded;
1194     }
1195
1196   /* We may not have trigraph-replaced the input for this code path,
1197      but as the input is in error by being unterminated we don't
1198      bother.  Prevent warnings about no newlines at EOF.  */
1199   if (is_vspace (cur[-1]))
1200     cur--;
1201
1202  unterminated:
1203   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1204
1205   if (terminator == '\"' && pfile->multiline_string_line != list->line
1206       && pfile->multiline_string_line != 0)
1207     {
1208       cpp_error_with_line (pfile, pfile->multiline_string_line,
1209                            pfile->multiline_string_column,
1210                            "possible start of unterminated string literal");
1211       pfile->multiline_string_line = 0;
1212     }
1213
1214  out:
1215   buffer->cur = cur;
1216   name->len = namebuf - name->text;
1217   list->name_used = namebuf - list->namebuf;
1218
1219   if (null_count > 0)
1220     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1221                          : "null character preserved"));
1222 }
1223
1224 /* The character TYPE helps us distinguish comment types: '*' = C
1225    style, '/' = C++ style.  For code simplicity, the stored comment
1226    includes the comment start and any terminator.  */
1227
1228 #define COMMENT_START_LEN 2
1229 static void
1230 save_comment (list, token, from, len, type)
1231      cpp_toklist *list;
1232      cpp_token *token;
1233      const unsigned char *from;
1234      unsigned int len;
1235      unsigned int type;
1236 {
1237   unsigned char *buffer;
1238
1239   len += COMMENT_START_LEN;
1240
1241   if (list->name_used + len > list->name_cap)
1242     _cpp_expand_name_space (list, len);
1243
1244   INIT_TOKEN_STR (list, token);
1245   token->type = CPP_COMMENT;
1246   token->val.str.len = len;
1247
1248   buffer = list->namebuf + list->name_used;
1249   list->name_used += len;
1250
1251   /* Copy the comment.  */
1252   if (type == '*')
1253     {
1254       *buffer++ = '/';
1255       *buffer++ = '*';
1256     }
1257   else
1258     {
1259       *buffer++ = type;
1260       *buffer++ = type;
1261     }
1262   memcpy (buffer, from, len - COMMENT_START_LEN);
1263 }
1264
1265 /*
1266  *  The tokenizer's main loop.  Returns a token list, representing a
1267  *  logical line in the input file.  On EOF after some tokens have
1268  *  been processed, we return immediately.  Then in next call, or if
1269  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1270  *  token is placed in the list.
1271  *
1272  *  Implementation relies almost entirely on lookback, rather than
1273  *  looking forwards.  This means that tokenization requires just
1274  *  a single pass of the file, even in the presence of trigraphs and
1275  *  escaped newlines, providing significant performance benefits.
1276  *  Trigraph overhead is negligible if they are disabled, and low
1277  *  even when enabled.
1278  */
1279
1280 #define KNOWN_DIRECTIVE() (list->directive != 0)
1281 #define MIGHT_BE_DIRECTIVE() \
1282 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1283
1284 static void
1285 lex_line (pfile, list)
1286      cpp_reader *pfile;
1287      cpp_toklist *list;
1288 {
1289   cpp_token *cur_token, *token_limit, *first;
1290   cpp_buffer *buffer = pfile->buffer;
1291   const unsigned char *cur = buffer->cur;
1292   unsigned char flags = 0;
1293   unsigned int first_token = list->tokens_used;
1294
1295   if (!(list->flags & LIST_OFFSET))
1296     (abort) ();
1297
1298  retry:
1299   list->file = buffer->nominal_fname;
1300   list->line = CPP_BUF_LINE (buffer);
1301   pfile->col_adjust = 0;
1302   pfile->in_lex_line = 1;
1303   if (cur == buffer->buf)
1304     list->flags |= BEG_OF_FILE;
1305
1306  expanded:
1307   token_limit = list->tokens + list->tokens_cap;
1308   cur_token = list->tokens + list->tokens_used;
1309
1310   for (; cur < buffer->rlimit && cur_token < token_limit;)
1311     {
1312       unsigned char c;
1313
1314       /* Optimize non-vertical whitespace skipping; most tokens are
1315          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1316       c = *cur;
1317       if (is_nvspace (c))
1318         {
1319           buffer->cur = cur;
1320           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1321                                    && cur_token > &list->tokens[first_token]));
1322           cur = buffer->cur;
1323
1324           flags = PREV_WHITE;
1325           if (cur == buffer->rlimit)
1326             break;
1327           c = *cur;
1328         }
1329       cur++;
1330
1331       /* Initialize current token.  CPP_EOF will not be fixed up by
1332          expand_name_space.  */
1333       list->tokens_used = cur_token - list->tokens + 1;
1334       cur_token->type = CPP_EOF;
1335       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1336       cur_token->line = CPP_BUF_LINE (buffer);
1337       cur_token->flags = flags;
1338       flags = 0;
1339
1340       switch (c)
1341         {
1342         case '0': case '1': case '2': case '3': case '4':
1343         case '5': case '6': case '7': case '8': case '9':
1344           {
1345             int prev_dot;
1346
1347             cur--;              /* Backup character.  */
1348             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1349             if (prev_dot)
1350               cur_token--;
1351             INIT_TOKEN_STR (list, cur_token);
1352             /* Prepend an immediately previous CPP_DOT token.  */
1353             if (prev_dot)
1354               {
1355                 if (list->name_cap == list->name_used)
1356                   auto_expand_name_space (list);
1357
1358                 cur_token->val.str.len = 1;
1359                 list->namebuf[list->name_used++] = '.';
1360               }
1361
1362           continue_number:
1363             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1364             buffer->cur = cur;
1365             parse_number (pfile, list, &cur_token->val.str);
1366             cur = buffer->cur;
1367           }
1368           /* Check for # 123 form of #line.  */
1369           if (MIGHT_BE_DIRECTIVE ())
1370             list->directive = _cpp_check_linemarker (pfile, cur_token,
1371                                                      !(cur_token[-1].flags
1372                                                        & PREV_WHITE));
1373           cur_token++;
1374           break;
1375
1376         letter:
1377         case '_':
1378         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1379         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1380         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1381         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1382         case 'y': case 'z':
1383         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1384         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1385         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1386         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1387         case 'Y': case 'Z':
1388           cur--;                     /* Backup character.  */
1389
1390           /* In Objective C, '@' may begin certain keywords.  */
1391           if (CPP_OPTION (pfile, objc) && cur_token[-1].type == CPP_OTHER
1392               && cur_token[-1].val.aux == '@' && IMMED_TOKEN ())
1393             cur_token--;
1394           else
1395             {
1396               cur_token->val.node = 0;
1397               cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1398             }
1399
1400         continue_name:
1401           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1402
1403           if (MIGHT_BE_DIRECTIVE ())
1404             list->directive = _cpp_check_directive (pfile, cur_token,
1405                                                     !(list->tokens[0].flags
1406                                                       & PREV_WHITE));
1407           /* Convert named operators to their proper types.  */
1408           if (cur_token->val.node->type == T_OPERATOR)
1409             {
1410               cur_token->flags |= NAMED_OP;
1411               cur_token->type = cur_token->val.node->value.code;
1412             }
1413
1414           cur_token++;
1415           break;
1416
1417         case '\'':
1418           cur_token->type = CPP_CHAR;
1419           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1420               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1421             BACKUP_TOKEN (CPP_WCHAR);
1422           goto do_parse_string;
1423
1424         case '\"':
1425           cur_token->type = CPP_STRING;
1426           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1427               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1428             BACKUP_TOKEN (CPP_WSTRING);
1429           else if (CPP_OPTION (pfile, objc)
1430                    && cur_token[-1].type == CPP_OTHER && IMMED_TOKEN ()
1431                    && cur_token[-1].val.aux == '@')
1432             BACKUP_TOKEN (CPP_OSTRING);
1433
1434         do_parse_string:
1435           /* Here c is one of ' " or >.  */
1436           INIT_TOKEN_STR (list, cur_token);
1437           buffer->cur = cur;
1438           parse_string (pfile, list, cur_token, c);
1439           cur = buffer->cur;
1440           cur_token++;
1441           break;
1442
1443         case '/':
1444           cur_token->type = CPP_DIV;
1445           if (IMMED_TOKEN ())
1446             {
1447               if (PREV_TOKEN_TYPE == CPP_DIV)
1448                 {
1449                   /* We silently allow C++ comments in system headers,
1450                      irrespective of conformance mode, because lots of
1451                      broken systems do that and trying to clean it up
1452                      in fixincludes is a nightmare.  */
1453                   if (CPP_IN_SYSTEM_HEADER (pfile))
1454                     goto do_line_comment;
1455                   else if (CPP_OPTION (pfile, cplusplus_comments))
1456                     {
1457                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1458                           && ! buffer->warned_cplusplus_comments)
1459                         {
1460                           buffer->cur = cur;
1461                           cpp_pedwarn (pfile,
1462                              "C++ style comments are not allowed in ISO C89");
1463                           cpp_pedwarn (pfile,
1464                           "(this will be reported only once per input file)");
1465                           buffer->warned_cplusplus_comments = 1;
1466                         }
1467                     do_line_comment:
1468                       buffer->cur = cur;
1469 #if 0 /* Leave until new lexer in place.  */
1470                       if (cur[-2] != c)
1471                         cpp_warning (pfile,
1472                                      "comment start split across lines");
1473 #endif
1474                       if (skip_line_comment (pfile))
1475                         cpp_warning (pfile, "multi-line comment");
1476
1477                       /* Back-up to first '-' or '/'.  */
1478                       cur_token--;
1479                       if (!CPP_OPTION (pfile, discard_comments)
1480                           && (!KNOWN_DIRECTIVE()
1481                               || (list->directive->flags & COMMENTS)))
1482                         save_comment (list, cur_token++, cur,
1483                                       buffer->cur - cur, c);
1484                       else
1485                         flags = PREV_WHITE;
1486
1487                       cur = buffer->cur;
1488                       break;
1489                     }
1490                 }
1491             }
1492           cur_token++;
1493           break;
1494
1495         case '*':
1496           cur_token->type = CPP_MULT;
1497           if (IMMED_TOKEN ())
1498             {
1499               if (PREV_TOKEN_TYPE == CPP_DIV)
1500                 {
1501                   buffer->cur = cur;
1502 #if 0 /* Leave until new lexer in place.  */
1503                   if (cur[-2] != '/')
1504                     cpp_warning (pfile,
1505                                  "comment start '/*' split across lines");
1506 #endif
1507                   if (skip_block_comment (pfile))
1508                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1509                                          "unterminated comment");
1510 #if 0 /* Leave until new lexer in place.  */
1511                   else if (buffer->cur[-2] != '*')
1512                     cpp_warning (pfile,
1513                                  "comment end '*/' split across lines");
1514 #endif
1515                   /* Back up to opening '/'.  */
1516                   cur_token--;
1517                   if (!CPP_OPTION (pfile, discard_comments)
1518                       && (!KNOWN_DIRECTIVE()
1519                           || (list->directive->flags & COMMENTS)))
1520                     save_comment (list, cur_token++, cur,
1521                                   buffer->cur - cur, c);
1522                   else
1523                     flags = PREV_WHITE;
1524
1525                   cur = buffer->cur;
1526                   break;
1527                 }
1528               else if (CPP_OPTION (pfile, cplusplus))
1529                 {
1530                   /* In C++, there are .* and ->* operators.  */
1531                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1532                     BACKUP_TOKEN (CPP_DEREF_STAR);
1533                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1534                     BACKUP_TOKEN (CPP_DOT_STAR);
1535                 }
1536             }
1537           cur_token++;
1538           break;
1539
1540         case '\n':
1541         case '\r':
1542           handle_newline (cur, buffer->rlimit, c);
1543           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1544             {
1545               if (IMMED_TOKEN ())
1546                 {
1547                   /* Remove the escaped newline.  Then continue to process
1548                      any interrupted name or number.  */
1549                   cur_token--;
1550                   /* Backslash-newline may not be immediately followed by
1551                      EOF (C99 5.1.1.2).  */
1552                   if (cur >= buffer->rlimit)
1553                     {
1554                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1555                       break;
1556                     }
1557                   if (IMMED_TOKEN ())
1558                     {
1559                       cur_token--;
1560                       if (cur_token->type == CPP_NAME)
1561                         goto continue_name;
1562                       else if (cur_token->type == CPP_NUMBER)
1563                         goto continue_number;
1564                       cur_token++;
1565                     }
1566                   /* Remember whitespace setting.  */
1567                   flags = cur_token->flags;
1568                   break;
1569                 }
1570               else
1571                 {
1572                   buffer->cur = cur;
1573                   cpp_warning (pfile,
1574                                "backslash and newline separated by space");
1575                 }
1576             }
1577           else if (MIGHT_BE_DIRECTIVE ())
1578             {
1579               /* "Null directive." C99 6.10.7: A preprocessing
1580                  directive of the form # <new-line> has no effect.
1581
1582                  But it is still a directive, and therefore disappears
1583                  from the output. */
1584               cur_token--;
1585               if (cur_token->flags & PREV_WHITE
1586                   && CPP_WTRADITIONAL (pfile))
1587                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1588             }
1589
1590           /* Skip vertical space until we have at least one token to
1591              return.  */
1592           if (cur_token != &list->tokens[first_token])
1593             goto out;
1594           list->line = CPP_BUF_LINE (buffer);
1595           break;
1596
1597         case '-':
1598           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1599             REVISE_TOKEN (CPP_MINUS_MINUS);
1600           else
1601             PUSH_TOKEN (CPP_MINUS);
1602           break;
1603
1604         make_hash:
1605         case '#':
1606           /* The digraph flag checking ensures that ## and %:%:
1607              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1608           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1609               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1610             REVISE_TOKEN (CPP_PASTE);
1611           else
1612             PUSH_TOKEN (CPP_HASH);
1613           break;
1614
1615         case ':':
1616           cur_token->type = CPP_COLON;
1617           if (IMMED_TOKEN ())
1618             {
1619               if (PREV_TOKEN_TYPE == CPP_COLON
1620                   && CPP_OPTION (pfile, cplusplus))
1621                 BACKUP_TOKEN (CPP_SCOPE);
1622               else if (CPP_OPTION (pfile, digraphs))
1623                 {
1624                   /* Digraph: "<:" is a '['  */
1625                   if (PREV_TOKEN_TYPE == CPP_LESS)
1626                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1627                   /* Digraph: "%:" is a '#'  */
1628                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1629                     {
1630                       (--cur_token)->flags |= DIGRAPH;
1631                       goto make_hash;
1632                     }
1633                 }
1634             }
1635           cur_token++;
1636           break;
1637
1638         case '&':
1639           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1640             REVISE_TOKEN (CPP_AND_AND);
1641           else
1642             PUSH_TOKEN (CPP_AND);
1643           break;
1644
1645         make_or:
1646         case '|':
1647           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1648             REVISE_TOKEN (CPP_OR_OR);
1649           else
1650             PUSH_TOKEN (CPP_OR);
1651           break;
1652
1653         case '+':
1654           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1655             REVISE_TOKEN (CPP_PLUS_PLUS);
1656           else
1657             PUSH_TOKEN (CPP_PLUS);
1658           break;
1659
1660         case '=':
1661             /* This relies on equidistance of "?=" and "?" tokens.  */
1662           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1663             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1664           else
1665             PUSH_TOKEN (CPP_EQ);
1666           break;
1667
1668         case '>':
1669           cur_token->type = CPP_GREATER;
1670           if (IMMED_TOKEN ())
1671             {
1672               if (PREV_TOKEN_TYPE == CPP_GREATER)
1673                 BACKUP_TOKEN (CPP_RSHIFT);
1674               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1675                 BACKUP_TOKEN (CPP_DEREF);
1676               else if (CPP_OPTION (pfile, digraphs))
1677                 {
1678                   /* Digraph: ":>" is a ']'  */
1679                   if (PREV_TOKEN_TYPE == CPP_COLON)
1680                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1681                   /* Digraph: "%>" is a '}'  */
1682                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1683                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1684                 }
1685             }
1686           cur_token++;
1687           break;
1688
1689         case '<':
1690           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1691             {
1692               REVISE_TOKEN (CPP_LSHIFT);
1693               break;
1694             }
1695           /* Is this the beginning of a header name?  */
1696           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1697             {
1698               c = '>';  /* Terminator.  */
1699               cur_token->type = CPP_HEADER_NAME;
1700               goto do_parse_string;
1701             }
1702           PUSH_TOKEN (CPP_LESS);
1703           break;
1704
1705         case '%':
1706           /* Digraph: "<%" is a '{'  */
1707           cur_token->type = CPP_MOD;
1708           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1709               && CPP_OPTION (pfile, digraphs))
1710             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1711           cur_token++;
1712           break;
1713
1714         case '?':
1715           if (cur + 1 < buffer->rlimit && *cur == '?'
1716               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1717             {
1718               /* Handle trigraph.  */
1719               cur++;
1720               switch (*cur++)
1721                 {
1722                 case '(': goto make_open_square;
1723                 case ')': goto make_close_square;
1724                 case '<': goto make_open_brace;
1725                 case '>': goto make_close_brace;
1726                 case '=': goto make_hash;
1727                 case '!': goto make_or;
1728                 case '-': goto make_complement;
1729                 case '/': goto make_backslash;
1730                 case '\'': goto make_xor;
1731                 }
1732             }
1733           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1734             {
1735               /* GNU C++ defines <? and >? operators.  */
1736               if (PREV_TOKEN_TYPE == CPP_LESS)
1737                 {
1738                   REVISE_TOKEN (CPP_MIN);
1739                   break;
1740                 }
1741               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1742                 {
1743                   REVISE_TOKEN (CPP_MAX);
1744                   break;
1745                 }
1746             }
1747           PUSH_TOKEN (CPP_QUERY);
1748           break;
1749
1750         case '.':
1751           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1752               && IMMED_TOKEN ()
1753               && !(cur_token[-1].flags & PREV_WHITE))
1754             {
1755               cur_token -= 2;
1756               PUSH_TOKEN (CPP_ELLIPSIS);
1757             }
1758           else
1759             PUSH_TOKEN (CPP_DOT);
1760           break;
1761
1762         make_complement:
1763         case '~': PUSH_TOKEN (CPP_COMPL); break;
1764         make_xor:
1765         case '^': PUSH_TOKEN (CPP_XOR); break;
1766         make_open_brace:
1767         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1768         make_close_brace:
1769         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1770         make_open_square:
1771         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1772         make_close_square:
1773         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1774         make_backslash:
1775         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1776         case '!': PUSH_TOKEN (CPP_NOT); break;
1777         case ',': PUSH_TOKEN (CPP_COMMA); break;
1778         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1779         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1780         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1781
1782         case '$':
1783           if (CPP_OPTION (pfile, dollars_in_ident))
1784             goto letter;
1785           /* Fall through */
1786         default:
1787           cur_token->val.aux = c;
1788           PUSH_TOKEN (CPP_OTHER);
1789           break;
1790         }
1791     }
1792
1793   /* Run out of token space?  */
1794   if (cur_token == token_limit)
1795     {
1796       list->tokens_used = cur_token - list->tokens;
1797       _cpp_expand_token_space (list, 256);
1798       goto expanded;
1799     }
1800
1801   cur_token->flags = flags;
1802   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1803     {
1804       if (cur > buffer->buf && !is_vspace (cur[-1]))
1805         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1806                                CPP_BUF_COLUMN (buffer, cur),
1807                                "no newline at end of file");
1808       cur_token++->type = CPP_EOF;
1809     }
1810
1811  out:
1812   /* All tokens are allocated, so the memory location is fixed.  */
1813   first = &list->tokens[first_token];
1814
1815   /* Don't complain about the null directive, nor directives in
1816      assembly source: we don't know where the comments are, and # may
1817      introduce assembler pseudo-ops.  Don't complain about invalid
1818      directives in skipped conditional groups (6.10 p4).  */
1819   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1820       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1821     {
1822       if (first[1].type == CPP_NAME)
1823         cpp_error (pfile, "invalid preprocessing directive #%s",
1824                    first[1].val.node->name);
1825       else
1826         cpp_error (pfile, "invalid preprocessing directive");
1827
1828       /* Discard this line to prevent further errors from cc1.  */
1829       _cpp_clear_toklist (list);
1830       goto retry;
1831     }
1832
1833   /* Put EOF at end of known directives.  This covers "directives do
1834      not extend beyond the end of the line (description 6.10 part 2)".  */
1835   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1836     {
1837       pfile->first_directive_token = first;
1838       cur_token++->type = CPP_EOF;
1839     }
1840
1841   first->flags |= BOL;
1842   if (first_token != 0)
1843     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1844        up the invocation of a function-like macro, new line is
1845        considered a normal white-space character.  */
1846     first->flags |= PREV_WHITE;
1847
1848   buffer->cur = cur;
1849   list->tokens_used = cur_token - list->tokens;
1850   pfile->in_lex_line = 0;
1851 }
1852
1853 /* Write the spelling of a token TOKEN, with any appropriate
1854    whitespace before it, to FP.  PREV is the previous token, which
1855    is used to determine if we need to shove in an extra space in order
1856    to avoid accidental token paste.  If WHITE is 0, do not insert any
1857    leading whitespace.

        When WHITE is nonzero the leading whitespace is chosen as follows:
        a token flagged BOL with a nonzero column gets (col - 1) spaces;
        otherwise a token flagged PREV_WHITE gets one space; otherwise one
        space is written only if PREV is non-null and either can_paste
        returns something other than CPP_EOF for the pair, or the pair is
        + followed by ++, or - followed by --.

        The spelling itself depends on TOKEN_SPELL (token); tokens in the
        SPELL_NONE class produce no output.  PFILE is only passed on to
        can_paste.  */
1858 static void
1859 output_token (pfile, fp, token, prev, white)
1860      cpp_reader *pfile;
1861      FILE *fp;
1862      const cpp_token *token, *prev;
1863      int white;
1864 {
1865   if (white)
1866     {
1867       int dummy;
1868
1869       if (token->col && (token->flags & BOL))
1870         {
1871           /* Supply enough whitespace to put this token in its original
1872              column.  Don't bother trying to reconstruct tabs; we can't
1873              get it right in general, and nothing ought to care.  (Yes,
1874              some things do care; the fault lies with them.)  */
1875           unsigned int spaces = token->col - 1;
1876
1877           while (spaces--)
1878             putc (' ', fp);
1879         }
1880       else if (token->flags & PREV_WHITE)
1881         putc (' ', fp);
1882       else
1883       /* Check for and prevent accidental token pasting.
1884          In addition to the cases handled by can_paste, consider
1885
1886          a + ++b - if there is not a space between the + and ++, it
1887          will be misparsed as a++ + b.  But + ## ++ doesn't produce
1888          a valid token.  */
1889         if (prev
1890             && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1891                 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1892                 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1893         putc (' ', fp);
1894     }
1895
1896   switch (TOKEN_SPELL (token))
1897     {
1898     case SPELL_OPERATOR:
1899       {
1900         const unsigned char *spelling;
1901
             /* Tokens flagged DIGRAPH are spelled from the digraph table;
                tokens flagged NAMED_OP are written like identifiers, from
                the name stored in their hash node.  */
1902         if (token->flags & DIGRAPH)
1903           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1904         else if (token->flags & NAMED_OP)
1905           goto spell_ident;
1906         else
1907           spelling = TOKEN_NAME (token);
1908
1909         ufputs (spelling, fp);
1910       }
1911       break;
1912
1913     case SPELL_IDENT:
1914       spell_ident:
1915       ufputs (token->val.node->name, fp);
1916       break;
1917
1918     case SPELL_STRING:
1919       {
             /* LEFT and RIGHT are the delimiters and TAG an optional prefix
                character; a zero value means that nothing is written.  */
1920         int left, right, tag;
1921         switch (token->type)
1922           {
1923           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1924           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1925           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1926           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1927           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1928           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1929           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1930           }
1931         if (tag) putc (tag, fp);
1932         if (left) putc (left, fp);
1933         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1934         if (right) putc (right, fp);
1935       }
1936       break;
1937
1938     case SPELL_CHAR:
1939       putc (token->val.aux, fp);
1940       break;
1941
1942     case SPELL_NONE:
1943       /* Placemarker or EOF - no output.  (Macro args are handled
1944          elsewhere.)  */
1945       break;
1946     }
1947 }
1948
1949 /* Write to FP the name the user gave to parameter number ARG_NO of the
1950    macro whose expansion is LIST.  Names are read from list->namebuf.  */
1951 static void
1952 dump_param_spelling (fp, list, arg_no)
1953      FILE *fp;
1954      const cpp_toklist *list;
1955      unsigned int arg_no;
1956 {
1957   const U_CHAR *name;
1958
1959   for (name = list->namebuf; arg_no > 0; arg_no--)
1960     name += ustrlen (name) + 1;	/* Step over one name and its NUL.  */
1961   ufputs (name, fp);
1962 }
1963
1964 /* Output all the tokens of LIST, starting at TOKEN, to FP.

        TOKEN is compared against list->tokens + list->tokens_used, so it is
        expected to point into LIST's token array; output stops at that
        limit.  The first token written gets no leading whitespace, every
        later one is written with WHITE set.

        CPP_MACRO_ARG tokens are written here rather than by output_token:
        an optional space, a '#' when the token is flagged STRINGIFY_ARG,
        then the parameter name found by dump_param_spelling.  Every token
        flagged PASTE_LEFT is followed by " ##".  */
1965 void
1966 cpp_output_list (pfile, fp, list, token)
1967      cpp_reader *pfile;
1968      FILE *fp;
1969      const cpp_toklist *list;
1970      const cpp_token *token;
1971 {
1972   const cpp_token *limit = list->tokens + list->tokens_used;
1973   const cpp_token *prev = 0;
1974   int white = 0;
1975
1976   while (token < limit)
1977     {
1978       /* XXX Find some way we can write macro args from inside
1979          output_token/spell_token.  */
1980       if (token->type == CPP_MACRO_ARG)
1981         {
1982           if (white && token->flags & PREV_WHITE)
1983             putc (' ', fp);
1984           if (token->flags & STRINGIFY_ARG)
1985             putc ('#', fp);
               /* For a macro argument, val.aux holds the parameter index.  */
1986           dump_param_spelling (fp, list, token->val.aux);
1987         }
1988       else
1989         output_token (pfile, fp, token, prev, white);
1990       if (token->flags & PASTE_LEFT)
1991         fputs (" ##", fp);
1992       prev = token;
1993       token++;
1994       white = 1;
1995     }
1996 }
1997
1998
1999 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
2000    already contain enough space to hold the token's spelling.
2001    Returns a pointer to the character after the last character
2002    written.

        No terminating NUL is stored.  For a token in the SPELL_NONE class
        nothing is written: cpp_ice is called with an "Unspellable token"
        message and BUFFER is returned unchanged.  PFILE is used only for
        that call.  */
2003
2004 static unsigned char *
2005 spell_token (pfile, token, buffer)
2006      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
2007      const cpp_token *token;
2008      unsigned char *buffer;
2009 {
2010   switch (TOKEN_SPELL (token))
2011     {
2012     case SPELL_OPERATOR:
2013       {
2014         const unsigned char *spelling;
2015         unsigned char c;
2016
             /* Tokens flagged DIGRAPH are spelled from the digraph table;
                tokens flagged NAMED_OP are copied like identifiers.  */
2017         if (token->flags & DIGRAPH)
2018           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
2019         else if (token->flags & NAMED_OP)
2020           goto spell_ident;
2021         else
2022           spelling = TOKEN_NAME (token);
2023
             /* Copy up to, but not including, the spelling's NUL.  */
2024         while ((c = *spelling++) != '\0')
2025           *buffer++ = c;
2026       }
2027       break;
2028
2029     case SPELL_IDENT:
2030       spell_ident:
2031       memcpy (buffer, token->val.node->name, token->val.node->length);
2032       buffer += token->val.node->length;
2033       break;
2034
2035     case SPELL_STRING:
2036       {
             /* LEFT and RIGHT are the delimiters and TAG an optional prefix
                character; a zero value means that nothing is stored.  */
2037         int left, right, tag;
2038         switch (token->type)
2039           {
2040           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
2041           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
2042           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
2043           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
2044           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
2045           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
2046           default:              left = '\0'; right = '\0'; tag = '\0'; break;
2047           }
2048         if (tag) *buffer++ = tag;
2049         if (left) *buffer++ = left;
2050         memcpy (buffer, token->val.str.text, token->val.str.len);
2051         buffer += token->val.str.len;
2052         if (right) *buffer++ = right;
2053       }
2054       break;
2055
2056     case SPELL_CHAR:
2057       *buffer++ = token->val.aux;
2058       break;
2059
2060     case SPELL_NONE:
2061       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
2062       break;
2063     }
2064
2065   return buffer;
2066 }
2067
2068 /* Macro expansion algorithm.
2069
2070 Macro expansion is implemented by a single-pass algorithm; there are
2071 no rescan passes involved.  cpp_get_token expands just enough to be
2072 able to return a token to the caller; a consequence is that when it
2073 returns the preprocessor can be in a state of mid-expansion.  The
2074 algorithm does not work by fully expanding a macro invocation into
2075 some kind of token list, and then returning them one by one.
2076
2077 Our expansion state is recorded in a context stack.  We start out with
2078 a single context on the stack, let's call it base context.  This
2079 consists of the token list returned by lex_line that forms the next
2080 logical line in the source file.
2081
2082 The current level in the context stack is stored in the cur_context
2083 member of the cpp_reader structure.  The context it references keeps,
2084 amongst other things, a count of how many tokens form that context and
2085 our position within those tokens.
2086
2087 Fundamentally, calling cpp_get_token will return the next token from
2088 the current context.  If we're at the end of the current context, that
2089 context is popped from the stack first, unless it is the base context,
2090 in which case the next logical line is lexed from the source file.
2091
2092 However, before returning the token, if it is a CPP_NAME token
2093 _cpp_get_token checks to see if it is a macro and if it is enabled.
2094 Each time it encounters a macro name, it calls push_macro_context.
2095 This function checks that the macro should be expanded (with
2096 is_macro_disabled), and if so pushes a new macro context on the stack
2097 which becomes the current context.  It then loops back to read the
2098 first token of the macro context.
2099
2100 A macro context basically consists of the token list representing the
2101 macro's replacement list, which was saved in the hash table by
2102 save_macro_expansion when its #define statement was parsed.  If the
2103 macro is function-like, it also contains the tokens that form the
2104 arguments to the macro.  I say more about macro arguments below, but
2105 for now just saying that each argument is a set of pointers to tokens
2106 is enough.
2107
2108 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2109 token.  This represents an argument passed to the macro, with the
2110 argument number stored in the token's AUX field.  The argument should
2111 be substituted; this is achieved by pushing an "argument context".  An
2112 argument context just refers to the tokens forming the argument,
2113 which are obtained directly from the macro context.  The STRINGIFY
2114 flag on a CPP_MACRO_ARG token indicates that the argument should be
2115 stringified.
2116
2117 Here are a few simple rules the context stack obeys:-
2118
2119   1) The lex_line token list is always context zero.
2120
2121   2) Context 1, if it exists, must be a macro context.
2122
2123   3) An argument context can only appear above a macro context.
2124
2125   4) A macro context can appear above the base context, another macro
2126   context, or an argument context.
2127
2128   5) These imply that the minimal level of an argument context is 2.
2129
2130 The only tricky thing left is ensuring that macros are enabled and
2131 disabled correctly.  The algorithm controls macro expansion by the
2132 level of the context a token is taken from in the context stack.  If a
2133 token is taken from a level equal to no_expand_level (a member of
2134 struct cpp_reader), no expansion is performed.
2135
2136 When popping a context off the stack, if no_expand_level equals the
2137 level of the popped context, it is reduced by one to match the new
2138 context level, so that expansion is still disabled.  It does not
2139 increase if a context is pushed, though.  It starts out life as
2140 UINT_MAX, which has the effect that initially macro expansion is
2141 enabled.  I explain how this mechanism works below.
2142
2143 The standard requires:-
2144
2145   1) Arguments to be fully expanded before substitution.
2146
2147   2) Stringified arguments to not be expanded, nor the tokens
2148   immediately surrounding a ## operator.
2149
2150   3) Continual rescanning until there are no more macros left to
2151   replace.
2152
2153   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2154   expanded again during later rescans.  This prevents infinite
2155   recursion.
2156
2157 The first thing to observe is that stage 3) is mostly redundant.
2158 Since a macro is disabled once it has been expanded, how can a rescan
2159 find an unexpanded macro name?  There are only two cases where this is
2160 possible:-
2161
2162   a) If the macro name results from a token paste operation.
2163
2164   b) If the macro in question is a function-like macro that hasn't
2165   already been expanded because previously there was not the required
2166   '(' token immediately following it.  This is only possible when an
2167   argument is substituted, and after substitution the last token of
2168   the argument can bind with a parenthesis appearing in the tokens
2169   following the substitution.  Note that if the '(' appears within the
2170   argument, the ')' must too, as expanding macro arguments cannot
2171   "suck in" tokens outside the argument.
2172
2173 So we tackle this as follows.  When parsing the macro invocation for
2174 arguments, we record the tokens forming each argument as a list of
2175 pointers to those tokens.  We do not expand any tokens that are "raw",
2176 i.e. directly from the macro invocation, but other tokens that come
2177 from (nested) argument substitution are fully expanded.
2178
2179 This is achieved by setting the no_expand_level to that of the macro
2180 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2181 forming an argument, because parse_args (indirectly) calls
2182 get_raw_token which automatically pushes argument contexts and traces
2183 into them.  Since these contexts are at a higher level than the
2184 no_expand_level, they get fully macro expanded.
2185
2186 "Raw" and non-raw tokens are separated in arguments by null pointers,
2187 with the policy that the initial state of an argument is raw.  If the
2188 first token is not raw, it should be preceded by a null pointer.  When
2189 tracing through the tokens of an argument context, each time
2190 get_raw_token encounters a null pointer, it toggles the flag
2191 CONTEXT_RAW.
2192
2193 This flag, when set, indicates to is_macro_disabled that we are
2194 reading raw tokens which should be macro-expanded.  Similarly, if
2195 clear, is_macro_disabled suppresses re-expansion.
2196
2197 It's probably time for an example.
2198
2199 #define hash #
2200 #define str(x) #x
2201 #define xstr(y) str(y hash)
2202 str(hash)                       // "hash"
2203 xstr(hash)                      // "# hash"
2204
2205 In the invocation of str, parse_args turns off macro expansion and so
2206 parses the argument as <hash>.  This is the only token (pointer)
2207 passed as the argument to str.  Since <hash> is raw there is no need
2208 for an initial null pointer.  stringify_arg is called from
2209 get_raw_token when tracing through the expansion of str, since the
2210 argument has the STRINGIFY flag set.  stringify_arg turns off
2211 macro expansion by setting the no_expand_level to that of the argument
2212 context.  Thus it gets the token <hash> and stringifies it to "hash"
2213 correctly.
2214
2215 Similarly xstr is passed <hash>.  However, when parse_args is parsing
2216 the invocation of str() in xstr's expansion, get_raw_token encounters
2217 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2218 an argument context, and enters the tokens of the argument,
2219 i.e. <hash>.  This is at a higher context level than parse_args
2220 disabled, and so is_macro_disabled permits expansion of it and a macro
2221 context is pushed on top of the argument context.  This contains the
2222 <#> token, and the end result is that <hash> is macro expanded.
2223 However, after popping off the argument context, the <hash> of xstr's
2224 expansion does not get macro expanded because we're back at the
2225 no_expand_level.  The end result is that the argument passed to str is
2226 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2227 raw, <#> is not raw, but then <hash> is.
2228
2229 */
2230
2231
2232 /* Release ARGS together with its end-offset and token-pointer arrays.  */
2233 static void
2234 free_macro_args (args)
2235      macro_args *args;
2236 {
2237   free (args->ends);
2238   if (args->tokens != 0)
2239     free ((PTR) args->tokens);
2240   free (args);
2241 }
2242
2243 /* Determines if a macro has been already used (and is therefore
2244    disabled).

        EXPANSION is the replacement list of the macro in question and TOKEN
        is the token that named it.  Returns 1 if the macro must not be
        expanded at this point and 0 if it may be.

        Besides the "already used" test, 1 is returned when the preprocessed
        option is set, when TOKEN comes from an argument context whose
        CONTEXT_RAW flag is clear (unless TOKEN is flagged PASTED), and when
        a function-like macro is not followed by a '('.

        The last test really reads the next token with _cpp_get_token.  If
        that token is not CPP_OPEN_PAREN it is handed back with
        _cpp_push_token; if it is, it is not pushed back.  */
2245 static int
2246 is_macro_disabled (pfile, expansion, token)
2247      cpp_reader *pfile;
2248      const cpp_toklist *expansion;
2249      const cpp_token *token;
2250 {
2251   cpp_context *context = CURRENT_CONTEXT (pfile);
2252
2253   /* Don't expand anything if this file has already been preprocessed.  */
2254   if (CPP_OPTION (pfile, preprocessed))
2255     return 1;
2256
2257   /* Arguments on either side of ## are inserted in place without
2258      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2259      occurs during a later rescan pass.  The effect is that we expand
2260      iff we would as part of the macro's expansion list, so we should
2261      drop to the macro's context.  */
2262   if (IS_ARG_CONTEXT (context))
2263     {
2264       if (token->flags & PASTED)
2265         context--;
2266       else if (!(context->flags & CONTEXT_RAW))
2267         return 1;
2268       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2269         context--;
2270     }
2271
2272   /* Have we already used this macro?  */
2273   while (context->level > 0)
2274     {
2275       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2276         return 1;
2277       /* Raw argument tokens are judged based on the token list they
2278          came from.  */
           /* NOTE(review): this jumps to pfile->contexts[context->level];
              presumably LEVEL records the context the raw tokens were
              taken from - confirm against the code that pushes contexts.  */
2279       if (context->flags & CONTEXT_RAW)
2280         context = pfile->contexts + context->level;
2281       else
2282         context--;
2283     }
2284
2285   /* Function-like macros may be disabled if the '(' is not in the
2286      current context.  We check this without disrupting the context
2287      stack.  */
2288   if (expansion->paramc >= 0)
2289     {
2290       const cpp_token *next;
2291       unsigned int prev_nme;
2292
2293       context = CURRENT_CONTEXT (pfile);
2294       /* Drop down any contexts we're at the end of: the '(' may
2295          appear in lower macro expansions, or in the rest of the file.  */
2296       while (context->posn == context->count && context > pfile->contexts)
2297         {
2298           context--;
2299           /* If we matched, we are disabled, as we appear in the
2300              expansion of each macro we meet.  */
2301           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2302             return 1;
2303         }
2304
           /* Read one token of lookahead with no_expand_level set to the
              index of the context found above, then hand the saved level
              to restore_macro_expansion.  */
2305       prev_nme = pfile->no_expand_level;
2306       pfile->no_expand_level = context - pfile->contexts;
2307       next = _cpp_get_token (pfile);
2308       restore_macro_expansion (pfile, prev_nme);
2309       if (next->type != CPP_OPEN_PAREN)
2310         {
2311           _cpp_push_token (pfile, next);
2312           if (CPP_WTRADITIONAL (pfile))
2313             cpp_warning (pfile,
2314          "function macro %s must be used with arguments in traditional C",
2315                          token->val.node->name);
2316           return 1;
2317         }
2318     }
2319
2320   return 0;
2321 }
2322
2323 /* Append TOKEN to the array of token pointers that parse_args is
2324    collecting in ARGS, enlarging the array first if it is full.  */
2325 static void
2326 save_token (args, token)
2327      macro_args *args;
2328      const cpp_token *token;
2329 {
2330   if (args->capacity == args->used)
2331     {
2332       /* Grow to twice the old capacity plus 100 entries.  */
2333       args->capacity = args->capacity * 2 + 100;
2334       args->tokens = (const cpp_token **) xrealloc
2335         ((PTR) args->tokens, args->capacity * sizeof (const cpp_token *));
2336     }
2337   args->tokens[args->used++] = token;
2338 }
2339
2340 /* Take and save raw tokens until we finish one argument.  Empty
2341    arguments are saved as a single CPP_PLACEMARKER token.

        Tokens are read with _cpp_get_token and appended to ARGS with
        save_token.  If VAR_ARGS is nonzero, commas never end the argument.
        A token counts as raw when pfile->cur_context <= PAREN_CONTEXT
        (parse_args passes the value cur_context had when it was entered);
        each change between raw and non-raw is marked by saving a null
        pointer before the token, the initial state being raw.

        *PCOUNT receives the number of entries saved for this argument, null
        markers included.  Returns the token that ended the argument: a ')'
        outside any nested parentheses, a ',' (same, and only if VAR_ARGS is
        zero), or CPP_EOF.  The terminator itself is not saved.  */
2342 static const cpp_token *
2343 parse_arg (pfile, var_args, paren_context, args, pcount)
2344      cpp_reader *pfile;
2345      int var_args;
2346      unsigned int paren_context;
2347      macro_args *args;
2348      unsigned int *pcount;
2349 {
2350   const cpp_token *token;
2351   unsigned int paren = 0, count = 0;
2352   int raw, was_raw = 1;
2353
2354   for (count = 0;; count++)
2355     {
2356       token = _cpp_get_token (pfile);
2357
2358       switch (token->type)
2359         {
2360         default:
2361           break;
2362
2363         case CPP_OPEN_PAREN:
2364           paren++;
2365           break;
2366
2367         case CPP_CLOSE_PAREN:
               /* Post-decrement: a ')' seen at nesting depth zero ends
                  the argument, any other just closes a nested '('.  */
2368           if (paren-- != 0)
2369             break;
2370           goto out;
2371
2372         case CPP_COMMA:
2373           /* Commas are not terminators within parentheses or var_args.  */
2374           if (paren || var_args)
2375             break;
2376           goto out;
2377
2378         case CPP_EOF:           /* Error reported by caller.  */
2379           goto out;
2380         }
2381
2382       raw = pfile->cur_context <= paren_context;
2383       if (raw != was_raw)
2384         {
               /* The null marker occupies an entry of its own, so it is
                  counted in addition to the loop's own increment.  */
2385           was_raw = raw;
2386           save_token (args, 0);
2387           count++;
2388         }
2389       save_token (args, token);
2390     }
2391
2392  out:
2393   if (count == 0)
2394     {
2395       /* Duplicate the placemarker.  Then we can set its flags and
2396          position and safely be using more than one.  */
2397       save_token (args, duplicate_token (pfile, &placemarker_token));
2398       count++;
2399     }
2400
2401   *pcount = count;
2402   return token;
2403 }
2404
2405 /* This macro returns true if the argument starting at offset O of arglist
2406    A is empty - that is, it's either a single PLACEMARKER token, or a null
2407    pointer followed by a PLACEMARKER.  Only the entry at O (or the one
        after it, when the entry at O is null) is inspected.  A and O are
        each evaluated more than once, so they must be free of side
        effects.  */
2408
2409 #define empty_argument(A, O) \
2410  ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2411                  : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2412
2413 /* Parse the arguments making up a macro invocation.  Nested arguments
2414    are automatically macro expanded, but immediate macros are not
2415    expanded; this enables e.g. operator # to work correctly.  Returns
2416    non-zero on error.

        HP is the hash node of the macro being invoked.  The tokens of every
        argument are appended to ARGS by parse_arg, and for each declared
        parameter I, args->ends[I] is set to the running total of entries
        saved up to and including that argument.

        The errors, each reported with cpp_error, are: reaching CPP_EOF
        before the closing parenthesis, too few arguments, and too many
        arguments.  A missing final argument is accepted when the macro is
        flagged VAR_ARGS; a placemarker flagged VOID_REST is saved in its
        place.  */
2417 static int
2418 parse_args (pfile, hp, args)
2419      cpp_reader *pfile;
2420      cpp_hashnode *hp;
2421      macro_args *args;
2422 {
2423   const cpp_token *token;
2424   const cpp_toklist *macro;
2425   unsigned int total = 0;
2426   unsigned int paren_context = pfile->cur_context;
2427   int argc = 0;
2428
2429   macro = hp->value.expansion;
2430   do
2431     {
2432       unsigned int count;
2433
           /* When parsing the last declared parameter of a VAR_ARGS macro,
              tell parse_arg not to stop at commas.  */
2434       token = parse_arg (pfile, (argc + 1 == macro->paramc
2435                                  && (macro->flags & VAR_ARGS)),
2436                          paren_context, args, &count);
           /* Surplus arguments are still parsed and counted in ARGC, but
              no end offset is recorded for them.  */
2437       if (argc < macro->paramc)
2438         {
2439           total += count;
2440           args->ends[argc] = total;
2441         }
2442       argc++;
2443     }
2444   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2445
2446   if (token->type == CPP_EOF)
2447     {
2448       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2449       return 1;
2450     }
2451   else if (argc < macro->paramc)
2452     {
2453       /* A rest argument is allowed to not appear in the invocation at all.
2454          e.g. #define debug(format, args...) ...
2455          debug("string");
2456          This is exactly the same as if the rest argument had received no
2457          tokens - debug("string",);  This extension is deprecated.  */
2458
2459       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2460         {
2461           /* Duplicate the placemarker.  Then we can set its flags and
2462              position and safely be using more than one.  */
2463           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2464           pm->flags = VOID_REST;
2465           save_token (args, pm);
2466           args->ends[argc] = total + 1;
2467
2468           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2469             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2470
2471           return 0;
2472         }
2473       else
2474         {
2475           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2476           return 1;
2477         }
2478     }
2479   /* An empty argument to an empty function-like macro is fine.  */
2480   else if (argc > macro->paramc
2481            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2482     {
2483       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2484       return 1;
2485     }
2486
2487   return 0;
2488 }
2489
2490 /* Adds backslashes before all backslashes and double quotes appearing
2491    in strings.  Non-printable characters are converted to octal.

        Copies LEN bytes from SRC to DEST in quoted form and returns a
        pointer just past the last output byte that was counted.  Each
        source byte yields at most four output bytes (a backslash and three
        octal digits).

        Note that the octal escape is produced with sprintf, which also
        stores a terminating NUL after the four bytes.  That NUL is not
        counted in the return value and is overwritten by whatever is
        written next, but if the last byte of SRC is non-printable it means
        DEST needs one writable byte beyond the returned pointer.  */
2492 static U_CHAR *
2493 quote_string (dest, src, len)
2494      U_CHAR *dest;
2495      const U_CHAR *src;
2496      unsigned int len;
2497 {
2498   while (len--)
2499     {
2500       U_CHAR c = *src++;
2501
2502       if (c == '\\' || c == '"')
2503         {
2504           *dest++ = '\\';
2505           *dest++ = c;
2506         }
2507       else
2508         {
2509           if (ISPRINT (c))
2510             *dest++ = c;
2511           else
2512             {
                   /* Writes five bytes (escape plus NUL); only four are
                      counted.  */
2513               sprintf ((char *) dest, "\\%03o", c);
2514               dest += 4;
2515             }
2516         }
2517     }
2518
2519   return dest;
2520 }
2521
2522 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2523    CPP_STRING token containing TEXT in quoted form.  */
2524 static cpp_token *
2525 make_string_token (token, text, len)
2526      cpp_token *token;
2527      const U_CHAR *text;
2528      unsigned int len;
2529 {
2530   U_CHAR *buf;
2531
2532   buf = (U_CHAR *) xmalloc (len * 4);
2533   token->type = CPP_STRING;
2534   token->flags = 0;
2535   token->val.str.text = buf;
2536   token->val.str.len = quote_string (buf, text, len) - buf;
2537   return token;
2538 }
2539
2540 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2541    evaluating to NUMBER.  */
2542 static cpp_token *
2543 alloc_number_token (pfile, number)
2544      cpp_reader *pfile;
2545      int number;
2546 {
2547   cpp_token *result;
2548   char *buf;
2549
2550   result = get_temp_token (pfile);
2551   buf = xmalloc (20);
2552   sprintf (buf, "%d", number);
2553
2554   result->type = CPP_NUMBER;
2555   result->flags = 0;
2556   result->val.str.text = (U_CHAR *) buf;
2557   result->val.str.len = strlen (buf);
2558   return result;
2559 }
2560
2561 /* Returns a temporary token from the temporary token store of PFILE.  */
2562 static cpp_token *
2563 get_temp_token (pfile)
2564      cpp_reader *pfile;
2565 {
2566   if (pfile->temp_used == pfile->temp_alloced)
2567     {
2568       if (pfile->temp_used == pfile->temp_cap)
2569         {
2570           pfile->temp_cap += pfile->temp_cap + 20;
2571           pfile->temp_tokens = (cpp_token **) xrealloc
2572             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2573         }
2574       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2575         (sizeof (cpp_token));
2576     }
2577
2578   return pfile->temp_tokens[pfile->temp_used++];
2579 }
2580
2581 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2582 static void
2583 release_temp_tokens (pfile)
2584      cpp_reader *pfile;
2585 {
2586   while (pfile->temp_used)
2587     {
2588       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2589
2590       if (TOKEN_SPELL (token) == SPELL_STRING)
2591         {
2592           free ((char *) token->val.str.text);
2593           token->val.str.text = 0;
2594         }
2595     }
2596 }
2597
2598 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2599 void
2600 _cpp_free_temp_tokens (pfile)
2601      cpp_reader *pfile;
2602 {
2603   if (pfile->temp_tokens)
2604     {
2605       /* It is possible, though unlikely (looking for '(' of a funlike
2606          macro into EOF), that we haven't released the tokens yet.  */
2607       release_temp_tokens (pfile);
2608       while (pfile->temp_alloced)
2609         free (pfile->temp_tokens[--pfile->temp_alloced]);
2610       free (pfile->temp_tokens);
2611     }
2612
2613   if (pfile->date)
2614     {
2615       free ((char *) pfile->date->val.str.text);
2616       free (pfile->date);
2617       free ((char *) pfile->time->val.str.text);
2618       free (pfile->time);
2619     }
2620 }
2621
2622 /* Copy TOKEN into a temporary token from PFILE's store.  */
2623 static cpp_token *
2624 duplicate_token (pfile, token)
2625      cpp_reader *pfile;
2626      const cpp_token *token;
2627 {
2628   cpp_token *result = get_temp_token (pfile);
2629
2630   *result = *token;
2631   if (TOKEN_SPELL (token) == SPELL_STRING)
2632     {
2633       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2634       memcpy (buff, token->val.str.text, token->val.str.len);
2635       result->val.str.text = buff;
2636     }
2637   return result;
2638 }
2639
2640 /* Determine whether two tokens can be pasted together, and if so,
2641    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2642    be pasted, or the appropriate type for the merged token if they
2643    can.  */
2644 static enum cpp_ttype
2645 can_paste (pfile, token1, token2, digraph)
2646      cpp_reader * pfile;
2647      const cpp_token *token1, *token2;
2648      int* digraph;
2649 {
2650   enum cpp_ttype a = token1->type, b = token2->type;
2651   int cxx = CPP_OPTION (pfile, cplusplus);
2652
2653   /* Treat named operators as if they were ordinary NAMEs.  */
2654   if (token1->flags & NAMED_OP)
2655     a = CPP_NAME;
2656   if (token2->flags & NAMED_OP)
2657     b = CPP_NAME;
2658
2659   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2660     return a + (CPP_EQ_EQ - CPP_EQ);
2661
2662   switch (a)
2663     {
2664     case CPP_GREATER:
2665       if (b == a) return CPP_RSHIFT;
2666       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2667       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2668       break;
2669     case CPP_LESS:
2670       if (b == a) return CPP_LSHIFT;
2671       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2672       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2673       if (CPP_OPTION (pfile, digraphs))
2674         {
2675           if (b == CPP_COLON)
2676             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2677           if (b == CPP_MOD)
2678             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2679         }
2680       break;
2681
2682     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2683     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2684     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2685
2686     case CPP_MINUS:
2687       if (b == a)               return CPP_MINUS_MINUS;
2688       if (b == CPP_GREATER)     return CPP_DEREF;
2689       break;
2690     case CPP_COLON:
2691       if (b == a && cxx)        return CPP_SCOPE;
2692       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2693         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2694       break;
2695
2696     case CPP_MOD:
2697       if (CPP_OPTION (pfile, digraphs))
2698         {
2699           if (b == CPP_GREATER)
2700             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2701           if (b == CPP_COLON)
2702             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2703         }
2704       break;
2705     case CPP_DEREF:
2706       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2707       break;
2708     case CPP_DOT:
2709       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2710       if (b == CPP_NUMBER)      return CPP_NUMBER;
2711       break;
2712
2713     case CPP_HASH:
2714       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2715         /* %:%: digraph */
2716         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2717       break;
2718
2719     case CPP_NAME:
2720       if (b == CPP_NAME)        return CPP_NAME;
2721       if (b == CPP_NUMBER
2722           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2723       if (b == CPP_CHAR
2724           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2725       if (b == CPP_STRING
2726           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2727       break;
2728
2729     case CPP_NUMBER:
2730       if (b == CPP_NUMBER)      return CPP_NUMBER;
2731       if (b == CPP_NAME)        return CPP_NUMBER;
2732       if (b == CPP_DOT)         return CPP_NUMBER;
2733       /* Numbers cannot have length zero, so this is safe.  */
2734       if ((b == CPP_PLUS || b == CPP_MINUS)
2735           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2736         return CPP_NUMBER;
2737       break;
2738
2739     case CPP_OTHER:
2740       if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2741         {
2742           if (b == CPP_NAME)    return CPP_NAME;
2743           if (b == CPP_STRING)  return CPP_OSTRING;
2744         }
2745
2746     default:
2747       break;
2748     }
2749
2750   return CPP_EOF;
2751 }
2752
2753 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2754 static const cpp_token *
2755 maybe_paste_with_next (pfile, token)
2756      cpp_reader *pfile;
2757      const cpp_token *token;
2758 {
2759   cpp_token *pasted;
2760   const cpp_token *second;
2761   cpp_context *context = CURRENT_CONTEXT (pfile);
2762
2763   /* Is this token on the LHS of ## ? */
2764
2765   while ((token->flags & PASTE_LEFT)
2766          || ((context->flags & CONTEXT_PASTEL)
2767              && context->posn == context->count))
2768     {
2769       /* Suppress macro expansion for next token, but don't conflict
2770          with the other method of suppression.  If it is an argument,
2771          macro expansion within the argument will still occur.  */
2772       pfile->paste_level = pfile->cur_context;
2773       second = _cpp_get_token (pfile);
2774       pfile->paste_level = 0;
2775
2776       /* Ignore placemarker argument tokens (cannot be from an empty
2777          macro since macros are not expanded).  */
2778       if (token->type == CPP_PLACEMARKER)
2779         pasted = duplicate_token (pfile, second);
2780       else if (second->type == CPP_PLACEMARKER)
2781         {
2782           /* GCC has special extended semantics for , ## b where b is
2783              a varargs parameter: the comma disappears if b was given
2784              no actual arguments (not merely if b is an empty
2785              argument).  */
2786           if (token->type == CPP_COMMA && second->flags & VOID_REST)
2787             pasted = duplicate_token (pfile, second);
2788           else
2789             pasted = duplicate_token (pfile, token);
2790         }
2791       else
2792         {
2793           int digraph = 0;
2794           enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2795
2796           if (type == CPP_EOF)
2797             {
2798               if (CPP_OPTION (pfile, warn_paste))
2799                 {
2800                   /* Do not complain about , ## <whatever> if
2801                      <whatever> came from a variable argument, because
2802                      the author probably intended the ## to trigger
2803                      the special extended semantics (see above).  */
2804                   if (token->type == CPP_COMMA
2805                       && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2806                       && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2807                     /* no warning */;
2808                   else
2809                     cpp_warning (pfile,
2810                         "pasting would not give a valid preprocessing token");
2811                 }
2812               _cpp_push_token (pfile, second);
2813               return token;
2814             }
2815
2816           if (type == CPP_NAME || type == CPP_NUMBER)
2817             {
2818               /* Join spellings.  */
2819               U_CHAR *buf, *end;
2820
2821               pasted = get_temp_token (pfile);
2822               buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2823               end = spell_token (pfile, token, buf);
2824               end = spell_token (pfile, second, end);
2825               *end = '\0';
2826
2827               if (type == CPP_NAME)
2828                 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2829               else
2830                 {
2831                   pasted->val.str.text = uxstrdup (buf);
2832                   pasted->val.str.len = end - buf;
2833                 }
2834             }
2835           else if (type == CPP_WCHAR || type == CPP_WSTRING
2836                    || type == CPP_OSTRING)
2837             pasted = duplicate_token (pfile, second);
2838           else
2839             {
2840               pasted = get_temp_token (pfile);
2841               pasted->val.integer = 0;
2842             }
2843
2844           pasted->type = type;
2845           pasted->flags = digraph ? DIGRAPH : 0;
2846
2847           if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2848             {
2849               pasted->type = pasted->val.node->value.code;
2850               pasted->flags |= NAMED_OP;
2851             }
2852         }
2853
2854       /* The pasted token gets the whitespace flags and position of the
2855          first token, the PASTE_LEFT flag of the second token, plus the
2856          PASTED flag to indicate it is the result of a paste.  However, we
2857          want to preserve the DIGRAPH flag.  */
2858       pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2859       pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2860                         | (second->flags & PASTE_LEFT) | PASTED);
2861       pasted->col = token->col;
2862       pasted->line = token->line;
2863
2864       /* See if there is another token to be pasted onto the one we just
2865          constructed.  */
2866       token = pasted;
2867       context = CURRENT_CONTEXT (pfile);
2868       /* and loop */
2869     }
2870   return token;
2871 }
2872
2873 /* Convert a token sequence to a single string token according to the
2874    rules of the ISO C #-operator.  */
2875 #define INIT_SIZE 200
2876 static cpp_token *
2877 stringify_arg (pfile, token)
2878      cpp_reader *pfile;
2879      const cpp_token *token;
2880 {
2881   cpp_token *result;
2882   unsigned char *main_buf;
2883   unsigned int prev_value, backslash_count = 0;
2884   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2885
2886   push_arg_context (pfile, token);
2887   prev_value  = prevent_macro_expansion (pfile);
2888   main_buf = (unsigned char *) xmalloc (buf_cap);
2889
2890   result = get_temp_token (pfile);
2891   ASSIGN_FLAGS_AND_POS (result, token);
2892
2893   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2894     {
2895       int escape;
2896       unsigned char *buf;
2897       unsigned int len = TOKEN_LEN (token);
2898
2899       if (token->type == CPP_PLACEMARKER)
2900         continue;
2901
2902       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2903                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2904       if (escape)
2905         len *= 4 + 1;
2906
2907       if (buf_used + len > buf_cap)
2908         {
2909           buf_cap = buf_used + len + INIT_SIZE;
2910           main_buf = xrealloc (main_buf, buf_cap);
2911         }
2912
2913       if (whitespace && (token->flags & PREV_WHITE))
2914         main_buf[buf_used++] = ' ';
2915
2916       if (escape)
2917         buf = (unsigned char *) xmalloc (len);
2918       else
2919         buf = main_buf + buf_used;
2920
2921       len = spell_token (pfile, token, buf) - buf;
2922       if (escape)
2923         {
2924           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2925           free (buf);
2926         }
2927       else
2928         buf_used += len;
2929
2930       whitespace = 1;
2931       if (token->type == CPP_BACKSLASH)
2932         backslash_count++;
2933       else
2934         backslash_count = 0;
2935     }
2936
2937   /* Ignore the final \ of invalid string literals.  */
2938   if (backslash_count & 1)
2939     {
2940       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2941       buf_used--;
2942     }
2943
2944   result->type = CPP_STRING;
2945   result->val.str.text = main_buf;
2946   result->val.str.len = buf_used;
2947   restore_macro_expansion (pfile, prev_value);
2948   return result;
2949 }
2950
2951 /* Allocate more room on the context stack of PFILE.  */
2952 static void
2953 expand_context_stack (pfile)
2954      cpp_reader *pfile;
2955 {
2956   pfile->context_cap += pfile->context_cap + 20;
2957   pfile->contexts = (cpp_context *)
2958     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2959 }
2960
2961 /* Push the context of macro NODE onto the context stack.  TOKEN is
2962    the CPP_NAME token invoking the macro.  */
2963 static int
2964 push_macro_context (pfile, token)
2965      cpp_reader *pfile;
2966      const cpp_token *token;
2967 {
2968   unsigned char orig_flags;
2969   macro_args *args;
2970   cpp_context *context;
2971   cpp_hashnode *node = token->val.node;
2972
2973   /* Token's flags may change when parsing args containing a nested
2974      invocation of this macro.  */
2975   orig_flags = token->flags & (PREV_WHITE | BOL);
2976   args = 0;
2977   if (node->value.expansion->paramc >= 0)
2978     {
2979       unsigned int error, prev_nme;
2980
2981       /* Allocate room for the argument contexts, and parse them.  */
2982       args  = (macro_args *) xmalloc (sizeof (macro_args));
2983       args->ends = (unsigned int *)
2984         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2985       args->tokens = 0;
2986       args->capacity = 0;
2987       args->used = 0;
2988       args->level = pfile->cur_context;
2989
2990       prev_nme = prevent_macro_expansion (pfile);
2991       pfile->args = args;
2992       error = parse_args (pfile, node, args);
2993       pfile->args = 0;
2994       restore_macro_expansion (pfile, prev_nme);
2995       if (error)
2996         {
2997           free_macro_args (args);
2998           return 1;
2999         }
3000     }
3001
3002   /* Now push its context.  */
3003   pfile->cur_context++;
3004   if (pfile->cur_context == pfile->context_cap)
3005     expand_context_stack (pfile);
3006
3007   context = CURRENT_CONTEXT (pfile);
3008   context->u.list = node->value.expansion;
3009   context->args = args;
3010   context->posn = 0;
3011   context->count = context->u.list->tokens_used;
3012   context->level = pfile->cur_context;
3013   context->flags = 0;
3014   context->pushed_token = 0;
3015
3016   /* Set the flags of the first token.  We know there must
3017      be one, empty macros are a single placemarker token.  */
3018   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
3019
3020   return 0;
3021 }
3022
3023 /* Push an argument to the current macro onto the context stack.
3024    TOKEN is the MACRO_ARG token representing the argument expansion.  */
3025 static void
3026 push_arg_context (pfile, token)
3027      cpp_reader *pfile;
3028      const cpp_token *token;
3029 {
3030   cpp_context *context;
3031   macro_args *args;
3032
3033   pfile->cur_context++;
3034   if (pfile->cur_context == pfile->context_cap)
3035       expand_context_stack (pfile);
3036
3037   context = CURRENT_CONTEXT (pfile);
3038   args = context[-1].args;
3039
3040   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
3041   context->u.arg = args->tokens + context->count;
3042   context->count = args->ends[token->val.aux] - context->count;
3043   context->args = 0;
3044   context->posn = 0;
3045   context->level = args->level;
3046   context->flags = CONTEXT_ARG | CONTEXT_RAW;
3047   context->pushed_token = 0;
3048
3049   /* Set the flags of the first token.  There is one.  */
3050   {
3051     const cpp_token *first = context->u.arg[0];
3052     if (!first)
3053       first = context->u.arg[1];
3054
3055     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
3056                           token->flags & (PREV_WHITE | BOL));
3057   }
3058
3059   if (token->flags & PASTE_LEFT)
3060     context->flags |= CONTEXT_PASTEL;
3061   if (pfile->paste_level)
3062     context->flags |= CONTEXT_PASTER;
3063 }
3064
3065 /* "Unget" a token.  It is effectively inserted in the token queue and
3066    will be returned by the next call to get_raw_token.  */
3067 void
3068 _cpp_push_token (pfile, token)
3069      cpp_reader *pfile;
3070      const cpp_token *token;
3071 {
3072   cpp_context *context = CURRENT_CONTEXT (pfile);
3073
3074   if (context->posn > 0)
3075     {
3076       const cpp_token *prev;
3077       if (IS_ARG_CONTEXT (context))
3078         prev = context->u.arg[context->posn - 1];
3079       else
3080         prev = &context->u.list->tokens[context->posn - 1];
3081
3082       if (prev == token)
3083         {
3084           context->posn--;
3085           return;
3086         }
3087     }
3088
3089   if (context->pushed_token)
3090     cpp_ice (pfile, "two tokens pushed in a row");
3091   if (token->type != CPP_EOF)
3092     context->pushed_token = token;
3093   /* Don't push back a directive's CPP_EOF, step back instead.  */
3094   else if (pfile->cur_context == 0)
3095     pfile->contexts[0].posn--;
3096 }
3097
3098 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3099    introducing the directive.  */
3100 static void
3101 process_directive (pfile, token)
3102      cpp_reader *pfile;
3103      const cpp_token *token;
3104 {
3105   const struct directive *d = pfile->token_list.directive;
3106   int prev_nme = 0;
3107
3108   /* Skip over the directive name.  */
3109   if (token[1].type == CPP_NAME)
3110     _cpp_get_raw_token (pfile);
3111   else if (token[1].type != CPP_NUMBER)
3112     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
3113
3114   if (! (d->flags & EXPAND))
3115     prev_nme = prevent_macro_expansion (pfile);
3116   (void) (*d->handler) (pfile);
3117   if (! (d->flags & EXPAND))
3118     restore_macro_expansion (pfile, prev_nme);
3119   _cpp_skip_rest_of_line (pfile);
3120 }
3121
3122 /* The external interface to return the next token.  All macro
3123    expansion and directive processing is handled internally, the
3124    caller only ever sees the output after preprocessing.  */
3125 const cpp_token *
3126 cpp_get_token (pfile)
3127      cpp_reader *pfile;
3128 {
3129   const cpp_token *token;
3130   /* Loop till we hit a non-directive, non-placemarker token.  */
3131   for (;;)
3132     {
3133       token = _cpp_get_token (pfile);
3134
3135       if (token->type == CPP_PLACEMARKER)
3136         continue;
3137
3138       if (token->type == CPP_HASH && token->flags & BOL
3139           && pfile->token_list.directive)
3140         {
3141           process_directive (pfile, token);
3142           continue;
3143         }
3144
3145       return token;
3146     }
3147 }
3148
3149 /* The internal interface to return the next token.  There are two
3150    differences between the internal and external interfaces: the
3151    internal interface may return a PLACEMARKER token, and it does not
3152    process directives.  */
3153 const cpp_token *
3154 _cpp_get_token (pfile)
3155      cpp_reader *pfile;
3156 {
3157   const cpp_token *token, *old_token;
3158   cpp_hashnode *node;
3159
3160   /* Loop until we hit a non-macro token.  */
3161   for (;;)
3162     {
3163       token = get_raw_token (pfile);
3164
3165       /* Short circuit EOF. */
3166       if (token->type == CPP_EOF)
3167         return token;
3168
3169       /* If we are skipping... */
3170       if (pfile->skipping)
3171         {
3172           /* we still have to process directives,  */
3173           if (pfile->token_list.directive)
3174             return token;
3175
3176           /* but everything else is ignored.  */
3177           _cpp_skip_rest_of_line (pfile);
3178           continue;
3179         }
3180
3181       /* If there's a potential control macro and we get here, then that
3182          #ifndef didn't cover the entire file and its argument shouldn't
3183          be taken as a control macro.  */
3184       pfile->potential_control_macro = 0;
3185
3186       old_token = token;
3187
3188       /* See if there's a token to paste with this one.  */
3189       if (!pfile->paste_level)
3190         token = maybe_paste_with_next (pfile, token);
3191
3192       /* If it isn't a macro, return it now.  */
3193       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3194         return token;
3195
3196       /* Is macro expansion disabled in general, or are we in the
3197          middle of a token paste, or was this token just pasted?
3198          (Note we don't check token->flags & PASTED, because that
3199          counts tokens that were pasted at some point in the past,
3200          we're only interested in tokens that were pasted by this call
3201          to maybe_paste_with_next.)  */
3202       if (pfile->no_expand_level == pfile->cur_context
3203           || pfile->paste_level
3204           || (token != old_token
3205               && pfile->no_expand_level + 1 == pfile->cur_context))
3206         return token;
3207
3208       node = token->val.node;
3209       if (node->type != T_MACRO)
3210         return special_symbol (pfile, node, token);
3211
3212       if (is_macro_disabled (pfile, node->value.expansion, token))
3213         return token;
3214
3215       if (pfile->cur_context > CPP_STACK_MAX)
3216         {
3217           cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3218           return token;
3219         }
3220
3221       if (push_macro_context (pfile, token))
3222         return token;
3223       /* else loop */
3224     }
3225 }
3226
3227 /* Returns the next raw token, i.e. without performing macro
3228    expansion.  Argument contexts are automatically entered.  */
3229 static const cpp_token *
3230 get_raw_token (pfile)
3231      cpp_reader *pfile;
3232 {
3233   const cpp_token *result;
3234   cpp_context *context;
3235
3236   for (;;)
3237     {
3238       context = CURRENT_CONTEXT (pfile);
3239       if (context->pushed_token)
3240         {
3241           result = context->pushed_token;
3242           context->pushed_token = 0;
3243           return result;        /* Cannot be a CPP_MACRO_ARG */
3244         }
3245       else if (context->posn == context->count)
3246         {
3247           if (pop_context (pfile))
3248             return &eof_token;
3249           continue;
3250         }
3251       else if (IS_ARG_CONTEXT (context))
3252         {
3253           result = context->u.arg[context->posn++];
3254           if (result == 0)
3255             {
3256               context->flags ^= CONTEXT_RAW;
3257               result = context->u.arg[context->posn++];
3258             }
3259           return result;        /* Cannot be a CPP_MACRO_ARG */
3260         }
3261
3262       result = &context->u.list->tokens[context->posn++];
3263
3264       if (result->type != CPP_MACRO_ARG)
3265         return result;
3266
3267       if (result->flags & STRINGIFY_ARG)
3268         return stringify_arg (pfile, result);
3269
3270       push_arg_context (pfile, result);
3271     }
3272 }
3273
3274 /* Internal interface to get the token without macro expanding.  */
3275 const cpp_token *
3276 _cpp_get_raw_token (pfile)
3277      cpp_reader *pfile;
3278 {
3279   int prev_nme = prevent_macro_expansion (pfile);
3280   const cpp_token *result = _cpp_get_token (pfile);
3281   restore_macro_expansion (pfile, prev_nme);
3282   return result;
3283 }
3284
3285 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3286    list should be overwritten, or zero if we need to append
3287    (typically, if we are within the arguments to a macro, or looking
3288    for the '(' to start a function-like macro invocation).  */
3289 static int
3290 lex_next (pfile, clear)
3291      cpp_reader *pfile;
3292      int clear;
3293 {
3294   cpp_toklist *list = &pfile->token_list;
3295   const cpp_token *old_list = list->tokens;
3296   unsigned int old_used = list->tokens_used;
3297
3298   if (clear)
3299     {
3300       /* Release all temporary tokens.  */
3301       _cpp_clear_toklist (list);
3302       pfile->contexts[0].posn = 0;
3303       if (pfile->temp_used)
3304         release_temp_tokens (pfile);
3305     }
3306   lex_line (pfile, list);
3307   pfile->contexts[0].count = list->tokens_used;
3308
3309   if (!clear && pfile->args)
3310     {
3311       /* Fix up argument token pointers.  */
3312       if (old_list != list->tokens)
3313         {
3314           unsigned int i;
3315
3316           for (i = 0; i < pfile->args->used; i++)
3317             {
3318               const cpp_token *token = pfile->args->tokens[i];
3319               if (token >= old_list && token < old_list + old_used)
3320                 pfile->args->tokens[i] = (const cpp_token *)
3321                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3322             }
3323         }
3324
3325       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3326          tokens within the list of arguments that would otherwise act as
3327          preprocessing directives, the behavior is undefined.
3328
3329          This implementation will report a hard error and treat the
3330          'sequence of preprocessing tokens' as part of the macro argument,
3331          not a directive.
3332
3333          Note if pfile->args == 0, we're OK since we're only inside a
3334          macro argument after a '('.  */
3335       if (list->directive)
3336         {
3337           cpp_error_with_line (pfile, list->tokens[old_used].line,
3338                                list->tokens[old_used].col,
3339                                "#%s may not be used inside a macro argument",
3340                                list->directive->name);
3341           return 1;
3342         }
3343     }
3344
3345   return 0;
3346 }
3347
3348 /* Pops a context off the context stack.  If we're at the bottom, lexes
3349    the next logical line.  Returns EOF if we're at the end of the
3350    argument list to the # operator, or we should not "overflow"
3351    into the rest of the file (e.g. 6.10.3.1.1).  */
3352 static int
3353 pop_context (pfile)
3354      cpp_reader *pfile;
3355 {
3356   cpp_context *context;
3357
3358   if (pfile->cur_context == 0)
3359     {
3360       /* If we are currently processing a directive, do not advance.  6.10
3361          paragraph 2: A new-line character ends the directive even if it
3362          occurs within what would otherwise be an invocation of a
3363          function-like macro.  */
3364       if (pfile->token_list.directive)
3365         return 1;
3366
3367       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3368     }
3369
3370   /* Argument contexts, when parsing args or handling # operator
3371      return CPP_EOF at the end.  */
3372   context = CURRENT_CONTEXT (pfile);
3373   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3374     return 1;
3375
3376   /* Free resources when leaving macro contexts.  */
3377   if (context->args)
3378     free_macro_args (context->args);
3379
3380   if (pfile->cur_context == pfile->no_expand_level)
3381     pfile->no_expand_level--;
3382   pfile->cur_context--;
3383
3384   return 0;
3385 }
3386
3387 /* Turn off macro expansion at the current context level.  */
3388 static unsigned int
3389 prevent_macro_expansion (pfile)
3390      cpp_reader *pfile;
3391 {
3392   unsigned int prev_value = pfile->no_expand_level;
3393   pfile->no_expand_level = pfile->cur_context;
3394   return prev_value;
3395 }
3396
3397 /* Restore macro expansion to its previous state.  */
3398 static void
3399 restore_macro_expansion (pfile, prev_value)
3400      cpp_reader *pfile;
3401      unsigned int prev_value;
3402 {
3403   pfile->no_expand_level = prev_value;
3404 }
3405
3406 /* Used by cpperror.c to obtain the correct line and column to report
3407    in a diagnostic.  */
3408 unsigned int
3409 _cpp_get_line (pfile, pcol)
3410      cpp_reader *pfile;
3411      unsigned int *pcol;
3412 {
3413   unsigned int index;
3414   const cpp_token *cur_token;
3415
3416   if (pfile->in_lex_line)
3417     index = pfile->token_list.tokens_used;
3418   else
3419     index = pfile->contexts[0].posn;
3420
3421   if (index == 0)
3422     {
3423       if (pcol)
3424         *pcol = 0;
3425       return 0;
3426     }
3427
3428   cur_token = &pfile->token_list.tokens[index - 1];
3429   if (pcol)
3430     *pcol = cur_token->col;
3431   return cur_token->line;
3432 }
3433
3434 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3435 static const char * const monthnames[] =
3436 {
3437   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3438   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3439 };
3440
3441 /* Handle builtin macros like __FILE__.  */
3442 static const cpp_token *
3443 special_symbol (pfile, node, token)
3444      cpp_reader *pfile;
3445      cpp_hashnode *node;
3446      const cpp_token *token;
3447 {
3448   cpp_token *result;
3449   cpp_buffer *ip;
3450
3451   switch (node->type)
3452     {
3453     case T_FILE:
3454     case T_BASE_FILE:
3455       {
3456         const char *file;
3457
3458         ip = CPP_BUFFER (pfile);
3459         if (ip == 0)
3460           file = "";
3461         else
3462           {
3463             if (node->type == T_BASE_FILE)
3464               while (CPP_PREV_BUFFER (ip) != NULL)
3465                 ip = CPP_PREV_BUFFER (ip);
3466
3467             file = ip->nominal_fname;
3468           }
3469         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3470                                     strlen (file));
3471       }
3472       break;
3473
3474     case T_INCLUDE_LEVEL:
3475       /* pfile->include_depth counts the primary source as level 1,
3476          but historically __INCLUDE_DEPTH__ has called the primary
3477          source level 0.  */
3478       result = alloc_number_token (pfile, pfile->include_depth - 1);
3479       break;
3480
3481     case T_SPECLINE:
3482       /* If __LINE__ is embedded in a macro, it must expand to the
3483          line of the macro's invocation, not its definition.
3484          Otherwise things like assert() will not work properly.  */
3485       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3486       break;
3487
3488     case T_STDC:
3489       {
3490         int stdc = 1;
3491
3492 #ifdef STDC_0_IN_SYSTEM_HEADERS
3493         if (CPP_IN_SYSTEM_HEADER (pfile)
3494             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3495           stdc = 0;
3496 #endif
3497         result = alloc_number_token (pfile, stdc);
3498       }
3499       break;
3500
3501     case T_DATE:
3502     case T_TIME:
3503       if (pfile->date == 0)
3504         {
3505           /* Allocate __DATE__ and __TIME__ from permanent storage,
3506              and save them in pfile so we don't have to do this again.
3507              We don't generate these strings at init time because
3508              time() and localtime() are very slow on some systems.  */
3509           time_t tt = time (NULL);
3510           struct tm *tb = localtime (&tt);
3511
3512           pfile->date = make_string_token
3513             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3514           pfile->time = make_string_token
3515             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3516
3517           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3518                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3519           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3520                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3521         }
3522       result = node->type == T_DATE ? pfile->date: pfile->time;
3523       break;
3524
3525     case T_POISON:
3526       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3527       return token;
3528
3529     default:
3530       cpp_ice (pfile, "invalid special hash type");
3531       return token;
3532     }
3533
3534   ASSIGN_FLAGS_AND_POS (result, token);
3535   return result;
3536 }
3537 #undef DSC
3538
3539 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3540    if it hasn't happened already.  */
3541
3542 void
3543 _cpp_init_input_buffer (pfile)
3544      cpp_reader *pfile;
3545 {
3546   cpp_context *base;
3547
3548   init_trigraph_map ();
3549   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3550   pfile->no_expand_level = UINT_MAX;
3551   pfile->context_cap = 20;
3552   pfile->cur_context = 0;
3553
3554   pfile->contexts = (cpp_context *)
3555     xmalloc (pfile->context_cap * sizeof (cpp_context));
3556
3557   /* Clear the base context.  */
3558   base = &pfile->contexts[0];
3559   base->u.list = &pfile->token_list;
3560   base->posn = 0;
3561   base->count = 0;
3562   base->args = 0;
3563   base->level = 0;
3564   base->flags = 0;
3565   base->pushed_token = 0;
3566 }
3567
3568 /* Moves to the end of the directive line, popping contexts as
3569    necessary.  */
3570 void
3571 _cpp_skip_rest_of_line (pfile)
3572      cpp_reader *pfile;
3573 {
3574   /* Discard all stacked contexts.  */
3575   int i;
3576   for (i = pfile->cur_context; i > 0; i--)
3577     if (pfile->contexts[i].args)
3578       free_macro_args (pfile->contexts[i].args);
3579
3580   if (pfile->no_expand_level <= pfile->cur_context)
3581     pfile->no_expand_level = 0;
3582   pfile->cur_context = 0;
3583
3584   /* Clear the base context, and clear the directive pointer so that
3585      get_raw_token will advance to the next line.  */
3586   pfile->contexts[0].count = 0;
3587   pfile->contexts[0].posn = 0;
3588   pfile->token_list.directive = 0;
3589 }
3590
3591 /* Directive handler wrapper used by the command line option
3592    processor.  */
3593 void
3594 _cpp_run_directive (pfile, dir, buf, count)
3595      cpp_reader *pfile;
3596      const struct directive *dir;
3597      const char *buf;
3598      size_t count;
3599 {
3600   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3601     {
3602       unsigned int prev_lvl = 0;
3603
3604       /* Scan the line now, else prevent_macro_expansion won't work.  */
3605       lex_next (pfile, 1);
3606       if (! (dir->flags & EXPAND))
3607         prev_lvl = prevent_macro_expansion (pfile);
3608
3609       (void) (*dir->handler) (pfile);
3610
3611       if (! (dir->flags & EXPAND))
3612         restore_macro_expansion (pfile, prev_lvl);
3613
3614       _cpp_skip_rest_of_line (pfile);
3615       cpp_pop_buffer (pfile);
3616     }
3617 }