gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Check line numbers assigned to all errors.
  28 o Replace strncmp with memcmp almost everywhere.
  29 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  30 o Distinguish integers, floats, and 'other' pp-numbers.
  31 o Store ints and char constants as binary values.
  32 o New command-line assertion syntax.
  33 o Work towards functions in cpperror.c taking a message level parameter.
  34   If we do this, merge the common code of do_warning and do_error.
  35 o Comment all functions, and describe macro expansion algorithm.
  36 o Move as much out of header files as possible.
  37 o Remove single quote pairs `', and some '', from diagnostics.
  38 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  39
  40 */
  41
  42 #include "config.h"
  43 #include "system.h"
  44 #include "intl.h"
  45 #include "cpplib.h"
  46 #include "cpphash.h"
  47 #include "symcat.h"
  48
/* Two shared, read-only tokens whose types are CPP_PLACEMARKER and
   CPP_EOF respectively; every other field is initialized to zero.  */
static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  51
/* Flags for cpp_context.  Each occupies its own bit, so several may be
   set at once in cpp_context.flags.  */
#define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
#define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
#define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
#define CONTEXT_ARG     (1 << 3) /* If an argument context.  */

/* One context: a run of tokens being read, which is either a macro's
   token list (u.list) or an array of argument token pointers (u.arg).
   IS_ARG_CONTEXT tells the two apart by testing CONTEXT_ARG.  */
typedef struct cpp_context cpp_context;
struct cpp_context
{
  union
  {
    const cpp_toklist *list;    /* Used for macro contexts only.  */
    const cpp_token **arg;      /* Used for arg contexts only.  */
  } u;

  /* Pushed token to be returned by next call to get_raw_token.  */
  const cpp_token *pushed_token;

  struct macro_args *args;      /* The arguments for a function-like
                                   macro.  NULL otherwise.  */
  unsigned short posn;          /* Current posn, index into u.  */
  unsigned short count;         /* No. of tokens in u.  */
  unsigned short level;         /* NOTE(review): meaning not visible in
                                   this part of the file; confirm.  */
  unsigned char flags;          /* CONTEXT_* bits (see IS_ARG_CONTEXT).  */
};
  77
/* Storage for the arguments collected for a function-like macro; a
   cpp_context points at one of these through its ARGS member.
   NOTE(review): the field descriptions below are inferred from the
   names only -- confirm against parse_args and save_token.  */
typedef struct macro_args macro_args;
struct macro_args
{
  unsigned int *ends;           /* Presumably where each argument ends
                                   within TOKENS.  */
  const cpp_token **tokens;     /* Presumably the saved argument tokens.  */
  unsigned int capacity;        /* Presumably allocated size of TOKENS.  */
  unsigned int used;            /* Presumably entries of TOKENS in use.  */
  unsigned short level;         /* NOTE(review): meaning not visible here.  */
};
  87
  88 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  89 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  90                                            macro_args *, unsigned int *));
  91 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  92 static void save_token PARAMS ((macro_args *, const cpp_token *));
  93 static int pop_context PARAMS ((cpp_reader *));
  94 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  95 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  96 static void free_macro_args PARAMS ((macro_args *));
  97
  98 #define auto_expand_name_space(list) \
  99     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
 100 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
 101                                          unsigned int));
 102 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 103                                          unsigned int));
 104
 105 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 106 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 107                                                 unsigned char *));
 108 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 109                                                      const unsigned char *));
 110 static int skip_block_comment PARAMS ((cpp_reader *));
 111 static int skip_line_comment PARAMS ((cpp_reader *));
 112 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 113 static void skip_whitespace PARAMS ((cpp_reader *, int));
 114 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 115                                    const U_CHAR *, const U_CHAR *));
 116 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 117 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 118                                   unsigned int));
 119 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 120 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 121                                   const unsigned char *,
 122                                   unsigned int, unsigned int));
 123 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 124 static int lex_next PARAMS ((cpp_reader *, int));
 125 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 126                                       const cpp_token *));
 127
 128 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 129 static void expand_context_stack PARAMS ((cpp_reader *));
 130 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 131                                             unsigned char *));
 132 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
 133                                   const cpp_token *, int));
 134 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 135                                           cpp_token *));
 136 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 137                                             unsigned int));
 138 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 139 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 140                                                 const cpp_token *));
 141 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 142 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 143                                                        const cpp_token *));
 144 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 145                                          const cpp_token *, int *));
 146 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 147 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 148 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 149 static void release_temp_tokens         PARAMS ((cpp_reader *));
 150 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 151 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 152
/* Start an empty string for TOKEN: zero length, with its text pointing
   at the first unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do {(token)->val.str.len = 0; \
      (token)->val.str.text = (list)->namebuf + (list)->name_used; \
  } while (0)

/* True if C is a '+' or '-' that directly follows an exponent letter
   PREVC: 'e' or 'E' always, 'p' or 'P' only when the c89 option is
   off.  Relies on a variable named pfile being in scope.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') && \
   ((prevc) == 'e' || (prevc) == 'E' \
    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))

/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.

   C is the newline character just read.  '\r' + '\n' - C is the other
   newline character, so a following partner is swallowed too.  The
   buffer's line number is then bumped, its line base is set to CUR,
   and pfile->col_adjust is reset.  Relies on a variable named pfile
   being in scope; note that C is not parenthesized in the expansion.  */

#define handle_newline(cur, limit, c) \
 do { \
  if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
    (cur)++; \
  pfile->buffer->lineno++; \
  pfile->buffer->line_base = (cur); \
  pfile->col_adjust = 0; \
 } while (0)

/* The macros below all operate on a variable named cur_token, which
   must be in scope at the point of use.  */

/* True if the token at cur_token has no PREV_WHITE flag.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* Type of the token just before cur_token.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Set the type at cur_token and advance past it.  */
#define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
/* Overwrite the type of the token just before cur_token.  */
#define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
/* Step cur_token back by one and set that token's type.  */
#define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
/* As BACKUP_TOKEN, additionally setting the DIGRAPH flag.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 183
 184 /* An upper bound on the number of bytes needed to spell a token,
 185    including preceding whitespace.  */
 186 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 187 static inline size_t
 188 TOKEN_LEN (token)
 189      const cpp_token *token;
 190 {
 191   size_t len;
 192
 193   switch (TOKEN_SPELL (token))
 194     {
 195     default:            len = 0;                        break;
 196     case SPELL_STRING:  len = token->val.str.len;       break;
 197     case SPELL_IDENT:   len = token->val.node->length;  break;
 198     }
 199   return len + 5;
 200 }
 201
/* Nonzero if context C has the CONTEXT_ARG flag set.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Pointer to the entry of pfile->contexts indexed by cur_context.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
/* True if C has VAR_ARGS set in its flags and the token at its current
   position carries, in val.aux, the index of the list's last parameter
   (paramc - 1).  Reads C through u.list, so C must be a macro context.
   NOTE(review): VAR_ARGS is tested against the context's own flags
   rather than the list's; confirm that is intended.  */
#define ON_REST_ARG(c) \
 (((c)->flags & VAR_ARGS) \
  && (c)->u.list->tokens[(c)->posn].val.aux \
      == (unsigned int) ((c)->u.list->paramc - 1))

/* Set D's flags to the PREV_WHITE, BOL and PASTE_LEFT bits of S's
   flags; if BOL ends up set, copy S's column and line to D as well.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* f is flags, just consisting of PREV_WHITE | BOL.  Replaces those two
   bits of D's flags with F, leaving the others alone; if F contains
   BOL, S's column and line are copied to D.  F is evaluated twice.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
 219
/* Build the token spelling table by expanding TTYPE_TABLE with
   temporary definitions of OP and TK.  An OP entry is given the
   SPELL_OPERATOR category and the string S as its text; a TK entry
   uses S as its category and the stringified enumerator name E as its
   text.  Both helper macros are undefined again straight afterwards.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },

const struct token_spelling
_cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };

#undef OP
#undef TK
 228
/* The following table is used by trigraph_ok/trigraph_replace.  If we
   have designated initializers, it can be constant data; otherwise,
   it is set up at runtime by _cpp_init_input_buffer.

   trigraph_map is indexed by the third character of a "??x" sequence
   and holds the character the trigraph stands for.  Entries that are
   not listed below are zero, which trigraph_replace treats as "not a
   trigraph".

   The same list of s(from, to) pairs is expanded in one of two ways:
   as designated initializers of a const array, with
   init_trigraph_map() defined to nothing, or as the body of a real
   init_trigraph_map function that stores into a writable array.  */

#if (GCC_VERSION >= 2007)
#define init_trigraph_map()  /* nothing */
#define TRIGRAPH_MAP \
__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
#define END };
#define s(p, v) [p] = v,
#else
#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 static void init_trigraph_map PARAMS ((void)) { \
 unsigned char *x = trigraph_map;
#define END }
#define s(p, v) x[p] = v;
#endif

/* The nine trigraphs: third character, then its replacement.  */
TRIGRAPH_MAP
  s('=', '#')   s(')', ']')     s('!', '|')
  s('(', '[')   s('\'', '^')    s('>', '}')
  s('/', '\\')  s('<', '{')     s('-', '~')
END

/* The helper macros are only needed for the table above.  */
#undef TRIGRAPH_MAP
#undef END
#undef s
 256
 257 /* Notify the compiler proper that the current line number has jumped,
 258    or the current file name has changed.  */
 259
 260 static void
 261 output_line_command (pfile, print, line)
 262      cpp_reader *pfile;
 263      cpp_printer *print;
 264      unsigned int line;
 265 {
 266   cpp_buffer *ip = CPP_BUFFER (pfile);
 267
 268   if (line == 0)
 269     return;
 270
 271   /* End the previous line of text.  */
 272   if (pfile->need_newline)
 273     {
 274       putc ('\n', print->outf);
 275       print->lineno++;
 276     }
 277   pfile->need_newline = 0;
 278
 279   if (CPP_OPTION (pfile, no_line_commands))
 280     return;
 281
 282   /* If the current file has not changed, we can output a few newlines
 283      instead if we want to increase the line number by a small amount.
 284      We cannot do this if print->lineno is zero, because that means we
 285      haven't output any line commands yet.  (The very first line
 286      command output is a `same_file' command.)
 287
 288      'nominal_fname' values are unique, so they can be compared by
 289      comparing pointers.  */
 290   if (ip->nominal_fname == print->last_fname && print->lineno > 0
 291       && line >= print->lineno && line < print->lineno + 8)
 292     {
 293       while (line > print->lineno)
 294         {
 295           putc ('\n', print->outf);
 296           print->lineno++;
 297         }
 298       return;
 299     }
 300
 301   fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
 302            cpp_syshdr_flags (pfile, ip));
 303
 304   print->last_fname = ip->nominal_fname;
 305   print->lineno = line;
 306 }
 307
 308 /* Like fprintf, but writes to a printer object.  You should be sure
 309    always to generate a complete line when you use this function.  */
 310 void
 311 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
 312                      const char *fmt, ...))
 313 {
 314   va_list ap;
 315 #ifndef ANSI_PROTOTYPES
 316   cpp_reader *pfile;
 317   cpp_printer *print;
 318   const char *fmt;
 319 #endif
 320
 321   VA_START (ap, fmt);
 322
 323 #ifndef ANSI_PROTOTYPES
 324   pfile = va_arg (ap, cpp_reader *);
 325   print = va_arg (ap, cpp_printer *);
 326   fmt = va_arg (ap, const char *);
 327 #endif
 328
 329   /* End the previous line of text.  */
 330   if (pfile->need_newline)
 331     putc ('\n', print->outf);
 332   pfile->need_newline = 0;
 333
 334   vfprintf (print->outf, fmt, ap);
 335   va_end (ap);
 336 }
 337
 338 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 339
 340 void
 341 cpp_scan_buffer_nooutput (pfile)
 342      cpp_reader *pfile;
 343 {
 344   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 345   const cpp_token *token;
 346
 347   /* In no-output mode, we can ignore everything but directives.  */
 348   for (;;)
 349     {
 350       token = _cpp_get_token (pfile);
 351
 352       if (token->type == CPP_EOF)
 353         {
 354           cpp_pop_buffer (pfile);
 355           if (CPP_BUFFER (pfile) == stop)
 356             break;
 357         }
 358
 359       if (token->type == CPP_HASH && token->flags & BOL
 360           && pfile->token_list.directive)
 361         {
 362           process_directive (pfile, token);
 363           continue;
 364         }
 365
 366       _cpp_skip_rest_of_line (pfile);
 367     }
 368 }
 369
 370 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 371 void
 372 cpp_scan_buffer (pfile, print)
 373      cpp_reader *pfile;
 374      cpp_printer *print;
 375 {
 376   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 377   const cpp_token *token, *prev = 0;
 378
 379   for (;;)
 380     {
 381       token = _cpp_get_token (pfile);
 382       if (token->type == CPP_EOF)
 383         {
 384           cpp_pop_buffer (pfile);
 385
 386           if (CPP_BUFFER (pfile) == stop)
 387             return;
 388
 389           prev = 0;
 390           continue;
 391         }
 392
 393       if (token->flags & BOL)
 394         {
 395           if (token->type == CPP_HASH && pfile->token_list.directive)
 396             {
 397               process_directive (pfile, token);
 398               continue;
 399             }
 400
 401           output_line_command (pfile, print, pfile->token_list.line);
 402           prev = 0;
 403         }
 404
 405       if (token->type != CPP_PLACEMARKER)
 406         {
 407           output_token (pfile, print->outf, token, prev, 1);
 408           pfile->need_newline = 1;
 409         }
 410
 411       prev = token;
 412     }
 413 }
 414
 415 /* Helper routine used by parse_include, which can't see spell_token.
 416    Reinterpret the current line as an h-char-sequence (< ... >); we are
 417    looking at the first token after the <.  */
 418 const cpp_token *
 419 _cpp_glue_header_name (pfile)
 420      cpp_reader *pfile;
 421 {
 422   const cpp_token *t;
 423   cpp_token *hdr;
 424   U_CHAR *buf, *p;
 425   size_t len, avail;
 426
 427   avail = 40;
 428   len = 0;
 429   buf = xmalloc (avail);
 430
 431   for (;;)
 432     {
 433       t = _cpp_get_token (pfile);
 434       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 435         break;
 436
 437       if (len + TOKEN_LEN (t) > avail)
 438         {
 439           avail = len + TOKEN_LEN (t) + 40;
 440           buf = xrealloc (buf, avail);
 441         }
 442
 443       if (t->flags & PREV_WHITE)
 444         buf[len++] = ' ';
 445
 446       p = spell_token (pfile, t, buf + len);
 447       len = (size_t) (p - buf);  /* p known >= buf */
 448     }
 449
 450   if (t->type == CPP_EOF)
 451     cpp_error (pfile, "missing terminating > character");
 452
 453   buf = xrealloc (buf, len);
 454
 455   hdr = get_temp_token (pfile);
 456   hdr->type = CPP_HEADER_NAME;
 457   hdr->flags = 0;
 458   hdr->val.str.text = buf;
 459   hdr->val.str.len = len;
 460   return hdr;
 461 }
 462
 463 /* Token-buffer helper functions.  */
 464
 465 /* Expand a token list's string space. It is *vital* that
 466    list->tokens_used is correct, to get pointer fix-up right.  */
 467 void
 468 _cpp_expand_name_space (list, len)
 469      cpp_toklist *list;
 470      unsigned int len;
 471 {
 472   const U_CHAR *old_namebuf;
 473
 474   old_namebuf = list->namebuf;
 475   list->name_cap += len;
 476   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 477
 478   /* Fix up token text pointers.  */
 479   if (list->namebuf != old_namebuf)
 480     {
 481       unsigned int i;
 482
 483       for (i = 0; i < list->tokens_used; i++)
 484         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
 485           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 486     }
 487 }
 488
 489 /* If there is not enough room for LEN more characters, expand the
 490    list by just enough to have room for LEN characters.  */
 491 void
 492 _cpp_reserve_name_space (list, len)
 493      cpp_toklist *list;
 494      unsigned int len;
 495 {
 496   unsigned int room = list->name_cap - list->name_used;
 497
 498   if (room < len)
 499     _cpp_expand_name_space (list, len - room);
 500 }
 501
 502 /* Expand the number of tokens in a list.  */
 503 void
 504 _cpp_expand_token_space (list, count)
 505      cpp_toklist *list;
 506      unsigned int count;
 507 {
 508   unsigned int n;
 509
 510   list->tokens_cap += count;
 511   n = list->tokens_cap;
 512   if (list->flags & LIST_OFFSET)
 513     list->tokens--, n++;
 514   list->tokens = (cpp_token *)
 515     xrealloc (list->tokens, n * sizeof (cpp_token));
 516   if (list->flags & LIST_OFFSET)
 517     list->tokens++;             /* Skip the dummy.  */
 518 }
 519
 520 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 521    an extra token in front of the token list, as this allows the lexer
 522    to always peek at the previous token without worrying about
 523    underflowing the list, and some initial space.  Otherwise, no
 524    token- or name-space is allocated, and there is no dummy token.  */
 525 void
 526 _cpp_init_toklist (list, flags)
 527      cpp_toklist *list;
 528      int flags;
 529 {
 530   if (flags == NO_DUMMY_TOKEN)
 531     {
 532       list->tokens_cap = 0;
 533       list->tokens = 0;
 534       list->name_cap = 0;
 535       list->namebuf = 0;
 536       list->flags = 0;
 537     }
 538   else
 539     {
 540       /* Initialize token space.  Put a dummy token before the start
 541          that will fail matches.  */
 542       list->tokens_cap = 256;   /* 4K's worth.  */
 543       list->tokens = (cpp_token *)
 544         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 545       list->tokens[0].type = CPP_EOF;
 546       list->tokens++;
 547
 548       /* Initialize name space.  */
 549       list->name_cap = 1024;
 550       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 551       list->flags = LIST_OFFSET;
 552     }
 553
 554   _cpp_clear_toklist (list);
 555 }
 556
 557 /* Clear a token list.  */
 558 void
 559 _cpp_clear_toklist (list)
 560      cpp_toklist *list;
 561 {
 562   list->tokens_used = 0;
 563   list->name_used = 0;
 564   list->directive = 0;
 565   list->paramc = 0;
 566   list->params_len = 0;
 567   list->flags &= LIST_OFFSET;  /* clear all but that one */
 568 }
 569
 570 /* Free a token list.  Does not free the list itself, which may be
 571    embedded in a larger structure.  */
 572 void
 573 _cpp_free_toklist (list)
 574      const cpp_toklist *list;
 575 {
 576   if (list->flags & LIST_OFFSET)
 577     free (list->tokens - 1);    /* Backup over dummy token.  */
 578   else
 579     free (list->tokens);
 580   free (list->namebuf);
 581 }
 582
 583 /* Compare two tokens.  */
 584 int
 585 _cpp_equiv_tokens (a, b)
 586      const cpp_token *a, *b;
 587 {
 588   if (a->type == b->type && a->flags == b->flags)
 589     switch (TOKEN_SPELL (a))
 590       {
 591       default:                  /* Keep compiler happy.  */
 592       case SPELL_OPERATOR:
 593         return 1;
 594       case SPELL_CHAR:
 595       case SPELL_NONE:
 596         return a->val.aux == b->val.aux; /* arg_no or character.  */
 597       case SPELL_IDENT:
 598         return a->val.node == b->val.node;
 599       case SPELL_STRING:
 600         return (a->val.str.len == b->val.str.len
 601                 && !memcmp (a->val.str.text, b->val.str.text,
 602                             a->val.str.len));
 603       }
 604
 605   return 0;
 606 }
 607
 608 /* Compare two token lists.  */
 609 int
 610 _cpp_equiv_toklists (a, b)
 611      const cpp_toklist *a, *b;
 612 {
 613   unsigned int i;
 614
 615   if (a->tokens_used != b->tokens_used
 616       || a->flags != b->flags
 617       || a->paramc != b->paramc)
 618     return 0;
 619
 620   for (i = 0; i < a->tokens_used; i++)
 621     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 622       return 0;
 623   return 1;
 624 }
 625
 626 /* Utility routine:
 627
 628    Compares, the token TOKEN to the NUL-terminated string STRING.
 629    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 630
 631 int
 632 cpp_ideq (token, string)
 633      const cpp_token *token;
 634      const char *string;
 635 {
 636   if (token->type != CPP_NAME)
 637     return 0;
 638
 639   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 640 }
 641
 642 /* Lexing algorithm.
 643
 644  The original lexer in cpplib was made up of two passes: a first pass
  645  that replaced trigraphs and deleted escaped newlines, and a second
 646  pass that tokenized the result of the first pass.  Tokenisation was
 647  performed by peeking at the next character in the input stream.  For
 648  example, if the input stream contained "!=", the handler for the !
 649  character would peek at the next character, and if it were a '='
 650  would skip over it, and return a "!=" token, otherwise it would
 651  return just the "!" token.
 652
 653  To implement a single-pass lexer, this peeking ahead is unworkable.
 654  An arbitrary number of escaped newlines, and trigraphs (in particular
 655  ??/ which translates to the escape \), could separate the '!' and '='
 656  in the input stream, yet the next token is still a "!=".
 657
 658  Suppose instead that we lex by one logical line at a time, producing
 659  a token list or stack for each logical line, and when seeing the '!'
 660  push a CPP_NOT token on the list.  Then if the '!' is part of a
 661  longer token ("!=") we know we must see the remainder of the token by
 662  the time we reach the end of the logical line.  Thus we can have the
 663  '=' handler look at the previous token (at the end of the list / top
 664  of the stack) and see if it is a "!" token, and if so, instead of
 665  pushing a "=" token revise the existing token to be a "!=" token.
 666
 667  This works in the presence of escaped newlines, because the '\' would
 668  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 669  newline ('\n' or '\r') handler looks at the token at the top of the
 670  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 671  Hence the '=' handler would never see any intervening tokens.
 672
 673  To make trigraphs work in this context, as in precedence trigraphs
 674  are highest and converted before anything else, the '?' handler does
 675  lookahead to see if it is a trigraph, and if so skips the trigraph
 676  and pushes the token it represents onto the top of the stack.  This
 677  also works in the particular case of a CPP_BACKSLASH trigraph.
 678
 679  To the preprocessor, whitespace is only significant to the point of
 680  knowing whether whitespace precedes a particular token.  For example,
 681  the '=' handler needs to know whether there was whitespace between it
 682  and a "!" token on the top of the stack, to make the token conversion
 683  decision correctly.  So each token has a PREV_WHITE flag to
 684  indicate this - the standard permits consecutive whitespace to be
 685  regarded as a single space.  The compiler front ends are not
 686  interested in whitespace at all; they just require a token stream.
 687  Another place where whitespace is significant to the preprocessor is
  688  a #define statement - if there is whitespace between the macro name
 689  and an initial "(" token the macro is "object-like", otherwise it is
 690  a function-like macro that takes arguments.
 691
 692  However, all is not rosy.  Parsing of identifiers, numbers, comments
 693  and strings becomes trickier because of the possibility of raw
 694  trigraphs and escaped newlines in the input stream.
 695
 696  The trigraphs are three consecutive characters beginning with two
 697  question marks.  A question mark is not valid as part of a number or
 698  identifier, so parsing of a number or identifier terminates normally
 699  upon reaching it, returning to the mainloop which handles the
 700  trigraph just like it would in any other position.  Similarly for the
 701  backslash of a backslash-newline combination.  So we just need the
 702  escaped-newline dropper in the mainloop to check if the token on the
 703  top of the stack after dropping the escaped newline is a number or
 704  identifier, and if so to continue the processing it as if nothing had
 705  happened.
 706
 707  For strings, we replace trigraphs whenever we reach a quote or
 708  newline, because there might be a backslash trigraph escaping them.
 709  We need to be careful that we start trigraph replacing from where we
 710  left off previously, because it is possible for a first scan to leave
 711  "fake" trigraphs that a second scan would pick up as real (e.g. the
 712  sequence "????/\n=" would find a fake ??= trigraph after removing the
 713  escaped newline.)
 714
 715  For line comments, on reaching a newline we scan the previous
 716  character(s) to see if it escaped, and continue if it is.  Block
 717  comments ignore everything and just focus on finding the comment
 718  termination mark.  The only difficult thing, and it is surprisingly
 719  tricky, is checking if an asterisk precedes the final slash since
 720  they could be separated by escaped newlines.  If the preprocessor is
 721  invoked with the output comments option, we don't bother removing
 722  escaped newlines and replacing trigraphs for output.
 723
 724  Finally, numbers can begin with a period, which is pushed initially
 725  as a CPP_DOT token in its own right.  The digit handler checks if the
 726  previous token was a CPP_DOT not separated by whitespace, and if so
 727  pops it off the stack and pushes a period into the number's buffer
 728  before calling the number parser.
 729
 730 */
 731
/* Source spellings of the six C digraphs: %: and %:%: are the
   alternative forms of # and ##, and <: :> <% %> those of [ ] { }.
   NOTE(review): the order presumably matches however the users of
   this table index it; confirm before reordering.  */
static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                                    U":>", U"<%", U"%>"};
 734
 735 /* Call when a trigraph is encountered.  It warns if necessary, and
 736    returns true if the trigraph should be honoured.  END is the third
 737    character of a trigraph in the input stream.  */
 738 static int
 739 trigraph_ok (pfile, end)
 740      cpp_reader *pfile;
 741      const unsigned char *end;
 742 {
 743   int accept = CPP_OPTION (pfile, trigraphs);
 744
 745   if (CPP_OPTION (pfile, warn_trigraphs))
 746     {
 747       unsigned int col = end - 1 - pfile->buffer->line_base;
 748       if (accept)
 749         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 750                                "trigraph ??%c converted to %c",
 751                                (int) *end, (int) trigraph_map[*end]);
 752       else
 753         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 754                                "trigraph ??%c ignored", (int) *end);
 755     }
 756   return accept;
 757 }
 758
 759 /* Scan a string for trigraphs, warning or replacing them inline as
 760    appropriate.  When parsing a string, we must call this routine
 761    before processing a newline character (if trigraphs are enabled),
 762    since the newline might be escaped by a preceding backslash
 763    trigraph sequence.  Returns a pointer to the end of the name after
 764    replacement.  */
 765
 766 static unsigned char *
 767 trigraph_replace (pfile, src, limit)
 768      cpp_reader *pfile;
 769      unsigned char *src;
 770      unsigned char *limit;
 771 {
 772   unsigned char *dest;
 773
 774   /* Starting with src[1], find two consecutive '?'.  The case of no
 775      trigraphs is streamlined.  */
 776
 777   for (src++; src + 1 < limit; src += 2)
 778     {
 779       if (src[0] != '?')
 780         continue;
 781
 782       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 783       if (src[-1] == '?')
 784         src--;
 785       else if (src + 2 == limit || src[1] != '?')
 786         continue;
 787
 788       /* Check if it really is a trigraph.  */
 789       if (trigraph_map[src[2]] == 0)
 790         continue;
 791
 792       dest = src;
 793       goto trigraph_found;
 794     }
 795   return limit;
 796
 797   /* Now we have a trigraph, we need to scan the remaining buffer, and
 798      copy-shifting its contents left if replacement is enabled.  */
 799   for (; src + 2 < limit; dest++, src++)
 800     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 801       {
 802       trigraph_found:
 803         src += 2;
 804         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 805           *dest = trigraph_map[*src];
 806       }
 807
 808   /* Copy remaining (at most 2) characters.  */
 809   while (src < limit)
 810     *dest++ = *src++;
 811   return dest;
 812 }
 813
 814 /* If CUR is a backslash or the end of a trigraphed backslash, return
 815    a pointer to its beginning, otherwise NULL.  We don't read beyond
 816    the buffer start, because there is the start of the comment in the
 817    buffer.  */
 818 static const unsigned char *
 819 backslash_start (pfile, cur)
 820      cpp_reader *pfile;
 821      const unsigned char *cur;
 822 {
 823   if (cur[0] == '\\')
 824     return cur;
 825   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 826       && trigraph_ok (pfile, cur))
 827     return cur - 2;
 828   return 0;
 829 }
 830
 831 /* Skip a C-style block comment.  This is probably the trickiest
 832    handler.  We find the end of the comment by seeing if an asterisk
 833    is before every '/' we encounter.  The nasty complication is that a
 834    previous asterisk may be separated by one or more escaped newlines.
 835    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 836 static int
 837 skip_block_comment (pfile)
 838      cpp_reader *pfile;
 839 {
 840   cpp_buffer *buffer = pfile->buffer;
 841   const unsigned char *char_after_star = 0;
 842   const unsigned char *cur = buffer->cur;
 843
 844   for (; cur < buffer->rlimit; )
 845     {
 846       unsigned char c = *cur++;
 847
 848       /* People like decorating comments with '*', so check for
 849          '/' instead for efficiency.  */
 850       if (c == '/')
 851         {
 852           /* Don't view / then * then / as finishing the comment.  */
 853           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 854               || cur - 1 == char_after_star)
 855             {
 856               buffer->cur = cur;
 857               return 0;
 858             }
 859
 860           /* Warn about potential nested comments, but not when
 861              the final character inside the comment is a '/'.
 862              Don't bother to get it right across escaped newlines.  */
 863           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 864               && cur[0] == '*' && cur[1] != '/')
 865             {
 866               buffer->cur = cur;
 867               cpp_warning (pfile, "'/*' within comment");
 868             }
 869         }
 870       else if (is_vspace (c))
 871         {
 872           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 873
 874           handle_newline (cur, buffer->rlimit, c);
 875           /* Work correctly if there is an asterisk before an
 876              arbirtrarily long sequence of escaped newlines.  */
 877           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 878             char_after_star = cur;
 879           else
 880             char_after_star = 0;
 881         }
 882       else if (c == '\t')
 883         adjust_column (pfile, cur - 1);
 884     }
 885
 886   buffer->cur = cur;
 887   return 1;
 888 }
 889
 890 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 891    non-zero if a multiline comment.  */
 892 static int
 893 skip_line_comment (pfile)
 894      cpp_reader *pfile;
 895 {
 896   cpp_buffer *buffer = pfile->buffer;
 897   register const unsigned char *cur = buffer->cur;
 898   int multiline = 0;
 899
 900   for (; cur < buffer->rlimit; )
 901     {
 902       unsigned char c = *cur++;
 903
 904       if (is_vspace (c))
 905         {
 906           /* Check for a (trigaph?) backslash escaping the newline.  */
 907           if (!backslash_start (pfile, cur - 2))
 908             goto out;
 909           multiline = 1;
 910           handle_newline (cur, buffer->rlimit, c);
 911         }
 912     }
 913   cur++;
 914
 915  out:
 916   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 917   return multiline;
 918 }
 919
 920 /* TAB points to a \t character.  Update col_adjust so we track the
 921    column correctly.  */
 922 static void
 923 adjust_column (pfile, tab)
 924      cpp_reader *pfile;
 925      const U_CHAR *tab;
 926 {
 927   /* Zero-based column.  */
 928   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
 929
 930   /* Round it up to multiple of the tabstop, but subtract 1 since the
 931      tab itself occupies a character position.  */
 932   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
 933                         - col % CPP_OPTION (pfile, tabstop)) - 1;
 934 }
 935
 936 /* Skips whitespace, stopping at next non-whitespace character.
 937    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
 938    to be assigned the correct column.  */
 939 static void
 940 skip_whitespace (pfile, in_directive)
 941      cpp_reader *pfile;
 942      int in_directive;
 943 {
 944   cpp_buffer *buffer = pfile->buffer;
 945   unsigned short warned = 0;
 946
 947   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 948   while (buffer->cur < buffer->rlimit)
 949     {
 950       unsigned char c = *buffer->cur;
 951
 952       if (!is_nvspace (c))
 953         break;
 954
 955       buffer->cur++;
 956       /* Horizontal space always OK.  */
 957       if (c == ' ')
 958         continue;
 959       else if (c == '\t')
 960         adjust_column (pfile, buffer->cur - 1);
 961       /* Must be \f \v or \0.  */
 962       else if (c == '\0')
 963         {
 964           if (!warned)
 965             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 966                                    CPP_BUF_COL (buffer),
 967                                    "embedded null character ignored");
 968           warned = 1;
 969         }
 970       else if (in_directive && CPP_PEDANTIC (pfile))
 971         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 972                                CPP_BUF_COL (buffer),
 973                                "%s in preprocessing directive",
 974                                c == '\f' ? "form feed" : "vertical tab");
 975     }
 976 }
 977
 978 /* Parse (append) an identifier.  Calculates the hash value of the
 979    token while parsing, for performance.  The algorithm *must* match
 980    cpp_lookup().  */
 981 static const U_CHAR *
 982 parse_name (pfile, tok, cur, rlimit)
 983      cpp_reader *pfile;
 984      cpp_token *tok;
 985      const U_CHAR *cur, *rlimit;
 986 {
 987   const U_CHAR *name;
 988   unsigned int len;
 989   unsigned int r;
 990
 991   name = cur;
 992   r = 0;
 993   while (cur < rlimit)
 994     {
 995       if (! is_idchar (*cur))
 996         break;
 997       /* $ is not a identifier character in the standard, but is
 998          commonly accepted as an extension.  Don't warn about it in
 999          skipped conditional blocks. */
1000       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1001         {
1002           CPP_BUFFER (pfile)->cur = cur;
1003           cpp_pedwarn (pfile, "'$' character in identifier");
1004         }
1005
1006       r = HASHSTEP (r, cur);
1007       cur++;
1008     }
1009   len = cur - name;
1010
1011   if (tok->type == CPP_NAME && tok->val.node == 0)
1012     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
1013   else
1014     {
1015       unsigned int oldlen;
1016       U_CHAR *newname;
1017
1018       if (tok->type == CPP_NAME)
1019         oldlen = tok->val.node->length;
1020       else
1021         oldlen = 1;
1022
1023       newname = alloca (oldlen + len);
1024
1025       if (tok->type == CPP_NAME)
1026         memcpy (newname, tok->val.node->name, oldlen);
1027       else
1028         newname[0] = tok->val.aux;
1029       memcpy (newname + oldlen, name, len);
1030       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1031       tok->type = CPP_NAME;
1032     }
1033
1034   return cur;
1035 }
1036
1037 /* Parse (append) a number.  */
1038 static void
1039 parse_number (pfile, list, name)
1040      cpp_reader *pfile;
1041      cpp_toklist *list;
1042      cpp_string *name;
1043 {
1044   const unsigned char *name_limit;
1045   unsigned char *namebuf;
1046   cpp_buffer *buffer = pfile->buffer;
1047   register const unsigned char *cur = buffer->cur;
1048
1049  expanded:
1050   name_limit = list->namebuf + list->name_cap;
1051   namebuf = list->namebuf + list->name_used;
1052
1053   for (; cur < buffer->rlimit && namebuf < name_limit; )
1054     {
1055       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1056
1057       /* Perhaps we should accept '$' here if we accept it for
1058          identifiers.  We know namebuf[-1] is safe, because for c to
1059          be a sign we must have pushed at least one character.  */
1060       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1061         goto out;
1062
1063       namebuf++;
1064       cur++;
1065     }
1066
1067   /* Run out of name space?  */
1068   if (cur < buffer->rlimit)
1069     {
1070       list->name_used = namebuf - list->namebuf;
1071       auto_expand_name_space (list);
1072       goto expanded;
1073     }
1074
1075  out:
1076   buffer->cur = cur;
1077   name->len = namebuf - name->text;
1078   list->name_used = namebuf - list->namebuf;
1079 }
1080
1081 /* Places a string terminated by an unescaped TERMINATOR into a
1082    cpp_string, which should be expandable and thus at the top of the
1083    list's stack.  Handles embedded trigraphs, if necessary, and
1084    escaped newlines.
1085
1086    Can be used for character constants (terminator = '\''), string
1087    constants ('"') and angled headers ('>').  Multi-line strings are
1088    allowed, except for within directives.  */
1089
1090 static void
1091 parse_string (pfile, list, token, terminator)
1092      cpp_reader *pfile;
1093      cpp_toklist *list;
1094      cpp_token *token;
1095      unsigned int terminator;
1096 {
1097   cpp_buffer *buffer = pfile->buffer;
1098   cpp_string *name = &token->val.str;
1099   register const unsigned char *cur = buffer->cur;
1100   const unsigned char *name_limit;
1101   unsigned char *namebuf;
1102   unsigned int null_count = 0;
1103   unsigned int trigraphed = list->name_used;
1104
1105  expanded:
1106   name_limit = list->namebuf + list->name_cap;
1107   namebuf = list->namebuf + list->name_used;
1108
1109   for (; cur < buffer->rlimit && namebuf < name_limit; )
1110     {
1111       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1112
1113       if (c == '\0')
1114         null_count++;
1115       else if (c == terminator || is_vspace (c))
1116         {
1117           /* Needed for trigraph_replace and multiline string warning.  */
1118           buffer->cur = cur;
1119
1120           /* Scan for trigraphs before checking if backslash-escaped.  */
1121           if ((CPP_OPTION (pfile, trigraphs)
1122                || CPP_OPTION (pfile, warn_trigraphs))
1123               && namebuf - (list->namebuf + trigraphed) >= 3)
1124             {
1125               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1126                                           namebuf);
1127               /* The test above guarantees trigraphed will be positive.  */
1128               trigraphed = namebuf - list->namebuf - 2;
1129             }
1130
1131           namebuf--;     /* Drop the newline / terminator from the name.  */
1132           if (is_vspace (c))
1133             {
1134               /* Drop a backslash newline, and continue. */
1135               if (namebuf[-1] == '\\')
1136                 {
1137                   handle_newline (cur, buffer->rlimit, c);
1138                   namebuf--;
1139                   continue;
1140                 }
1141
1142               cur--;
1143
1144               /* In assembly language, silently terminate strings of
1145                  either variety at end of line.  This is a kludge
1146                  around not knowing where comments are.  */
1147               if (CPP_OPTION (pfile, lang_asm))
1148                 goto out;
1149
1150               /* Character constants and header names may not extend
1151                  over multiple lines.  In Standard C, neither may
1152                  strings.  We accept multiline strings as an
1153                  extension.  (Even in directives - otherwise, glibc's
1154                  longlong.h breaks.)  */
1155               if (terminator != '"')
1156                 goto unterminated;
1157
1158               cur++;  /* Move forwards again.  */
1159
1160               if (pfile->multiline_string_line == 0)
1161                 {
1162                   pfile->multiline_string_line = token->line;
1163                   pfile->multiline_string_column = token->col;
1164                   if (CPP_PEDANTIC (pfile))
1165                     cpp_pedwarn (pfile, "multi-line string constant");
1166                 }
1167
1168               *namebuf++ = '\n';
1169               handle_newline (cur, buffer->rlimit, c);
1170             }
1171           else
1172             {
1173               unsigned char *temp;
1174
1175               /* An odd number of consecutive backslashes represents
1176                  an escaped terminator.  */
1177               temp = namebuf - 1;
1178               while (temp >= name->text && *temp == '\\')
1179                 temp--;
1180
1181               if ((namebuf - temp) & 1)
1182                 goto out;
1183               namebuf++;
1184             }
1185         }
1186     }
1187
1188   /* Run out of name space?  */
1189   if (cur < buffer->rlimit)
1190     {
1191       list->name_used = namebuf - list->namebuf;
1192       auto_expand_name_space (list);
1193       goto expanded;
1194     }
1195
1196   /* We may not have trigraph-replaced the input for this code path,
1197      but as the input is in error by being unterminated we don't
1198      bother.  Prevent warnings about no newlines at EOF.  */
1199   if (is_vspace (cur[-1]))
1200     cur--;
1201
1202  unterminated:
1203   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1204
1205   if (terminator == '\"' && pfile->multiline_string_line != list->line
1206       && pfile->multiline_string_line != 0)
1207     {
1208       cpp_error_with_line (pfile, pfile->multiline_string_line,
1209                            pfile->multiline_string_column,
1210                            "possible start of unterminated string literal");
1211       pfile->multiline_string_line = 0;
1212     }
1213
1214  out:
1215   buffer->cur = cur;
1216   name->len = namebuf - name->text;
1217   list->name_used = namebuf - list->namebuf;
1218
1219   if (null_count > 0)
1220     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1221                          : "null character preserved"));
1222 }
1223
1224 /* The character TYPE helps us distinguish comment types: '*' = C
1225    style, '/' = C++ style.  For code simplicity, the stored comment
1226    includes the comment start and any terminator.  */
1227
1228 #define COMMENT_START_LEN 2
1229 static void
1230 save_comment (list, token, from, len, type)
1231      cpp_toklist *list;
1232      cpp_token *token;
1233      const unsigned char *from;
1234      unsigned int len;
1235      unsigned int type;
1236 {
1237   unsigned char *buffer;
1238
1239   len += COMMENT_START_LEN;
1240
1241   if (list->name_used + len > list->name_cap)
1242     _cpp_expand_name_space (list, len);
1243
1244   INIT_TOKEN_STR (list, token);
1245   token->type = CPP_COMMENT;
1246   token->val.str.len = len;
1247
1248   buffer = list->namebuf + list->name_used;
1249   list->name_used += len;
1250
1251   /* Copy the comment.  */
1252   if (type == '*')
1253     {
1254       *buffer++ = '/';
1255       *buffer++ = '*';
1256     }
1257   else
1258     {
1259       *buffer++ = type;
1260       *buffer++ = type;
1261     }
1262   memcpy (buffer, from, len - COMMENT_START_LEN);
1263 }
1264
1265 /*
1266  *  The tokenizer's main loop.  Returns a token list, representing a
1267  *  logical line in the input file.  On EOF after some tokens have
1268  *  been processed, we return immediately.  Then in next call, or if
1269  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1270  *  token is placed in the list.
1271  *
1272  *  Implementation relies almost entirely on lookback, rather than
1273  *  looking forwards.  This means that tokenization requires just
1274  *  a single pass of the file, even in the presence of trigraphs and
1275  *  escaped newlines, providing significant performance benefits.
1276  *  Trigraph overhead is negligible if they are disabled, and low
1277  *  even when enabled.
1278  */
1279
/* Helpers for lex_line; they refer to its LIST, CUR_TOKEN and
   FIRST_TOKEN by name.  */

/* Non-zero when list->directive has been set for this line.  */
#define KNOWN_DIRECTIVE() (list->directive != 0)

/* Non-zero when CUR_TOKEN is the second token lexed by this call and
   the token before it is a CPP_HASH.  */
#define MIGHT_BE_DIRECTIVE() \
  (cur_token == list->tokens + (first_token + 1) \
   && cur_token[-1].type == CPP_HASH)
1283
1284 static void
1285 lex_line (pfile, list)
1286      cpp_reader *pfile;
1287      cpp_toklist *list;
1288 {
1289   cpp_token *cur_token, *token_limit, *first;
1290   cpp_buffer *buffer = pfile->buffer;
1291   const unsigned char *cur = buffer->cur;
1292   unsigned char flags = 0;
1293   unsigned int first_token = list->tokens_used;
1294
1295   if (!(list->flags & LIST_OFFSET))
1296     (abort) ();
1297
1298   list->file = buffer->nominal_fname;
1299   list->line = CPP_BUF_LINE (buffer);
1300   pfile->col_adjust = 0;
1301   pfile->in_lex_line = 1;
1302   if (cur == buffer->buf)
1303     list->flags |= BEG_OF_FILE;
1304
1305  expanded:
1306   token_limit = list->tokens + list->tokens_cap;
1307   cur_token = list->tokens + list->tokens_used;
1308
1309   for (; cur < buffer->rlimit && cur_token < token_limit;)
1310     {
1311       unsigned char c;
1312
1313       /* Optimize non-vertical whitespace skipping; most tokens are
1314          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1315       c = *cur;
1316       if (is_nvspace (c))
1317         {
1318           buffer->cur = cur;
1319           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1320                                    && cur_token > &list->tokens[first_token]));
1321           cur = buffer->cur;
1322
1323           flags = PREV_WHITE;
1324           if (cur == buffer->rlimit)
1325             break;
1326           c = *cur;
1327         }
1328       cur++;
1329
1330       /* Initialize current token.  CPP_EOF will not be fixed up by
1331          expand_name_space.  */
1332       list->tokens_used = cur_token - list->tokens + 1;
1333       cur_token->type = CPP_EOF;
1334       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1335       cur_token->line = CPP_BUF_LINE (buffer);
1336       cur_token->flags = flags;
1337       flags = 0;
1338
1339       switch (c)
1340         {
1341         case '0': case '1': case '2': case '3': case '4':
1342         case '5': case '6': case '7': case '8': case '9':
1343           {
1344             int prev_dot;
1345
1346             cur--;              /* Backup character.  */
1347             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1348             if (prev_dot)
1349               cur_token--;
1350             INIT_TOKEN_STR (list, cur_token);
1351             /* Prepend an immediately previous CPP_DOT token.  */
1352             if (prev_dot)
1353               {
1354                 if (list->name_cap == list->name_used)
1355                   auto_expand_name_space (list);
1356
1357                 cur_token->val.str.len = 1;
1358                 list->namebuf[list->name_used++] = '.';
1359               }
1360
1361           continue_number:
1362             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1363             buffer->cur = cur;
1364             parse_number (pfile, list, &cur_token->val.str);
1365             cur = buffer->cur;
1366           }
1367           /* Check for # 123 form of #line.  */
1368           if (MIGHT_BE_DIRECTIVE ())
1369             list->directive = _cpp_check_linemarker (pfile, cur_token,
1370                                                      !(cur_token[-1].flags
1371                                                        & PREV_WHITE));
1372           cur_token++;
1373           break;
1374
1375         letter:
1376         case '_':
1377         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1378         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1379         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1380         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1381         case 'y': case 'z':
1382         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1383         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1384         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1385         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1386         case 'Y': case 'Z':
1387           cur--;                     /* Backup character.  */
1388
1389           /* In Objective C, '@' may begin certain keywords.  */
1390           if (CPP_OPTION (pfile, objc) && cur_token[-1].type == CPP_OTHER
1391               && cur_token[-1].val.aux == '@' && IMMED_TOKEN ())
1392             cur_token--;
1393           else
1394             {
1395               cur_token->val.node = 0;
1396               cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1397             }
1398
1399         continue_name:
1400           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1401
1402           if (MIGHT_BE_DIRECTIVE ())
1403             list->directive = _cpp_check_directive (pfile, cur_token,
1404                                                     !(list->tokens[0].flags
1405                                                       & PREV_WHITE));
1406           /* Convert named operators to their proper types.  */
1407           if (cur_token->val.node->type == T_OPERATOR)
1408             {
1409               cur_token->flags |= NAMED_OP;
1410               cur_token->type = cur_token->val.node->value.code;
1411             }
1412
1413           cur_token++;
1414           break;
1415
1416         case '\'':
1417           cur_token->type = CPP_CHAR;
1418           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1419               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1420             BACKUP_TOKEN (CPP_WCHAR);
1421           goto do_parse_string;
1422
1423         case '\"':
1424           cur_token->type = CPP_STRING;
1425           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1426               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1427             BACKUP_TOKEN (CPP_WSTRING);
1428           else if (CPP_OPTION (pfile, objc)
1429                    && cur_token[-1].type == CPP_OTHER && IMMED_TOKEN ()
1430                    && cur_token[-1].val.aux == '@')
1431             BACKUP_TOKEN (CPP_OSTRING);
1432
1433         do_parse_string:
1434           /* Here c is one of ' " or >.  */
1435           INIT_TOKEN_STR (list, cur_token);
1436           buffer->cur = cur;
1437           parse_string (pfile, list, cur_token, c);
1438           cur = buffer->cur;
1439           cur_token++;
1440           break;
1441
1442         case '/':
1443           cur_token->type = CPP_DIV;
1444           if (IMMED_TOKEN ())
1445             {
1446               if (PREV_TOKEN_TYPE == CPP_DIV)
1447                 {
1448                   /* We silently allow C++ comments in system headers,
1449                      irrespective of conformance mode, because lots of
1450                      broken systems do that and trying to clean it up
1451                      in fixincludes is a nightmare.  */
1452                   if (CPP_IN_SYSTEM_HEADER (pfile))
1453                     goto do_line_comment;
1454                   else if (CPP_OPTION (pfile, cplusplus_comments))
1455                     {
1456                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1457                           && ! buffer->warned_cplusplus_comments)
1458                         {
1459                           buffer->cur = cur;
1460                           cpp_pedwarn (pfile,
1461                              "C++ style comments are not allowed in ISO C89");
1462                           cpp_pedwarn (pfile,
1463                           "(this will be reported only once per input file)");
1464                           buffer->warned_cplusplus_comments = 1;
1465                         }
1466                     do_line_comment:
1467                       buffer->cur = cur;
1468 #if 0 /* Leave until new lexer in place.  */
1469                       if (cur[-2] != c)
1470                         cpp_warning (pfile,
1471                                      "comment start split across lines");
1472 #endif
1473                       if (skip_line_comment (pfile))
1474                         cpp_warning (pfile, "multi-line comment");
1475
1476                       /* Back-up to first '-' or '/'.  */
1477                       cur_token--;
1478                       if (!CPP_OPTION (pfile, discard_comments)
1479                           && (!KNOWN_DIRECTIVE()
1480                               || (list->directive->flags & COMMENTS)))
1481                         save_comment (list, cur_token++, cur,
1482                                       buffer->cur - cur, c);
1483                       else
1484                         flags = PREV_WHITE;
1485
1486                       cur = buffer->cur;
1487                       break;
1488                     }
1489                 }
1490             }
1491           cur_token++;
1492           break;
1493
1494         case '*':
1495           cur_token->type = CPP_MULT;
1496           if (IMMED_TOKEN ())
1497             {
1498               if (PREV_TOKEN_TYPE == CPP_DIV)
1499                 {
1500                   buffer->cur = cur;
1501 #if 0 /* Leave until new lexer in place.  */
1502                   if (cur[-2] != '/')
1503                     cpp_warning (pfile,
1504                                  "comment start '/*' split across lines");
1505 #endif
1506                   if (skip_block_comment (pfile))
1507                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1508                                          "unterminated comment");
1509 #if 0 /* Leave until new lexer in place.  */
1510                   else if (buffer->cur[-2] != '*')
1511                     cpp_warning (pfile,
1512                                  "comment end '*/' split across lines");
1513 #endif
1514                   /* Back up to opening '/'.  */
1515                   cur_token--;
1516                   if (!CPP_OPTION (pfile, discard_comments)
1517                       && (!KNOWN_DIRECTIVE()
1518                           || (list->directive->flags & COMMENTS)))
1519                     save_comment (list, cur_token++, cur,
1520                                   buffer->cur - cur, c);
1521                   else
1522                     flags = PREV_WHITE;
1523
1524                   cur = buffer->cur;
1525                   break;
1526                 }
1527               else if (CPP_OPTION (pfile, cplusplus))
1528                 {
1529                   /* In C++, there are .* and ->* operators.  */
1530                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1531                     BACKUP_TOKEN (CPP_DEREF_STAR);
1532                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1533                     BACKUP_TOKEN (CPP_DOT_STAR);
1534                 }
1535             }
1536           cur_token++;
1537           break;
1538
1539         case '\n':
1540         case '\r':
1541           handle_newline (cur, buffer->rlimit, c);
1542           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1543             {
1544               if (IMMED_TOKEN ())
1545                 {
1546                   /* Remove the escaped newline.  Then continue to process
1547                      any interrupted name or number.  */
1548                   cur_token--;
1549                   /* Backslash-newline may not be immediately followed by
1550                      EOF (C99 5.1.1.2).  */
1551                   if (cur >= buffer->rlimit)
1552                     {
1553                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1554                       break;
1555                     }
1556                   if (IMMED_TOKEN ())
1557                     {
1558                       cur_token--;
1559                       if (cur_token->type == CPP_NAME)
1560                         goto continue_name;
1561                       else if (cur_token->type == CPP_NUMBER)
1562                         goto continue_number;
1563                       cur_token++;
1564                     }
1565                   /* Remember whitespace setting.  */
1566                   flags = cur_token->flags;
1567                   break;
1568                 }
1569               else
1570                 {
1571                   buffer->cur = cur;
1572                   cpp_warning (pfile,
1573                                "backslash and newline separated by space");
1574                 }
1575             }
1576           else if (MIGHT_BE_DIRECTIVE ())
1577             {
1578               /* "Null directive." C99 6.10.7: A preprocessing
1579                  directive of the form # <new-line> has no effect.
1580
1581                  But it is still a directive, and therefore disappears
1582                  from the output. */
1583               cur_token--;
1584               if (cur_token->flags & PREV_WHITE
1585                   && CPP_WTRADITIONAL (pfile))
1586                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1587             }
1588
1589           /* Skip vertical space until we have at least one token to
1590              return.  */
1591           if (cur_token != &list->tokens[first_token])
1592             goto out;
1593           list->line = CPP_BUF_LINE (buffer);
1594           break;
1595
1596         case '-':
1597           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1598             REVISE_TOKEN (CPP_MINUS_MINUS);
1599           else
1600             PUSH_TOKEN (CPP_MINUS);
1601           break;
1602
1603         make_hash:
1604         case '#':
1605           /* The digraph flag checking ensures that ## and %:%:
1606              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1607           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1608               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1609             REVISE_TOKEN (CPP_PASTE);
1610           else
1611             PUSH_TOKEN (CPP_HASH);
1612           break;
1613
1614         case ':':
1615           cur_token->type = CPP_COLON;
1616           if (IMMED_TOKEN ())
1617             {
1618               if (PREV_TOKEN_TYPE == CPP_COLON
1619                   && CPP_OPTION (pfile, cplusplus))
1620                 BACKUP_TOKEN (CPP_SCOPE);
1621               else if (CPP_OPTION (pfile, digraphs))
1622                 {
1623                   /* Digraph: "<:" is a '['  */
1624                   if (PREV_TOKEN_TYPE == CPP_LESS)
1625                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1626                   /* Digraph: "%:" is a '#'  */
1627                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1628                     {
1629                       (--cur_token)->flags |= DIGRAPH;
1630                       goto make_hash;
1631                     }
1632                 }
1633             }
1634           cur_token++;
1635           break;
1636
1637         case '&':
1638           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1639             REVISE_TOKEN (CPP_AND_AND);
1640           else
1641             PUSH_TOKEN (CPP_AND);
1642           break;
1643
1644         make_or:
1645         case '|':
1646           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1647             REVISE_TOKEN (CPP_OR_OR);
1648           else
1649             PUSH_TOKEN (CPP_OR);
1650           break;
1651
1652         case '+':
1653           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1654             REVISE_TOKEN (CPP_PLUS_PLUS);
1655           else
1656             PUSH_TOKEN (CPP_PLUS);
1657           break;
1658
1659         case '=':
1660             /* This relies on equidistance of "?=" and "?" tokens.  */
1661           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1662             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1663           else
1664             PUSH_TOKEN (CPP_EQ);
1665           break;
1666
1667         case '>':
1668           cur_token->type = CPP_GREATER;
1669           if (IMMED_TOKEN ())
1670             {
1671               if (PREV_TOKEN_TYPE == CPP_GREATER)
1672                 BACKUP_TOKEN (CPP_RSHIFT);
1673               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1674                 BACKUP_TOKEN (CPP_DEREF);
1675               else if (CPP_OPTION (pfile, digraphs))
1676                 {
1677                   /* Digraph: ":>" is a ']'  */
1678                   if (PREV_TOKEN_TYPE == CPP_COLON)
1679                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1680                   /* Digraph: "%>" is a '}'  */
1681                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1682                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1683                 }
1684             }
1685           cur_token++;
1686           break;
1687
1688         case '<':
1689           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1690             {
1691               REVISE_TOKEN (CPP_LSHIFT);
1692               break;
1693             }
1694           /* Is this the beginning of a header name?  */
1695           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1696             {
1697               c = '>';  /* Terminator.  */
1698               cur_token->type = CPP_HEADER_NAME;
1699               goto do_parse_string;
1700             }
1701           PUSH_TOKEN (CPP_LESS);
1702           break;
1703
1704         case '%':
1705           /* Digraph: "<%" is a '{'  */
1706           cur_token->type = CPP_MOD;
1707           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1708               && CPP_OPTION (pfile, digraphs))
1709             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1710           cur_token++;
1711           break;
1712
1713         case '?':
1714           if (cur + 1 < buffer->rlimit && *cur == '?'
1715               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1716             {
1717               /* Handle trigraph.  */
1718               cur++;
1719               switch (*cur++)
1720                 {
1721                 case '(': goto make_open_square;
1722                 case ')': goto make_close_square;
1723                 case '<': goto make_open_brace;
1724                 case '>': goto make_close_brace;
1725                 case '=': goto make_hash;
1726                 case '!': goto make_or;
1727                 case '-': goto make_complement;
1728                 case '/': goto make_backslash;
1729                 case '\'': goto make_xor;
1730                 }
1731             }
1732           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1733             {
1734               /* GNU C++ defines <? and >? operators.  */
1735               if (PREV_TOKEN_TYPE == CPP_LESS)
1736                 {
1737                   REVISE_TOKEN (CPP_MIN);
1738                   break;
1739                 }
1740               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1741                 {
1742                   REVISE_TOKEN (CPP_MAX);
1743                   break;
1744                 }
1745             }
1746           PUSH_TOKEN (CPP_QUERY);
1747           break;
1748
1749         case '.':
1750           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1751               && IMMED_TOKEN ()
1752               && !(cur_token[-1].flags & PREV_WHITE))
1753             {
1754               cur_token -= 2;
1755               PUSH_TOKEN (CPP_ELLIPSIS);
1756             }
1757           else
1758             PUSH_TOKEN (CPP_DOT);
1759           break;
1760
1761         make_complement:
1762         case '~': PUSH_TOKEN (CPP_COMPL); break;
1763         make_xor:
1764         case '^': PUSH_TOKEN (CPP_XOR); break;
1765         make_open_brace:
1766         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1767         make_close_brace:
1768         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1769         make_open_square:
1770         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1771         make_close_square:
1772         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1773         make_backslash:
1774         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1775         case '!': PUSH_TOKEN (CPP_NOT); break;
1776         case ',': PUSH_TOKEN (CPP_COMMA); break;
1777         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1778         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1779         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1780
1781         case '$':
1782           if (CPP_OPTION (pfile, dollars_in_ident))
1783             goto letter;
1784           /* Fall through */
1785         default:
1786           cur_token->val.aux = c;
1787           PUSH_TOKEN (CPP_OTHER);
1788           break;
1789         }
1790     }
1791
1792   /* Run out of token space?  */
1793   if (cur_token == token_limit)
1794     {
1795       list->tokens_used = cur_token - list->tokens;
1796       _cpp_expand_token_space (list, 256);
1797       goto expanded;
1798     }
1799
1800   cur_token->flags = flags;
1801   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1802     {
1803       if (cur > buffer->buf && !is_vspace (cur[-1]))
1804         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1805                                CPP_BUF_COLUMN (buffer, cur),
1806                                "no newline at end of file");
1807       cur_token++->type = CPP_EOF;
1808     }
1809
1810  out:
1811   /* All tokens are allocated, so the memory location is fixed.  */
1812   first = &list->tokens[first_token];
1813
1814   /* Don't complain about the null directive, nor directives in
1815      assembly source: we don't know where the comments are, and # may
1816      introduce assembler pseudo-ops.  Don't complain about invalid
1817      directives in skipped conditional groups (6.10 p4).  */
1818   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1819       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1820     {
1821       if (first[1].type == CPP_NAME)
1822         cpp_error (pfile, "invalid preprocessing directive #%s",
1823                    first[1].val.node->name);
1824       else
1825         cpp_error (pfile, "invalid preprocessing directive");
1826     }
1827
1828   /* Put EOF at end of known directives.  This covers "directives do
1829      not extend beyond the end of the line (description 6.10 part 2)".  */
1830   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1831     {
1832       pfile->first_directive_token = first;
1833       cur_token++->type = CPP_EOF;
1834     }
1835
1836   first->flags |= BOL;
1837   if (first_token != 0)
1838     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1839        up the invocation of a function-like macro, new line is
1840        considered a normal white-space character.  */
1841     first->flags |= PREV_WHITE;
1842
1843   buffer->cur = cur;
1844   list->tokens_used = cur_token - list->tokens;
1845   pfile->in_lex_line = 0;
1846 }
1847
1848 /* Write the spelling of a token TOKEN, with any appropriate
1849    whitespace before it, to FP.  PREV is the previous token, which
1850    is used to determine if we need to shove in an extra space in order
1851    to avoid accidental token paste.  If WHITE is 0, do not insert any
1852    leading whitespace.  */
1853 static void
1854 output_token (pfile, fp, token, prev, white)
1855      cpp_reader *pfile;
1856      FILE *fp;
1857      const cpp_token *token, *prev;
1858      int white;
1859 {
1860   if (white)
1861     {
1862       int dummy;
1863
1864       if (token->col && (token->flags & BOL))
1865         {
1866           /* Supply enough whitespace to put this token in its original
1867              column.  Don't bother trying to reconstruct tabs; we can't
1868              get it right in general, and nothing ought to care.  (Yes,
1869              some things do care; the fault lies with them.)  */
1870           unsigned int spaces = token->col - 1;
1871
1872           while (spaces--)
1873             putc (' ', fp);
1874         }
1875       else if (token->flags & PREV_WHITE)
1876         putc (' ', fp);
1877       else
1878       /* Check for and prevent accidental token pasting.
1879          In addition to the cases handled by can_paste, consider
1880
1881          a + ++b - if there is not a space between the + and ++, it
1882          will be misparsed as a++ + b.  But + ## ++ doesn't produce
1883          a valid token.  */
1884         if (prev
1885             && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1886                 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1887                 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1888         putc (' ', fp);
1889     }
1890
1891   switch (TOKEN_SPELL (token))
1892     {
1893     case SPELL_OPERATOR:
1894       {
1895         const unsigned char *spelling;
1896
1897         if (token->flags & DIGRAPH)
1898           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1899         else if (token->flags & NAMED_OP)
1900           goto spell_ident;
1901         else
1902           spelling = TOKEN_NAME (token);
1903
1904         ufputs (spelling, fp);
1905       }
1906       break;
1907
1908     case SPELL_IDENT:
1909       spell_ident:
1910       ufputs (token->val.node->name, fp);
1911       break;
1912
1913     case SPELL_STRING:
1914       {
1915         int left, right, tag;
1916         switch (token->type)
1917           {
1918           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1919           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1920           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1921           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1922           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1923           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1924           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1925           }
1926         if (tag) putc (tag, fp);
1927         if (left) putc (left, fp);
1928         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1929         if (right) putc (right, fp);
1930       }
1931       break;
1932
1933     case SPELL_CHAR:
1934       putc (token->val.aux, fp);
1935       break;
1936
1937     case SPELL_NONE:
1938       /* Placemarker or EOF - no output.  (Macro args are handled
1939          elsewhere.  */
1940       break;
1941     }
1942 }
1943
1944 /* Dump the original user's spelling of argument index ARG_NO to the
1945    macro whose expansion is LIST.  */
1946 static void
1947 dump_param_spelling (fp, list, arg_no)
1948      FILE *fp;
1949      const cpp_toklist *list;
1950      unsigned int arg_no;
1951 {
1952   const U_CHAR *param = list->namebuf;
1953
1954   while (arg_no--)
1955     param += ustrlen (param) + 1;
1956   ufputs (param, fp);
1957 }
1958
1959 /* Output all the tokens of LIST, starting at TOKEN, to FP.  */
1960 void
1961 cpp_output_list (pfile, fp, list, token)
1962      cpp_reader *pfile;
1963      FILE *fp;
1964      const cpp_toklist *list;
1965      const cpp_token *token;
1966 {
1967   const cpp_token *limit = list->tokens + list->tokens_used;
1968   const cpp_token *prev = 0;
1969   int white = 0;
1970
1971   while (token < limit)
1972     {
1973       /* XXX Find some way we can write macro args from inside
1974          output_token/spell_token.  */
1975       if (token->type == CPP_MACRO_ARG)
1976         {
1977           if (white && token->flags & PREV_WHITE)
1978             putc (' ', fp);
1979           if (token->flags & STRINGIFY_ARG)
1980             putc ('#', fp);
1981           dump_param_spelling (fp, list, token->val.aux);
1982         }
1983       else
1984         output_token (pfile, fp, token, prev, white);
1985       if (token->flags & PASTE_LEFT)
1986         fputs (" ##", fp);
1987       prev = token;
1988       token++;
1989       white = 1;
1990     }
1991 }
1992
1993
1994 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1995    already contain the enough space to hold the token's spelling.
1996    Returns a pointer to the character after the last character
1997    written.  */
1998
1999 static unsigned char *
2000 spell_token (pfile, token, buffer)
2001      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
2002      const cpp_token *token;
2003      unsigned char *buffer;
2004 {
2005   switch (TOKEN_SPELL (token))
2006     {
2007     case SPELL_OPERATOR:
2008       {
2009         const unsigned char *spelling;
2010         unsigned char c;
2011
2012         if (token->flags & DIGRAPH)
2013           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
2014         else if (token->flags & NAMED_OP)
2015           goto spell_ident;
2016         else
2017           spelling = TOKEN_NAME (token);
2018
2019         while ((c = *spelling++) != '\0')
2020           *buffer++ = c;
2021       }
2022       break;
2023
2024     case SPELL_IDENT:
2025       spell_ident:
2026       memcpy (buffer, token->val.node->name, token->val.node->length);
2027       buffer += token->val.node->length;
2028       break;
2029
2030     case SPELL_STRING:
2031       {
2032         int left, right, tag;
2033         switch (token->type)
2034           {
2035           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
2036           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
2037           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
2038           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
2039           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
2040           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
2041           default:              left = '\0'; right = '\0'; tag = '\0'; break;
2042           }
2043         if (tag) *buffer++ = tag;
2044         if (left) *buffer++ = left;
2045         memcpy (buffer, token->val.str.text, token->val.str.len);
2046         buffer += token->val.str.len;
2047         if (right) *buffer++ = right;
2048       }
2049       break;
2050
2051     case SPELL_CHAR:
2052       *buffer++ = token->val.aux;
2053       break;
2054
2055     case SPELL_NONE:
2056       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
2057       break;
2058     }
2059
2060   return buffer;
2061 }
2062
2063 /* Macro expansion algorithm.
2064
2065 Macro expansion is implemented by a single-pass algorithm; there are
2066 no rescan passes involved.  cpp_get_token expands just enough to be
2067 able to return a token to the caller; a consequence is that when it
2068 returns, the preprocessor can be in a state of mid-expansion.  The
2069 algorithm does not work by fully expanding a macro invocation into
2070 some kind of token list, and then returning them one by one.
2071
2072 Our expansion state is recorded in a context stack.  We start out with
2073 a single context on the stack, let's call it base context.  This
2074 consists of the token list returned by lex_line that forms the next
2075 logical line in the source file.
2076
2077 The current level in the context stack is stored in the cur_context
2078 member of the cpp_reader structure.  The context it references keeps,
2079 amongst other things, a count of how many tokens form that context and
2080 our position within those tokens.
2081
2082 Fundamentally, calling cpp_get_token will return the next token from
2083 the current context.  If we're at the end of the current context, that
2084 context is popped from the stack first, unless it is the base context,
2085 in which case the next logical line is lexed from the source file.
2086
2087 However, before returning the token, if it is a CPP_NAME token
2088 _cpp_get_token checks to see if it is a macro and if it is enabled.
2089 Each time it encounters a macro name, it calls push_macro_context.
2090 This function checks that the macro should be expanded (with
2091 is_macro_enabled), and if so pushes a new macro context on the stack
2092 which becomes the current context.  It then loops back to read the
2093 first token of the macro context.
2094
2095 A macro context basically consists of the token list representing the
2096 macro's replacement list, which was saved in the hash table by
2097 save_macro_expansion when its #define statement was parsed.  If the
2098 macro is function-like, it also contains the tokens that form the
2099 arguments to the macro.  I say more about macro arguments below, but
2100 for now just saying that each argument is a set of pointers to tokens
2101 is enough.
2102
2103 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2104 token.  This represents an argument passed to the macro, with the
2105 argument number stored in the token's AUX field.  The argument should
2106 be substituted; this is achieved by pushing an "argument context".  An
2107 argument context just refers to the tokens forming the argument,
2108 which are obtained directly from the macro context.  The STRINGIFY
2109 flag on a CPP_MACRO_ARG token indicates that the argument should be
2110 stringified.
2111
2112 Here are a few simple rules the context stack obeys:-
2113
2114   1) The lex_line token list is always context zero.
2115
2116   2) Context 1, if it exists, must be a macro context.
2117
2118   3) An argument context can only appear above a macro context.
2119
2120   4) A macro context can appear above the base context, another macro
2121   context, or an argument context.
2122
2123   5) These imply that the minimal level of an argument context is 2.
2124
2125 The only tricky thing left is ensuring that macros are enabled and
2126 disabled correctly.  The algorithm controls macro expansion by the
2127 level of the context a token is taken from in the context stack.  If a
2128 token is taken from a level equal to no_expand_level (a member of
2129 struct cpp_reader), no expansion is performed.
2130
2131 When popping a context off the stack, if no_expand_level equals the
2132 level of the popped context, it is reduced by one to match the new
2133 context level, so that expansion is still disabled.  It does not
2134 increase if a context is pushed, though.  It starts out life as
2135 UINT_MAX, which has the effect that initially macro expansion is
2136 enabled.  I explain how this mechanism works below.
2137
2138 The standard requires:-
2139
2140   1) Arguments to be fully expanded before substitution.
2141
2142   2) Stringified arguments to not be expanded, nor the tokens
2143   immediately surrounding a ## operator.
2144
2145   3) Continual rescanning until there are no more macros left to
2146   replace.
2147
2148   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2149   expanded again during later rescans.  This prevents infinite
2150   recursion.
2151
2152 The first thing to observe is that stage 3) is mostly redundant.
2153 Since a macro is disabled once it has been expanded, how can a rescan
2154 find an unexpanded macro name?  There are only two cases where this is
2155 possible:-
2156
2157   a) If the macro name results from a token paste operation.
2158
2159   b) If the macro in question is a function-like macro that hasn't
2160   already been expanded because previously there was not the required
2161   '(' token immediately following it.  This is only possible when an
2162   argument is substituted, and after substitution the last token of
2163   the argument can bind with a parenthesis appearing in the tokens
2164   following the substitution.  Note that if the '(' appears within the
2165   argument, the ')' must too, as expanding macro arguments cannot
2166   "suck in" tokens outside the argument.
2167
2168 So we tackle this as follows.  When parsing the macro invocation for
2169 arguments, we record the tokens forming each argument as a list of
2170 pointers to those tokens.  We do not expand any tokens that are "raw",
2171 i.e. directly from the macro invocation, but other tokens that come
2172 from (nested) argument substitution are fully expanded.
2173
2174 This is achieved by setting the no_expand_level to that of the macro
2175 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2176 forming an argument, because parse_args (indirectly) calls
2177 get_raw_token which automatically pushes argument contexts and traces
2178 into them.  Since these contexts are at a higher level than the
2179 no_expand_level, they get fully macro expanded.
2180
2181 "Raw" and non-raw tokens are separated in arguments by null pointers,
2182 with the policy that the initial state of an argument is raw.  If the
2183 first token is not raw, it should be preceded by a null pointer.  When
2184 tracing through the tokens of an argument context, each time
2185 get_raw_token encounters a null pointer, it toggles the flag
2186 CONTEXT_RAW.
2187
2188 This flag, when set, indicates to is_macro_disabled that we are
2189 reading raw tokens which should be macro-expanded.  Similarly, if
2190 clear, is_macro_disabled suppresses re-expansion.
2191
2192 It's probably time for an example.
2193
2194 #define hash #
2195 #define str(x) #x
2196 #define xstr(y) str(y hash)
2197 str(hash)                       // "hash"
2198 xstr(hash)                      // "# hash"
2199
2200 In the invocation of str, parse_args turns off macro expansion and so
2201 parses the argument as <hash>.  This is the only token (pointer)
2202 passed as the argument to str.  Since <hash> is raw there is no need
2203 for an initial null pointer.  stringify_arg is called from
2204 get_raw_token when tracing through the expansion of str, since the
2205 argument has the STRINGIFY flag set.  stringify_arg turns off
2206 macro expansion by setting the no_expand_level to that of the argument
2207 context.  Thus it gets the token <hash> and stringifies it to "hash"
2208 correctly.
2209
2210 Similarly, xstr is passed <hash>.  However, when parse_args is parsing
2211 the invocation of str() in xstr's expansion, get_raw_token encounters
2212 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2213 an argument context, and enters the tokens of the argument,
2214 i.e. <hash>.  This is at a higher context level than parse_args
2215 disabled, and so is_macro_disabled permits expansion of it and a macro
2216 context is pushed on top of the argument context.  This contains the
2217 <#> token, and the end result is that <hash> is macro expanded.
2218 However, after popping off the argument context, the <hash> of xstr's
2219 expansion does not get macro expanded because we're back at the
2220 no_expand_level.  The end result is that the argument passed to str is
2221 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2222 raw, <#> is not raw, but then <hash> is.
2223
2224 */
2225
2226
2227 /* Free the storage allocated for macro arguments.  */
2228 static void
2229 free_macro_args (args)
2230      macro_args *args;
2231 {
2232   if (args->tokens)
2233     free ((PTR) args->tokens);
2234   free (args->ends);
2235   free (args);
2236 }
2237
2238 /* Determines if a macro has been already used (and is therefore
2239    disabled).  */
2240 static int
2241 is_macro_disabled (pfile, expansion, token)
2242      cpp_reader *pfile;
2243      const cpp_toklist *expansion;
2244      const cpp_token *token;
2245 {
2246   cpp_context *context = CURRENT_CONTEXT (pfile);
2247
2248   /* Don't expand anything if this file has already been preprocessed.  */
2249   if (CPP_OPTION (pfile, preprocessed))
2250     return 1;
2251
2252   /* Arguments on either side of ## are inserted in place without
2253      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2254      occurs during a later rescan pass.  The effect is that we expand
2255      iff we would as part of the macro's expansion list, so we should
2256      drop to the macro's context.  */
2257   if (IS_ARG_CONTEXT (context))
2258     {
2259       if (token->flags & PASTED)
2260         context--;
2261       else if (!(context->flags & CONTEXT_RAW))
2262         return 1;
2263       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2264         context--;
2265     }
2266
2267   /* Have we already used this macro?  */
2268   while (context->level > 0)
2269     {
2270       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2271         return 1;
2272       /* Raw argument tokens are judged based on the token list they
2273          came from.  */
2274       if (context->flags & CONTEXT_RAW)
2275         context = pfile->contexts + context->level;
2276       else
2277         context--;
2278     }
2279
2280   /* Function-like macros may be disabled if the '(' is not in the
2281      current context.  We check this without disrupting the context
2282      stack.  */
2283   if (expansion->paramc >= 0)
2284     {
2285       const cpp_token *next;
2286       unsigned int prev_nme;
2287
2288       context = CURRENT_CONTEXT (pfile);
2289       /* Drop down any contexts we're at the end of: the '(' may
2290          appear in lower macro expansions, or in the rest of the file.  */
2291       while (context->posn == context->count && context > pfile->contexts)
2292         {
2293           context--;
2294           /* If we matched, we are disabled, as we appear in the
2295              expansion of each macro we meet.  */
2296           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2297             return 1;
2298         }
2299
2300       prev_nme = pfile->no_expand_level;
2301       pfile->no_expand_level = context - pfile->contexts;
2302       next = _cpp_get_token (pfile);
2303       restore_macro_expansion (pfile, prev_nme);
2304       if (next->type != CPP_OPEN_PAREN)
2305         {
2306           _cpp_push_token (pfile, next);
2307           if (CPP_WTRADITIONAL (pfile))
2308             cpp_warning (pfile,
2309          "function macro %s must be used with arguments in traditional C",
2310                          token->val.node->name);
2311           return 1;
2312         }
2313     }
2314
2315   return 0;
2316 }
2317
2318 /* Add a token to the set of tokens forming the arguments to the macro
2319    being parsed in parse_args.  */
2320 static void
2321 save_token (args, token)
2322      macro_args *args;
2323      const cpp_token *token;
2324 {
2325   if (args->used == args->capacity)
2326     {
2327       args->capacity += args->capacity + 100;
2328       args->tokens = (const cpp_token **)
2329         xrealloc ((PTR) args->tokens,
2330                   args->capacity * sizeof (const cpp_token *));
2331     }
2332   args->tokens[args->used++] = token;
2333 }
2334
2335 /* Take and save raw tokens until we finish one argument.  Empty
2336    arguments are saved as a single CPP_PLACEMARKER token.  */
2337 static const cpp_token *
2338 parse_arg (pfile, var_args, paren_context, args, pcount)
2339      cpp_reader *pfile;
2340      int var_args;
2341      unsigned int paren_context;
2342      macro_args *args;
2343      unsigned int *pcount;
2344 {
2345   const cpp_token *token;
2346   unsigned int paren = 0, count = 0;
2347   int raw, was_raw = 1;
2348
2349   for (count = 0;; count++)
2350     {
2351       token = _cpp_get_token (pfile);
2352
2353       switch (token->type)
2354         {
2355         default:
2356           break;
2357
2358         case CPP_OPEN_PAREN:
2359           paren++;
2360           break;
2361
2362         case CPP_CLOSE_PAREN:
2363           if (paren-- != 0)
2364             break;
2365           goto out;
2366
2367         case CPP_COMMA:
2368           /* Commas are not terminators within parantheses or var_args.  */
2369           if (paren || var_args)
2370             break;
2371           goto out;
2372
2373         case CPP_EOF:           /* Error reported by caller.  */
2374           goto out;
2375         }
2376
2377       raw = pfile->cur_context <= paren_context;
2378       if (raw != was_raw)
2379         {
2380           was_raw = raw;
2381           save_token (args, 0);
2382           count++;
2383         }
2384       save_token (args, token);
2385     }
2386
2387  out:
2388   if (count == 0)
2389     {
2390       /* Duplicate the placemarker.  Then we can set its flags and
2391          position and safely be using more than one.  */
2392       save_token (args, duplicate_token (pfile, &placemarker_token));
2393       count++;
2394     }
2395
2396   *pcount = count;
2397   return token;
2398 }
2399
/* Nonzero if the argument beginning at offset O of arglist A is
   empty, i.e. it is a lone PLACEMARKER token, possibly preceded by a
   null pointer.  The entry tested is the one at O, or the one after
   it when the entry at O is null.  */

#define empty_argument(A, O) \
 ((A)->tokens[(A)->tokens[O] ? (O) : (O) + 1]->type == CPP_PLACEMARKER)
2407
2408 /* Parse the arguments making up a macro invocation.  Nested arguments
2409    are automatically macro expanded, but immediate macros are not
2410    expanded; this enables e.g. operator # to work correctly.  Returns
2411    non-zero on error.  */
2412 static int
2413 parse_args (pfile, hp, args)
2414      cpp_reader *pfile;
2415      cpp_hashnode *hp;
2416      macro_args *args;
2417 {
2418   const cpp_token *token;
2419   const cpp_toklist *macro;
2420   unsigned int total = 0;
2421   unsigned int paren_context = pfile->cur_context;
2422   int argc = 0;
2423
2424   macro = hp->value.expansion;
2425   do
2426     {
2427       unsigned int count;
2428
2429       token = parse_arg (pfile, (argc + 1 == macro->paramc
2430                                  && (macro->flags & VAR_ARGS)),
2431                          paren_context, args, &count);
2432       if (argc < macro->paramc)
2433         {
2434           total += count;
2435           args->ends[argc] = total;
2436         }
2437       argc++;
2438     }
2439   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2440
2441   if (token->type == CPP_EOF)
2442     {
2443       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2444       return 1;
2445     }
2446   else if (argc < macro->paramc)
2447     {
2448       /* A rest argument is allowed to not appear in the invocation at all.
2449          e.g. #define debug(format, args...) ...
2450          debug("string");
2451          This is exactly the same as if the rest argument had received no
2452          tokens - debug("string",);  This extension is deprecated.  */
2453
2454       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2455         {
2456           /* Duplicate the placemarker.  Then we can set its flags and
2457              position and safely be using more than one.  */
2458           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2459           pm->flags = VOID_REST;
2460           save_token (args, pm);
2461           args->ends[argc] = total + 1;
2462
2463           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2464             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2465
2466           return 0;
2467         }
2468       else
2469         {
2470           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2471           return 1;
2472         }
2473     }
2474   /* An empty argument to an empty function-like macro is fine.  */
2475   else if (argc > macro->paramc
2476            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2477     {
2478       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2479       return 1;
2480     }
2481
2482   return 0;
2483 }
2484
/* Copy the LEN bytes at SRC to DEST in quoted form and return a
   pointer just past the last byte written.  A backslash is inserted
   before every backslash and double quote; bytes for which ISPRINT is
   false are written as a backslash followed by exactly three octal
   digits; everything else is copied unchanged.

   At most 4 * LEN bytes are written, and nothing is stored beyond the
   returned pointer: in particular no terminating NUL is appended, so
   a destination of exactly 4 * LEN bytes is always large enough.  */
static U_CHAR *
quote_string (dest, src, len)
     U_CHAR *dest;
     const U_CHAR *src;
     unsigned int len;
{
  while (len--)
    {
      U_CHAR c = *src++;

      if (c == '\\' || c == '"')
        {
          *dest++ = '\\';
          *dest++ = c;
        }
      else if (ISPRINT (c))
        *dest++ = c;
      else
        {
          /* Emit the octal digits by hand.  sprintf would also store
             a NUL after the escape, one byte past what the caller
             budgeted for when this is the final character.  */
          *dest++ = '\\';
          *dest++ = '0' + ((c >> 6) & 7);
          *dest++ = '0' + ((c >> 3) & 7);
          *dest++ = '0' + (c & 7);
        }
    }

  return dest;
}
2516
2517 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2518    CPP_STRING token containing TEXT in quoted form.  */
2519 static cpp_token *
2520 make_string_token (token, text, len)
2521      cpp_token *token;
2522      const U_CHAR *text;
2523      unsigned int len;
2524 {
2525   U_CHAR *buf;
2526
2527   buf = (U_CHAR *) xmalloc (len * 4);
2528   token->type = CPP_STRING;
2529   token->flags = 0;
2530   token->val.str.text = buf;
2531   token->val.str.len = quote_string (buf, text, len) - buf;
2532   return token;
2533 }
2534
2535 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2536    evaluating to NUMBER.  */
2537 static cpp_token *
2538 alloc_number_token (pfile, number)
2539      cpp_reader *pfile;
2540      int number;
2541 {
2542   cpp_token *result;
2543   char *buf;
2544
2545   result = get_temp_token (pfile);
2546   buf = xmalloc (20);
2547   sprintf (buf, "%d", number);
2548
2549   result->type = CPP_NUMBER;
2550   result->flags = 0;
2551   result->val.str.text = (U_CHAR *) buf;
2552   result->val.str.len = strlen (buf);
2553   return result;
2554 }
2555
2556 /* Returns a temporary token from the temporary token store of PFILE.  */
2557 static cpp_token *
2558 get_temp_token (pfile)
2559      cpp_reader *pfile;
2560 {
2561   if (pfile->temp_used == pfile->temp_alloced)
2562     {
2563       if (pfile->temp_used == pfile->temp_cap)
2564         {
2565           pfile->temp_cap += pfile->temp_cap + 20;
2566           pfile->temp_tokens = (cpp_token **) xrealloc
2567             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2568         }
2569       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2570         (sizeof (cpp_token));
2571     }
2572
2573   return pfile->temp_tokens[pfile->temp_used++];
2574 }
2575
2576 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2577 static void
2578 release_temp_tokens (pfile)
2579      cpp_reader *pfile;
2580 {
2581   while (pfile->temp_used)
2582     {
2583       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2584
2585       if (TOKEN_SPELL (token) == SPELL_STRING)
2586         {
2587           free ((char *) token->val.str.text);
2588           token->val.str.text = 0;
2589         }
2590     }
2591 }
2592
2593 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2594 void
2595 _cpp_free_temp_tokens (pfile)
2596      cpp_reader *pfile;
2597 {
2598   if (pfile->temp_tokens)
2599     {
2600       /* It is possible, though unlikely (looking for '(' of a funlike
2601          macro into EOF), that we haven't released the tokens yet.  */
2602       release_temp_tokens (pfile);
2603       while (pfile->temp_alloced)
2604         free (pfile->temp_tokens[--pfile->temp_alloced]);
2605       free (pfile->temp_tokens);
2606     }
2607
2608   if (pfile->date)
2609     {
2610       free ((char *) pfile->date->val.str.text);
2611       free (pfile->date);
2612       free ((char *) pfile->time->val.str.text);
2613       free (pfile->time);
2614     }
2615 }
2616
2617 /* Copy TOKEN into a temporary token from PFILE's store.  */
2618 static cpp_token *
2619 duplicate_token (pfile, token)
2620      cpp_reader *pfile;
2621      const cpp_token *token;
2622 {
2623   cpp_token *result = get_temp_token (pfile);
2624
2625   *result = *token;
2626   if (TOKEN_SPELL (token) == SPELL_STRING)
2627     {
2628       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2629       memcpy (buff, token->val.str.text, token->val.str.len);
2630       result->val.str.text = buff;
2631     }
2632   return result;
2633 }
2634
2635 /* Determine whether two tokens can be pasted together, and if so,
2636    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2637    be pasted, or the appropriate type for the merged token if they
2638    can.  */
2639 static enum cpp_ttype
2640 can_paste (pfile, token1, token2, digraph)
2641      cpp_reader * pfile;
2642      const cpp_token *token1, *token2;
2643      int* digraph;
2644 {
2645   enum cpp_ttype a = token1->type, b = token2->type;
2646   int cxx = CPP_OPTION (pfile, cplusplus);
2647
2648   /* Treat named operators as if they were ordinary NAMEs.  */
2649   if (token1->flags & NAMED_OP)
2650     a = CPP_NAME;
2651   if (token2->flags & NAMED_OP)
2652     b = CPP_NAME;
2653
2654   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2655     return a + (CPP_EQ_EQ - CPP_EQ);
2656
2657   switch (a)
2658     {
2659     case CPP_GREATER:
2660       if (b == a) return CPP_RSHIFT;
2661       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2662       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2663       break;
2664     case CPP_LESS:
2665       if (b == a) return CPP_LSHIFT;
2666       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2667       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2668       if (CPP_OPTION (pfile, digraphs))
2669         {
2670           if (b == CPP_COLON)
2671             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2672           if (b == CPP_MOD)
2673             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2674         }
2675       break;
2676
2677     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2678     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2679     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2680
2681     case CPP_MINUS:
2682       if (b == a)               return CPP_MINUS_MINUS;
2683       if (b == CPP_GREATER)     return CPP_DEREF;
2684       break;
2685     case CPP_COLON:
2686       if (b == a && cxx)        return CPP_SCOPE;
2687       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2688         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2689       break;
2690
2691     case CPP_MOD:
2692       if (CPP_OPTION (pfile, digraphs))
2693         {
2694           if (b == CPP_GREATER)
2695             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2696           if (b == CPP_COLON)
2697             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2698         }
2699       break;
2700     case CPP_DEREF:
2701       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2702       break;
2703     case CPP_DOT:
2704       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2705       if (b == CPP_NUMBER)      return CPP_NUMBER;
2706       break;
2707
2708     case CPP_HASH:
2709       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2710         /* %:%: digraph */
2711         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2712       break;
2713
2714     case CPP_NAME:
2715       if (b == CPP_NAME)        return CPP_NAME;
2716       if (b == CPP_NUMBER
2717           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2718       if (b == CPP_CHAR
2719           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2720       if (b == CPP_STRING
2721           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2722       break;
2723
2724     case CPP_NUMBER:
2725       if (b == CPP_NUMBER)      return CPP_NUMBER;
2726       if (b == CPP_NAME)        return CPP_NUMBER;
2727       if (b == CPP_DOT)         return CPP_NUMBER;
2728       /* Numbers cannot have length zero, so this is safe.  */
2729       if ((b == CPP_PLUS || b == CPP_MINUS)
2730           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2731         return CPP_NUMBER;
2732       break;
2733
2734     case CPP_OTHER:
2735       if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2736         {
2737           if (b == CPP_NAME)    return CPP_NAME;
2738           if (b == CPP_STRING)  return CPP_OSTRING;
2739         }
2740
2741     default:
2742       break;
2743     }
2744
2745   return CPP_EOF;
2746 }
2747
2748 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2749 static const cpp_token *
2750 maybe_paste_with_next (pfile, token)
2751      cpp_reader *pfile;
2752      const cpp_token *token;
2753 {
2754   cpp_token *pasted;
2755   const cpp_token *second;
2756   cpp_context *context = CURRENT_CONTEXT (pfile);
2757
2758   /* Is this token on the LHS of ## ? */
2759
2760   while ((token->flags & PASTE_LEFT)
2761          || ((context->flags & CONTEXT_PASTEL)
2762              && context->posn == context->count))
2763     {
2764       /* Suppress macro expansion for next token, but don't conflict
2765          with the other method of suppression.  If it is an argument,
2766          macro expansion within the argument will still occur.  */
2767       pfile->paste_level = pfile->cur_context;
2768       second = _cpp_get_token (pfile);
2769       pfile->paste_level = 0;
2770
2771       /* Ignore placemarker argument tokens (cannot be from an empty
2772          macro since macros are not expanded).  */
2773       if (token->type == CPP_PLACEMARKER)
2774         pasted = duplicate_token (pfile, second);
2775       else if (second->type == CPP_PLACEMARKER)
2776         {
2777           /* GCC has special extended semantics for , ## b where b is
2778              a varargs parameter: the comma disappears if b was given
2779              no actual arguments (not merely if b is an empty
2780              argument).  */
2781           if (token->type == CPP_COMMA && second->flags & VOID_REST)
2782             pasted = duplicate_token (pfile, second);
2783           else
2784             pasted = duplicate_token (pfile, token);
2785         }
2786       else
2787         {
2788           int digraph = 0;
2789           enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2790
2791           if (type == CPP_EOF)
2792             {
2793               if (CPP_OPTION (pfile, warn_paste))
2794                 {
2795                   /* Do not complain about , ## <whatever> if
2796                      <whatever> came from a variable argument, because
2797                      the author probably intended the ## to trigger
2798                      the special extended semantics (see above).  */
2799                   if (token->type == CPP_COMMA
2800                       && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2801                       && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2802                     /* no warning */;
2803                   else
2804                     cpp_warning (pfile,
2805                         "pasting would not give a valid preprocessing token");
2806                 }
2807               _cpp_push_token (pfile, second);
2808               return token;
2809             }
2810
2811           if (type == CPP_NAME || type == CPP_NUMBER)
2812             {
2813               /* Join spellings.  */
2814               U_CHAR *buf, *end;
2815
2816               pasted = get_temp_token (pfile);
2817               buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2818               end = spell_token (pfile, token, buf);
2819               end = spell_token (pfile, second, end);
2820               *end = '\0';
2821
2822               if (type == CPP_NAME)
2823                 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2824               else
2825                 {
2826                   pasted->val.str.text = uxstrdup (buf);
2827                   pasted->val.str.len = end - buf;
2828                 }
2829             }
2830           else if (type == CPP_WCHAR || type == CPP_WSTRING
2831                    || type == CPP_OSTRING)
2832             pasted = duplicate_token (pfile, second);
2833           else
2834             {
2835               pasted = get_temp_token (pfile);
2836               pasted->val.integer = 0;
2837             }
2838
2839           pasted->type = type;
2840           pasted->flags = digraph ? DIGRAPH : 0;
2841
2842           if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2843             {
2844               pasted->type = pasted->val.node->value.code;
2845               pasted->flags |= NAMED_OP;
2846             }
2847         }
2848
2849       /* The pasted token gets the whitespace flags and position of the
2850          first token, the PASTE_LEFT flag of the second token, plus the
2851          PASTED flag to indicate it is the result of a paste.  However, we
2852          want to preserve the DIGRAPH flag.  */
2853       pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2854       pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2855                         | (second->flags & PASTE_LEFT) | PASTED);
2856       pasted->col = token->col;
2857       pasted->line = token->line;
2858
2859       /* See if there is another token to be pasted onto the one we just
2860          constructed.  */
2861       token = pasted;
2862       context = CURRENT_CONTEXT (pfile);
2863       /* and loop */
2864     }
2865   return token;
2866 }
2867
2868 /* Convert a token sequence to a single string token according to the
2869    rules of the ISO C #-operator.  */
2870 #define INIT_SIZE 200
2871 static cpp_token *
2872 stringify_arg (pfile, token)
2873      cpp_reader *pfile;
2874      const cpp_token *token;
2875 {
2876   cpp_token *result;
2877   unsigned char *main_buf;
2878   unsigned int prev_value, backslash_count = 0;
2879   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2880
2881   push_arg_context (pfile, token);
2882   prev_value  = prevent_macro_expansion (pfile);
2883   main_buf = (unsigned char *) xmalloc (buf_cap);
2884
2885   result = get_temp_token (pfile);
2886   ASSIGN_FLAGS_AND_POS (result, token);
2887
2888   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2889     {
2890       int escape;
2891       unsigned char *buf;
2892       unsigned int len = TOKEN_LEN (token);
2893
2894       if (token->type == CPP_PLACEMARKER)
2895         continue;
2896
2897       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2898                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2899       if (escape)
2900         len *= 4 + 1;
2901
2902       if (buf_used + len > buf_cap)
2903         {
2904           buf_cap = buf_used + len + INIT_SIZE;
2905           main_buf = xrealloc (main_buf, buf_cap);
2906         }
2907
2908       if (whitespace && (token->flags & PREV_WHITE))
2909         main_buf[buf_used++] = ' ';
2910
2911       if (escape)
2912         buf = (unsigned char *) xmalloc (len);
2913       else
2914         buf = main_buf + buf_used;
2915
2916       len = spell_token (pfile, token, buf) - buf;
2917       if (escape)
2918         {
2919           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2920           free (buf);
2921         }
2922       else
2923         buf_used += len;
2924
2925       whitespace = 1;
2926       if (token->type == CPP_BACKSLASH)
2927         backslash_count++;
2928       else
2929         backslash_count = 0;
2930     }
2931
2932   /* Ignore the final \ of invalid string literals.  */
2933   if (backslash_count & 1)
2934     {
2935       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2936       buf_used--;
2937     }
2938
2939   result->type = CPP_STRING;
2940   result->val.str.text = main_buf;
2941   result->val.str.len = buf_used;
2942   restore_macro_expansion (pfile, prev_value);
2943   return result;
2944 }
2945
2946 /* Allocate more room on the context stack of PFILE.  */
2947 static void
2948 expand_context_stack (pfile)
2949      cpp_reader *pfile;
2950 {
2951   pfile->context_cap += pfile->context_cap + 20;
2952   pfile->contexts = (cpp_context *)
2953     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2954 }
2955
2956 /* Push the context of macro NODE onto the context stack.  TOKEN is
2957    the CPP_NAME token invoking the macro.  */
2958 static int
2959 push_macro_context (pfile, token)
2960      cpp_reader *pfile;
2961      const cpp_token *token;
2962 {
2963   unsigned char orig_flags;
2964   macro_args *args;
2965   cpp_context *context;
2966   cpp_hashnode *node = token->val.node;
2967
2968   /* Token's flags may change when parsing args containing a nested
2969      invocation of this macro.  */
2970   orig_flags = token->flags & (PREV_WHITE | BOL);
2971   args = 0;
2972   if (node->value.expansion->paramc >= 0)
2973     {
2974       unsigned int error, prev_nme;
2975
2976       /* Allocate room for the argument contexts, and parse them.  */
2977       args  = (macro_args *) xmalloc (sizeof (macro_args));
2978       args->ends = (unsigned int *)
2979         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2980       args->tokens = 0;
2981       args->capacity = 0;
2982       args->used = 0;
2983       args->level = pfile->cur_context;
2984
2985       prev_nme = prevent_macro_expansion (pfile);
2986       pfile->args = args;
2987       error = parse_args (pfile, node, args);
2988       pfile->args = 0;
2989       restore_macro_expansion (pfile, prev_nme);
2990       if (error)
2991         {
2992           free_macro_args (args);
2993           return 1;
2994         }
2995     }
2996
2997   /* Now push its context.  */
2998   pfile->cur_context++;
2999   if (pfile->cur_context == pfile->context_cap)
3000     expand_context_stack (pfile);
3001
3002   context = CURRENT_CONTEXT (pfile);
3003   context->u.list = node->value.expansion;
3004   context->args = args;
3005   context->posn = 0;
3006   context->count = context->u.list->tokens_used;
3007   context->level = pfile->cur_context;
3008   context->flags = 0;
3009   context->pushed_token = 0;
3010
3011   /* Set the flags of the first token.  We know there must
3012      be one, empty macros are a single placemarker token.  */
3013   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
3014
3015   return 0;
3016 }
3017
3018 /* Push an argument to the current macro onto the context stack.
3019    TOKEN is the MACRO_ARG token representing the argument expansion.  */
3020 static void
3021 push_arg_context (pfile, token)
3022      cpp_reader *pfile;
3023      const cpp_token *token;
3024 {
3025   cpp_context *context;
3026   macro_args *args;
3027
3028   pfile->cur_context++;
3029   if (pfile->cur_context == pfile->context_cap)
3030       expand_context_stack (pfile);
3031
3032   context = CURRENT_CONTEXT (pfile);
3033   args = context[-1].args;
3034
3035   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
3036   context->u.arg = args->tokens + context->count;
3037   context->count = args->ends[token->val.aux] - context->count;
3038   context->args = 0;
3039   context->posn = 0;
3040   context->level = args->level;
3041   context->flags = CONTEXT_ARG | CONTEXT_RAW;
3042   context->pushed_token = 0;
3043
3044   /* Set the flags of the first token.  There is one.  */
3045   {
3046     const cpp_token *first = context->u.arg[0];
3047     if (!first)
3048       first = context->u.arg[1];
3049
3050     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
3051                           token->flags & (PREV_WHITE | BOL));
3052   }
3053
3054   if (token->flags & PASTE_LEFT)
3055     context->flags |= CONTEXT_PASTEL;
3056   if (pfile->paste_level)
3057     context->flags |= CONTEXT_PASTER;
3058 }
3059
3060 /* "Unget" a token.  It is effectively inserted in the token queue and
3061    will be returned by the next call to get_raw_token.  */
3062 void
3063 _cpp_push_token (pfile, token)
3064      cpp_reader *pfile;
3065      const cpp_token *token;
3066 {
3067   cpp_context *context = CURRENT_CONTEXT (pfile);
3068
3069   if (context->posn > 0)
3070     {
3071       const cpp_token *prev;
3072       if (IS_ARG_CONTEXT (context))
3073         prev = context->u.arg[context->posn - 1];
3074       else
3075         prev = &context->u.list->tokens[context->posn - 1];
3076
3077       if (prev == token)
3078         {
3079           context->posn--;
3080           return;
3081         }
3082     }
3083
3084   if (context->pushed_token)
3085     cpp_ice (pfile, "two tokens pushed in a row");
3086   if (token->type != CPP_EOF)
3087     context->pushed_token = token;
3088   /* Don't push back a directive's CPP_EOF, step back instead.  */
3089   else if (pfile->cur_context == 0)
3090     pfile->contexts[0].posn--;
3091 }
3092
3093 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3094    introducing the directive.  */
3095 static void
3096 process_directive (pfile, token)
3097      cpp_reader *pfile;
3098      const cpp_token *token;
3099 {
3100   const struct directive *d = pfile->token_list.directive;
3101   int prev_nme = 0;
3102
3103   /* Skip over the directive name.  */
3104   if (token[1].type == CPP_NAME)
3105     _cpp_get_raw_token (pfile);
3106   else if (token[1].type != CPP_NUMBER)
3107     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
3108
3109   if (! (d->flags & EXPAND))
3110     prev_nme = prevent_macro_expansion (pfile);
3111   (void) (*d->handler) (pfile);
3112   if (! (d->flags & EXPAND))
3113     restore_macro_expansion (pfile, prev_nme);
3114   _cpp_skip_rest_of_line (pfile);
3115 }
3116
3117 /* The external interface to return the next token.  All macro
3118    expansion and directive processing is handled internally, the
3119    caller only ever sees the output after preprocessing.  */
3120 const cpp_token *
3121 cpp_get_token (pfile)
3122      cpp_reader *pfile;
3123 {
3124   const cpp_token *token;
3125   /* Loop till we hit a non-directive, non-placemarker token.  */
3126   for (;;)
3127     {
3128       token = _cpp_get_token (pfile);
3129
3130       if (token->type == CPP_PLACEMARKER)
3131         continue;
3132
3133       if (token->type == CPP_HASH && token->flags & BOL
3134           && pfile->token_list.directive)
3135         {
3136           process_directive (pfile, token);
3137           continue;
3138         }
3139
3140       return token;
3141     }
3142 }
3143
3144 /* The internal interface to return the next token.  There are two
3145    differences between the internal and external interfaces: the
3146    internal interface may return a PLACEMARKER token, and it does not
3147    process directives.  */
3148 const cpp_token *
3149 _cpp_get_token (pfile)
3150      cpp_reader *pfile;
3151 {
3152   const cpp_token *token, *old_token;
3153   cpp_hashnode *node;
3154
3155   /* Loop until we hit a non-macro token.  */
3156   for (;;)
3157     {
3158       token = get_raw_token (pfile);
3159
3160       /* Short circuit EOF. */
3161       if (token->type == CPP_EOF)
3162         return token;
3163
3164       /* If we are skipping... */
3165       if (pfile->skipping)
3166         {
3167           /* we still have to process directives,  */
3168           if (pfile->token_list.directive)
3169             return token;
3170
3171           /* but everything else is ignored.  */
3172           _cpp_skip_rest_of_line (pfile);
3173           continue;
3174         }
3175
3176       /* If there's a potential control macro and we get here, then that
3177          #ifndef didn't cover the entire file and its argument shouldn't
3178          be taken as a control macro.  */
3179       pfile->potential_control_macro = 0;
3180
3181       old_token = token;
3182
3183       /* See if there's a token to paste with this one.  */
3184       if (!pfile->paste_level)
3185         token = maybe_paste_with_next (pfile, token);
3186
3187       /* If it isn't a macro, return it now.  */
3188       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3189         return token;
3190
3191       /* Is macro expansion disabled in general, or are we in the
3192          middle of a token paste, or was this token just pasted?
3193          (Note we don't check token->flags & PASTED, because that
3194          counts tokens that were pasted at some point in the past,
3195          we're only interested in tokens that were pasted by this call
3196          to maybe_paste_with_next.)  */
3197       if (pfile->no_expand_level == pfile->cur_context
3198           || pfile->paste_level
3199           || (token != old_token
3200               && pfile->no_expand_level + 1 == pfile->cur_context))
3201         return token;
3202
3203       node = token->val.node;
3204       if (node->type != T_MACRO)
3205         return special_symbol (pfile, node, token);
3206
3207       if (is_macro_disabled (pfile, node->value.expansion, token))
3208         return token;
3209
3210       if (pfile->cur_context > CPP_STACK_MAX)
3211         {
3212           cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3213           return token;
3214         }
3215
3216       if (push_macro_context (pfile, token))
3217         return token;
3218       /* else loop */
3219     }
3220 }
3221
3222 /* Returns the next raw token, i.e. without performing macro
3223    expansion.  Argument contexts are automatically entered.  */
3224 static const cpp_token *
3225 get_raw_token (pfile)
3226      cpp_reader *pfile;
3227 {
3228   const cpp_token *result;
3229   cpp_context *context;
3230
3231   for (;;)
3232     {
3233       context = CURRENT_CONTEXT (pfile);
3234       if (context->pushed_token)
3235         {
3236           result = context->pushed_token;
3237           context->pushed_token = 0;
3238           return result;        /* Cannot be a CPP_MACRO_ARG */
3239         }
3240       else if (context->posn == context->count)
3241         {
3242           if (pop_context (pfile))
3243             return &eof_token;
3244           continue;
3245         }
3246       else if (IS_ARG_CONTEXT (context))
3247         {
3248           result = context->u.arg[context->posn++];
3249           if (result == 0)
3250             {
3251               context->flags ^= CONTEXT_RAW;
3252               result = context->u.arg[context->posn++];
3253             }
3254           return result;        /* Cannot be a CPP_MACRO_ARG */
3255         }
3256
3257       result = &context->u.list->tokens[context->posn++];
3258
3259       if (result->type != CPP_MACRO_ARG)
3260         return result;
3261
3262       if (result->flags & STRINGIFY_ARG)
3263         return stringify_arg (pfile, result);
3264
3265       push_arg_context (pfile, result);
3266     }
3267 }
3268
3269 /* Internal interface to get the token without macro expanding.  */
3270 const cpp_token *
3271 _cpp_get_raw_token (pfile)
3272      cpp_reader *pfile;
3273 {
3274   int prev_nme = prevent_macro_expansion (pfile);
3275   const cpp_token *result = _cpp_get_token (pfile);
3276   restore_macro_expansion (pfile, prev_nme);
3277   return result;
3278 }
3279
3280 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3281    list should be overwritten, or zero if we need to append
3282    (typically, if we are within the arguments to a macro, or looking
3283    for the '(' to start a function-like macro invocation).  */
3284 static int
3285 lex_next (pfile, clear)
3286      cpp_reader *pfile;
3287      int clear;
3288 {
3289   cpp_toklist *list = &pfile->token_list;
3290   const cpp_token *old_list = list->tokens;
3291   unsigned int old_used = list->tokens_used;
3292
3293   if (clear)
3294     {
3295       /* Release all temporary tokens.  */
3296       _cpp_clear_toklist (list);
3297       pfile->contexts[0].posn = 0;
3298       if (pfile->temp_used)
3299         release_temp_tokens (pfile);
3300     }
3301   lex_line (pfile, list);
3302   pfile->contexts[0].count = list->tokens_used;
3303
3304   if (!clear && pfile->args)
3305     {
3306       /* Fix up argument token pointers.  */
3307       if (old_list != list->tokens)
3308         {
3309           unsigned int i;
3310
3311           for (i = 0; i < pfile->args->used; i++)
3312             {
3313               const cpp_token *token = pfile->args->tokens[i];
3314               if (token >= old_list && token < old_list + old_used)
3315                 pfile->args->tokens[i] = (const cpp_token *)
3316                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3317             }
3318         }
3319
3320       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3321          tokens within the list of arguments that would otherwise act as
3322          preprocessing directives, the behavior is undefined.
3323
3324          This implementation will report a hard error and treat the
3325          'sequence of preprocessing tokens' as part of the macro argument,
3326          not a directive.
3327
3328          Note if pfile->args == 0, we're OK since we're only inside a
3329          macro argument after a '('.  */
3330       if (list->directive)
3331         {
3332           cpp_error_with_line (pfile, list->tokens[old_used].line,
3333                                list->tokens[old_used].col,
3334                                "#%s may not be used inside a macro argument",
3335                                list->directive->name);
3336           return 1;
3337         }
3338     }
3339
3340   return 0;
3341 }
3342
3343 /* Pops a context off the context stack.  If we're at the bottom, lexes
3344    the next logical line.  Returns EOF if we're at the end of the
3345    argument list to the # operator, or we should not "overflow"
3346    into the rest of the file (e.g. 6.10.3.1.1).  */
3347 static int
3348 pop_context (pfile)
3349      cpp_reader *pfile;
3350 {
3351   cpp_context *context;
3352
3353   if (pfile->cur_context == 0)
3354     {
3355       /* If we are currently processing a directive, do not advance.  6.10
3356          paragraph 2: A new-line character ends the directive even if it
3357          occurs within what would otherwise be an invocation of a
3358          function-like macro.  */
3359       if (pfile->token_list.directive)
3360         return 1;
3361
3362       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3363     }
3364
3365   /* Argument contexts, when parsing args or handling # operator
3366      return CPP_EOF at the end.  */
3367   context = CURRENT_CONTEXT (pfile);
3368   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3369     return 1;
3370
3371   /* Free resources when leaving macro contexts.  */
3372   if (context->args)
3373     free_macro_args (context->args);
3374
3375   if (pfile->cur_context == pfile->no_expand_level)
3376     pfile->no_expand_level--;
3377   pfile->cur_context--;
3378
3379   return 0;
3380 }
3381
3382 /* Turn off macro expansion at the current context level.  */
3383 static unsigned int
3384 prevent_macro_expansion (pfile)
3385      cpp_reader *pfile;
3386 {
3387   unsigned int prev_value = pfile->no_expand_level;
3388   pfile->no_expand_level = pfile->cur_context;
3389   return prev_value;
3390 }
3391
3392 /* Restore macro expansion to its previous state.  */
3393 static void
3394 restore_macro_expansion (pfile, prev_value)
3395      cpp_reader *pfile;
3396      unsigned int prev_value;
3397 {
3398   pfile->no_expand_level = prev_value;
3399 }
3400
3401 /* Used by cpperror.c to obtain the correct line and column to report
3402    in a diagnostic.  */
3403 unsigned int
3404 _cpp_get_line (pfile, pcol)
3405      cpp_reader *pfile;
3406      unsigned int *pcol;
3407 {
3408   unsigned int index;
3409   const cpp_token *cur_token;
3410
3411   if (pfile->in_lex_line)
3412     index = pfile->token_list.tokens_used;
3413   else
3414     index = pfile->contexts[0].posn;
3415
3416   if (index == 0)
3417     {
3418       if (pcol)
3419         *pcol = 0;
3420       return 0;
3421     }
3422
3423   cur_token = &pfile->token_list.tokens[index - 1];
3424   if (pcol)
3425     *pcol = cur_token->col;
3426   return cur_token->line;
3427 }
3428
/* Expand the string literal STR into two comma-separated arguments:
   the literal viewed as a const U_CHAR pointer, followed by its
   length without the terminating NUL.  */
#define DSC(str) (const U_CHAR *) (str), sizeof (str) - 1

/* Three-letter English month abbreviations, indexed from 0 (January)
   to 11 (December).  */
static const char *const monthnames[] =
{
  "Jan", "Feb", "Mar", "Apr",
  "May", "Jun", "Jul", "Aug",
  "Sep", "Oct", "Nov", "Dec"
};
3435
3436 /* Handle builtin macros like __FILE__.  */
3437 static const cpp_token *
3438 special_symbol (pfile, node, token)
3439      cpp_reader *pfile;
3440      cpp_hashnode *node;
3441      const cpp_token *token;
3442 {
3443   cpp_token *result;
3444   cpp_buffer *ip;
3445
3446   switch (node->type)
3447     {
3448     case T_FILE:
3449     case T_BASE_FILE:
3450       {
3451         const char *file;
3452
3453         ip = CPP_BUFFER (pfile);
3454         if (ip == 0)
3455           file = "";
3456         else
3457           {
3458             if (node->type == T_BASE_FILE)
3459               while (CPP_PREV_BUFFER (ip) != NULL)
3460                 ip = CPP_PREV_BUFFER (ip);
3461
3462             file = ip->nominal_fname;
3463           }
3464         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3465                                     strlen (file));
3466       }
3467       break;
3468
3469     case T_INCLUDE_LEVEL:
3470       /* pfile->include_depth counts the primary source as level 1,
3471          but historically __INCLUDE_DEPTH__ has called the primary
3472          source level 0.  */
3473       result = alloc_number_token (pfile, pfile->include_depth - 1);
3474       break;
3475
3476     case T_SPECLINE:
3477       /* If __LINE__ is embedded in a macro, it must expand to the
3478          line of the macro's invocation, not its definition.
3479          Otherwise things like assert() will not work properly.  */
3480       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3481       break;
3482
3483     case T_STDC:
3484       {
3485         int stdc = 1;
3486
3487 #ifdef STDC_0_IN_SYSTEM_HEADERS
3488         if (CPP_IN_SYSTEM_HEADER (pfile)
3489             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3490           stdc = 0;
3491 #endif
3492         result = alloc_number_token (pfile, stdc);
3493       }
3494       break;
3495
3496     case T_DATE:
3497     case T_TIME:
3498       if (pfile->date == 0)
3499         {
3500           /* Allocate __DATE__ and __TIME__ from permanent storage,
3501              and save them in pfile so we don't have to do this again.
3502              We don't generate these strings at init time because
3503              time() and localtime() are very slow on some systems.  */
3504           time_t tt = time (NULL);
3505           struct tm *tb = localtime (&tt);
3506
3507           pfile->date = make_string_token
3508             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3509           pfile->time = make_string_token
3510             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3511
3512           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3513                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3514           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3515                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3516         }
3517       result = node->type == T_DATE ? pfile->date: pfile->time;
3518       break;
3519
3520     case T_POISON:
3521       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3522       return token;
3523
3524     default:
3525       cpp_ice (pfile, "invalid special hash type");
3526       return token;
3527     }
3528
3529   ASSIGN_FLAGS_AND_POS (result, token);
3530   return result;
3531 }
3532 #undef DSC
3533
3534 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3535    if it hasn't happened already.  */
3536
3537 void
3538 _cpp_init_input_buffer (pfile)
3539      cpp_reader *pfile;
3540 {
3541   cpp_context *base;
3542
3543   init_trigraph_map ();
3544   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3545   pfile->no_expand_level = UINT_MAX;
3546   pfile->context_cap = 20;
3547   pfile->cur_context = 0;
3548
3549   pfile->contexts = (cpp_context *)
3550     xmalloc (pfile->context_cap * sizeof (cpp_context));
3551
3552   /* Clear the base context.  */
3553   base = &pfile->contexts[0];
3554   base->u.list = &pfile->token_list;
3555   base->posn = 0;
3556   base->count = 0;
3557   base->args = 0;
3558   base->level = 0;
3559   base->flags = 0;
3560   base->pushed_token = 0;
3561 }
3562
3563 /* Moves to the end of the directive line, popping contexts as
3564    necessary.  */
3565 void
3566 _cpp_skip_rest_of_line (pfile)
3567      cpp_reader *pfile;
3568 {
3569   /* Discard all stacked contexts.  */
3570   int i;
3571   for (i = pfile->cur_context; i > 0; i--)
3572     if (pfile->contexts[i].args)
3573       free_macro_args (pfile->contexts[i].args);
3574
3575   if (pfile->no_expand_level <= pfile->cur_context)
3576     pfile->no_expand_level = 0;
3577   pfile->cur_context = 0;
3578
3579   /* Clear the base context, and clear the directive pointer so that
3580      get_raw_token will advance to the next line.  */
3581   pfile->contexts[0].count = 0;
3582   pfile->contexts[0].posn = 0;
3583   pfile->token_list.directive = 0;
3584 }
3585
3586 /* Directive handler wrapper used by the command line option
3587    processor.  */
3588 void
3589 _cpp_run_directive (pfile, dir, buf, count)
3590      cpp_reader *pfile;
3591      const struct directive *dir;
3592      const char *buf;
3593      size_t count;
3594 {
3595   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3596     {
3597       unsigned int prev_lvl = 0;
3598
3599       /* Scan the line now, else prevent_macro_expansion won't work.  */
3600       lex_next (pfile, 1);
3601       if (! (dir->flags & EXPAND))
3602         prev_lvl = prevent_macro_expansion (pfile);
3603
3604       (void) (*dir->handler) (pfile);
3605
3606       if (! (dir->flags & EXPAND))
3607         restore_macro_expansion (pfile, prev_lvl);
3608
3609       _cpp_skip_rest_of_line (pfile);
3610       cpp_pop_buffer (pfile);
3611     }
3612 }