gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Check line numbers assigned to all errors.
  28 o Replace strncmp with memcmp almost everywhere.
  29 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  30 o Distinguish integers, floats, and 'other' pp-numbers.
  31 o Store ints and char constants as binary values.
  32 o New command-line assertion syntax.
  33 o Work towards functions in cpperror.c taking a message level parameter.
  34   If we do this, merge the common code of do_warning and do_error.
  35 o Comment all functions, and describe macro expansion algorithm.
  36 o Move as much out of header files as possible.
  37 o Remove single quote pairs `', and some '', from diagnostics.
  38 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  39
  40 */
  41
  42 #include "config.h"
  43 #include "system.h"
  44 #include "intl.h"
  45 #include "cpplib.h"
  46 #include "cpphash.h"
  47 #include "symcat.h"
  48
  49 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
  50 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  51
  52 /* Flags for cpp_context.  */
  53 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  54 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  55 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  56 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  57
  58 typedef struct cpp_context cpp_context;
  59 struct cpp_context
  60 {
  61   union
  62   {
  63     const cpp_toklist *list;    /* Used for macro contexts only.  */
  64     const cpp_token **arg;      /* Used for arg contexts only.  */
  65   } u;
  66
  67   /* Pushed token to be returned by next call to get_raw_token.  */
  68   const cpp_token *pushed_token;
  69
  70   struct macro_args *args;      /* The arguments for a function-like
  71                                    macro.  NULL otherwise.  */
  72   unsigned short posn;          /* Current posn, index into u.  */
  73   unsigned short count;         /* No. of tokens in u.  */
  74   unsigned short level;
  75   unsigned char flags;
  76 };
  77
  78 typedef struct macro_args macro_args;
  79 struct macro_args
  80 {
  81   unsigned int *ends;
  82   const cpp_token **tokens;
  83   unsigned int capacity;
  84   unsigned int used;
  85   unsigned short level;
  86 };
  87
  88 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  89 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  90                                            macro_args *, unsigned int *));
  91 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  92 static void save_token PARAMS ((macro_args *, const cpp_token *));
  93 static int pop_context PARAMS ((cpp_reader *));
  94 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  95 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  96 static void free_macro_args PARAMS ((macro_args *));
  97
  98 #define auto_expand_name_space(list) \
  99     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
 100 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
 101                                          unsigned int));
 102 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 103                                          unsigned int));
 104
 105 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 106 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 107                                                 unsigned char *));
 108 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 109                                                      const unsigned char *));
 110 static int skip_block_comment PARAMS ((cpp_reader *));
 111 static int skip_line_comment PARAMS ((cpp_reader *));
 112 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 113 static void skip_whitespace PARAMS ((cpp_reader *, int));
 114 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 115                                    const U_CHAR *, const U_CHAR *));
 116 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 117 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 118                                   unsigned int));
 119 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 120 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 121                                   const unsigned char *,
 122                                   unsigned int, unsigned int));
 123 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 124 static int lex_next PARAMS ((cpp_reader *, int));
 125 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 126                                       const cpp_token *));
 127
 128 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 129 static void expand_context_stack PARAMS ((cpp_reader *));
 130 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 131                                             unsigned char *));
 132 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
 133                                   const cpp_token *, int));
 134 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 135                                           cpp_token *));
 136 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 137                                             unsigned int));
 138 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 139 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 140                                                 const cpp_token *));
 141 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 142 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 143                                                        const cpp_token *));
 144 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 145                                          const cpp_token *, int *));
 146 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 147 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 148 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 149 static void release_temp_tokens         PARAMS ((cpp_reader *));
 150 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 151 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 152
 153 #define INIT_TOKEN_STR(list, token) \
 154   do {(token)->val.str.len = 0; \
 155       (token)->val.str.text = (list)->namebuf + (list)->name_used; \
 156   } while (0)
 157
 158 #define VALID_SIGN(c, prevc) \
 159   (((c) == '+' || (c) == '-') && \
 160    ((prevc) == 'e' || (prevc) == 'E' \
 161     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 162
 163 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 164    character, if any, is in buffer.  */
 165
 166 #define handle_newline(cur, limit, c) \
 167  do { \
 168   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 169     (cur)++; \
 170   pfile->buffer->lineno++; \
 171   pfile->buffer->line_base = (cur); \
 172   pfile->col_adjust = 0; \
 173  } while (0)
 174
 175 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 176 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 177
 178 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
 179 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
 180 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
 181 #define BACKUP_DIGRAPH(ttype) do { \
 182   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 183
 184 /* An upper bound on the number of bytes needed to spell a token,
 185    including preceding whitespace.  */
 186 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 187 static inline size_t
 188 TOKEN_LEN (token)
 189      const cpp_token *token;
 190 {
 191   size_t len;
 192
 193   switch (TOKEN_SPELL (token))
 194     {
 195     default:            len = 0;                        break;
 196     case SPELL_STRING:  len = token->val.str.len;       break;
 197     case SPELL_IDENT:   len = token->val.node->length;  break;
 198     }
 199   return len + 5;
 200 }
 201
 202 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 203 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 204 #define ON_REST_ARG(c) \
 205  (((c)->flags & VAR_ARGS) \
 206   && (c)->u.list->tokens[(c)->posn].val.aux \
 207       == (unsigned int) ((c)->u.list->paramc - 1))
 208
 209 #define ASSIGN_FLAGS_AND_POS(d, s) \
 210   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 211       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 212   } while (0)
 213
 214 /* f is flags, just consisting of PREV_WHITE | BOL.  */
 215 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 216   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 217       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 218   } while (0)
 219
 220 #define OP(e, s) { SPELL_OPERATOR, U s           },
 221 #define TK(e, s) { s,              U STRINGX (e) },
 222
 223 const struct token_spelling
 224 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
 225
 226 #undef OP
 227 #undef TK
 228
 229 /* The following table is used by trigraph_ok/trigraph_replace.  If we
 230    have designated initializers, it can be constant data; otherwise,
 231    it is set up at runtime by _cpp_init_input_buffer.  */
 232
 233 #if (GCC_VERSION >= 2007)
 234 #define init_trigraph_map()  /* nothing */
 235 #define TRIGRAPH_MAP \
 236 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
 237 #define END };
 238 #define s(p, v) [p] = v,
 239 #else
 240 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 241  static void init_trigraph_map PARAMS ((void)) { \
 242  unsigned char *x = trigraph_map;
 243 #define END }
 244 #define s(p, v) x[p] = v;
 245 #endif
 246
 247 TRIGRAPH_MAP
 248   s('=', '#')   s(')', ']')     s('!', '|')
 249   s('(', '[')   s('\'', '^')    s('>', '}')
 250   s('/', '\\')  s('<', '{')     s('-', '~')
 251 END
 252
 253 #undef TRIGRAPH_MAP
 254 #undef END
 255 #undef s
 256
 257 /* Notify the compiler proper that the current line number has jumped,
 258    or the current file name has changed.  */
 259
 260 static void
 261 output_line_command (pfile, print, line)
 262      cpp_reader *pfile;
 263      cpp_printer *print;
 264      unsigned int line;
 265 {
 266   cpp_buffer *ip = CPP_BUFFER (pfile);
 267
 268   if (line == 0)
 269     return;
 270
 271   /* End the previous line of text.  */
 272   if (pfile->need_newline)
 273     {
 274       putc ('\n', print->outf);
 275       print->lineno++;
 276     }
 277   pfile->need_newline = 0;
 278
 279   if (CPP_OPTION (pfile, no_line_commands))
 280     return;
 281
 282   /* If the current file has not changed, we can output a few newlines
 283      instead if we want to increase the line number by a small amount.
 284      We cannot do this if print->lineno is zero, because that means we
 285      haven't output any line commands yet.  (The very first line
 286      command output is a `same_file' command.)
 287
 288      'nominal_fname' values are unique, so they can be compared by
 289      comparing pointers.  */
 290   if (ip->nominal_fname == print->last_fname && print->lineno > 0
 291       && line >= print->lineno && line < print->lineno + 8)
 292     {
 293       while (line > print->lineno)
 294         {
 295           putc ('\n', print->outf);
 296           print->lineno++;
 297         }
 298       return;
 299     }
 300
 301   fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
 302            cpp_syshdr_flags (pfile, ip));
 303
 304   print->last_fname = ip->nominal_fname;
 305   print->lineno = line;
 306 }
 307
 308 /* Like fprintf, but writes to a printer object.  You should be sure
 309    always to generate a complete line when you use this function.  */
 310 void
 311 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
 312                      const char *fmt, ...))
 313 {
 314   va_list ap;
 315 #ifndef ANSI_PROTOTYPES
 316   cpp_reader *pfile;
 317   cpp_printer *print;
 318   const char *fmt;
 319 #endif
 320
 321   VA_START (ap, fmt);
 322
 323 #ifndef ANSI_PROTOTYPES
 324   pfile = va_arg (ap, cpp_reader *);
 325   print = va_arg (ap, cpp_printer *);
 326   fmt = va_arg (ap, const char *);
 327 #endif
 328
 329   /* End the previous line of text.  */
 330   if (pfile->need_newline)
 331     putc ('\n', print->outf);
 332   pfile->need_newline = 0;
 333
 334   vfprintf (print->outf, fmt, ap);
 335   va_end (ap);
 336 }
 337
 338 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 339
 340 void
 341 cpp_scan_buffer_nooutput (pfile)
 342      cpp_reader *pfile;
 343 {
 344   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 345   const cpp_token *token;
 346
 347   /* In no-output mode, we can ignore everything but directives.  */
 348   for (;;)
 349     {
 350       token = _cpp_get_token (pfile);
 351
 352       if (token->type == CPP_EOF)
 353         {
 354           cpp_pop_buffer (pfile);
 355           if (CPP_BUFFER (pfile) == stop)
 356             break;
 357         }
 358
 359       if (token->type == CPP_HASH && token->flags & BOL
 360           && pfile->token_list.directive)
 361         {
 362           process_directive (pfile, token);
 363           continue;
 364         }
 365
 366       _cpp_skip_rest_of_line (pfile);
 367     }
 368 }
 369
 370 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 371 void
 372 cpp_scan_buffer (pfile, print)
 373      cpp_reader *pfile;
 374      cpp_printer *print;
 375 {
 376   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 377   const cpp_token *token, *prev = 0;
 378
 379   for (;;)
 380     {
 381       token = _cpp_get_token (pfile);
 382       if (token->type == CPP_EOF)
 383         {
 384           cpp_pop_buffer (pfile);
 385
 386           if (CPP_BUFFER (pfile) == stop)
 387             return;
 388
 389           prev = 0;
 390           continue;
 391         }
 392
 393       if (token->flags & BOL)
 394         {
 395           if (token->type == CPP_HASH && pfile->token_list.directive)
 396             {
 397               process_directive (pfile, token);
 398               continue;
 399             }
 400
 401           output_line_command (pfile, print, pfile->token_list.line);
 402           prev = 0;
 403         }
 404
 405       if (token->type != CPP_PLACEMARKER)
 406         {
 407           output_token (pfile, print->outf, token, prev, 1);
 408           pfile->need_newline = 1;
 409         }
 410
 411       prev = token;
 412     }
 413 }
 414
 415 /* Helper routine used by parse_include, which can't see spell_token.
 416    Reinterpret the current line as an h-char-sequence (< ... >); we are
 417    looking at the first token after the <.  */
 418 const cpp_token *
 419 _cpp_glue_header_name (pfile)
 420      cpp_reader *pfile;
 421 {
 422   const cpp_token *t;
 423   cpp_token *hdr;
 424   U_CHAR *buf, *p;
 425   size_t len, avail;
 426
 427   avail = 40;
 428   len = 0;
 429   buf = xmalloc (avail);
 430
 431   for (;;)
 432     {
 433       t = _cpp_get_token (pfile);
 434       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 435         break;
 436
 437       if (len + TOKEN_LEN (t) > avail)
 438         {
 439           avail = len + TOKEN_LEN (t) + 40;
 440           buf = xrealloc (buf, avail);
 441         }
 442
 443       if (t->flags & PREV_WHITE)
 444         buf[len++] = ' ';
 445
 446       p = spell_token (pfile, t, buf + len);
 447       len = (size_t) (p - buf);  /* p known >= buf */
 448     }
 449
 450   if (t->type == CPP_EOF)
 451     cpp_error (pfile, "missing terminating > character");
 452
 453   buf = xrealloc (buf, len);
 454
 455   hdr = get_temp_token (pfile);
 456   hdr->type = CPP_HEADER_NAME;
 457   hdr->flags = 0;
 458   hdr->val.str.text = buf;
 459   hdr->val.str.len = len;
 460   return hdr;
 461 }
 462
 463 /* Token-buffer helper functions.  */
 464
 465 /* Expand a token list's string space. It is *vital* that
 466    list->tokens_used is correct, to get pointer fix-up right.  */
 467 void
 468 _cpp_expand_name_space (list, len)
 469      cpp_toklist *list;
 470      unsigned int len;
 471 {
 472   const U_CHAR *old_namebuf;
 473
 474   old_namebuf = list->namebuf;
 475   list->name_cap += len;
 476   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 477
 478   /* Fix up token text pointers.  */
 479   if (list->namebuf != old_namebuf)
 480     {
 481       unsigned int i;
 482
 483       for (i = 0; i < list->tokens_used; i++)
 484         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
 485           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 486     }
 487 }
 488
 489 /* If there is not enough room for LEN more characters, expand the
 490    list by just enough to have room for LEN characters.  */
 491 void
 492 _cpp_reserve_name_space (list, len)
 493      cpp_toklist *list;
 494      unsigned int len;
 495 {
 496   unsigned int room = list->name_cap - list->name_used;
 497
 498   if (room < len)
 499     _cpp_expand_name_space (list, len - room);
 500 }
 501
 502 /* Expand the number of tokens in a list.  */
 503 void
 504 _cpp_expand_token_space (list, count)
 505      cpp_toklist *list;
 506      unsigned int count;
 507 {
 508   unsigned int n;
 509
 510   list->tokens_cap += count;
 511   n = list->tokens_cap;
 512   if (list->flags & LIST_OFFSET)
 513     list->tokens--, n++;
 514   list->tokens = (cpp_token *)
 515     xrealloc (list->tokens, n * sizeof (cpp_token));
 516   if (list->flags & LIST_OFFSET)
 517     list->tokens++;             /* Skip the dummy.  */
 518 }
 519
 520 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 521    an extra token in front of the token list, as this allows the lexer
 522    to always peek at the previous token without worrying about
 523    underflowing the list, and some initial space.  Otherwise, no
 524    token- or name-space is allocated, and there is no dummy token.  */
 525 void
 526 _cpp_init_toklist (list, flags)
 527      cpp_toklist *list;
 528      int flags;
 529 {
 530   if (flags == NO_DUMMY_TOKEN)
 531     {
 532       list->tokens_cap = 0;
 533       list->tokens = 0;
 534       list->name_cap = 0;
 535       list->namebuf = 0;
 536       list->flags = 0;
 537     }
 538   else
 539     {
 540       /* Initialize token space.  Put a dummy token before the start
 541          that will fail matches.  */
 542       list->tokens_cap = 256;   /* 4K's worth.  */
 543       list->tokens = (cpp_token *)
 544         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 545       list->tokens[0].type = CPP_EOF;
 546       list->tokens++;
 547
 548       /* Initialize name space.  */
 549       list->name_cap = 1024;
 550       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 551       list->flags = LIST_OFFSET;
 552     }
 553
 554   _cpp_clear_toklist (list);
 555 }
 556
 557 /* Clear a token list.  */
 558 void
 559 _cpp_clear_toklist (list)
 560      cpp_toklist *list;
 561 {
 562   list->tokens_used = 0;
 563   list->name_used = 0;
 564   list->directive = 0;
 565   list->paramc = 0;
 566   list->params_len = 0;
 567   list->flags &= LIST_OFFSET;  /* clear all but that one */
 568 }
 569
 570 /* Free a token list.  Does not free the list itself, which may be
 571    embedded in a larger structure.  */
 572 void
 573 _cpp_free_toklist (list)
 574      const cpp_toklist *list;
 575 {
 576   if (list->flags & LIST_OFFSET)
 577     free (list->tokens - 1);    /* Backup over dummy token.  */
 578   else
 579     free (list->tokens);
 580   free (list->namebuf);
 581 }
 582
 583 /* Compare two tokens.  */
 584 int
 585 _cpp_equiv_tokens (a, b)
 586      const cpp_token *a, *b;
 587 {
 588   if (a->type == b->type && a->flags == b->flags)
 589     switch (TOKEN_SPELL (a))
 590       {
 591       default:                  /* Keep compiler happy.  */
 592       case SPELL_OPERATOR:
 593         return 1;
 594       case SPELL_CHAR:
 595       case SPELL_NONE:
 596         return a->val.aux == b->val.aux; /* arg_no or character.  */
 597       case SPELL_IDENT:
 598         return a->val.node == b->val.node;
 599       case SPELL_STRING:
 600         return (a->val.str.len == b->val.str.len
 601                 && !memcmp (a->val.str.text, b->val.str.text,
 602                             a->val.str.len));
 603       }
 604
 605   return 0;
 606 }
 607
 608 /* Compare two token lists.  */
 609 int
 610 _cpp_equiv_toklists (a, b)
 611      const cpp_toklist *a, *b;
 612 {
 613   unsigned int i;
 614
 615   if (a->tokens_used != b->tokens_used
 616       || a->flags != b->flags
 617       || a->paramc != b->paramc)
 618     return 0;
 619
 620   for (i = 0; i < a->tokens_used; i++)
 621     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 622       return 0;
 623   return 1;
 624 }
 625
 626 /* Utility routine:
 627
 628    Compares, the token TOKEN to the NUL-terminated string STRING.
 629    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 630
 631 int
 632 cpp_ideq (token, string)
 633      const cpp_token *token;
 634      const char *string;
 635 {
 636   if (token->type != CPP_NAME)
 637     return 0;
 638
 639   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 640 }
 641
 642 /* Lexing algorithm.
 643
 644  The original lexer in cpplib was made up of two passes: a first pass
  645  that replaced trigraphs and deleted escaped newlines, and a second
 646  pass that tokenized the result of the first pass.  Tokenisation was
 647  performed by peeking at the next character in the input stream.  For
 648  example, if the input stream contained "!=", the handler for the !
 649  character would peek at the next character, and if it were a '='
 650  would skip over it, and return a "!=" token, otherwise it would
 651  return just the "!" token.
 652
 653  To implement a single-pass lexer, this peeking ahead is unworkable.
 654  An arbitrary number of escaped newlines, and trigraphs (in particular
 655  ??/ which translates to the escape \), could separate the '!' and '='
 656  in the input stream, yet the next token is still a "!=".
 657
 658  Suppose instead that we lex by one logical line at a time, producing
 659  a token list or stack for each logical line, and when seeing the '!'
 660  push a CPP_NOT token on the list.  Then if the '!' is part of a
 661  longer token ("!=") we know we must see the remainder of the token by
 662  the time we reach the end of the logical line.  Thus we can have the
 663  '=' handler look at the previous token (at the end of the list / top
 664  of the stack) and see if it is a "!" token, and if so, instead of
 665  pushing a "=" token revise the existing token to be a "!=" token.
 666
 667  This works in the presence of escaped newlines, because the '\' would
 668  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 669  newline ('\n' or '\r') handler looks at the token at the top of the
 670  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 671  Hence the '=' handler would never see any intervening tokens.
 672
 673  To make trigraphs work in this context, as in precedence trigraphs
 674  are highest and converted before anything else, the '?' handler does
 675  lookahead to see if it is a trigraph, and if so skips the trigraph
 676  and pushes the token it represents onto the top of the stack.  This
 677  also works in the particular case of a CPP_BACKSLASH trigraph.
 678
 679  To the preprocessor, whitespace is only significant to the point of
 680  knowing whether whitespace precedes a particular token.  For example,
 681  the '=' handler needs to know whether there was whitespace between it
 682  and a "!" token on the top of the stack, to make the token conversion
 683  decision correctly.  So each token has a PREV_WHITE flag to
 684  indicate this - the standard permits consecutive whitespace to be
 685  regarded as a single space.  The compiler front ends are not
 686  interested in whitespace at all; they just require a token stream.
 687  Another place where whitespace is significant to the preprocessor is
  688  a #define statement - if there is whitespace between the macro name
 689  and an initial "(" token the macro is "object-like", otherwise it is
 690  a function-like macro that takes arguments.
 691
 692  However, all is not rosy.  Parsing of identifiers, numbers, comments
 693  and strings becomes trickier because of the possibility of raw
 694  trigraphs and escaped newlines in the input stream.
 695
 696  The trigraphs are three consecutive characters beginning with two
 697  question marks.  A question mark is not valid as part of a number or
 698  identifier, so parsing of a number or identifier terminates normally
 699  upon reaching it, returning to the mainloop which handles the
 700  trigraph just like it would in any other position.  Similarly for the
 701  backslash of a backslash-newline combination.  So we just need the
 702  escaped-newline dropper in the mainloop to check if the token on the
 703  top of the stack after dropping the escaped newline is a number or
 704  identifier, and if so to continue the processing it as if nothing had
 705  happened.
 706
 707  For strings, we replace trigraphs whenever we reach a quote or
 708  newline, because there might be a backslash trigraph escaping them.
 709  We need to be careful that we start trigraph replacing from where we
 710  left off previously, because it is possible for a first scan to leave
 711  "fake" trigraphs that a second scan would pick up as real (e.g. the
 712  sequence "????/\n=" would find a fake ??= trigraph after removing the
 713  escaped newline.)
 714
 715  For line comments, on reaching a newline we scan the previous
 716  character(s) to see if it escaped, and continue if it is.  Block
 717  comments ignore everything and just focus on finding the comment
 718  termination mark.  The only difficult thing, and it is surprisingly
 719  tricky, is checking if an asterisk precedes the final slash since
 720  they could be separated by escaped newlines.  If the preprocessor is
 721  invoked with the output comments option, we don't bother removing
 722  escaped newlines and replacing trigraphs for output.
 723
 724  Finally, numbers can begin with a period, which is pushed initially
 725  as a CPP_DOT token in its own right.  The digit handler checks if the
 726  previous token was a CPP_DOT not separated by whitespace, and if so
 727  pops it off the stack and pushes a period into the number's buffer
 728  before calling the number parser.
 729
 730 */
 731
 732 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 733                                                     U":>", U"<%", U"%>"};
 734
 735 /* Call when a trigraph is encountered.  It warns if necessary, and
 736    returns true if the trigraph should be honoured.  END is the third
 737    character of a trigraph in the input stream.  */
 738 static int
 739 trigraph_ok (pfile, end)
 740      cpp_reader *pfile;
 741      const unsigned char *end;
 742 {
 743   int accept = CPP_OPTION (pfile, trigraphs);
 744
 745   if (CPP_OPTION (pfile, warn_trigraphs))
 746     {
 747       unsigned int col = end - 1 - pfile->buffer->line_base;
 748       if (accept)
 749         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 750                                "trigraph ??%c converted to %c",
 751                                (int) *end, (int) trigraph_map[*end]);
 752       else
 753         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 754                                "trigraph ??%c ignored", (int) *end);
 755     }
 756   return accept;
 757 }
 758
 759 /* Scan a string for trigraphs, warning or replacing them inline as
 760    appropriate.  When parsing a string, we must call this routine
 761    before processing a newline character (if trigraphs are enabled),
 762    since the newline might be escaped by a preceding backslash
 763    trigraph sequence.  Returns a pointer to the end of the name after
 764    replacement.  */
 765
 766 static unsigned char *
 767 trigraph_replace (pfile, src, limit)
 768      cpp_reader *pfile;
 769      unsigned char *src;
 770      unsigned char *limit;
 771 {
 772   unsigned char *dest;
 773
 774   /* Starting with src[1], find two consecutive '?'.  The case of no
 775      trigraphs is streamlined.  */
 776
 777   for (src++; src + 1 < limit; src += 2)
 778     {
 779       if (src[0] != '?')
 780         continue;
 781
 782       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 783       if (src[-1] == '?')
 784         src--;
 785       else if (src + 2 == limit || src[1] != '?')
 786         continue;
 787
 788       /* Check if it really is a trigraph.  */
 789       if (trigraph_map[src[2]] == 0)
 790         continue;
 791
 792       dest = src;
 793       goto trigraph_found;
 794     }
 795   return limit;
 796
 797   /* Now we have a trigraph, we need to scan the remaining buffer, and
 798      copy-shifting its contents left if replacement is enabled.  */
 799   for (; src + 2 < limit; dest++, src++)
 800     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 801       {
 802       trigraph_found:
 803         src += 2;
 804         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 805           *dest = trigraph_map[*src];
 806       }
 807
 808   /* Copy remaining (at most 2) characters.  */
 809   while (src < limit)
 810     *dest++ = *src++;
 811   return dest;
 812 }
 813
 814 /* If CUR is a backslash or the end of a trigraphed backslash, return
 815    a pointer to its beginning, otherwise NULL.  We don't read beyond
 816    the buffer start, because there is the start of the comment in the
 817    buffer.  */
 818 static const unsigned char *
 819 backslash_start (pfile, cur)
 820      cpp_reader *pfile;
 821      const unsigned char *cur;
 822 {
 823   if (cur[0] == '\\')
 824     return cur;
 825   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 826       && trigraph_ok (pfile, cur))
 827     return cur - 2;
 828   return 0;
 829 }
 830
 831 /* Skip a C-style block comment.  This is probably the trickiest
 832    handler.  We find the end of the comment by seeing if an asterisk
 833    is before every '/' we encounter.  The nasty complication is that a
 834    previous asterisk may be separated by one or more escaped newlines.
 835    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 836 static int
 837 skip_block_comment (pfile)
 838      cpp_reader *pfile;
 839 {
 840   cpp_buffer *buffer = pfile->buffer;
 841   const unsigned char *char_after_star = 0;
 842   const unsigned char *cur = buffer->cur;
 843
 844   for (; cur < buffer->rlimit; )
 845     {
 846       unsigned char c = *cur++;
 847
 848       /* People like decorating comments with '*', so check for
 849          '/' instead for efficiency.  */
 850       if (c == '/')
 851         {
 852           /* Don't view / then * then / as finishing the comment.  */
 853           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 854               || cur - 1 == char_after_star)
 855             {
 856               buffer->cur = cur;
 857               return 0;
 858             }
 859
 860           /* Warn about potential nested comments, but not when
 861              the final character inside the comment is a '/'.
 862              Don't bother to get it right across escaped newlines.  */
 863           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 864               && cur[0] == '*' && cur[1] != '/')
 865             {
 866               buffer->cur = cur;
 867               cpp_warning (pfile, "'/*' within comment");
 868             }
 869         }
 870       else if (is_vspace (c))
 871         {
 872           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 873
 874           handle_newline (cur, buffer->rlimit, c);
 875           /* Work correctly if there is an asterisk before an
 876              arbirtrarily long sequence of escaped newlines.  */
 877           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 878             char_after_star = cur;
 879           else
 880             char_after_star = 0;
 881         }
 882       else if (c == '\t')
 883         adjust_column (pfile, cur - 1);
 884     }
 885
 886   buffer->cur = cur;
 887   return 1;
 888 }
 889
 890 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 891    non-zero if a multiline comment.  */
 892 static int
 893 skip_line_comment (pfile)
 894      cpp_reader *pfile;
 895 {
 896   cpp_buffer *buffer = pfile->buffer;
 897   register const unsigned char *cur = buffer->cur;
 898   int multiline = 0;
 899
 900   for (; cur < buffer->rlimit; )
 901     {
 902       unsigned char c = *cur++;
 903
 904       if (is_vspace (c))
 905         {
 906           /* Check for a (trigaph?) backslash escaping the newline.  */
 907           if (!backslash_start (pfile, cur - 2))
 908             goto out;
 909           multiline = 1;
 910           handle_newline (cur, buffer->rlimit, c);
 911         }
 912     }
 913   cur++;
 914
 915  out:
 916   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 917   return multiline;
 918 }
 919
 920 /* TAB points to a \t character.  Update col_adjust so we track the
 921    column correctly.  */
 922 static void
 923 adjust_column (pfile, tab)
 924      cpp_reader *pfile;
 925      const U_CHAR *tab;
 926 {
 927   /* Zero-based column.  */
 928   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
 929
 930   /* Round it up to multiple of the tabstop, but subtract 1 since the
 931      tab itself occupies a character position.  */
 932   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
 933                         - col % CPP_OPTION (pfile, tabstop)) - 1;
 934 }
 935
 936 /* Skips whitespace, stopping at next non-whitespace character.
 937    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
 938    to be assigned the correct column.  */
 939 static void
 940 skip_whitespace (pfile, in_directive)
 941      cpp_reader *pfile;
 942      int in_directive;
 943 {
 944   cpp_buffer *buffer = pfile->buffer;
 945   unsigned short warned = 0;
 946
 947   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 948   while (buffer->cur < buffer->rlimit)
 949     {
 950       unsigned char c = *buffer->cur;
 951
 952       if (!is_nvspace (c))
 953         break;
 954
 955       buffer->cur++;
 956       /* Horizontal space always OK.  */
 957       if (c == ' ')
 958         continue;
 959       else if (c == '\t')
 960         adjust_column (pfile, buffer->cur - 1);
 961       /* Must be \f \v or \0.  */
 962       else if (c == '\0')
 963         {
 964           if (!warned)
 965             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 966                                    CPP_BUF_COL (buffer),
 967                                    "embedded null character ignored");
 968           warned = 1;
 969         }
 970       else if (in_directive && CPP_PEDANTIC (pfile))
 971         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 972                                CPP_BUF_COL (buffer),
 973                                "%s in preprocessing directive",
 974                                c == '\f' ? "form feed" : "vertical tab");
 975     }
 976 }
 977
 978 /* Parse (append) an identifier.  Calculates the hash value of the
 979    token while parsing, for performance.  The algorithm *must* match
 980    cpp_lookup().  */
 981 static const U_CHAR *
 982 parse_name (pfile, tok, cur, rlimit)
 983      cpp_reader *pfile;
 984      cpp_token *tok;
 985      const U_CHAR *cur, *rlimit;
 986 {
 987   const U_CHAR *name;
 988   unsigned int len;
 989   unsigned int r;
 990
 991   name = cur;
 992   r = 0;
 993   while (cur < rlimit)
 994     {
 995       if (! is_idchar (*cur))
 996         break;
 997       /* $ is not a identifier character in the standard, but is
 998          commonly accepted as an extension.  Don't warn about it in
 999          skipped conditional blocks. */
1000       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1001         {
1002           CPP_BUFFER (pfile)->cur = cur;
1003           cpp_pedwarn (pfile, "'$' character in identifier");
1004         }
1005
1006       r = HASHSTEP (r, cur);
1007       cur++;
1008     }
1009   len = cur - name;
1010
1011   if (tok->val.node == 0)
1012     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
1013   else
1014     {
1015       unsigned int oldlen = tok->val.node->length;
1016       U_CHAR *newname = alloca (oldlen + len);
1017       memcpy (newname, tok->val.node->name, oldlen);
1018       memcpy (newname + oldlen, name, len);
1019       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1020     }
1021
1022   return cur;
1023 }
1024
1025 /* Parse (append) a number.  */
1026 static void
1027 parse_number (pfile, list, name)
1028      cpp_reader *pfile;
1029      cpp_toklist *list;
1030      cpp_string *name;
1031 {
1032   const unsigned char *name_limit;
1033   unsigned char *namebuf;
1034   cpp_buffer *buffer = pfile->buffer;
1035   register const unsigned char *cur = buffer->cur;
1036
1037  expanded:
1038   name_limit = list->namebuf + list->name_cap;
1039   namebuf = list->namebuf + list->name_used;
1040
1041   for (; cur < buffer->rlimit && namebuf < name_limit; )
1042     {
1043       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1044
1045       /* Perhaps we should accept '$' here if we accept it for
1046          identifiers.  We know namebuf[-1] is safe, because for c to
1047          be a sign we must have pushed at least one character.  */
1048       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1049         goto out;
1050
1051       namebuf++;
1052       cur++;
1053     }
1054
1055   /* Run out of name space?  */
1056   if (cur < buffer->rlimit)
1057     {
1058       list->name_used = namebuf - list->namebuf;
1059       auto_expand_name_space (list);
1060       goto expanded;
1061     }
1062
1063  out:
1064   buffer->cur = cur;
1065   name->len = namebuf - name->text;
1066   list->name_used = namebuf - list->namebuf;
1067 }
1068
1069 /* Places a string terminated by an unescaped TERMINATOR into a
1070    cpp_string, which should be expandable and thus at the top of the
1071    list's stack.  Handles embedded trigraphs, if necessary, and
1072    escaped newlines.
1073
1074    Can be used for character constants (terminator = '\''), string
1075    constants ('"') and angled headers ('>').  Multi-line strings are
1076    allowed, except for within directives.  */
1077
1078 static void
1079 parse_string (pfile, list, token, terminator)
1080      cpp_reader *pfile;
1081      cpp_toklist *list;
1082      cpp_token *token;
1083      unsigned int terminator;
1084 {
1085   cpp_buffer *buffer = pfile->buffer;
1086   cpp_string *name = &token->val.str;
1087   register const unsigned char *cur = buffer->cur;
1088   const unsigned char *name_limit;
1089   unsigned char *namebuf;
1090   unsigned int null_count = 0;
1091   unsigned int trigraphed = list->name_used;
1092
1093  expanded:
1094   name_limit = list->namebuf + list->name_cap;
1095   namebuf = list->namebuf + list->name_used;
1096
1097   for (; cur < buffer->rlimit && namebuf < name_limit; )
1098     {
1099       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1100
1101       if (c == '\0')
1102         null_count++;
1103       else if (c == terminator || is_vspace (c))
1104         {
1105           /* Needed for trigraph_replace and multiline string warning.  */
1106           buffer->cur = cur;
1107
1108           /* Scan for trigraphs before checking if backslash-escaped.  */
1109           if ((CPP_OPTION (pfile, trigraphs)
1110                || CPP_OPTION (pfile, warn_trigraphs))
1111               && namebuf - (list->namebuf + trigraphed) >= 3)
1112             {
1113               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1114                                           namebuf);
1115               /* The test above guarantees trigraphed will be positive.  */
1116               trigraphed = namebuf - list->namebuf - 2;
1117             }
1118
1119           namebuf--;     /* Drop the newline / terminator from the name.  */
1120           if (is_vspace (c))
1121             {
1122               /* Drop a backslash newline, and continue. */
1123               if (namebuf[-1] == '\\')
1124                 {
1125                   handle_newline (cur, buffer->rlimit, c);
1126                   namebuf--;
1127                   continue;
1128                 }
1129
1130               cur--;
1131
1132               /* In assembly language, silently terminate strings of
1133                  either variety at end of line.  This is a kludge
1134                  around not knowing where comments are.  */
1135               if (CPP_OPTION (pfile, lang_asm))
1136                 goto out;
1137
1138               /* Character constants and header names may not extend
1139                  over multiple lines.  In Standard C, neither may
1140                  strings.  We accept multiline strings as an
1141                  extension.  (Even in directives - otherwise, glibc's
1142                  longlong.h breaks.)  */
1143               if (terminator != '"')
1144                 goto unterminated;
1145
1146               cur++;  /* Move forwards again.  */
1147
1148               if (pfile->multiline_string_line == 0)
1149                 {
1150                   pfile->multiline_string_line = token->line;
1151                   pfile->multiline_string_column = token->col;
1152                   if (CPP_PEDANTIC (pfile))
1153                     cpp_pedwarn (pfile, "multi-line string constant");
1154                 }
1155
1156               *namebuf++ = '\n';
1157               handle_newline (cur, buffer->rlimit, c);
1158             }
1159           else
1160             {
1161               unsigned char *temp;
1162
1163               /* An odd number of consecutive backslashes represents
1164                  an escaped terminator.  */
1165               temp = namebuf - 1;
1166               while (temp >= name->text && *temp == '\\')
1167                 temp--;
1168
1169               if ((namebuf - temp) & 1)
1170                 goto out;
1171               namebuf++;
1172             }
1173         }
1174     }
1175
1176   /* Run out of name space?  */
1177   if (cur < buffer->rlimit)
1178     {
1179       list->name_used = namebuf - list->namebuf;
1180       auto_expand_name_space (list);
1181       goto expanded;
1182     }
1183
1184   /* We may not have trigraph-replaced the input for this code path,
1185      but as the input is in error by being unterminated we don't
1186      bother.  Prevent warnings about no newlines at EOF.  */
1187   if (is_vspace (cur[-1]))
1188     cur--;
1189
1190  unterminated:
1191   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1192
1193   if (terminator == '\"' && pfile->multiline_string_line != list->line
1194       && pfile->multiline_string_line != 0)
1195     {
1196       cpp_error_with_line (pfile, pfile->multiline_string_line,
1197                            pfile->multiline_string_column,
1198                            "possible start of unterminated string literal");
1199       pfile->multiline_string_line = 0;
1200     }
1201
1202  out:
1203   buffer->cur = cur;
1204   name->len = namebuf - name->text;
1205   list->name_used = namebuf - list->namebuf;
1206
1207   if (null_count > 0)
1208     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1209                          : "null character preserved"));
1210 }
1211
1212 /* The character TYPE helps us distinguish comment types: '*' = C
1213    style, '/' = C++ style.  For code simplicity, the stored comment
1214    includes the comment start and any terminator.  */
1215
1216 #define COMMENT_START_LEN 2
1217 static void
1218 save_comment (list, token, from, len, type)
1219      cpp_toklist *list;
1220      cpp_token *token;
1221      const unsigned char *from;
1222      unsigned int len;
1223      unsigned int type;
1224 {
1225   unsigned char *buffer;
1226
1227   len += COMMENT_START_LEN;
1228
1229   if (list->name_used + len > list->name_cap)
1230     _cpp_expand_name_space (list, len);
1231
1232   INIT_TOKEN_STR (list, token);
1233   token->type = CPP_COMMENT;
1234   token->val.str.len = len;
1235
1236   buffer = list->namebuf + list->name_used;
1237   list->name_used += len;
1238
1239   /* Copy the comment.  */
1240   if (type == '*')
1241     {
1242       *buffer++ = '/';
1243       *buffer++ = '*';
1244     }
1245   else
1246     {
1247       *buffer++ = type;
1248       *buffer++ = type;
1249     }
1250   memcpy (buffer, from, len - COMMENT_START_LEN);
1251 }
1252
1253 /*
1254  *  The tokenizer's main loop.  Returns a token list, representing a
1255  *  logical line in the input file.  On EOF after some tokens have
1256  *  been processed, we return immediately.  Then in next call, or if
1257  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1258  *  token is placed in the list.
1259  *
1260  *  Implementation relies almost entirely on lookback, rather than
1261  *  looking forwards.  This means that tokenization requires just
1262  *  a single pass of the file, even in the presence of trigraphs and
1263  *  escaped newlines, providing significant performance benefits.
1264  *  Trigraph overhead is negligible if they are disabled, and low
1265  *  even when enabled.
1266  */
1267
/* Helpers for lex_line; both read its local variables directly.

   KNOWN_DIRECTIVE is true once list->directive has been set.

   MIGHT_BE_DIRECTIVE is true when CUR_TOKEN is the second token
   lexed by this call and the one before it is a CPP_HASH.  The
   position test comes first so that cur_token[-1] is only read when
   such a token exists.  */
#define KNOWN_DIRECTIVE() (list->directive != 0)
#define MIGHT_BE_DIRECTIVE() \
  (cur_token == list->tokens + first_token + 1 \
   && cur_token[-1].type == CPP_HASH)
1271
1272 static void
1273 lex_line (pfile, list)
1274      cpp_reader *pfile;
1275      cpp_toklist *list;
1276 {
1277   cpp_token *cur_token, *token_limit, *first;
1278   cpp_buffer *buffer = pfile->buffer;
1279   const unsigned char *cur = buffer->cur;
1280   unsigned char flags = 0;
1281   unsigned int first_token = list->tokens_used;
1282
1283   if (!(list->flags & LIST_OFFSET))
1284     (abort) ();
1285
1286   list->file = buffer->nominal_fname;
1287   list->line = CPP_BUF_LINE (buffer);
1288   pfile->col_adjust = 0;
1289   pfile->in_lex_line = 1;
1290   if (cur == buffer->buf)
1291     list->flags |= BEG_OF_FILE;
1292
1293  expanded:
1294   token_limit = list->tokens + list->tokens_cap;
1295   cur_token = list->tokens + list->tokens_used;
1296
1297   for (; cur < buffer->rlimit && cur_token < token_limit;)
1298     {
1299       unsigned char c;
1300
1301       /* Optimize non-vertical whitespace skipping; most tokens are
1302          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1303       c = *cur;
1304       if (is_nvspace (c))
1305         {
1306           buffer->cur = cur;
1307           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1308                                    && cur_token > &list->tokens[first_token]));
1309           cur = buffer->cur;
1310
1311           flags = PREV_WHITE;
1312           if (cur == buffer->rlimit)
1313             break;
1314           c = *cur;
1315         }
1316       cur++;
1317
1318       /* Initialize current token.  CPP_EOF will not be fixed up by
1319          expand_name_space.  */
1320       list->tokens_used = cur_token - list->tokens + 1;
1321       cur_token->type = CPP_EOF;
1322       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1323       cur_token->line = CPP_BUF_LINE (buffer);
1324       cur_token->flags = flags;
1325       flags = 0;
1326
1327       switch (c)
1328         {
1329         case '0': case '1': case '2': case '3': case '4':
1330         case '5': case '6': case '7': case '8': case '9':
1331           {
1332             int prev_dot;
1333
1334             cur--;              /* Backup character.  */
1335             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1336             if (prev_dot)
1337               cur_token--;
1338             INIT_TOKEN_STR (list, cur_token);
1339             /* Prepend an immediately previous CPP_DOT token.  */
1340             if (prev_dot)
1341               {
1342                 if (list->name_cap == list->name_used)
1343                   auto_expand_name_space (list);
1344
1345                 cur_token->val.str.len = 1;
1346                 list->namebuf[list->name_used++] = '.';
1347               }
1348
1349           continue_number:
1350             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1351             buffer->cur = cur;
1352             parse_number (pfile, list, &cur_token->val.str);
1353             cur = buffer->cur;
1354           }
1355           /* Check for # 123 form of #line.  */
1356           if (MIGHT_BE_DIRECTIVE ())
1357             list->directive = _cpp_check_linemarker (pfile, cur_token,
1358                                                      !(cur_token[-1].flags
1359                                                        & PREV_WHITE));
1360           cur_token++;
1361           break;
1362
1363         letter:
1364         case '_':
1365         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1366         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1367         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1368         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1369         case 'y': case 'z':
1370         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1371         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1372         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1373         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1374         case 'Y': case 'Z':
1375           cur--;                     /* Backup character.  */
1376           cur_token->val.node = 0;
1377           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1378
1379         continue_name:
1380           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1381
1382           if (MIGHT_BE_DIRECTIVE ())
1383             list->directive = _cpp_check_directive (pfile, cur_token,
1384                                                     !(list->tokens[0].flags
1385                                                       & PREV_WHITE));
1386           /* Convert named operators to their proper types.  */
1387           if (cur_token->val.node->type == T_OPERATOR)
1388             {
1389               cur_token->flags |= NAMED_OP;
1390               cur_token->type = cur_token->val.node->value.code;
1391             }
1392
1393           cur_token++;
1394           break;
1395
1396         case '\'':
1397         case '\"':
1398           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1399           /* Do we have a wide string?  */
1400           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1401               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1402             BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1403
1404         do_parse_string:
1405           /* Here c is one of ' " or >.  */
1406           INIT_TOKEN_STR (list, cur_token);
1407           buffer->cur = cur;
1408           parse_string (pfile, list, cur_token, c);
1409           cur = buffer->cur;
1410           cur_token++;
1411           break;
1412
1413         case '/':
1414           cur_token->type = CPP_DIV;
1415           if (IMMED_TOKEN ())
1416             {
1417               if (PREV_TOKEN_TYPE == CPP_DIV)
1418                 {
1419                   /* We silently allow C++ comments in system headers,
1420                      irrespective of conformance mode, because lots of
1421                      broken systems do that and trying to clean it up
1422                      in fixincludes is a nightmare.  */
1423                   if (CPP_IN_SYSTEM_HEADER (pfile))
1424                     goto do_line_comment;
1425                   else if (CPP_OPTION (pfile, cplusplus_comments))
1426                     {
1427                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1428                           && ! buffer->warned_cplusplus_comments)
1429                         {
1430                           buffer->cur = cur;
1431                           cpp_pedwarn (pfile,
1432                              "C++ style comments are not allowed in ISO C89");
1433                           cpp_pedwarn (pfile,
1434                           "(this will be reported only once per input file)");
1435                           buffer->warned_cplusplus_comments = 1;
1436                         }
1437                     do_line_comment:
1438                       buffer->cur = cur;
1439 #if 0 /* Leave until new lexer in place.  */
1440                       if (cur[-2] != c)
1441                         cpp_warning (pfile,
1442                                      "comment start split across lines");
1443 #endif
1444                       if (skip_line_comment (pfile))
1445                         cpp_warning (pfile, "multi-line comment");
1446
1447                       /* Back-up to first '-' or '/'.  */
1448                       cur_token--;
1449                       if (!CPP_OPTION (pfile, discard_comments)
1450                           && (!KNOWN_DIRECTIVE()
1451                               || (list->directive->flags & COMMENTS)))
1452                         save_comment (list, cur_token++, cur,
1453                                       buffer->cur - cur, c);
1454                       else
1455                         flags = PREV_WHITE;
1456
1457                       cur = buffer->cur;
1458                       break;
1459                     }
1460                 }
1461             }
1462           cur_token++;
1463           break;
1464
1465         case '*':
1466           cur_token->type = CPP_MULT;
1467           if (IMMED_TOKEN ())
1468             {
1469               if (PREV_TOKEN_TYPE == CPP_DIV)
1470                 {
1471                   buffer->cur = cur;
1472 #if 0 /* Leave until new lexer in place.  */
1473                   if (cur[-2] != '/')
1474                     cpp_warning (pfile,
1475                                  "comment start '/*' split across lines");
1476 #endif
1477                   if (skip_block_comment (pfile))
1478                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1479                                          "unterminated comment");
1480 #if 0 /* Leave until new lexer in place.  */
1481                   else if (buffer->cur[-2] != '*')
1482                     cpp_warning (pfile,
1483                                  "comment end '*/' split across lines");
1484 #endif
1485                   /* Back up to opening '/'.  */
1486                   cur_token--;
1487                   if (!CPP_OPTION (pfile, discard_comments)
1488                       && (!KNOWN_DIRECTIVE()
1489                           || (list->directive->flags & COMMENTS)))
1490                     save_comment (list, cur_token++, cur,
1491                                   buffer->cur - cur, c);
1492                   else
1493                     flags = PREV_WHITE;
1494
1495                   cur = buffer->cur;
1496                   break;
1497                 }
1498               else if (CPP_OPTION (pfile, cplusplus))
1499                 {
1500                   /* In C++, there are .* and ->* operators.  */
1501                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1502                     BACKUP_TOKEN (CPP_DEREF_STAR);
1503                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1504                     BACKUP_TOKEN (CPP_DOT_STAR);
1505                 }
1506             }
1507           cur_token++;
1508           break;
1509
1510         case '\n':
1511         case '\r':
1512           handle_newline (cur, buffer->rlimit, c);
1513           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1514             {
1515               if (IMMED_TOKEN ())
1516                 {
1517                   /* Remove the escaped newline.  Then continue to process
1518                      any interrupted name or number.  */
1519                   cur_token--;
1520                   /* Backslash-newline may not be immediately followed by
1521                      EOF (C99 5.1.1.2).  */
1522                   if (cur >= buffer->rlimit)
1523                     {
1524                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1525                       break;
1526                     }
1527                   if (IMMED_TOKEN ())
1528                     {
1529                       cur_token--;
1530                       if (cur_token->type == CPP_NAME)
1531                         goto continue_name;
1532                       else if (cur_token->type == CPP_NUMBER)
1533                         goto continue_number;
1534                       cur_token++;
1535                     }
1536                   /* Remember whitespace setting.  */
1537                   flags = cur_token->flags;
1538                   break;
1539                 }
1540               else
1541                 {
1542                   buffer->cur = cur;
1543                   cpp_warning (pfile,
1544                                "backslash and newline separated by space");
1545                 }
1546             }
1547           else if (MIGHT_BE_DIRECTIVE ())
1548             {
1549               /* "Null directive." C99 6.10.7: A preprocessing
1550                  directive of the form # <new-line> has no effect.
1551
1552                  But it is still a directive, and therefore disappears
1553                  from the output. */
1554               cur_token--;
1555               if (cur_token->flags & PREV_WHITE
1556                   && CPP_WTRADITIONAL (pfile))
1557                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1558             }
1559
1560           /* Skip vertical space until we have at least one token to
1561              return.  */
1562           if (cur_token != &list->tokens[first_token])
1563             goto out;
1564           list->line = CPP_BUF_LINE (buffer);
1565           break;
1566
1567         case '-':
1568           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1569             REVISE_TOKEN (CPP_MINUS_MINUS);
1570           else
1571             PUSH_TOKEN (CPP_MINUS);
1572           break;
1573
1574         make_hash:
1575         case '#':
1576           /* The digraph flag checking ensures that ## and %:%:
1577              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1578           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1579               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1580             REVISE_TOKEN (CPP_PASTE);
1581           else
1582             PUSH_TOKEN (CPP_HASH);
1583           break;
1584
1585         case ':':
1586           cur_token->type = CPP_COLON;
1587           if (IMMED_TOKEN ())
1588             {
1589               if (PREV_TOKEN_TYPE == CPP_COLON
1590                   && CPP_OPTION (pfile, cplusplus))
1591                 BACKUP_TOKEN (CPP_SCOPE);
1592               else if (CPP_OPTION (pfile, digraphs))
1593                 {
1594                   /* Digraph: "<:" is a '['  */
1595                   if (PREV_TOKEN_TYPE == CPP_LESS)
1596                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1597                   /* Digraph: "%:" is a '#'  */
1598                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1599                     {
1600                       (--cur_token)->flags |= DIGRAPH;
1601                       goto make_hash;
1602                     }
1603                 }
1604             }
1605           cur_token++;
1606           break;
1607
1608         case '&':
1609           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1610             REVISE_TOKEN (CPP_AND_AND);
1611           else
1612             PUSH_TOKEN (CPP_AND);
1613           break;
1614
1615         make_or:
1616         case '|':
1617           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1618             REVISE_TOKEN (CPP_OR_OR);
1619           else
1620             PUSH_TOKEN (CPP_OR);
1621           break;
1622
1623         case '+':
1624           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1625             REVISE_TOKEN (CPP_PLUS_PLUS);
1626           else
1627             PUSH_TOKEN (CPP_PLUS);
1628           break;
1629
1630         case '=':
1631             /* This relies on equidistance of "?=" and "?" tokens.  */
1632           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1633             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1634           else
1635             PUSH_TOKEN (CPP_EQ);
1636           break;
1637
1638         case '>':
1639           cur_token->type = CPP_GREATER;
1640           if (IMMED_TOKEN ())
1641             {
1642               if (PREV_TOKEN_TYPE == CPP_GREATER)
1643                 BACKUP_TOKEN (CPP_RSHIFT);
1644               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1645                 BACKUP_TOKEN (CPP_DEREF);
1646               else if (CPP_OPTION (pfile, digraphs))
1647                 {
1648                   /* Digraph: ":>" is a ']'  */
1649                   if (PREV_TOKEN_TYPE == CPP_COLON)
1650                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1651                   /* Digraph: "%>" is a '}'  */
1652                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1653                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1654                 }
1655             }
1656           cur_token++;
1657           break;
1658
1659         case '<':
1660           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1661             {
1662               REVISE_TOKEN (CPP_LSHIFT);
1663               break;
1664             }
1665           /* Is this the beginning of a header name?  */
1666           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1667             {
1668               c = '>';  /* Terminator.  */
1669               cur_token->type = CPP_HEADER_NAME;
1670               goto do_parse_string;
1671             }
1672           PUSH_TOKEN (CPP_LESS);
1673           break;
1674
1675         case '%':
1676           /* Digraph: "<%" is a '{'  */
1677           cur_token->type = CPP_MOD;
1678           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1679               && CPP_OPTION (pfile, digraphs))
1680             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1681           cur_token++;
1682           break;
1683
1684         case '?':
1685           if (cur + 1 < buffer->rlimit && *cur == '?'
1686               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1687             {
1688               /* Handle trigraph.  */
1689               cur++;
1690               switch (*cur++)
1691                 {
1692                 case '(': goto make_open_square;
1693                 case ')': goto make_close_square;
1694                 case '<': goto make_open_brace;
1695                 case '>': goto make_close_brace;
1696                 case '=': goto make_hash;
1697                 case '!': goto make_or;
1698                 case '-': goto make_complement;
1699                 case '/': goto make_backslash;
1700                 case '\'': goto make_xor;
1701                 }
1702             }
1703           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1704             {
1705               /* GNU C++ defines <? and >? operators.  */
1706               if (PREV_TOKEN_TYPE == CPP_LESS)
1707                 {
1708                   REVISE_TOKEN (CPP_MIN);
1709                   break;
1710                 }
1711               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1712                 {
1713                   REVISE_TOKEN (CPP_MAX);
1714                   break;
1715                 }
1716             }
1717           PUSH_TOKEN (CPP_QUERY);
1718           break;
1719
1720         case '.':
1721           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1722               && IMMED_TOKEN ()
1723               && !(cur_token[-1].flags & PREV_WHITE))
1724             {
1725               cur_token -= 2;
1726               PUSH_TOKEN (CPP_ELLIPSIS);
1727             }
1728           else
1729             PUSH_TOKEN (CPP_DOT);
1730           break;
1731
1732         make_complement:
1733         case '~': PUSH_TOKEN (CPP_COMPL); break;
1734         make_xor:
1735         case '^': PUSH_TOKEN (CPP_XOR); break;
1736         make_open_brace:
1737         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1738         make_close_brace:
1739         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1740         make_open_square:
1741         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1742         make_close_square:
1743         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1744         make_backslash:
1745         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1746         case '!': PUSH_TOKEN (CPP_NOT); break;
1747         case ',': PUSH_TOKEN (CPP_COMMA); break;
1748         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1749         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1750         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1751
1752         case '$':
1753           if (CPP_OPTION (pfile, dollars_in_ident))
1754             goto letter;
1755           /* Fall through */
1756         default:
1757           cur_token->val.aux = c;
1758           PUSH_TOKEN (CPP_OTHER);
1759           break;
1760         }
1761     }
1762
1763   /* Run out of token space?  */
1764   if (cur_token == token_limit)
1765     {
1766       list->tokens_used = cur_token - list->tokens;
1767       _cpp_expand_token_space (list, 256);
1768       goto expanded;
1769     }
1770
1771   cur_token->flags = flags;
1772   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1773     {
1774       if (cur > buffer->buf && !is_vspace (cur[-1]))
1775         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1776                                CPP_BUF_COLUMN (buffer, cur),
1777                                "no newline at end of file");
1778       cur_token++->type = CPP_EOF;
1779     }
1780
1781  out:
1782   /* All tokens are allocated, so the memory location is fixed.  */
1783   first = &list->tokens[first_token];
1784
1785   /* Don't complain about the null directive, nor directives in
1786      assembly source: we don't know where the comments are, and # may
1787      introduce assembler pseudo-ops.  Don't complain about invalid
1788      directives in skipped conditional groups (6.10 p4).  */
1789   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1790       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1791     {
1792       if (first[1].type == CPP_NAME)
1793         cpp_error (pfile, "invalid preprocessing directive #%s",
1794                    first[1].val.node->name);
1795       else
1796         cpp_error (pfile, "invalid preprocessing directive");
1797     }
1798
1799   /* Put EOF at end of known directives.  This covers "directives do
1800      not extend beyond the end of the line (description 6.10 part 2)".  */
1801   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1802     {
1803       pfile->first_directive_token = first;
1804       cur_token++->type = CPP_EOF;
1805     }
1806
1807   first->flags |= BOL;
1808   if (first_token != 0)
1809     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1810        up the invocation of a function-like macro, new line is
1811        considered a normal white-space character.  */
1812     first->flags |= PREV_WHITE;
1813
1814   buffer->cur = cur;
1815   list->tokens_used = cur_token - list->tokens;
1816   pfile->in_lex_line = 0;
1817 }
1818
1819 /* Write the spelling of a token TOKEN, with any appropriate
1820    whitespace before it, to FP.  PREV is the previous token, which
1821    is used to determine if we need to shove in an extra space in order
1822    to avoid accidental token paste.  If WHITE is 0, do not insert any
1823    leading whitespace.  */
1824 static void
1825 output_token (pfile, fp, token, prev, white)
1826      cpp_reader *pfile;
1827      FILE *fp;
1828      const cpp_token *token, *prev;
1829      int white;
1830 {
1831   if (white)
1832     {
1833       int dummy;
1834
1835       if (token->col && (token->flags & BOL))
1836         {
1837           /* Supply enough whitespace to put this token in its original
1838              column.  Don't bother trying to reconstruct tabs; we can't
1839              get it right in general, and nothing ought to care.  (Yes,
1840              some things do care; the fault lies with them.)  */
1841           unsigned int spaces = token->col - 1;
1842
1843           while (spaces--)
1844             putc (' ', fp);
1845         }
1846       else if (token->flags & PREV_WHITE)
1847         putc (' ', fp);
1848       else
1849       /* Check for and prevent accidental token pasting.
1850          In addition to the cases handled by can_paste, consider
1851
1852          a + ++b - if there is not a space between the + and ++, it
1853          will be misparsed as a++ + b.  But + ## ++ doesn't produce
1854          a valid token.  */
1855         if (prev
1856             && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1857                 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1858                 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1859         putc (' ', fp);
1860     }
1861
1862   switch (TOKEN_SPELL (token))
1863     {
1864     case SPELL_OPERATOR:
1865       {
1866         const unsigned char *spelling;
1867
1868         if (token->flags & DIGRAPH)
1869           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1870         else if (token->flags & NAMED_OP)
1871           goto spell_ident;
1872         else
1873           spelling = TOKEN_NAME (token);
1874
1875         ufputs (spelling, fp);
1876       }
1877       break;
1878
1879     case SPELL_IDENT:
1880       spell_ident:
1881       ufputs (token->val.node->name, fp);
1882       break;
1883
1884     case SPELL_STRING:
1885       {
1886         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1887           putc ('L', fp);
1888
1889         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1890           putc ('"', fp);
1891         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1892           putc ('\'', fp);
1893
1894         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1895
1896         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1897           putc ('"', fp);
1898         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1899           putc ('\'', fp);
1900       }
1901       break;
1902
1903     case SPELL_CHAR:
1904       putc (token->val.aux, fp);
1905       break;
1906
1907     case SPELL_NONE:
1908       /* Placemarker or EOF - no output.  (Macro args are handled
1909          elsewhere.  */
1910       break;
1911     }
1912 }
1913
1914 /* Dump the original user's spelling of argument index ARG_NO to the
1915    macro whose expansion is LIST.  */
1916 static void
1917 dump_param_spelling (fp, list, arg_no)
1918      FILE *fp;
1919      const cpp_toklist *list;
1920      unsigned int arg_no;
1921 {
1922   const U_CHAR *param = list->namebuf;
1923
1924   while (arg_no--)
1925     param += ustrlen (param) + 1;
1926   ufputs (param, fp);
1927 }
1928
1929 /* Output all the tokens of LIST, starting at TOKEN, to FP.  */
1930 void
1931 cpp_output_list (pfile, fp, list, token)
1932      cpp_reader *pfile;
1933      FILE *fp;
1934      const cpp_toklist *list;
1935      const cpp_token *token;
1936 {
1937   const cpp_token *limit = list->tokens + list->tokens_used;
1938   const cpp_token *prev = 0;
1939   int white = 0;
1940
1941   while (token < limit)
1942     {
1943       /* XXX Find some way we can write macro args from inside
1944          output_token/spell_token.  */
1945       if (token->type == CPP_MACRO_ARG)
1946         {
1947           if (white && token->flags & PREV_WHITE)
1948             putc (' ', fp);
1949           if (token->flags & STRINGIFY_ARG)
1950             putc ('#', fp);
1951           dump_param_spelling (fp, list, token->val.aux);
1952         }
1953       else
1954         output_token (pfile, fp, token, prev, white);
1955       if (token->flags & PASTE_LEFT)
1956         fputs (" ##", fp);
1957       prev = token;
1958       token++;
1959       white = 1;
1960     }
1961 }
1962
1963
1964 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1965    already contain the enough space to hold the token's spelling.
1966    Returns a pointer to the character after the last character
1967    written.  */
1968
1969 static unsigned char *
1970 spell_token (pfile, token, buffer)
1971      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1972      const cpp_token *token;
1973      unsigned char *buffer;
1974 {
1975   switch (TOKEN_SPELL (token))
1976     {
1977     case SPELL_OPERATOR:
1978       {
1979         const unsigned char *spelling;
1980         unsigned char c;
1981
1982         if (token->flags & DIGRAPH)
1983           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1984         else if (token->flags & NAMED_OP)
1985           goto spell_ident;
1986         else
1987           spelling = TOKEN_NAME (token);
1988
1989         while ((c = *spelling++) != '\0')
1990           *buffer++ = c;
1991       }
1992       break;
1993
1994     case SPELL_IDENT:
1995       spell_ident:
1996       memcpy (buffer, token->val.node->name, token->val.node->length);
1997       buffer += token->val.node->length;
1998       break;
1999
2000     case SPELL_STRING:
2001       {
2002         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
2003           *buffer++ = 'L';
2004
2005         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
2006           *buffer++ = '"';
2007         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
2008           *buffer++ = '\'';
2009
2010         memcpy (buffer, token->val.str.text, token->val.str.len);
2011         buffer += token->val.str.len;
2012
2013         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
2014           *buffer++ = '"';
2015         if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
2016           *buffer++ = '\'';
2017       }
2018       break;
2019
2020     case SPELL_CHAR:
2021       *buffer++ = token->val.aux;
2022       break;
2023
2024     case SPELL_NONE:
2025       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
2026       break;
2027     }
2028
2029   return buffer;
2030 }
2031
2032 /* Macro expansion algorithm.
2033
2034 Macro expansion is implemented by a single-pass algorithm; there are
2035 no rescan passes involved.  cpp_get_token expands just enough to be
2036 able to return a token to the caller, a consequence is that when it
2037 returns the preprocessor can be in a state of mid-expansion.  The
2038 algorithm does not work by fully expanding a macro invocation into
2039 some kind of token list, and then returning them one by one.
2040
2041 Our expansion state is recorded in a context stack.  We start out with
2042 a single context on the stack, let's call it base context.  This
2043 consists of the token list returned by lex_line that forms the next
2044 logical line in the source file.
2045
2046 The current level in the context stack is stored in the cur_context
2047 member of the cpp_reader structure.  The context it references keeps,
2048 amongst other things, a count of how many tokens form that context and
2049 our position within those tokens.
2050
2051 Fundamentally, calling cpp_get_token will return the next token from
2052 the current context.  If we're at the end of the current context, that
2053 context is popped from the stack first, unless it is the base context,
2054 in which case the next logical line is lexed from the source file.
2055
2056 However, before returning the token, if it is a CPP_NAME token
2057 _cpp_get_token checks to see if it is a macro and if it is enabled.
2058 Each time it encounters a macro name, it calls push_macro_context.
2059 This function checks that the macro should be expanded (with
2060 is_macro_enabled), and if so pushes a new macro context on the stack
2061 which becomes the current context.  It then loops back to read the
2062 first token of the macro context.
2063
2064 A macro context basically consists of the token list representing the
2065 macro's replacement list, which was saved in the hash table by
2066 save_macro_expansion when its #define statement was parsed.  If the
2067 macro is function-like, it also contains the tokens that form the
2068 arguments to the macro.  I say more about macro arguments below, but
2069 for now just saying that each argument is a set of pointers to tokens
2070 is enough.
2071
2072 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2073 token.  This represents an argument passed to the macro, with the
2074 argument number stored in the token's AUX field.  The argument should
2075 be substituted, this is achieved by pushing an "argument context".  An
argument context just refers to the tokens forming the argument,
2077 which are obtained directly from the macro context.  The STRINGIFY
2078 flag on a CPP_MACRO_ARG token indicates that the argument should be
2079 stringified.
2080
2081 Here's a few simple rules the context stack obeys:-
2082
2083   1) The lex_line token list is always context zero.
2084
2085   2) Context 1, if it exists, must be a macro context.
2086
2087   3) An argument context can only appear above a macro context.
2088
2089   4) A macro context can appear above the base context, another macro
2090   context, or an argument context.
2091
2092   5) These imply that the minimal level of an argument context is 2.
2093
2094 The only tricky thing left is ensuring that macros are enabled and
2095 disabled correctly.  The algorithm controls macro expansion by the
2096 level of the context a token is taken from in the context stack.  If a
2097 token is taken from a level equal to no_expand_level (a member of
2098 struct cpp_reader), no expansion is performed.
2099
2100 When popping a context off the stack, if no_expand_level equals the
2101 level of the popped context, it is reduced by one to match the new
2102 context level, so that expansion is still disabled.  It does not
2103 increase if a context is pushed, though.  It starts out life as
2104 UINT_MAX, which has the effect that initially macro expansion is
2105 enabled.  I explain how this mechanism works below.
2106
2107 The standard requires:-
2108
2109   1) Arguments to be fully expanded before substitution.
2110
2111   2) Stringified arguments to not be expanded, nor the tokens
2112   immediately surrounding a ## operator.
2113
2114   3) Continual rescanning until there are no more macros left to
2115   replace.
2116
2117   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2118   expanded again during later rescans.  This prevents infinite
2119   recursion.
2120
2121 The first thing to observe is that stage 3) is mostly redundant.
2122 Since a macro is disabled once it has been expanded, how can a rescan
2123 find an unexpanded macro name?  There are only two cases where this is
2124 possible:-
2125
2126   a) If the macro name results from a token paste operation.
2127
2128   b) If the macro in question is a function-like macro that hasn't
2129   already been expanded because previously there was not the required
2130   '(' token immediately following it.  This is only possible when an
2131   argument is substituted, and after substitution the last token of
2132   the argument can bind with a parenthesis appearing in the tokens
2133   following the substitution.  Note that if the '(' appears within the
2134   argument, the ')' must too, as expanding macro arguments cannot
2135   "suck in" tokens outside the argument.
2136
2137 So we tackle this as follows.  When parsing the macro invocation for
2138 arguments, we record the tokens forming each argument as a list of
2139 pointers to those tokens.  We do not expand any tokens that are "raw",
2140 i.e. directly from the macro invocation, but other tokens that come
2141 from (nested) argument substitution are fully expanded.
2142
2143 This is achieved by setting the no_expand_level to that of the macro
2144 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2145 forming an argument, because parse_args (indirectly) calls
2146 get_raw_token which automatically pushes argument contexts and traces
2147 into them.  Since these contexts are at a higher level than the
2148 no_expand_level, they get fully macro expanded.
2149
2150 "Raw" and non-raw tokens are separated in arguments by null pointers,
2151 with the policy that the initial state of an argument is raw.  If the
2152 first token is not raw, it should be preceded by a null pointer.  When
2153 tracing through the tokens of an argument context, each time
2154 get_raw_token encounters a null pointer, it toggles the flag
2155 CONTEXT_RAW.
2156
2157 This flag, when set, indicates to is_macro_disabled that we are
2158 reading raw tokens which should be macro-expanded.  Similarly, if
2159 clear, is_macro_disabled suppresses re-expansion.
2160
2161 It's probably time for an example.
2162
2163 #define hash #
2164 #define str(x) #x
2165 #define xstr(y) str(y hash)
2166 str(hash)                       // "hash"
2167 xstr(hash)                      // "# hash"
2168
2169 In the invocation of str, parse_args turns off macro expansion and so
2170 parses the argument as <hash>.  This is the only token (pointer)
2171 passed as the argument to str.  Since <hash> is raw there is no need
2172 for an initial null pointer.  stringify_arg is called from
2173 get_raw_token when tracing through the expansion of str, since the
2174 argument has the STRINGIFY flag set.  stringify_arg turns off
2175 macro_expansion by setting the no_expand_level to that of the argument
2176 context.  Thus it gets the token <hash> and stringifies it to "hash"
2177 correctly.
2178
Similarly xstr is passed <hash>.  However, when parse_args is parsing
2180 the invocation of str() in xstr's expansion, get_raw_token encounters
2181 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2182 an argument context, and enters the tokens of the argument,
2183 i.e. <hash>.  This is at a higher context level than parse_args
2184 disabled, and so is_macro_disabled permits expansion of it and a macro
2185 context is pushed on top of the argument context.  This contains the
2186 <#> token, and the end result is that <hash> is macro expanded.
2187 However, after popping off the argument context, the <hash> of xstr's
2188 expansion does not get macro expanded because we're back at the
2189 no_expand_level.  The end result is that the argument passed to str is
2190 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2191 raw, <#> is not raw, but then <hash> is.
2192
2193 */
2194
2195
2196 /* Free the storage allocated for macro arguments.  */
2197 static void
2198 free_macro_args (args)
2199      macro_args *args;
2200 {
2201   if (args->tokens)
2202     free ((PTR) args->tokens);
2203   free (args->ends);
2204   free (args);
2205 }
2206
2207 /* Determines if a macro has been already used (and is therefore
2208    disabled).  */
2209 static int
2210 is_macro_disabled (pfile, expansion, token)
2211      cpp_reader *pfile;
2212      const cpp_toklist *expansion;
2213      const cpp_token *token;
2214 {
2215   cpp_context *context = CURRENT_CONTEXT (pfile);
2216
2217   /* Don't expand anything if this file has already been preprocessed.  */
2218   if (CPP_OPTION (pfile, preprocessed))
2219     return 1;
2220
2221   /* Arguments on either side of ## are inserted in place without
2222      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2223      occurs during a later rescan pass.  The effect is that we expand
2224      iff we would as part of the macro's expansion list, so we should
2225      drop to the macro's context.  */
2226   if (IS_ARG_CONTEXT (context))
2227     {
2228       if (token->flags & PASTED)
2229         context--;
2230       else if (!(context->flags & CONTEXT_RAW))
2231         return 1;
2232       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2233         context--;
2234     }
2235
2236   /* Have we already used this macro?  */
2237   while (context->level > 0)
2238     {
2239       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2240         return 1;
2241       /* Raw argument tokens are judged based on the token list they
2242          came from.  */
2243       if (context->flags & CONTEXT_RAW)
2244         context = pfile->contexts + context->level;
2245       else
2246         context--;
2247     }
2248
2249   /* Function-like macros may be disabled if the '(' is not in the
2250      current context.  We check this without disrupting the context
2251      stack.  */
2252   if (expansion->paramc >= 0)
2253     {
2254       const cpp_token *next;
2255       unsigned int prev_nme;
2256
2257       context = CURRENT_CONTEXT (pfile);
2258       /* Drop down any contexts we're at the end of: the '(' may
2259          appear in lower macro expansions, or in the rest of the file.  */
2260       while (context->posn == context->count && context > pfile->contexts)
2261         {
2262           context--;
2263           /* If we matched, we are disabled, as we appear in the
2264              expansion of each macro we meet.  */
2265           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2266             return 1;
2267         }
2268
2269       prev_nme = pfile->no_expand_level;
2270       pfile->no_expand_level = context - pfile->contexts;
2271       next = _cpp_get_token (pfile);
2272       restore_macro_expansion (pfile, prev_nme);
2273       if (next->type != CPP_OPEN_PAREN)
2274         {
2275           _cpp_push_token (pfile, next);
2276           if (CPP_WTRADITIONAL (pfile))
2277             cpp_warning (pfile,
2278          "function macro %s must be used with arguments in traditional C",
2279                          token->val.node->name);
2280           return 1;
2281         }
2282     }
2283
2284   return 0;
2285 }
2286
2287 /* Add a token to the set of tokens forming the arguments to the macro
2288    being parsed in parse_args.  */
2289 static void
2290 save_token (args, token)
2291      macro_args *args;
2292      const cpp_token *token;
2293 {
2294   if (args->used == args->capacity)
2295     {
2296       args->capacity += args->capacity + 100;
2297       args->tokens = (const cpp_token **)
2298         xrealloc ((PTR) args->tokens,
2299                   args->capacity * sizeof (const cpp_token *));
2300     }
2301   args->tokens[args->used++] = token;
2302 }
2303
2304 /* Take and save raw tokens until we finish one argument.  Empty
2305    arguments are saved as a single CPP_PLACEMARKER token.  */
2306 static const cpp_token *
2307 parse_arg (pfile, var_args, paren_context, args, pcount)
2308      cpp_reader *pfile;
2309      int var_args;
2310      unsigned int paren_context;
2311      macro_args *args;
2312      unsigned int *pcount;
2313 {
2314   const cpp_token *token;
2315   unsigned int paren = 0, count = 0;
2316   int raw, was_raw = 1;
2317
2318   for (count = 0;; count++)
2319     {
2320       token = _cpp_get_token (pfile);
2321
2322       switch (token->type)
2323         {
2324         default:
2325           break;
2326
2327         case CPP_OPEN_PAREN:
2328           paren++;
2329           break;
2330
2331         case CPP_CLOSE_PAREN:
2332           if (paren-- != 0)
2333             break;
2334           goto out;
2335
2336         case CPP_COMMA:
2337           /* Commas are not terminators within parantheses or var_args.  */
2338           if (paren || var_args)
2339             break;
2340           goto out;
2341
2342         case CPP_EOF:           /* Error reported by caller.  */
2343           goto out;
2344         }
2345
2346       raw = pfile->cur_context <= paren_context;
2347       if (raw != was_raw)
2348         {
2349           was_raw = raw;
2350           save_token (args, 0);
2351           count++;
2352         }
2353       save_token (args, token);
2354     }
2355
2356  out:
2357   if (count == 0)
2358     {
2359       /* Duplicate the placemarker.  Then we can set its flags and
2360          position and safely be using more than one.  */
2361       save_token (args, duplicate_token (pfile, &placemarker_token));
2362       count++;
2363     }
2364
2365   *pcount = count;
2366   return token;
2367 }
2368
/* Non-zero if the argument starting at offset O of arglist A is
   empty.  A null pointer at O is stepped over, so the test is whether
   the first real token of the argument is a CPP_PLACEMARKER.  Both A
   and O are evaluated more than once.  */

#define empty_argument(A, O) \
 (((A)->tokens[O] != 0 ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
  == CPP_PLACEMARKER)
2376
2377 /* Parse the arguments making up a macro invocation.  Nested arguments
2378    are automatically macro expanded, but immediate macros are not
2379    expanded; this enables e.g. operator # to work correctly.  Returns
2380    non-zero on error.  */
2381 static int
2382 parse_args (pfile, hp, args)
2383      cpp_reader *pfile;
2384      cpp_hashnode *hp;
2385      macro_args *args;
2386 {
2387   const cpp_token *token;
2388   const cpp_toklist *macro;
2389   unsigned int total = 0;
2390   unsigned int paren_context = pfile->cur_context;
2391   int argc = 0;
2392
2393   macro = hp->value.expansion;
2394   do
2395     {
2396       unsigned int count;
2397
2398       token = parse_arg (pfile, (argc + 1 == macro->paramc
2399                                  && (macro->flags & VAR_ARGS)),
2400                          paren_context, args, &count);
2401       if (argc < macro->paramc)
2402         {
2403           total += count;
2404           args->ends[argc] = total;
2405         }
2406       argc++;
2407     }
2408   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2409
2410   if (token->type == CPP_EOF)
2411     {
2412       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2413       return 1;
2414     }
2415   else if (argc < macro->paramc)
2416     {
2417       /* A rest argument is allowed to not appear in the invocation at all.
2418          e.g. #define debug(format, args...) ...
2419          debug("string");
2420          This is exactly the same as if the rest argument had received no
2421          tokens - debug("string",);  This extension is deprecated.  */
2422
2423       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2424         {
2425           /* Duplicate the placemarker.  Then we can set its flags and
2426              position and safely be using more than one.  */
2427           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2428           pm->flags = VOID_REST;
2429           save_token (args, pm);
2430           args->ends[argc] = total + 1;
2431
2432           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2433             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2434
2435           return 0;
2436         }
2437       else
2438         {
2439           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2440           return 1;
2441         }
2442     }
2443   /* An empty argument to an empty function-like macro is fine.  */
2444   else if (argc > macro->paramc
2445            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2446     {
2447       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2448       return 1;
2449     }
2450
2451   return 0;
2452 }
2453
2454 /* Adds backslashes before all backslashes and double quotes appearing
2455    in strings.  Non-printable characters are converted to octal.  */
2456 static U_CHAR *
2457 quote_string (dest, src, len)
2458      U_CHAR *dest;
2459      const U_CHAR *src;
2460      unsigned int len;
2461 {
2462   while (len--)
2463     {
2464       U_CHAR c = *src++;
2465
2466       if (c == '\\' || c == '"')
2467         {
2468           *dest++ = '\\';
2469           *dest++ = c;
2470         }
2471       else
2472         {
2473           if (ISPRINT (c))
2474             *dest++ = c;
2475           else
2476             {
2477               sprintf ((char *) dest, "\\%03o", c);
2478               dest += 4;
2479             }
2480         }
2481     }
2482
2483   return dest;
2484 }
2485
2486 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2487    CPP_STRING token containing TEXT in quoted form.  */
2488 static cpp_token *
2489 make_string_token (token, text, len)
2490      cpp_token *token;
2491      const U_CHAR *text;
2492      unsigned int len;
2493 {
2494   U_CHAR *buf;
2495
2496   buf = (U_CHAR *) xmalloc (len * 4);
2497   token->type = CPP_STRING;
2498   token->flags = 0;
2499   token->val.str.text = buf;
2500   token->val.str.len = quote_string (buf, text, len) - buf;
2501   return token;
2502 }
2503
2504 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2505    evaluating to NUMBER.  */
2506 static cpp_token *
2507 alloc_number_token (pfile, number)
2508      cpp_reader *pfile;
2509      int number;
2510 {
2511   cpp_token *result;
2512   char *buf;
2513
2514   result = get_temp_token (pfile);
2515   buf = xmalloc (20);
2516   sprintf (buf, "%d", number);
2517
2518   result->type = CPP_NUMBER;
2519   result->flags = 0;
2520   result->val.str.text = (U_CHAR *) buf;
2521   result->val.str.len = strlen (buf);
2522   return result;
2523 }
2524
2525 /* Returns a temporary token from the temporary token store of PFILE.  */
2526 static cpp_token *
2527 get_temp_token (pfile)
2528      cpp_reader *pfile;
2529 {
2530   if (pfile->temp_used == pfile->temp_alloced)
2531     {
2532       if (pfile->temp_used == pfile->temp_cap)
2533         {
2534           pfile->temp_cap += pfile->temp_cap + 20;
2535           pfile->temp_tokens = (cpp_token **) xrealloc
2536             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2537         }
2538       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2539         (sizeof (cpp_token));
2540     }
2541
2542   return pfile->temp_tokens[pfile->temp_used++];
2543 }
2544
2545 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2546 static void
2547 release_temp_tokens (pfile)
2548      cpp_reader *pfile;
2549 {
2550   while (pfile->temp_used)
2551     {
2552       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2553
2554       if (TOKEN_SPELL (token) == SPELL_STRING)
2555         {
2556           free ((char *) token->val.str.text);
2557           token->val.str.text = 0;
2558         }
2559     }
2560 }
2561
2562 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2563 void
2564 _cpp_free_temp_tokens (pfile)
2565      cpp_reader *pfile;
2566 {
2567   if (pfile->temp_tokens)
2568     {
2569       /* It is possible, though unlikely (looking for '(' of a funlike
2570          macro into EOF), that we haven't released the tokens yet.  */
2571       release_temp_tokens (pfile);
2572       while (pfile->temp_alloced)
2573         free (pfile->temp_tokens[--pfile->temp_alloced]);
2574       free (pfile->temp_tokens);
2575     }
2576
2577   if (pfile->date)
2578     {
2579       free ((char *) pfile->date->val.str.text);
2580       free (pfile->date);
2581       free ((char *) pfile->time->val.str.text);
2582       free (pfile->time);
2583     }
2584 }
2585
2586 /* Copy TOKEN into a temporary token from PFILE's store.  */
2587 static cpp_token *
2588 duplicate_token (pfile, token)
2589      cpp_reader *pfile;
2590      const cpp_token *token;
2591 {
2592   cpp_token *result = get_temp_token (pfile);
2593
2594   *result = *token;
2595   if (TOKEN_SPELL (token) == SPELL_STRING)
2596     {
2597       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2598       memcpy (buff, token->val.str.text, token->val.str.len);
2599       result->val.str.text = buff;
2600     }
2601   return result;
2602 }
2603
2604 /* Determine whether two tokens can be pasted together, and if so,
2605    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2606    be pasted, or the appropriate type for the merged token if they
2607    can.  */
2608 static enum cpp_ttype
2609 can_paste (pfile, token1, token2, digraph)
2610      cpp_reader * pfile;
2611      const cpp_token *token1, *token2;
2612      int* digraph;
2613 {
2614   enum cpp_ttype a = token1->type, b = token2->type;
2615   int cxx = CPP_OPTION (pfile, cplusplus);
2616
2617   /* Treat named operators as if they were ordinary NAMEs.  */
2618   if (token1->flags & NAMED_OP)
2619     a = CPP_NAME;
2620   if (token2->flags & NAMED_OP)
2621     b = CPP_NAME;
2622
2623   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2624     return a + (CPP_EQ_EQ - CPP_EQ);
2625
2626   switch (a)
2627     {
2628     case CPP_GREATER:
2629       if (b == a) return CPP_RSHIFT;
2630       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2631       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2632       break;
2633     case CPP_LESS:
2634       if (b == a) return CPP_LSHIFT;
2635       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2636       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2637       if (CPP_OPTION (pfile, digraphs))
2638         {
2639           if (b == CPP_COLON)
2640             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2641           if (b == CPP_MOD)
2642             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2643         }
2644       break;
2645
2646     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2647     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2648     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2649
2650     case CPP_MINUS:
2651       if (b == a)               return CPP_MINUS_MINUS;
2652       if (b == CPP_GREATER)     return CPP_DEREF;
2653       break;
2654     case CPP_COLON:
2655       if (b == a && cxx)        return CPP_SCOPE;
2656       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2657         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2658       break;
2659
2660     case CPP_MOD:
2661       if (CPP_OPTION (pfile, digraphs))
2662         {
2663           if (b == CPP_GREATER)
2664             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2665           if (b == CPP_COLON)
2666             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2667         }
2668       break;
2669     case CPP_DEREF:
2670       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2671       break;
2672     case CPP_DOT:
2673       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2674       if (b == CPP_NUMBER)      return CPP_NUMBER;
2675       break;
2676
2677     case CPP_HASH:
2678       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2679         /* %:%: digraph */
2680         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2681       break;
2682
2683     case CPP_NAME:
2684       if (b == CPP_NAME)        return CPP_NAME;
2685       if (b == CPP_NUMBER
2686           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2687       if (b == CPP_CHAR
2688           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2689       if (b == CPP_STRING
2690           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2691       break;
2692
2693     case CPP_NUMBER:
2694       if (b == CPP_NUMBER)      return CPP_NUMBER;
2695       if (b == CPP_NAME)        return CPP_NUMBER;
2696       if (b == CPP_DOT)         return CPP_NUMBER;
2697       /* Numbers cannot have length zero, so this is safe.  */
2698       if ((b == CPP_PLUS || b == CPP_MINUS)
2699           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2700         return CPP_NUMBER;
2701       break;
2702
2703     default:
2704       break;
2705     }
2706
2707   return CPP_EOF;
2708 }
2709
2710 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2711 static const cpp_token *
2712 maybe_paste_with_next (pfile, token)
2713      cpp_reader *pfile;
2714      const cpp_token *token;
2715 {
2716   cpp_token *pasted;
2717   const cpp_token *second;
2718   cpp_context *context = CURRENT_CONTEXT (pfile);
2719
2720   /* Is this token on the LHS of ## ? */
2721
2722   while ((token->flags & PASTE_LEFT)
2723          || ((context->flags & CONTEXT_PASTEL)
2724              && context->posn == context->count))
2725     {
2726       /* Suppress macro expansion for next token, but don't conflict
2727          with the other method of suppression.  If it is an argument,
2728          macro expansion within the argument will still occur.  */
2729       pfile->paste_level = pfile->cur_context;
2730       second = _cpp_get_token (pfile);
2731       pfile->paste_level = 0;
2732
2733       /* Ignore placemarker argument tokens (cannot be from an empty
2734          macro since macros are not expanded).  */
2735       if (token->type == CPP_PLACEMARKER)
2736         pasted = duplicate_token (pfile, second);
2737       else if (second->type == CPP_PLACEMARKER)
2738         {
2739           /* GCC has special extended semantics for , ## b where b is
2740              a varargs parameter: the comma disappears if b was given
2741              no actual arguments (not merely if b is an empty
2742              argument).  */
2743           if (token->type == CPP_COMMA && second->flags & VOID_REST)
2744             pasted = duplicate_token (pfile, second);
2745           else
2746             pasted = duplicate_token (pfile, token);
2747         }
2748       else
2749         {
2750           int digraph = 0;
2751           enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2752
2753           if (type == CPP_EOF)
2754             {
2755               if (CPP_OPTION (pfile, warn_paste))
2756                 {
2757                   /* Do not complain about , ## <whatever> if
2758                      <whatever> came from a variable argument, because
2759                      the author probably intended the ## to trigger
2760                      the special extended semantics (see above).  */
2761                   if (token->type == CPP_COMMA
2762                       && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2763                       && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2764                     /* no warning */;
2765                   else
2766                     cpp_warning (pfile,
2767                         "pasting would not give a valid preprocessing token");
2768                 }
2769               _cpp_push_token (pfile, second);
2770               return token;
2771             }
2772
2773           if (type == CPP_NAME || type == CPP_NUMBER)
2774             {
2775               /* Join spellings.  */
2776               U_CHAR *buf, *end;
2777
2778               pasted = get_temp_token (pfile);
2779               buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2780               end = spell_token (pfile, token, buf);
2781               end = spell_token (pfile, second, end);
2782               *end = '\0';
2783
2784               if (type == CPP_NAME)
2785                 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2786               else
2787                 {
2788                   pasted->val.str.text = uxstrdup (buf);
2789                   pasted->val.str.len = end - buf;
2790                 }
2791             }
2792           else if (type == CPP_WCHAR || type == CPP_WSTRING)
2793             pasted = duplicate_token (pfile, second);
2794           else
2795             {
2796               pasted = get_temp_token (pfile);
2797               pasted->val.integer = 0;
2798             }
2799
2800           pasted->type = type;
2801           pasted->flags = digraph ? DIGRAPH : 0;
2802
2803           if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2804             {
2805               pasted->type = pasted->val.node->value.code;
2806               pasted->flags |= NAMED_OP;
2807             }
2808         }
2809
2810       /* The pasted token gets the whitespace flags and position of the
2811          first token, the PASTE_LEFT flag of the second token, plus the
2812          PASTED flag to indicate it is the result of a paste.  However, we
2813          want to preserve the DIGRAPH flag.  */
2814       pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2815       pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2816                         | (second->flags & PASTE_LEFT) | PASTED);
2817       pasted->col = token->col;
2818       pasted->line = token->line;
2819
2820       /* See if there is another token to be pasted onto the one we just
2821          constructed.  */
2822       token = pasted;
2823       context = CURRENT_CONTEXT (pfile);
2824       /* and loop */
2825     }
2826   return token;
2827 }
2828
2829 /* Convert a token sequence to a single string token according to the
2830    rules of the ISO C #-operator.  */
2831 #define INIT_SIZE 200
2832 static cpp_token *
2833 stringify_arg (pfile, token)
2834      cpp_reader *pfile;
2835      const cpp_token *token;
2836 {
2837   cpp_token *result;
2838   unsigned char *main_buf;
2839   unsigned int prev_value, backslash_count = 0;
2840   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2841
2842   push_arg_context (pfile, token);
2843   prev_value  = prevent_macro_expansion (pfile);
2844   main_buf = (unsigned char *) xmalloc (buf_cap);
2845
2846   result = get_temp_token (pfile);
2847   ASSIGN_FLAGS_AND_POS (result, token);
2848
2849   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2850     {
2851       int escape;
2852       unsigned char *buf;
2853       unsigned int len = TOKEN_LEN (token);
2854
2855       if (token->type == CPP_PLACEMARKER)
2856         continue;
2857
2858       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2859                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2860       if (escape)
2861         len *= 4 + 1;
2862
2863       if (buf_used + len > buf_cap)
2864         {
2865           buf_cap = buf_used + len + INIT_SIZE;
2866           main_buf = xrealloc (main_buf, buf_cap);
2867         }
2868
2869       if (whitespace && (token->flags & PREV_WHITE))
2870         main_buf[buf_used++] = ' ';
2871
2872       if (escape)
2873         buf = (unsigned char *) xmalloc (len);
2874       else
2875         buf = main_buf + buf_used;
2876
2877       len = spell_token (pfile, token, buf) - buf;
2878       if (escape)
2879         {
2880           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2881           free (buf);
2882         }
2883       else
2884         buf_used += len;
2885
2886       whitespace = 1;
2887       if (token->type == CPP_BACKSLASH)
2888         backslash_count++;
2889       else
2890         backslash_count = 0;
2891     }
2892
2893   /* Ignore the final \ of invalid string literals.  */
2894   if (backslash_count & 1)
2895     {
2896       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2897       buf_used--;
2898     }
2899
2900   result->type = CPP_STRING;
2901   result->val.str.text = main_buf;
2902   result->val.str.len = buf_used;
2903   restore_macro_expansion (pfile, prev_value);
2904   return result;
2905 }
2906
2907 /* Allocate more room on the context stack of PFILE.  */
2908 static void
2909 expand_context_stack (pfile)
2910      cpp_reader *pfile;
2911 {
2912   pfile->context_cap += pfile->context_cap + 20;
2913   pfile->contexts = (cpp_context *)
2914     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2915 }
2916
2917 /* Push the context of macro NODE onto the context stack.  TOKEN is
2918    the CPP_NAME token invoking the macro.  */
2919 static int
2920 push_macro_context (pfile, token)
2921      cpp_reader *pfile;
2922      const cpp_token *token;
2923 {
2924   unsigned char orig_flags;
2925   macro_args *args;
2926   cpp_context *context;
2927   cpp_hashnode *node = token->val.node;
2928
2929   /* Token's flags may change when parsing args containing a nested
2930      invocation of this macro.  */
2931   orig_flags = token->flags & (PREV_WHITE | BOL);
2932   args = 0;
2933   if (node->value.expansion->paramc >= 0)
2934     {
2935       unsigned int error, prev_nme;
2936
2937       /* Allocate room for the argument contexts, and parse them.  */
2938       args  = (macro_args *) xmalloc (sizeof (macro_args));
2939       args->ends = (unsigned int *)
2940         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2941       args->tokens = 0;
2942       args->capacity = 0;
2943       args->used = 0;
2944       args->level = pfile->cur_context;
2945
2946       prev_nme = prevent_macro_expansion (pfile);
2947       pfile->args = args;
2948       error = parse_args (pfile, node, args);
2949       pfile->args = 0;
2950       restore_macro_expansion (pfile, prev_nme);
2951       if (error)
2952         {
2953           free_macro_args (args);
2954           return 1;
2955         }
2956     }
2957
2958   /* Now push its context.  */
2959   pfile->cur_context++;
2960   if (pfile->cur_context == pfile->context_cap)
2961     expand_context_stack (pfile);
2962
2963   context = CURRENT_CONTEXT (pfile);
2964   context->u.list = node->value.expansion;
2965   context->args = args;
2966   context->posn = 0;
2967   context->count = context->u.list->tokens_used;
2968   context->level = pfile->cur_context;
2969   context->flags = 0;
2970   context->pushed_token = 0;
2971
2972   /* Set the flags of the first token.  We know there must
2973      be one, empty macros are a single placemarker token.  */
2974   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2975
2976   return 0;
2977 }
2978
2979 /* Push an argument to the current macro onto the context stack.
2980    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2981 static void
2982 push_arg_context (pfile, token)
2983      cpp_reader *pfile;
2984      const cpp_token *token;
2985 {
2986   cpp_context *context;
2987   macro_args *args;
2988
2989   pfile->cur_context++;
2990   if (pfile->cur_context == pfile->context_cap)
2991       expand_context_stack (pfile);
2992
2993   context = CURRENT_CONTEXT (pfile);
2994   args = context[-1].args;
2995
2996   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2997   context->u.arg = args->tokens + context->count;
2998   context->count = args->ends[token->val.aux] - context->count;
2999   context->args = 0;
3000   context->posn = 0;
3001   context->level = args->level;
3002   context->flags = CONTEXT_ARG | CONTEXT_RAW;
3003   context->pushed_token = 0;
3004
3005   /* Set the flags of the first token.  There is one.  */
3006   {
3007     const cpp_token *first = context->u.arg[0];
3008     if (!first)
3009       first = context->u.arg[1];
3010
3011     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
3012                           token->flags & (PREV_WHITE | BOL));
3013   }
3014
3015   if (token->flags & PASTE_LEFT)
3016     context->flags |= CONTEXT_PASTEL;
3017   if (pfile->paste_level)
3018     context->flags |= CONTEXT_PASTER;
3019 }
3020
3021 /* "Unget" a token.  It is effectively inserted in the token queue and
3022    will be returned by the next call to get_raw_token.  */
3023 void
3024 _cpp_push_token (pfile, token)
3025      cpp_reader *pfile;
3026      const cpp_token *token;
3027 {
3028   cpp_context *context = CURRENT_CONTEXT (pfile);
3029
3030   if (context->posn > 0)
3031     {
3032       const cpp_token *prev;
3033       if (IS_ARG_CONTEXT (context))
3034         prev = context->u.arg[context->posn - 1];
3035       else
3036         prev = &context->u.list->tokens[context->posn - 1];
3037
3038       if (prev == token)
3039         {
3040           context->posn--;
3041           return;
3042         }
3043     }
3044
3045   if (context->pushed_token)
3046     cpp_ice (pfile, "two tokens pushed in a row");
3047   if (token->type != CPP_EOF)
3048     context->pushed_token = token;
3049   /* Don't push back a directive's CPP_EOF, step back instead.  */
3050   else if (pfile->cur_context == 0)
3051     pfile->contexts[0].posn--;
3052 }
3053
3054 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3055    introducing the directive.  */
3056 static void
3057 process_directive (pfile, token)
3058      cpp_reader *pfile;
3059      const cpp_token *token;
3060 {
3061   const struct directive *d = pfile->token_list.directive;
3062   int prev_nme = 0;
3063
3064   /* Skip over the directive name.  */
3065   if (token[1].type == CPP_NAME)
3066     _cpp_get_raw_token (pfile);
3067   else if (token[1].type != CPP_NUMBER)
3068     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
3069
3070   if (! (d->flags & EXPAND))
3071     prev_nme = prevent_macro_expansion (pfile);
3072   (void) (*d->handler) (pfile);
3073   if (! (d->flags & EXPAND))
3074     restore_macro_expansion (pfile, prev_nme);
3075   _cpp_skip_rest_of_line (pfile);
3076 }
3077
3078 /* The external interface to return the next token.  All macro
3079    expansion and directive processing is handled internally, the
3080    caller only ever sees the output after preprocessing.  */
3081 const cpp_token *
3082 cpp_get_token (pfile)
3083      cpp_reader *pfile;
3084 {
3085   const cpp_token *token;
3086   /* Loop till we hit a non-directive, non-placemarker token.  */
3087   for (;;)
3088     {
3089       token = _cpp_get_token (pfile);
3090
3091       if (token->type == CPP_PLACEMARKER)
3092         continue;
3093
3094       if (token->type == CPP_HASH && token->flags & BOL
3095           && pfile->token_list.directive)
3096         {
3097           process_directive (pfile, token);
3098           continue;
3099         }
3100
3101       return token;
3102     }
3103 }
3104
3105 /* The internal interface to return the next token.  There are two
3106    differences between the internal and external interfaces: the
3107    internal interface may return a PLACEMARKER token, and it does not
3108    process directives.  */
3109 const cpp_token *
3110 _cpp_get_token (pfile)
3111      cpp_reader *pfile;
3112 {
3113   const cpp_token *token, *old_token;
3114   cpp_hashnode *node;
3115
3116   /* Loop until we hit a non-macro token.  */
3117   for (;;)
3118     {
3119       token = get_raw_token (pfile);
3120
3121       /* Short circuit EOF. */
3122       if (token->type == CPP_EOF)
3123         return token;
3124
3125       /* If we are skipping... */
3126       if (pfile->skipping)
3127         {
3128           /* we still have to process directives,  */
3129           if (pfile->token_list.directive)
3130             return token;
3131
3132           /* but everything else is ignored.  */
3133           _cpp_skip_rest_of_line (pfile);
3134           continue;
3135         }
3136
3137       /* If there's a potential control macro and we get here, then that
3138          #ifndef didn't cover the entire file and its argument shouldn't
3139          be taken as a control macro.  */
3140       pfile->potential_control_macro = 0;
3141
3142       old_token = token;
3143
3144       /* See if there's a token to paste with this one.  */
3145       if (!pfile->paste_level)
3146         token = maybe_paste_with_next (pfile, token);
3147
3148       /* If it isn't a macro, return it now.  */
3149       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3150         return token;
3151
3152       /* Is macro expansion disabled in general, or are we in the
3153          middle of a token paste, or was this token just pasted?
3154          (Note we don't check token->flags & PASTED, because that
3155          counts tokens that were pasted at some point in the past,
3156          we're only interested in tokens that were pasted by this call
3157          to maybe_paste_with_next.)  */
3158       if (pfile->no_expand_level == pfile->cur_context
3159           || pfile->paste_level
3160           || (token != old_token
3161               && pfile->no_expand_level + 1 == pfile->cur_context))
3162         return token;
3163
3164       node = token->val.node;
3165       if (node->type != T_MACRO)
3166         return special_symbol (pfile, node, token);
3167
3168       if (is_macro_disabled (pfile, node->value.expansion, token))
3169         return token;
3170
3171       if (pfile->cur_context > CPP_STACK_MAX)
3172         {
3173           cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3174           return token;
3175         }
3176
3177       if (push_macro_context (pfile, token))
3178         return token;
3179       /* else loop */
3180     }
3181 }
3182
3183 /* Returns the next raw token, i.e. without performing macro
3184    expansion.  Argument contexts are automatically entered.  */
3185 static const cpp_token *
3186 get_raw_token (pfile)
3187      cpp_reader *pfile;
3188 {
3189   const cpp_token *result;
3190   cpp_context *context;
3191
3192   for (;;)
3193     {
3194       context = CURRENT_CONTEXT (pfile);
3195       if (context->pushed_token)
3196         {
3197           result = context->pushed_token;
3198           context->pushed_token = 0;
3199           return result;        /* Cannot be a CPP_MACRO_ARG */
3200         }
3201       else if (context->posn == context->count)
3202         {
3203           if (pop_context (pfile))
3204             return &eof_token;
3205           continue;
3206         }
3207       else if (IS_ARG_CONTEXT (context))
3208         {
3209           result = context->u.arg[context->posn++];
3210           if (result == 0)
3211             {
3212               context->flags ^= CONTEXT_RAW;
3213               result = context->u.arg[context->posn++];
3214             }
3215           return result;        /* Cannot be a CPP_MACRO_ARG */
3216         }
3217
3218       result = &context->u.list->tokens[context->posn++];
3219
3220       if (result->type != CPP_MACRO_ARG)
3221         return result;
3222
3223       if (result->flags & STRINGIFY_ARG)
3224         return stringify_arg (pfile, result);
3225
3226       push_arg_context (pfile, result);
3227     }
3228 }
3229
3230 /* Internal interface to get the token without macro expanding.  */
3231 const cpp_token *
3232 _cpp_get_raw_token (pfile)
3233      cpp_reader *pfile;
3234 {
3235   int prev_nme = prevent_macro_expansion (pfile);
3236   const cpp_token *result = _cpp_get_token (pfile);
3237   restore_macro_expansion (pfile, prev_nme);
3238   return result;
3239 }
3240
3241 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3242    list should be overwritten, or zero if we need to append
3243    (typically, if we are within the arguments to a macro, or looking
3244    for the '(' to start a function-like macro invocation).  */
3245 static int
3246 lex_next (pfile, clear)
3247      cpp_reader *pfile;
3248      int clear;
3249 {
3250   cpp_toklist *list = &pfile->token_list;
3251   const cpp_token *old_list = list->tokens;
3252   unsigned int old_used = list->tokens_used;
3253
3254   if (clear)
3255     {
3256       /* Release all temporary tokens.  */
3257       _cpp_clear_toklist (list);
3258       pfile->contexts[0].posn = 0;
3259       if (pfile->temp_used)
3260         release_temp_tokens (pfile);
3261     }
3262   lex_line (pfile, list);
3263   pfile->contexts[0].count = list->tokens_used;
3264
3265   if (!clear && pfile->args)
3266     {
3267       /* Fix up argument token pointers.  */
3268       if (old_list != list->tokens)
3269         {
3270           unsigned int i;
3271
3272           for (i = 0; i < pfile->args->used; i++)
3273             {
3274               const cpp_token *token = pfile->args->tokens[i];
3275               if (token >= old_list && token < old_list + old_used)
3276                 pfile->args->tokens[i] = (const cpp_token *)
3277                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3278             }
3279         }
3280
3281       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3282          tokens within the list of arguments that would otherwise act as
3283          preprocessing directives, the behavior is undefined.
3284
3285          This implementation will report a hard error and treat the
3286          'sequence of preprocessing tokens' as part of the macro argument,
3287          not a directive.
3288
3289          Note if pfile->args == 0, we're OK since we're only inside a
3290          macro argument after a '('.  */
3291       if (list->directive)
3292         {
3293           cpp_error_with_line (pfile, list->tokens[old_used].line,
3294                                list->tokens[old_used].col,
3295                                "#%s may not be used inside a macro argument",
3296                                list->directive->name);
3297           return 1;
3298         }
3299     }
3300
3301   return 0;
3302 }
3303
3304 /* Pops a context off the context stack.  If we're at the bottom, lexes
3305    the next logical line.  Returns EOF if we're at the end of the
3306    argument list to the # operator, or we should not "overflow"
3307    into the rest of the file (e.g. 6.10.3.1.1).  */
3308 static int
3309 pop_context (pfile)
3310      cpp_reader *pfile;
3311 {
3312   cpp_context *context;
3313
3314   if (pfile->cur_context == 0)
3315     {
3316       /* If we are currently processing a directive, do not advance.  6.10
3317          paragraph 2: A new-line character ends the directive even if it
3318          occurs within what would otherwise be an invocation of a
3319          function-like macro.  */
3320       if (pfile->token_list.directive)
3321         return 1;
3322
3323       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3324     }
3325
3326   /* Argument contexts, when parsing args or handling # operator
3327      return CPP_EOF at the end.  */
3328   context = CURRENT_CONTEXT (pfile);
3329   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3330     return 1;
3331
3332   /* Free resources when leaving macro contexts.  */
3333   if (context->args)
3334     free_macro_args (context->args);
3335
3336   if (pfile->cur_context == pfile->no_expand_level)
3337     pfile->no_expand_level--;
3338   pfile->cur_context--;
3339
3340   return 0;
3341 }
3342
3343 /* Turn off macro expansion at the current context level.  */
3344 static unsigned int
3345 prevent_macro_expansion (pfile)
3346      cpp_reader *pfile;
3347 {
3348   unsigned int prev_value = pfile->no_expand_level;
3349   pfile->no_expand_level = pfile->cur_context;
3350   return prev_value;
3351 }
3352
3353 /* Restore macro expansion to its previous state.  */
3354 static void
3355 restore_macro_expansion (pfile, prev_value)
3356      cpp_reader *pfile;
3357      unsigned int prev_value;
3358 {
3359   pfile->no_expand_level = prev_value;
3360 }
3361
3362 /* Used by cpperror.c to obtain the correct line and column to report
3363    in a diagnostic.  */
3364 unsigned int
3365 _cpp_get_line (pfile, pcol)
3366      cpp_reader *pfile;
3367      unsigned int *pcol;
3368 {
3369   unsigned int index;
3370   const cpp_token *cur_token;
3371
3372   if (pfile->in_lex_line)
3373     index = pfile->token_list.tokens_used;
3374   else
3375     index = pfile->contexts[0].posn;
3376
3377   if (index == 0)
3378     {
3379       if (pcol)
3380         *pcol = 0;
3381       return 0;
3382     }
3383
3384   cur_token = &pfile->token_list.tokens[index - 1];
3385   if (pcol)
3386     *pcol = cur_token->col;
3387   return cur_token->line;
3388 }
3389
3390 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3391 static const char * const monthnames[] =
3392 {
3393   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3394   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3395 };
3396
3397 /* Handle builtin macros like __FILE__.  */
3398 static const cpp_token *
3399 special_symbol (pfile, node, token)
3400      cpp_reader *pfile;
3401      cpp_hashnode *node;
3402      const cpp_token *token;
3403 {
3404   cpp_token *result;
3405   cpp_buffer *ip;
3406
3407   switch (node->type)
3408     {
3409     case T_FILE:
3410     case T_BASE_FILE:
3411       {
3412         const char *file;
3413
3414         ip = CPP_BUFFER (pfile);
3415         if (ip == 0)
3416           file = "";
3417         else
3418           {
3419             if (node->type == T_BASE_FILE)
3420               while (CPP_PREV_BUFFER (ip) != NULL)
3421                 ip = CPP_PREV_BUFFER (ip);
3422
3423             file = ip->nominal_fname;
3424           }
3425         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3426                                     strlen (file));
3427       }
3428       break;
3429
3430     case T_INCLUDE_LEVEL:
3431       /* pfile->include_depth counts the primary source as level 1,
3432          but historically __INCLUDE_DEPTH__ has called the primary
3433          source level 0.  */
3434       result = alloc_number_token (pfile, pfile->include_depth - 1);
3435       break;
3436
3437     case T_SPECLINE:
3438       /* If __LINE__ is embedded in a macro, it must expand to the
3439          line of the macro's invocation, not its definition.
3440          Otherwise things like assert() will not work properly.  */
3441       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3442       break;
3443
3444     case T_STDC:
3445       {
3446         int stdc = 1;
3447
3448 #ifdef STDC_0_IN_SYSTEM_HEADERS
3449         if (CPP_IN_SYSTEM_HEADER (pfile)
3450             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3451           stdc = 0;
3452 #endif
3453         result = alloc_number_token (pfile, stdc);
3454       }
3455       break;
3456
3457     case T_DATE:
3458     case T_TIME:
3459       if (pfile->date == 0)
3460         {
3461           /* Allocate __DATE__ and __TIME__ from permanent storage,
3462              and save them in pfile so we don't have to do this again.
3463              We don't generate these strings at init time because
3464              time() and localtime() are very slow on some systems.  */
3465           time_t tt = time (NULL);
3466           struct tm *tb = localtime (&tt);
3467
3468           pfile->date = make_string_token
3469             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3470           pfile->time = make_string_token
3471             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3472
3473           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3474                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3475           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3476                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3477         }
3478       result = node->type == T_DATE ? pfile->date: pfile->time;
3479       break;
3480
3481     case T_POISON:
3482       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3483       return token;
3484
3485     default:
3486       cpp_ice (pfile, "invalid special hash type");
3487       return token;
3488     }
3489
3490   ASSIGN_FLAGS_AND_POS (result, token);
3491   return result;
3492 }
3493 #undef DSC
3494
3495 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3496    if it hasn't happened already.  */
3497
3498 void
3499 _cpp_init_input_buffer (pfile)
3500      cpp_reader *pfile;
3501 {
3502   cpp_context *base;
3503
3504   init_trigraph_map ();
3505   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3506   pfile->no_expand_level = UINT_MAX;
3507   pfile->context_cap = 20;
3508   pfile->cur_context = 0;
3509
3510   pfile->contexts = (cpp_context *)
3511     xmalloc (pfile->context_cap * sizeof (cpp_context));
3512
3513   /* Clear the base context.  */
3514   base = &pfile->contexts[0];
3515   base->u.list = &pfile->token_list;
3516   base->posn = 0;
3517   base->count = 0;
3518   base->args = 0;
3519   base->level = 0;
3520   base->flags = 0;
3521   base->pushed_token = 0;
3522 }
3523
3524 /* Moves to the end of the directive line, popping contexts as
3525    necessary.  */
3526 void
3527 _cpp_skip_rest_of_line (pfile)
3528      cpp_reader *pfile;
3529 {
3530   /* Discard all stacked contexts.  */
3531   int i;
3532   for (i = pfile->cur_context; i > 0; i--)
3533     if (pfile->contexts[i].args)
3534       free_macro_args (pfile->contexts[i].args);
3535
3536   if (pfile->no_expand_level <= pfile->cur_context)
3537     pfile->no_expand_level = 0;
3538   pfile->cur_context = 0;
3539
3540   /* Clear the base context, and clear the directive pointer so that
3541      get_raw_token will advance to the next line.  */
3542   pfile->contexts[0].count = 0;
3543   pfile->contexts[0].posn = 0;
3544   pfile->token_list.directive = 0;
3545 }
3546
3547 /* Directive handler wrapper used by the command line option
3548    processor.  */
3549 void
3550 _cpp_run_directive (pfile, dir, buf, count)
3551      cpp_reader *pfile;
3552      const struct directive *dir;
3553      const char *buf;
3554      size_t count;
3555 {
3556   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3557     {
3558       unsigned int prev_lvl = 0;
3559
3560       /* Scan the line now, else prevent_macro_expansion won't work.  */
3561       lex_next (pfile, 1);
3562       if (! (dir->flags & EXPAND))
3563         prev_lvl = prevent_macro_expansion (pfile);
3564
3565       (void) (*dir->handler) (pfile);
3566
3567       if (! (dir->flags & EXPAND))
3568         restore_macro_expansion (pfile, prev_lvl);
3569
3570       _cpp_skip_rest_of_line (pfile);
3571       cpp_pop_buffer (pfile);
3572     }
3573 }