gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Check line numbers assigned to all errors.
  28 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  29 o Distinguish integers, floats, and 'other' pp-numbers.
  30 o Store ints and char constants as binary values.
  31 o New command-line assertion syntax.
  32 o Work towards functions in cpperror.c taking a message level parameter.
  33   If we do this, merge the common code of do_warning and do_error.
  34 o Comment all functions, and describe macro expansion algorithm.
  35 o Move as much out of header files as possible.
  36 o Remove single quote pairs `', and some '', from diagnostics.
  37 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  38
  39 */
  40
  41 #include "config.h"
  42 #include "system.h"
  43 #include "intl.h"
  44 #include "cpplib.h"
  45 #include "cpphash.h"
  46 #include "symcat.h"
  47
/* File-local constant tokens whose type field is CPP_PLACEMARKER and
   CPP_EOF respectively.  */
static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};

/* Flags for cpp_context.  Each flag is a distinct bit, so several may
   be combined in cpp_context.flags.  */
#define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
#define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
#define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
#define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  56
/* One entry of the pfile->contexts array (see CURRENT_CONTEXT).  A
   context is either a macro context, which reads from U.LIST, or an
   argument context (CONTEXT_ARG set in FLAGS), which reads from
   U.ARG.  */
typedef struct cpp_context cpp_context;
struct cpp_context
{
  union
  {
    const cpp_toklist *list;    /* Used for macro contexts only.  */
    const cpp_token **arg;      /* Used for arg contexts only.  */
  } u;

  /* Pushed token to be returned by next call to get_raw_token.  */
  const cpp_token *pushed_token;

  struct macro_args *args;      /* The arguments for a function-like
                                   macro.  NULL otherwise.  */
  unsigned short posn;          /* Current posn, index into u.  */
  unsigned short count;         /* No. of tokens in u.  */
  unsigned short level;         /* NOTE(review): meaning not visible
                                   here; confirm against the code that
                                   pushes contexts.  */
  unsigned char flags;          /* Combination of the CONTEXT_* bits.  */
};
  76
/* Argument storage for a function-like macro; cpp_context.args points
   at one of these.  NOTE(review): the per-field notes below are
   inferred from the field names only; confirm them against parse_arg,
   save_token and free_macro_args.  */
typedef struct macro_args macro_args;
struct macro_args
{
  unsigned int *ends;           /* Presumably where each argument ends
                                   within TOKENS.  */
  const cpp_token **tokens;     /* Presumably the saved argument tokens.  */
  unsigned int capacity;        /* Presumably slots allocated in TOKENS.  */
  unsigned int used;            /* Presumably slots of TOKENS in use.  */
  unsigned short level;         /* Meaning not visible here; TODO confirm.  */
};
  86
  87 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  88 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  89                                            macro_args *, unsigned int *));
  90 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  91 static void save_token PARAMS ((macro_args *, const cpp_token *));
  92 static int pop_context PARAMS ((cpp_reader *));
  93 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  94 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  95 static void free_macro_args PARAMS ((macro_args *));
  96
  97 #define auto_expand_name_space(list) \
  98     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
  99 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
 100                                          unsigned int));
 101 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 102                                          unsigned int));
 103
 104 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 105 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 106                                                 unsigned char *));
 107 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 108                                                      const unsigned char *));
 109 static int skip_block_comment PARAMS ((cpp_reader *));
 110 static int skip_line_comment PARAMS ((cpp_reader *));
 111 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 112 static void skip_whitespace PARAMS ((cpp_reader *, int));
 113 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 114                                    const U_CHAR *, const U_CHAR *));
 115 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 116 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 117                                   unsigned int));
 118 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 119 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 120                                   const unsigned char *,
 121                                   unsigned int, unsigned int));
 122 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 123 static int lex_next PARAMS ((cpp_reader *, int));
 124 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 125                                       const cpp_token *));
 126
 127 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 128 static void expand_context_stack PARAMS ((cpp_reader *));
 129 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 130                                             unsigned char *));
 131 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
 132                                   const cpp_token *, int));
 133 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 134                                           cpp_token *));
 135 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 136                                             unsigned int));
 137 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 138 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 139                                                 const cpp_token *));
 140 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 141 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 142                                                        const cpp_token *));
 143 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 144                                          const cpp_token *, int *));
 145 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 146 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 147 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 148 static void release_temp_tokens         PARAMS ((cpp_reader *));
 149 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 150 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 151
 152 #define INIT_TOKEN_STR(list, token) \
 153   do {(token)->val.str.len = 0; \
 154       (token)->val.str.text = (list)->namebuf + (list)->name_used; \
 155   } while (0)
 156
 157 #define VALID_SIGN(c, prevc) \
 158   (((c) == '+' || (c) == '-') && \
 159    ((prevc) == 'e' || (prevc) == 'E' \
 160     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 161
 162 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 163    character, if any, is in buffer.  */
 164
 165 #define handle_newline(cur, limit, c) \
 166  do { \
 167   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 168     (cur)++; \
 169   pfile->buffer->lineno++; \
 170   pfile->buffer->line_base = (cur); \
 171   pfile->col_adjust = 0; \
 172  } while (0)
 173
 174 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 175 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 176
 177 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
 178 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
 179 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
 180 #define BACKUP_DIGRAPH(ttype) do { \
 181   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 182
 183 /* An upper bound on the number of bytes needed to spell a token,
 184    including preceding whitespace.  */
 185 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 186 static inline size_t
 187 TOKEN_LEN (token)
 188      const cpp_token *token;
 189 {
 190   size_t len;
 191
 192   switch (TOKEN_SPELL (token))
 193     {
 194     default:            len = 0;                        break;
 195     case SPELL_STRING:  len = token->val.str.len;       break;
 196     case SPELL_IDENT:   len = token->val.node->length;  break;
 197     }
 198   return len + 5;
 199 }
 200
/* Nonzero if context C has the CONTEXT_ARG flag set.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Pointer to the entry of PFILE's context array selected by
   cur_context.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
/* Nonzero if the VAR_ARGS bit is set in C's flags and the token at
   C's current position in its macro list has val.aux equal to the
   index of the last parameter (paramc - 1).
   NOTE(review): VAR_ARGS is tested against the context's flags, whose
   documented bits are the CONTEXT_* values; VAR_ARGS is not defined
   in this file, so confirm it is not meant to be a flag of
   (c)->u.list instead.  */
#define ON_REST_ARG(c) \
 (((c)->flags & VAR_ARGS) \
  && (c)->u.list->tokens[(c)->posn].val.aux \
      == (unsigned int) ((c)->u.list->paramc - 1))

/* Set D's flags to the PREV_WHITE, BOL and PASTE_LEFT bits of S's
   flags, and if BOL is among them also copy S's column and line.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* Replace the PREV_WHITE and BOL bits of D's flags with F, and if F
   includes BOL also copy S's column and line to D.  F is flags, just
   consisting of PREV_WHITE | BOL.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
 218
/* Table with N_TTYPES entries, built by expanding TTYPE_TABLE with
   the two helper macros below.  An OP (e, s) entry becomes
   { SPELL_OPERATOR, s }; a TK (e, s) entry becomes { s, "e" }, i.e.
   its second argument followed by the stringified first argument.
   The helpers are undefined again once the table has been built.  */
#define OP(e, s) { SPELL_OPERATOR, U s           },
#define TK(e, s) { s,              U STRINGX (e) },

const struct token_spelling
_cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };

#undef OP
#undef TK
 227
 228 /* Notify the compiler proper that the current line number has jumped,
 229    or the current file name has changed.  */
 230
 231 static void
 232 output_line_command (pfile, print, line)
 233      cpp_reader *pfile;
 234      cpp_printer *print;
 235      unsigned int line;
 236 {
 237   cpp_buffer *ip = CPP_BUFFER (pfile);
 238
 239   if (line == 0)
 240     return;
 241
 242   /* End the previous line of text.  */
 243   if (pfile->need_newline)
 244     {
 245       putc ('\n', print->outf);
 246       print->lineno++;
 247     }
 248   pfile->need_newline = 0;
 249
 250   if (CPP_OPTION (pfile, no_line_commands))
 251     return;
 252
 253   /* If the current file has not changed, we can output a few newlines
 254      instead if we want to increase the line number by a small amount.
 255      We cannot do this if print->lineno is zero, because that means we
 256      haven't output any line commands yet.  (The very first line
 257      command output is a `same_file' command.)
 258
 259      'nominal_fname' values are unique, so they can be compared by
 260      comparing pointers.  */
 261   if (ip->nominal_fname == print->last_fname && print->lineno > 0
 262       && line >= print->lineno && line < print->lineno + 8)
 263     {
 264       while (line > print->lineno)
 265         {
 266           putc ('\n', print->outf);
 267           print->lineno++;
 268         }
 269       return;
 270     }
 271
 272   fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
 273            cpp_syshdr_flags (pfile, ip));
 274
 275   print->last_fname = ip->nominal_fname;
 276   print->lineno = line;
 277 }
 278
 279 /* Like fprintf, but writes to a printer object.  You should be sure
 280    always to generate a complete line when you use this function.  */
 281 void
 282 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
 283                      const char *fmt, ...))
 284 {
 285   va_list ap;
 286 #ifndef ANSI_PROTOTYPES
 287   cpp_reader *pfile;
 288   cpp_printer *print;
 289   const char *fmt;
 290 #endif
 291
 292   VA_START (ap, fmt);
 293
 294 #ifndef ANSI_PROTOTYPES
 295   pfile = va_arg (ap, cpp_reader *);
 296   print = va_arg (ap, cpp_printer *);
 297   fmt = va_arg (ap, const char *);
 298 #endif
 299
 300   /* End the previous line of text.  */
 301   if (pfile->need_newline)
 302     {
 303       putc ('\n', print->outf);
 304       print->lineno++;
 305     }
 306   pfile->need_newline = 0;
 307
 308   vfprintf (print->outf, fmt, ap);
 309   va_end (ap);
 310 }
 311
 312 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 313
 314 void
 315 cpp_scan_buffer_nooutput (pfile)
 316      cpp_reader *pfile;
 317 {
 318   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 319   const cpp_token *token;
 320
 321   /* In no-output mode, we can ignore everything but directives.  */
 322   for (;;)
 323     {
 324       token = _cpp_get_token (pfile);
 325
 326       if (token->type == CPP_EOF)
 327         {
 328           cpp_pop_buffer (pfile);
 329           if (CPP_BUFFER (pfile) == stop)
 330             break;
 331         }
 332
 333       if (token->type == CPP_HASH && token->flags & BOL
 334           && pfile->token_list.directive)
 335         {
 336           process_directive (pfile, token);
 337           continue;
 338         }
 339
 340       _cpp_skip_rest_of_line (pfile);
 341     }
 342 }
 343
 344 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 345 void
 346 cpp_scan_buffer (pfile, print)
 347      cpp_reader *pfile;
 348      cpp_printer *print;
 349 {
 350   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 351   const cpp_token *token, *prev = 0;
 352
 353   for (;;)
 354     {
 355       token = _cpp_get_token (pfile);
 356       if (token->type == CPP_EOF)
 357         {
 358           cpp_pop_buffer (pfile);
 359
 360           if (CPP_BUFFER (pfile) == stop)
 361             return;
 362
 363           prev = 0;
 364           continue;
 365         }
 366
 367       if (token->flags & BOL)
 368         {
 369           output_line_command (pfile, print, pfile->token_list.line);
 370           prev = 0;
 371
 372           if (token->type == CPP_HASH && pfile->token_list.directive)
 373             {
 374               process_directive (pfile, token);
 375               continue;
 376             }
 377         }
 378
 379       if (token->type != CPP_PLACEMARKER)
 380         {
 381           output_token (pfile, print->outf, token, prev, 1);
 382           pfile->need_newline = 1;
 383         }
 384
 385       prev = token;
 386     }
 387 }
 388
 389 /* Helper routine used by parse_include, which can't see spell_token.
 390    Reinterpret the current line as an h-char-sequence (< ... >); we are
 391    looking at the first token after the <.  */
 392 const cpp_token *
 393 _cpp_glue_header_name (pfile)
 394      cpp_reader *pfile;
 395 {
 396   const cpp_token *t;
 397   cpp_token *hdr;
 398   U_CHAR *buf, *p;
 399   size_t len, avail;
 400
 401   avail = 40;
 402   len = 0;
 403   buf = xmalloc (avail);
 404
 405   for (;;)
 406     {
 407       t = _cpp_get_token (pfile);
 408       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 409         break;
 410
 411       if (len + TOKEN_LEN (t) > avail)
 412         {
 413           avail = len + TOKEN_LEN (t) + 40;
 414           buf = xrealloc (buf, avail);
 415         }
 416
 417       if (t->flags & PREV_WHITE)
 418         buf[len++] = ' ';
 419
 420       p = spell_token (pfile, t, buf + len);
 421       len = (size_t) (p - buf);  /* p known >= buf */
 422     }
 423
 424   if (t->type == CPP_EOF)
 425     cpp_error (pfile, "missing terminating > character");
 426
 427   buf = xrealloc (buf, len);
 428
 429   hdr = get_temp_token (pfile);
 430   hdr->type = CPP_HEADER_NAME;
 431   hdr->flags = 0;
 432   hdr->val.str.text = buf;
 433   hdr->val.str.len = len;
 434   return hdr;
 435 }
 436
 437 /* Token-buffer helper functions.  */
 438
 439 /* Expand a token list's string space. It is *vital* that
 440    list->tokens_used is correct, to get pointer fix-up right.  */
 441 void
 442 _cpp_expand_name_space (list, len)
 443      cpp_toklist *list;
 444      unsigned int len;
 445 {
 446   const U_CHAR *old_namebuf;
 447
 448   old_namebuf = list->namebuf;
 449   list->name_cap += len;
 450   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 451
 452   /* Fix up token text pointers.  */
 453   if (list->namebuf != old_namebuf)
 454     {
 455       unsigned int i;
 456
 457       for (i = 0; i < list->tokens_used; i++)
 458         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
 459           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 460     }
 461 }
 462
 463 /* If there is not enough room for LEN more characters, expand the
 464    list by just enough to have room for LEN characters.  */
 465 void
 466 _cpp_reserve_name_space (list, len)
 467      cpp_toklist *list;
 468      unsigned int len;
 469 {
 470   unsigned int room = list->name_cap - list->name_used;
 471
 472   if (room < len)
 473     _cpp_expand_name_space (list, len - room);
 474 }
 475
 476 /* Expand the number of tokens in a list.  */
 477 void
 478 _cpp_expand_token_space (list, count)
 479      cpp_toklist *list;
 480      unsigned int count;
 481 {
 482   unsigned int n;
 483
 484   list->tokens_cap += count;
 485   n = list->tokens_cap;
 486   if (list->flags & LIST_OFFSET)
 487     list->tokens--, n++;
 488   list->tokens = (cpp_token *)
 489     xrealloc (list->tokens, n * sizeof (cpp_token));
 490   if (list->flags & LIST_OFFSET)
 491     list->tokens++;             /* Skip the dummy.  */
 492 }
 493
 494 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 495    an extra token in front of the token list, as this allows the lexer
 496    to always peek at the previous token without worrying about
 497    underflowing the list, and some initial space.  Otherwise, no
 498    token- or name-space is allocated, and there is no dummy token.  */
 499 void
 500 _cpp_init_toklist (list, flags)
 501      cpp_toklist *list;
 502      int flags;
 503 {
 504   if (flags == NO_DUMMY_TOKEN)
 505     {
 506       list->tokens_cap = 0;
 507       list->tokens = 0;
 508       list->name_cap = 0;
 509       list->namebuf = 0;
 510       list->flags = 0;
 511     }
 512   else
 513     {
 514       /* Initialize token space.  Put a dummy token before the start
 515          that will fail matches.  */
 516       list->tokens_cap = 256;   /* 4K's worth.  */
 517       list->tokens = (cpp_token *)
 518         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 519       list->tokens[0].type = CPP_EOF;
 520       list->tokens++;
 521
 522       /* Initialize name space.  */
 523       list->name_cap = 1024;
 524       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 525       list->flags = LIST_OFFSET;
 526     }
 527
 528   _cpp_clear_toklist (list);
 529 }
 530
 531 /* Clear a token list.  */
 532 void
 533 _cpp_clear_toklist (list)
 534      cpp_toklist *list;
 535 {
 536   list->tokens_used = 0;
 537   list->name_used = 0;
 538   list->directive = 0;
 539   list->paramc = 0;
 540   list->params_len = 0;
 541   list->flags &= LIST_OFFSET;  /* clear all but that one */
 542 }
 543
 544 /* Free a token list.  Does not free the list itself, which may be
 545    embedded in a larger structure.  */
 546 void
 547 _cpp_free_toklist (list)
 548      const cpp_toklist *list;
 549 {
 550   if (list->flags & LIST_OFFSET)
 551     free (list->tokens - 1);    /* Backup over dummy token.  */
 552   else
 553     free (list->tokens);
 554   free (list->namebuf);
 555 }
 556
 557 /* Compare two tokens.  */
 558 int
 559 _cpp_equiv_tokens (a, b)
 560      const cpp_token *a, *b;
 561 {
 562   if (a->type == b->type && a->flags == b->flags)
 563     switch (TOKEN_SPELL (a))
 564       {
 565       default:                  /* Keep compiler happy.  */
 566       case SPELL_OPERATOR:
 567         return 1;
 568       case SPELL_CHAR:
 569       case SPELL_NONE:
 570         return a->val.aux == b->val.aux; /* arg_no or character.  */
 571       case SPELL_IDENT:
 572         return a->val.node == b->val.node;
 573       case SPELL_STRING:
 574         return (a->val.str.len == b->val.str.len
 575                 && !memcmp (a->val.str.text, b->val.str.text,
 576                             a->val.str.len));
 577       }
 578
 579   return 0;
 580 }
 581
 582 /* Compare two token lists.  */
 583 int
 584 _cpp_equiv_toklists (a, b)
 585      const cpp_toklist *a, *b;
 586 {
 587   unsigned int i;
 588
 589   if (a->tokens_used != b->tokens_used
 590       || a->flags != b->flags
 591       || a->paramc != b->paramc)
 592     return 0;
 593
 594   for (i = 0; i < a->tokens_used; i++)
 595     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 596       return 0;
 597   return 1;
 598 }
 599
 600 /* Utility routine:
 601
 602    Compares, the token TOKEN to the NUL-terminated string STRING.
 603    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 604
 605 int
 606 cpp_ideq (token, string)
 607      const cpp_token *token;
 608      const char *string;
 609 {
 610   if (token->type != CPP_NAME)
 611     return 0;
 612
 613   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 614 }
 615
 616 /* Lexing algorithm.
 617
 618  The original lexer in cpplib was made up of two passes: a first pass
 619  that replaced trigraphs and deleted escaped newlines, and a second
 620  pass that tokenized the result of the first pass.  Tokenisation was
 621  performed by peeking at the next character in the input stream.  For
 622  example, if the input stream contained "!=", the handler for the !
 623  character would peek at the next character, and if it were a '='
 624  would skip over it, and return a "!=" token, otherwise it would
 625  return just the "!" token.
 626
 627  To implement a single-pass lexer, this peeking ahead is unworkable.
 628  An arbitrary number of escaped newlines, and trigraphs (in particular
 629  ??/ which translates to the escape \), could separate the '!' and '='
 630  in the input stream, yet the next token is still a "!=".
 631
 632  Suppose instead that we lex by one logical line at a time, producing
 633  a token list or stack for each logical line, and when seeing the '!'
 634  push a CPP_NOT token on the list.  Then if the '!' is part of a
 635  longer token ("!=") we know we must see the remainder of the token by
 636  the time we reach the end of the logical line.  Thus we can have the
 637  '=' handler look at the previous token (at the end of the list / top
 638  of the stack) and see if it is a "!" token, and if so, instead of
 639  pushing a "=" token revise the existing token to be a "!=" token.
 640
 641  This works in the presence of escaped newlines, because the '\' would
 642  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 643  newline ('\n' or '\r') handler looks at the token at the top of the
 644  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 645  Hence the '=' handler would never see any intervening tokens.
 646
 647  To make trigraphs work in this context, as in precedence trigraphs
 648  are highest and converted before anything else, the '?' handler does
 649  lookahead to see if it is a trigraph, and if so skips the trigraph
 650  and pushes the token it represents onto the top of the stack.  This
 651  also works in the particular case of a CPP_BACKSLASH trigraph.
 652
 653  To the preprocessor, whitespace is only significant to the point of
 654  knowing whether whitespace precedes a particular token.  For example,
 655  the '=' handler needs to know whether there was whitespace between it
 656  and a "!" token on the top of the stack, to make the token conversion
 657  decision correctly.  So each token has a PREV_WHITE flag to
 658  indicate this - the standard permits consecutive whitespace to be
 659  regarded as a single space.  The compiler front ends are not
 660  interested in whitespace at all; they just require a token stream.
 661  Another place where whitespace is significant to the preprocessor is
 662  a #define statement - if there is whitespace between the macro name
 663  and an initial "(" token the macro is "object-like", otherwise it is
 664  a function-like macro that takes arguments.
 665
 666  However, all is not rosy.  Parsing of identifiers, numbers, comments
 667  and strings becomes trickier because of the possibility of raw
 668  trigraphs and escaped newlines in the input stream.
 669
 670  The trigraphs are three consecutive characters beginning with two
 671  question marks.  A question mark is not valid as part of a number or
 672  identifier, so parsing of a number or identifier terminates normally
 673  upon reaching it, returning to the mainloop which handles the
 674  trigraph just like it would in any other position.  Similarly for the
 675  backslash of a backslash-newline combination.  So we just need the
 676  escaped-newline dropper in the mainloop to check if the token on the
 677  top of the stack after dropping the escaped newline is a number or
 678  identifier, and if so to continue the processing it as if nothing had
 679  happened.
 680
 681  For strings, we replace trigraphs whenever we reach a quote or
 682  newline, because there might be a backslash trigraph escaping them.
 683  We need to be careful that we start trigraph replacing from where we
 684  left off previously, because it is possible for a first scan to leave
 685  "fake" trigraphs that a second scan would pick up as real (e.g. the
 686  sequence "????/\n=" would find a fake ??= trigraph after removing the
 687  escaped newline.)
 688
 689  For line comments, on reaching a newline we scan the previous
 690  character(s) to see if it escaped, and continue if it is.  Block
 691  comments ignore everything and just focus on finding the comment
 692  termination mark.  The only difficult thing, and it is surprisingly
 693  tricky, is checking if an asterisk precedes the final slash since
 694  they could be separated by escaped newlines.  If the preprocessor is
 695  invoked with the output comments option, we don't bother removing
 696  escaped newlines and replacing trigraphs for output.
 697
 698  Finally, numbers can begin with a period, which is pushed initially
 699  as a CPP_DOT token in its own right.  The digit handler checks if the
 700  previous token was a CPP_DOT not separated by whitespace, and if so
 701  pops it off the stack and pushes a period into the number's buffer
 702  before calling the number parser.
 703
 704 */
 705
/* Spellings of the digraphs: "%:" and "%:%:" are the alternative
   spellings of # and ##, "<:" and ":>" of [ and ], "<%" and "%>" of
   { and }.  NOTE(review): how this table is indexed is not visible
   here; confirm against the code that reads it.  */
static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
                                                    U":>", U"<%", U"%>"};
 708
 709 /* Call when a trigraph is encountered.  It warns if necessary, and
 710    returns true if the trigraph should be honoured.  END is the third
 711    character of a trigraph in the input stream.  */
 712 static int
 713 trigraph_ok (pfile, end)
 714      cpp_reader *pfile;
 715      const unsigned char *end;
 716 {
 717   int accept = CPP_OPTION (pfile, trigraphs);
 718
 719   if (CPP_OPTION (pfile, warn_trigraphs))
 720     {
 721       unsigned int col = end - 1 - pfile->buffer->line_base;
 722       if (accept)
 723         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 724                                "trigraph ??%c converted to %c",
 725                                (int) *end, (int) _cpp_trigraph_map[*end]);
 726       else
 727         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 728                                "trigraph ??%c ignored", (int) *end);
 729     }
 730   return accept;
 731 }
 732
 733 /* Scan a string for trigraphs, warning or replacing them inline as
 734    appropriate.  When parsing a string, we must call this routine
 735    before processing a newline character (if trigraphs are enabled),
 736    since the newline might be escaped by a preceding backslash
 737    trigraph sequence.  Returns a pointer to the end of the name after
 738    replacement.  */
 739
 740 static unsigned char *
 741 trigraph_replace (pfile, src, limit)
 742      cpp_reader *pfile;
 743      unsigned char *src;
 744      unsigned char *limit;
 745 {
 746   unsigned char *dest;
 747
 748   /* Starting with src[1], find two consecutive '?'.  The case of no
 749      trigraphs is streamlined.  */
 750
 751   for (src++; src + 1 < limit; src += 2)
 752     {
 753       if (src[0] != '?')
 754         continue;
 755
 756       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 757       if (src[-1] == '?')
 758         src--;
 759       else if (src + 2 == limit || src[1] != '?')
 760         continue;
 761
 762       /* Check if it really is a trigraph.  */
 763       if (_cpp_trigraph_map[src[2]] == 0)
 764         continue;
 765
 766       dest = src;
 767       goto trigraph_found;
 768     }
 769   return limit;
 770
 771   /* Now we have a trigraph, we need to scan the remaining buffer, and
 772      copy-shifting its contents left if replacement is enabled.  */
 773   for (; src + 2 < limit; dest++, src++)
 774     if ((*dest = *src) == '?' && src[1] == '?' && _cpp_trigraph_map[src[2]])
 775       {
 776       trigraph_found:
 777         src += 2;
 778         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 779           *dest = _cpp_trigraph_map[*src];
 780       }
 781
 782   /* Copy remaining (at most 2) characters.  */
 783   while (src < limit)
 784     *dest++ = *src++;
 785   return dest;
 786 }
 787
 788 /* If CUR is a backslash or the end of a trigraphed backslash, return
 789    a pointer to its beginning, otherwise NULL.  We don't read beyond
 790    the buffer start, because there is the start of the comment in the
 791    buffer.  */
 792 static const unsigned char *
 793 backslash_start (pfile, cur)
 794      cpp_reader *pfile;
 795      const unsigned char *cur;
 796 {
 797   if (cur[0] == '\\')
 798     return cur;
 799   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 800       && trigraph_ok (pfile, cur))
 801     return cur - 2;
 802   return 0;
 803 }
 804
 805 /* Skip a C-style block comment.  This is probably the trickiest
 806    handler.  We find the end of the comment by seeing if an asterisk
 807    is before every '/' we encounter.  The nasty complication is that a
 808    previous asterisk may be separated by one or more escaped newlines.
 809    Returns non-zero if comment terminated by EOF, zero otherwise.
        Scanning starts at PFILE->buffer->cur and never goes past
        buffer->rlimit.  On return buffer->cur is just past the closing
        '/' when the comment was terminated, or at the point where the
        scan stopped when it was not.  */
 810 static int
 811 skip_block_comment (pfile)
 812      cpp_reader *pfile;
 813 {
 814   cpp_buffer *buffer = pfile->buffer;
       /* Non-null only while we sit directly after a '*' that was
          followed by one or more escaped newlines; a '/' found exactly
          at this address closes the comment.  */
 815   const unsigned char *char_after_star = 0;
 816   const unsigned char *cur = buffer->cur;
 817
 818   for (; cur < buffer->rlimit; )
 819     {
 820       unsigned char c = *cur++;
 821
 822       /* People like decorating comments with '*', so check for
 823          '/' instead for efficiency.  */
 824       if (c == '/')
 825         {
 826           /* Don't view / then * then / as finishing the comment.
                  cur - 1 is the '/' just read; requiring it to lie beyond
                  buffer->cur stops a '/' located at buffer->cur itself
                  from pairing with the '*' that precedes it.  The second
                  alternative handles a '*' separated from this '/' by
                  escaped newlines.  */
 827           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 828               || cur - 1 == char_after_star)
 829             {
 830               buffer->cur = cur;
 831               return 0;
 832             }
 833
 834           /* Warn about potential nested comments, but not when
 835              the final character inside the comment is a '/'.
 836              Don't bother to get it right across escaped newlines.
                  NOTE(review): buffer->cur is moved before the call,
                  presumably so the diagnostic is located here; confirm
                  against cpp_warning.  */
 837           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 838               && cur[0] == '*' && cur[1] != '/')
 839             {
 840               buffer->cur = cur;
 841               cpp_warning (pfile, "'/*' within comment");
 842             }
 843         }
 844       else if (is_vspace (c))
 845         {
               /* cur - 1 is the newline character, so cur - 2 is the
                  character immediately before it.  */
 846           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 847
               /* handle_newline is defined outside this excerpt.
                  NOTE(review): presumably a macro doing the per-line
                  bookkeeping that can step cur over the second character
                  of a two-character line ending; confirm.  */
 848           handle_newline (cur, buffer->rlimit, c);
 849           /* Work correctly if there is an asterisk before an
 850              arbitrarily long sequence of escaped newlines.  The
                  newline is escaped when bslash is non-null; the chain
                  is kept alive if the escape directly follows a '*' or
                  starts where the previous escaped newline left us.  */
 851           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 852             char_after_star = cur;
 853           else
 854             char_after_star = 0;
 855         }
 856       else if (c == '\t')
 857         adjust_column (pfile, cur - 1);
 858     }
 859
       /* Fell off the end of the input without seeing the terminator.  */
 860   buffer->cur = cur;
 861   return 1;
 862 }
 863
 864 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 865    non-zero if a multiline comment.
        Scanning starts at PFILE->buffer->cur.  On return buffer->cur
        points at the unescaped newline that ended the comment, or at the
        place the scan stopped if the input ran out first.  */
 866 static int
 867 skip_line_comment (pfile)
 868      cpp_reader *pfile;
 869 {
 870   cpp_buffer *buffer = pfile->buffer;
 871   register const unsigned char *cur = buffer->cur;
       /* Set once at least one escaped newline has been crossed.  */
 872   int multiline = 0;
 873
 874   for (; cur < buffer->rlimit; )
 875     {
 876       unsigned char c = *cur++;
 877
 878       if (is_vspace (c))
 879         {
 880           /* Check for a (trigraph?) backslash escaping the newline.
                  cur - 2 is the character just before the newline.  */
 881           if (!backslash_start (pfile, cur - 2))
 882             goto out;
 883           multiline = 1;
 884           handle_newline (cur, buffer->rlimit, c);
 885         }
 886     }
       /* No unescaped newline was found.  Bump cur so that the cur - 1
          below leaves buffer->cur where the loop stopped.  */
 887   cur++;
 888
 889  out:
 890   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 891   return multiline;
 892 }
 893
 894 /* TAB points to a \t character.  Update col_adjust so we track the
 895    column correctly.
        PFILE supplies the current buffer, the tabstop option and the
        col_adjust accumulator that is incremented.  Nothing is
        returned.  */
 896 static void
 897 adjust_column (pfile, tab)
 898      cpp_reader *pfile;
 899      const U_CHAR *tab;
 900 {
 901   /* Zero-based column.  */
 902   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
 903
 904   /* Round it up to multiple of the tabstop, but subtract 1 since the
 905      tab itself occupies a character position.  The amount added is
          therefore between 0 and tabstop - 1.  */
 906   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
 907                         - col % CPP_OPTION (pfile, tabstop)) - 1;
 908 }
 909
 910 /* Skips whitespace, stopping at next non-whitespace character.
 911    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
 912    to be assigned the correct column.
        Works directly on PFILE->buffer->cur, which is left on the first
        character that is not non-vertical space, or at buffer->rlimit.
        IN_DIRECTIVE is non-zero when the caller is lexing inside a
        preprocessing directive; it only controls whether form feeds and
        vertical tabs draw a pedantic warning.  */
 913 static void
 914 skip_whitespace (pfile, in_directive)
 915      cpp_reader *pfile;
 916      int in_directive;
 917 {
 918   cpp_buffer *buffer = pfile->buffer;
       /* Limits the embedded-null warning to one per call.  */
 919   unsigned short warned = 0;
 920
 921   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 922   while (buffer->cur < buffer->rlimit)
 923     {
 924       unsigned char c = *buffer->cur;
 925
 926       if (!is_nvspace (c))
 927         break;
 928
           /* Consume the character before classifying it, so buffer->cur
              is already past it when a diagnostic is issued.  */
 929       buffer->cur++;
 930       /* Horizontal space always OK.  */
 931       if (c == ' ')
 932         continue;
 933       else if (c == '\t')
 934         adjust_column (pfile, buffer->cur - 1);
 935       /* Must be \f \v or \0.  */
 936       else if (c == '\0')
 937         {
 938           if (!warned)
 939             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 940                                    CPP_BUF_COL (buffer),
 941                                    "embedded null character ignored");
 942           warned = 1;
 943         }
           /* Only \f and \v reach this point.  */
 944       else if (in_directive && CPP_PEDANTIC (pfile))
 945         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 946                                CPP_BUF_COL (buffer),
 947                                "%s in preprocessing directive",
 948                                c == '\f' ? "form feed" : "vertical tab");
 949     }
 950 }
 951
 952 /* Parse (append) an identifier.  Calculates the hash value of the
 953    token while parsing, for performance.  The algorithm *must* match
 954    cpp_lookup().
        CUR is the first character to examine and RLIMIT the end of the
        readable input.  TOK is the token being built: when it is a
        CPP_NAME with a null node the scanned text becomes its
        identifier; otherwise the scanned text is appended to what TOK
        already holds and TOK becomes a CPP_NAME for the combined
        spelling.  Returns the address of the first character that is
        not part of the identifier.  */
 955 static const U_CHAR *
 956 parse_name (pfile, tok, cur, rlimit)
 957      cpp_reader *pfile;
 958      cpp_token *tok;
 959      const U_CHAR *cur, *rlimit;
 960 {
 961   const U_CHAR *name;
 962   unsigned int len;
 963   unsigned int r;
 964
 965   name = cur;
 966   r = 0;
 967   while (cur < rlimit)
 968     {
 969       if (! is_idchar (*cur))
 970         break;
 971       /* $ is not a identifier character in the standard, but is
 972          commonly accepted as an extension.  Don't warn about it in
 973          skipped conditional blocks. */
 974       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 975         {
               /* NOTE(review): the buffer position is moved to the '$'
                  before warning, presumably so the diagnostic points at
                  it; confirm against cpp_pedwarn.  */
 976           CPP_BUFFER (pfile)->cur = cur;
 977           cpp_pedwarn (pfile, "'$' character in identifier");
 978         }
 979
 980       r = HASHSTEP (r, cur);
 981       cur++;
 982     }
 983   len = cur - name;
 984
       /* Fresh identifier: the hash accumulated above can be used as is.  */
 985   if (tok->type == CPP_NAME && tok->val.node == 0)
 986     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
 987   else
 988     {
           /* Appending.  The existing spelling is either the name of the
              node already attached to TOK, or the single character kept
              in tok->val.aux when TOK is not yet a CPP_NAME.  The hash
              in r covers only the new part, so it is not used here.  */
 989       unsigned int oldlen;
 990       U_CHAR *newname;
 991
 992       if (tok->type == CPP_NAME)
 993         oldlen = tok->val.node->length;
 994       else
 995         oldlen = 1;
 996
 997       newname = alloca (oldlen + len);
 998
 999       if (tok->type == CPP_NAME)
1000         memcpy (newname, tok->val.node->name, oldlen);
1001       else
1002         newname[0] = tok->val.aux;
1003       memcpy (newname + oldlen, name, len);
1004       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1005       tok->type = CPP_NAME;
1006     }
1007
1008   return cur;
1009 }
1010
1011 /* Parse (append) a number.
        Characters are copied from PFILE->buffer->cur to the end of
        LIST's name buffer (list->namebuf + list->name_used) for as long
        as they are is_numchar characters, '.', or a sign accepted by
        VALID_SIGN after the previous character.  NAME is the cpp_string
        being extended; its length is recomputed from name->text.  On
        return buffer->cur is on the first character not consumed and
        list->name_used accounts for everything appended.  */
1012 static void
1013 parse_number (pfile, list, name)
1014      cpp_reader *pfile;
1015      cpp_toklist *list;
1016      cpp_string *name;
1017 {
1018   const unsigned char *name_limit;
1019   unsigned char *namebuf;
1020   cpp_buffer *buffer = pfile->buffer;
1021   register const unsigned char *cur = buffer->cur;
1022
       /* Re-entered after the name buffer has been grown, so that both
          pointers are derived afresh from list->namebuf.  */
1023  expanded:
1024   name_limit = list->namebuf + list->name_cap;
1025   namebuf = list->namebuf + list->name_used;
1026
1027   for (; cur < buffer->rlimit && namebuf < name_limit; )
1028     {
           /* The byte is stored before it is tested; a rejected byte
              stays in the buffer but is not counted, because namebuf is
              only advanced for accepted characters.  */
1029       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1030
1031       /* Perhaps we should accept '$' here if we accept it for
1032          identifiers.  We know namebuf[-1] is safe, because for c to
1033          be a sign we must have pushed at least one character.  */
1034       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1035         goto out;
1036
1037       namebuf++;
1038       cur++;
1039     }
1040
1041   /* Run out of name space?  The loop stopped with input still left,
          so the name buffer must be full: record progress, grow it and
          resume.  */
1042   if (cur < buffer->rlimit)
1043     {
1044       list->name_used = namebuf - list->namebuf;
1045       auto_expand_name_space (list);
1046       goto expanded;
1047     }
1048
1049  out:
1050   buffer->cur = cur;
1051   name->len = namebuf - name->text;
1052   list->name_used = namebuf - list->namebuf;
1053 }
1054
1055 /* Places a string terminated by an unescaped TERMINATOR into a
1056    cpp_string, which should be expandable and thus at the top of the
1057    list's stack.  Handles embedded trigraphs, if necessary, and
1058    escaped newlines.
1059
1060    Can be used for character constants (terminator = '\''), string
1061    constants ('"') and angled headers ('>').  Multi-line strings are
1062    accepted as an extension, including within directives; character
        constants and header names may not span lines.

        Input is read from PFILE->buffer->cur and appended to LIST's name
        buffer, which is grown when it fills up.  TOKEN supplies the
        cpp_string being filled and the line and column remembered for
        multi-line string diagnostics.  On return buffer->cur, the string
        length and list->name_used have been updated; the terminator is
        not part of the stored text.  */
1063
1064 static void
1065 parse_string (pfile, list, token, terminator)
1066      cpp_reader *pfile;
1067      cpp_toklist *list;
1068      cpp_token *token;
1069      unsigned int terminator;
1070 {
1071   cpp_buffer *buffer = pfile->buffer;
1072   cpp_string *name = &token->val.str;
1073   register const unsigned char *cur = buffer->cur;
1074   const unsigned char *name_limit;
1075   unsigned char *namebuf;
       /* Number of NUL characters copied, reported once at the end.  */
1076   unsigned int null_count = 0;
       /* Offset in list->namebuf from which text still has to be passed
          through trigraph_replace.  */
1077   unsigned int trigraphed = list->name_used;
1078
       /* Re-entered after the name buffer has been grown.  */
1079  expanded:
1080   name_limit = list->namebuf + list->name_cap;
1081   namebuf = list->namebuf + list->name_used;
1082
1083   for (; cur < buffer->rlimit && namebuf < name_limit; )
1084     {
1085       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1086
1087       if (c == '\0')
1088         null_count++;
1089       else if (c == terminator || is_vspace (c))
1090         {
1091           /* Needed for trigraph_replace and multiline string warning.  */
1092           buffer->cur = cur;
1093
1094           /* Scan for trigraphs before checking if backslash-escaped.  */
1095           if ((CPP_OPTION (pfile, trigraphs)
1096                || CPP_OPTION (pfile, warn_trigraphs))
1097               && namebuf - (list->namebuf + trigraphed) >= 3)
1098             {
1099               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1100                                           namebuf);
1101               /* The test above guarantees trigraphed will be positive.
                      The next scan restarts two characters before the
                      current end.  */
1102               trigraphed = namebuf - list->namebuf - 2;
1103             }
1104
1105           namebuf--;     /* Drop the newline / terminator from the name.  */
1106           if (is_vspace (c))
1107             {
1108               /* Drop a backslash newline, and continue. */
1109               if (namebuf > list->namebuf && namebuf[-1] == '\\')
1110                 {
1111                   handle_newline (cur, buffer->rlimit, c);
1112                   namebuf--;
1113                   continue;
1114                 }
1115
                   /* Unescaped newline: step back onto it so that the
                      exits below leave it for the caller.  */
1116               cur--;
1117
1118               /* In assembly language, silently terminate strings of
1119                  either variety at end of line.  This is a kludge
1120                  around not knowing where comments are.  */
1121               if (CPP_OPTION (pfile, lang_asm))
1122                 goto out;
1123
1124               /* Character constants and header names may not extend
1125                  over multiple lines.  In Standard C, neither may
1126                  strings.  We accept multiline strings as an
1127                  extension.  (Even in directives - otherwise, glibc's
1128                  longlong.h breaks.)  */
1129               if (terminator != '"')
1130                 goto unterminated;
1131
1132               cur++;  /* Move forwards again.  */
1133
                   /* Remember where the first multi-line string began;
                      the location is used by the unterminated-string
                      diagnostic below.  */
1134               if (pfile->multiline_string_line == 0)
1135                 {
1136                   pfile->multiline_string_line = token->line;
1137                   pfile->multiline_string_column = token->col;
1138                   if (CPP_PEDANTIC (pfile))
1139                     cpp_pedwarn (pfile, "multi-line string constant");
1140                 }
1141
                   /* Whatever the line ending was, a single '\n' is
                      stored in the string.  */
1142               *namebuf++ = '\n';
1143               handle_newline (cur, buffer->rlimit, c);
1144             }
1145           else
1146             {
1147               unsigned char *temp;
1148
1149               /* An odd number of consecutive backslashes represents
1150                  an escaped terminator.  temp walks back over the run
                      of backslashes that ends just before the terminator
                      slot, so namebuf - temp is the run length plus one:
                      an odd difference means an even run, i.e. a real
                      terminator.  */
1151               temp = namebuf - 1;
1152               while (temp >= name->text && *temp == '\\')
1153                 temp--;
1154
1155               if ((namebuf - temp) & 1)
1156                 goto out;
                   /* Escaped: keep the terminator character in the text
                      and carry on.  */
1157               namebuf++;
1158             }
1159         }
1160     }
1161
1162   /* Run out of name space?  */
1163   if (cur < buffer->rlimit)
1164     {
1165       list->name_used = namebuf - list->namebuf;
1166       auto_expand_name_space (list);
1167       goto expanded;
1168     }
1169
1170   /* We may not have trigraph-replaced the input for this code path,
1171      but as the input is in error by being unterminated we don't
1172      bother.  Prevent warnings about no newlines at EOF.  */
1173   if (is_vspace (cur[-1]))
1174     cur--;
1175
1176  unterminated:
1177   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1178
       /* For string constants, also point at the place an earlier
          multi-line string started, when that was on a different line
          from this list.  */
1179   if (terminator == '\"' && pfile->multiline_string_line != list->line
1180       && pfile->multiline_string_line != 0)
1181     {
1182       cpp_error_with_line (pfile, pfile->multiline_string_line,
1183                            pfile->multiline_string_column,
1184                            "possible start of unterminated string literal");
1185       pfile->multiline_string_line = 0;
1186     }
1187
1188  out:
1189   buffer->cur = cur;
1190   name->len = namebuf - name->text;
1191   list->name_used = namebuf - list->namebuf;
1192
1193   if (null_count > 0)
1194     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1195                          : "null character preserved"));
1196 }
1197
1198 /* The character TYPE helps us distinguish comment types: '*' = C
1199    style, '/' = C++ style.  For code simplicity, the stored comment
1200    includes the comment start and any terminator.
        TOKEN is turned into a CPP_COMMENT whose text is stored in LIST's
        name buffer.  FROM points at the comment text that follows the
        two-character opener and LEN is the number of bytes to copy from
        there; the opener itself is re-created from TYPE.  */
1201
     /* Length of the opener that is put in front of the copied text.  */
1202 #define COMMENT_START_LEN 2
1203 static void
1204 save_comment (list, token, from, len, type)
1205      cpp_toklist *list;
1206      cpp_token *token;
1207      const unsigned char *from;
1208      unsigned int len;
1209      unsigned int type;
1210 {
1211   unsigned char *buffer;
1212
       /* From here on LEN is the full stored length, opener included.  */
1213   len += COMMENT_START_LEN;
1214
       /* Make room first; the token's string is set up only after any
          growth of the name buffer.  */
1215   if (list->name_used + len > list->name_cap)
1216     _cpp_expand_name_space (list, len);
1217
       /* INIT_TOKEN_STR is defined outside this excerpt.
          NOTE(review): presumably it points the token's string at the
          current end of the name buffer; confirm.  */
1218   INIT_TOKEN_STR (list, token);
1219   token->type = CPP_COMMENT;
1220   token->val.str.len = len;
1221
1222   buffer = list->namebuf + list->name_used;
1223   list->name_used += len;
1224
1225   /* Copy the comment.  A C comment opens with a slash then a star;
          any other TYPE is simply written twice.  */
1226   if (type == '*')
1227     {
1228       *buffer++ = '/';
1229       *buffer++ = '*';
1230     }
1231   else
1232     {
1233       *buffer++ = type;
1234       *buffer++ = type;
1235     }
1236   memcpy (buffer, from, len - COMMENT_START_LEN);
1237 }
1238
1239 /*
1240  *  The tokenizer's main loop.  Returns a token list, representing a
1241  *  logical line in the input file.  On EOF after some tokens have
1242  *  been processed, we return immediately.  Then in next call, or if
1243  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1244  *  token is placed in the list.
1245  *
1246  *  Implementation relies almost entirely on lookback, rather than
1247  *  looking forwards.  This means that tokenization requires just
1248  *  a single pass of the file, even in the presence of trigraphs and
1249  *  escaped newlines, providing significant performance benefits.
1250  *  Trigraph overhead is negligible if they are disabled, and low
1251  *  even when enabled.
1252  */
1253
     /* Helper predicates for lex_line.  Both expand to expressions that
        use lex_line's local variables (list, cur_token, first_token), so
        they are only meaningful inside that function.
        KNOWN_DIRECTIVE is true once list->directive has been set.
        MIGHT_BE_DIRECTIVE is true when cur_token is the second token of
        the line being lexed and the token before it is a CPP_HASH.  */
1254 #define KNOWN_DIRECTIVE() (list->directive != 0)
1255 #define MIGHT_BE_DIRECTIVE() \
1256 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1257
1258 static void
1259 lex_line (pfile, list)
1260      cpp_reader *pfile;
1261      cpp_toklist *list;
1262 {
1263   cpp_token *cur_token, *token_limit, *first;
1264   cpp_buffer *buffer = pfile->buffer;
1265   const unsigned char *cur = buffer->cur;
1266   unsigned char flags = 0;
1267   unsigned int first_token = list->tokens_used;
1268
1269   if (!(list->flags & LIST_OFFSET))
1270     (abort) ();
1271
1272  retry:
1273   list->file = buffer->nominal_fname;
1274   list->line = CPP_BUF_LINE (buffer);
1275   pfile->col_adjust = 0;
1276   pfile->in_lex_line = 1;
1277   if (cur == buffer->buf)
1278     list->flags |= BEG_OF_FILE;
1279
1280  expanded:
1281   token_limit = list->tokens + list->tokens_cap;
1282   cur_token = list->tokens + list->tokens_used;
1283
1284   for (; cur < buffer->rlimit && cur_token < token_limit;)
1285     {
1286       unsigned char c;
1287
1288       /* Optimize non-vertical whitespace skipping; most tokens are
1289          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1290       c = *cur;
1291       if (is_nvspace (c))
1292         {
1293           buffer->cur = cur;
1294           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1295                                    && cur_token > &list->tokens[first_token]));
1296           cur = buffer->cur;
1297
1298           flags = PREV_WHITE;
1299           if (cur == buffer->rlimit)
1300             break;
1301           c = *cur;
1302         }
1303       cur++;
1304
1305       /* Initialize current token.  CPP_EOF will not be fixed up by
1306          expand_name_space.  */
1307       list->tokens_used = cur_token - list->tokens + 1;
1308       cur_token->type = CPP_EOF;
1309       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1310       cur_token->line = CPP_BUF_LINE (buffer);
1311       cur_token->flags = flags;
1312       flags = 0;
1313
1314       switch (c)
1315         {
1316         case '0': case '1': case '2': case '3': case '4':
1317         case '5': case '6': case '7': case '8': case '9':
1318           {
1319             int prev_dot;
1320
1321             cur--;              /* Backup character.  */
1322             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1323             if (prev_dot)
1324               cur_token--;
1325             INIT_TOKEN_STR (list, cur_token);
1326             /* Prepend an immediately previous CPP_DOT token.  */
1327             if (prev_dot)
1328               {
1329                 if (list->name_cap == list->name_used)
1330                   auto_expand_name_space (list);
1331
1332                 cur_token->val.str.len = 1;
1333                 list->namebuf[list->name_used++] = '.';
1334               }
1335
1336           continue_number:
1337             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1338             buffer->cur = cur;
1339             parse_number (pfile, list, &cur_token->val.str);
1340             cur = buffer->cur;
1341           }
1342           /* Check for # 123 form of #line.  */
1343           if (MIGHT_BE_DIRECTIVE ())
1344             list->directive = _cpp_check_linemarker (pfile, cur_token,
1345                                                      !(cur_token[-1].flags
1346                                                        & PREV_WHITE));
1347           cur_token++;
1348           break;
1349
1350         letter:
1351         case '_':
1352         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1353         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1354         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1355         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1356         case 'y': case 'z':
1357         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1358         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1359         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1360         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1361         case 'Y': case 'Z':
1362           cur--;                     /* Backup character.  */
1363
1364           /* In Objective C, '@' may begin certain keywords.  */
1365           if (CPP_OPTION (pfile, objc) && cur_token[-1].type == CPP_OTHER
1366               && cur_token[-1].val.aux == '@' && IMMED_TOKEN ())
1367             cur_token--;
1368           else
1369             {
1370               cur_token->val.node = 0;
1371               cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1372             }
1373
1374         continue_name:
1375           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1376
1377           if (MIGHT_BE_DIRECTIVE ())
1378             list->directive = _cpp_check_directive (pfile, cur_token,
1379                                                     !(list->tokens[0].flags
1380                                                       & PREV_WHITE));
1381           /* Convert named operators to their proper types.  */
1382           if (cur_token->val.node->type == T_OPERATOR)
1383             {
1384               cur_token->flags |= NAMED_OP;
1385               cur_token->type = cur_token->val.node->value.code;
1386             }
1387
1388           cur_token++;
1389           break;
1390
1391         case '\'':
1392           cur_token->type = CPP_CHAR;
1393           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1394               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1395             BACKUP_TOKEN (CPP_WCHAR);
1396           goto do_parse_string;
1397
1398         case '\"':
1399           cur_token->type = CPP_STRING;
1400           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1401               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1402             BACKUP_TOKEN (CPP_WSTRING);
1403           else if (CPP_OPTION (pfile, objc)
1404                    && cur_token[-1].type == CPP_OTHER && IMMED_TOKEN ()
1405                    && cur_token[-1].val.aux == '@')
1406             BACKUP_TOKEN (CPP_OSTRING);
1407
1408         do_parse_string:
1409           /* Here c is one of ' " or >.  */
1410           INIT_TOKEN_STR (list, cur_token);
1411           buffer->cur = cur;
1412           parse_string (pfile, list, cur_token, c);
1413           cur = buffer->cur;
1414           cur_token++;
1415           break;
1416
1417         case '/':
1418           cur_token->type = CPP_DIV;
1419           if (IMMED_TOKEN ())
1420             {
1421               if (PREV_TOKEN_TYPE == CPP_DIV)
1422                 {
1423                   /* We silently allow C++ comments in system headers,
1424                      irrespective of conformance mode, because lots of
1425                      broken systems do that and trying to clean it up
1426                      in fixincludes is a nightmare.  */
1427                   if (CPP_IN_SYSTEM_HEADER (pfile))
1428                     goto do_line_comment;
1429                   else if (CPP_OPTION (pfile, cplusplus_comments))
1430                     {
1431                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1432                           && ! buffer->warned_cplusplus_comments)
1433                         {
1434                           buffer->cur = cur;
1435                           cpp_pedwarn (pfile,
1436                              "C++ style comments are not allowed in ISO C89");
1437                           cpp_pedwarn (pfile,
1438                           "(this will be reported only once per input file)");
1439                           buffer->warned_cplusplus_comments = 1;
1440                         }
1441                     do_line_comment:
1442                       buffer->cur = cur;
1443 #if 0 /* Leave until new lexer in place.  */
1444                       if (cur[-2] != c)
1445                         cpp_warning (pfile,
1446                                      "comment start split across lines");
1447 #endif
1448                       if (skip_line_comment (pfile))
1449                         cpp_warning (pfile, "multi-line comment");
1450
1451                       /* Back-up to first '-' or '/'.  */
1452                       cur_token--;
1453                       if (!CPP_OPTION (pfile, discard_comments)
1454                           && (!KNOWN_DIRECTIVE()
1455                               || (list->directive->flags & COMMENTS)))
1456                         save_comment (list, cur_token++, cur,
1457                                       buffer->cur - cur, c);
1458                       else
1459                         flags = PREV_WHITE;
1460
1461                       cur = buffer->cur;
1462                       break;
1463                     }
1464                 }
1465             }
1466           cur_token++;
1467           break;
1468
1469         case '*':
1470           cur_token->type = CPP_MULT;
1471           if (IMMED_TOKEN ())
1472             {
1473               if (PREV_TOKEN_TYPE == CPP_DIV)
1474                 {
1475                   buffer->cur = cur;
1476 #if 0 /* Leave until new lexer in place.  */
1477                   if (cur[-2] != '/')
1478                     cpp_warning (pfile,
1479                                  "comment start '/*' split across lines");
1480 #endif
1481                   if (skip_block_comment (pfile))
1482                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1483                                          "unterminated comment");
1484 #if 0 /* Leave until new lexer in place.  */
1485                   else if (buffer->cur[-2] != '*')
1486                     cpp_warning (pfile,
1487                                  "comment end '*/' split across lines");
1488 #endif
1489                   /* Back up to opening '/'.  */
1490                   cur_token--;
1491                   if (!CPP_OPTION (pfile, discard_comments)
1492                       && (!KNOWN_DIRECTIVE()
1493                           || (list->directive->flags & COMMENTS)))
1494                     save_comment (list, cur_token++, cur,
1495                                   buffer->cur - cur, c);
1496                   else
1497                     flags = PREV_WHITE;
1498
1499                   cur = buffer->cur;
1500                   break;
1501                 }
1502               else if (CPP_OPTION (pfile, cplusplus))
1503                 {
1504                   /* In C++, there are .* and ->* operators.  */
1505                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1506                     BACKUP_TOKEN (CPP_DEREF_STAR);
1507                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1508                     BACKUP_TOKEN (CPP_DOT_STAR);
1509                 }
1510             }
1511           cur_token++;
1512           break;
1513
1514         case '\n':
1515         case '\r':
1516           handle_newline (cur, buffer->rlimit, c);
1517           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1518             {
1519               if (IMMED_TOKEN ())
1520                 {
1521                   /* Remove the escaped newline.  Then continue to process
1522                      any interrupted name or number.  */
1523                   cur_token--;
1524                   /* Backslash-newline may not be immediately followed by
1525                      EOF (C99 5.1.1.2).  */
1526                   if (cur >= buffer->rlimit)
1527                     {
1528                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1529                       break;
1530                     }
1531                   if (IMMED_TOKEN ())
1532                     {
1533                       cur_token--;
1534                       if (cur_token->type == CPP_NAME)
1535                         goto continue_name;
1536                       else if (cur_token->type == CPP_NUMBER)
1537                         goto continue_number;
1538                       cur_token++;
1539                     }
1540                   /* Remember whitespace setting.  */
1541                   flags = cur_token->flags;
1542                   break;
1543                 }
1544               else
1545                 {
1546                   buffer->cur = cur;
1547                   cpp_warning (pfile,
1548                                "backslash and newline separated by space");
1549                 }
1550             }
1551           else if (MIGHT_BE_DIRECTIVE ())
1552             {
1553               /* "Null directive." C99 6.10.7: A preprocessing
1554                  directive of the form # <new-line> has no effect.
1555
1556                  But it is still a directive, and therefore disappears
1557                  from the output. */
1558               cur_token--;
1559               if (cur_token->flags & PREV_WHITE
1560                   && CPP_WTRADITIONAL (pfile))
1561                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1562             }
1563
1564           /* Skip vertical space until we have at least one token to
1565              return.  */
1566           if (cur_token != &list->tokens[first_token])
1567             goto out;
1568           list->line = CPP_BUF_LINE (buffer);
1569           break;
1570
1571         case '-':
1572           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1573             REVISE_TOKEN (CPP_MINUS_MINUS);
1574           else
1575             PUSH_TOKEN (CPP_MINUS);
1576           break;
1577
1578         make_hash:
1579         case '#':
1580           /* The digraph flag checking ensures that ## and %:%:
1581              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1582           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1583               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1584             REVISE_TOKEN (CPP_PASTE);
1585           else
1586             PUSH_TOKEN (CPP_HASH);
1587           break;
1588
1589         case ':':
1590           cur_token->type = CPP_COLON;
1591           if (IMMED_TOKEN ())
1592             {
1593               if (PREV_TOKEN_TYPE == CPP_COLON
1594                   && CPP_OPTION (pfile, cplusplus))
1595                 BACKUP_TOKEN (CPP_SCOPE);
1596               else if (CPP_OPTION (pfile, digraphs))
1597                 {
1598                   /* Digraph: "<:" is a '['  */
1599                   if (PREV_TOKEN_TYPE == CPP_LESS)
1600                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1601                   /* Digraph: "%:" is a '#'  */
1602                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1603                     {
1604                       (--cur_token)->flags |= DIGRAPH;
1605                       goto make_hash;
1606                     }
1607                 }
1608             }
1609           cur_token++;
1610           break;
1611
1612         case '&':
1613           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1614             REVISE_TOKEN (CPP_AND_AND);
1615           else
1616             PUSH_TOKEN (CPP_AND);
1617           break;
1618
1619         make_or:
1620         case '|':
1621           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1622             REVISE_TOKEN (CPP_OR_OR);
1623           else
1624             PUSH_TOKEN (CPP_OR);
1625           break;
1626
1627         case '+':
1628           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1629             REVISE_TOKEN (CPP_PLUS_PLUS);
1630           else
1631             PUSH_TOKEN (CPP_PLUS);
1632           break;
1633
1634         case '=':
1635             /* This relies on equidistance of "?=" and "?" tokens.  */
1636           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1637             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1638           else
1639             PUSH_TOKEN (CPP_EQ);
1640           break;
1641
1642         case '>':
1643           cur_token->type = CPP_GREATER;
1644           if (IMMED_TOKEN ())
1645             {
1646               if (PREV_TOKEN_TYPE == CPP_GREATER)
1647                 BACKUP_TOKEN (CPP_RSHIFT);
1648               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1649                 BACKUP_TOKEN (CPP_DEREF);
1650               else if (CPP_OPTION (pfile, digraphs))
1651                 {
1652                   /* Digraph: ":>" is a ']'  */
1653                   if (PREV_TOKEN_TYPE == CPP_COLON)
1654                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1655                   /* Digraph: "%>" is a '}'  */
1656                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1657                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1658                 }
1659             }
1660           cur_token++;
1661           break;
1662
1663         case '<':
1664           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1665             {
1666               REVISE_TOKEN (CPP_LSHIFT);
1667               break;
1668             }
1669           /* Is this the beginning of a header name?  */
1670           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1671             {
1672               c = '>';  /* Terminator.  */
1673               cur_token->type = CPP_HEADER_NAME;
1674               goto do_parse_string;
1675             }
1676           PUSH_TOKEN (CPP_LESS);
1677           break;
1678
1679         case '%':
1680           /* Digraph: "<%" is a '{'  */
1681           cur_token->type = CPP_MOD;
1682           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1683               && CPP_OPTION (pfile, digraphs))
1684             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1685           cur_token++;
1686           break;
1687
1688         case '?':
1689           if (cur + 1 < buffer->rlimit && *cur == '?'
1690               && _cpp_trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1691             {
1692               /* Handle trigraph.  */
1693               cur++;
1694               switch (*cur++)
1695                 {
1696                 case '(': goto make_open_square;
1697                 case ')': goto make_close_square;
1698                 case '<': goto make_open_brace;
1699                 case '>': goto make_close_brace;
1700                 case '=': goto make_hash;
1701                 case '!': goto make_or;
1702                 case '-': goto make_complement;
1703                 case '/': goto make_backslash;
1704                 case '\'': goto make_xor;
1705                 }
1706             }
1707           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1708             {
1709               /* GNU C++ defines <? and >? operators.  */
1710               if (PREV_TOKEN_TYPE == CPP_LESS)
1711                 {
1712                   REVISE_TOKEN (CPP_MIN);
1713                   break;
1714                 }
1715               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1716                 {
1717                   REVISE_TOKEN (CPP_MAX);
1718                   break;
1719                 }
1720             }
1721           PUSH_TOKEN (CPP_QUERY);
1722           break;
1723
1724         case '.':
1725           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1726               && IMMED_TOKEN ()
1727               && !(cur_token[-1].flags & PREV_WHITE))
1728             {
1729               cur_token -= 2;
1730               PUSH_TOKEN (CPP_ELLIPSIS);
1731             }
1732           else
1733             PUSH_TOKEN (CPP_DOT);
1734           break;
1735
1736         make_complement:
1737         case '~': PUSH_TOKEN (CPP_COMPL); break;
1738         make_xor:
1739         case '^': PUSH_TOKEN (CPP_XOR); break;
1740         make_open_brace:
1741         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1742         make_close_brace:
1743         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1744         make_open_square:
1745         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1746         make_close_square:
1747         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1748         make_backslash:
1749         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1750         case '!': PUSH_TOKEN (CPP_NOT); break;
1751         case ',': PUSH_TOKEN (CPP_COMMA); break;
1752         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1753         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1754         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1755
1756         case '$':
1757           if (CPP_OPTION (pfile, dollars_in_ident))
1758             goto letter;
1759           /* Fall through */
1760         default:
1761           cur_token->val.aux = c;
1762           PUSH_TOKEN (CPP_OTHER);
1763           break;
1764         }
1765     }
1766
1767   /* Run out of token space?  */
1768   if (cur_token == token_limit)
1769     {
1770       list->tokens_used = cur_token - list->tokens;
1771       _cpp_expand_token_space (list, 256);
1772       goto expanded;
1773     }
1774
1775   cur_token->flags = flags;
1776   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1777     {
1778       if (cur > buffer->buf && !is_vspace (cur[-1]))
1779         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1780                                CPP_BUF_COLUMN (buffer, cur),
1781                                "no newline at end of file");
1782       cur_token++->type = CPP_EOF;
1783     }
1784
1785  out:
1786   /* All tokens are allocated, so the memory location is fixed.  */
1787   first = &list->tokens[first_token];
1788
1789   /* Don't complain about the null directive, nor directives in
1790      assembly source: we don't know where the comments are, and # may
1791      introduce assembler pseudo-ops.  Don't complain about invalid
1792      directives in skipped conditional groups (6.10 p4).  */
1793   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1794       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1795     {
1796       if (first[1].type == CPP_NAME)
1797         cpp_error (pfile, "invalid preprocessing directive #%s",
1798                    first[1].val.node->name);
1799       else
1800         cpp_error (pfile, "invalid preprocessing directive");
1801
1802       /* Discard this line to prevent further errors from cc1.  */
1803       _cpp_clear_toklist (list);
1804       goto retry;
1805     }
1806
1807   /* Put EOF at end of known directives.  This covers "directives do
1808      not extend beyond the end of the line (description 6.10 part 2)".  */
1809   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1810     {
1811       pfile->first_directive_token = first;
1812       cur_token++->type = CPP_EOF;
1813     }
1814
1815   first->flags |= BOL;
1816   if (first_token != 0)
1817     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1818        up the invocation of a function-like macro, new line is
1819        considered a normal white-space character.  */
1820     first->flags |= PREV_WHITE;
1821
1822   buffer->cur = cur;
1823   list->tokens_used = cur_token - list->tokens;
1824   pfile->in_lex_line = 0;
1825 }
1826
1827 /* Write the spelling of a token TOKEN, with any appropriate
1828    whitespace before it, to FP.  PREV is the previous token, which
1829    is used to determine if we need to shove in an extra space in order
1830    to avoid accidental token paste.  If WHITE is 0, do not insert any
1831    leading whitespace.  */
1832 static void
1833 output_token (pfile, fp, token, prev, white)
1834      cpp_reader *pfile;
1835      FILE *fp;
1836      const cpp_token *token, *prev;
1837      int white;
1838 {
1839   if (white)
1840     {
1841       int dummy;
1842
1843       if (token->col && (token->flags & BOL))
1844         {
1845           /* Supply enough whitespace to put this token in its original
1846              column.  Don't bother trying to reconstruct tabs; we can't
1847              get it right in general, and nothing ought to care.  (Yes,
1848              some things do care; the fault lies with them.)  */
1849           unsigned int spaces = token->col - 1;
1850
1851           while (spaces--)
1852             putc (' ', fp);
1853         }
1854       else if (token->flags & PREV_WHITE)
1855         putc (' ', fp);
1856       else
1857       /* Check for and prevent accidental token pasting.
1858          In addition to the cases handled by can_paste, consider
1859
1860          a + ++b - if there is not a space between the + and ++, it
1861          will be misparsed as a++ + b.  But + ## ++ doesn't produce
1862          a valid token.  */
1863         if (prev
1864             && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1865                 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1866                 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1867         putc (' ', fp);
1868     }
1869
1870   switch (TOKEN_SPELL (token))
1871     {
1872     case SPELL_OPERATOR:
1873       {
1874         const unsigned char *spelling;
1875
1876         if (token->flags & DIGRAPH)
1877           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1878         else if (token->flags & NAMED_OP)
1879           goto spell_ident;
1880         else
1881           spelling = TOKEN_NAME (token);
1882
1883         ufputs (spelling, fp);
1884       }
1885       break;
1886
1887     case SPELL_IDENT:
1888       spell_ident:
1889       ufputs (token->val.node->name, fp);
1890       break;
1891
1892     case SPELL_STRING:
1893       {
1894         int left, right, tag;
1895         switch (token->type)
1896           {
1897           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1898           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1899           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1900           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1901           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1902           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1903           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1904           }
1905         if (tag) putc (tag, fp);
1906         if (left) putc (left, fp);
1907         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1908         if (right) putc (right, fp);
1909       }
1910       break;
1911
1912     case SPELL_CHAR:
1913       putc (token->val.aux, fp);
1914       break;
1915
1916     case SPELL_NONE:
1917       /* Placemarker or EOF - no output.  (Macro args are handled
1918          elsewhere.  */
1919       break;
1920     }
1921 }
1922
1923 /* Dump the original user's spelling of argument index ARG_NO to the
1924    macro whose expansion is LIST.  */
1925 static void
1926 dump_param_spelling (fp, list, arg_no)
1927      FILE *fp;
1928      const cpp_toklist *list;
1929      unsigned int arg_no;
1930 {
1931   const U_CHAR *param = list->namebuf;
1932
1933   while (arg_no--)
1934     param += ustrlen (param) + 1;
1935   ufputs (param, fp);
1936 }
1937
1938 /* Output all the tokens of LIST, starting at TOKEN, to FP.  */
1939 void
1940 cpp_output_list (pfile, fp, list, token)
1941      cpp_reader *pfile;
1942      FILE *fp;
1943      const cpp_toklist *list;
1944      const cpp_token *token;
1945 {
1946   const cpp_token *limit = list->tokens + list->tokens_used;
1947   const cpp_token *prev = 0;
1948   int white = 0;
1949
1950   while (token < limit)
1951     {
1952       /* XXX Find some way we can write macro args from inside
1953          output_token/spell_token.  */
1954       if (token->type == CPP_MACRO_ARG)
1955         {
1956           if (white && token->flags & PREV_WHITE)
1957             putc (' ', fp);
1958           if (token->flags & STRINGIFY_ARG)
1959             putc ('#', fp);
1960           dump_param_spelling (fp, list, token->val.aux);
1961         }
1962       else
1963         output_token (pfile, fp, token, prev, white);
1964       if (token->flags & PASTE_LEFT)
1965         fputs (" ##", fp);
1966       prev = token;
1967       token++;
1968       white = 1;
1969     }
1970 }
1971
1972
1973 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1974    already contain the enough space to hold the token's spelling.
1975    Returns a pointer to the character after the last character
1976    written.  */
1977
1978 static unsigned char *
1979 spell_token (pfile, token, buffer)
1980      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1981      const cpp_token *token;
1982      unsigned char *buffer;
1983 {
1984   switch (TOKEN_SPELL (token))
1985     {
1986     case SPELL_OPERATOR:
1987       {
1988         const unsigned char *spelling;
1989         unsigned char c;
1990
1991         if (token->flags & DIGRAPH)
1992           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1993         else if (token->flags & NAMED_OP)
1994           goto spell_ident;
1995         else
1996           spelling = TOKEN_NAME (token);
1997
1998         while ((c = *spelling++) != '\0')
1999           *buffer++ = c;
2000       }
2001       break;
2002
2003     case SPELL_IDENT:
2004       spell_ident:
2005       memcpy (buffer, token->val.node->name, token->val.node->length);
2006       buffer += token->val.node->length;
2007       break;
2008
2009     case SPELL_STRING:
2010       {
2011         int left, right, tag;
2012         switch (token->type)
2013           {
2014           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
2015           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
2016           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
2017           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
2018           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
2019           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
2020           default:              left = '\0'; right = '\0'; tag = '\0'; break;
2021           }
2022         if (tag) *buffer++ = tag;
2023         if (left) *buffer++ = left;
2024         memcpy (buffer, token->val.str.text, token->val.str.len);
2025         buffer += token->val.str.len;
2026         if (right) *buffer++ = right;
2027       }
2028       break;
2029
2030     case SPELL_CHAR:
2031       *buffer++ = token->val.aux;
2032       break;
2033
2034     case SPELL_NONE:
2035       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
2036       break;
2037     }
2038
2039   return buffer;
2040 }
2041
2042 /* Macro expansion algorithm.
2043
2044 Macro expansion is implemented by a single-pass algorithm; there are
2045 no rescan passes involved.  cpp_get_token expands just enough to be
2046 able to return a token to the caller, a consequence is that when it
2047 returns the preprocessor can be in a state of mid-expansion.  The
2048 algorithm does not work by fully expanding a macro invocation into
2049 some kind of token list, and then returning them one by one.
2050
2051 Our expansion state is recorded in a context stack.  We start out with
2052 a single context on the stack, let's call it base context.  This
2053 consists of the token list returned by lex_line that forms the next
2054 logical line in the source file.
2055
2056 The current level in the context stack is stored in the cur_context
2057 member of the cpp_reader structure.  The context it references keeps,
2058 amongst other things, a count of how many tokens form that context and
2059 our position within those tokens.
2060
2061 Fundamentally, calling cpp_get_token will return the next token from
2062 the current context.  If we're at the end of the current context, that
2063 context is popped from the stack first, unless it is the base context,
2064 in which case the next logical line is lexed from the source file.
2065
2066 However, before returning the token, if it is a CPP_NAME token
2067 _cpp_get_token checks to see if it is a macro and if it is enabled.
2068 Each time it encounters a macro name, it calls push_macro_context.
2069 This function checks that the macro should be expanded (with
2070 is_macro_disabled), and if so pushes a new macro context on the stack
2071 which becomes the current context.  It then loops back to read the
2072 first token of the macro context.
2073
2074 A macro context basically consists of the token list representing the
2075 macro's replacement list, which was saved in the hash table by
2076 save_macro_expansion when its #define statement was parsed.  If the
2077 macro is function-like, it also contains the tokens that form the
2078 arguments to the macro.  I say more about macro arguments below, but
2079 for now just saying that each argument is a set of pointers to tokens
2080 is enough.
2081
2082 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2083 token.  This represents an argument passed to the macro, with the
2084 argument number stored in the token's AUX field.  The argument should
2085 be substituted; this is achieved by pushing an "argument context".  An
2086 argument context just refers to the tokens forming the argument,
2087 which are obtained directly from the macro context.  The STRINGIFY
2088 flag on a CPP_MACRO_ARG token indicates that the argument should be
2089 stringified.
2090
2091 Here's a few simple rules the context stack obeys:-
2092
2093   1) The lex_line token list is always context zero.
2094
2095   2) Context 1, if it exists, must be a macro context.
2096
2097   3) An argument context can only appear above a macro context.
2098
2099   4) A macro context can appear above the base context, another macro
2100   context, or an argument context.
2101
2102   5) These imply that the minimal level of an argument context is 2.
2103
2104 The only tricky thing left is ensuring that macros are enabled and
2105 disabled correctly.  The algorithm controls macro expansion by the
2106 level of the context a token is taken from in the context stack.  If a
2107 token is taken from a level equal to no_expand_level (a member of
2108 struct cpp_reader), no expansion is performed.
2109
2110 When popping a context off the stack, if no_expand_level equals the
2111 level of the popped context, it is reduced by one to match the new
2112 context level, so that expansion is still disabled.  It does not
2113 increase if a context is pushed, though.  It starts out life as
2114 UINT_MAX, which has the effect that initially macro expansion is
2115 enabled.  I explain how this mechanism works below.
2116
2117 The standard requires:-
2118
2119   1) Arguments to be fully expanded before substitution.
2120
2121   2) Stringified arguments to not be expanded, nor the tokens
2122   immediately surrounding a ## operator.
2123
2124   3) Continual rescanning until there are no more macros left to
2125   replace.
2126
2127   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2128   expanded again during later rescans.  This prevents infinite
2129   recursion.
2130
2131 The first thing to observe is that stage 3) is mostly redundant.
2132 Since a macro is disabled once it has been expanded, how can a rescan
2133 find an unexpanded macro name?  There are only two cases where this is
2134 possible:-
2135
2136   a) If the macro name results from a token paste operation.
2137
2138   b) If the macro in question is a function-like macro that hasn't
2139   already been expanded because previously there was not the required
2140   '(' token immediately following it.  This is only possible when an
2141   argument is substituted, and after substitution the last token of
2142   the argument can bind with a parenthesis appearing in the tokens
2143   following the substitution.  Note that if the '(' appears within the
2144   argument, the ')' must too, as expanding macro arguments cannot
2145   "suck in" tokens outside the argument.
2146
2147 So we tackle this as follows.  When parsing the macro invocation for
2148 arguments, we record the tokens forming each argument as a list of
2149 pointers to those tokens.  We do not expand any tokens that are "raw",
2150 i.e. directly from the macro invocation, but other tokens that come
2151 from (nested) argument substitution are fully expanded.
2152
2153 This is achieved by setting the no_expand_level to that of the macro
2154 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2155 forming an argument, because parse_args (indirectly) calls
2156 get_raw_token which automatically pushes argument contexts and traces
2157 into them.  Since these contexts are at a higher level than the
2158 no_expand_level, they get fully macro expanded.
2159
2160 "Raw" and non-raw tokens are separated in arguments by null pointers,
2161 with the policy that the initial state of an argument is raw.  If the
2162 first token is not raw, it should be preceded by a null pointer.  When
2163 tracing through the tokens of an argument context, each time
2164 get_raw_token encounters a null pointer, it toggles the flag
2165 CONTEXT_RAW.
2166
2167 This flag, when set, indicates to is_macro_disabled that we are
2168 reading raw tokens which should be macro-expanded.  Similarly, if
2169 clear, is_macro_disabled suppresses re-expansion.
2170
2171 It's probably time for an example.
2172
2173 #define hash #
2174 #define str(x) #x
2175 #define xstr(y) str(y hash)
2176 str(hash)                       // "hash"
2177 xstr(hash)                      // "# hash"
2178
2179 In the invocation of str, parse_args turns off macro expansion and so
2180 parses the argument as <hash>.  This is the only token (pointer)
2181 passed as the argument to str.  Since <hash> is raw there is no need
2182 for an initial null pointer.  stringify_arg is called from
2183 get_raw_token when tracing through the expansion of str, since the
2184 argument has the STRINGIFY flag set.  stringify_arg turns off
2185 macro expansion by setting the no_expand_level to that of the argument
2186 context.  Thus it gets the token <hash> and stringifies it to "hash"
2187 correctly.
2188
2189 Similarly, xstr is passed <hash>.  However, when parse_args is parsing
2190 the invocation of str() in xstr's expansion, get_raw_token encounters
2191 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2192 an argument context, and enters the tokens of the argument,
2193 i.e. <hash>.  This is at a higher context level than parse_args
2194 disabled, and so is_macro_disabled permits expansion of it and a macro
2195 context is pushed on top of the argument context.  This contains the
2196 <#> token, and the end result is that <hash> is macro expanded.
2197 However, after popping off the argument context, the <hash> of xstr's
2198 expansion does not get macro expanded because we're back at the
2199 no_expand_level.  The end result is that the argument passed to str is
2200 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2201 raw, <#> is not raw, but then <hash> is.
2202
2203 */
2204
2205
2206 /* Free the storage allocated for macro arguments.  */
2207 static void
2208 free_macro_args (args)
2209      macro_args *args;
2210 {
2211   if (args->tokens)
2212     free ((PTR) args->tokens);
2213   free (args->ends);
2214   free (args);
2215 }
2216
2217 /* Determines if a macro has been already used (and is therefore
2218    disabled).  */
2219 static int
2220 is_macro_disabled (pfile, expansion, token)
2221      cpp_reader *pfile;
2222      const cpp_toklist *expansion;
2223      const cpp_token *token;
2224 {
2225   cpp_context *context = CURRENT_CONTEXT (pfile);
2226
2227   /* Arguments on either side of ## are inserted in place without
2228      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2229      occurs during a later rescan pass.  The effect is that we expand
2230      iff we would as part of the macro's expansion list, so we should
2231      drop to the macro's context.  */
2232   if (IS_ARG_CONTEXT (context))
2233     {
2234       if (token->flags & PASTED)
2235         context--;
2236       else if (!(context->flags & CONTEXT_RAW))
2237         return 1;
2238       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2239         context--;
2240     }
2241
2242   /* Have we already used this macro?  */
2243   while (context->level > 0)
2244     {
2245       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2246         return 1;
2247       /* Raw argument tokens are judged based on the token list they
2248          came from.  */
2249       if (context->flags & CONTEXT_RAW)
2250         context = pfile->contexts + context->level;
2251       else
2252         context--;
2253     }
2254
2255   /* Function-like macros may be disabled if the '(' is not in the
2256      current context.  We check this without disrupting the context
2257      stack.  */
2258   if (expansion->paramc >= 0)
2259     {
2260       const cpp_token *next;
2261       unsigned int prev_nme;
2262
2263       context = CURRENT_CONTEXT (pfile);
2264       /* Drop down any contexts we're at the end of: the '(' may
2265          appear in lower macro expansions, or in the rest of the file.  */
2266       while (context->posn == context->count && context > pfile->contexts)
2267         {
2268           context--;
2269           /* If we matched, we are disabled, as we appear in the
2270              expansion of each macro we meet.  */
2271           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2272             return 1;
2273         }
2274
2275       prev_nme = pfile->no_expand_level;
2276       pfile->no_expand_level = context - pfile->contexts;
2277       next = _cpp_get_token (pfile);
2278       restore_macro_expansion (pfile, prev_nme);
2279       if (next->type != CPP_OPEN_PAREN)
2280         {
2281           _cpp_push_token (pfile, next);
2282           if (CPP_WTRADITIONAL (pfile))
2283             cpp_warning (pfile,
2284          "function macro %s must be used with arguments in traditional C",
2285                          token->val.node->name);
2286           return 1;
2287         }
2288     }
2289
2290   return 0;
2291 }
2292
2293 /* Add a token to the set of tokens forming the arguments to the macro
2294    being parsed in parse_args.  */
2295 static void
2296 save_token (args, token)
2297      macro_args *args;
2298      const cpp_token *token;
2299 {
2300   if (args->used == args->capacity)
2301     {
2302       args->capacity += args->capacity + 100;
2303       args->tokens = (const cpp_token **)
2304         xrealloc ((PTR) args->tokens,
2305                   args->capacity * sizeof (const cpp_token *));
2306     }
2307   args->tokens[args->used++] = token;
2308 }
2309
2310 /* Take and save raw tokens until we finish one argument.  Empty
2311    arguments are saved as a single CPP_PLACEMARKER token.  */
2312 static const cpp_token *
2313 parse_arg (pfile, var_args, paren_context, args, pcount)
2314      cpp_reader *pfile;
2315      int var_args;
2316      unsigned int paren_context;
2317      macro_args *args;
2318      unsigned int *pcount;
2319 {
2320   const cpp_token *token;
2321   unsigned int paren = 0, count = 0;
2322   int raw, was_raw = 1;
2323
2324   for (count = 0;; count++)
2325     {
2326       token = _cpp_get_token (pfile);
2327
2328       switch (token->type)
2329         {
2330         default:
2331           break;
2332
2333         case CPP_OPEN_PAREN:
2334           paren++;
2335           break;
2336
2337         case CPP_CLOSE_PAREN:
2338           if (paren-- != 0)
2339             break;
2340           goto out;
2341
2342         case CPP_COMMA:
2343           /* Commas are not terminators within parantheses or var_args.  */
2344           if (paren || var_args)
2345             break;
2346           goto out;
2347
2348         case CPP_EOF:           /* Error reported by caller.  */
2349           goto out;
2350         }
2351
2352       raw = pfile->cur_context <= paren_context;
2353       if (raw != was_raw)
2354         {
2355           was_raw = raw;
2356           save_token (args, 0);
2357           count++;
2358         }
2359       save_token (args, token);
2360     }
2361
2362  out:
2363   if (count == 0)
2364     {
2365       /* Duplicate the placemarker.  Then we can set its flags and
2366          position and safely be using more than one.  */
2367       save_token (args, duplicate_token (pfile, &placemarker_token));
2368       count++;
2369     }
2370
2371   *pcount = count;
2372   return token;
2373 }
2374
/* Nonzero if the argument that starts at offset O of arglist A is
   empty, i.e. its first token pointer - or, when that pointer is
   null, the one following it - refers to a CPP_PLACEMARKER token.
   A and O may be evaluated more than once.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2382
2383 /* Parse the arguments making up a macro invocation.  Nested arguments
2384    are automatically macro expanded, but immediate macros are not
2385    expanded; this enables e.g. operator # to work correctly.  Returns
2386    non-zero on error.  */
2387 static int
2388 parse_args (pfile, hp, args)
2389      cpp_reader *pfile;
2390      cpp_hashnode *hp;
2391      macro_args *args;
2392 {
2393   const cpp_token *token;
2394   const cpp_toklist *macro;
2395   unsigned int total = 0;
2396   unsigned int paren_context = pfile->cur_context;
2397   int argc = 0;
2398
2399   macro = hp->value.expansion;
2400   do
2401     {
2402       unsigned int count;
2403
2404       token = parse_arg (pfile, (argc + 1 == macro->paramc
2405                                  && (macro->flags & VAR_ARGS)),
2406                          paren_context, args, &count);
2407       if (argc < macro->paramc)
2408         {
2409           total += count;
2410           args->ends[argc] = total;
2411         }
2412       argc++;
2413     }
2414   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2415
2416   if (token->type == CPP_EOF)
2417     {
2418       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2419       return 1;
2420     }
2421   else if (argc < macro->paramc)
2422     {
2423       /* A rest argument is allowed to not appear in the invocation at all.
2424          e.g. #define debug(format, args...) ...
2425          debug("string");
2426          This is exactly the same as if the rest argument had received no
2427          tokens - debug("string",);  This extension is deprecated.  */
2428
2429       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2430         {
2431           /* Duplicate the placemarker.  Then we can set its flags and
2432              position and safely be using more than one.  */
2433           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2434           pm->flags = VOID_REST;
2435           save_token (args, pm);
2436           args->ends[argc] = total + 1;
2437
2438           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2439             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2440
2441           return 0;
2442         }
2443       else
2444         {
2445           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2446           return 1;
2447         }
2448     }
2449   /* An empty argument to an empty function-like macro is fine.  */
2450   else if (argc > macro->paramc
2451            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2452     {
2453       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2454       return 1;
2455     }
2456
2457   return 0;
2458 }
2459
2460 /* Adds backslashes before all backslashes and double quotes appearing
2461    in strings.  Non-printable characters are converted to octal.  */
2462 static U_CHAR *
2463 quote_string (dest, src, len)
2464      U_CHAR *dest;
2465      const U_CHAR *src;
2466      unsigned int len;
2467 {
2468   while (len--)
2469     {
2470       U_CHAR c = *src++;
2471
2472       if (c == '\\' || c == '"')
2473         {
2474           *dest++ = '\\';
2475           *dest++ = c;
2476         }
2477       else
2478         {
2479           if (ISPRINT (c))
2480             *dest++ = c;
2481           else
2482             {
2483               sprintf ((char *) dest, "\\%03o", c);
2484               dest += 4;
2485             }
2486         }
2487     }
2488
2489   return dest;
2490 }
2491
2492 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2493    CPP_STRING token containing TEXT in quoted form.  */
2494 static cpp_token *
2495 make_string_token (token, text, len)
2496      cpp_token *token;
2497      const U_CHAR *text;
2498      unsigned int len;
2499 {
2500   U_CHAR *buf;
2501
2502   buf = (U_CHAR *) xmalloc (len * 4);
2503   token->type = CPP_STRING;
2504   token->flags = 0;
2505   token->val.str.text = buf;
2506   token->val.str.len = quote_string (buf, text, len) - buf;
2507   return token;
2508 }
2509
2510 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2511    evaluating to NUMBER.  */
2512 static cpp_token *
2513 alloc_number_token (pfile, number)
2514      cpp_reader *pfile;
2515      int number;
2516 {
2517   cpp_token *result;
2518   char *buf;
2519
2520   result = get_temp_token (pfile);
2521   buf = xmalloc (20);
2522   sprintf (buf, "%d", number);
2523
2524   result->type = CPP_NUMBER;
2525   result->flags = 0;
2526   result->val.str.text = (U_CHAR *) buf;
2527   result->val.str.len = strlen (buf);
2528   return result;
2529 }
2530
2531 /* Returns a temporary token from the temporary token store of PFILE.  */
2532 static cpp_token *
2533 get_temp_token (pfile)
2534      cpp_reader *pfile;
2535 {
2536   if (pfile->temp_used == pfile->temp_alloced)
2537     {
2538       if (pfile->temp_used == pfile->temp_cap)
2539         {
2540           pfile->temp_cap += pfile->temp_cap + 20;
2541           pfile->temp_tokens = (cpp_token **) xrealloc
2542             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2543         }
2544       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2545         (sizeof (cpp_token));
2546     }
2547
2548   return pfile->temp_tokens[pfile->temp_used++];
2549 }
2550
2551 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2552 static void
2553 release_temp_tokens (pfile)
2554      cpp_reader *pfile;
2555 {
2556   while (pfile->temp_used)
2557     {
2558       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2559
2560       if (TOKEN_SPELL (token) == SPELL_STRING)
2561         {
2562           free ((char *) token->val.str.text);
2563           token->val.str.text = 0;
2564         }
2565     }
2566 }
2567
2568 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2569 void
2570 _cpp_free_temp_tokens (pfile)
2571      cpp_reader *pfile;
2572 {
2573   if (pfile->temp_tokens)
2574     {
2575       /* It is possible, though unlikely (looking for '(' of a funlike
2576          macro into EOF), that we haven't released the tokens yet.  */
2577       release_temp_tokens (pfile);
2578       while (pfile->temp_alloced)
2579         free (pfile->temp_tokens[--pfile->temp_alloced]);
2580       free (pfile->temp_tokens);
2581     }
2582
2583   if (pfile->date)
2584     {
2585       free ((char *) pfile->date->val.str.text);
2586       free (pfile->date);
2587       free ((char *) pfile->time->val.str.text);
2588       free (pfile->time);
2589     }
2590 }
2591
2592 /* Copy TOKEN into a temporary token from PFILE's store.  */
2593 static cpp_token *
2594 duplicate_token (pfile, token)
2595      cpp_reader *pfile;
2596      const cpp_token *token;
2597 {
2598   cpp_token *result = get_temp_token (pfile);
2599
2600   *result = *token;
2601   if (TOKEN_SPELL (token) == SPELL_STRING)
2602     {
2603       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2604       memcpy (buff, token->val.str.text, token->val.str.len);
2605       result->val.str.text = buff;
2606     }
2607   return result;
2608 }
2609
2610 /* Determine whether two tokens can be pasted together, and if so,
2611    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2612    be pasted, or the appropriate type for the merged token if they
2613    can.  */
2614 static enum cpp_ttype
2615 can_paste (pfile, token1, token2, digraph)
2616      cpp_reader * pfile;
2617      const cpp_token *token1, *token2;
2618      int* digraph;
2619 {
2620   enum cpp_ttype a = token1->type, b = token2->type;
2621   int cxx = CPP_OPTION (pfile, cplusplus);
2622
2623   /* Treat named operators as if they were ordinary NAMEs.  */
2624   if (token1->flags & NAMED_OP)
2625     a = CPP_NAME;
2626   if (token2->flags & NAMED_OP)
2627     b = CPP_NAME;
2628
2629   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2630     return a + (CPP_EQ_EQ - CPP_EQ);
2631
2632   switch (a)
2633     {
2634     case CPP_GREATER:
2635       if (b == a) return CPP_RSHIFT;
2636       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2637       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2638       break;
2639     case CPP_LESS:
2640       if (b == a) return CPP_LSHIFT;
2641       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2642       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2643       if (CPP_OPTION (pfile, digraphs))
2644         {
2645           if (b == CPP_COLON)
2646             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2647           if (b == CPP_MOD)
2648             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2649         }
2650       break;
2651
2652     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2653     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2654     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2655
2656     case CPP_MINUS:
2657       if (b == a)               return CPP_MINUS_MINUS;
2658       if (b == CPP_GREATER)     return CPP_DEREF;
2659       break;
2660     case CPP_COLON:
2661       if (b == a && cxx)        return CPP_SCOPE;
2662       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2663         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2664       break;
2665
2666     case CPP_MOD:
2667       if (CPP_OPTION (pfile, digraphs))
2668         {
2669           if (b == CPP_GREATER)
2670             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2671           if (b == CPP_COLON)
2672             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2673         }
2674       break;
2675     case CPP_DEREF:
2676       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2677       break;
2678     case CPP_DOT:
2679       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2680       if (b == CPP_NUMBER)      return CPP_NUMBER;
2681       break;
2682
2683     case CPP_HASH:
2684       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2685         /* %:%: digraph */
2686         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2687       break;
2688
2689     case CPP_NAME:
2690       if (b == CPP_NAME)        return CPP_NAME;
2691       if (b == CPP_NUMBER
2692           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2693       if (b == CPP_CHAR
2694           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2695       if (b == CPP_STRING
2696           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2697       break;
2698
2699     case CPP_NUMBER:
2700       if (b == CPP_NUMBER)      return CPP_NUMBER;
2701       if (b == CPP_NAME)        return CPP_NUMBER;
2702       if (b == CPP_DOT)         return CPP_NUMBER;
2703       /* Numbers cannot have length zero, so this is safe.  */
2704       if ((b == CPP_PLUS || b == CPP_MINUS)
2705           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2706         return CPP_NUMBER;
2707       break;
2708
2709     case CPP_OTHER:
2710       if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2711         {
2712           if (b == CPP_NAME)    return CPP_NAME;
2713           if (b == CPP_STRING)  return CPP_OSTRING;
2714         }
2715
2716     default:
2717       break;
2718     }
2719
2720   return CPP_EOF;
2721 }
2722
/* Check if TOKEN is to be ##-pasted with the token after it, and if
   so perform the paste.  Pasting repeats for as long as the result is
   itself on the left of a ##.

   Returns TOKEN unchanged if no paste applies.  If the two tokens
   cannot be combined into a valid token, the second one is pushed
   back (after an optional warning) and the left-hand token is
   returned.  Otherwise the result is a temporary token from PFILE's
   store carrying the position and whitespace flags of the first
   token and the PASTED flag.  */
static const cpp_token *
maybe_paste_with_next (pfile, token)
     cpp_reader *pfile;
     const cpp_token *token;
{
  cpp_token *pasted;
  const cpp_token *second;
  cpp_context *context = CURRENT_CONTEXT (pfile);

  /* Is this token on the LHS of ## ? */

  /* Either the token itself is flagged PASTE_LEFT, or the current
     context is flagged CONTEXT_PASTEL and TOKEN was its last token
     (the context's position has reached its count).  */
  while ((token->flags & PASTE_LEFT)
         || ((context->flags & CONTEXT_PASTEL)
             && context->posn == context->count))
    {
      /* Suppress macro expansion for next token, but don't conflict
         with the other method of suppression.  If it is an argument,
         macro expansion within the argument will still occur.  */
      pfile->paste_level = pfile->cur_context;
      second = _cpp_get_token (pfile);
      pfile->paste_level = 0;

      /* Ignore placemarker argument tokens (cannot be from an empty
         macro since macros are not expanded).  */
      if (token->type == CPP_PLACEMARKER)
        pasted = duplicate_token (pfile, second);
      else if (second->type == CPP_PLACEMARKER)
        {
          /* GCC has special extended semantics for , ## b where b is
             a varargs parameter: the comma disappears if b was given
             no actual arguments (not merely if b is an empty
             argument).  */
          if (token->type == CPP_COMMA && second->flags & VOID_REST)
            pasted = duplicate_token (pfile, second);
          else
            pasted = duplicate_token (pfile, token);
        }
      else
        {
          /* can_paste only writes DIGRAPH for digraph results, so it
             must start out clear.  */
          int digraph = 0;
          enum cpp_ttype type = can_paste (pfile, token, second, &digraph);

          if (type == CPP_EOF)
            {
              if (CPP_OPTION (pfile, warn_paste))
                {
                  /* Do not complain about , ## <whatever> if
                     <whatever> came from a variable argument, because
                     the author probably intended the ## to trigger
                     the special extended semantics (see above).  */
                  if (token->type == CPP_COMMA
                      && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
                      && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
                    /* no warning */;
                  else
                    cpp_warning (pfile,
                        "pasting would not give a valid preprocessing token");
                }
              /* The paste failed: un-read the second token and give
                 back the left-hand token as it stands.  */
              _cpp_push_token (pfile, second);
              return token;
            }

          if (type == CPP_NAME || type == CPP_NUMBER)
            {
              /* Join spellings.  */
              U_CHAR *buf, *end;

              pasted = get_temp_token (pfile);
              /* NOTE(review): the NUL stored through END below lands
                 after both spellings, so this relies on TOKEN_LEN
                 over-estimating the spelt length -- confirm against
                 TOKEN_LEN's definition.  */
              buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
              end = spell_token (pfile, token, buf);
              end = spell_token (pfile, second, end);
              *end = '\0';

              /* Names are identified by their hash node; numbers keep
                 a heap copy of the joined spelling.  */
              if (type == CPP_NAME)
                pasted->val.node = cpp_lookup (pfile, buf, end - buf);
              else
                {
                  pasted->val.str.text = uxstrdup (buf);
                  pasted->val.str.len = end - buf;
                }
            }
          /* For these results the value is that of the second token;
             only the type, set below, changes.  */
          else if (type == CPP_WCHAR || type == CPP_WSTRING
                   || type == CPP_OSTRING)
            pasted = duplicate_token (pfile, second);
          else
            {
              /* All remaining results get a fresh token with a zero
                 value.  */
              pasted = get_temp_token (pfile);
              pasted->val.integer = 0;
            }

          pasted->type = type;
          pasted->flags = digraph ? DIGRAPH : 0;

          /* If the pasted name is a named operator, give the token the
             operator's type and mark it NAMED_OP.  */
          if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
            {
              pasted->type = pasted->val.node->value.code;
              pasted->flags |= NAMED_OP;
            }
        }

      /* The pasted token gets the whitespace flags and position of the
         first token, the PASTE_LEFT flag of the second token, plus the
         PASTED flag to indicate it is the result of a paste.  However, we
         want to preserve the DIGRAPH flag.  */
      pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
      pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
                        | (second->flags & PASTE_LEFT) | PASTED);
      pasted->col = token->col;
      pasted->line = token->line;

      /* See if there is another token to be pasted onto the one we just
         constructed.  */
      token = pasted;
      /* Re-read the current context for the next loop test, since
         fetching the second token went through _cpp_get_token.  */
      context = CURRENT_CONTEXT (pfile);
      /* and loop */
    }
  return token;
}
2842
2843 /* Convert a token sequence to a single string token according to the
2844    rules of the ISO C #-operator.  */
2845 #define INIT_SIZE 200
2846 static cpp_token *
2847 stringify_arg (pfile, token)
2848      cpp_reader *pfile;
2849      const cpp_token *token;
2850 {
2851   cpp_token *result;
2852   unsigned char *main_buf;
2853   unsigned int prev_value, backslash_count = 0;
2854   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2855
2856   push_arg_context (pfile, token);
2857   prev_value  = prevent_macro_expansion (pfile);
2858   main_buf = (unsigned char *) xmalloc (buf_cap);
2859
2860   result = get_temp_token (pfile);
2861   ASSIGN_FLAGS_AND_POS (result, token);
2862
2863   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2864     {
2865       int escape;
2866       unsigned char *buf;
2867       unsigned int len = TOKEN_LEN (token);
2868
2869       if (token->type == CPP_PLACEMARKER)
2870         continue;
2871
2872       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2873                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2874       if (escape)
2875         len *= 4 + 1;
2876
2877       if (buf_used + len > buf_cap)
2878         {
2879           buf_cap = buf_used + len + INIT_SIZE;
2880           main_buf = xrealloc (main_buf, buf_cap);
2881         }
2882
2883       if (whitespace && (token->flags & PREV_WHITE))
2884         main_buf[buf_used++] = ' ';
2885
2886       if (escape)
2887         buf = (unsigned char *) xmalloc (len);
2888       else
2889         buf = main_buf + buf_used;
2890
2891       len = spell_token (pfile, token, buf) - buf;
2892       if (escape)
2893         {
2894           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2895           free (buf);
2896         }
2897       else
2898         buf_used += len;
2899
2900       whitespace = 1;
2901       if (token->type == CPP_BACKSLASH)
2902         backslash_count++;
2903       else
2904         backslash_count = 0;
2905     }
2906
2907   /* Ignore the final \ of invalid string literals.  */
2908   if (backslash_count & 1)
2909     {
2910       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2911       buf_used--;
2912     }
2913
2914   result->type = CPP_STRING;
2915   result->val.str.text = main_buf;
2916   result->val.str.len = buf_used;
2917   restore_macro_expansion (pfile, prev_value);
2918   return result;
2919 }
2920
2921 /* Allocate more room on the context stack of PFILE.  */
2922 static void
2923 expand_context_stack (pfile)
2924      cpp_reader *pfile;
2925 {
2926   pfile->context_cap += pfile->context_cap + 20;
2927   pfile->contexts = (cpp_context *)
2928     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2929 }
2930
2931 /* Push the context of macro NODE onto the context stack.  TOKEN is
2932    the CPP_NAME token invoking the macro.  */
2933 static int
2934 push_macro_context (pfile, token)
2935      cpp_reader *pfile;
2936      const cpp_token *token;
2937 {
2938   unsigned char orig_flags;
2939   macro_args *args;
2940   cpp_context *context;
2941   cpp_hashnode *node = token->val.node;
2942
2943   /* Token's flags may change when parsing args containing a nested
2944      invocation of this macro.  */
2945   orig_flags = token->flags & (PREV_WHITE | BOL);
2946   args = 0;
2947   if (node->value.expansion->paramc >= 0)
2948     {
2949       unsigned int error, prev_nme;
2950
2951       /* Allocate room for the argument contexts, and parse them.  */
2952       args  = (macro_args *) xmalloc (sizeof (macro_args));
2953       args->ends = (unsigned int *)
2954         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2955       args->tokens = 0;
2956       args->capacity = 0;
2957       args->used = 0;
2958       args->level = pfile->cur_context;
2959
2960       prev_nme = prevent_macro_expansion (pfile);
2961       pfile->args = args;
2962       error = parse_args (pfile, node, args);
2963       pfile->args = 0;
2964       restore_macro_expansion (pfile, prev_nme);
2965       if (error)
2966         {
2967           free_macro_args (args);
2968           return 1;
2969         }
2970     }
2971
2972   /* Now push its context.  */
2973   pfile->cur_context++;
2974   if (pfile->cur_context == pfile->context_cap)
2975     expand_context_stack (pfile);
2976
2977   context = CURRENT_CONTEXT (pfile);
2978   context->u.list = node->value.expansion;
2979   context->args = args;
2980   context->posn = 0;
2981   context->count = context->u.list->tokens_used;
2982   context->level = pfile->cur_context;
2983   context->flags = 0;
2984   context->pushed_token = 0;
2985
2986   /* Set the flags of the first token.  We know there must
2987      be one, empty macros are a single placemarker token.  */
2988   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2989
2990   return 0;
2991 }
2992
2993 /* Push an argument to the current macro onto the context stack.
2994    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2995 static void
2996 push_arg_context (pfile, token)
2997      cpp_reader *pfile;
2998      const cpp_token *token;
2999 {
3000   cpp_context *context;
3001   macro_args *args;
3002
3003   pfile->cur_context++;
3004   if (pfile->cur_context == pfile->context_cap)
3005       expand_context_stack (pfile);
3006
3007   context = CURRENT_CONTEXT (pfile);
3008   args = context[-1].args;
3009
3010   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
3011   context->u.arg = args->tokens + context->count;
3012   context->count = args->ends[token->val.aux] - context->count;
3013   context->args = 0;
3014   context->posn = 0;
3015   context->level = args->level;
3016   context->flags = CONTEXT_ARG | CONTEXT_RAW;
3017   context->pushed_token = 0;
3018
3019   /* Set the flags of the first token.  There is one.  */
3020   {
3021     const cpp_token *first = context->u.arg[0];
3022     if (!first)
3023       first = context->u.arg[1];
3024
3025     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
3026                           token->flags & (PREV_WHITE | BOL));
3027   }
3028
3029   if (token->flags & PASTE_LEFT)
3030     context->flags |= CONTEXT_PASTEL;
3031   if (pfile->paste_level)
3032     context->flags |= CONTEXT_PASTER;
3033 }
3034
3035 /* "Unget" a token.  It is effectively inserted in the token queue and
3036    will be returned by the next call to get_raw_token.  */
3037 void
3038 _cpp_push_token (pfile, token)
3039      cpp_reader *pfile;
3040      const cpp_token *token;
3041 {
3042   cpp_context *context = CURRENT_CONTEXT (pfile);
3043
3044   if (context->posn > 0)
3045     {
3046       const cpp_token *prev;
3047       if (IS_ARG_CONTEXT (context))
3048         prev = context->u.arg[context->posn - 1];
3049       else
3050         prev = &context->u.list->tokens[context->posn - 1];
3051
3052       if (prev == token)
3053         {
3054           context->posn--;
3055           return;
3056         }
3057     }
3058
3059   if (context->pushed_token)
3060     cpp_ice (pfile, "two tokens pushed in a row");
3061   if (token->type != CPP_EOF)
3062     context->pushed_token = token;
3063   /* Don't push back a directive's CPP_EOF, step back instead.  */
3064   else if (pfile->cur_context == 0)
3065     pfile->contexts[0].posn--;
3066 }
3067
3068 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3069    introducing the directive.  */
3070 static void
3071 process_directive (pfile, token)
3072      cpp_reader *pfile;
3073      const cpp_token *token;
3074 {
3075   const struct directive *d = pfile->token_list.directive;
3076   int prev_nme = 0;
3077
3078   /* Skip over the directive name.  */
3079   if (token[1].type == CPP_NAME)
3080     _cpp_get_raw_token (pfile);
3081   else if (token[1].type != CPP_NUMBER)
3082     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
3083
3084   if (! (d->flags & EXPAND))
3085     prev_nme = prevent_macro_expansion (pfile);
3086   (void) (*d->handler) (pfile);
3087   if (! (d->flags & EXPAND))
3088     restore_macro_expansion (pfile, prev_nme);
3089   _cpp_skip_rest_of_line (pfile);
3090 }
3091
3092 /* The external interface to return the next token.  All macro
3093    expansion and directive processing is handled internally, the
3094    caller only ever sees the output after preprocessing.  */
3095 const cpp_token *
3096 cpp_get_token (pfile)
3097      cpp_reader *pfile;
3098 {
3099   const cpp_token *token;
3100   /* Loop till we hit a non-directive, non-placemarker token.  */
3101   for (;;)
3102     {
3103       token = _cpp_get_token (pfile);
3104
3105       if (token->type == CPP_PLACEMARKER)
3106         continue;
3107
3108       if (token->type == CPP_HASH && token->flags & BOL
3109           && pfile->token_list.directive)
3110         {
3111           process_directive (pfile, token);
3112           continue;
3113         }
3114
3115       return token;
3116     }
3117 }
3118
3119 /* The internal interface to return the next token.  There are two
3120    differences between the internal and external interfaces: the
3121    internal interface may return a PLACEMARKER token, and it does not
3122    process directives.  */
3123 const cpp_token *
3124 _cpp_get_token (pfile)
3125      cpp_reader *pfile;
3126 {
3127   const cpp_token *token, *old_token;
3128   cpp_hashnode *node;
3129
3130   /* Loop until we hit a non-macro token.  */
3131   for (;;)
3132     {
3133       token = get_raw_token (pfile);
3134
3135       /* Short circuit EOF. */
3136       if (token->type == CPP_EOF)
3137         return token;
3138
3139       /* If we are skipping... */
3140       if (pfile->skipping)
3141         {
3142           /* we still have to process directives,  */
3143           if (pfile->token_list.directive)
3144             return token;
3145
3146           /* but everything else is ignored.  */
3147           _cpp_skip_rest_of_line (pfile);
3148           continue;
3149         }
3150
3151       /* If there's a potential control macro and we get here, then that
3152          #ifndef didn't cover the entire file and its argument shouldn't
3153          be taken as a control macro.  */
3154       pfile->potential_control_macro = 0;
3155
3156       /* If we are rescanning preprocessed input, no macro expansion or
3157          token pasting may occur.  */
3158       if (CPP_OPTION (pfile, preprocessed))
3159         return token;
3160
3161       old_token = token;
3162
3163       /* See if there's a token to paste with this one.  */
3164       if (!pfile->paste_level)
3165         token = maybe_paste_with_next (pfile, token);
3166
3167       /* If it isn't a macro, return it now.  */
3168       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3169         return token;
3170
3171       /* Is macro expansion disabled in general, or are we in the
3172          middle of a token paste, or was this token just pasted?
3173          (Note we don't check token->flags & PASTED, because that
3174          counts tokens that were pasted at some point in the past,
3175          we're only interested in tokens that were pasted by this call
3176          to maybe_paste_with_next.)  */
3177       if (pfile->no_expand_level == pfile->cur_context
3178           || pfile->paste_level
3179           || (token != old_token
3180               && pfile->no_expand_level + 1 == pfile->cur_context))
3181         return token;
3182
3183       node = token->val.node;
3184       if (node->type != T_MACRO)
3185         return special_symbol (pfile, node, token);
3186
3187       if (is_macro_disabled (pfile, node->value.expansion, token))
3188         return token;
3189
3190       if (pfile->cur_context > CPP_STACK_MAX)
3191         {
3192           cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3193           return token;
3194         }
3195
3196       if (push_macro_context (pfile, token))
3197         return token;
3198       /* else loop */
3199     }
3200 }
3201
3202 /* Returns the next raw token, i.e. without performing macro
3203    expansion.  Argument contexts are automatically entered.  */
3204 static const cpp_token *
3205 get_raw_token (pfile)
3206      cpp_reader *pfile;
3207 {
3208   const cpp_token *result;
3209   cpp_context *context;
3210
3211   for (;;)
3212     {
3213       context = CURRENT_CONTEXT (pfile);
3214       if (context->pushed_token)
3215         {
3216           result = context->pushed_token;
3217           context->pushed_token = 0;
3218           return result;        /* Cannot be a CPP_MACRO_ARG */
3219         }
3220       else if (context->posn == context->count)
3221         {
3222           if (pop_context (pfile))
3223             return &eof_token;
3224           continue;
3225         }
3226       else if (IS_ARG_CONTEXT (context))
3227         {
3228           result = context->u.arg[context->posn++];
3229           if (result == 0)
3230             {
3231               context->flags ^= CONTEXT_RAW;
3232               result = context->u.arg[context->posn++];
3233             }
3234           return result;        /* Cannot be a CPP_MACRO_ARG */
3235         }
3236
3237       result = &context->u.list->tokens[context->posn++];
3238
3239       if (result->type != CPP_MACRO_ARG)
3240         return result;
3241
3242       if (result->flags & STRINGIFY_ARG)
3243         return stringify_arg (pfile, result);
3244
3245       push_arg_context (pfile, result);
3246     }
3247 }
3248
3249 /* Internal interface to get the token without macro expanding.  */
3250 const cpp_token *
3251 _cpp_get_raw_token (pfile)
3252      cpp_reader *pfile;
3253 {
3254   int prev_nme = prevent_macro_expansion (pfile);
3255   const cpp_token *result = _cpp_get_token (pfile);
3256   restore_macro_expansion (pfile, prev_nme);
3257   return result;
3258 }
3259
3260 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3261    list should be overwritten, or zero if we need to append
3262    (typically, if we are within the arguments to a macro, or looking
3263    for the '(' to start a function-like macro invocation).  */
3264 static int
3265 lex_next (pfile, clear)
3266      cpp_reader *pfile;
3267      int clear;
3268 {
3269   cpp_toklist *list = &pfile->token_list;
3270   const cpp_token *old_list = list->tokens;
3271   unsigned int old_used = list->tokens_used;
3272
3273   if (clear)
3274     {
3275       /* Release all temporary tokens.  */
3276       _cpp_clear_toklist (list);
3277       pfile->contexts[0].posn = 0;
3278       if (pfile->temp_used)
3279         release_temp_tokens (pfile);
3280     }
3281   lex_line (pfile, list);
3282   pfile->contexts[0].count = list->tokens_used;
3283
3284   if (!clear && pfile->args)
3285     {
3286       /* Fix up argument token pointers.  */
3287       if (old_list != list->tokens)
3288         {
3289           unsigned int i;
3290
3291           for (i = 0; i < pfile->args->used; i++)
3292             {
3293               const cpp_token *token = pfile->args->tokens[i];
3294               if (token >= old_list && token < old_list + old_used)
3295                 pfile->args->tokens[i] = (const cpp_token *)
3296                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3297             }
3298         }
3299
3300       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3301          tokens within the list of arguments that would otherwise act as
3302          preprocessing directives, the behavior is undefined.
3303
3304          This implementation will report a hard error and treat the
3305          'sequence of preprocessing tokens' as part of the macro argument,
3306          not a directive.
3307
3308          Note if pfile->args == 0, we're OK since we're only inside a
3309          macro argument after a '('.  */
3310       if (list->directive)
3311         {
3312           cpp_error_with_line (pfile, list->tokens[old_used].line,
3313                                list->tokens[old_used].col,
3314                                "#%s may not be used inside a macro argument",
3315                                list->directive->name);
3316           return 1;
3317         }
3318     }
3319
3320   return 0;
3321 }
3322
3323 /* Pops a context off the context stack.  If we're at the bottom, lexes
3324    the next logical line.  Returns EOF if we're at the end of the
3325    argument list to the # operator, or we should not "overflow"
3326    into the rest of the file (e.g. 6.10.3.1.1).  */
3327 static int
3328 pop_context (pfile)
3329      cpp_reader *pfile;
3330 {
3331   cpp_context *context;
3332
3333   if (pfile->cur_context == 0)
3334     {
3335       /* If we are currently processing a directive, do not advance.  6.10
3336          paragraph 2: A new-line character ends the directive even if it
3337          occurs within what would otherwise be an invocation of a
3338          function-like macro.  */
3339       if (pfile->token_list.directive)
3340         return 1;
3341
3342       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3343     }
3344
3345   /* Argument contexts, when parsing args or handling # operator
3346      return CPP_EOF at the end.  */
3347   context = CURRENT_CONTEXT (pfile);
3348   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3349     return 1;
3350
3351   /* Free resources when leaving macro contexts.  */
3352   if (context->args)
3353     free_macro_args (context->args);
3354
3355   if (pfile->cur_context == pfile->no_expand_level)
3356     pfile->no_expand_level--;
3357   pfile->cur_context--;
3358
3359   return 0;
3360 }
3361
3362 /* Turn off macro expansion at the current context level.  */
3363 static unsigned int
3364 prevent_macro_expansion (pfile)
3365      cpp_reader *pfile;
3366 {
3367   unsigned int prev_value = pfile->no_expand_level;
3368   pfile->no_expand_level = pfile->cur_context;
3369   return prev_value;
3370 }
3371
3372 /* Restore macro expansion to its previous state.  */
3373 static void
3374 restore_macro_expansion (pfile, prev_value)
3375      cpp_reader *pfile;
3376      unsigned int prev_value;
3377 {
3378   pfile->no_expand_level = prev_value;
3379 }
3380
3381 /* Used by cpperror.c to obtain the correct line and column to report
3382    in a diagnostic.  */
3383 unsigned int
3384 _cpp_get_line (pfile, pcol)
3385      cpp_reader *pfile;
3386      unsigned int *pcol;
3387 {
3388   unsigned int index;
3389   const cpp_token *cur_token;
3390
3391   if (pfile->in_lex_line)
3392     index = pfile->token_list.tokens_used;
3393   else
3394     index = pfile->contexts[0].posn;
3395
3396   if (index == 0)
3397     {
3398       if (pcol)
3399         *pcol = 0;
3400       return 0;
3401     }
3402
3403   cur_token = &pfile->token_list.tokens[index - 1];
3404   if (pcol)
3405     *pcol = cur_token->col;
3406   return cur_token->line;
3407 }
3408
/* Expands to two comma-separated arguments: STR as a const U_CHAR
   pointer, and its length not counting the terminating NUL.  STR must
   be a string literal or array, since the length comes from sizeof.
   The parameter is parenthesized so that an argument which is an
   expression is cast and measured as a whole.  */
#define DSC(str) (const U_CHAR *)(str), sizeof (str) - 1

/* Three-letter English month abbreviations, January first.  */
static const char * const monthnames[] =
{
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};
3415
3416 /* Handle builtin macros like __FILE__.  */
3417 static const cpp_token *
3418 special_symbol (pfile, node, token)
3419      cpp_reader *pfile;
3420      cpp_hashnode *node;
3421      const cpp_token *token;
3422 {
3423   cpp_token *result;
3424   cpp_buffer *ip;
3425
3426   switch (node->type)
3427     {
3428     case T_FILE:
3429     case T_BASE_FILE:
3430       {
3431         const char *file;
3432
3433         ip = CPP_BUFFER (pfile);
3434         if (ip == 0)
3435           file = "";
3436         else
3437           {
3438             if (node->type == T_BASE_FILE)
3439               while (CPP_PREV_BUFFER (ip) != NULL)
3440                 ip = CPP_PREV_BUFFER (ip);
3441
3442             file = ip->nominal_fname;
3443           }
3444         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3445                                     strlen (file));
3446       }
3447       break;
3448
3449     case T_INCLUDE_LEVEL:
3450       /* pfile->include_depth counts the primary source as level 1,
3451          but historically __INCLUDE_DEPTH__ has called the primary
3452          source level 0.  */
3453       result = alloc_number_token (pfile, pfile->include_depth - 1);
3454       break;
3455
3456     case T_SPECLINE:
3457       /* If __LINE__ is embedded in a macro, it must expand to the
3458          line of the macro's invocation, not its definition.
3459          Otherwise things like assert() will not work properly.  */
3460       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3461       break;
3462
3463     case T_STDC:
3464       {
3465         int stdc = 1;
3466
3467 #ifdef STDC_0_IN_SYSTEM_HEADERS
3468         if (CPP_IN_SYSTEM_HEADER (pfile)
3469             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3470           stdc = 0;
3471 #endif
3472         result = alloc_number_token (pfile, stdc);
3473       }
3474       break;
3475
3476     case T_DATE:
3477     case T_TIME:
3478       if (pfile->date == 0)
3479         {
3480           /* Allocate __DATE__ and __TIME__ from permanent storage,
3481              and save them in pfile so we don't have to do this again.
3482              We don't generate these strings at init time because
3483              time() and localtime() are very slow on some systems.  */
3484           time_t tt = time (NULL);
3485           struct tm *tb = localtime (&tt);
3486
3487           pfile->date = make_string_token
3488             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3489           pfile->time = make_string_token
3490             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3491
3492           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3493                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3494           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3495                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3496         }
3497       result = node->type == T_DATE ? pfile->date: pfile->time;
3498       break;
3499
3500     case T_POISON:
3501       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3502       return token;
3503
3504     default:
3505       cpp_ice (pfile, "invalid special hash type");
3506       return token;
3507     }
3508
3509   ASSIGN_FLAGS_AND_POS (result, token);
3510   return result;
3511 }
3512 #undef DSC
3513
3514 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3515    if it hasn't happened already.  */
3516
3517 void
3518 _cpp_init_input_buffer (pfile)
3519      cpp_reader *pfile;
3520 {
3521   cpp_context *base;
3522
3523   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3524   pfile->no_expand_level = UINT_MAX;
3525   pfile->context_cap = 20;
3526   pfile->cur_context = 0;
3527
3528   pfile->contexts = (cpp_context *)
3529     xmalloc (pfile->context_cap * sizeof (cpp_context));
3530
3531   /* Clear the base context.  */
3532   base = &pfile->contexts[0];
3533   base->u.list = &pfile->token_list;
3534   base->posn = 0;
3535   base->count = 0;
3536   base->args = 0;
3537   base->level = 0;
3538   base->flags = 0;
3539   base->pushed_token = 0;
3540 }
3541
3542 /* Moves to the end of the directive line, popping contexts as
3543    necessary.  */
3544 void
3545 _cpp_skip_rest_of_line (pfile)
3546      cpp_reader *pfile;
3547 {
3548   /* Discard all stacked contexts.  */
3549   int i;
3550   for (i = pfile->cur_context; i > 0; i--)
3551     if (pfile->contexts[i].args)
3552       free_macro_args (pfile->contexts[i].args);
3553
3554   if (pfile->no_expand_level <= pfile->cur_context)
3555     pfile->no_expand_level = 0;
3556   pfile->cur_context = 0;
3557
3558   /* Clear the base context, and clear the directive pointer so that
3559      get_raw_token will advance to the next line.  */
3560   pfile->contexts[0].count = 0;
3561   pfile->contexts[0].posn = 0;
3562   pfile->token_list.directive = 0;
3563 }
3564
3565 /* Directive handler wrapper used by the command line option
3566    processor.  */
3567 void
3568 _cpp_run_directive (pfile, dir, buf, count, name)
3569      cpp_reader *pfile;
3570      const struct directive *dir;
3571      const char *buf;
3572      size_t count;
3573      const char *name;
3574 {
3575   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3576     {
3577       unsigned int prev_lvl = 0;
3578
3579       if (name)
3580         CPP_BUFFER (pfile)->nominal_fname = name;
3581       else
3582         CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3583       CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3584
3585       /* Scan the line now, else prevent_macro_expansion won't work.  */
3586       lex_next (pfile, 1);
3587       if (! (dir->flags & EXPAND))
3588         prev_lvl = prevent_macro_expansion (pfile);
3589
3590       (void) (*dir->handler) (pfile);
3591
3592       if (! (dir->flags & EXPAND))
3593         restore_macro_expansion (pfile, prev_lvl);
3594
3595       _cpp_skip_rest_of_line (pfile);
3596       cpp_pop_buffer (pfile);
3597     }
3598 }