gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Check line numbers assigned to all errors.
  28 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  29 o Distinguish integers, floats, and 'other' pp-numbers.
  30 o Store ints and char constants as binary values.
  31 o New command-line assertion syntax.
  32 o Work towards functions in cpperror.c taking a message level parameter.
  33   If we do this, merge the common code of do_warning and do_error.
  34 o Comment all functions, and describe macro expansion algorithm.
  35 o Move as much out of header files as possible.
  36 o Remove single quote pairs `', and some '', from diagnostics.
  37 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  38
  39 */
  40
  41 #include "config.h"
  42 #include "system.h"
  43 #include "intl.h"
  44 #include "cpplib.h"
  45 #include "cpphash.h"
  46 #include "symcat.h"
  47
  48 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
  49 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  50
  51 /* Flags for cpp_context.  */
  52 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  53 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  54 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  55 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  56
  57 typedef struct cpp_context cpp_context;
  58 struct cpp_context
  59 {
  60   union
  61   {
  62     const cpp_toklist *list;    /* Used for macro contexts only.  */
  63     const cpp_token **arg;      /* Used for arg contexts only.  */
  64   } u;
  65
  66   /* Pushed token to be returned by next call to get_raw_token.  */
  67   const cpp_token *pushed_token;
  68
  69   struct macro_args *args;      /* The arguments for a function-like
  70                                    macro.  NULL otherwise.  */
  71   unsigned short posn;          /* Current posn, index into u.  */
  72   unsigned short count;         /* No. of tokens in u.  */
  73   unsigned short level;
  74   unsigned char flags;
  75 };
  76
  77 typedef struct macro_args macro_args;
  78 struct macro_args
  79 {
  80   unsigned int *ends;
  81   const cpp_token **tokens;
  82   unsigned int capacity;
  83   unsigned int used;
  84   unsigned short level;
  85 };
  86
  87 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  88 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  89                                            macro_args *, unsigned int *));
  90 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  91 static void save_token PARAMS ((macro_args *, const cpp_token *));
  92 static int pop_context PARAMS ((cpp_reader *));
  93 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  94 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  95 static void free_macro_args PARAMS ((macro_args *));
  96
  97 #define auto_expand_name_space(list) \
  98     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
  99 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
 100                                          unsigned int));
 101 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
 102                                          unsigned int));
 103
 104 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 105 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
 106                                                 unsigned char *));
 107 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
 108                                                      const unsigned char *));
 109 static int skip_block_comment PARAMS ((cpp_reader *));
 110 static int skip_line_comment PARAMS ((cpp_reader *));
 111 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
 112 static void skip_whitespace PARAMS ((cpp_reader *, int));
 113 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
 114                                    const U_CHAR *, const U_CHAR *));
 115 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
 116 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
 117                                   unsigned int));
 118 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
 119 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
 120                                   const unsigned char *,
 121                                   unsigned int, unsigned int));
 122 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 123 static int lex_next PARAMS ((cpp_reader *, int));
 124 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 125                                       const cpp_token *));
 126
 127 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 128 static void expand_context_stack PARAMS ((cpp_reader *));
 129 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 130                                             unsigned char *));
 131 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
 132                                   const cpp_token *, int));
 133 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 134                                           cpp_token *));
 135 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 136                                             unsigned int));
 137 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 138 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 139                                                 const cpp_token *));
 140 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 141 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 142                                                        const cpp_token *));
 143 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 144                                          const cpp_token *, int *));
 145 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 146 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 147 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 148 static void release_temp_tokens         PARAMS ((cpp_reader *));
 149 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 150 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 151
 152 #define INIT_TOKEN_STR(list, token) \
 153   do {(token)->val.str.len = 0; \
 154       (token)->val.str.text = (list)->namebuf + (list)->name_used; \
 155   } while (0)
 156
 157 #define VALID_SIGN(c, prevc) \
 158   (((c) == '+' || (c) == '-') && \
 159    ((prevc) == 'e' || (prevc) == 'E' \
 160     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 161
 162 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 163    character, if any, is in buffer.  */
 164
 165 #define handle_newline(cur, limit, c) \
 166  do { \
 167   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 168     (cur)++; \
 169   pfile->buffer->lineno++; \
 170   pfile->buffer->line_base = (cur); \
 171   pfile->col_adjust = 0; \
 172  } while (0)
 173
 174 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 175 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 176
 177 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
 178 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
 179 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
 180 #define BACKUP_DIGRAPH(ttype) do { \
 181   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 182
 183 /* An upper bound on the number of bytes needed to spell a token,
 184    including preceding whitespace.  */
 185 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 186 static inline size_t
 187 TOKEN_LEN (token)
 188      const cpp_token *token;
 189 {
 190   size_t len;
 191
 192   switch (TOKEN_SPELL (token))
 193     {
 194     default:            len = 0;                        break;
 195     case SPELL_STRING:  len = token->val.str.len;       break;
 196     case SPELL_IDENT:   len = token->val.node->length;  break;
 197     }
 198   return len + 5;
 199 }
 200
 201 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 202 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 203 #define ON_REST_ARG(c) \
 204  (((c)->flags & VAR_ARGS) \
 205   && ((c)-1)->u.list->tokens[((c)-1)->posn - 1].val.aux \
 206       == (unsigned int) (((c)-1)->u.list->paramc - 1))
 207
 208 #define ASSIGN_FLAGS_AND_POS(d, s) \
 209   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 210       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 211   } while (0)
 212
 213 /* f is flags, just consisting of PREV_WHITE | BOL.  */
 214 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 215   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 216       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 217   } while (0)
 218
 219 #define OP(e, s) { SPELL_OPERATOR, U s           },
 220 #define TK(e, s) { s,              U STRINGX (e) },
 221
 222 const struct token_spelling
 223 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
 224
 225 #undef OP
 226 #undef TK
 227
 228 /* Notify the compiler proper that the current line number has jumped,
 229    or the current file name has changed.  */
 230
 231 static void
 232 output_line_command (pfile, print, line)
 233      cpp_reader *pfile;
 234      cpp_printer *print;
 235      unsigned int line;
 236 {
 237   cpp_buffer *ip = CPP_BUFFER (pfile);
 238
 239   if (line == 0)
 240     return;
 241
 242   /* End the previous line of text.  */
 243   if (pfile->need_newline)
 244     {
 245       putc ('\n', print->outf);
 246       print->lineno++;
 247     }
 248   pfile->need_newline = 0;
 249
 250   if (CPP_OPTION (pfile, no_line_commands))
 251     return;
 252
 253   /* If the current file has not changed, we can output a few newlines
 254      instead if we want to increase the line number by a small amount.
 255      We cannot do this if print->lineno is zero, because that means we
 256      haven't output any line commands yet.  (The very first line
 257      command output is a `same_file' command.)
 258
 259      'nominal_fname' values are unique, so they can be compared by
 260      comparing pointers.  */
 261   if (ip->nominal_fname == print->last_fname && print->lineno > 0
 262       && line >= print->lineno && line < print->lineno + 8)
 263     {
 264       while (line > print->lineno)
 265         {
 266           putc ('\n', print->outf);
 267           print->lineno++;
 268         }
 269       return;
 270     }
 271
 272   fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
 273            cpp_syshdr_flags (pfile, ip));
 274
 275   print->last_fname = ip->nominal_fname;
 276   print->lineno = line;
 277 }
 278
 279 /* Like fprintf, but writes to a printer object.  You should be sure
 280    always to generate a complete line when you use this function.  */
 281 void
 282 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
 283                      const char *fmt, ...))
 284 {
 285   va_list ap;
 286 #ifndef ANSI_PROTOTYPES
 287   cpp_reader *pfile;
 288   cpp_printer *print;
 289   const char *fmt;
 290 #endif
 291
 292   VA_START (ap, fmt);
 293
 294 #ifndef ANSI_PROTOTYPES
 295   pfile = va_arg (ap, cpp_reader *);
 296   print = va_arg (ap, cpp_printer *);
 297   fmt = va_arg (ap, const char *);
 298 #endif
 299
 300   /* End the previous line of text.  */
 301   if (pfile->need_newline)
 302     {
 303       putc ('\n', print->outf);
 304       print->lineno++;
 305     }
 306   pfile->need_newline = 0;
 307
 308   vfprintf (print->outf, fmt, ap);
 309   va_end (ap);
 310 }
 311
 312 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 313
 314 void
 315 cpp_scan_buffer_nooutput (pfile)
 316      cpp_reader *pfile;
 317 {
 318   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 319   const cpp_token *token;
 320
 321   /* In no-output mode, we can ignore everything but directives.  */
 322   for (;;)
 323     {
 324       token = _cpp_get_token (pfile);
 325
 326       if (token->type == CPP_EOF)
 327         {
 328           cpp_pop_buffer (pfile);
 329           if (CPP_BUFFER (pfile) == stop)
 330             break;
 331         }
 332
 333       if (token->type == CPP_HASH && token->flags & BOL
 334           && pfile->token_list.directive)
 335         {
 336           process_directive (pfile, token);
 337           continue;
 338         }
 339
 340       _cpp_skip_rest_of_line (pfile);
 341     }
 342 }
 343
 344 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 345 void
 346 cpp_scan_buffer (pfile, print)
 347      cpp_reader *pfile;
 348      cpp_printer *print;
 349 {
 350   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 351   const cpp_token *token, *prev = 0;
 352
 353   for (;;)
 354     {
 355       token = _cpp_get_token (pfile);
 356       if (token->type == CPP_EOF)
 357         {
 358           cpp_pop_buffer (pfile);
 359
 360           if (CPP_BUFFER (pfile) == stop)
 361             return;
 362
 363           prev = 0;
 364           continue;
 365         }
 366
 367       if (token->flags & BOL)
 368         {
 369           output_line_command (pfile, print, pfile->token_list.line);
 370           prev = 0;
 371
 372           if (token->type == CPP_HASH && pfile->token_list.directive)
 373             {
 374               process_directive (pfile, token);
 375               continue;
 376             }
 377         }
 378
 379       if (token->type != CPP_PLACEMARKER)
 380         {
 381           output_token (pfile, print->outf, token, prev, 1);
 382           pfile->need_newline = 1;
 383         }
 384
 385       prev = token;
 386     }
 387 }
 388
 389 /* Helper routine used by parse_include, which can't see spell_token.
 390    Reinterpret the current line as an h-char-sequence (< ... >); we are
 391    looking at the first token after the <.  */
 392 const cpp_token *
 393 _cpp_glue_header_name (pfile)
 394      cpp_reader *pfile;
 395 {
 396   const cpp_token *t;
 397   cpp_token *hdr;
 398   U_CHAR *buf, *p;
 399   size_t len, avail;
 400
 401   avail = 40;
 402   len = 0;
 403   buf = xmalloc (avail);
 404
 405   for (;;)
 406     {
 407       t = _cpp_get_token (pfile);
 408       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 409         break;
 410
 411       if (len + TOKEN_LEN (t) > avail)
 412         {
 413           avail = len + TOKEN_LEN (t) + 40;
 414           buf = xrealloc (buf, avail);
 415         }
 416
 417       if (t->flags & PREV_WHITE)
 418         buf[len++] = ' ';
 419
 420       p = spell_token (pfile, t, buf + len);
 421       len = (size_t) (p - buf);  /* p known >= buf */
 422     }
 423
 424   if (t->type == CPP_EOF)
 425     cpp_error (pfile, "missing terminating > character");
 426
 427   buf = xrealloc (buf, len);
 428
 429   hdr = get_temp_token (pfile);
 430   hdr->type = CPP_HEADER_NAME;
 431   hdr->flags = 0;
 432   hdr->val.str.text = buf;
 433   hdr->val.str.len = len;
 434   return hdr;
 435 }
 436
 437 /* Token-buffer helper functions.  */
 438
 439 /* Expand a token list's string space. It is *vital* that
 440    list->tokens_used is correct, to get pointer fix-up right.  */
 441 void
 442 _cpp_expand_name_space (list, len)
 443      cpp_toklist *list;
 444      unsigned int len;
 445 {
 446   const U_CHAR *old_namebuf;
 447
 448   old_namebuf = list->namebuf;
 449   list->name_cap += len;
 450   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 451
 452   /* Fix up token text pointers.  */
 453   if (list->namebuf != old_namebuf)
 454     {
 455       unsigned int i;
 456
 457       for (i = 0; i < list->tokens_used; i++)
 458         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
 459           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 460     }
 461 }
 462
 463 /* If there is not enough room for LEN more characters, expand the
 464    list by just enough to have room for LEN characters.  */
 465 void
 466 _cpp_reserve_name_space (list, len)
 467      cpp_toklist *list;
 468      unsigned int len;
 469 {
 470   unsigned int room = list->name_cap - list->name_used;
 471
 472   if (room < len)
 473     _cpp_expand_name_space (list, len - room);
 474 }
 475
 476 /* Expand the number of tokens in a list.  */
 477 void
 478 _cpp_expand_token_space (list, count)
 479      cpp_toklist *list;
 480      unsigned int count;
 481 {
 482   unsigned int n;
 483
 484   list->tokens_cap += count;
 485   n = list->tokens_cap;
 486   if (list->flags & LIST_OFFSET)
 487     list->tokens--, n++;
 488   list->tokens = (cpp_token *)
 489     xrealloc (list->tokens, n * sizeof (cpp_token));
 490   if (list->flags & LIST_OFFSET)
 491     list->tokens++;             /* Skip the dummy.  */
 492 }
 493
 494 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 495    an extra token in front of the token list, as this allows the lexer
 496    to always peek at the previous token without worrying about
 497    underflowing the list, and some initial space.  Otherwise, no
 498    token- or name-space is allocated, and there is no dummy token.  */
 499 void
 500 _cpp_init_toklist (list, flags)
 501      cpp_toklist *list;
 502      int flags;
 503 {
 504   if (flags == NO_DUMMY_TOKEN)
 505     {
 506       list->tokens_cap = 0;
 507       list->tokens = 0;
 508       list->name_cap = 0;
 509       list->namebuf = 0;
 510       list->flags = 0;
 511     }
 512   else
 513     {
 514       /* Initialize token space.  Put a dummy token before the start
 515          that will fail matches.  */
 516       list->tokens_cap = 256;   /* 4K's worth.  */
 517       list->tokens = (cpp_token *)
 518         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 519       list->tokens[0].type = CPP_EOF;
 520       list->tokens++;
 521
 522       /* Initialize name space.  */
 523       list->name_cap = 1024;
 524       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 525       list->flags = LIST_OFFSET;
 526     }
 527
 528   _cpp_clear_toklist (list);
 529 }
 530
 531 /* Clear a token list.  */
 532 void
 533 _cpp_clear_toklist (list)
 534      cpp_toklist *list;
 535 {
 536   list->tokens_used = 0;
 537   list->name_used = 0;
 538   list->directive = 0;
 539   list->paramc = 0;
 540   list->params_len = 0;
 541   list->flags &= LIST_OFFSET;  /* clear all but that one */
 542 }
 543
 544 /* Free a token list.  Does not free the list itself, which may be
 545    embedded in a larger structure.  */
 546 void
 547 _cpp_free_toklist (list)
 548      const cpp_toklist *list;
 549 {
 550   if (list->flags & LIST_OFFSET)
 551     free (list->tokens - 1);    /* Backup over dummy token.  */
 552   else
 553     free (list->tokens);
 554   free (list->namebuf);
 555 }
 556
 557 /* Compare two tokens.  */
 558 int
 559 _cpp_equiv_tokens (a, b)
 560      const cpp_token *a, *b;
 561 {
 562   if (a->type == b->type && a->flags == b->flags)
 563     switch (TOKEN_SPELL (a))
 564       {
 565       default:                  /* Keep compiler happy.  */
 566       case SPELL_OPERATOR:
 567         return 1;
 568       case SPELL_CHAR:
 569       case SPELL_NONE:
 570         return a->val.aux == b->val.aux; /* arg_no or character.  */
 571       case SPELL_IDENT:
 572         return a->val.node == b->val.node;
 573       case SPELL_STRING:
 574         return (a->val.str.len == b->val.str.len
 575                 && !memcmp (a->val.str.text, b->val.str.text,
 576                             a->val.str.len));
 577       }
 578
 579   return 0;
 580 }
 581
 582 /* Compare two token lists.  */
 583 int
 584 _cpp_equiv_toklists (a, b)
 585      const cpp_toklist *a, *b;
 586 {
 587   unsigned int i;
 588
 589   if (a->tokens_used != b->tokens_used
 590       || a->flags != b->flags
 591       || a->paramc != b->paramc)
 592     return 0;
 593
 594   for (i = 0; i < a->tokens_used; i++)
 595     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 596       return 0;
 597   return 1;
 598 }
 599
 600 /* Utility routine:
 601
 602    Compares, the token TOKEN to the NUL-terminated string STRING.
 603    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 604
 605 int
 606 cpp_ideq (token, string)
 607      const cpp_token *token;
 608      const char *string;
 609 {
 610   if (token->type != CPP_NAME)
 611     return 0;
 612
 613   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 614 }
 615
 616 /* Lexing algorithm.
 617
 618  The original lexer in cpplib was made up of two passes: a first pass
  619  that replaced trigraphs and deleted escaped newlines, and a second
 620  pass that tokenized the result of the first pass.  Tokenisation was
 621  performed by peeking at the next character in the input stream.  For
 622  example, if the input stream contained "!=", the handler for the !
 623  character would peek at the next character, and if it were a '='
 624  would skip over it, and return a "!=" token, otherwise it would
 625  return just the "!" token.
 626
 627  To implement a single-pass lexer, this peeking ahead is unworkable.
 628  An arbitrary number of escaped newlines, and trigraphs (in particular
 629  ??/ which translates to the escape \), could separate the '!' and '='
 630  in the input stream, yet the next token is still a "!=".
 631
 632  Suppose instead that we lex by one logical line at a time, producing
 633  a token list or stack for each logical line, and when seeing the '!'
 634  push a CPP_NOT token on the list.  Then if the '!' is part of a
 635  longer token ("!=") we know we must see the remainder of the token by
 636  the time we reach the end of the logical line.  Thus we can have the
 637  '=' handler look at the previous token (at the end of the list / top
 638  of the stack) and see if it is a "!" token, and if so, instead of
 639  pushing a "=" token revise the existing token to be a "!=" token.
 640
 641  This works in the presence of escaped newlines, because the '\' would
 642  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 643  newline ('\n' or '\r') handler looks at the token at the top of the
 644  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 645  Hence the '=' handler would never see any intervening tokens.
 646
 647  To make trigraphs work in this context, as in precedence trigraphs
 648  are highest and converted before anything else, the '?' handler does
 649  lookahead to see if it is a trigraph, and if so skips the trigraph
 650  and pushes the token it represents onto the top of the stack.  This
 651  also works in the particular case of a CPP_BACKSLASH trigraph.
 652
 653  To the preprocessor, whitespace is only significant to the point of
 654  knowing whether whitespace precedes a particular token.  For example,
 655  the '=' handler needs to know whether there was whitespace between it
 656  and a "!" token on the top of the stack, to make the token conversion
 657  decision correctly.  So each token has a PREV_WHITE flag to
 658  indicate this - the standard permits consecutive whitespace to be
 659  regarded as a single space.  The compiler front ends are not
 660  interested in whitespace at all; they just require a token stream.
 661  Another place where whitespace is significant to the preprocessor is
  662  a #define statement - if there is whitespace between the macro name
 663  and an initial "(" token the macro is "object-like", otherwise it is
 664  a function-like macro that takes arguments.
 665
 666  However, all is not rosy.  Parsing of identifiers, numbers, comments
 667  and strings becomes trickier because of the possibility of raw
 668  trigraphs and escaped newlines in the input stream.
 669
 670  The trigraphs are three consecutive characters beginning with two
 671  question marks.  A question mark is not valid as part of a number or
 672  identifier, so parsing of a number or identifier terminates normally
 673  upon reaching it, returning to the mainloop which handles the
 674  trigraph just like it would in any other position.  Similarly for the
 675  backslash of a backslash-newline combination.  So we just need the
 676  escaped-newline dropper in the mainloop to check if the token on the
 677  top of the stack after dropping the escaped newline is a number or
 678  identifier, and if so to continue the processing it as if nothing had
 679  happened.
 680
 681  For strings, we replace trigraphs whenever we reach a quote or
 682  newline, because there might be a backslash trigraph escaping them.
 683  We need to be careful that we start trigraph replacing from where we
 684  left off previously, because it is possible for a first scan to leave
 685  "fake" trigraphs that a second scan would pick up as real (e.g. the
 686  sequence "????/\n=" would find a fake ??= trigraph after removing the
 687  escaped newline.)
 688
 689  For line comments, on reaching a newline we scan the previous
 690  character(s) to see if it escaped, and continue if it is.  Block
 691  comments ignore everything and just focus on finding the comment
 692  termination mark.  The only difficult thing, and it is surprisingly
 693  tricky, is checking if an asterisk precedes the final slash since
 694  they could be separated by escaped newlines.  If the preprocessor is
 695  invoked with the output comments option, we don't bother removing
 696  escaped newlines and replacing trigraphs for output.
 697
 698  Finally, numbers can begin with a period, which is pushed initially
 699  as a CPP_DOT token in its own right.  The digit handler checks if the
 700  previous token was a CPP_DOT not separated by whitespace, and if so
 701  pops it off the stack and pushes a period into the number's buffer
 702  before calling the number parser.
 703
 704 */
 705
 706 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 707                                                     U":>", U"<%", U"%>"};
 708
 709 /* Call when a trigraph is encountered.  It warns if necessary, and
 710    returns true if the trigraph should be honoured.  END is the third
 711    character of a trigraph in the input stream.  */
 712 static int
 713 trigraph_ok (pfile, end)
 714      cpp_reader *pfile;
 715      const unsigned char *end;
 716 {
 717   int accept = CPP_OPTION (pfile, trigraphs);
 718
 719   if (CPP_OPTION (pfile, warn_trigraphs))
 720     {
 721       unsigned int col = end - 1 - pfile->buffer->line_base;
 722       if (accept)
 723         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 724                                "trigraph ??%c converted to %c",
 725                                (int) *end, (int) _cpp_trigraph_map[*end]);
 726       else
 727         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 728                                "trigraph ??%c ignored", (int) *end);
 729     }
 730   return accept;
 731 }
 732
 733 /* Scan a string for trigraphs, warning or replacing them inline as
 734    appropriate.  When parsing a string, we must call this routine
 735    before processing a newline character (if trigraphs are enabled),
 736    since the newline might be escaped by a preceding backslash
 737    trigraph sequence.  Returns a pointer to the end of the name after
 738    replacement.  */
 739
 740 static unsigned char *
 741 trigraph_replace (pfile, src, limit)
 742      cpp_reader *pfile;
 743      unsigned char *src;
 744      unsigned char *limit;
 745 {
 746   unsigned char *dest;
 747
 748   /* Starting with src[1], find two consecutive '?'.  The case of no
 749      trigraphs is streamlined.  */
 750
 751   for (src++; src + 1 < limit; src += 2)
 752     {
 753       if (src[0] != '?')
 754         continue;
 755
 756       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 757       if (src[-1] == '?')
 758         src--;
 759       else if (src + 2 == limit || src[1] != '?')
 760         continue;
 761
 762       /* Check if it really is a trigraph.  */
 763       if (_cpp_trigraph_map[src[2]] == 0)
 764         continue;
 765
 766       dest = src;
 767       goto trigraph_found;
 768     }
 769   return limit;
 770
 771   /* Now we have a trigraph, we need to scan the remaining buffer, and
 772      copy-shifting its contents left if replacement is enabled.  */
 773   for (; src + 2 < limit; dest++, src++)
 774     if ((*dest = *src) == '?' && src[1] == '?' && _cpp_trigraph_map[src[2]])
 775       {
 776       trigraph_found:
 777         src += 2;
 778         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 779           *dest = _cpp_trigraph_map[*src];
 780       }
 781
 782   /* Copy remaining (at most 2) characters.  */
 783   while (src < limit)
 784     *dest++ = *src++;
 785   return dest;
 786 }
 787
 788 /* If CUR is a backslash or the end of a trigraphed backslash, return
 789    a pointer to its beginning, otherwise NULL.  We don't read beyond
 790    the buffer start, because there is the start of the comment in the
 791    buffer.  */
 792 static const unsigned char *
 793 backslash_start (pfile, cur)
 794      cpp_reader *pfile;
 795      const unsigned char *cur;
 796 {
 797   if (cur[0] == '\\')
 798     return cur;
 799   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
 800       && trigraph_ok (pfile, cur))
 801     return cur - 2;
 802   return 0;
 803 }
 804
 805 /* Skip a C-style block comment.  This is probably the trickiest
 806    handler.  We find the end of the comment by seeing if an asterisk
 807    is before every '/' we encounter.  The nasty complication is that a
 808    previous asterisk may be separated by one or more escaped newlines.
 809    Returns non-zero if comment terminated by EOF, zero otherwise.  */
 810 static int
 811 skip_block_comment (pfile)
 812      cpp_reader *pfile;
 813 {
 814   cpp_buffer *buffer = pfile->buffer;
 815   const unsigned char *char_after_star = 0;
 816   const unsigned char *cur = buffer->cur;
 817
 818   for (; cur < buffer->rlimit; )
 819     {
 820       unsigned char c = *cur++;
 821
 822       /* People like decorating comments with '*', so check for
 823          '/' instead for efficiency.  */
 824       if (c == '/')
 825         {
 826           /* Don't view / then * then / as finishing the comment.  */
 827           if ((cur[-2] == '*' && cur - 1 > buffer->cur)
 828               || cur - 1 == char_after_star)
 829             {
 830               buffer->cur = cur;
 831               return 0;
 832             }
 833
 834           /* Warn about potential nested comments, but not when
 835              the final character inside the comment is a '/'.
 836              Don't bother to get it right across escaped newlines.  */
 837           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 838               && cur[0] == '*' && cur[1] != '/')
 839             {
 840               buffer->cur = cur;
 841               cpp_warning (pfile, "'/*' within comment");
 842             }
 843         }
 844       else if (is_vspace (c))
 845         {
 846           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 847
 848           handle_newline (cur, buffer->rlimit, c);
 849           /* Work correctly if there is an asterisk before an
 850              arbirtrarily long sequence of escaped newlines.  */
 851           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 852             char_after_star = cur;
 853           else
 854             char_after_star = 0;
 855         }
 856       else if (c == '\t')
 857         adjust_column (pfile, cur - 1);
 858     }
 859
 860   buffer->cur = cur;
 861   return 1;
 862 }
 863
 864 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 865    non-zero if a multiline comment.  */
 866 static int
 867 skip_line_comment (pfile)
 868      cpp_reader *pfile;
 869 {
 870   cpp_buffer *buffer = pfile->buffer;
 871   register const unsigned char *cur = buffer->cur;
 872   int multiline = 0;
 873
 874   for (; cur < buffer->rlimit; )
 875     {
 876       unsigned char c = *cur++;
 877
 878       if (is_vspace (c))
 879         {
 880           /* Check for a (trigaph?) backslash escaping the newline.  */
 881           if (!backslash_start (pfile, cur - 2))
 882             goto out;
 883           multiline = 1;
 884           handle_newline (cur, buffer->rlimit, c);
 885         }
 886     }
 887   cur++;
 888
 889  out:
 890   buffer->cur = cur - 1;        /* Leave newline for caller.  */
 891   return multiline;
 892 }
 893
 894 /* TAB points to a \t character.  Update col_adjust so we track the
 895    column correctly.  */
 896 static void
 897 adjust_column (pfile, tab)
 898      cpp_reader *pfile;
 899      const U_CHAR *tab;
 900 {
 901   /* Zero-based column.  */
 902   unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
 903
 904   /* Round it up to multiple of the tabstop, but subtract 1 since the
 905      tab itself occupies a character position.  */
 906   pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
 907                         - col % CPP_OPTION (pfile, tabstop)) - 1;
 908 }
 909
 910 /* Skips whitespace, stopping at next non-whitespace character.
 911    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
 912    to be assigned the correct column.  */
 913 static void
 914 skip_whitespace (pfile, in_directive)
 915      cpp_reader *pfile;
 916      int in_directive;
 917 {
 918   cpp_buffer *buffer = pfile->buffer;
 919   unsigned short warned = 0;
 920
 921   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 922   while (buffer->cur < buffer->rlimit)
 923     {
 924       unsigned char c = *buffer->cur;
 925
 926       if (!is_nvspace (c))
 927         break;
 928
 929       buffer->cur++;
 930       /* Horizontal space always OK.  */
 931       if (c == ' ')
 932         continue;
 933       else if (c == '\t')
 934         adjust_column (pfile, buffer->cur - 1);
 935       /* Must be \f \v or \0.  */
 936       else if (c == '\0')
 937         {
 938           if (!warned)
 939             cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 940                                    CPP_BUF_COL (buffer),
 941                                    "embedded null character ignored");
 942           warned = 1;
 943         }
 944       else if (in_directive && CPP_PEDANTIC (pfile))
 945         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 946                                CPP_BUF_COL (buffer),
 947                                "%s in preprocessing directive",
 948                                c == '\f' ? "form feed" : "vertical tab");
 949     }
 950 }
 951
 952 /* Parse (append) an identifier.  Calculates the hash value of the
 953    token while parsing, for performance.  The algorithm *must* match
 954    cpp_lookup().  */
 955 static const U_CHAR *
 956 parse_name (pfile, tok, cur, rlimit)
 957      cpp_reader *pfile;
 958      cpp_token *tok;
 959      const U_CHAR *cur, *rlimit;
 960 {
 961   const U_CHAR *name;
 962   unsigned int len;
 963   unsigned int r;
 964
 965   name = cur;
 966   r = 0;
 967   while (cur < rlimit)
 968     {
 969       if (! is_idchar (*cur))
 970         break;
 971       /* $ is not a identifier character in the standard, but is
 972          commonly accepted as an extension.  Don't warn about it in
 973          skipped conditional blocks. */
 974       if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 975         {
 976           CPP_BUFFER (pfile)->cur = cur;
 977           cpp_pedwarn (pfile, "'$' character in identifier");
 978         }
 979
 980       r = HASHSTEP (r, cur);
 981       cur++;
 982     }
 983   len = cur - name;
 984
 985   if (tok->type == CPP_NAME && tok->val.node == 0)
 986     tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
 987   else
 988     {
 989       unsigned int oldlen;
 990       U_CHAR *newname;
 991
 992       if (tok->type == CPP_NAME)
 993         oldlen = tok->val.node->length;
 994       else
 995         oldlen = 1;
 996
 997       newname = alloca (oldlen + len);
 998
 999       if (tok->type == CPP_NAME)
1000         memcpy (newname, tok->val.node->name, oldlen);
1001       else
1002         newname[0] = tok->val.aux;
1003       memcpy (newname + oldlen, name, len);
1004       tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1005       tok->type = CPP_NAME;
1006     }
1007
1008   return cur;
1009 }
1010
1011 /* Parse (append) a number.  */
1012 static void
1013 parse_number (pfile, list, name)
1014      cpp_reader *pfile;
1015      cpp_toklist *list;
1016      cpp_string *name;
1017 {
1018   const unsigned char *name_limit;
1019   unsigned char *namebuf;
1020   cpp_buffer *buffer = pfile->buffer;
1021   register const unsigned char *cur = buffer->cur;
1022
1023  expanded:
1024   name_limit = list->namebuf + list->name_cap;
1025   namebuf = list->namebuf + list->name_used;
1026
1027   for (; cur < buffer->rlimit && namebuf < name_limit; )
1028     {
1029       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
1030
1031       /* Perhaps we should accept '$' here if we accept it for
1032          identifiers.  We know namebuf[-1] is safe, because for c to
1033          be a sign we must have pushed at least one character.  */
1034       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1035         goto out;
1036
1037       namebuf++;
1038       cur++;
1039     }
1040
1041   /* Run out of name space?  */
1042   if (cur < buffer->rlimit)
1043     {
1044       list->name_used = namebuf - list->namebuf;
1045       auto_expand_name_space (list);
1046       goto expanded;
1047     }
1048
1049  out:
1050   buffer->cur = cur;
1051   name->len = namebuf - name->text;
1052   list->name_used = namebuf - list->namebuf;
1053 }
1054
1055 /* Places a string terminated by an unescaped TERMINATOR into a
1056    cpp_string, which should be expandable and thus at the top of the
1057    list's stack.  Handles embedded trigraphs, if necessary, and
1058    escaped newlines.
1059
1060    Can be used for character constants (terminator = '\''), string
1061    constants ('"') and angled headers ('>').  Multi-line strings are
1062    allowed, except for within directives.  */
1063
1064 static void
1065 parse_string (pfile, list, token, terminator)
1066      cpp_reader *pfile;
1067      cpp_toklist *list;
1068      cpp_token *token;
1069      unsigned int terminator;
1070 {
1071   cpp_buffer *buffer = pfile->buffer;
1072   cpp_string *name = &token->val.str;
1073   register const unsigned char *cur = buffer->cur;
1074   const unsigned char *name_limit;
1075   unsigned char *namebuf;
1076   unsigned int null_count = 0;
1077   unsigned int trigraphed = list->name_used;
1078
1079  expanded:
1080   name_limit = list->namebuf + list->name_cap;
1081   namebuf = list->namebuf + list->name_used;
1082
1083   for (; cur < buffer->rlimit && namebuf < name_limit; )
1084     {
1085       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1086
1087       if (c == '\0')
1088         null_count++;
1089       else if (c == terminator || is_vspace (c))
1090         {
1091           /* Needed for trigraph_replace and multiline string warning.  */
1092           buffer->cur = cur;
1093
1094           /* Scan for trigraphs before checking if backslash-escaped.  */
1095           if ((CPP_OPTION (pfile, trigraphs)
1096                || CPP_OPTION (pfile, warn_trigraphs))
1097               && namebuf - (list->namebuf + trigraphed) >= 3)
1098             {
1099               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1100                                           namebuf);
1101               /* The test above guarantees trigraphed will be positive.  */
1102               trigraphed = namebuf - list->namebuf - 2;
1103             }
1104
1105           namebuf--;     /* Drop the newline / terminator from the name.  */
1106           if (is_vspace (c))
1107             {
1108               /* Drop a backslash newline, and continue. */
1109               U_CHAR *old = namebuf;
1110               while (namebuf > list->namebuf && is_hspace (namebuf[-1]))
1111                 namebuf--;
1112               if (namebuf > list->namebuf && namebuf[-1] == '\\')
1113                 {
1114                   handle_newline (cur, buffer->rlimit, c);
1115                   namebuf--;
1116                   if (old[-1] != '\\')
1117                     {
1118                       buffer->cur = cur;
1119                       cpp_warning (pfile,
1120                                    "backslash and newline separated by space");
1121                     }
1122                   continue;
1123                 }
1124               else
1125                 namebuf = old;
1126
1127               cur--;
1128
1129               /* In assembly language, silently terminate strings of
1130                  either variety at end of line.  This is a kludge
1131                  around not knowing where comments are.  */
1132               if (CPP_OPTION (pfile, lang_asm))
1133                 goto out;
1134
1135               /* Character constants and header names may not extend
1136                  over multiple lines.  In Standard C, neither may
1137                  strings.  We accept multiline strings as an
1138                  extension.  (Even in directives - otherwise, glibc's
1139                  longlong.h breaks.)  */
1140               if (terminator != '"')
1141                 goto unterminated;
1142
1143               cur++;  /* Move forwards again.  */
1144
1145               if (pfile->multiline_string_line == 0)
1146                 {
1147                   pfile->multiline_string_line = token->line;
1148                   pfile->multiline_string_column = token->col;
1149                   if (CPP_PEDANTIC (pfile))
1150                     cpp_pedwarn (pfile, "multi-line string constant");
1151                 }
1152
1153               *namebuf++ = '\n';
1154               handle_newline (cur, buffer->rlimit, c);
1155             }
1156           else
1157             {
1158               unsigned char *temp;
1159
1160               /* An odd number of consecutive backslashes represents
1161                  an escaped terminator.  */
1162               temp = namebuf - 1;
1163               while (temp >= name->text && *temp == '\\')
1164                 temp--;
1165
1166               if ((namebuf - temp) & 1)
1167                 goto out;
1168               namebuf++;
1169             }
1170         }
1171     }
1172
1173   /* Run out of name space?  */
1174   if (cur < buffer->rlimit)
1175     {
1176       list->name_used = namebuf - list->namebuf;
1177       auto_expand_name_space (list);
1178       goto expanded;
1179     }
1180
1181   /* We may not have trigraph-replaced the input for this code path,
1182      but as the input is in error by being unterminated we don't
1183      bother.  Prevent warnings about no newlines at EOF.  */
1184   if (is_vspace (cur[-1]))
1185     cur--;
1186
1187  unterminated:
1188   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1189
1190   if (terminator == '\"' && pfile->multiline_string_line != list->line
1191       && pfile->multiline_string_line != 0)
1192     {
1193       cpp_error_with_line (pfile, pfile->multiline_string_line,
1194                            pfile->multiline_string_column,
1195                            "possible start of unterminated string literal");
1196       pfile->multiline_string_line = 0;
1197     }
1198
1199  out:
1200   buffer->cur = cur;
1201   name->len = namebuf - name->text;
1202   list->name_used = namebuf - list->namebuf;
1203
1204   if (null_count > 0)
1205     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1206                          : "null character preserved"));
1207 }
1208
1209 /* The character TYPE helps us distinguish comment types: '*' = C
1210    style, '/' = C++ style.  For code simplicity, the stored comment
1211    includes the comment start and any terminator.  */
1212
1213 #define COMMENT_START_LEN 2
1214 static void
1215 save_comment (list, token, from, len, type)
1216      cpp_toklist *list;
1217      cpp_token *token;
1218      const unsigned char *from;
1219      unsigned int len;
1220      unsigned int type;
1221 {
1222   unsigned char *buffer;
1223
1224   len += COMMENT_START_LEN;
1225
1226   if (list->name_used + len > list->name_cap)
1227     _cpp_expand_name_space (list, len);
1228
1229   INIT_TOKEN_STR (list, token);
1230   token->type = CPP_COMMENT;
1231   token->val.str.len = len;
1232
1233   buffer = list->namebuf + list->name_used;
1234   list->name_used += len;
1235
1236   /* Copy the comment.  */
1237   if (type == '*')
1238     {
1239       *buffer++ = '/';
1240       *buffer++ = '*';
1241     }
1242   else
1243     {
1244       *buffer++ = type;
1245       *buffer++ = type;
1246     }
1247   memcpy (buffer, from, len - COMMENT_START_LEN);
1248 }
1249
1250 /*
1251  *  The tokenizer's main loop.  Returns a token list, representing a
1252  *  logical line in the input file.  On EOF after some tokens have
1253  *  been processed, we return immediately.  Then in next call, or if
1254  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1255  *  token is placed in the list.
1256  *
1257  *  Implementation relies almost entirely on lookback, rather than
1258  *  looking forwards.  This means that tokenization requires just
1259  *  a single pass of the file, even in the presence of trigraphs and
1260  *  escaped newlines, providing significant performance benefits.
1261  *  Trigraph overhead is negligible if they are disabled, and low
1262  *  even when enabled.
1263  */
1264
/* Both macros take no arguments and instead pick up variables named
   list, cur_token and first_token from the scope they are used in.  */

/* Non-zero once a directive has been recorded for the current list.  */
#define KNOWN_DIRECTIVE() (list->directive != 0)

/* Non-zero if cur_token is the second token lexed by this call and
   the token before it is a CPP_HASH.  */
#define MIGHT_BE_DIRECTIVE() \
  (cur_token == list->tokens + first_token + 1 \
   && cur_token[-1].type == CPP_HASH)
1268
1269 static void
1270 lex_line (pfile, list)
1271      cpp_reader *pfile;
1272      cpp_toklist *list;
1273 {
1274   cpp_token *cur_token, *token_limit, *first;
1275   cpp_buffer *buffer = pfile->buffer;
1276   const unsigned char *cur = buffer->cur;
1277   unsigned char flags = 0;
1278   unsigned int first_token = list->tokens_used;
1279
1280   if (!(list->flags & LIST_OFFSET))
1281     (abort) ();
1282
1283  retry:
1284   list->file = buffer->nominal_fname;
1285   list->line = CPP_BUF_LINE (buffer);
1286   pfile->col_adjust = 0;
1287   pfile->in_lex_line = 1;
1288   if (cur == buffer->buf)
1289     list->flags |= BEG_OF_FILE;
1290
1291  expanded:
1292   token_limit = list->tokens + list->tokens_cap;
1293   cur_token = list->tokens + list->tokens_used;
1294
1295   for (; cur < buffer->rlimit && cur_token < token_limit;)
1296     {
1297       unsigned char c;
1298
1299       /* Optimize non-vertical whitespace skipping; most tokens are
1300          probably separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1301       c = *cur;
1302       if (is_nvspace (c))
1303         {
1304           buffer->cur = cur;
1305           skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1306                                    && cur_token > &list->tokens[first_token]));
1307           cur = buffer->cur;
1308
1309           flags = PREV_WHITE;
1310           if (cur == buffer->rlimit)
1311             break;
1312           c = *cur;
1313         }
1314       cur++;
1315
1316       /* Initialize current token.  CPP_EOF will not be fixed up by
1317          expand_name_space.  */
1318       list->tokens_used = cur_token - list->tokens + 1;
1319       cur_token->type = CPP_EOF;
1320       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1321       cur_token->line = CPP_BUF_LINE (buffer);
1322       cur_token->flags = flags;
1323       flags = 0;
1324
1325       switch (c)
1326         {
1327         case '0': case '1': case '2': case '3': case '4':
1328         case '5': case '6': case '7': case '8': case '9':
1329           {
1330             int prev_dot;
1331
1332             cur--;              /* Backup character.  */
1333             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1334             if (prev_dot)
1335               cur_token--;
1336             INIT_TOKEN_STR (list, cur_token);
1337             /* Prepend an immediately previous CPP_DOT token.  */
1338             if (prev_dot)
1339               {
1340                 if (list->name_cap == list->name_used)
1341                   auto_expand_name_space (list);
1342
1343                 cur_token->val.str.len = 1;
1344                 list->namebuf[list->name_used++] = '.';
1345               }
1346
1347           continue_number:
1348             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1349             buffer->cur = cur;
1350             parse_number (pfile, list, &cur_token->val.str);
1351             cur = buffer->cur;
1352           }
1353           /* Check for # 123 form of #line.  */
1354           if (MIGHT_BE_DIRECTIVE ())
1355             list->directive = _cpp_check_linemarker (pfile, cur_token,
1356                                                      !(cur_token[-1].flags
1357                                                        & PREV_WHITE));
1358           cur_token++;
1359           break;
1360
1361         letter:
1362         case '_':
1363         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1364         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1365         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1366         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1367         case 'y': case 'z':
1368         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1369         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1370         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1371         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1372         case 'Y': case 'Z':
1373           cur--;                     /* Backup character.  */
1374
1375           /* In Objective C, '@' may begin certain keywords.  */
1376           if (CPP_OPTION (pfile, objc) && cur_token[-1].type == CPP_OTHER
1377               && cur_token[-1].val.aux == '@' && IMMED_TOKEN ())
1378             cur_token--;
1379           else
1380             {
1381               cur_token->val.node = 0;
1382               cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1383             }
1384
1385         continue_name:
1386           cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1387
1388           if (MIGHT_BE_DIRECTIVE ())
1389             list->directive = _cpp_check_directive (pfile, cur_token,
1390                                                     !(list->tokens[0].flags
1391                                                       & PREV_WHITE));
1392           /* Convert named operators to their proper types.  */
1393           if (cur_token->val.node->type == T_OPERATOR)
1394             {
1395               cur_token->flags |= NAMED_OP;
1396               cur_token->type = cur_token->val.node->value.code;
1397             }
1398
1399           cur_token++;
1400           break;
1401
1402         case '\'':
1403           cur_token->type = CPP_CHAR;
1404           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1405               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1406             BACKUP_TOKEN (CPP_WCHAR);
1407           goto do_parse_string;
1408
1409         case '\"':
1410           cur_token->type = CPP_STRING;
1411           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1412               && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1413             BACKUP_TOKEN (CPP_WSTRING);
1414           else if (CPP_OPTION (pfile, objc)
1415                    && cur_token[-1].type == CPP_OTHER && IMMED_TOKEN ()
1416                    && cur_token[-1].val.aux == '@')
1417             BACKUP_TOKEN (CPP_OSTRING);
1418
1419         do_parse_string:
1420           /* Here c is one of ' " or >.  */
1421           INIT_TOKEN_STR (list, cur_token);
1422           buffer->cur = cur;
1423           parse_string (pfile, list, cur_token, c);
1424           cur = buffer->cur;
1425           cur_token++;
1426           break;
1427
1428         case '/':
1429           cur_token->type = CPP_DIV;
1430           if (IMMED_TOKEN ())
1431             {
1432               if (PREV_TOKEN_TYPE == CPP_DIV)
1433                 {
1434                   /* We silently allow C++ comments in system headers,
1435                      irrespective of conformance mode, because lots of
1436                      broken systems do that and trying to clean it up
1437                      in fixincludes is a nightmare.  */
1438                   if (CPP_IN_SYSTEM_HEADER (pfile))
1439                     goto do_line_comment;
1440                   else if (CPP_OPTION (pfile, cplusplus_comments))
1441                     {
1442                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1443                           && ! buffer->warned_cplusplus_comments)
1444                         {
1445                           buffer->cur = cur;
1446                           cpp_pedwarn (pfile,
1447                              "C++ style comments are not allowed in ISO C89");
1448                           cpp_pedwarn (pfile,
1449                           "(this will be reported only once per input file)");
1450                           buffer->warned_cplusplus_comments = 1;
1451                         }
1452                     do_line_comment:
1453                       buffer->cur = cur;
1454 #if 0 /* Leave until new lexer in place.  */
1455                       if (cur[-2] != c)
1456                         cpp_warning (pfile,
1457                                      "comment start split across lines");
1458 #endif
1459                       if (skip_line_comment (pfile))
1460                         cpp_warning (pfile, "multi-line comment");
1461
1462                       /* Back-up to first '-' or '/'.  */
1463                       cur_token--;
1464                       if (!CPP_OPTION (pfile, discard_comments)
1465                           && (!KNOWN_DIRECTIVE()
1466                               || (list->directive->flags & COMMENTS)))
1467                         save_comment (list, cur_token++, cur,
1468                                       buffer->cur - cur, c);
1469                       else
1470                         flags = PREV_WHITE;
1471
1472                       cur = buffer->cur;
1473                       break;
1474                     }
1475                 }
1476             }
1477           cur_token++;
1478           break;
1479
1480         case '*':
1481           cur_token->type = CPP_MULT;
1482           if (IMMED_TOKEN ())
1483             {
1484               if (PREV_TOKEN_TYPE == CPP_DIV)
1485                 {
1486                   buffer->cur = cur;
1487 #if 0 /* Leave until new lexer in place.  */
1488                   if (cur[-2] != '/')
1489                     cpp_warning (pfile,
1490                                  "comment start '/*' split across lines");
1491 #endif
1492                   if (skip_block_comment (pfile))
1493                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1494                                          "unterminated comment");
1495 #if 0 /* Leave until new lexer in place.  */
1496                   else if (buffer->cur[-2] != '*')
1497                     cpp_warning (pfile,
1498                                  "comment end '*/' split across lines");
1499 #endif
1500                   /* Back up to opening '/'.  */
1501                   cur_token--;
1502                   if (!CPP_OPTION (pfile, discard_comments)
1503                       && (!KNOWN_DIRECTIVE()
1504                           || (list->directive->flags & COMMENTS)))
1505                     save_comment (list, cur_token++, cur,
1506                                   buffer->cur - cur, c);
1507                   else
1508                     flags = PREV_WHITE;
1509
1510                   cur = buffer->cur;
1511                   break;
1512                 }
1513               else if (CPP_OPTION (pfile, cplusplus))
1514                 {
1515                   /* In C++, there are .* and ->* operators.  */
1516                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1517                     BACKUP_TOKEN (CPP_DEREF_STAR);
1518                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1519                     BACKUP_TOKEN (CPP_DOT_STAR);
1520                 }
1521             }
1522           cur_token++;
1523           break;
1524
1525         case '\n':
1526         case '\r':
1527           handle_newline (cur, buffer->rlimit, c);
1528           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1529             {
1530               /* backslash space newline is still treated as backslash-newline;
1531                  we think this is standard conforming, with some reservations
1532                  about actually _using_ the weasel words in C99 5.1.1.2
1533                  (translation phase 1 is allowed to do whatever it wants to
1534                  your input as long as it's documented).  */
1535               if (! IMMED_TOKEN ())
1536                 {
1537                   buffer->cur = cur;
1538                   cpp_warning (pfile,
1539                                "backslash and newline separated by space");
1540                 }
1541
1542               /* Remove the escaped newline.  Then continue to process
1543                  any interrupted name or number.  */
1544               cur_token--;
1545               /* Backslash-newline may not be immediately followed by
1546                  EOF (C99 5.1.1.2).  */
1547               if (cur >= buffer->rlimit)
1548                 {
1549                   cpp_pedwarn (pfile, "backslash-newline at end of file");
1550                   break;
1551                 }
1552               if (IMMED_TOKEN ())
1553                 {
1554                   cur_token--;
1555                   if (cur_token->type == CPP_NAME)
1556                     goto continue_name;
1557                   else if (cur_token->type == CPP_NUMBER)
1558                     goto continue_number;
1559                   cur_token++;
1560                 }
1561               /* Remember whitespace setting.  */
1562               flags = cur_token->flags;
1563               break;
1564             }
1565           else if (MIGHT_BE_DIRECTIVE ())
1566             {
1567               /* "Null directive." C99 6.10.7: A preprocessing
1568                  directive of the form # <new-line> has no effect.
1569
1570                  But it is still a directive, and therefore disappears
1571                  from the output. */
1572               cur_token--;
1573               if (cur_token->flags & PREV_WHITE
1574                   && CPP_WTRADITIONAL (pfile))
1575                 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1576             }
1577
1578           /* Skip vertical space until we have at least one token to
1579              return.  */
1580           if (cur_token != &list->tokens[first_token])
1581             goto out;
1582           list->line = CPP_BUF_LINE (buffer);
1583           break;
1584
1585         case '-':
1586           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1587             REVISE_TOKEN (CPP_MINUS_MINUS);
1588           else
1589             PUSH_TOKEN (CPP_MINUS);
1590           break;
1591
1592         make_hash:
1593         case '#':
1594           /* The digraph flag checking ensures that ## and %:%:
1595              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1596           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1597               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1598             REVISE_TOKEN (CPP_PASTE);
1599           else
1600             PUSH_TOKEN (CPP_HASH);
1601           break;
1602
1603         case ':':
1604           cur_token->type = CPP_COLON;
1605           if (IMMED_TOKEN ())
1606             {
1607               if (PREV_TOKEN_TYPE == CPP_COLON
1608                   && CPP_OPTION (pfile, cplusplus))
1609                 BACKUP_TOKEN (CPP_SCOPE);
1610               else if (CPP_OPTION (pfile, digraphs))
1611                 {
1612                   /* Digraph: "<:" is a '['  */
1613                   if (PREV_TOKEN_TYPE == CPP_LESS)
1614                     BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1615                   /* Digraph: "%:" is a '#'  */
1616                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1617                     {
1618                       (--cur_token)->flags |= DIGRAPH;
1619                       goto make_hash;
1620                     }
1621                 }
1622             }
1623           cur_token++;
1624           break;
1625
1626         case '&':
1627           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1628             REVISE_TOKEN (CPP_AND_AND);
1629           else
1630             PUSH_TOKEN (CPP_AND);
1631           break;
1632
1633         make_or:
1634         case '|':
1635           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1636             REVISE_TOKEN (CPP_OR_OR);
1637           else
1638             PUSH_TOKEN (CPP_OR);
1639           break;
1640
1641         case '+':
1642           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1643             REVISE_TOKEN (CPP_PLUS_PLUS);
1644           else
1645             PUSH_TOKEN (CPP_PLUS);
1646           break;
1647
1648         case '=':
1649             /* This relies on equidistance of "?=" and "?" tokens.  */
1650           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1651             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1652           else
1653             PUSH_TOKEN (CPP_EQ);
1654           break;
1655
1656         case '>':
1657           cur_token->type = CPP_GREATER;
1658           if (IMMED_TOKEN ())
1659             {
1660               if (PREV_TOKEN_TYPE == CPP_GREATER)
1661                 BACKUP_TOKEN (CPP_RSHIFT);
1662               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1663                 BACKUP_TOKEN (CPP_DEREF);
1664               else if (CPP_OPTION (pfile, digraphs))
1665                 {
1666                   /* Digraph: ":>" is a ']'  */
1667                   if (PREV_TOKEN_TYPE == CPP_COLON)
1668                     BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1669                   /* Digraph: "%>" is a '}'  */
1670                   else if (PREV_TOKEN_TYPE == CPP_MOD)
1671                     BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1672                 }
1673             }
1674           cur_token++;
1675           break;
1676
1677         case '<':
1678           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1679             {
1680               REVISE_TOKEN (CPP_LSHIFT);
1681               break;
1682             }
1683           /* Is this the beginning of a header name?  */
1684           if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1685             {
1686               c = '>';  /* Terminator.  */
1687               cur_token->type = CPP_HEADER_NAME;
1688               goto do_parse_string;
1689             }
1690           PUSH_TOKEN (CPP_LESS);
1691           break;
1692
1693         case '%':
1694           /* Digraph: "<%" is a '{'  */
1695           cur_token->type = CPP_MOD;
1696           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1697               && CPP_OPTION (pfile, digraphs))
1698             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1699           cur_token++;
1700           break;
1701
1702         case '?':
1703           if (cur + 1 < buffer->rlimit && *cur == '?'
1704               && _cpp_trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1705             {
1706               /* Handle trigraph.  */
1707               cur++;
1708               switch (*cur++)
1709                 {
1710                 case '(': goto make_open_square;
1711                 case ')': goto make_close_square;
1712                 case '<': goto make_open_brace;
1713                 case '>': goto make_close_brace;
1714                 case '=': goto make_hash;
1715                 case '!': goto make_or;
1716                 case '-': goto make_complement;
1717                 case '/': goto make_backslash;
1718                 case '\'': goto make_xor;
1719                 }
1720             }
1721           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1722             {
1723               /* GNU C++ defines <? and >? operators.  */
1724               if (PREV_TOKEN_TYPE == CPP_LESS)
1725                 {
1726                   REVISE_TOKEN (CPP_MIN);
1727                   break;
1728                 }
1729               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1730                 {
1731                   REVISE_TOKEN (CPP_MAX);
1732                   break;
1733                 }
1734             }
1735           PUSH_TOKEN (CPP_QUERY);
1736           break;
1737
1738         case '.':
1739           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1740               && IMMED_TOKEN ()
1741               && !(cur_token[-1].flags & PREV_WHITE))
1742             {
1743               cur_token -= 2;
1744               PUSH_TOKEN (CPP_ELLIPSIS);
1745             }
1746           else
1747             PUSH_TOKEN (CPP_DOT);
1748           break;
1749
1750         make_complement:
1751         case '~': PUSH_TOKEN (CPP_COMPL); break;
1752         make_xor:
1753         case '^': PUSH_TOKEN (CPP_XOR); break;
1754         make_open_brace:
1755         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1756         make_close_brace:
1757         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1758         make_open_square:
1759         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1760         make_close_square:
1761         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1762         make_backslash:
1763         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1764         case '!': PUSH_TOKEN (CPP_NOT); break;
1765         case ',': PUSH_TOKEN (CPP_COMMA); break;
1766         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1767         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1768         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1769
1770         case '$':
1771           if (CPP_OPTION (pfile, dollars_in_ident))
1772             goto letter;
1773           /* Fall through */
1774         default:
1775           cur_token->val.aux = c;
1776           PUSH_TOKEN (CPP_OTHER);
1777           break;
1778         }
1779     }
1780
1781   /* Run out of token space?  */
1782   if (cur_token == token_limit)
1783     {
1784       list->tokens_used = cur_token - list->tokens;
1785       _cpp_expand_token_space (list, 256);
1786       goto expanded;
1787     }
1788
1789   cur_token->flags = flags;
1790   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1791     {
1792       if (cur > buffer->buf && !is_vspace (cur[-1]))
1793         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1794                                CPP_BUF_COLUMN (buffer, cur),
1795                                "no newline at end of file");
1796       cur_token++->type = CPP_EOF;
1797     }
1798
1799  out:
1800   /* All tokens are allocated, so the memory location is fixed.  */
1801   first = &list->tokens[first_token];
1802
1803   /* Don't complain about the null directive, nor directives in
1804      assembly source: we don't know where the comments are, and # may
1805      introduce assembler pseudo-ops.  Don't complain about invalid
1806      directives in skipped conditional groups (6.10 p4).  */
1807   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1808       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1809     {
1810       if (first[1].type == CPP_NAME)
1811         cpp_error (pfile, "invalid preprocessing directive #%s",
1812                    first[1].val.node->name);
1813       else
1814         cpp_error (pfile, "invalid preprocessing directive");
1815
1816       /* Discard this line to prevent further errors from cc1.  */
1817       _cpp_clear_toklist (list);
1818       goto retry;
1819     }
1820
1821   /* Put EOF at end of known directives.  This covers "directives do
1822      not extend beyond the end of the line (description 6.10 part 2)".  */
1823   if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1824     {
1825       pfile->first_directive_token = first;
1826       cur_token++->type = CPP_EOF;
1827     }
1828
1829   first->flags |= BOL;
1830   if (first_token != 0)
1831     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1832        up the invocation of a function-like macro, new line is
1833        considered a normal white-space character.  */
1834     first->flags |= PREV_WHITE;
1835
1836   buffer->cur = cur;
1837   list->tokens_used = cur_token - list->tokens;
1838   pfile->in_lex_line = 0;
1839 }
1840
1841 /* Write the spelling of a token TOKEN, with any appropriate
1842    whitespace before it, to FP.  PREV is the previous token, which
1843    is used to determine if we need to shove in an extra space in order
1844    to avoid accidental token paste.  If WHITE is 0, do not insert any
1845    leading whitespace.  */
1846 static void
1847 output_token (pfile, fp, token, prev, white)
1848      cpp_reader *pfile;
1849      FILE *fp;
1850      const cpp_token *token, *prev;
1851      int white;
1852 {
1853   if (white)
1854     {
1855       int dummy;
1856
1857       if (token->col && (token->flags & BOL))
1858         {
1859           /* Supply enough whitespace to put this token in its original
1860              column.  Don't bother trying to reconstruct tabs; we can't
1861              get it right in general, and nothing ought to care.  (Yes,
1862              some things do care; the fault lies with them.)  */
1863           unsigned int spaces = token->col - 1;
1864
1865           while (spaces--)
1866             putc (' ', fp);
1867         }
1868       else if (token->flags & PREV_WHITE)
1869         putc (' ', fp);
1870       else
1871       /* Check for and prevent accidental token pasting.
1872          In addition to the cases handled by can_paste, consider
1873
1874          a + ++b - if there is not a space between the + and ++, it
1875          will be misparsed as a++ + b.  But + ## ++ doesn't produce
1876          a valid token.  */
1877         if (prev
1878             && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1879                 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1880                 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1881         putc (' ', fp);
1882     }
1883
1884   switch (TOKEN_SPELL (token))
1885     {
1886     case SPELL_OPERATOR:
1887       {
1888         const unsigned char *spelling;
1889
1890         if (token->flags & DIGRAPH)
1891           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1892         else if (token->flags & NAMED_OP)
1893           goto spell_ident;
1894         else
1895           spelling = TOKEN_NAME (token);
1896
1897         ufputs (spelling, fp);
1898       }
1899       break;
1900
1901     case SPELL_IDENT:
1902       spell_ident:
1903       ufputs (token->val.node->name, fp);
1904       break;
1905
1906     case SPELL_STRING:
1907       {
1908         int left, right, tag;
1909         switch (token->type)
1910           {
1911           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1912           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1913           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1914           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1915           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1916           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1917           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1918           }
1919         if (tag) putc (tag, fp);
1920         if (left) putc (left, fp);
1921         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1922         if (right) putc (right, fp);
1923       }
1924       break;
1925
1926     case SPELL_CHAR:
1927       putc (token->val.aux, fp);
1928       break;
1929
1930     case SPELL_NONE:
1931       /* Placemarker or EOF - no output.  (Macro args are handled
1932          elsewhere.  */
1933       break;
1934     }
1935 }
1936
1937 /* Dump the original user's spelling of argument index ARG_NO to the
1938    macro whose expansion is LIST.  */
1939 static void
1940 dump_param_spelling (fp, list, arg_no)
1941      FILE *fp;
1942      const cpp_toklist *list;
1943      unsigned int arg_no;
1944 {
1945   const U_CHAR *param = list->namebuf;
1946
1947   while (arg_no--)
1948     param += ustrlen (param) + 1;
1949   ufputs (param, fp);
1950 }
1951
1952 /* Output all the tokens of LIST, starting at TOKEN, to FP.  */
1953 void
1954 cpp_output_list (pfile, fp, list, token)
1955      cpp_reader *pfile;
1956      FILE *fp;
1957      const cpp_toklist *list;
1958      const cpp_token *token;
1959 {
1960   const cpp_token *limit = list->tokens + list->tokens_used;
1961   const cpp_token *prev = 0;
1962   int white = 0;
1963
1964   while (token < limit)
1965     {
1966       /* XXX Find some way we can write macro args from inside
1967          output_token/spell_token.  */
1968       if (token->type == CPP_MACRO_ARG)
1969         {
1970           if (white && token->flags & PREV_WHITE)
1971             putc (' ', fp);
1972           if (token->flags & STRINGIFY_ARG)
1973             putc ('#', fp);
1974           dump_param_spelling (fp, list, token->val.aux);
1975         }
1976       else
1977         output_token (pfile, fp, token, prev, white);
1978       if (token->flags & PASTE_LEFT)
1979         fputs (" ##", fp);
1980       prev = token;
1981       token++;
1982       white = 1;
1983     }
1984 }
1985
1986
1987 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1988    already contain the enough space to hold the token's spelling.
1989    Returns a pointer to the character after the last character
1990    written.  */
1991
1992 static unsigned char *
1993 spell_token (pfile, token, buffer)
1994      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1995      const cpp_token *token;
1996      unsigned char *buffer;
1997 {
1998   switch (TOKEN_SPELL (token))
1999     {
2000     case SPELL_OPERATOR:
2001       {
2002         const unsigned char *spelling;
2003         unsigned char c;
2004
2005         if (token->flags & DIGRAPH)
2006           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
2007         else if (token->flags & NAMED_OP)
2008           goto spell_ident;
2009         else
2010           spelling = TOKEN_NAME (token);
2011
2012         while ((c = *spelling++) != '\0')
2013           *buffer++ = c;
2014       }
2015       break;
2016
2017     case SPELL_IDENT:
2018       spell_ident:
2019       memcpy (buffer, token->val.node->name, token->val.node->length);
2020       buffer += token->val.node->length;
2021       break;
2022
2023     case SPELL_STRING:
2024       {
2025         int left, right, tag;
2026         switch (token->type)
2027           {
2028           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
2029           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
2030           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
2031           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
2032           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
2033           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
2034           default:              left = '\0'; right = '\0'; tag = '\0'; break;
2035           }
2036         if (tag) *buffer++ = tag;
2037         if (left) *buffer++ = left;
2038         memcpy (buffer, token->val.str.text, token->val.str.len);
2039         buffer += token->val.str.len;
2040         if (right) *buffer++ = right;
2041       }
2042       break;
2043
2044     case SPELL_CHAR:
2045       *buffer++ = token->val.aux;
2046       break;
2047
2048     case SPELL_NONE:
2049       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
2050       break;
2051     }
2052
2053   return buffer;
2054 }
2055
2056 /* Macro expansion algorithm.
2057
2058 Macro expansion is implemented by a single-pass algorithm; there are
2059 no rescan passes involved.  cpp_get_token expands just enough to be
2060 able to return a token to the caller; a consequence is that when it
2061 returns, the preprocessor can be in a state of mid-expansion.  The
2062 algorithm does not work by fully expanding a macro invocation into
2063 some kind of token list, and then returning them one by one.
2064
2065 Our expansion state is recorded in a context stack.  We start out with
2066 a single context on the stack, let's call it base context.  This
2067 consists of the token list returned by lex_line that forms the next
2068 logical line in the source file.
2069
2070 The current level in the context stack is stored in the cur_context
2071 member of the cpp_reader structure.  The context it references keeps,
2072 amongst other things, a count of how many tokens form that context and
2073 our position within those tokens.
2074
2075 Fundamentally, calling cpp_get_token will return the next token from
2076 the current context.  If we're at the end of the current context, that
2077 context is popped from the stack first, unless it is the base context,
2078 in which case the next logical line is lexed from the source file.
2079
2080 However, before returning the token, if it is a CPP_NAME token
2081 _cpp_get_token checks to see if it is a macro and if it is enabled.
2082 Each time it encounters a macro name, it calls push_macro_context.
2083 This function checks that the macro should be expanded (with
2084 is_macro_disabled), and if so pushes a new macro context on the stack
2085 which becomes the current context.  It then loops back to read the
2086 first token of the macro context.
2087
2088 A macro context basically consists of the token list representing the
2089 macro's replacement list, which was saved in the hash table by
2090 save_macro_expansion when its #define statement was parsed.  If the
2091 macro is function-like, it also contains the tokens that form the
2092 arguments to the macro.  I say more about macro arguments below, but
2093 for now just saying that each argument is a set of pointers to tokens
2094 is enough.
2095
2096 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2097 token.  This represents an argument passed to the macro, with the
2098 argument number stored in the token's AUX field.  The argument should
2099 be substituted; this is achieved by pushing an "argument context".  An
2100 argument context just refers to the tokens forming the argument,
2101 which are obtained directly from the macro context.  The STRINGIFY
2102 flag on a CPP_MACRO_ARG token indicates that the argument should be
2103 stringified.
2104
2105 Here are a few simple rules the context stack obeys:-
2106
2107   1) The lex_line token list is always context zero.
2108
2109   2) Context 1, if it exists, must be a macro context.
2110
2111   3) An argument context can only appear above a macro context.
2112
2113   4) A macro context can appear above the base context, another macro
2114   context, or an argument context.
2115
2116   5) These imply that the minimal level of an argument context is 2.
2117
2118 The only tricky thing left is ensuring that macros are enabled and
2119 disabled correctly.  The algorithm controls macro expansion by the
2120 level of the context a token is taken from in the context stack.  If a
2121 token is taken from a level equal to no_expand_level (a member of
2122 struct cpp_reader), no expansion is performed.
2123
2124 When popping a context off the stack, if no_expand_level equals the
2125 level of the popped context, it is reduced by one to match the new
2126 context level, so that expansion is still disabled.  It does not
2127 increase if a context is pushed, though.  It starts out life as
2128 UINT_MAX, which has the effect that initially macro expansion is
2129 enabled.  I explain how this mechanism works below.
2130
2131 The standard requires:-
2132
2133   1) Arguments to be fully expanded before substitution.
2134
2135   2) Stringified arguments to not be expanded, nor the tokens
2136   immediately surrounding a ## operator.
2137
2138   3) Continual rescanning until there are no more macros left to
2139   replace.
2140
2141   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2142   expanded again during later rescans.  This prevents infinite
2143   recursion.
2144
2145 The first thing to observe is that stage 3) is mostly redundant.
2146 Since a macro is disabled once it has been expanded, how can a rescan
2147 find an unexpanded macro name?  There are only two cases where this is
2148 possible:-
2149
2150   a) If the macro name results from a token paste operation.
2151
2152   b) If the macro in question is a function-like macro that hasn't
2153   already been expanded because previously there was not the required
2154   '(' token immediately following it.  This is only possible when an
2155   argument is substituted, and after substitution the last token of
2156   the argument can bind with a parenthesis appearing in the tokens
2157   following the substitution.  Note that if the '(' appears within the
2158   argument, the ')' must too, as expanding macro arguments cannot
2159   "suck in" tokens outside the argument.
2160
2161 So we tackle this as follows.  When parsing the macro invocation for
2162 arguments, we record the tokens forming each argument as a list of
2163 pointers to those tokens.  We do not expand any tokens that are "raw",
2164 i.e. directly from the macro invocation, but other tokens that come
2165 from (nested) argument substitution are fully expanded.
2166
2167 This is achieved by setting the no_expand_level to that of the macro
2168 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2169 forming an argument, because parse_args (indirectly) calls
2170 get_raw_token which automatically pushes argument contexts and traces
2171 into them.  Since these contexts are at a higher level than the
2172 no_expand_level, they get fully macro expanded.
2173
2174 "Raw" and non-raw tokens are separated in arguments by null pointers,
2175 with the policy that the initial state of an argument is raw.  If the
2176 first token is not raw, it should be preceded by a null pointer.  When
2177 tracing through the tokens of an argument context, each time
2178 get_raw_token encounters a null pointer, it toggles the flag
2179 CONTEXT_RAW.
2180
2181 This flag, when set, indicates to is_macro_disabled that we are
2182 reading raw tokens which should be macro-expanded.  Similarly, if
2183 clear, is_macro_disabled suppresses re-expansion.
2184
2185 It's probably time for an example.
2186
2187 #define hash #
2188 #define str(x) #x
2189 #define xstr(y) str(y hash)
2190 str(hash)                       // "hash"
2191 xstr(hash)                      // "# hash"
2192
2193 In the invocation of str, parse_args turns off macro expansion and so
2194 parses the argument as <hash>.  This is the only token (pointer)
2195 passed as the argument to str.  Since <hash> is raw there is no need
2196 for an initial null pointer.  stringify_arg is called from
2197 get_raw_token when tracing through the expansion of str, since the
2198 argument has the STRINGIFY flag set.  stringify_arg turns off
2199 macro expansion by setting the no_expand_level to that of the argument
2200 context.  Thus it gets the token <hash> and stringifies it to "hash"
2201 correctly.
2202
2203 Similarly xstr is passed <hash>.  However, when parse_args is parsing
2204 the invocation of str() in xstr's expansion, get_raw_token encounters
2205 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2206 an argument context, and enters the tokens of the argument,
2207 i.e. <hash>.  This is at a higher context level than parse_args
2208 disabled, and so is_macro_disabled permits expansion of it and a macro
2209 context is pushed on top of the argument context.  This contains the
2210 <#> token, and the end result is that <hash> is macro expanded.
2211 However, after popping off the argument context, the <hash> of xstr's
2212 expansion does not get macro expanded because we're back at the
2213 no_expand_level.  The end result is that the argument passed to str is
2214 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2215 raw, <#> is not raw, but then <hash> is.
2216
2217 */
2218
2219
2220 /* Free the storage allocated for macro arguments.  */
2221 static void
2222 free_macro_args (args)
2223      macro_args *args;
2224 {
2225   if (args->tokens)
2226     free ((PTR) args->tokens);
2227   free (args->ends);
2228   free (args);
2229 }
2230
2231 /* Determines if a macro has been already used (and is therefore
2232    disabled).  */
2233 static int
2234 is_macro_disabled (pfile, expansion, token)
2235      cpp_reader *pfile;
2236      const cpp_toklist *expansion;
2237      const cpp_token *token;
2238 {
2239   cpp_context *context = CURRENT_CONTEXT (pfile);
2240
2241   /* Arguments on either side of ## are inserted in place without
2242      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2243      occurs during a later rescan pass.  The effect is that we expand
2244      iff we would as part of the macro's expansion list, so we should
2245      drop to the macro's context.  */
2246   if (IS_ARG_CONTEXT (context))
2247     {
2248       if (token->flags & PASTED)
2249         context--;
2250       else if (!(context->flags & CONTEXT_RAW))
2251         return 1;
2252       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2253         context--;
2254     }
2255
2256   /* Have we already used this macro?  */
2257   while (context->level > 0)
2258     {
2259       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2260         return 1;
2261       /* Raw argument tokens are judged based on the token list they
2262          came from.  */
2263       if (context->flags & CONTEXT_RAW)
2264         context = pfile->contexts + context->level;
2265       else
2266         context--;
2267     }
2268
2269   /* Function-like macros may be disabled if the '(' is not in the
2270      current context.  We check this without disrupting the context
2271      stack.  */
2272   if (expansion->paramc >= 0)
2273     {
2274       const cpp_token *next;
2275       unsigned int prev_nme;
2276
2277       context = CURRENT_CONTEXT (pfile);
2278       /* Drop down any contexts we're at the end of: the '(' may
2279          appear in lower macro expansions, or in the rest of the file.  */
2280       while (context->posn == context->count && context > pfile->contexts)
2281         {
2282           context--;
2283           /* If we matched, we are disabled, as we appear in the
2284              expansion of each macro we meet.  */
2285           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2286             return 1;
2287         }
2288
2289       prev_nme = pfile->no_expand_level;
2290       pfile->no_expand_level = context - pfile->contexts;
2291       next = _cpp_get_token (pfile);
2292       restore_macro_expansion (pfile, prev_nme);
2293       if (next->type != CPP_OPEN_PAREN)
2294         {
2295           _cpp_push_token (pfile, next);
2296           if (CPP_WTRADITIONAL (pfile))
2297             cpp_warning (pfile,
2298          "function macro %s must be used with arguments in traditional C",
2299                          token->val.node->name);
2300           return 1;
2301         }
2302     }
2303
2304   return 0;
2305 }
2306
2307 /* Add a token to the set of tokens forming the arguments to the macro
2308    being parsed in parse_args.  */
2309 static void
2310 save_token (args, token)
2311      macro_args *args;
2312      const cpp_token *token;
2313 {
2314   if (args->used == args->capacity)
2315     {
2316       args->capacity += args->capacity + 100;
2317       args->tokens = (const cpp_token **)
2318         xrealloc ((PTR) args->tokens,
2319                   args->capacity * sizeof (const cpp_token *));
2320     }
2321   args->tokens[args->used++] = token;
2322 }
2323
2324 /* Take and save raw tokens until we finish one argument.  Empty
2325    arguments are saved as a single CPP_PLACEMARKER token.  */
2326 static const cpp_token *
2327 parse_arg (pfile, var_args, paren_context, args, pcount)
2328      cpp_reader *pfile;
2329      int var_args;
2330      unsigned int paren_context;
2331      macro_args *args;
2332      unsigned int *pcount;
2333 {
2334   const cpp_token *token;
2335   unsigned int paren = 0, count = 0;
2336   int raw, was_raw = 1;
2337
2338   for (count = 0;; count++)
2339     {
2340       token = _cpp_get_token (pfile);
2341
2342       switch (token->type)
2343         {
2344         default:
2345           break;
2346
2347         case CPP_OPEN_PAREN:
2348           paren++;
2349           break;
2350
2351         case CPP_CLOSE_PAREN:
2352           if (paren-- != 0)
2353             break;
2354           goto out;
2355
2356         case CPP_COMMA:
2357           /* Commas are not terminators within parantheses or var_args.  */
2358           if (paren || var_args)
2359             break;
2360           goto out;
2361
2362         case CPP_EOF:           /* Error reported by caller.  */
2363           goto out;
2364         }
2365
2366       raw = pfile->cur_context <= paren_context;
2367       if (raw != was_raw)
2368         {
2369           was_raw = raw;
2370           save_token (args, 0);
2371           count++;
2372         }
2373       save_token (args, token);
2374     }
2375
2376  out:
2377   if (count == 0)
2378     {
2379       /* Duplicate the placemarker.  Then we can set its flags and
2380          position and safely be using more than one.  */
2381       save_token (args, duplicate_token (pfile, &placemarker_token));
2382       count++;
2383     }
2384
2385   *pcount = count;
2386   return token;
2387 }
2388
/* True if the argument that starts at offset O of argument list A is
   empty.  The entry examined is tokens[O] itself or, when tokens[O] is
   a null pointer, the entry after it; the argument is empty when that
   entry is a CPP_PLACEMARKER token.  A and O are evaluated more than
   once, so they must be free of side effects.  */

#define empty_argument(A, O) \
  (((A)->tokens[O] != 0 ? (A)->tokens[O] : (A)->tokens[(O) + 1])->type \
   == CPP_PLACEMARKER)
2396
2397 /* Parse the arguments making up a macro invocation.  Nested arguments
2398    are automatically macro expanded, but immediate macros are not
2399    expanded; this enables e.g. operator # to work correctly.  Returns
2400    non-zero on error.  */
2401 static int
2402 parse_args (pfile, hp, args)
2403      cpp_reader *pfile;
2404      cpp_hashnode *hp;
2405      macro_args *args;
2406 {
2407   const cpp_token *token;
2408   const cpp_toklist *macro;
2409   unsigned int total = 0;
2410   unsigned int paren_context = pfile->cur_context;
2411   int argc = 0;
2412
2413   macro = hp->value.expansion;
2414   do
2415     {
2416       unsigned int count;
2417
2418       token = parse_arg (pfile, (argc + 1 == macro->paramc
2419                                  && (macro->flags & VAR_ARGS)),
2420                          paren_context, args, &count);
2421       if (argc < macro->paramc)
2422         {
2423           total += count;
2424           args->ends[argc] = total;
2425         }
2426       argc++;
2427     }
2428   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2429
2430   if (token->type == CPP_EOF)
2431     {
2432       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2433       return 1;
2434     }
2435   else if (argc < macro->paramc)
2436     {
2437       /* A rest argument is allowed to not appear in the invocation at all.
2438          e.g. #define debug(format, args...) ...
2439          debug("string");
2440          This is exactly the same as if the rest argument had received no
2441          tokens - debug("string",);  This extension is deprecated.  */
2442
2443       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2444         {
2445           /* Duplicate the placemarker.  Then we can set its flags and
2446              position and safely be using more than one.  */
2447           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2448           pm->flags = VOID_REST;
2449           save_token (args, pm);
2450           args->ends[argc] = total + 1;
2451
2452           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2453             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2454
2455           return 0;
2456         }
2457       else
2458         {
2459           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2460           return 1;
2461         }
2462     }
2463   /* An empty argument to an empty function-like macro is fine.  */
2464   else if (argc > macro->paramc
2465            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2466     {
2467       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2468       return 1;
2469     }
2470
2471   return 0;
2472 }
2473
2474 /* Adds backslashes before all backslashes and double quotes appearing
2475    in strings.  Non-printable characters are converted to octal.  */
2476 static U_CHAR *
2477 quote_string (dest, src, len)
2478      U_CHAR *dest;
2479      const U_CHAR *src;
2480      unsigned int len;
2481 {
2482   while (len--)
2483     {
2484       U_CHAR c = *src++;
2485
2486       if (c == '\\' || c == '"')
2487         {
2488           *dest++ = '\\';
2489           *dest++ = c;
2490         }
2491       else
2492         {
2493           if (ISPRINT (c))
2494             *dest++ = c;
2495           else
2496             {
2497               sprintf ((char *) dest, "\\%03o", c);
2498               dest += 4;
2499             }
2500         }
2501     }
2502
2503   return dest;
2504 }
2505
2506 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2507    CPP_STRING token containing TEXT in quoted form.  */
2508 static cpp_token *
2509 make_string_token (token, text, len)
2510      cpp_token *token;
2511      const U_CHAR *text;
2512      unsigned int len;
2513 {
2514   U_CHAR *buf;
2515
2516   buf = (U_CHAR *) xmalloc (len * 4);
2517   token->type = CPP_STRING;
2518   token->flags = 0;
2519   token->val.str.text = buf;
2520   token->val.str.len = quote_string (buf, text, len) - buf;
2521   return token;
2522 }
2523
2524 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2525    evaluating to NUMBER.  */
2526 static cpp_token *
2527 alloc_number_token (pfile, number)
2528      cpp_reader *pfile;
2529      int number;
2530 {
2531   cpp_token *result;
2532   char *buf;
2533
2534   result = get_temp_token (pfile);
2535   buf = xmalloc (20);
2536   sprintf (buf, "%d", number);
2537
2538   result->type = CPP_NUMBER;
2539   result->flags = 0;
2540   result->val.str.text = (U_CHAR *) buf;
2541   result->val.str.len = strlen (buf);
2542   return result;
2543 }
2544
2545 /* Returns a temporary token from the temporary token store of PFILE.  */
2546 static cpp_token *
2547 get_temp_token (pfile)
2548      cpp_reader *pfile;
2549 {
2550   if (pfile->temp_used == pfile->temp_alloced)
2551     {
2552       if (pfile->temp_used == pfile->temp_cap)
2553         {
2554           pfile->temp_cap += pfile->temp_cap + 20;
2555           pfile->temp_tokens = (cpp_token **) xrealloc
2556             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2557         }
2558       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2559         (sizeof (cpp_token));
2560     }
2561
2562   return pfile->temp_tokens[pfile->temp_used++];
2563 }
2564
2565 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2566 static void
2567 release_temp_tokens (pfile)
2568      cpp_reader *pfile;
2569 {
2570   while (pfile->temp_used)
2571     {
2572       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2573
2574       if (TOKEN_SPELL (token) == SPELL_STRING)
2575         {
2576           free ((char *) token->val.str.text);
2577           token->val.str.text = 0;
2578         }
2579     }
2580 }
2581
2582 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2583 void
2584 _cpp_free_temp_tokens (pfile)
2585      cpp_reader *pfile;
2586 {
2587   if (pfile->temp_tokens)
2588     {
2589       /* It is possible, though unlikely (looking for '(' of a funlike
2590          macro into EOF), that we haven't released the tokens yet.  */
2591       release_temp_tokens (pfile);
2592       while (pfile->temp_alloced)
2593         free (pfile->temp_tokens[--pfile->temp_alloced]);
2594       free (pfile->temp_tokens);
2595     }
2596
2597   if (pfile->date)
2598     {
2599       free ((char *) pfile->date->val.str.text);
2600       free (pfile->date);
2601       free ((char *) pfile->time->val.str.text);
2602       free (pfile->time);
2603     }
2604 }
2605
2606 /* Copy TOKEN into a temporary token from PFILE's store.  */
2607 static cpp_token *
2608 duplicate_token (pfile, token)
2609      cpp_reader *pfile;
2610      const cpp_token *token;
2611 {
2612   cpp_token *result = get_temp_token (pfile);
2613
2614   *result = *token;
2615   if (TOKEN_SPELL (token) == SPELL_STRING)
2616     {
2617       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2618       memcpy (buff, token->val.str.text, token->val.str.len);
2619       result->val.str.text = buff;
2620     }
2621   return result;
2622 }
2623
2624 /* Determine whether two tokens can be pasted together, and if so,
2625    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2626    be pasted, or the appropriate type for the merged token if they
2627    can.  */
2628 static enum cpp_ttype
2629 can_paste (pfile, token1, token2, digraph)
2630      cpp_reader * pfile;
2631      const cpp_token *token1, *token2;
2632      int* digraph;
2633 {
2634   enum cpp_ttype a = token1->type, b = token2->type;
2635   int cxx = CPP_OPTION (pfile, cplusplus);
2636
2637   /* Treat named operators as if they were ordinary NAMEs.  */
2638   if (token1->flags & NAMED_OP)
2639     a = CPP_NAME;
2640   if (token2->flags & NAMED_OP)
2641     b = CPP_NAME;
2642
2643   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2644     return a + (CPP_EQ_EQ - CPP_EQ);
2645
2646   switch (a)
2647     {
2648     case CPP_GREATER:
2649       if (b == a) return CPP_RSHIFT;
2650       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2651       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2652       break;
2653     case CPP_LESS:
2654       if (b == a) return CPP_LSHIFT;
2655       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2656       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2657       if (CPP_OPTION (pfile, digraphs))
2658         {
2659           if (b == CPP_COLON)
2660             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2661           if (b == CPP_MOD)
2662             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2663         }
2664       break;
2665
2666     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2667     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2668     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2669
2670     case CPP_MINUS:
2671       if (b == a)               return CPP_MINUS_MINUS;
2672       if (b == CPP_GREATER)     return CPP_DEREF;
2673       break;
2674     case CPP_COLON:
2675       if (b == a && cxx)        return CPP_SCOPE;
2676       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2677         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2678       break;
2679
2680     case CPP_MOD:
2681       if (CPP_OPTION (pfile, digraphs))
2682         {
2683           if (b == CPP_GREATER)
2684             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2685           if (b == CPP_COLON)
2686             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2687         }
2688       break;
2689     case CPP_DEREF:
2690       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2691       break;
2692     case CPP_DOT:
2693       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2694       if (b == CPP_NUMBER)      return CPP_NUMBER;
2695       break;
2696
2697     case CPP_HASH:
2698       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2699         /* %:%: digraph */
2700         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2701       break;
2702
2703     case CPP_NAME:
2704       if (b == CPP_NAME)        return CPP_NAME;
2705       if (b == CPP_NUMBER
2706           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2707       if (b == CPP_CHAR
2708           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2709       if (b == CPP_STRING
2710           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2711       break;
2712
2713     case CPP_NUMBER:
2714       if (b == CPP_NUMBER)      return CPP_NUMBER;
2715       if (b == CPP_NAME)        return CPP_NUMBER;
2716       if (b == CPP_DOT)         return CPP_NUMBER;
2717       /* Numbers cannot have length zero, so this is safe.  */
2718       if ((b == CPP_PLUS || b == CPP_MINUS)
2719           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2720         return CPP_NUMBER;
2721       break;
2722
2723     case CPP_OTHER:
2724       if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2725         {
2726           if (b == CPP_NAME)    return CPP_NAME;
2727           if (b == CPP_STRING)  return CPP_OSTRING;
2728         }
2729
2730     default:
2731       break;
2732     }
2733
2734   return CPP_EOF;
2735 }
2736
2737 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2738 static const cpp_token *
2739 maybe_paste_with_next (pfile, token)
2740      cpp_reader *pfile;
2741      const cpp_token *token;
2742 {
2743   cpp_token *pasted;
2744   const cpp_token *second;
2745   cpp_context *context = CURRENT_CONTEXT (pfile);
2746
2747   /* Is this token on the LHS of ## ? */
2748
2749   while ((token->flags & PASTE_LEFT)
2750          || ((context->flags & CONTEXT_PASTEL)
2751              && context->posn == context->count))
2752     {
2753       /* Suppress macro expansion for next token, but don't conflict
2754          with the other method of suppression.  If it is an argument,
2755          macro expansion within the argument will still occur.  */
2756       pfile->paste_level = pfile->cur_context;
2757       second = _cpp_get_token (pfile);
2758       pfile->paste_level = 0;
2759
2760       /* Ignore placemarker argument tokens (cannot be from an empty
2761          macro since macros are not expanded).  */
2762       if (token->type == CPP_PLACEMARKER)
2763         pasted = duplicate_token (pfile, second);
2764       else if (second->type == CPP_PLACEMARKER)
2765         {
2766           /* GCC has special extended semantics for , ## b where b is
2767              a varargs parameter: the comma disappears if b was given
2768              no actual arguments (not merely if b is an empty
2769              argument).  */
2770           if (token->type == CPP_COMMA && second->flags & VOID_REST)
2771             pasted = duplicate_token (pfile, second);
2772           else
2773             pasted = duplicate_token (pfile, token);
2774         }
2775       else
2776         {
2777           int digraph = 0;
2778           enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2779
2780           if (type == CPP_EOF)
2781             {
2782               if (CPP_OPTION (pfile, warn_paste))
2783                 {
2784                   /* Do not complain about , ## <whatever> if
2785                      <whatever> came from a variable argument, because
2786                      the author probably intended the ## to trigger
2787                      the special extended semantics (see above).  */
2788                   if (token->type == CPP_COMMA
2789                       && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2790                       && ON_REST_ARG (CURRENT_CONTEXT (pfile)))
2791                     /* no warning */;
2792                   else
2793                     cpp_warning (pfile,
2794                         "pasting would not give a valid preprocessing token");
2795                 }
2796               _cpp_push_token (pfile, second);
2797               return token;
2798             }
2799
2800           if (type == CPP_NAME || type == CPP_NUMBER)
2801             {
2802               /* Join spellings.  */
2803               U_CHAR *buf, *end;
2804
2805               pasted = get_temp_token (pfile);
2806               buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2807               end = spell_token (pfile, token, buf);
2808               end = spell_token (pfile, second, end);
2809               *end = '\0';
2810
2811               if (type == CPP_NAME)
2812                 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2813               else
2814                 {
2815                   pasted->val.str.text = uxstrdup (buf);
2816                   pasted->val.str.len = end - buf;
2817                 }
2818             }
2819           else if (type == CPP_WCHAR || type == CPP_WSTRING
2820                    || type == CPP_OSTRING)
2821             pasted = duplicate_token (pfile, second);
2822           else
2823             {
2824               pasted = get_temp_token (pfile);
2825               pasted->val.integer = 0;
2826             }
2827
2828           pasted->type = type;
2829           pasted->flags = digraph ? DIGRAPH : 0;
2830
2831           if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2832             {
2833               pasted->type = pasted->val.node->value.code;
2834               pasted->flags |= NAMED_OP;
2835             }
2836         }
2837
2838       /* The pasted token gets the whitespace flags and position of the
2839          first token, the PASTE_LEFT flag of the second token, plus the
2840          PASTED flag to indicate it is the result of a paste.  However, we
2841          want to preserve the DIGRAPH flag.  */
2842       pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2843       pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2844                         | (second->flags & PASTE_LEFT) | PASTED);
2845       pasted->col = token->col;
2846       pasted->line = token->line;
2847
2848       /* See if there is another token to be pasted onto the one we just
2849          constructed.  */
2850       token = pasted;
2851       context = CURRENT_CONTEXT (pfile);
2852       /* and loop */
2853     }
2854   return token;
2855 }
2856
2857 /* Convert a token sequence to a single string token according to the
2858    rules of the ISO C #-operator.  */
2859 #define INIT_SIZE 200
2860 static cpp_token *
2861 stringify_arg (pfile, token)
2862      cpp_reader *pfile;
2863      const cpp_token *token;
2864 {
2865   cpp_token *result;
2866   unsigned char *main_buf;
2867   unsigned int prev_value, backslash_count = 0;
2868   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2869
2870   push_arg_context (pfile, token);
2871   prev_value  = prevent_macro_expansion (pfile);
2872   main_buf = (unsigned char *) xmalloc (buf_cap);
2873
2874   result = get_temp_token (pfile);
2875   ASSIGN_FLAGS_AND_POS (result, token);
2876
2877   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2878     {
2879       int escape;
2880       unsigned char *buf;
2881       unsigned int len = TOKEN_LEN (token);
2882
2883       if (token->type == CPP_PLACEMARKER)
2884         continue;
2885
2886       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2887                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2888       if (escape)
2889         len *= 4 + 1;
2890
2891       if (buf_used + len > buf_cap)
2892         {
2893           buf_cap = buf_used + len + INIT_SIZE;
2894           main_buf = xrealloc (main_buf, buf_cap);
2895         }
2896
2897       if (whitespace && (token->flags & PREV_WHITE))
2898         main_buf[buf_used++] = ' ';
2899
2900       if (escape)
2901         buf = (unsigned char *) xmalloc (len);
2902       else
2903         buf = main_buf + buf_used;
2904
2905       len = spell_token (pfile, token, buf) - buf;
2906       if (escape)
2907         {
2908           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2909           free (buf);
2910         }
2911       else
2912         buf_used += len;
2913
2914       whitespace = 1;
2915       if (token->type == CPP_BACKSLASH)
2916         backslash_count++;
2917       else
2918         backslash_count = 0;
2919     }
2920
2921   /* Ignore the final \ of invalid string literals.  */
2922   if (backslash_count & 1)
2923     {
2924       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2925       buf_used--;
2926     }
2927
2928   result->type = CPP_STRING;
2929   result->val.str.text = main_buf;
2930   result->val.str.len = buf_used;
2931   restore_macro_expansion (pfile, prev_value);
2932   return result;
2933 }
2934
2935 /* Allocate more room on the context stack of PFILE.  */
2936 static void
2937 expand_context_stack (pfile)
2938      cpp_reader *pfile;
2939 {
2940   pfile->context_cap += pfile->context_cap + 20;
2941   pfile->contexts = (cpp_context *)
2942     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2943 }
2944
2945 /* Push the context of macro NODE onto the context stack.  TOKEN is
2946    the CPP_NAME token invoking the macro.  */
2947 static int
2948 push_macro_context (pfile, token)
2949      cpp_reader *pfile;
2950      const cpp_token *token;
2951 {
2952   unsigned char orig_flags;
2953   macro_args *args;
2954   cpp_context *context;
2955   cpp_hashnode *node = token->val.node;
2956
2957   /* Token's flags may change when parsing args containing a nested
2958      invocation of this macro.  */
2959   orig_flags = token->flags & (PREV_WHITE | BOL);
2960   args = 0;
2961   if (node->value.expansion->paramc >= 0)
2962     {
2963       unsigned int error, prev_nme;
2964
2965       /* Allocate room for the argument contexts, and parse them.  */
2966       args  = (macro_args *) xmalloc (sizeof (macro_args));
2967       args->ends = (unsigned int *)
2968         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2969       args->tokens = 0;
2970       args->capacity = 0;
2971       args->used = 0;
2972       args->level = pfile->cur_context;
2973
2974       prev_nme = prevent_macro_expansion (pfile);
2975       pfile->args = args;
2976       error = parse_args (pfile, node, args);
2977       pfile->args = 0;
2978       restore_macro_expansion (pfile, prev_nme);
2979       if (error)
2980         {
2981           free_macro_args (args);
2982           return 1;
2983         }
2984     }
2985
2986   /* Now push its context.  */
2987   pfile->cur_context++;
2988   if (pfile->cur_context == pfile->context_cap)
2989     expand_context_stack (pfile);
2990
2991   context = CURRENT_CONTEXT (pfile);
2992   context->u.list = node->value.expansion;
2993   context->args = args;
2994   context->posn = 0;
2995   context->count = context->u.list->tokens_used;
2996   context->level = pfile->cur_context;
2997   context->flags = 0;
2998   context->pushed_token = 0;
2999
3000   /* Set the flags of the first token.  We know there must
3001      be one, empty macros are a single placemarker token.  */
3002   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
3003
3004   return 0;
3005 }
3006
3007 /* Push an argument to the current macro onto the context stack.
3008    TOKEN is the MACRO_ARG token representing the argument expansion.  */
3009 static void
3010 push_arg_context (pfile, token)
3011      cpp_reader *pfile;
3012      const cpp_token *token;
3013 {
3014   cpp_context *context;
3015   macro_args *args;
3016
3017   pfile->cur_context++;
3018   if (pfile->cur_context == pfile->context_cap)
3019       expand_context_stack (pfile);
3020
3021   context = CURRENT_CONTEXT (pfile);
3022   args = context[-1].args;
3023
3024   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
3025   context->u.arg = args->tokens + context->count;
3026   context->count = args->ends[token->val.aux] - context->count;
3027   context->args = 0;
3028   context->posn = 0;
3029   context->level = args->level;
3030   context->flags = CONTEXT_ARG | CONTEXT_RAW;
3031   context->pushed_token = 0;
3032
3033   /* Set the flags of the first token.  There is one.  */
3034   {
3035     const cpp_token *first = context->u.arg[0];
3036     if (!first)
3037       first = context->u.arg[1];
3038
3039     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
3040                           token->flags & (PREV_WHITE | BOL));
3041   }
3042
3043   if (token->flags & PASTE_LEFT)
3044     context->flags |= CONTEXT_PASTEL;
3045   if (pfile->paste_level)
3046     context->flags |= CONTEXT_PASTER;
3047 }
3048
3049 /* "Unget" a token.  It is effectively inserted in the token queue and
3050    will be returned by the next call to get_raw_token.  */
3051 void
3052 _cpp_push_token (pfile, token)
3053      cpp_reader *pfile;
3054      const cpp_token *token;
3055 {
3056   cpp_context *context = CURRENT_CONTEXT (pfile);
3057
3058   if (context->posn > 0)
3059     {
3060       const cpp_token *prev;
3061       if (IS_ARG_CONTEXT (context))
3062         prev = context->u.arg[context->posn - 1];
3063       else
3064         prev = &context->u.list->tokens[context->posn - 1];
3065
3066       if (prev == token)
3067         {
3068           context->posn--;
3069           return;
3070         }
3071     }
3072
3073   if (context->pushed_token)
3074     cpp_ice (pfile, "two tokens pushed in a row");
3075   if (token->type != CPP_EOF)
3076     context->pushed_token = token;
3077   /* Don't push back a directive's CPP_EOF, step back instead.  */
3078   else if (pfile->cur_context == 0)
3079     pfile->contexts[0].posn--;
3080 }
3081
3082 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
3083    introducing the directive.  */
3084 static void
3085 process_directive (pfile, token)
3086      cpp_reader *pfile;
3087      const cpp_token *token;
3088 {
3089   const struct directive *d = pfile->token_list.directive;
3090   int prev_nme = 0;
3091
3092   /* Skip over the directive name.  */
3093   if (token[1].type == CPP_NAME)
3094     _cpp_get_raw_token (pfile);
3095   else if (token[1].type != CPP_NUMBER)
3096     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
3097
3098   if (! (d->flags & EXPAND))
3099     prev_nme = prevent_macro_expansion (pfile);
3100   (void) (*d->handler) (pfile);
3101   if (! (d->flags & EXPAND))
3102     restore_macro_expansion (pfile, prev_nme);
3103   _cpp_skip_rest_of_line (pfile);
3104 }
3105
3106 /* The external interface to return the next token.  All macro
3107    expansion and directive processing is handled internally, the
3108    caller only ever sees the output after preprocessing.  */
3109 const cpp_token *
3110 cpp_get_token (pfile)
3111      cpp_reader *pfile;
3112 {
3113   const cpp_token *token;
3114   /* Loop till we hit a non-directive, non-placemarker token.  */
3115   for (;;)
3116     {
3117       token = _cpp_get_token (pfile);
3118
3119       if (token->type == CPP_PLACEMARKER)
3120         continue;
3121
3122       if (token->type == CPP_HASH && token->flags & BOL
3123           && pfile->token_list.directive)
3124         {
3125           process_directive (pfile, token);
3126           continue;
3127         }
3128
3129       return token;
3130     }
3131 }
3132
3133 /* The internal interface to return the next token.  There are two
3134    differences between the internal and external interfaces: the
3135    internal interface may return a PLACEMARKER token, and it does not
3136    process directives.  */
3137 const cpp_token *
3138 _cpp_get_token (pfile)
3139      cpp_reader *pfile;
3140 {
3141   const cpp_token *token, *old_token;
3142   cpp_hashnode *node;
3143
3144   /* Loop until we hit a non-macro token.  */
3145   for (;;)
3146     {
3147       token = get_raw_token (pfile);
3148
3149       /* Short circuit EOF. */
3150       if (token->type == CPP_EOF)
3151         return token;
3152
3153       /* If we are skipping... */
3154       if (pfile->skipping)
3155         {
3156           /* we still have to process directives,  */
3157           if (pfile->token_list.directive)
3158             return token;
3159
3160           /* but everything else is ignored.  */
3161           _cpp_skip_rest_of_line (pfile);
3162           continue;
3163         }
3164
3165       /* If there's a potential control macro and we get here, then that
3166          #ifndef didn't cover the entire file and its argument shouldn't
3167          be taken as a control macro.  */
3168       pfile->potential_control_macro = 0;
3169
3170       /* If we are rescanning preprocessed input, no macro expansion or
3171          token pasting may occur.  */
3172       if (CPP_OPTION (pfile, preprocessed))
3173         return token;
3174
3175       old_token = token;
3176
3177       /* See if there's a token to paste with this one.  */
3178       if (!pfile->paste_level)
3179         token = maybe_paste_with_next (pfile, token);
3180
3181       /* If it isn't a macro, return it now.  */
3182       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3183         return token;
3184
3185       /* Is macro expansion disabled in general, or are we in the
3186          middle of a token paste, or was this token just pasted?
3187          (Note we don't check token->flags & PASTED, because that
3188          counts tokens that were pasted at some point in the past,
3189          we're only interested in tokens that were pasted by this call
3190          to maybe_paste_with_next.)  */
3191       if (pfile->no_expand_level == pfile->cur_context
3192           || pfile->paste_level
3193           || (token != old_token
3194               && pfile->no_expand_level + 1 == pfile->cur_context))
3195         return token;
3196
3197       node = token->val.node;
3198       if (node->type != T_MACRO)
3199         return special_symbol (pfile, node, token);
3200
3201       if (is_macro_disabled (pfile, node->value.expansion, token))
3202         return token;
3203
3204       if (push_macro_context (pfile, token))
3205         return token;
3206       /* else loop */
3207     }
3208 }
3209
3210 /* Returns the next raw token, i.e. without performing macro
3211    expansion.  Argument contexts are automatically entered.  */
3212 static const cpp_token *
3213 get_raw_token (pfile)
3214      cpp_reader *pfile;
3215 {
3216   const cpp_token *result;
3217   cpp_context *context;
3218
3219   for (;;)
3220     {
3221       context = CURRENT_CONTEXT (pfile);
3222       if (context->pushed_token)
3223         {
3224           result = context->pushed_token;
3225           context->pushed_token = 0;
3226           return result;        /* Cannot be a CPP_MACRO_ARG */
3227         }
3228       else if (context->posn == context->count)
3229         {
3230           if (pop_context (pfile))
3231             return &eof_token;
3232           continue;
3233         }
3234       else if (IS_ARG_CONTEXT (context))
3235         {
3236           result = context->u.arg[context->posn++];
3237           if (result == 0)
3238             {
3239               context->flags ^= CONTEXT_RAW;
3240               result = context->u.arg[context->posn++];
3241             }
3242           return result;        /* Cannot be a CPP_MACRO_ARG */
3243         }
3244
3245       result = &context->u.list->tokens[context->posn++];
3246
3247       if (result->type != CPP_MACRO_ARG)
3248         return result;
3249
3250       if (result->flags & STRINGIFY_ARG)
3251         return stringify_arg (pfile, result);
3252
3253       push_arg_context (pfile, result);
3254     }
3255 }
3256
3257 /* Internal interface to get the token without macro expanding.  */
3258 const cpp_token *
3259 _cpp_get_raw_token (pfile)
3260      cpp_reader *pfile;
3261 {
3262   int prev_nme = prevent_macro_expansion (pfile);
3263   const cpp_token *result = _cpp_get_token (pfile);
3264   restore_macro_expansion (pfile, prev_nme);
3265   return result;
3266 }
3267
3268 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3269    list should be overwritten, or zero if we need to append
3270    (typically, if we are within the arguments to a macro, or looking
3271    for the '(' to start a function-like macro invocation).  */
3272 static int
3273 lex_next (pfile, clear)
3274      cpp_reader *pfile;
3275      int clear;
3276 {
3277   cpp_toklist *list = &pfile->token_list;
3278   const cpp_token *old_list = list->tokens;
3279   unsigned int old_used = list->tokens_used;
3280
3281   if (clear)
3282     {
3283       /* Release all temporary tokens.  */
3284       _cpp_clear_toklist (list);
3285       pfile->contexts[0].posn = 0;
3286       if (pfile->temp_used)
3287         release_temp_tokens (pfile);
3288     }
3289   lex_line (pfile, list);
3290   pfile->contexts[0].count = list->tokens_used;
3291
3292   if (!clear && pfile->args)
3293     {
3294       /* Fix up argument token pointers.  */
3295       if (old_list != list->tokens)
3296         {
3297           unsigned int i;
3298
3299           for (i = 0; i < pfile->args->used; i++)
3300             {
3301               const cpp_token *token = pfile->args->tokens[i];
3302               if (token >= old_list && token < old_list + old_used)
3303                 pfile->args->tokens[i] = (const cpp_token *)
3304                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3305             }
3306         }
3307
3308       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3309          tokens within the list of arguments that would otherwise act as
3310          preprocessing directives, the behavior is undefined.
3311
3312          This implementation will report a hard error and treat the
3313          'sequence of preprocessing tokens' as part of the macro argument,
3314          not a directive.
3315
3316          Note if pfile->args == 0, we're OK since we're only inside a
3317          macro argument after a '('.  */
3318       if (list->directive)
3319         {
3320           cpp_error_with_line (pfile, list->tokens[old_used].line,
3321                                list->tokens[old_used].col,
3322                                "#%s may not be used inside a macro argument",
3323                                list->directive->name);
3324           return 1;
3325         }
3326     }
3327
3328   return 0;
3329 }
3330
3331 /* Pops a context off the context stack.  If we're at the bottom, lexes
3332    the next logical line.  Returns EOF if we're at the end of the
3333    argument list to the # operator, or we should not "overflow"
3334    into the rest of the file (e.g. 6.10.3.1.1).  */
3335 static int
3336 pop_context (pfile)
3337      cpp_reader *pfile;
3338 {
3339   cpp_context *context;
3340
3341   if (pfile->cur_context == 0)
3342     {
3343       /* If we are currently processing a directive, do not advance.  6.10
3344          paragraph 2: A new-line character ends the directive even if it
3345          occurs within what would otherwise be an invocation of a
3346          function-like macro.  */
3347       if (pfile->token_list.directive)
3348         return 1;
3349
3350       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3351     }
3352
3353   /* Argument contexts, when parsing args or handling # operator
3354      return CPP_EOF at the end.  */
3355   context = CURRENT_CONTEXT (pfile);
3356   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3357     return 1;
3358
3359   /* Free resources when leaving macro contexts.  */
3360   if (context->args)
3361     free_macro_args (context->args);
3362
3363   if (pfile->cur_context == pfile->no_expand_level)
3364     pfile->no_expand_level--;
3365   pfile->cur_context--;
3366
3367   return 0;
3368 }
3369
3370 /* Turn off macro expansion at the current context level.  */
3371 static unsigned int
3372 prevent_macro_expansion (pfile)
3373      cpp_reader *pfile;
3374 {
3375   unsigned int prev_value = pfile->no_expand_level;
3376   pfile->no_expand_level = pfile->cur_context;
3377   return prev_value;
3378 }
3379
3380 /* Restore macro expansion to its previous state.  */
3381 static void
3382 restore_macro_expansion (pfile, prev_value)
3383      cpp_reader *pfile;
3384      unsigned int prev_value;
3385 {
3386   pfile->no_expand_level = prev_value;
3387 }
3388
3389 /* Used by cpperror.c to obtain the correct line and column to report
3390    in a diagnostic.  */
3391 unsigned int
3392 _cpp_get_line (pfile, pcol)
3393      cpp_reader *pfile;
3394      unsigned int *pcol;
3395 {
3396   unsigned int index;
3397   const cpp_token *cur_token;
3398
3399   if (pfile->in_lex_line)
3400     index = pfile->token_list.tokens_used;
3401   else
3402     index = pfile->contexts[0].posn;
3403
3404   if (index == 0)
3405     {
3406       if (pcol)
3407         *pcol = 0;
3408       return 0;
3409     }
3410
3411   cur_token = &pfile->token_list.tokens[index - 1];
3412   if (pcol)
3413     *pcol = cur_token->col;
3414   return cur_token->line;
3415 }
3416
3417 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3418 static const char * const monthnames[] =
3419 {
3420   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3421   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3422 };
3423
3424 /* Handle builtin macros like __FILE__.  */
3425 static const cpp_token *
3426 special_symbol (pfile, node, token)
3427      cpp_reader *pfile;
3428      cpp_hashnode *node;
3429      const cpp_token *token;
3430 {
3431   cpp_token *result;
3432   cpp_buffer *ip;
3433
3434   switch (node->type)
3435     {
3436     case T_FILE:
3437     case T_BASE_FILE:
3438       {
3439         const char *file;
3440
3441         ip = CPP_BUFFER (pfile);
3442         if (ip == 0)
3443           file = "";
3444         else
3445           {
3446             if (node->type == T_BASE_FILE)
3447               while (CPP_PREV_BUFFER (ip) != NULL)
3448                 ip = CPP_PREV_BUFFER (ip);
3449
3450             file = ip->nominal_fname;
3451           }
3452         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3453                                     strlen (file));
3454       }
3455       break;
3456
3457     case T_INCLUDE_LEVEL:
3458       /* pfile->include_depth counts the primary source as level 1,
3459          but historically __INCLUDE_DEPTH__ has called the primary
3460          source level 0.  */
3461       result = alloc_number_token (pfile, pfile->include_depth - 1);
3462       break;
3463
3464     case T_SPECLINE:
3465       /* If __LINE__ is embedded in a macro, it must expand to the
3466          line of the macro's invocation, not its definition.
3467          Otherwise things like assert() will not work properly.  */
3468       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3469       break;
3470
3471     case T_STDC:
3472       {
3473         int stdc = 1;
3474
3475 #ifdef STDC_0_IN_SYSTEM_HEADERS
3476         if (CPP_IN_SYSTEM_HEADER (pfile)
3477             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3478           stdc = 0;
3479 #endif
3480         result = alloc_number_token (pfile, stdc);
3481       }
3482       break;
3483
3484     case T_DATE:
3485     case T_TIME:
3486       if (pfile->date == 0)
3487         {
3488           /* Allocate __DATE__ and __TIME__ from permanent storage,
3489              and save them in pfile so we don't have to do this again.
3490              We don't generate these strings at init time because
3491              time() and localtime() are very slow on some systems.  */
3492           time_t tt = time (NULL);
3493           struct tm *tb = localtime (&tt);
3494
3495           pfile->date = make_string_token
3496             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3497           pfile->time = make_string_token
3498             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3499
3500           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3501                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3502           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3503                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3504         }
3505       result = node->type == T_DATE ? pfile->date: pfile->time;
3506       break;
3507
3508     case T_POISON:
3509       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3510       return token;
3511
3512     default:
3513       cpp_ice (pfile, "invalid special hash type");
3514       return token;
3515     }
3516
3517   ASSIGN_FLAGS_AND_POS (result, token);
3518   return result;
3519 }
3520 #undef DSC
3521
3522 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3523    if it hasn't happened already.  */
3524
3525 void
3526 _cpp_init_input_buffer (pfile)
3527      cpp_reader *pfile;
3528 {
3529   cpp_context *base;
3530
3531   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3532   pfile->no_expand_level = UINT_MAX;
3533   pfile->context_cap = 20;
3534   pfile->cur_context = 0;
3535
3536   pfile->contexts = (cpp_context *)
3537     xmalloc (pfile->context_cap * sizeof (cpp_context));
3538
3539   /* Clear the base context.  */
3540   base = &pfile->contexts[0];
3541   base->u.list = &pfile->token_list;
3542   base->posn = 0;
3543   base->count = 0;
3544   base->args = 0;
3545   base->level = 0;
3546   base->flags = 0;
3547   base->pushed_token = 0;
3548 }
3549
3550 /* Moves to the end of the directive line, popping contexts as
3551    necessary.  */
3552 void
3553 _cpp_skip_rest_of_line (pfile)
3554      cpp_reader *pfile;
3555 {
3556   /* Discard all stacked contexts.  */
3557   int i;
3558   for (i = pfile->cur_context; i > 0; i--)
3559     if (pfile->contexts[i].args)
3560       free_macro_args (pfile->contexts[i].args);
3561
3562   if (pfile->no_expand_level <= pfile->cur_context)
3563     pfile->no_expand_level = 0;
3564   pfile->cur_context = 0;
3565
3566   /* Clear the base context, and clear the directive pointer so that
3567      get_raw_token will advance to the next line.  */
3568   pfile->contexts[0].count = 0;
3569   pfile->contexts[0].posn = 0;
3570   pfile->token_list.directive = 0;
3571 }
3572
3573 /* Directive handler wrapper used by the command line option
3574    processor.  */
3575 void
3576 _cpp_run_directive (pfile, dir, buf, count, name)
3577      cpp_reader *pfile;
3578      const struct directive *dir;
3579      const char *buf;
3580      size_t count;
3581      const char *name;
3582 {
3583   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3584     {
3585       unsigned int prev_lvl = 0;
3586
3587       if (name)
3588         CPP_BUFFER (pfile)->nominal_fname = name;
3589       else
3590         CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3591       CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3592
3593       /* Scan the line now, else prevent_macro_expansion won't work.  */
3594       lex_next (pfile, 1);
3595       if (! (dir->flags & EXPAND))
3596         prev_lvl = prevent_macro_expansion (pfile);
3597
3598       (void) (*dir->handler) (pfile);
3599
3600       if (! (dir->flags & EXPAND))
3601         restore_macro_expansion (pfile, prev_lvl);
3602
3603       _cpp_skip_rest_of_line (pfile);
3604       cpp_pop_buffer (pfile);
3605     }
3606 }