gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Check line numbers assigned to all errors.
  28 o Distinguish integers, floats, and 'other' pp-numbers.
  29 o Store ints and char constants as binary values.
  30 o New command-line assertion syntax.
  31 o Work towards functions in cpperror.c taking a message level parameter.
  32   If we do this, merge the common code of do_warning and do_error.
  33 o Comment all functions, and describe macro expansion algorithm.
  34 o Move as much out of header files as possible.
  35 o Remove single quote pairs `', and some '', from diagnostics.
  36 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  37
  38 */
  39
  40 #include "config.h"
  41 #include "system.h"
  42 #include "intl.h"
  43 #include "cpplib.h"
  44 #include "cpphash.h"
  45 #include "symcat.h"
  46
  47 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
  48                                             0 UNION_INIT_ZERO};
  49 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  50
  51 /* Flags for cpp_context.  */
  52 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  53 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  54 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  55 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  56
  57 typedef struct cpp_context cpp_context;
  58 struct cpp_context
  59 {
  60   union
  61   {
  62     const cpp_toklist *list;    /* Used for macro contexts only.  */
  63     const cpp_token **arg;      /* Used for arg contexts only.  */
  64   } u;
  65
  66   /* Pushed token to be returned by next call to get_raw_token.  */
  67   const cpp_token *pushed_token;
  68
  69   struct macro_args *args;      /* The arguments for a function-like
  70                                    macro.  NULL otherwise.  */
  71   unsigned short posn;          /* Current posn, index into u.  */
  72   unsigned short count;         /* No. of tokens in u.  */
  73   unsigned short level;
  74   unsigned char flags;
  75 };
  76
  77 typedef struct macro_args macro_args;
  78 struct macro_args
  79 {
  80   unsigned int *ends;
  81   const cpp_token **tokens;
  82   unsigned int capacity;
  83   unsigned int used;
  84   unsigned short level;
  85 };
  86
  87 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  88 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  89                                            macro_args *, unsigned int *));
  90 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  91 static void save_token PARAMS ((macro_args *, const cpp_token *));
  92 static int pop_context PARAMS ((cpp_reader *));
  93 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  94 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  95 static void free_macro_args PARAMS ((macro_args *));
  96 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
  97                                          unsigned int));
  98 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
  99                                          unsigned int));
 100
 101 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
 102 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
 103 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
 104
 105 static int skip_block_comment PARAMS ((cpp_reader *));
 106 static int skip_line_comment PARAMS ((cpp_buffer *));
 107 static void adjust_column PARAMS ((cpp_reader *));
 108 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
 109 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
 110 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
 111 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
 112 static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
 113 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
 114 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 115 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 116 static void check_long_token PARAMS ((cpp_buffer *,
 117                                       cpp_token *,
 118                                       cppchar_t,
 119                                       enum cpp_ttype));
 120 static void lex_token PARAMS ((cpp_reader *, cpp_token *));
 121 static int lex_next PARAMS ((cpp_reader *, int));
 122
 123 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
 124 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 125                                       const cpp_token *));
 126
 127 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 128 static void expand_context_stack PARAMS ((cpp_reader *));
 129 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 130                                             unsigned char *));
 131 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
 132                                   const cpp_token *, int));
 133 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 134                                           cpp_token *));
 135 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 136                                             unsigned int));
 137 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 138 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 139                                                 const cpp_token *));
 140 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 141 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 142                                                        const cpp_token *));
 143 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 144                                          const cpp_token *, int *));
 145 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 146 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 147 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 148 static void release_temp_tokens         PARAMS ((cpp_reader *));
 149 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 150 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 151
 152 #define VALID_SIGN(c, prevc) \
 153   (((c) == '+' || (c) == '-') && \
 154    ((prevc) == 'e' || (prevc) == 'E' \
 155     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 156
 157 /* An upper bound on the number of bytes needed to spell a token,
 158    including preceding whitespace.  */
 159 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 160 static inline size_t
 161 TOKEN_LEN (token)
 162      const cpp_token *token;
 163 {
 164   size_t len;
 165
 166   switch (TOKEN_SPELL (token))
 167     {
 168     default:            len = 0;                        break;
 169     case SPELL_STRING:  len = token->val.str.len;       break;
 170     case SPELL_IDENT:   len = token->val.node->length;  break;
 171     }
 172   return len + 5;
 173 }
 174
 175 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 176 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 177 #define ON_REST_ARG(c) \
 178  (((c)->u.list->flags & VAR_ARGS) \
 179   && (c)->u.list->tokens[(c)->posn - 1].val.aux \
 180       == (unsigned int) ((c)->u.list->paramc - 1))
 181
 182 #define ASSIGN_FLAGS_AND_POS(d, s) \
 183   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 184       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 185   } while (0)
 186
 187 /* f is flags, just consisting of PREV_WHITE | BOL.  */
 188 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 189   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 190       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 191   } while (0)
 192
 193 #define OP(e, s) { SPELL_OPERATOR, U s           },
 194 #define TK(e, s) { s,              U STRINGX (e) },
 195
 196 const struct token_spelling
 197 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
 198
 199 #undef OP
 200 #undef TK
 201
 202 /* Notify the compiler proper that the current line number has jumped,
 203    or the current file name has changed.  */
 204
 205 static void
 206 output_line_command (pfile, print, line)
 207      cpp_reader *pfile;
 208      cpp_printer *print;
 209      unsigned int line;
 210 {
 211   cpp_buffer *ip = CPP_BUFFER (pfile);
 212
 213   if (line == 0)
 214     return;
 215
 216   /* End the previous line of text.  */
 217   if (pfile->need_newline)
 218     {
 219       putc ('\n', print->outf);
 220       print->lineno++;
 221     }
 222   pfile->need_newline = 0;
 223
 224   if (CPP_OPTION (pfile, no_line_commands))
 225     return;
 226
 227   /* If the current file has not changed, we can output a few newlines
 228      instead if we want to increase the line number by a small amount.
 229      We cannot do this if print->lineno is zero, because that means we
 230      haven't output any line commands yet.  (The very first line
 231      command output is a `same_file' command.)
 232
 233      'nominal_fname' values are unique, so they can be compared by
 234      comparing pointers.  */
 235   if (ip->nominal_fname == print->last_fname && print->lineno > 0
 236       && line >= print->lineno && line < print->lineno + 8)
 237     {
 238       while (line > print->lineno)
 239         {
 240           putc ('\n', print->outf);
 241           print->lineno++;
 242         }
 243       return;
 244     }
 245
 246   fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
 247            cpp_syshdr_flags (pfile, ip));
 248
 249   print->last_fname = ip->nominal_fname;
 250   print->lineno = line;
 251 }
 252
 253 /* Like fprintf, but writes to a printer object.  You should be sure
 254    always to generate a complete line when you use this function.  */
 255 void
 256 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
 257                      const char *fmt, ...))
 258 {
 259   va_list ap;
 260 #ifndef ANSI_PROTOTYPES
 261   cpp_reader *pfile;
 262   cpp_printer *print;
 263   const char *fmt;
 264 #endif
 265
 266   VA_START (ap, fmt);
 267
 268 #ifndef ANSI_PROTOTYPES
 269   pfile = va_arg (ap, cpp_reader *);
 270   print = va_arg (ap, cpp_printer *);
 271   fmt = va_arg (ap, const char *);
 272 #endif
 273
 274   /* End the previous line of text.  */
 275   if (pfile->need_newline)
 276     {
 277       putc ('\n', print->outf);
 278       print->lineno++;
 279     }
 280   pfile->need_newline = 0;
 281
 282   vfprintf (print->outf, fmt, ap);
 283   va_end (ap);
 284 }
 285
 286 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 287
 288 void
 289 cpp_scan_buffer_nooutput (pfile)
 290      cpp_reader *pfile;
 291 {
 292   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 293   const cpp_token *token;
 294
 295   /* In no-output mode, we can ignore everything but directives.  */
 296   for (;;)
 297     {
 298       token = _cpp_get_token (pfile);
 299
 300       if (token->type == CPP_EOF)
 301         {
 302           cpp_pop_buffer (pfile);
 303           if (CPP_BUFFER (pfile) == stop)
 304             break;
 305         }
 306
 307       if (token->type == CPP_HASH && token->flags & BOL
 308           && pfile->token_list.directive)
 309         {
 310           process_directive (pfile, token);
 311           continue;
 312         }
 313
 314       _cpp_skip_rest_of_line (pfile);
 315     }
 316 }
 317
 318 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 319 void
 320 cpp_scan_buffer (pfile, print)
 321      cpp_reader *pfile;
 322      cpp_printer *print;
 323 {
 324   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 325   const cpp_token *token, *prev = 0;
 326
 327   for (;;)
 328     {
 329       token = _cpp_get_token (pfile);
 330       if (token->type == CPP_EOF)
 331         {
 332           cpp_pop_buffer (pfile);
 333
 334           if (CPP_BUFFER (pfile) == stop)
 335             return;
 336
 337           prev = 0;
 338           continue;
 339         }
 340
 341       if (token->flags & BOL)
 342         {
 343           output_line_command (pfile, print, token->line);
 344           prev = 0;
 345
 346           if (token->type == CPP_HASH && pfile->token_list.directive)
 347             {
 348               process_directive (pfile, token);
 349               continue;
 350             }
 351         }
 352
 353       if (token->type != CPP_PLACEMARKER)
 354         {
 355           output_token (pfile, print->outf, token, prev, 1);
 356           pfile->need_newline = 1;
 357         }
 358
 359       prev = token;
 360     }
 361 }
 362
 363 /* Helper routine used by parse_include, which can't see spell_token.
 364    Reinterpret the current line as an h-char-sequence (< ... >); we are
 365    looking at the first token after the <.  */
 366 const cpp_token *
 367 _cpp_glue_header_name (pfile)
 368      cpp_reader *pfile;
 369 {
 370   const cpp_token *t;
 371   cpp_token *hdr;
 372   U_CHAR *buf, *p;
 373   size_t len, avail;
 374
 375   avail = 40;
 376   len = 0;
 377   buf = xmalloc (avail);
 378
 379   for (;;)
 380     {
 381       t = _cpp_get_token (pfile);
 382       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 383         break;
 384
 385       if (len + TOKEN_LEN (t) > avail)
 386         {
 387           avail = len + TOKEN_LEN (t) + 40;
 388           buf = xrealloc (buf, avail);
 389         }
 390
 391       if (t->flags & PREV_WHITE)
 392         buf[len++] = ' ';
 393
 394       p = spell_token (pfile, t, buf + len);
 395       len = (size_t) (p - buf);  /* p known >= buf */
 396     }
 397
 398   if (t->type == CPP_EOF)
 399     cpp_error (pfile, "missing terminating > character");
 400
 401   buf = xrealloc (buf, len);
 402
 403   hdr = get_temp_token (pfile);
 404   hdr->type = CPP_HEADER_NAME;
 405   hdr->flags = 0;
 406   hdr->val.str.text = buf;
 407   hdr->val.str.len = len;
 408   return hdr;
 409 }
 410
 411 /* Token-buffer helper functions.  */
 412
 413 /* Expand a token list's string space. It is *vital* that
 414    list->tokens_used is correct, to get pointer fix-up right.  */
 415 void
 416 _cpp_expand_name_space (list, len)
 417      cpp_toklist *list;
 418      unsigned int len;
 419 {
 420   const U_CHAR *old_namebuf;
 421
 422   old_namebuf = list->namebuf;
 423   list->name_cap += len;
 424   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 425
 426   /* Fix up token text pointers.  */
 427   if (list->namebuf != old_namebuf)
 428     {
 429       unsigned int i;
 430
 431       for (i = 0; i < list->tokens_used; i++)
 432         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
 433           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 434     }
 435 }
 436
 437 /* If there is not enough room for LEN more characters, expand the
 438    list by just enough to have room for LEN characters.  */
 439 void
 440 _cpp_reserve_name_space (list, len)
 441      cpp_toklist *list;
 442      unsigned int len;
 443 {
 444   unsigned int room = list->name_cap - list->name_used;
 445
 446   if (room < len)
 447     _cpp_expand_name_space (list, len - room);
 448 }
 449
 450 /* Expand the number of tokens in a list.  */
 451 void
 452 _cpp_expand_token_space (list, count)
 453      cpp_toklist *list;
 454      unsigned int count;
 455 {
 456   unsigned int n;
 457
 458   list->tokens_cap += count;
 459   n = list->tokens_cap;
 460   if (list->flags & LIST_OFFSET)
 461     list->tokens--, n++;
 462   list->tokens = (cpp_token *)
 463     xrealloc (list->tokens, n * sizeof (cpp_token));
 464   if (list->flags & LIST_OFFSET)
 465     list->tokens++;             /* Skip the dummy.  */
 466 }
 467
 468 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 469    an extra token in front of the token list, as this allows the lexer
 470    to always peek at the previous token without worrying about
 471    underflowing the list, and some initial space.  Otherwise, no
 472    token- or name-space is allocated, and there is no dummy token.  */
 473 void
 474 _cpp_init_toklist (list, flags)
 475      cpp_toklist *list;
 476      int flags;
 477 {
 478   if (flags == NO_DUMMY_TOKEN)
 479     {
 480       list->tokens_cap = 0;
 481       list->tokens = 0;
 482       list->name_cap = 0;
 483       list->namebuf = 0;
 484       list->flags = 0;
 485     }
 486   else
 487     {
 488       /* Initialize token space.  Put a dummy token before the start
 489          that will fail matches.  */
 490       list->tokens_cap = 256;   /* 4K's worth.  */
 491       list->tokens = (cpp_token *)
 492         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 493       list->tokens[0].type = CPP_EOF;
 494       list->tokens++;
 495
 496       /* Initialize name space.  */
 497       list->name_cap = 1024;
 498       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 499       list->flags = LIST_OFFSET;
 500     }
 501
 502   _cpp_clear_toklist (list);
 503 }
 504
 505 /* Clear a token list.  */
 506 void
 507 _cpp_clear_toklist (list)
 508      cpp_toklist *list;
 509 {
 510   list->tokens_used = 0;
 511   list->name_used = 0;
 512   list->directive = 0;
 513   list->paramc = 0;
 514   list->params_len = 0;
 515   list->flags &= LIST_OFFSET;  /* clear all but that one */
 516 }
 517
 518 /* Free a token list.  Does not free the list itself, which may be
 519    embedded in a larger structure.  */
 520 void
 521 _cpp_free_toklist (list)
 522      const cpp_toklist *list;
 523 {
 524   if (list->flags & LIST_OFFSET)
 525     free (list->tokens - 1);    /* Backup over dummy token.  */
 526   else
 527     free (list->tokens);
 528   free (list->namebuf);
 529 }
 530
 531 /* Compare two tokens.  */
 532 int
 533 _cpp_equiv_tokens (a, b)
 534      const cpp_token *a, *b;
 535 {
 536   if (a->type == b->type && a->flags == b->flags)
 537     switch (TOKEN_SPELL (a))
 538       {
 539       default:                  /* Keep compiler happy.  */
 540       case SPELL_OPERATOR:
 541         return 1;
 542       case SPELL_CHAR:
 543       case SPELL_NONE:
 544         return a->val.aux == b->val.aux; /* arg_no or character.  */
 545       case SPELL_IDENT:
 546         return a->val.node == b->val.node;
 547       case SPELL_STRING:
 548         return (a->val.str.len == b->val.str.len
 549                 && !memcmp (a->val.str.text, b->val.str.text,
 550                             a->val.str.len));
 551       }
 552
 553   return 0;
 554 }
 555
 556 /* Compare two token lists.  */
 557 int
 558 _cpp_equiv_toklists (a, b)
 559      const cpp_toklist *a, *b;
 560 {
 561   unsigned int i;
 562
 563   if (a->tokens_used != b->tokens_used
 564       || a->flags != b->flags
 565       || a->paramc != b->paramc)
 566     return 0;
 567
 568   for (i = 0; i < a->tokens_used; i++)
 569     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 570       return 0;
 571   return 1;
 572 }
 573
 574 /* Utility routine:
 575
 576    Compares, the token TOKEN to the NUL-terminated string STRING.
 577    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
 578
 579 int
 580 cpp_ideq (token, string)
 581      const cpp_token *token;
 582      const char *string;
 583 {
 584   if (token->type != CPP_NAME)
 585     return 0;
 586
 587   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 588 }
 589
 590 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 591                                                     U":>", U"<%", U"%>"};
 592
 593 /* Call when meeting a newline.  Returns the character after the newline
 594    (or carriage-return newline combination), or EOF.  */
 595 static cppchar_t
 596 handle_newline (buffer, newline_char)
 597      cpp_buffer *buffer;
 598      cppchar_t newline_char;
 599 {
 600   cppchar_t next = EOF;
 601
 602   buffer->col_adjust = 0;
 603   buffer->lineno++;
 604   buffer->line_base = buffer->cur;
 605
 606   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 607   if (buffer->cur < buffer->rlimit)
 608     {
 609       next = *buffer->cur++;
 610       if (next + newline_char == '\r' + '\n')
 611         {
 612           buffer->line_base = buffer->cur;
 613           if (buffer->cur < buffer->rlimit)
 614             next = *buffer->cur++;
 615           else
 616             next = EOF;
 617         }
 618     }
 619
 620   buffer->read_ahead = next;
 621   return next;
 622 }
 623
 624 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 625    encountered.  It warns if necessary, and returns true if the
 626    trigraph should be honoured.  FROM_CHAR is the third character of a
 627    trigraph, and presumed to be the previous character for position
 628    reporting.  */
 629 static int
 630 trigraph_ok (pfile, from_char)
 631      cpp_reader *pfile;
 632      cppchar_t from_char;
 633 {
 634   int accept = CPP_OPTION (pfile, trigraphs);
 635
 636   if (CPP_OPTION (pfile, warn_trigraphs))
 637     {
 638       cpp_buffer *buffer = pfile->buffer;
 639       if (accept)
 640         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 641                                "trigraph ??%c converted to %c",
 642                                (int) from_char,
 643                                (int) _cpp_trigraph_map[from_char]);
 644       else
 645         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 646                                "trigraph ??%c ignored", (int) from_char);
 647     }
 648
 649   return accept;
 650 }
 651
 652 /* Assumes local variables buffer and result.  */
 653 #define ACCEPT_CHAR(t) \
 654   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 655
 656 /* When we move to multibyte character sets, add to these something
 657    that saves and restores the state of the multibyte conversion
 658    library.  This probably involves saving and restoring a "cookie".
 659    In the case of glibc it is an 8-byte structure, so is not a high
 660    overhead operation.  In any case, it's out of the fast path.  */
 661 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
 662 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
 663
 664 /* Skips any escaped newlines introduced by NEXT, which is either a
 665    '?' or a '\\'.  Returns the next character, which will also have
 666    been placed in buffer->read_ahead.  */
 667 static cppchar_t
 668 skip_escaped_newlines (buffer, next)
 669      cpp_buffer *buffer;
 670      cppchar_t next;
 671 {
 672   cppchar_t next1;
 673   const unsigned char *saved_cur;
 674   int space;
 675
 676   do
 677     {
 678       if (buffer->cur == buffer->rlimit)
 679         break;
 680
 681       SAVE_STATE ();
 682       if (next == '?')
 683         {
 684           next1 = *buffer->cur++;
 685           if (next1 != '?' || buffer->cur == buffer->rlimit)
 686             {
 687               RESTORE_STATE ();
 688               break;
 689             }
 690
 691           next1 = *buffer->cur++;
 692           if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
 693             {
 694               RESTORE_STATE ();
 695               break;
 696             }
 697
 698           /* We have a full trigraph here.  */
 699           next = _cpp_trigraph_map[next1];
 700           if (next != '\\' || buffer->cur == buffer->rlimit)
 701             break;
 702           SAVE_STATE ();
 703         }
 704
 705       /* We have a backslash, and room for at least one more character.  */
 706       space = 0;
 707       do
 708         {
 709           next1 = *buffer->cur++;
 710           if (!is_nvspace (next1))
 711             break;
 712           space = 1;
 713         }
 714       while (buffer->cur < buffer->rlimit);
 715
 716       if (!is_vspace (next1))
 717         {
 718           RESTORE_STATE ();
 719           break;
 720         }
 721
 722       if (space)
 723         cpp_warning (buffer->pfile,
 724                      "backslash and newline separated by space");
 725
 726       next = handle_newline (buffer, next1);
 727       if (next == EOF)
 728         cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 729     }
 730   while (next == '\\' || next == '?');
 731
 732   buffer->read_ahead = next;
 733   return next;
 734 }
 735
 736 /* Obtain the next character, after trigraph conversion and skipping
 737    an arbitrary string of escaped newlines.  The common case of no
 738    trigraphs or escaped newlines falls through quickly.  */
 739 static cppchar_t
 740 get_effective_char (buffer)
 741      cpp_buffer *buffer;
 742 {
 743   cppchar_t next = EOF;
 744
 745   if (buffer->cur < buffer->rlimit)
 746     {
 747       next = *buffer->cur++;
 748
 749       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 750          can introduce escaped newlines, which we want to skip, or
 751          UCNs, which, depending upon lexer state, we will handle in
 752          the future.  */
 753       if (next == '?' || next == '\\')
 754         next = skip_escaped_newlines (buffer, next);
 755     }
 756
 757   buffer->read_ahead = next;
 758   return next;
 759 }
 760
 761 /* Skip a C-style block comment.  We find the end of the comment by
 762    seeing if an asterisk is before every '/' we encounter.  Returns
 763    non-zero if comment terminated by EOF, zero otherwise.  */
 764 static int
 765 skip_block_comment (pfile)
 766      cpp_reader *pfile;
 767 {
 768   cpp_buffer *buffer = pfile->buffer;
 769   cppchar_t c = EOF, prevc;
 770
 771   while (buffer->cur != buffer->rlimit)
 772     {
 773       prevc = c, c = *buffer->cur++;
 774
 775     next_char:
 776       /* FIXME: For speed, create a new character class of characters
 777          of no interest inside block comments.  */
 778       if (c == '?' || c == '\\')
 779         c = skip_escaped_newlines (buffer, c);
 780
 781       /* People like decorating comments with '*', so check for '/'
 782          instead for efficiency.  */
 783       if (c == '/')
 784         {
 785           if (prevc == '*')
 786             break;
 787
 788           /* Warn about potential nested comments, but not if the '/'
 789              comes immediately before the true comment delimeter.
 790              Don't bother to get it right across escaped newlines.  */
 791           if (CPP_OPTION (pfile, warn_comments)
 792               && buffer->cur != buffer->rlimit)
 793             {
 794               prevc = c, c = *buffer->cur++;
 795               if (c == '*' && buffer->cur != buffer->rlimit)
 796                 {
 797                   prevc = c, c = *buffer->cur++;
 798                   if (c != '/')
 799                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 800                                            CPP_BUF_COL (buffer),
 801                                            "\"/*\" within comment");
 802                 }
 803               goto next_char;
 804             }
 805         }
 806       else if (is_vspace (c))
 807         {
 808           prevc = c, c = handle_newline (buffer, c);
 809           goto next_char;
 810         }
 811       else if (c == '\t')
 812         adjust_column (pfile);
 813     }
 814
 815   buffer->read_ahead = EOF;
 816   return c != '/' || prevc != '*';
 817 }
 818
 819 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 820    non-zero if a multiline comment.  The following new line, if any,
 821    is left in buffer->read_ahead.  */
 822 static int
 823 skip_line_comment (buffer)
 824      cpp_buffer *buffer;
 825 {
 826   unsigned int orig_lineno = buffer->lineno;
 827   cppchar_t c;
 828
 829   do
 830     {
 831       c = EOF;
 832       if (buffer->cur == buffer->rlimit)
 833         break;
 834
 835       c = *buffer->cur++;
 836       if (c == '?' || c == '\\')
 837         c = skip_escaped_newlines (buffer, c);
 838     }
 839   while (!is_vspace (c));
 840
 841   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 842   return orig_lineno != buffer->lineno;
 843 }
 844
 845 /* pfile->buffer->cur is one beyond the \t character.  Update
 846    col_adjust so we track the column correctly.  */
 847 static void
 848 adjust_column (pfile)
 849      cpp_reader *pfile;
 850 {
 851   cpp_buffer *buffer = pfile->buffer;
 852   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 853
 854   /* Round it up to multiple of the tabstop, but subtract 1 since the
 855      tab itself occupies a character position.  */
 856   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 857                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 858 }
 859
 860 /* Skips whitespace, saving the next non-whitespace character.
 861    Adjusts pfile->col_adjust to account for tabs.  Without this,
 862    tokens might be assigned an incorrect column.  */
 863 static void
 864 skip_whitespace (pfile, c)
 865      cpp_reader *pfile;
 866      cppchar_t c;
 867 {
 868   cpp_buffer *buffer = pfile->buffer;
 869   unsigned int warned = 0;
 870
 871   do
 872     {
 873       /* Horizontal space always OK.  */
 874       if (c == ' ')
 875         ;
 876       else if (c == '\t')
 877         adjust_column (pfile);
 878       /* Just \f \v or \0 left.  */
 879       else if (c == '\0')
 880         {
 881           if (!warned)
 882             {
 883               cpp_warning (pfile, "null character(s) ignored");
 884               warned = 1;
 885             }
 886         }
 887       else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
 888         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 889                                CPP_BUF_COL (buffer),
 890                                "%s in preprocessing directive",
 891                                c == '\f' ? "form feed" : "vertical tab");
 892
 893       c = EOF;
 894       if (buffer->cur == buffer->rlimit)
 895         break;
 896       c = *buffer->cur++;
 897     }
 898   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 899   while (is_nvspace (c));
 900
 901   /* Remember the next character.  */
 902   buffer->read_ahead = c;
 903 }
 904
 905 /* Parse an identifier starting with C, skipping embedded backslash-newlines,
 906    and return its hash node.  The hash value is calculated while parsing, for
 907    improved performance; the algorithm *must* match cpp_lookup().  */
 908
 909 static cpp_hashnode *
 910 parse_identifier (pfile, c)
 911      cpp_reader *pfile;
 912      cppchar_t c;
 913 {
 914   cpp_buffer *buffer = pfile->buffer;
 915   unsigned int r = 0, saw_dollar = 0; /* Running hash; count of '$' seen.  */
 916   unsigned int orig_used = pfile->token_list.name_used; /* Start of spelling.  */
 917
 918   do
 919     {
 920       do
 921         {
 922           if (pfile->token_list.name_used == pfile->token_list.name_cap)
 923             _cpp_expand_name_space (&pfile->token_list,
 924                                     pfile->token_list.name_used + 256);
 925           pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
 926           r = HASHSTEP (r, c);
 927
 928           if (c == '$')
 929             saw_dollar++;
 930
 931           c = EOF; /* The result if the buffer is exhausted.  */
 932           if (buffer->cur == buffer->rlimit)
 933             break;
 934
 935           c = *buffer->cur++;
 936         }
 937       while (is_idchar (c));
 938
 939       /* Potential escaped newline?  Anything else ends the identifier.  */
 940       if (c != '?' && c != '\\')
 941         break;
 942       c = skip_escaped_newlines (buffer, c);
 943     }
 944   while (is_idchar (c));
 945
 946   /* $ is not an identifier character in the standard, but is commonly
 947      accepted as an extension.  Don't warn about it in skipped
 948      conditional blocks.  */
 949   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 950     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 951
 952   /* Remember the next character.  */
 953   buffer->read_ahead = c;
 954   return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
 955                                 pfile->token_list.name_used - orig_used, r);
 956 }
 957
 958 /* Parse a number starting with C into NUMBER, skipping embedded backslash-newlines.  */
 959 static void
 960 parse_number (pfile, number, c)
 961      cpp_reader *pfile;
 962      cpp_string *number;
 963      cppchar_t c;
 964 {
 965   cppchar_t prevc;
 966   cpp_buffer *buffer = pfile->buffer;
 967   unsigned int orig_used = pfile->token_list.name_used; /* Start of spelling.  */
 968
 969   do
 970     {
 971       do
 972         {
 973           if (pfile->token_list.name_used == pfile->token_list.name_cap)
 974             _cpp_expand_name_space (&pfile->token_list,
 975                                     pfile->token_list.name_used + 256);
 976           pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
 977
 978           prevc = c; /* VALID_SIGN is given the preceding character.  */
 979           c = EOF; /* The result if the buffer is exhausted.  */
 980           if (buffer->cur == buffer->rlimit)
 981             break;
 982
 983           c = *buffer->cur++;
 984         }
 985       while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
 986
 987       /* Potential escaped newline?  Anything else ends the number.  */
 988       if (c != '?' && c != '\\')
 989         break;
 990       c = skip_escaped_newlines (buffer, c);
 991     }
 992   while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
 993
 994   /* Remember the next character.  */
 995   buffer->read_ahead = c;
 996
 997   number->text = &pfile->token_list.namebuf[orig_used];
 998   number->len = pfile->token_list.name_used - orig_used;
 999 }
1000
1001 /* Subroutine of parse_string.  Emits an error for a literal on LINE that lacks its terminator TERM.  */
1002 static void
1003 unterminated (pfile, line, term)
1004      cpp_reader *pfile;
1005      unsigned int line;
1006      int term;
1007 {
1008   cpp_error (pfile, "missing terminating %c character", term);
1009
1010   if (term == '\"' && pfile->mls_line && pfile->mls_line != line) /* A multi-line string began on another line.  */
1011     {
1012       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
1013                            "possible start of unterminated string literal");
1014       pfile->mls_line = 0; /* Forget it, so it is reported only once.  */
1015     }
1016 }
1017
1018 /* Parses a string, character constant, or angle-bracketed header file
1019    name ending in TERMINATOR.  Handles embedded trigraphs and escaped
1020    newlines.  The text, without its delimiters, goes in TOKEN->val.str.
1021    Multi-line strings are allowed, but they are deprecated within
1022    directives.  */
1023 static void
1024 parse_string (pfile, token, terminator)
1025      cpp_reader *pfile;
1026      cpp_token *token;
1027      cppchar_t terminator;
1028 {
1029   cpp_buffer *buffer = pfile->buffer;
1030   unsigned int orig_used = pfile->token_list.name_used; /* Start of text.  */
1031   cppchar_t c;
1032   unsigned int nulls = 0; /* Count of nulls seen; we warn on the first.  */
1033
1034   for (;;)
1035     {
1036       if (buffer->cur == buffer->rlimit)
1037         {
1038           c = EOF;
1039           unterminated (pfile, token->line, terminator);
1040           break;
1041         }
1042       c = *buffer->cur++;
1043
1044     have_char:
1045       /* Handle trigraphs, escaped newlines etc.  */
1046       if (c == '?' || c == '\\')
1047         c = skip_escaped_newlines (buffer, c);
1048
1049       if (c == terminator)
1050         {
1051           unsigned int u = pfile->token_list.name_used;
1052
1053           /* An odd number of consecutive backslashes represents an
1054              escaped terminator.  */
1055           while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
1056             u--;
1057
1058           if ((pfile->token_list.name_used - u) % 2 == 0) /* Even: a real terminator.  */
1059             {
1060               c = EOF; /* Terminator consumed; nothing is read ahead.  */
1061               break;
1062             }
1063         }
1064       else if (is_vspace (c))
1065         {
1066           /* In assembly language, silently terminate string and
1067              character literals at end of line.  This is a kludge
1068              around not knowing where comments are.  */
1069           if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
1070             break;
1071
1072           /* Character constants and header names may not extend over
1073              multiple lines.  In Standard C, neither may strings.
1074              Unfortunately, we accept multiline strings as an
1075              extension.  (Deprecatedly even in directives - otherwise,
1076              glibc's longlong.h breaks.)  */
1077           if (terminator != '"')
1078             {
1079               unterminated (pfile, token->line, terminator);
1080               break;
1081             }
1082
1083           if (pfile->mls_line == 0) /* Record where the first multi-line string began.  */
1084             {
1085               pfile->mls_line = token->line;
1086               pfile->mls_column = token->col;
1087               if (CPP_PEDANTIC (pfile))
1088                 cpp_pedwarn (pfile, "multi-line string constant");
1089             }
1090
1091           handle_newline (buffer, c);  /* Stores to read_ahead.  */
1092           c = '\n'; /* Whatever the vertical space was, store '\n'.  */
1093         }
1094       else if (c == '\0')
1095         {
1096           if (nulls++ == 0)
1097             cpp_warning (pfile, "null character(s) preserved in literal");
1098         }
1099
1100       if (pfile->token_list.name_used == pfile->token_list.name_cap)
1101         _cpp_expand_name_space (&pfile->token_list,
1102                                 pfile->token_list.name_used + 256);
1103
1104       pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
1105       /* If we had a new line, the next character is in read_ahead.  */
1106       if (c != '\n')
1107         continue;
1108       c = buffer->read_ahead;
1109       if (c != EOF)
1110         goto have_char;
1111     }
1112
1113   buffer->read_ahead = c; /* EOF, or the vertical space that stopped us.  */
1114
1115   token->val.str.text = &pfile->token_list.namebuf[orig_used];
1116   token->val.str.len = pfile->token_list.name_used - orig_used;
1117 }
1118
1119 /* Stores the comment running from FROM to pfile->buffer->cur in TOKEN as a
1120    CPP_COMMENT.  The stored text includes the comment start and any terminator.  */
1121 static void
1122 save_comment (pfile, token, from)
1123      cpp_reader *pfile;
1124      cpp_token *token;
1125      const unsigned char *from;
1126 {
1127   unsigned char *buffer;
1128   unsigned int len;
1129   cpp_toklist *list = &pfile->token_list;
1130
1131 #define COMMENT_START_LEN 2
1132   len = pfile->buffer->cur - from + COMMENT_START_LEN; /* Text plus the two-character opener.  */
1133   _cpp_reserve_name_space (list, len);
1134   buffer = list->namebuf + list->name_used;
1135   list->name_used += len;
1136
1137   token->type = CPP_COMMENT;
1138   token->val.str.len = len;
1139   token->val.str.text = buffer;
1140
1141   /* from[-1] is '/' or '*' depending on the comment type.  */
1142   *buffer++ = '/';
1143   *buffer++ = from[-1];
1144   memcpy (buffer, from, len - COMMENT_START_LEN);
1145 }
1146
1147 /* A helper routine for lex_token.  With some long tokens, we need
1148    to read ahead to see if that is the token we have, but back-track
1149    if not.  If the next effective character is WANTED, RESULT becomes TYPE.  */
1150 static void
1151 check_long_token (buffer, result, wanted, type)
1152      cpp_buffer *buffer;
1153      cpp_token *result;
1154      cppchar_t wanted;
1155      enum cpp_ttype type;
1156 {
1157   const unsigned char *saved_cur; /* Presumably used by SAVE_STATE / RESTORE_STATE (defined elsewhere).  */
1158   cppchar_t c = buffer->read_ahead; /* Put back below if we back-track.  */
1159
1160   SAVE_STATE ();
1161   if (get_effective_char (buffer) == wanted)
1162     ACCEPT_CHAR (type);
1163   else
1164     {
1165       /* Restore state.  */
1166       RESTORE_STATE ();
1167       buffer->read_ahead = c;
1168     }
1169 }
1170 /* Lexes one token from pfile->buffer into RESULT, setting its type, flags, line, column and value.  */
1171 static void
1172 lex_token (pfile, result)
1173      cpp_reader *pfile;
1174      cpp_token *result;
1175 {
1176   cppchar_t c;
1177   cpp_buffer *buffer = pfile->buffer;
1178   const unsigned char *comment_start;
1179
1180   result->flags = 0;
1181  next_char:
1182   result->line = CPP_BUF_LINE (buffer);
1183  next_char2:
1184   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1185
1186   c = buffer->read_ahead; /* Use a character left by an earlier read-ahead, if any.  */
1187   if (c == EOF && buffer->cur < buffer->rlimit)
1188     {
1189       c = *buffer->cur++;
1190       result->col++;
1191     }
1192
1193  do_switch:
1194   buffer->read_ahead = EOF; /* C is now consumed.  */
1195   switch (c)
1196     {
1197     case EOF:
1198       /* Non-empty files should end in a newline.  Testing
1199          skip_newlines ensures we only emit the warning once.  */
1200       if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1201           && pfile->state.skip_newlines)
1202         cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1203                                "no newline at end of file");
1204       result->type = CPP_EOF;
1205       break;
1206
1207     case ' ': case '\t': case '\f': case '\v': case '\0':
1208       skip_whitespace (pfile, c);
1209       result->flags |= PREV_WHITE;
1210       goto next_char2;
1211
1212     case '\n': case '\r':
1213       result->type = CPP_EOF; /* A newline is reported as CPP_EOF unless we are skipping newlines.  */
1214       handle_newline (buffer, c);
1215       /* Handling here will change significantly when moving to
1216          token-at-a-time.  */
1217       if (pfile->state.skip_newlines)
1218         {
1219           result->flags &= ~PREV_WHITE; /* Clear any whitespace flag.   */
1220           goto next_char;
1221         }
1222       break;
1223
1224     case '?':
1225     case '\\':
1226       /* These could start an escaped newline, or '?' a trigraph.  Let
1227          skip_escaped_newlines do all the work.  */
1228       {
1229         unsigned int lineno = buffer->lineno;
1230
1231         c = skip_escaped_newlines (buffer, c);
1232         if (lineno != buffer->lineno)
1233           /* We had at least one escaped newline of some sort, and the
1234              next character is in buffer->read_ahead.  Update the
1235              token's line and column.  */
1236             goto next_char;
1237
1238         /* We are either the original '?' or '\\', or a trigraph.  */
1239         result->type = CPP_QUERY;
1240         buffer->read_ahead = EOF;
1241         if (c == '\\')
1242           result->type = CPP_BACKSLASH;
1243         else if (c != '?')
1244           goto do_switch; /* A trigraph: lex the character it stands for.  */
1245       }
1246       break;
1247
1248     make_number:
1249     case '0': case '1': case '2': case '3': case '4':
1250     case '5': case '6': case '7': case '8': case '9':
1251       result->type = CPP_NUMBER;
1252       parse_number (pfile, &result->val.str, c);
1253       break;
1254
1255     case '$':
1256       if (!CPP_OPTION (pfile, dollars_in_ident))
1257         goto random_char;
1258       /* Fall through... */
1259
1260     case '_':
1261     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1262     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1263     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1264     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1265     case 'y': case 'z':
1266     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1267     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1268     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1269     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1270     case 'Y': case 'Z':
1271       result->type = CPP_NAME;
1272       result->val.node = parse_identifier (pfile, c);
1273
1274       /* 'L' may introduce wide characters or strings.  */
1275       if (result->val.node == pfile->spec_nodes->n_L)
1276         {
1277           c = buffer->read_ahead; /* For make_string.  */
1278           if (c == '\'' || c == '"')
1279             {
1280               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1281               goto make_string;
1282             }
1283         }
1284       /* Convert named operators to their proper types.  */
1285       else if (result->val.node->type == T_OPERATOR)
1286         {
1287           result->flags |= NAMED_OP;
1288           result->type = result->val.node->value.code;
1289         }
1290       break;
1291
1292     case '\'':
1293     case '"':
1294       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1295     make_string:
1296       parse_string (pfile, result, c); /* C is the terminator to look for.  */
1297       break;
1298
1299     case '/':
1300       result->type = CPP_DIV;
1301       c = get_effective_char (buffer);
1302       if (c == '=')
1303         ACCEPT_CHAR (CPP_DIV_EQ);
1304       else if (c == '*')
1305         {
1306           comment_start = buffer->cur;
1307
1308           /* Skip_block_comment updates buffer->read_ahead.  */
1309           if (skip_block_comment (pfile))
1310             cpp_error_with_line (pfile, result->line, result->col,
1311                                  "unterminated comment");
1312           if (!pfile->state.save_comments)
1313             {
1314               result->flags |= PREV_WHITE; /* A discarded comment counts as whitespace.  */
1315               goto next_char;
1316             }
1317
1318           /* Save the comment as a token in its own right.  */
1319           save_comment (pfile, result, comment_start);
1320         }
1321       else if (c == '/')
1322         {
1323           /* We silently allow C++ comments in system headers,
1324              irrespective of conformance mode, because lots of
1325              broken systems do that and trying to clean it up in
1326              fixincludes is a nightmare.  */
1327           if (CPP_IN_SYSTEM_HEADER (pfile))
1328             goto do_line_comment;
1329           if (CPP_OPTION (pfile, cplusplus_comments))
1330             {
1331               if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1332                   && ! buffer->warned_cplusplus_comments)
1333                 {
1334                   cpp_pedwarn (pfile,
1335                        "C++ style comments are not allowed in ISO C89");
1336                   cpp_pedwarn (pfile,
1337                        "(this will be reported only once per input file)");
1338                   buffer->warned_cplusplus_comments = 1;
1339                 }
1340
1341             do_line_comment:
1342               comment_start = buffer->cur;
1343
1344               /* Skip_line_comment updates buffer->read_ahead.  */
1345               if (skip_line_comment (buffer))
1346                 cpp_warning_with_line (pfile, result->line, result->col,
1347                                        "multi-line comment");
1348
1349               if (!pfile->state.save_comments)
1350                 {
1351                   result->flags |= PREV_WHITE; /* A discarded comment counts as whitespace.  */
1352                   goto next_char;
1353                 }
1354
1355               /* Save the comment as a token in its own right.  */
1356               save_comment (pfile, result, comment_start);
1357             }
1358         }
1359       break;
1360
1361     case '<':
1362       if (pfile->state.angled_headers)
1363         {
1364           result->type = CPP_HEADER_NAME;
1365           c = '>';              /* terminator.  */
1366           goto make_string;
1367         }
1368
1369       result->type = CPP_LESS;
1370       c = get_effective_char (buffer);
1371       if (c == '=')
1372         ACCEPT_CHAR (CPP_LESS_EQ);
1373       else if (c == '<')
1374         {
1375           ACCEPT_CHAR (CPP_LSHIFT);
1376           if (get_effective_char (buffer) == '=')
1377             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1378         }
1379       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1380         {
1381           ACCEPT_CHAR (CPP_MIN);
1382           if (get_effective_char (buffer) == '=')
1383             ACCEPT_CHAR (CPP_MIN_EQ);
1384         }
1385       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1386         {
1387           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1388           result->flags |= DIGRAPH;
1389         }
1390       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1391         {
1392           ACCEPT_CHAR (CPP_OPEN_BRACE);
1393           result->flags |= DIGRAPH;
1394         }
1395       break;
1396
1397     case '>':
1398       result->type = CPP_GREATER;
1399       c = get_effective_char (buffer);
1400       if (c == '=')
1401         ACCEPT_CHAR (CPP_GREATER_EQ);
1402       else if (c == '>')
1403         {
1404           ACCEPT_CHAR (CPP_RSHIFT);
1405           if (get_effective_char (buffer) == '=')
1406             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1407         }
1408       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1409         {
1410           ACCEPT_CHAR (CPP_MAX);
1411           if (get_effective_char (buffer) == '=')
1412             ACCEPT_CHAR (CPP_MAX_EQ);
1413         }
1414       break;
1415
1416     case '.':
1417       {
1418         const unsigned char *saved_cur;
1419         cppchar_t c1;
1420
1421         /* Save state to avoid needing to pass 2 chars to parse_number.  */
1422         SAVE_STATE ();
1423         c1 = get_effective_char (buffer);
1424         /* All known character sets have 0...9 contiguous.  */
1425         if (c1 >= '0' && c1 <= '9')
1426           {
1427             RESTORE_STATE ();
1428             goto make_number;
1429           }
1430
1431         result->type = CPP_DOT;
1432         if (c1 == '.')
1433           {
1434             if (get_effective_char (buffer) == '.')
1435               ACCEPT_CHAR (CPP_ELLIPSIS);
1436             else
1437               {
1438                 buffer->read_ahead = EOF; /* Only two dots: back-track to just after the first.  */
1439                 RESTORE_STATE ();
1440               }
1441           }
1442         else if (c1 == '*' && CPP_OPTION (pfile, cplusplus))
1443           ACCEPT_CHAR (CPP_DOT_STAR);
1444       }
1445       break;
1446
1447     case '%':
1448       result->type = CPP_MOD;
1449       c = get_effective_char (buffer);
1450       if (c == '=')
1451         ACCEPT_CHAR (CPP_MOD_EQ);
1452       else if (CPP_OPTION (pfile, digraphs))
1453         {
1454           if (c == ':')
1455             {
1456               result->flags |= DIGRAPH;
1457               ACCEPT_CHAR (CPP_HASH);
1458               if (get_effective_char (buffer) == '%')
1459                 check_long_token (buffer, result, ':', CPP_PASTE);
1460             }
1461           else if (c == '>')
1462             {
1463               result->flags |= DIGRAPH;
1464               ACCEPT_CHAR (CPP_CLOSE_BRACE);
1465             }
1466         }
1467       break;
1468
1469     case '+':
1470       result->type = CPP_PLUS;
1471       c = get_effective_char (buffer);
1472       if (c == '=')
1473         ACCEPT_CHAR (CPP_PLUS_EQ);
1474       else if (c == '+')
1475         ACCEPT_CHAR (CPP_PLUS_PLUS);
1476       break;
1477
1478     case '-':
1479       result->type = CPP_MINUS;
1480       c = get_effective_char (buffer);
1481       if (c == '>')
1482         {
1483           ACCEPT_CHAR (CPP_DEREF);
1484           if (CPP_OPTION (pfile, cplusplus)
1485               && get_effective_char (buffer) == '*')
1486             ACCEPT_CHAR (CPP_DEREF_STAR);
1487         }
1488       else if (c == '=')
1489         ACCEPT_CHAR (CPP_MINUS_EQ);
1490       else if (c == '-')
1491         ACCEPT_CHAR (CPP_MINUS_MINUS);
1492       break;
1493
1494     case '*':
1495       result->type = CPP_MULT;
1496       if (get_effective_char (buffer) == '=')
1497         ACCEPT_CHAR (CPP_MULT_EQ);
1498       break;
1499
1500     case '=':
1501       result->type = CPP_EQ;
1502       if (get_effective_char (buffer) == '=')
1503         ACCEPT_CHAR (CPP_EQ_EQ);
1504       break;
1505
1506     case '!':
1507       result->type = CPP_NOT;
1508       if (get_effective_char (buffer) == '=')
1509         ACCEPT_CHAR (CPP_NOT_EQ);
1510       break;
1511
1512     case '&':
1513       result->type = CPP_AND;
1514       c = get_effective_char (buffer);
1515       if (c == '=')
1516         ACCEPT_CHAR (CPP_AND_EQ);
1517       else if (c == '&')
1518         ACCEPT_CHAR (CPP_AND_AND);
1519       break;
1520
1521     case '#':
1522       result->type = CPP_HASH;
1523       if (get_effective_char (buffer) == '#')
1524         ACCEPT_CHAR (CPP_PASTE);
1525       break;
1526
1527     case '|':
1528       result->type = CPP_OR;
1529       c = get_effective_char (buffer);
1530       if (c == '=')
1531         ACCEPT_CHAR (CPP_OR_EQ);
1532       else if (c == '|')
1533         ACCEPT_CHAR (CPP_OR_OR);
1534       break;
1535
1536     case '^':
1537       result->type = CPP_XOR;
1538       if (get_effective_char (buffer) == '=')
1539         ACCEPT_CHAR (CPP_XOR_EQ);
1540       break;
1541
1542     case ':':
1543       result->type = CPP_COLON;
1544       c = get_effective_char (buffer);
1545       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1546         ACCEPT_CHAR (CPP_SCOPE);
1547       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1548         {
1549           result->flags |= DIGRAPH;
1550           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1551         }
1552       break;
1553
1554     case '~': result->type = CPP_COMPL; break;
1555     case ',': result->type = CPP_COMMA; break;
1556     case '(': result->type = CPP_OPEN_PAREN; break;
1557     case ')': result->type = CPP_CLOSE_PAREN; break;
1558     case '[': result->type = CPP_OPEN_SQUARE; break;
1559     case ']': result->type = CPP_CLOSE_SQUARE; break;
1560     case '{': result->type = CPP_OPEN_BRACE; break;
1561     case '}': result->type = CPP_CLOSE_BRACE; break;
1562     case ';': result->type = CPP_SEMICOLON; break;
1563
1564     case '@':
1565       if (CPP_OPTION (pfile, objc))
1566         {
1567           /* In Objective C, '@' may begin keywords or strings, like
1568              @keyword or @"string".  It would be nice to call
1569              get_effective_char here and test the result.  However, we
1570              would then need to pass 2 characters to parse_identifier,
1571              making it ugly and slowing down its main loop.  Instead,
1572              we assume we have an identifier, and recover if not.  */
1573           result->type = CPP_NAME;
1574           result->val.node = parse_identifier (pfile, c);
1575           if (result->val.node->length != 1)
1576             break;
1577
1578           /* OK, so it wasn't an identifier.  Maybe a string?  */
1579           if (buffer->read_ahead == '"')
1580             {
1581               c = '"';
1582               ACCEPT_CHAR (CPP_OSTRING);
1583               goto make_string;
1584             }
1585         }
1586       goto random_char;
1587
1588     random_char:
1589     default:
1590       result->type = CPP_OTHER;
1591       result->val.aux = c; /* Keep the character itself as the token's value.  */
1592       break;
1593     }
1594 }
1595
1596 /*
1597  *  The tokenizer's main loop.  Returns a token list, representing a
1598  *  logical line in the input file.  On EOF after some tokens have
1599  *  been processed, we return immediately.  Then in next call, or if
1600  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1601  *  token is placed in the list.  Tokens are appended to LIST.
1602  */
1603
1604 static void
1605 lex_line (pfile, list)
1606      cpp_reader *pfile;
1607      cpp_toklist *list;
1608 {
1609   unsigned int first_token; /* Index in LIST of this line's first token.  */
1610   cpp_token *cur_token, *first;
1611   cpp_buffer *buffer = pfile->buffer;
1612
1613   if (!(list->flags & LIST_OFFSET)) /* LIST must have LIST_OFFSET set.  */
1614     (abort) ();
1615
1616   pfile->state.in_lex_line = 1;
1617   if (pfile->buffer->cur == pfile->buffer->buf)
1618     list->flags |= BEG_OF_FILE;
1619
1620  retry:
1621   pfile->state.in_directive = 0;
1622   pfile->state.angled_headers = 0;
1623   pfile->state.skip_newlines = 1; /* Cleared below once the first token is lexed.  */
1624   pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1625   first_token = list->tokens_used;
1626   list->file = buffer->nominal_fname;
1627
1628   do
1629     {
1630       if (list->tokens_used >= list->tokens_cap)
1631         _cpp_expand_token_space (list, 256);
1632
1633       cur_token = list->tokens + list->tokens_used;
1634       lex_token (pfile, cur_token);
1635
1636       if (pfile->state.skip_newlines) /* True only for the line's first token.  */
1637         {
1638           pfile->state.skip_newlines = 0;
1639           list->line = buffer->lineno;
1640           if (cur_token->type == CPP_HASH)
1641             {
1642               pfile->state.in_directive = 1;
1643               pfile->state.save_comments = 0;
1644               pfile->state.indented = cur_token->flags & PREV_WHITE;
1645             }
1646           /* 6.10.3.10: Within the sequence of preprocessing tokens
1647              making up the invocation of a function-like macro, new
1648              line is considered a normal white-space character.  */
1649           else if (first_token != 0)
1650             cur_token->flags |= PREV_WHITE;
1651         }
1652       else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1) /* The token after the '#'.  */
1653         {
1654           if (cur_token->type == CPP_NUMBER)
1655             list->directive = _cpp_check_linemarker (pfile, cur_token);
1656           else
1657             list->directive = _cpp_check_directive (pfile, cur_token);
1658         }
1659
1660       /* _cpp_get_line assumes list->tokens_used refers to the current
1661          token being lexed.  So do this after _cpp_check_directive to
1662          get the warnings therein correct.  */
1663       list->tokens_used++;
1664     }
1665   while (cur_token->type != CPP_EOF);
1666
1667   /* All tokens are allocated, so the memory location is fixed.  */
1668   first = &list->tokens[first_token];
1669   first->flags |= BOL;
1670   pfile->first_directive_token = first;
1671
1672   /* Don't complain about the null directive, nor directives in
1673      assembly source: we don't know where the comments are, and # may
1674      introduce assembler pseudo-ops.  Don't complain about invalid
1675      directives in skipped conditional groups (6.10 p4).  */
1676   if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1677       && !CPP_OPTION (pfile, lang_asm))
1678     {
1679       if (cur_token > first + 1) /* More than just '#' and the terminating CPP_EOF.  */
1680         {
1681           if (first[1].type == CPP_NAME)
1682             cpp_error_with_line (pfile, first->line, first->col,
1683                                  "invalid preprocessing directive #%s",
1684                                  first[1].val.node->name);
1685           else
1686             cpp_error_with_line (pfile, first->line, first->col,
1687                                  "invalid preprocessing directive");
1688         }
1689
1690       /* Discard this line to prevent further errors from cc1.  */
1691       _cpp_clear_toklist (list);
1692       goto retry;
1693     }
1694
1695   /* Drop the EOF unless really at EOF or in a directive.  */
1696   if (cur_token != first && !KNOWN_DIRECTIVE (list)
1697       && pfile->done_initializing)
1698     list->tokens_used--;
1699
1700   pfile->state.in_lex_line = 0;
1701 }
1702
1703 /* Write the spelling of a token TOKEN, with any appropriate
1704    whitespace before it, to FP.  PREV is the previous token, which
1705    is used to determine if we need to shove in an extra space in order
1706    to avoid accidental token paste.  If WHITE is 0, do not insert any
1707    leading whitespace.  PREV may be null, in which case no paste check is made.  */
1708 static void
1709 output_token (pfile, fp, token, prev, white)
1710      cpp_reader *pfile;
1711      FILE *fp;
1712      const cpp_token *token, *prev;
1713      int white;
1714 {
1715   if (white)
1716     {
1717       int dummy; /* Receives can_paste's output argument, which we ignore.  */
1718
1719       if (token->col && (token->flags & BOL))
1720         {
1721           /* Supply enough whitespace to put this token in its original
1722              column.  Don't bother trying to reconstruct tabs; we can't
1723              get it right in general, and nothing ought to care.  (Yes,
1724              some things do care; the fault lies with them.)  */
1725           unsigned int spaces = token->col - 1;
1726
1727           while (spaces--)
1728             putc (' ', fp);
1729         }
1730       else if (token->flags & PREV_WHITE)
1731         putc (' ', fp);
1732       else
1733       /* Check for and prevent accidental token pasting.
1734          In addition to the cases handled by can_paste, consider
1735
1736          a + ++b - if there is not a space between the + and ++, it
1737          will be misparsed as a++ + b.  But + ## ++ doesn't produce
1738          a valid token.  */
1739         if (prev
1740             && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1741                 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1742                 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1743         putc (' ', fp);
1744     }
1745
1746   switch (TOKEN_SPELL (token))
1747     {
1748     case SPELL_OPERATOR:
1749       {
1750         const unsigned char *spelling;
1751
1752         if (token->flags & DIGRAPH)
1753           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1754         else if (token->flags & NAMED_OP)
1755           goto spell_ident; /* Write the name the operator was spelled with.  */
1756         else
1757           spelling = TOKEN_NAME (token);
1758
1759         ufputs (spelling, fp);
1760       }
1761       break;
1762
1763     case SPELL_IDENT:
1764       spell_ident:
1765       ufputs (token->val.node->name, fp);
1766       break;
1767
1768     case SPELL_STRING:
1769       {
1770         int left, right, tag; /* Delimiters and prefix character; '\0' means none.  */
1771         switch (token->type)
1772           {
1773           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1774           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1775           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1776           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1777           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1778           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1779           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1780           }
1781         if (tag) putc (tag, fp);
1782         if (left) putc (left, fp);
1783         fwrite (token->val.str.text, 1, token->val.str.len, fp);
1784         if (right) putc (right, fp);
1785       }
1786       break;
1787
1788     case SPELL_CHAR:
1789       putc (token->val.aux, fp);
1790       break;
1791
1792     case SPELL_NONE:
1793       /* Placemarker or EOF - no output.  (Macro args are handled
1794          elsewhere.)  */
1795       break;
1796     }
1797 }
1798
1799 /* Dump the original user's spelling of argument index ARG_NO to the
1800    macro whose expansion is LIST, writing it to FP.  */
1801 static void
1802 dump_param_spelling (fp, list, arg_no)
1803      FILE *fp;
1804      const cpp_toklist *list;
1805      unsigned int arg_no;
1806 {
1807   const U_CHAR *param = list->namebuf; /* Walked as consecutive NUL-terminated strings.  */
1808
1809   while (arg_no--)
1810     param += ustrlen (param) + 1; /* Step over one string and its NUL.  */
1811   ufputs (param, fp);
1812 }
1813
1814 /* Output all the tokens of LIST, starting at TOKEN, to FP.  */
1815 void
1816 cpp_output_list (pfile, fp, list, token)
1817      cpp_reader *pfile;
1818      FILE *fp;
1819      const cpp_toklist *list;
1820      const cpp_token *token;
1821 {
1822   const cpp_token *limit = list->tokens + list->tokens_used;
1823   const cpp_token *prev = 0;
1824   int white = 0;
1825
1826   while (token < limit)
1827     {
1828       /* XXX Find some way we can write macro args from inside
1829          output_token/spell_token.  */
1830       if (token->type == CPP_MACRO_ARG)
1831         {
1832           if (white && token->flags & PREV_WHITE)
1833             putc (' ', fp);
1834           if (token->flags & STRINGIFY_ARG)
1835             putc ('#', fp);
1836           dump_param_spelling (fp, list, token->val.aux);
1837         }
1838       else
1839         output_token (pfile, fp, token, prev, white);
1840       if (token->flags & PASTE_LEFT)
1841         fputs (" ##", fp);
1842       prev = token;
1843       token++;
1844       white = 1;
1845     }
1846 }
1847
1848
1849 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1850    already contain the enough space to hold the token's spelling.
1851    Returns a pointer to the character after the last character
1852    written.  */
1853
1854 static unsigned char *
1855 spell_token (pfile, token, buffer)
1856      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1857      const cpp_token *token;
1858      unsigned char *buffer;
1859 {
1860   switch (TOKEN_SPELL (token))
1861     {
1862     case SPELL_OPERATOR:
1863       {
1864         const unsigned char *spelling;
1865         unsigned char c;
1866
1867         if (token->flags & DIGRAPH)
1868           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1869         else if (token->flags & NAMED_OP)
1870           goto spell_ident;
1871         else
1872           spelling = TOKEN_NAME (token);
1873
1874         while ((c = *spelling++) != '\0')
1875           *buffer++ = c;
1876       }
1877       break;
1878
1879     case SPELL_IDENT:
1880       spell_ident:
1881       memcpy (buffer, token->val.node->name, token->val.node->length);
1882       buffer += token->val.node->length;
1883       break;
1884
1885     case SPELL_STRING:
1886       {
1887         int left, right, tag;
1888         switch (token->type)
1889           {
1890           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1891           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1892           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1893           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1894           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1895           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1896           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1897           }
1898         if (tag) *buffer++ = tag;
1899         if (left) *buffer++ = left;
1900         memcpy (buffer, token->val.str.text, token->val.str.len);
1901         buffer += token->val.str.len;
1902         if (right) *buffer++ = right;
1903       }
1904       break;
1905
1906     case SPELL_CHAR:
1907       *buffer++ = token->val.aux;
1908       break;
1909
1910     case SPELL_NONE:
1911       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1912       break;
1913     }
1914
1915   return buffer;
1916 }
1917
1918 /* Macro expansion algorithm.
1919
1920 Macro expansion is implemented by a single-pass algorithm; there are
1921 no rescan passes involved.  cpp_get_token expands just enough to be
1922 able to return a token to the caller; a consequence is that when it
1923 returns, the preprocessor can be in a state of mid-expansion.  The
1924 algorithm does not work by fully expanding a macro invocation into
1925 some kind of token list, and then returning them one by one.
1926
1927 Our expansion state is recorded in a context stack.  We start out with
1928 a single context on the stack, let's call it base context.  This
1929 consists of the token list returned by lex_line that forms the next
1930 logical line in the source file.
1931
1932 The current level in the context stack is stored in the cur_context
1933 member of the cpp_reader structure.  The context it references keeps,
1934 amongst other things, a count of how many tokens form that context and
1935 our position within those tokens.
1936
1937 Fundamentally, calling cpp_get_token will return the next token from
1938 the current context.  If we're at the end of the current context, that
1939 context is popped from the stack first, unless it is the base context,
1940 in which case the next logical line is lexed from the source file.
1941
1942 However, before returning the token, if it is a CPP_NAME token
1943 _cpp_get_token checks to see if it is a macro and if it is enabled.
1944 Each time it encounters a macro name, it calls push_macro_context.
1945 This function checks that the macro should be expanded (with
1946 is_macro_disabled), and if so pushes a new macro context on the stack
1947 which becomes the current context.  It then loops back to read the
1948 first token of the macro context.
1949
1950 A macro context basically consists of the token list representing the
1951 macro's replacement list, which was saved in the hash table by
1952 save_macro_expansion when its #define statement was parsed.  If the
1953 macro is function-like, it also contains the tokens that form the
1954 arguments to the macro.  I say more about macro arguments below, but
1955 for now just saying that each argument is a set of pointers to tokens
1956 is enough.
1957
1958 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1959 token.  This represents an argument passed to the macro, with the
1960 argument number stored in the token's AUX field.  The argument should
1961 be substituted; this is achieved by pushing an "argument context".  An
1962 argument context just refers to the tokens forming the argument,
1963 which are obtained directly from the macro context.  The STRINGIFY_ARG
1964 flag on a CPP_MACRO_ARG token indicates that the argument should be
1965 stringified.
1966
1967 Here are a few simple rules the context stack obeys:-
1968
1969   1) The lex_line token list is always context zero.
1970
1971   2) Context 1, if it exists, must be a macro context.
1972
1973   3) An argument context can only appear above a macro context.
1974
1975   4) A macro context can appear above the base context, another macro
1976   context, or an argument context.
1977
1978   5) These imply that the minimal level of an argument context is 2.
1979
1980 The only tricky thing left is ensuring that macros are enabled and
1981 disabled correctly.  The algorithm controls macro expansion by the
1982 level of the context a token is taken from in the context stack.  If a
1983 token is taken from a level equal to no_expand_level (a member of
1984 struct cpp_reader), no expansion is performed.
1985
1986 When popping a context off the stack, if no_expand_level equals the
1987 level of the popped context, it is reduced by one to match the new
1988 context level, so that expansion is still disabled.  It does not
1989 increase if a context is pushed, though.  It starts out life as
1990 UINT_MAX, which has the effect that initially macro expansion is
1991 enabled.  I explain how this mechanism works below.
1992
1993 The standard requires:-
1994
1995   1) Arguments to be fully expanded before substitution.
1996
1997   2) Stringified arguments to not be expanded, nor the tokens
1998   immediately surrounding a ## operator.
1999
2000   3) Continual rescanning until there are no more macros left to
2001   replace.
2002
2003   4) Once a macro has been expanded in stage 1) or 3), it cannot be
2004   expanded again during later rescans.  This prevents infinite
2005   recursion.
2006
2007 The first thing to observe is that stage 3) is mostly redundant.
2008 Since a macro is disabled once it has been expanded, how can a rescan
2009 find an unexpanded macro name?  There are only two cases where this is
2010 possible:-
2011
2012   a) If the macro name results from a token paste operation.
2013
2014   b) If the macro in question is a function-like macro that hasn't
2015   already been expanded because previously there was not the required
2016   '(' token immediately following it.  This is only possible when an
2017   argument is substituted, and after substitution the last token of
2018   the argument can bind with a parenthesis appearing in the tokens
2019   following the substitution.  Note that if the '(' appears within the
2020   argument, the ')' must too, as expanding macro arguments cannot
2021   "suck in" tokens outside the argument.
2022
2023 So we tackle this as follows.  When parsing the macro invocation for
2024 arguments, we record the tokens forming each argument as a list of
2025 pointers to those tokens.  We do not expand any tokens that are "raw",
2026 i.e. directly from the macro invocation, but other tokens that come
2027 from (nested) argument substitution are fully expanded.
2028
2029 This is achieved by setting the no_expand_level to that of the macro
2030 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
2031 forming an argument, because parse_args (indirectly) calls
2032 get_raw_token which automatically pushes argument contexts and traces
2033 into them.  Since these contexts are at a higher level than the
2034 no_expand_level, they get fully macro expanded.
2035
2036 "Raw" and non-raw tokens are separated in arguments by null pointers,
2037 with the policy that the initial state of an argument is raw.  If the
2038 first token is not raw, it should be preceded by a null pointer.  When
2039 tracing through the tokens of an argument context, each time
2040 get_raw_token encounters a null pointer, it toggles the flag
2041 CONTEXT_RAW.
2042
2043 This flag, when set, indicates to is_macro_disabled that we are
2044 reading raw tokens which should be macro-expanded.  Similarly, if
2045 clear, is_macro_disabled suppresses re-expansion.
2046
2047 It's probably time for an example.
2048
2049 #define hash #
2050 #define str(x) #x
2051 #define xstr(y) str(y hash)
2052 str(hash)                       // "hash"
2053 xstr(hash)                      // "# hash"
2054
2055 In the invocation of str, parse_args turns off macro expansion and so
2056 parses the argument as <hash>.  This is the only token (pointer)
2057 passed as the argument to str.  Since <hash> is raw there is no need
2058 for an initial null pointer.  stringify_arg is called from
2059 get_raw_token when tracing through the expansion of str, since the
2060 argument has the STRINGIFY_ARG flag set.  stringify_arg turns off
2061 macro expansion by setting the no_expand_level to that of the argument
2062 context.  Thus it gets the token <hash> and stringifies it to "hash"
2063 correctly.
2064
2065 Similarly xstr is passed <hash>.  However, when parse_args is parsing
2066 the invocation of str() in xstr's expansion, get_raw_token encounters
2067 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
2068 an argument context, and enters the tokens of the argument,
2069 i.e. <hash>.  This is at a higher context level than parse_args
2070 disabled, and so is_macro_disabled permits expansion of it and a macro
2071 context is pushed on top of the argument context.  This contains the
2072 <#> token, and the end result is that <hash> is macro expanded.
2073 However, after popping off the argument context, the <hash> of xstr's
2074 expansion does not get macro expanded because we're back at the
2075 no_expand_level.  The end result is that the argument passed to str is
2076 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
2077 raw, <#> is not raw, but then <hash> is.
2078
2079 */
2080
2081
2082 /* Free the storage allocated for macro arguments.  */
2083 static void
2084 free_macro_args (args)
2085      macro_args *args;
2086 {
2087   if (args->tokens)
2088     free ((PTR) args->tokens);
2089   free (args->ends);
2090   free (args);
2091 }
2092
/* Determines if a macro has been already used (and is therefore
   disabled).

   PFILE is the reader whose context stack is examined, EXPANSION is
   the macro's replacement list and TOKEN is the token that names the
   macro.  Returns 1 if the macro is disabled at this point and 0 if
   it is not.  Three checks are made, in this order:

     - if the current context is an argument context, its flags and
       TOKEN's PASTED flag decide whether to give up at once or to
       judge from the context below;

     - the context stack is walked downwards looking for a macro
       context whose token list is EXPANSION;

     - if EXPANSION->paramc is non-negative, the next token is read
       and must be a CPP_OPEN_PAREN; otherwise it is pushed back and
       the macro is reported as disabled.  */
static int
is_macro_disabled (pfile, expansion, token)
     cpp_reader *pfile;
     const cpp_toklist *expansion;
     const cpp_token *token;
{
  cpp_context *context = CURRENT_CONTEXT (pfile);

  /* Arguments on either side of ## are inserted in place without
     macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
     occurs during a later rescan pass.  The effect is that we expand
     iff we would as part of the macro's expansion list, so we should
     drop to the macro's context.  */
  if (IS_ARG_CONTEXT (context))
    {
      /* A token produced by a paste is judged from the context below
         the argument context.  */
      if (token->flags & PASTED)
        context--;
      /* Tokens read while CONTEXT_RAW is clear are never expanded.  */
      else if (!(context->flags & CONTEXT_RAW))
        return 1;
      /* A raw argument that is an operand of ## is likewise judged
         from the context below.  */
      else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
        context--;
    }

  /* Have we already used this macro?  */
  while (context->level > 0)
    {
      /* Only non-argument contexts carry a token list that can be
         compared with EXPANSION.  */
      if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
        return 1;
      /* Raw argument tokens are judged based on the token list they
         came from.  */
      if (context->flags & CONTEXT_RAW)
        context = pfile->contexts + context->level;
      else
        context--;
    }

  /* Function-like macros may be disabled if the '(' is not in the
     current context.  We check this without disrupting the context
     stack.  */
  if (expansion->paramc >= 0)
    {
      const cpp_token *next;
      unsigned int prev_nme;

      /* Start again from the top: the walk above may have moved
         CONTEXT a long way down.  */
      context = CURRENT_CONTEXT (pfile);
      /* Drop down any contexts we're at the end of: the '(' may
         appear in lower macro expansions, or in the rest of the file.  */
      while (context->posn == context->count && context > pfile->contexts)
        {
          context--;
          /* If we matched, we are disabled, as we appear in the
             expansion of each macro we meet.  */
          if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
            return 1;
        }

      /* Read one token with no_expand_level temporarily set to the
         index of the context we dropped to, then put the previous
         level back.  */
      prev_nme = pfile->no_expand_level;
      pfile->no_expand_level = context - pfile->contexts;
      next = _cpp_get_token (pfile);
      restore_macro_expansion (pfile, prev_nme);
      if (next->type != CPP_OPEN_PAREN)
        {
          /* Not an invocation: return the token we peeked at so the
             caller sees it next.  */
          _cpp_push_token (pfile, next);
          if (CPP_WTRADITIONAL (pfile))
            cpp_warning (pfile,
         "function macro %s must be used with arguments in traditional C",
                         token->val.node->name);
          return 1;
        }
    }

  return 0;
}
2168
2169 /* Add a token to the set of tokens forming the arguments to the macro
2170    being parsed in parse_args.  */
2171 static void
2172 save_token (args, token)
2173      macro_args *args;
2174      const cpp_token *token;
2175 {
2176   if (args->used == args->capacity)
2177     {
2178       args->capacity += args->capacity + 100;
2179       args->tokens = (const cpp_token **)
2180         xrealloc ((PTR) args->tokens,
2181                   args->capacity * sizeof (const cpp_token *));
2182     }
2183   args->tokens[args->used++] = token;
2184 }
2185
2186 /* Take and save raw tokens until we finish one argument.  Empty
2187    arguments are saved as a single CPP_PLACEMARKER token.  */
2188 static const cpp_token *
2189 parse_arg (pfile, var_args, paren_context, args, pcount)
2190      cpp_reader *pfile;
2191      int var_args;
2192      unsigned int paren_context;
2193      macro_args *args;
2194      unsigned int *pcount;
2195 {
2196   const cpp_token *token;
2197   unsigned int paren = 0, count = 0;
2198   int raw, was_raw = 1;
2199
2200   for (count = 0;; count++)
2201     {
2202       token = _cpp_get_token (pfile);
2203
2204       switch (token->type)
2205         {
2206         default:
2207           break;
2208
2209         case CPP_OPEN_PAREN:
2210           paren++;
2211           break;
2212
2213         case CPP_CLOSE_PAREN:
2214           if (paren-- != 0)
2215             break;
2216           goto out;
2217
2218         case CPP_COMMA:
2219           /* Commas are not terminators within parantheses or var_args.  */
2220           if (paren || var_args)
2221             break;
2222           goto out;
2223
2224         case CPP_EOF:           /* Error reported by caller.  */
2225           goto out;
2226         }
2227
2228       raw = pfile->cur_context <= paren_context;
2229       if (raw != was_raw)
2230         {
2231           was_raw = raw;
2232           save_token (args, 0);
2233           count++;
2234         }
2235       save_token (args, token);
2236     }
2237
2238  out:
2239   if (count == 0)
2240     {
2241       /* Duplicate the placemarker.  Then we can set its flags and
2242          position and safely be using more than one.  */
2243       save_token (args, duplicate_token (pfile, &placemarker_token));
2244       count++;
2245     }
2246
2247   *pcount = count;
2248   return token;
2249 }
2250
/* Non-zero if the argument that starts at offset O of arglist A is
   empty.  An empty argument is stored either as a lone PLACEMARKER
   token or as a null pointer followed by a PLACEMARKER, so the token
   tested is the one at O when that entry is non-null and the one at
   O + 1 otherwise.  */

#define empty_argument(A, O) \
 ((A)->tokens[(A)->tokens[O] ? (O) : (O) + 1]->type == CPP_PLACEMARKER)
2258
2259 /* Parse the arguments making up a macro invocation.  Nested arguments
2260    are automatically macro expanded, but immediate macros are not
2261    expanded; this enables e.g. operator # to work correctly.  Returns
2262    non-zero on error.  */
2263 static int
2264 parse_args (pfile, hp, args)
2265      cpp_reader *pfile;
2266      cpp_hashnode *hp;
2267      macro_args *args;
2268 {
2269   const cpp_token *token;
2270   const cpp_toklist *macro;
2271   unsigned int total = 0;
2272   unsigned int paren_context = pfile->cur_context;
2273   int argc = 0;
2274
2275   macro = hp->value.expansion;
2276   do
2277     {
2278       unsigned int count;
2279
2280       token = parse_arg (pfile, (argc + 1 == macro->paramc
2281                                  && (macro->flags & VAR_ARGS)),
2282                          paren_context, args, &count);
2283       if (argc < macro->paramc)
2284         {
2285           total += count;
2286           args->ends[argc] = total;
2287         }
2288       argc++;
2289     }
2290   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2291
2292   if (token->type == CPP_EOF)
2293     {
2294       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2295       return 1;
2296     }
2297   else if (argc < macro->paramc)
2298     {
2299       /* A rest argument is allowed to not appear in the invocation at all.
2300          e.g. #define debug(format, args...) ...
2301          debug("string");
2302          This is exactly the same as if the rest argument had received no
2303          tokens - debug("string",);  This extension is deprecated.  */
2304
2305       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2306         {
2307           /* Duplicate the placemarker.  Then we can set its flags and
2308              position and safely be using more than one.  */
2309           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2310           pm->flags = VOID_REST;
2311           save_token (args, pm);
2312           args->ends[argc] = total + 1;
2313
2314           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2315             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2316
2317           return 0;
2318         }
2319       else
2320         {
2321           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2322           return 1;
2323         }
2324     }
2325   /* An empty argument to an empty function-like macro is fine.  */
2326   else if (argc > macro->paramc
2327            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2328     {
2329       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2330       return 1;
2331     }
2332
2333   return 0;
2334 }
2335
2336 /* Adds backslashes before all backslashes and double quotes appearing
2337    in strings.  Non-printable characters are converted to octal.  */
2338 static U_CHAR *
2339 quote_string (dest, src, len)
2340      U_CHAR *dest;
2341      const U_CHAR *src;
2342      unsigned int len;
2343 {
2344   while (len--)
2345     {
2346       U_CHAR c = *src++;
2347
2348       if (c == '\\' || c == '"')
2349         {
2350           *dest++ = '\\';
2351           *dest++ = c;
2352         }
2353       else
2354         {
2355           if (ISPRINT (c))
2356             *dest++ = c;
2357           else
2358             {
2359               sprintf ((char *) dest, "\\%03o", c);
2360               dest += 4;
2361             }
2362         }
2363     }
2364
2365   return dest;
2366 }
2367
2368 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2369    CPP_STRING token containing TEXT in quoted form.  */
2370 static cpp_token *
2371 make_string_token (token, text, len)
2372      cpp_token *token;
2373      const U_CHAR *text;
2374      unsigned int len;
2375 {
2376   U_CHAR *buf;
2377
2378   buf = (U_CHAR *) xmalloc (len * 4);
2379   token->type = CPP_STRING;
2380   token->flags = 0;
2381   token->val.str.text = buf;
2382   token->val.str.len = quote_string (buf, text, len) - buf;
2383   return token;
2384 }
2385
2386 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2387    evaluating to NUMBER.  */
2388 static cpp_token *
2389 alloc_number_token (pfile, number)
2390      cpp_reader *pfile;
2391      int number;
2392 {
2393   cpp_token *result;
2394   char *buf;
2395
2396   result = get_temp_token (pfile);
2397   buf = xmalloc (20);
2398   sprintf (buf, "%d", number);
2399
2400   result->type = CPP_NUMBER;
2401   result->flags = 0;
2402   result->val.str.text = (U_CHAR *) buf;
2403   result->val.str.len = strlen (buf);
2404   return result;
2405 }
2406
2407 /* Returns a temporary token from the temporary token store of PFILE.  */
2408 static cpp_token *
2409 get_temp_token (pfile)
2410      cpp_reader *pfile;
2411 {
2412   if (pfile->temp_used == pfile->temp_alloced)
2413     {
2414       if (pfile->temp_used == pfile->temp_cap)
2415         {
2416           pfile->temp_cap += pfile->temp_cap + 20;
2417           pfile->temp_tokens = (cpp_token **) xrealloc
2418             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2419         }
2420       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2421         (sizeof (cpp_token));
2422     }
2423
2424   return pfile->temp_tokens[pfile->temp_used++];
2425 }
2426
2427 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2428 static void
2429 release_temp_tokens (pfile)
2430      cpp_reader *pfile;
2431 {
2432   while (pfile->temp_used)
2433     {
2434       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2435
2436       if (TOKEN_SPELL (token) == SPELL_STRING)
2437         {
2438           free ((char *) token->val.str.text);
2439           token->val.str.text = 0;
2440         }
2441     }
2442 }
2443
2444 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2445 void
2446 _cpp_free_temp_tokens (pfile)
2447      cpp_reader *pfile;
2448 {
2449   if (pfile->temp_tokens)
2450     {
2451       /* It is possible, though unlikely (looking for '(' of a funlike
2452          macro into EOF), that we haven't released the tokens yet.  */
2453       release_temp_tokens (pfile);
2454       while (pfile->temp_alloced)
2455         free (pfile->temp_tokens[--pfile->temp_alloced]);
2456       free (pfile->temp_tokens);
2457     }
2458
2459   if (pfile->date)
2460     {
2461       free ((char *) pfile->date->val.str.text);
2462       free (pfile->date);
2463       free ((char *) pfile->time->val.str.text);
2464       free (pfile->time);
2465     }
2466 }
2467
2468 /* Copy TOKEN into a temporary token from PFILE's store.  */
2469 static cpp_token *
2470 duplicate_token (pfile, token)
2471      cpp_reader *pfile;
2472      const cpp_token *token;
2473 {
2474   cpp_token *result = get_temp_token (pfile);
2475
2476   *result = *token;
2477   if (TOKEN_SPELL (token) == SPELL_STRING)
2478     {
2479       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2480       memcpy (buff, token->val.str.text, token->val.str.len);
2481       result->val.str.text = buff;
2482     }
2483   return result;
2484 }
2485
2486 /* Determine whether two tokens can be pasted together, and if so,
2487    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2488    be pasted, or the appropriate type for the merged token if they
2489    can.  */
2490 static enum cpp_ttype
2491 can_paste (pfile, token1, token2, digraph)
2492      cpp_reader * pfile;
2493      const cpp_token *token1, *token2;
2494      int* digraph;
2495 {
2496   enum cpp_ttype a = token1->type, b = token2->type;
2497   int cxx = CPP_OPTION (pfile, cplusplus);
2498
2499   /* Treat named operators as if they were ordinary NAMEs.  */
2500   if (token1->flags & NAMED_OP)
2501     a = CPP_NAME;
2502   if (token2->flags & NAMED_OP)
2503     b = CPP_NAME;
2504
2505   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2506     return a + (CPP_EQ_EQ - CPP_EQ);
2507
2508   switch (a)
2509     {
2510     case CPP_GREATER:
2511       if (b == a) return CPP_RSHIFT;
2512       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2513       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2514       break;
2515     case CPP_LESS:
2516       if (b == a) return CPP_LSHIFT;
2517       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2518       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2519       if (CPP_OPTION (pfile, digraphs))
2520         {
2521           if (b == CPP_COLON)
2522             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2523           if (b == CPP_MOD)
2524             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2525         }
2526       break;
2527
2528     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2529     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2530     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2531
2532     case CPP_MINUS:
2533       if (b == a)               return CPP_MINUS_MINUS;
2534       if (b == CPP_GREATER)     return CPP_DEREF;
2535       break;
2536     case CPP_COLON:
2537       if (b == a && cxx)        return CPP_SCOPE;
2538       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2539         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2540       break;
2541
2542     case CPP_MOD:
2543       if (CPP_OPTION (pfile, digraphs))
2544         {
2545           if (b == CPP_GREATER)
2546             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2547           if (b == CPP_COLON)
2548             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2549         }
2550       break;
2551     case CPP_DEREF:
2552       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2553       break;
2554     case CPP_DOT:
2555       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2556       if (b == CPP_NUMBER)      return CPP_NUMBER;
2557       break;
2558
2559     case CPP_HASH:
2560       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2561         /* %:%: digraph */
2562         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2563       break;
2564
2565     case CPP_NAME:
2566       if (b == CPP_NAME)        return CPP_NAME;
2567       if (b == CPP_NUMBER
2568           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2569       if (b == CPP_CHAR
2570           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2571       if (b == CPP_STRING
2572           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2573       break;
2574
2575     case CPP_NUMBER:
2576       if (b == CPP_NUMBER)      return CPP_NUMBER;
2577       if (b == CPP_NAME)        return CPP_NUMBER;
2578       if (b == CPP_DOT)         return CPP_NUMBER;
2579       /* Numbers cannot have length zero, so this is safe.  */
2580       if ((b == CPP_PLUS || b == CPP_MINUS)
2581           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2582         return CPP_NUMBER;
2583       break;
2584
2585     case CPP_OTHER:
2586       if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2587         {
2588           if (b == CPP_NAME)    return CPP_NAME;
2589           if (b == CPP_STRING)  return CPP_OSTRING;
2590         }
2591
2592     default:
2593       break;
2594     }
2595
2596   return CPP_EOF;
2597 }
2598
2599 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2600 static const cpp_token *
2601 maybe_paste_with_next (pfile, token)
2602      cpp_reader *pfile;
2603      const cpp_token *token;
2604 {
2605   cpp_token *pasted;
2606   const cpp_token *second;
2607   cpp_context *context = CURRENT_CONTEXT (pfile);
2608
2609   /* Is this token on the LHS of ## ? */
2610
2611   while ((token->flags & PASTE_LEFT)
2612          || ((context->flags & CONTEXT_PASTEL)
2613              && context->posn == context->count))
2614     {
2615       /* Suppress macro expansion for next token, but don't conflict
2616          with the other method of suppression.  If it is an argument,
2617          macro expansion within the argument will still occur.  */
2618       pfile->paste_level = pfile->cur_context;
2619       second = _cpp_get_token (pfile);
2620       pfile->paste_level = 0;
2621
2622       /* Ignore placemarker argument tokens (cannot be from an empty
2623          macro since macros are not expanded).  */
2624       if (token->type == CPP_PLACEMARKER)
2625         pasted = duplicate_token (pfile, second);
2626       else if (second->type == CPP_PLACEMARKER)
2627         {
2628           /* GCC has special extended semantics for , ## b where b is
2629              a varargs parameter: the comma disappears if b was given
2630              no actual arguments (not merely if b is an empty
2631              argument).  */
2632           if (token->type == CPP_COMMA && second->flags & VOID_REST)
2633             pasted = duplicate_token (pfile, second);
2634           else
2635             pasted = duplicate_token (pfile, token);
2636         }
2637       else
2638         {
2639           int digraph = 0;
2640           enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2641
2642           if (type == CPP_EOF)
2643             {
2644               if (CPP_OPTION (pfile, warn_paste))
2645                 {
2646                   /* Do not complain about , ## <whatever> if
2647                      <whatever> came from a variable argument, because
2648                      the author probably intended the ## to trigger
2649                      the special extended semantics (see above).  */
2650                   if (token->type == CPP_COMMA
2651                       && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2652                       && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2653                     /* no warning */;
2654                   else
2655                     cpp_warning (pfile,
2656                         "pasting would not give a valid preprocessing token");
2657                 }
2658               _cpp_push_token (pfile, second);
2659               /* A short term hack to safely clear the PASTE_LEFT flag.  */
2660               pasted = duplicate_token (pfile, token);
2661               pasted->flags &= ~PASTE_LEFT;
2662               return pasted;
2663             }
2664
2665           if (type == CPP_NAME || type == CPP_NUMBER)
2666             {
2667               /* Join spellings.  */
2668               U_CHAR *buf, *end;
2669
2670               pasted = get_temp_token (pfile);
2671               buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2672               end = spell_token (pfile, token, buf);
2673               end = spell_token (pfile, second, end);
2674               *end = '\0';
2675
2676               if (type == CPP_NAME)
2677                 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2678               else
2679                 {
2680                   pasted->val.str.text = uxstrdup (buf);
2681                   pasted->val.str.len = end - buf;
2682                 }
2683             }
2684           else if (type == CPP_WCHAR || type == CPP_WSTRING
2685                    || type == CPP_OSTRING)
2686             pasted = duplicate_token (pfile, second);
2687           else
2688             {
2689               pasted = get_temp_token (pfile);
2690               pasted->val.integer = 0;
2691             }
2692
2693           pasted->type = type;
2694           pasted->flags = digraph ? DIGRAPH : 0;
2695
2696           if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2697             {
2698               pasted->type = pasted->val.node->value.code;
2699               pasted->flags |= NAMED_OP;
2700             }
2701         }
2702
2703       /* The pasted token gets the whitespace flags and position of the
2704          first token, the PASTE_LEFT flag of the second token, plus the
2705          PASTED flag to indicate it is the result of a paste.  However, we
2706          want to preserve the DIGRAPH flag.  */
2707       pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2708       pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2709                         | (second->flags & PASTE_LEFT) | PASTED);
2710       pasted->col = token->col;
2711       pasted->line = token->line;
2712
2713       /* See if there is another token to be pasted onto the one we just
2714          constructed.  */
2715       token = pasted;
2716       context = CURRENT_CONTEXT (pfile);
2717       /* and loop */
2718     }
2719   return token;
2720 }
2721
2722 /* Convert a token sequence to a single string token according to the
2723    rules of the ISO C #-operator.  */
2724 #define INIT_SIZE 200
2725 static cpp_token *
2726 stringify_arg (pfile, token)
2727      cpp_reader *pfile;
2728      const cpp_token *token;
2729 {
2730   cpp_token *result;
2731   unsigned char *main_buf;
2732   unsigned int prev_value, backslash_count = 0;
2733   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2734
2735   push_arg_context (pfile, token);
2736   prev_value  = prevent_macro_expansion (pfile);
2737   main_buf = (unsigned char *) xmalloc (buf_cap);
2738
2739   result = get_temp_token (pfile);
2740   ASSIGN_FLAGS_AND_POS (result, token);
2741
2742   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2743     {
2744       int escape;
2745       unsigned char *buf;
2746       unsigned int len = TOKEN_LEN (token);
2747
2748       if (token->type == CPP_PLACEMARKER)
2749         continue;
2750
2751       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2752                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2753       if (escape)
2754         len *= 4 + 1;
2755
2756       if (buf_used + len > buf_cap)
2757         {
2758           buf_cap = buf_used + len + INIT_SIZE;
2759           main_buf = xrealloc (main_buf, buf_cap);
2760         }
2761
2762       if (whitespace && (token->flags & PREV_WHITE))
2763         main_buf[buf_used++] = ' ';
2764
2765       if (escape)
2766         buf = (unsigned char *) xmalloc (len);
2767       else
2768         buf = main_buf + buf_used;
2769
2770       len = spell_token (pfile, token, buf) - buf;
2771       if (escape)
2772         {
2773           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2774           free (buf);
2775         }
2776       else
2777         buf_used += len;
2778
2779       whitespace = 1;
2780       if (token->type == CPP_BACKSLASH)
2781         backslash_count++;
2782       else
2783         backslash_count = 0;
2784     }
2785
2786   /* Ignore the final \ of invalid string literals.  */
2787   if (backslash_count & 1)
2788     {
2789       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2790       buf_used--;
2791     }
2792
2793   result->type = CPP_STRING;
2794   result->val.str.text = main_buf;
2795   result->val.str.len = buf_used;
2796   restore_macro_expansion (pfile, prev_value);
2797   return result;
2798 }
2799
2800 /* Allocate more room on the context stack of PFILE.  */
2801 static void
2802 expand_context_stack (pfile)
2803      cpp_reader *pfile;
2804 {
2805   pfile->context_cap += pfile->context_cap + 20;
2806   pfile->contexts = (cpp_context *)
2807     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2808 }
2809
2810 /* Push the context of macro NODE onto the context stack.  TOKEN is
2811    the CPP_NAME token invoking the macro.  */
2812 static int
2813 push_macro_context (pfile, token)
2814      cpp_reader *pfile;
2815      const cpp_token *token;
2816 {
2817   unsigned char orig_flags;
2818   macro_args *args;
2819   cpp_context *context;
2820   cpp_hashnode *node = token->val.node;
2821
2822   /* Token's flags may change when parsing args containing a nested
2823      invocation of this macro.  */
2824   orig_flags = token->flags & (PREV_WHITE | BOL);
2825   args = 0;
2826   if (node->value.expansion->paramc >= 0)
2827     {
2828       unsigned int error, prev_nme;
2829
2830       /* Allocate room for the argument contexts, and parse them.  */
2831       args  = (macro_args *) xmalloc (sizeof (macro_args));
2832       args->ends = (unsigned int *)
2833         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2834       args->tokens = 0;
2835       args->capacity = 0;
2836       args->used = 0;
2837
2838       prev_nme = prevent_macro_expansion (pfile);
2839       pfile->args = args;
2840       error = parse_args (pfile, node, args);
2841       pfile->args = 0;
2842       restore_macro_expansion (pfile, prev_nme);
2843       if (error)
2844         {
2845           free_macro_args (args);
2846           return 1;
2847         }
2848       /* Set the level after the call to parse_args.  */
2849       args->level = pfile->cur_context;
2850     }
2851
2852   /* Now push its context.  */
2853   pfile->cur_context++;
2854   if (pfile->cur_context == pfile->context_cap)
2855     expand_context_stack (pfile);
2856
2857   context = CURRENT_CONTEXT (pfile);
2858   context->u.list = node->value.expansion;
2859   context->args = args;
2860   context->posn = 0;
2861   context->count = context->u.list->tokens_used;
2862   context->level = pfile->cur_context;
2863   context->flags = 0;
2864   context->pushed_token = 0;
2865
2866   /* Set the flags of the first token.  We know there must
2867      be one, empty macros are a single placemarker token.  */
2868   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2869
2870   return 0;
2871 }
2872
2873 /* Push an argument to the current macro onto the context stack.
2874    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2875 static void
2876 push_arg_context (pfile, token)
2877      cpp_reader *pfile;
2878      const cpp_token *token;
2879 {
2880   cpp_context *context;
2881   macro_args *args;
2882
2883   pfile->cur_context++;
2884   if (pfile->cur_context == pfile->context_cap)
2885       expand_context_stack (pfile);
2886
2887   context = CURRENT_CONTEXT (pfile);
2888   args = context[-1].args;
2889
2890   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2891   context->u.arg = args->tokens + context->count;
2892   context->count = args->ends[token->val.aux] - context->count;
2893   context->args = 0;
2894   context->posn = 0;
2895   context->level = args->level;
2896   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2897   context->pushed_token = 0;
2898
2899   /* Set the flags of the first token.  There is one.  */
2900   {
2901     const cpp_token *first = context->u.arg[0];
2902     if (!first)
2903       first = context->u.arg[1];
2904
2905     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2906                           token->flags & (PREV_WHITE | BOL));
2907   }
2908
2909   if (token->flags & PASTE_LEFT)
2910     context->flags |= CONTEXT_PASTEL;
2911   if (pfile->paste_level)
2912     context->flags |= CONTEXT_PASTER;
2913 }
2914
2915 /* "Unget" a token.  It is effectively inserted in the token queue and
2916    will be returned by the next call to get_raw_token.  */
2917 void
2918 _cpp_push_token (pfile, token)
2919      cpp_reader *pfile;
2920      const cpp_token *token;
2921 {
2922   cpp_context *context = CURRENT_CONTEXT (pfile);
2923
2924   if (context->posn > 0)
2925     {
2926       const cpp_token *prev;
2927       if (IS_ARG_CONTEXT (context))
2928         prev = context->u.arg[context->posn - 1];
2929       else
2930         prev = &context->u.list->tokens[context->posn - 1];
2931
2932       if (prev == token)
2933         {
2934           context->posn--;
2935           return;
2936         }
2937     }
2938
2939   if (context->pushed_token)
2940     cpp_ice (pfile, "two tokens pushed in a row");
2941   if (token->type != CPP_EOF)
2942     context->pushed_token = token;
2943   /* Don't push back a directive's CPP_EOF, step back instead.  */
2944   else if (pfile->cur_context == 0)
2945     pfile->contexts[0].posn--;
2946 }
2947
2948 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
2949    introducing the directive.  */
2950 static void
2951 process_directive (pfile, token)
2952      cpp_reader *pfile;
2953      const cpp_token *token;
2954 {
2955   const struct directive *d = pfile->token_list.directive;
2956   int prev_nme = 0;
2957
2958   /* Skip over the directive name.  */
2959   if (token[1].type == CPP_NAME)
2960     _cpp_get_raw_token (pfile);
2961   else if (token[1].type != CPP_NUMBER)
2962     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
2963
2964   if (! (d->flags & EXPAND))
2965     prev_nme = prevent_macro_expansion (pfile);
2966   (void) (*d->handler) (pfile);
2967   if (! (d->flags & EXPAND))
2968     restore_macro_expansion (pfile, prev_nme);
2969   _cpp_skip_rest_of_line (pfile);
2970 }
2971
2972 /* The external interface to return the next token.  All macro
2973    expansion and directive processing is handled internally, the
2974    caller only ever sees the output after preprocessing.  */
2975 const cpp_token *
2976 cpp_get_token (pfile)
2977      cpp_reader *pfile;
2978 {
2979   const cpp_token *token;
2980   /* Loop till we hit a non-directive, non-placemarker token.  */
2981   for (;;)
2982     {
2983       token = _cpp_get_token (pfile);
2984
2985       if (token->type == CPP_PLACEMARKER)
2986         continue;
2987
2988       if (token->type == CPP_HASH && token->flags & BOL
2989           && pfile->token_list.directive)
2990         {
2991           process_directive (pfile, token);
2992           continue;
2993         }
2994
2995       return token;
2996     }
2997 }
2998
2999 /* The internal interface to return the next token.  There are two
3000    differences between the internal and external interfaces: the
3001    internal interface may return a PLACEMARKER token, and it does not
3002    process directives.  */
3003 const cpp_token *
3004 _cpp_get_token (pfile)
3005      cpp_reader *pfile;
3006 {
3007   const cpp_token *token, *old_token;
3008   cpp_hashnode *node;
3009
3010   /* Loop until we hit a non-macro token.  */
3011   for (;;)
3012     {
3013       token = get_raw_token (pfile);
3014
3015       /* Short circuit EOF. */
3016       if (token->type == CPP_EOF)
3017         return token;
3018
3019       /* If we are skipping... */
3020       if (pfile->skipping)
3021         {
3022           /* we still have to process directives,  */
3023           if (pfile->token_list.directive)
3024             return token;
3025
3026           /* but everything else is ignored.  */
3027           _cpp_skip_rest_of_line (pfile);
3028           continue;
3029         }
3030
3031       /* If there's a potential control macro and we get here, then that
3032          #ifndef didn't cover the entire file and its argument shouldn't
3033          be taken as a control macro.  */
3034       pfile->potential_control_macro = 0;
3035
3036       /* If we are rescanning preprocessed input, no macro expansion or
3037          token pasting may occur.  */
3038       if (CPP_OPTION (pfile, preprocessed))
3039         return token;
3040
3041       old_token = token;
3042
3043       /* See if there's a token to paste with this one.  */
3044       if (!pfile->paste_level)
3045         token = maybe_paste_with_next (pfile, token);
3046
3047       /* If it isn't a macro, return it now.  */
3048       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3049         return token;
3050
3051       /* Is macro expansion disabled in general, or are we in the
3052          middle of a token paste, or was this token just pasted?
3053          (Note we don't check token->flags & PASTED, because that
3054          counts tokens that were pasted at some point in the past,
3055          we're only interested in tokens that were pasted by this call
3056          to maybe_paste_with_next.)  */
3057       if (pfile->no_expand_level == pfile->cur_context
3058           || pfile->paste_level
3059           || (token != old_token
3060               && pfile->no_expand_level + 1 == pfile->cur_context))
3061         return token;
3062
3063       node = token->val.node;
3064       if (node->type != T_MACRO)
3065         return special_symbol (pfile, node, token);
3066
3067       if (is_macro_disabled (pfile, node->value.expansion, token))
3068         return token;
3069
3070       if (push_macro_context (pfile, token))
3071         return token;
3072       /* else loop */
3073     }
3074 }
3075
3076 /* Returns the next raw token, i.e. without performing macro
3077    expansion.  Argument contexts are automatically entered.  */
3078 static const cpp_token *
3079 get_raw_token (pfile)
3080      cpp_reader *pfile;
3081 {
3082   const cpp_token *result;
3083   cpp_context *context;
3084
3085   for (;;)
3086     {
3087       context = CURRENT_CONTEXT (pfile);
3088       if (context->pushed_token)
3089         {
3090           result = context->pushed_token;
3091           context->pushed_token = 0;
3092           return result;        /* Cannot be a CPP_MACRO_ARG */
3093         }
3094       else if (context->posn == context->count)
3095         {
3096           if (pop_context (pfile))
3097             return &eof_token;
3098           continue;
3099         }
3100       else if (IS_ARG_CONTEXT (context))
3101         {
3102           result = context->u.arg[context->posn++];
3103           if (result == 0)
3104             {
3105               context->flags ^= CONTEXT_RAW;
3106               result = context->u.arg[context->posn++];
3107             }
3108           return result;        /* Cannot be a CPP_MACRO_ARG */
3109         }
3110
3111       result = &context->u.list->tokens[context->posn++];
3112
3113       if (result->type != CPP_MACRO_ARG)
3114         return result;
3115
3116       if (result->flags & STRINGIFY_ARG)
3117         return stringify_arg (pfile, result);
3118
3119       push_arg_context (pfile, result);
3120     }
3121 }
3122
3123 /* Internal interface to get the token without macro expanding.  */
3124 const cpp_token *
3125 _cpp_get_raw_token (pfile)
3126      cpp_reader *pfile;
3127 {
3128   int prev_nme = prevent_macro_expansion (pfile);
3129   const cpp_token *result = _cpp_get_token (pfile);
3130   restore_macro_expansion (pfile, prev_nme);
3131   return result;
3132 }
3133
3134 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
3135    list should be overwritten, or zero if we need to append
3136    (typically, if we are within the arguments to a macro, or looking
3137    for the '(' to start a function-like macro invocation).  */
3138 static int
3139 lex_next (pfile, clear)
3140      cpp_reader *pfile;
3141      int clear;
3142 {
3143   cpp_toklist *list = &pfile->token_list;
3144   const cpp_token *old_list = list->tokens;
3145   unsigned int old_used = list->tokens_used;
3146
3147   if (clear)
3148     {
3149       /* Release all temporary tokens.  */
3150       _cpp_clear_toklist (list);
3151       pfile->contexts[0].posn = 0;
3152       if (pfile->temp_used)
3153         release_temp_tokens (pfile);
3154     }
3155   lex_line (pfile, list);
3156   pfile->contexts[0].count = list->tokens_used;
3157
3158   if (!clear && pfile->args)
3159     {
3160       /* Fix up argument token pointers.  */
3161       if (old_list != list->tokens)
3162         {
3163           unsigned int i;
3164
3165           for (i = 0; i < pfile->args->used; i++)
3166             {
3167               const cpp_token *token = pfile->args->tokens[i];
3168               if (token >= old_list && token < old_list + old_used)
3169                 pfile->args->tokens[i] = (const cpp_token *)
3170                 ((char *) token + ((char *) list->tokens - (char *) old_list));
3171             }
3172         }
3173
3174       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3175          tokens within the list of arguments that would otherwise act as
3176          preprocessing directives, the behavior is undefined.
3177
3178          This implementation will report a hard error and treat the
3179          'sequence of preprocessing tokens' as part of the macro argument,
3180          not a directive.
3181
3182          Note if pfile->args == 0, we're OK since we're only inside a
3183          macro argument after a '('.  */
3184       if (list->directive)
3185         {
3186           cpp_error_with_line (pfile, list->tokens[old_used].line,
3187                                list->tokens[old_used].col,
3188                                "#%s may not be used inside a macro argument",
3189                                list->directive->name);
3190           return 1;
3191         }
3192     }
3193
3194   return 0;
3195 }
3196
3197 /* Pops a context off the context stack.  If we're at the bottom, lexes
3198    the next logical line.  Returns EOF if we're at the end of the
3199    argument list to the # operator, or we should not "overflow"
3200    into the rest of the file (e.g. 6.10.3.1.1).  */
3201 static int
3202 pop_context (pfile)
3203      cpp_reader *pfile;
3204 {
3205   cpp_context *context;
3206
3207   if (pfile->cur_context == 0)
3208     {
3209       /* If we are currently processing a directive, do not advance.  6.10
3210          paragraph 2: A new-line character ends the directive even if it
3211          occurs within what would otherwise be an invocation of a
3212          function-like macro.  */
3213       if (pfile->token_list.directive)
3214         return 1;
3215
3216       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3217     }
3218
3219   /* Argument contexts, when parsing args or handling # operator
3220      return CPP_EOF at the end.  */
3221   context = CURRENT_CONTEXT (pfile);
3222   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3223     return 1;
3224
3225   /* Free resources when leaving macro contexts.  */
3226   if (context->args)
3227     free_macro_args (context->args);
3228
3229   if (pfile->cur_context == pfile->no_expand_level)
3230     pfile->no_expand_level--;
3231   pfile->cur_context--;
3232
3233   return 0;
3234 }
3235
3236 /* Turn off macro expansion at the current context level.  */
3237 static unsigned int
3238 prevent_macro_expansion (pfile)
3239      cpp_reader *pfile;
3240 {
3241   unsigned int prev_value = pfile->no_expand_level;
3242   pfile->no_expand_level = pfile->cur_context;
3243   return prev_value;
3244 }
3245
3246 /* Restore macro expansion to its previous state.  */
3247 static void
3248 restore_macro_expansion (pfile, prev_value)
3249      cpp_reader *pfile;
3250      unsigned int prev_value;
3251 {
3252   pfile->no_expand_level = prev_value;
3253 }
3254
3255 /* Used by cpperror.c to obtain the correct line and column to report
3256    in a diagnostic.  */
3257 unsigned int
3258 _cpp_get_line (pfile, pcol)
3259      cpp_reader *pfile;
3260      unsigned int *pcol;
3261 {
3262   unsigned int index;
3263   const cpp_token *cur_token;
3264
3265   if (pfile->state.in_lex_line)
3266     index = pfile->token_list.tokens_used;
3267   else
3268     {
3269       index = pfile->contexts[0].posn;
3270
3271       if (index == 0)
3272         {
3273           if (pcol)
3274             *pcol = 0;
3275           return 0;
3276         }
3277       index--;
3278     }
3279
3280   cur_token = &pfile->token_list.tokens[index];
3281   if (pcol)
3282     *pcol = cur_token->col;
3283   return cur_token->line;
3284 }
3285
3286 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3287 static const char * const monthnames[] =
3288 {
3289   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3290   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3291 };
3292
3293 /* Handle builtin macros like __FILE__.  */
3294 static const cpp_token *
3295 special_symbol (pfile, node, token)
3296      cpp_reader *pfile;
3297      cpp_hashnode *node;
3298      const cpp_token *token;
3299 {
3300   cpp_token *result;
3301   cpp_buffer *ip;
3302
3303   switch (node->type)
3304     {
3305     case T_FILE:
3306     case T_BASE_FILE:
3307       {
3308         const char *file;
3309
3310         ip = CPP_BUFFER (pfile);
3311         if (ip == 0)
3312           file = "";
3313         else
3314           {
3315             if (node->type == T_BASE_FILE)
3316               while (CPP_PREV_BUFFER (ip) != NULL)
3317                 ip = CPP_PREV_BUFFER (ip);
3318
3319             file = ip->nominal_fname;
3320           }
3321         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3322                                     strlen (file));
3323       }
3324       break;
3325
3326     case T_INCLUDE_LEVEL:
3327       /* pfile->include_depth counts the primary source as level 1,
3328          but historically __INCLUDE_DEPTH__ has called the primary
3329          source level 0.  */
3330       result = alloc_number_token (pfile, pfile->include_depth - 1);
3331       break;
3332
3333     case T_SPECLINE:
3334       /* If __LINE__ is embedded in a macro, it must expand to the
3335          line of the macro's invocation, not its definition.
3336          Otherwise things like assert() will not work properly.  */
3337       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3338       break;
3339
3340     case T_STDC:
3341       {
3342         int stdc = 1;
3343
3344 #ifdef STDC_0_IN_SYSTEM_HEADERS
3345         if (CPP_IN_SYSTEM_HEADER (pfile)
3346             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3347           stdc = 0;
3348 #endif
3349         result = alloc_number_token (pfile, stdc);
3350       }
3351       break;
3352
3353     case T_DATE:
3354     case T_TIME:
3355       if (pfile->date == 0)
3356         {
3357           /* Allocate __DATE__ and __TIME__ from permanent storage,
3358              and save them in pfile so we don't have to do this again.
3359              We don't generate these strings at init time because
3360              time() and localtime() are very slow on some systems.  */
3361           time_t tt = time (NULL);
3362           struct tm *tb = localtime (&tt);
3363
3364           pfile->date = make_string_token
3365             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3366           pfile->time = make_string_token
3367             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3368
3369           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3370                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3371           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3372                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3373         }
3374       result = node->type == T_DATE ? pfile->date: pfile->time;
3375       break;
3376
3377     case T_POISON:
3378       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3379       return token;
3380
3381     default:
3382       cpp_ice (pfile, "invalid special hash type");
3383       return token;
3384     }
3385
3386   ASSIGN_FLAGS_AND_POS (result, token);
3387   return result;
3388 }
3389 #undef DSC
3390
3391 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3392    if it hasn't happened already.  */
3393
3394 void
3395 _cpp_init_input_buffer (pfile)
3396      cpp_reader *pfile;
3397 {
3398   cpp_context *base;
3399
3400   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3401   pfile->no_expand_level = UINT_MAX;
3402   pfile->context_cap = 20;
3403   pfile->cur_context = 0;
3404
3405   pfile->contexts = (cpp_context *)
3406     xmalloc (pfile->context_cap * sizeof (cpp_context));
3407
3408   /* Clear the base context.  */
3409   base = &pfile->contexts[0];
3410   base->u.list = &pfile->token_list;
3411   base->posn = 0;
3412   base->count = 0;
3413   base->args = 0;
3414   base->level = 0;
3415   base->flags = 0;
3416   base->pushed_token = 0;
3417 }
3418
3419 /* Moves to the end of the directive line, popping contexts as
3420    necessary.  */
3421 void
3422 _cpp_skip_rest_of_line (pfile)
3423      cpp_reader *pfile;
3424 {
3425   /* Discard all stacked contexts.  */
3426   int i;
3427   for (i = pfile->cur_context; i > 0; i--)
3428     if (pfile->contexts[i].args)
3429       free_macro_args (pfile->contexts[i].args);
3430
3431   if (pfile->no_expand_level <= pfile->cur_context)
3432     pfile->no_expand_level = 0;
3433   pfile->cur_context = 0;
3434
3435   /* Clear the base context, and clear the directive pointer so that
3436      get_raw_token will advance to the next line.  */
3437   pfile->contexts[0].count = 0;
3438   pfile->contexts[0].posn = 0;
3439   pfile->token_list.directive = 0;
3440 }
3441
3442 /* Directive handler wrapper used by the command line option
3443    processor.  */
3444 void
3445 _cpp_run_directive (pfile, dir, buf, count, name)
3446      cpp_reader *pfile;
3447      const struct directive *dir;
3448      const char *buf;
3449      size_t count;
3450      const char *name;
3451 {
3452   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3453     {
3454       unsigned int prev_lvl = 0;
3455
3456       if (name)
3457         CPP_BUFFER (pfile)->nominal_fname = name;
3458       else
3459         CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3460       CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3461
3462       /* Scan the line now, else prevent_macro_expansion won't work.  */
3463       lex_next (pfile, 1);
3464       if (! (dir->flags & EXPAND))
3465         prev_lvl = prevent_macro_expansion (pfile);
3466
3467       (void) (*dir->handler) (pfile);
3468
3469       if (! (dir->flags & EXPAND))
3470         restore_macro_expansion (pfile, prev_lvl);
3471
3472       _cpp_skip_rest_of_line (pfile);
3473       cpp_pop_buffer (pfile);
3474     }
3475 }